1"""Regression tests for urllib"""
2
3import collections
4import urllib
5import httplib
6import io
7import unittest
8import os
9import sys
10import mimetools
11import tempfile
12
13from test import test_support
14from base64 import b64encode
15
16
def hexescape(char):
    """Return *char* percent-escaped as RFC 2396 specifies.

    The result is '%' followed by the upper-case hexadecimal value of
    ord(char), zero-padded to at least two digits.
    """
    return "%%%02X" % ord(char)
23
24
def fakehttp(fakedata):
    """Build an HTTPConnection subclass whose socket replays *fakedata*.

    The returned class never touches the network: connect() installs an
    in-memory FakeSocket holding *fakedata*, and whatever is sent through
    that socket is stored on the class as ``buf`` so tests can inspect it.
    """

    class FakeSocket(io.BytesIO):
        """In-memory stand-in for a socket, backed by a BytesIO buffer."""

        def sendall(self, data):
            # Remember the outgoing data on the connection class.
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            # The buffer itself doubles as the file object.
            return self

        def read(self, amt=None):
            # A closed buffer reads as empty instead of raising.
            return b"" if self.closed else io.BytesIO.read(self, amt)

        def readline(self, length=None):
            # Same closed-buffer handling as read().
            return b"" if self.closed else io.BytesIO.readline(self, length)

    class FakeHTTPConnection(httplib.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = ""

        def connect(self):
            # Hand out a fresh fake socket and also expose it on the class.
            sock = FakeSocket(self.fakedata)
            self.sock = sock
            self.__class__.fakesock = sock

    # Attached after the class statement: a class body cannot read the
    # enclosing function's argument under the same name.
    FakeHTTPConnection.fakedata = fakedata

    return FakeHTTPConnection
55
56
class FakeHTTPMixin(object):
    """Mixin that swaps httplib.HTTP's connection class for a fake one."""

    def fakehttp(self, fakedata):
        """Install a fake connection class that serves *fakedata*."""
        # The real connection class must be in place before we replace it.
        assert httplib.HTTP._connection_class == httplib.HTTPConnection

        fake_connection_class = fakehttp(fakedata)
        httplib.HTTP._connection_class = fake_connection_class

    def unfakehttp(self):
        """Put the real httplib.HTTPConnection back."""
        httplib.HTTP._connection_class = httplib.HTTPConnection
65
66
class urlopen_FileTests(unittest.TestCase):
    """Exercise urlopen() against a temporary local file.

    Covering as much of the returned object's interface as possible here
    cuts down on how much testing has to rely on connecting to the Net.

    """

    def setUp(self):
        """Write a one-line temp file and open it through urlopen()."""
        self.pathname = test_support.TESTFN
        self.text = "test_urllib: %s\n" % self.__class__.__name__
        with open(self.pathname, 'wb') as temp_file:
            temp_file.write(self.text)
        self.returned_obj = urllib.urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Close the opened object and delete the temp file."""
        self.returned_obj.close()
        os.remove(test_support.TESTFN)

    def test_interface(self):
        # The object returned by urlopen() must offer all of these names.
        required = ("read", "readline", "readlines", "fileno",
                    "close", "info", "geturl", "getcode", "__iter__")
        for name in required:
            has_it = hasattr(self.returned_obj, name)
            self.assertTrue(
                has_it,
                "object returned by urlopen() lacks %s attribute" % name)

    def test_read(self):
        content = self.returned_obj.read()
        self.assertEqual(self.text, content)

    def test_readline(self):
        first = self.returned_obj.readline()
        self.assertEqual(self.text, first)
        second = self.returned_obj.readline()
        self.assertEqual('', second,
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines = self.returned_obj.readlines()
        self.assertEqual(len(lines), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        fd = self.returned_obj.fileno()
        self.assertIsInstance(fd, int, "fileno() did not return an int")
        raw = os.read(fd, len(self.text))
        self.assertEqual(raw, self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Close here; tearDown() will then call close() a second time on
        # the same object.
        self.returned_obj.close()

    def test_info(self):
        headers = self.returned_obj.info()
        self.assertIsInstance(headers, mimetools.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertEqual(self.returned_obj.getcode(), None)

    def test_iter(self):
        # No need to count iterations: any line beyond the first would
        # differ from self.text and fail the comparison immediately.
        line_iterator = self.returned_obj.__iter__()
        for line in line_iterator:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        with self.assertRaises(ValueError):
            urllib.urlopen('./' + self.pathname)
146
class ProxyTests(unittest.TestCase):
    """Tests for proxy discovery and bypass driven by environment variables."""

    def setUp(self):
        # The guard records changes to env vars so tearDown can undo them.
        self.env = test_support.EnvironmentVarGuard()
        # Start every test with all proxy related env vars removed.
        proxy_vars = [name for name in os.environ.keys()
                      if 'proxy' in name.lower()]
        for name in proxy_vars:
            self.env.unset(name)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.getproxies_environment()
        # Keys are lower-cased with the '_proxy' suffix removed.
        self.assertEqual('localhost', proxies['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com:1234')
        for host in ('anotherdomain.com',
                     'anotherdomain.com:8888',
                     'newdomain.com:1234'):
            self.assertTrue(urllib.proxy_bypass_environment(host))

    def test_proxy_cgi_ignore(self):
        proxy_url = 'http://somewhere:3128'
        try:
            self.env.set('HTTP_PROXY', proxy_url)
            self.assertEqual(proxy_url,
                             urllib.getproxies_environment()['http'])
            # Once REQUEST_METHOD is set, the 'http' entry must be gone.
            self.env.set('REQUEST_METHOD', 'GET')
            self.assertNotIn('http', urllib.getproxies_environment())
        finally:
            self.env.unset('REQUEST_METHOD')
            self.env.unset('HTTP_PROXY')

    def test_proxy_bypass_environment_host_match(self):
        bypass = urllib.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t')
        bypassed = (
            'localhost',
            'LocalHost',                 # MixedCase
            'LOCALHOST',                 # UPPERCASE
            'newdomain.com:1234',
            'foo.d.o.t',                 # issue 29142
            'anotherdomain.com:8888',
            'www.newdomain.com:1234',
        )
        not_bypassed = (
            'prelocalhost',
            'newdomain.com',             # no port
            'newdomain.com:1235',        # wrong port
        )
        for host in bypassed:
            self.assertTrue(bypass(host))
        for host in not_bypassed:
            self.assertFalse(bypass(host))
199
class ProxyTests_withOrderedEnv(unittest.TestCase):
    """Proxy tests in which the order of environment variables matters."""

    def setUp(self):
        # Replace os.environ with an empty ordered fake environment, since
        # these tests cover conditions where variable order _is_ significant.
        self._saved_env = os.environ
        os.environ = collections.OrderedDict()

    def tearDown(self):
        # Put the real environment mapping back.
        os.environ = self._saved_env

    def test_getproxies_environment_prefer_lowercase(self):
        env = os.environ
        bypass = urllib.proxy_bypass_environment

        # Test lowercase preference with removal
        env['no_proxy'] = ''
        env['No_Proxy'] = 'localhost'
        for host in ('localhost', 'arbitrary'):
            self.assertFalse(bypass(host))
        env['http_proxy'] = ''
        env['HTTP_PROXY'] = 'http://somewhere:3128'
        self.assertEqual({}, urllib.getproxies_environment())

        # Test lowercase preference of proxy bypass and correct matching
        # including ports
        env['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        env['No_Proxy'] = 'xyz.com'
        for host in ('localhost', 'noproxy.com:5678', 'my.proxy:1234'):
            self.assertTrue(bypass(host))
        for host in ('my.proxy', 'arbitrary'):
            self.assertFalse(bypass(host))

        # Test lowercase preference with replacement
        env['http_proxy'] = 'http://somewhere:3128'
        env['Http_Proxy'] = 'http://somewhereelse:3128'
        found = urllib.getproxies_environment()
        self.assertEqual('http://somewhere:3128', found['http'])
234
235
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Exercise urlopen() over a faked http connection."""

    def test_read(self):
        self.fakehttp('Hello!')
        try:
            response = urllib.urlopen("http://python.org/")
            self.assertEqual(response.readline(), 'Hello!')
            self.assertEqual(response.readline(), '')
            self.assertEqual(response.geturl(), 'http://python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp('Hello!')
        try:
            self.assertEqual(urllib.urlopen(url).geturl(), url)
        finally:
            self.unfakehttp()

    def test_read_bogus(self):
        # urlopen() should raise IOError for many error codes.
        canned = '''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
'''
        self.fakehttp(canned)
        try:
            with self.assertRaises(IOError):
                urllib.urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # A redirect whose Location is a file: URL must raise IOError with
        # a message naming the redirection.
        canned = """HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file:README
Connection: close
Content-Type: text/html; charset=iso-8859-1
"""
        self.fakehttp(canned)
        try:
            expected_message = "Redirection to url 'file:"
            with self.assertRaisesRegexp(IOError, expected_message):
                urllib.urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        attempts = urllib.FancyURLopener().maxtries
        for _ in range(attempts):
            self.fakehttp(b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
''')
            try:
                with self.assertRaises(IOError):
                    urllib.urlopen("http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises IOError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp('')
        try:
            with self.assertRaises(IOError):
                urllib.urlopen('http://something')
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        with self.assertRaises(IOError):
            urllib.urlopen('file://localhost/a/missing/file.py')

        # An existing temp file opens fine ...
        fd, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        self.assertTrue(os.path.exists(tmp_file))
        try:
            opened = urllib.urlopen(tmp_fileurl)
            opened.close()
        finally:
            os.close(fd)
            os.unlink(tmp_file)

        # ... and the same URL fails once the file has been removed.
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(IOError):
            urllib.urlopen(tmp_fileurl)

    def test_ftp_nonexisting(self):
        with self.assertRaises(IOError):
            urllib.urlopen('ftp://localhost/not/existing/file.py')

    def test_userpass_inurl(self):
        self.fakehttp('Hello!')
        try:
            connection_class = httplib.HTTP._connection_class
            response = urllib.urlopen("http://user:pass@python.org/")
            expected_header = ("Authorization: Basic %s\r\n" %
                               b64encode('user:pass'))
            # The authorization header must be in place
            self.assertIn(expected_header, connection_class.buf)
            self.assertEqual(response.readline(), "Hello!")
            self.assertEqual(response.readline(), "")
            self.assertEqual(response.geturl(),
                             'http://user:pass@python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_with_spaces_inurl(self):
        self.fakehttp('Hello!')
        try:
            url = "http://a b:c d@python.org/"
            connection_class = httplib.HTTP._connection_class
            expected_header = ("Authorization: Basic %s\r\n" %
                               b64encode('a b:c d'))
            response = urllib.urlopen(url)
            # The authorization header must be in place
            self.assertIn(expected_header, connection_class.buf)
            self.assertEqual(response.readline(), "Hello!")
            self.assertEqual(response.readline(), "")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(response.geturl(), url)
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()
366
367
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # File names (absolute or relative to the current working
        # directory) that tearDown must delete. This only makes sure
        # temporary files get removed; it does nothing about closing files
        # that are still open, which stays the responsibility of each test.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(test_support.TESTFN)
        self.text = 'testing urllib.urlretrieve'
        # The with block closes the file exactly once, even if write() fails.
        with open(test_support.TESTFN, 'wb') as FILE:
            FILE.write(self.text)

    def tearDown(self):
        # Delete the temporary files. Removal is best effort: a registered
        # file may never have been created, so only OSError is ignored.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for the absolute form of *filePath*."""
        return "file://%s" % urllib.pathname2url(os.path.abspath(filePath))

    def createNewTempFile(self, data=""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        # Register first so the file is removed even if writing fails.
        self.registerFileForCleanUp(newFilePath)
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Remember *fileName* so tearDown deletes it."""
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.urlretrieve("file:%s" % test_support.TESTFN)
        self.assertEqual(result[0], test_support.TESTFN)
        self.assertIsInstance(result[1], mimetools.Message,
                              "did not get a mimetools.Message instance as "
                              "second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % test_support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.urlretrieve(self.constructLocalFileUrl(
            test_support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                  "made")
        with open(second_temp, 'rb') as FILE:
            text = FILE.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        # The expected call number lives in a one-element list owned by this
        # test, so the hook can update it without a mutable default argument.
        expected_count = [0]

        def hooktester(count, block_size, total_size):
            self.assertIsInstance(count, int)
            self.assertIsInstance(block_size, int)
            self.assertIsInstance(total_size, int)
            self.assertEqual(count, expected_count[0])
            expected_count[0] += 1

        second_temp = "%s.2" % test_support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.urlretrieve(self.constructLocalFileUrl(test_support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile()
        urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
            test_support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read). Since the block size is 8192 bytes, only one block read is
        # required to read the entire file.
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile("x" * 5)
        urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
            test_support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile("x" * 8193)
        urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
            test_support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 8193)
499
500
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Exercise urllib.urlretrieve() over faked http connections."""

    def test_short_content_raises_ContentTooShortError(self):
        # The response announces Content-Length: 100 but carries a much
        # shorter body.
        canned = '''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
'''
        self.fakehttp(canned)

        # A report hook that accepts the three progress arguments and
        # ignores them.
        ignore_progress = lambda par1, par2, par3: None

        try:
            with self.assertRaises(urllib.ContentTooShortError):
                urllib.urlretrieve('http://example.com',
                                   reporthook=ignore_progress)
        finally:
            self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        # Same truncated response, retrieved without any report hook.
        canned = '''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
'''
        self.fakehttp(canned)
        try:
            with self.assertRaises(urllib.ContentTooShortError):
                urllib.urlretrieve('http://example.com/')
        finally:
            self.unfakehttp()
538
class QuotingTests(unittest.TestCase):
    """Tests for urllib.quote() and urllib.quote_plus()

    According to RFC 2396 ("Uniform Resource Identifiers"), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.  The Python
    code of ``'%' + hex(ord(<character>))[2:]`` escapes a character properly.
    Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_.-"
        do_not_quote = ("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                        "abcdefghijklmnopqrstuvwxyz"
                        "0123456789"
                        "_.-")
        quoted = urllib.quote(do_not_quote)
        self.assertEqual(do_not_quote, quoted,
                         "using quote(): %s != %s" % (do_not_quote, quoted))
        quoted = urllib.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, quoted,
                         "using quote_plus(): %s != %s" %
                         (do_not_quote, quoted))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        default_safe = urllib.quote.func_defaults[0]
        self.assertEqual(default_safe, '/')

    def test_safe(self):
        # Characters named in 'safe' must come back unquoted even though
        # they would be quoted by default.
        quote_by_default = "<>"
        quoted = urllib.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, quoted,
                         "using quote(): %s != %s" % (quote_by_default, quoted))
        quoted = urllib.quote_plus(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, quoted,
                         "using quote_plus(): %s != %s" %
                         (quote_by_default, quoted))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        control_chars = ''.join(chr(num) for num in range(32))  # 0x00 - 0x1F
        should_quote = control_chars + '<>#%"{}|\^[]`' + chr(127)  # + 0x7F
        for char in should_quote:
            escaped = hexescape(char)
            quoted = urllib.quote(char)
            self.assertEqual(escaped, quoted,
                             "using quote(): %s should be escaped to %s, not %s" %
                             (char, escaped, quoted))
            quoted = urllib.quote_plus(char)
            self.assertEqual(escaped, quoted,
                             "using quote_plus(): "
                             "%s should be escapes to %s, not %s" %
                             (char, escaped, quoted))

        # Only the unsafe characters inside a longer string get escaped.
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        quoted = urllib.quote(partial_quote)
        self.assertEqual(expected, quoted,
                         "using quote(): %s != %s" % (expected, quoted))
        quoted = urllib.quote_plus(partial_quote)
        self.assertEqual(expected, quoted,
                         "using quote_plus(): %s != %s" % (expected, quoted))
        with self.assertRaises(TypeError):
            urllib.quote(None)

    def test_quoting_space(self):
        # quote() hex-escapes a space while quote_plus() turns it into '+'.
        escaped_space = hexescape(' ')
        quoted = urllib.quote(' ')
        self.assertEqual(quoted, escaped_space,
                         "using quote(): %s != %s" % (quoted, escaped_space))
        quoted = urllib.quote_plus(' ')
        self.assertEqual(quoted, '+',
                         "using quote_plus(): %s != +" % quoted)

        given = "a b cd e f"
        expect = given.replace(' ', escaped_space)
        quoted = urllib.quote(given)
        self.assertEqual(expect, quoted,
                         "using quote(): %s != %s" % (expect, quoted))
        expect = given.replace(' ', '+')
        quoted = urllib.quote_plus(given)
        self.assertEqual(expect, quoted,
                         "using quote_plus(): %s != %s" % (expect, quoted))

    def test_quoting_plus(self):
        given = 'alpha+beta gamma'
        # A literal '+' is escaped unless it is listed as safe.
        self.assertEqual(urllib.quote_plus(given), 'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.quote_plus(given, '+'), 'alpha+beta+gamma')
645
class UnquotingTests(unittest.TestCase):
    """Tests for unquote() and unquote_plus()

    See the doc string for QuotingTests for details on quoting and such.

    """

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            result = urllib.unquote(given)
            self.assertEqual(expect, result,
                             "using unquote(): %s != %s" % (expect, result))
            result = urllib.unquote_plus(given)
            self.assertEqual(expect, result,
                             "using unquote_plus(): %s != %s" %
                             (expect, result))
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        del escape_list
        # After unquoting every escape at once, the only '%' left should be
        # the one decoded from the escape of '%' itself. Check both
        # functions, not unquote() twice.
        result = urllib.unquote(escape_string)
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters escaped: "
                         "%s" % result)
        result = urllib.unquote_plus(escape_string)
        self.assertEqual(result.count('%'), 1,
                         "using unquote_plus(): not all characters escaped: "
                         "%s" % result)

    def test_unquoting_badpercent(self):
        # Test unquoting on bad percent-escapes: each input must come back
        # unchanged.
        for given in ('%xab', '%x', '%'):
            expect = given
            result = urllib.unquote(given)
            self.assertEqual(expect, result, "using unquote(): %r != %r"
                             % (expect, result))

    def test_unquoting_mixed_case(self):
        # Test unquoting on mixed-case hex digits in the percent-escapes
        given = '%Ab%eA'
        expect = '\xab\xea'
        result = urllib.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))

    def test_unquoting_parts(self):
        # Make sure unquoting works when have non-quoted characters
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
        result = urllib.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %s != %s" % (expect, result))
        result = urllib.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %s != %s" % (expect, result))

    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus()
        given = "are+there+spaces..."
        expect = given
        result = urllib.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %s != %s" % (expect, result))
        expect = given.replace('+', ' ')
        result = urllib.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %s != %s" % (expect, result))

    def test_unquote_with_unicode(self):
        # For unicode input the escapes are expected to decode to the code
        # points \xc3 and \xbc individually.
        r = urllib.unquote(u'br%C3%BCckner_sapporo_20050930.doc')
        self.assertEqual(r, u'br\xc3\xbcckner_sapporo_20050930.doc')
731
732class urlencode_Tests(unittest.TestCase):
733    """Tests for urlencode()"""
734
735    def help_inputtype(self, given, test_type):
736        """Helper method for testing different input types.
737
738        'given' must lead to only the pairs:
739            * 1st, 1
740            * 2nd, 2
741            * 3rd, 3
742
743        Test cannot assume anything about order.  Docs make no guarantee and
744        have possible dictionary input.
745
746        """
747        expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
748        result = urllib.urlencode(given)
749        for expected in expect_somewhere:
750            self.assertIn(expected, result,
751                         "testing %s: %s not found in %s" %
752                         (test_type, expected, result))
753        self.assertEqual(result.count('&'), 2,
754                         "testing %s: expected 2 '&'s; got %s" %
755                         (test_type, result.count('&')))
756        amp_location = result.index('&')
757        on_amp_left = result[amp_location - 1]
758        on_amp_right = result[amp_location + 1]
759        self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
760                     "testing %s: '&' not located in proper place in %s" %
761                     (test_type, result))
762        self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
763                         "testing %s: "
764                         "unexpected number of characters: %s != %s" %
765                         (test_type, len(result), (5 * 3) + 2))
766
767    def test_using_mapping(self):
768        # Test passing in a mapping object as an argument.
769        self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
770                            "using dict as input type")
771
772    def test_using_sequence(self):
773        # Test passing in a sequence of two-item sequences as an argument.
774        self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
775                            "using sequence of two-item tuples as input")
776
777    def test_quoting(self):
778        # Make sure keys and values are quoted using quote_plus()
779        given = {"&":"="}
780        expect = "%s=%s" % (hexescape('&'), hexescape('='))
781        result = urllib.urlencode(given)
782        self.assertEqual(expect, result)
783        given = {"key name":"A bunch of pluses"}
784        expect = "key+name=A+bunch+of+pluses"
785        result = urllib.urlencode(given)
786        self.assertEqual(expect, result)
787
788    def test_doseq(self):
789        # Test that passing True for 'doseq' parameter works correctly
790        given = {'sequence':['1', '2', '3']}
791        expect = "sequence=%s" % urllib.quote_plus(str(['1', '2', '3']))
792        result = urllib.urlencode(given)
793        self.assertEqual(expect, result)
794        result = urllib.urlencode(given, True)
795        for value in given["sequence"]:
796            expect = "sequence=%s" % value
797            self.assertIn(expect, result)
798        self.assertEqual(result.count('&'), 2,
799                         "Expected 2 '&'s, got %s" % result.count('&'))
800
801class Pathname_Tests(unittest.TestCase):
802    """Test pathname2url() and url2pathname()"""
803
804    def test_basic(self):
805        # Make sure simple tests pass
806        expected_path = os.path.join("parts", "of", "a", "path")
807        expected_url = "parts/of/a/path"
808        result = urllib.pathname2url(expected_path)
809        self.assertEqual(expected_url, result,
810                         "pathname2url() failed; %s != %s" %
811                         (result, expected_url))
812        result = urllib.url2pathname(expected_url)
813        self.assertEqual(expected_path, result,
814                         "url2pathame() failed; %s != %s" %
815                         (result, expected_path))
816
817    def test_quoting(self):
818        # Test automatic quoting and unquoting works for pathnam2url() and
819        # url2pathname() respectively
820        given = os.path.join("needs", "quot=ing", "here")
821        expect = "needs/%s/here" % urllib.quote("quot=ing")
822        result = urllib.pathname2url(given)
823        self.assertEqual(expect, result,
824                         "pathname2url() failed; %s != %s" %
825                         (expect, result))
826        expect = given
827        result = urllib.url2pathname(result)
828        self.assertEqual(expect, result,
829                         "url2pathname() failed; %s != %s" %
830                         (expect, result))
831        given = os.path.join("make sure", "using_quote")
832        expect = "%s/using_quote" % urllib.quote("make sure")
833        result = urllib.pathname2url(given)
834        self.assertEqual(expect, result,
835                         "pathname2url() failed; %s != %s" %
836                         (expect, result))
837        given = "make+sure/using_unquote"
838        expect = os.path.join("make+sure", "using_unquote")
839        result = urllib.url2pathname(given)
840        self.assertEqual(expect, result,
841                         "url2pathname() failed; %s != %s" %
842                         (expect, result))
843
844    @unittest.skipUnless(sys.platform == 'win32',
845                         'test specific to the nturl2path library')
846    def test_ntpath(self):
847        given = ('/C:/', '///C:/', '/C|//')
848        expect = 'C:\\'
849        for url in given:
850            result = urllib.url2pathname(url)
851            self.assertEqual(expect, result,
852                             'nturl2path.url2pathname() failed; %s != %s' %
853                             (expect, result))
854        given = '///C|/path'
855        expect = 'C:\\path'
856        result = urllib.url2pathname(given)
857        self.assertEqual(expect, result,
858                         'nturl2path.url2pathname() failed; %s != %s' %
859                         (expect, result))
860
861class Utility_Tests(unittest.TestCase):
862    """Testcase to test the various utility functions in the urllib."""
863    # In Python 3 this test class is moved to test_urlparse.
864
865    def test_splittype(self):
866        splittype = urllib.splittype
867        self.assertEqual(splittype('type:opaquestring'), ('type', 'opaquestring'))
868        self.assertEqual(splittype('opaquestring'), (None, 'opaquestring'))
869        self.assertEqual(splittype(':opaquestring'), (None, ':opaquestring'))
870        self.assertEqual(splittype('type:'), ('type', ''))
871        self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string'))
872
873    def test_splithost(self):
874        splithost = urllib.splithost
875        self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'),
876                         ('www.example.org:80', '/foo/bar/baz.html'))
877        self.assertEqual(splithost('//www.example.org:80'),
878                         ('www.example.org:80', ''))
879        self.assertEqual(splithost('/foo/bar/baz.html'),
880                         (None, '/foo/bar/baz.html'))
881
882        # bpo-30500: # starts a fragment.
883        self.assertEqual(splithost('//127.0.0.1#@host.com'),
884                         ('127.0.0.1', '/#@host.com'))
885        self.assertEqual(splithost('//127.0.0.1#@host.com:80'),
886                         ('127.0.0.1', '/#@host.com:80'))
887        self.assertEqual(splithost('//127.0.0.1:80#@host.com'),
888                         ('127.0.0.1:80', '/#@host.com'))
889
890        # Empty host is returned as empty string.
891        self.assertEqual(splithost("///file"),
892                         ('', '/file'))
893
894        # Trailing semicolon, question mark and hash symbol are kept.
895        self.assertEqual(splithost("//example.net/file;"),
896                         ('example.net', '/file;'))
897        self.assertEqual(splithost("//example.net/file?"),
898                         ('example.net', '/file?'))
899        self.assertEqual(splithost("//example.net/file#"),
900                         ('example.net', '/file#'))
901
902    def test_splituser(self):
903        splituser = urllib.splituser
904        self.assertEqual(splituser('User:Pass@www.python.org:080'),
905                         ('User:Pass', 'www.python.org:080'))
906        self.assertEqual(splituser('@www.python.org:080'),
907                         ('', 'www.python.org:080'))
908        self.assertEqual(splituser('www.python.org:080'),
909                         (None, 'www.python.org:080'))
910        self.assertEqual(splituser('User:Pass@'),
911                         ('User:Pass', ''))
912        self.assertEqual(splituser('User@example.com:Pass@www.python.org:080'),
913                         ('User@example.com:Pass', 'www.python.org:080'))
914
915    def test_splitpasswd(self):
916        # Some of the password examples are not sensible, but it is added to
917        # confirming to RFC2617 and addressing issue4675.
918        splitpasswd = urllib.splitpasswd
919        self.assertEqual(splitpasswd('user:ab'), ('user', 'ab'))
920        self.assertEqual(splitpasswd('user:a\nb'), ('user', 'a\nb'))
921        self.assertEqual(splitpasswd('user:a\tb'), ('user', 'a\tb'))
922        self.assertEqual(splitpasswd('user:a\rb'), ('user', 'a\rb'))
923        self.assertEqual(splitpasswd('user:a\fb'), ('user', 'a\fb'))
924        self.assertEqual(splitpasswd('user:a\vb'), ('user', 'a\vb'))
925        self.assertEqual(splitpasswd('user:a:b'), ('user', 'a:b'))
926        self.assertEqual(splitpasswd('user:a b'), ('user', 'a b'))
927        self.assertEqual(splitpasswd('user 2:ab'), ('user 2', 'ab'))
928        self.assertEqual(splitpasswd('user+1:a+b'), ('user+1', 'a+b'))
929        self.assertEqual(splitpasswd('user:'), ('user', ''))
930        self.assertEqual(splitpasswd('user'), ('user', None))
931        self.assertEqual(splitpasswd(':ab'), ('', 'ab'))
932
933    def test_splitport(self):
934        splitport = urllib.splitport
935        self.assertEqual(splitport('parrot:88'), ('parrot', '88'))
936        self.assertEqual(splitport('parrot'), ('parrot', None))
937        self.assertEqual(splitport('parrot:'), ('parrot', None))
938        self.assertEqual(splitport('127.0.0.1'), ('127.0.0.1', None))
939        self.assertEqual(splitport('parrot:cheese'), ('parrot:cheese', None))
940        self.assertEqual(splitport('[::1]:88'), ('[::1]', '88'))
941        self.assertEqual(splitport('[::1]'), ('[::1]', None))
942        self.assertEqual(splitport(':88'), ('', '88'))
943
944    def test_splitnport(self):
945        splitnport = urllib.splitnport
946        self.assertEqual(splitnport('parrot:88'), ('parrot', 88))
947        self.assertEqual(splitnport('parrot'), ('parrot', -1))
948        self.assertEqual(splitnport('parrot', 55), ('parrot', 55))
949        self.assertEqual(splitnport('parrot:'), ('parrot', -1))
950        self.assertEqual(splitnport('parrot:', 55), ('parrot', 55))
951        self.assertEqual(splitnport('127.0.0.1'), ('127.0.0.1', -1))
952        self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55))
953        self.assertEqual(splitnport('parrot:cheese'), ('parrot', None))
954        self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None))
955
956    def test_splitquery(self):
957        # Normal cases are exercised by other tests; ensure that we also
958        # catch cases with no port specified (testcase ensuring coverage)
959        splitquery = urllib.splitquery
960        self.assertEqual(splitquery('http://python.org/fake?foo=bar'),
961                         ('http://python.org/fake', 'foo=bar'))
962        self.assertEqual(splitquery('http://python.org/fake?foo=bar?'),
963                         ('http://python.org/fake?foo=bar', ''))
964        self.assertEqual(splitquery('http://python.org/fake'),
965                         ('http://python.org/fake', None))
966        self.assertEqual(splitquery('?foo=bar'), ('', 'foo=bar'))
967
968    def test_splittag(self):
969        splittag = urllib.splittag
970        self.assertEqual(splittag('http://example.com?foo=bar#baz'),
971                         ('http://example.com?foo=bar', 'baz'))
972        self.assertEqual(splittag('http://example.com?foo=bar#'),
973                         ('http://example.com?foo=bar', ''))
974        self.assertEqual(splittag('#baz'), ('', 'baz'))
975        self.assertEqual(splittag('http://example.com?foo=bar'),
976                         ('http://example.com?foo=bar', None))
977        self.assertEqual(splittag('http://example.com?foo=bar#baz#boo'),
978                         ('http://example.com?foo=bar#baz', 'boo'))
979
980    def test_splitattr(self):
981        splitattr = urllib.splitattr
982        self.assertEqual(splitattr('/path;attr1=value1;attr2=value2'),
983                         ('/path', ['attr1=value1', 'attr2=value2']))
984        self.assertEqual(splitattr('/path;'), ('/path', ['']))
985        self.assertEqual(splitattr(';attr1=value1;attr2=value2'),
986                         ('', ['attr1=value1', 'attr2=value2']))
987        self.assertEqual(splitattr('/path'), ('/path', []))
988
989    def test_splitvalue(self):
990        # Normal cases are exercised by other tests; test pathological cases
991        # with no key/value pairs. (testcase ensuring coverage)
992        splitvalue = urllib.splitvalue
993        self.assertEqual(splitvalue('foo=bar'), ('foo', 'bar'))
994        self.assertEqual(splitvalue('foo='), ('foo', ''))
995        self.assertEqual(splitvalue('=bar'), ('', 'bar'))
996        self.assertEqual(splitvalue('foobar'), ('foobar', None))
997        self.assertEqual(splitvalue('foo=bar=baz'), ('foo', 'bar=baz'))
998
999    def test_toBytes(self):
1000        result = urllib.toBytes(u'http://www.python.org')
1001        self.assertEqual(result, 'http://www.python.org')
1002        self.assertRaises(UnicodeError, urllib.toBytes,
1003                          test_support.u(r'http://www.python.org/medi\u00e6val'))
1004
1005    def test_unwrap(self):
1006        url = urllib.unwrap('<URL:type://host/path>')
1007        self.assertEqual(url, 'type://host/path')
1008
1009
1010class URLopener_Tests(unittest.TestCase):
1011    """Testcase to test the open method of URLopener class."""
1012
1013    def test_quoted_open(self):
1014        class DummyURLopener(urllib.URLopener):
1015            def open_spam(self, url):
1016                return url
1017
1018        self.assertEqual(DummyURLopener().open(
1019            'spam://example/ /'),'//example/%20/')
1020
1021        # test the safe characters are not quoted by urlopen
1022        self.assertEqual(DummyURLopener().open(
1023            "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"),
1024            "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
1025
1026
# The FTP wrapper tests below are commented out.
# It is unclear why they keep failing on Windows and SPARC.
# Everywhere else they work fine, but on those machines they sometimes
# fail in one of the tests and sometimes in another. On my Linux box
# the tests pass.
# If anybody has one of the problematic environments, please help!
# .   Facundo
1034#
1035# def server(evt):
1036#     import socket, time
1037#     serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1038#     serv.settimeout(3)
1039#     serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1040#     serv.bind(("", 9093))
1041#     serv.listen(5)
1042#     try:
1043#         conn, addr = serv.accept()
1044#         conn.send("1 Hola mundo\n")
1045#         cantdata = 0
1046#         while cantdata < 13:
1047#             data = conn.recv(13-cantdata)
1048#             cantdata += len(data)
1049#             time.sleep(.3)
1050#         conn.send("2 No more lines\n")
1051#         conn.close()
1052#     except socket.timeout:
1053#         pass
1054#     finally:
1055#         serv.close()
1056#         evt.set()
1057#
1058# class FTPWrapperTests(unittest.TestCase):
1059#
1060#     def setUp(self):
1061#         import ftplib, time, threading
1062#         ftplib.FTP.port = 9093
1063#         self.evt = threading.Event()
1064#         threading.Thread(target=server, args=(self.evt,)).start()
1065#         time.sleep(.1)
1066#
1067#     def tearDown(self):
1068#         self.evt.wait()
1069#
1070#     def testBasic(self):
1071#         # connects
1072#         ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1073#         ftp.close()
1074#
1075#     def testTimeoutNone(self):
1076#         # global default timeout is ignored
1077#         import socket
1078#         self.assertIsNone(socket.getdefaulttimeout())
1079#         socket.setdefaulttimeout(30)
1080#         try:
1081#             ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1082#         finally:
1083#             socket.setdefaulttimeout(None)
1084#         self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1085#         ftp.close()
1086#
1087#     def testTimeoutDefault(self):
1088#         # global default timeout is used
1089#         import socket
1090#         self.assertIsNone(socket.getdefaulttimeout())
1091#         socket.setdefaulttimeout(30)
1092#         try:
1093#             ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1094#         finally:
1095#             socket.setdefaulttimeout(None)
1096#         self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1097#         ftp.close()
1098#
1099#     def testTimeoutValue(self):
1100#         ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1101#                                 timeout=30)
1102#         self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1103#         ftp.close()
1104
1105
1106
def test_main():
    """Run all test classes of this module through test_support.

    DeprecationWarnings whose message matches the urllib.urlopen /
    "Python 3.0" pattern are ignored while the tests run; the previous
    warning filters are restored when the catch_warnings() block exits.
    """
    import warnings
    with warnings.catch_warnings():
        # Raw string: '\.' is meant as a regex escape, and is not a valid
        # string escape sequence in a regular literal.
        warnings.filterwarnings('ignore', r".*urllib\.urlopen.*Python 3.0",
                                DeprecationWarning)
        # Each class is listed exactly once (ProxyTests used to appear
        # twice, which ran its tests a second time for no benefit).
        test_support.run_unittest(
            urlopen_FileTests,
            urlopen_HttpTests,
            urlretrieve_FileTests,
            urlretrieve_HttpTests,
            ProxyTests,
            QuotingTests,
            UnquotingTests,
            urlencode_Tests,
            Pathname_Tests,
            Utility_Tests,
            URLopener_Tests,
            ProxyTests_withOrderedEnv,
            #FTPWrapperTests,
        )
1128
1129
1130
# Run the whole suite via test_main() when this file is executed as a
# script rather than imported.
if __name__ == '__main__':
    test_main()
1133