1#! /usr/bin/env python
2
3from test import test_support
4import unittest
5import urlparse
6
# Base URLs against which the urljoin() tests resolve relative references.
RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
RFC3986_BASE = 'http://a/b/c/d;p?q'
SIMPLE_BASE  = 'http://a/b/c/d'

# A list of test cases.  Each test case is a two-tuple that contains
# a string with the query and a list of (name, value) pairs with the
# result expected from parse_qsl(query, keep_blank_values=True).

parse_qsl_test_cases = [
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
]
28
29class UrlParseTestCase(unittest.TestCase):
30
31    def checkRoundtrips(self, url, parsed, split):
32        result = urlparse.urlparse(url)
33        self.assertEqual(result, parsed)
34        t = (result.scheme, result.netloc, result.path,
35             result.params, result.query, result.fragment)
36        self.assertEqual(t, parsed)
37        # put it back together and it should be the same
38        result2 = urlparse.urlunparse(result)
39        self.assertEqual(result2, url)
40        self.assertEqual(result2, result.geturl())
41
42        # the result of geturl() is a fixpoint; we can always parse it
43        # again to get the same result:
44        result3 = urlparse.urlparse(result.geturl())
45        self.assertEqual(result3.geturl(), result.geturl())
46        self.assertEqual(result3,          result)
47        self.assertEqual(result3.scheme,   result.scheme)
48        self.assertEqual(result3.netloc,   result.netloc)
49        self.assertEqual(result3.path,     result.path)
50        self.assertEqual(result3.params,   result.params)
51        self.assertEqual(result3.query,    result.query)
52        self.assertEqual(result3.fragment, result.fragment)
53        self.assertEqual(result3.username, result.username)
54        self.assertEqual(result3.password, result.password)
55        self.assertEqual(result3.hostname, result.hostname)
56        self.assertEqual(result3.port,     result.port)
57
58        # check the roundtrip using urlsplit() as well
59        result = urlparse.urlsplit(url)
60        self.assertEqual(result, split)
61        t = (result.scheme, result.netloc, result.path,
62             result.query, result.fragment)
63        self.assertEqual(t, split)
64        result2 = urlparse.urlunsplit(result)
65        self.assertEqual(result2, url)
66        self.assertEqual(result2, result.geturl())
67
68        # check the fixpoint property of re-parsing the result of geturl()
69        result3 = urlparse.urlsplit(result.geturl())
70        self.assertEqual(result3.geturl(), result.geturl())
71        self.assertEqual(result3,          result)
72        self.assertEqual(result3.scheme,   result.scheme)
73        self.assertEqual(result3.netloc,   result.netloc)
74        self.assertEqual(result3.path,     result.path)
75        self.assertEqual(result3.query,    result.query)
76        self.assertEqual(result3.fragment, result.fragment)
77        self.assertEqual(result3.username, result.username)
78        self.assertEqual(result3.password, result.password)
79        self.assertEqual(result3.hostname, result.hostname)
80        self.assertEqual(result3.port,     result.port)
81
82    def test_qsl(self):
83        for orig, expect in parse_qsl_test_cases:
84            result = urlparse.parse_qsl(orig, keep_blank_values=True)
85            self.assertEqual(result, expect, "Error parsing %s" % repr(orig))
86
87    def test_roundtrips(self):
88        testcases = [
89            ('file:///tmp/junk.txt',
90             ('file', '', '/tmp/junk.txt', '', '', ''),
91             ('file', '', '/tmp/junk.txt', '', '')),
92            ('imap://mail.python.org/mbox1',
93             ('imap', 'mail.python.org', '/mbox1', '', '', ''),
94             ('imap', 'mail.python.org', '/mbox1', '', '')),
95            ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
96             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
97              '', '', ''),
98             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
99              '', '')),
100            ('nfs://server/path/to/file.txt',
101             ('nfs', 'server', '/path/to/file.txt',  '', '', ''),
102             ('nfs', 'server', '/path/to/file.txt', '', '')),
103            ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
104             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
105              '', '', ''),
106             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
107              '', '')),
108            ('git+ssh://git@github.com/user/project.git',
109            ('git+ssh', 'git@github.com','/user/project.git',
110             '','',''),
111            ('git+ssh', 'git@github.com','/user/project.git',
112             '', ''))
113            ]
114        for url, parsed, split in testcases:
115            self.checkRoundtrips(url, parsed, split)
116
117    def test_http_roundtrips(self):
118        # urlparse.urlsplit treats 'http:' as an optimized special case,
119        # so we test both 'http:' and 'https:' in all the following.
120        # Three cheers for white box knowledge!
121        testcases = [
122            ('://www.python.org',
123             ('www.python.org', '', '', '', ''),
124             ('www.python.org', '', '', '')),
125            ('://www.python.org#abc',
126             ('www.python.org', '', '', '', 'abc'),
127             ('www.python.org', '', '', 'abc')),
128            ('://www.python.org?q=abc',
129             ('www.python.org', '', '', 'q=abc', ''),
130             ('www.python.org', '', 'q=abc', '')),
131            ('://www.python.org/#abc',
132             ('www.python.org', '/', '', '', 'abc'),
133             ('www.python.org', '/', '', 'abc')),
134            ('://a/b/c/d;p?q#f',
135             ('a', '/b/c/d', 'p', 'q', 'f'),
136             ('a', '/b/c/d;p', 'q', 'f')),
137            ]
138        for scheme in ('http', 'https'):
139            for url, parsed, split in testcases:
140                url = scheme + url
141                parsed = (scheme,) + parsed
142                split = (scheme,) + split
143                self.checkRoundtrips(url, parsed, split)
144
145    def checkJoin(self, base, relurl, expected):
146        self.assertEqual(urlparse.urljoin(base, relurl), expected,
147                         (base, relurl, expected))
148
149    def test_unparse_parse(self):
150        for u in ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]:
151            self.assertEqual(urlparse.urlunsplit(urlparse.urlsplit(u)), u)
152            self.assertEqual(urlparse.urlunparse(urlparse.urlparse(u)), u)
153
154    def test_RFC1808(self):
155        # "normal" cases from RFC 1808:
156        self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
157        self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
158        self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
159        self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
160        self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
161        self.checkJoin(RFC1808_BASE, '//g', 'http://g')
162        self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
163        self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
164        self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
165        self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
166        self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
167        self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
168        self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
169        self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
170        self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
171        self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
172        self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
173        self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
174        self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
175        self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
176        self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
177        self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')
178
179        # "abnormal" cases from RFC 1808:
180        self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
181        self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
182        self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
183        self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
184        self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
185        self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
186        self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
187        self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
188        self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
189        self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
190        self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
191        self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
192        self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')
193
194        # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
195        # so we'll not actually run these tests (which expect 1808 behavior).
196        #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
197        #self.checkJoin(RFC1808_BASE, 'http:', 'http:')
198
199    def test_RFC2368(self):
200        # Issue 11467: path that starts with a number is not parsed correctly
201        self.assertEqual(urlparse.urlparse('mailto:1337@example.org'),
202                ('mailto', '', '1337@example.org', '', '', ''))
203
204    def test_RFC2396(self):
205        # cases from RFC 2396
206        self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
207        self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
208        self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
209        self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
210        self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
211        self.checkJoin(RFC2396_BASE, '//g', 'http://g')
212        self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
213        self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
214        self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
215        self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
216        self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
217        self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
218        self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
219        self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
220        self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
221        self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
222        self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
223        self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
224        self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
225        self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
226        self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
227        self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
228        self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
229        self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
230        self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
231        self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
232        self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
233        self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
234        self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
235        self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
236        self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
237        self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
238        self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
239        self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
240        self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
241        self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
242        self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
243        self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
244        self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')
245
246    def test_RFC3986(self):
247        # Test cases from RFC3986
248        self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
249        self.checkJoin(RFC2396_BASE, ';x', 'http://a/b/c/;x')
250        self.checkJoin(RFC3986_BASE, 'g:h','g:h')
251        self.checkJoin(RFC3986_BASE, 'g','http://a/b/c/g')
252        self.checkJoin(RFC3986_BASE, './g','http://a/b/c/g')
253        self.checkJoin(RFC3986_BASE, 'g/','http://a/b/c/g/')
254        self.checkJoin(RFC3986_BASE, '/g','http://a/g')
255        self.checkJoin(RFC3986_BASE, '//g','http://g')
256        self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
257        self.checkJoin(RFC3986_BASE, 'g?y','http://a/b/c/g?y')
258        self.checkJoin(RFC3986_BASE, '#s','http://a/b/c/d;p?q#s')
259        self.checkJoin(RFC3986_BASE, 'g#s','http://a/b/c/g#s')
260        self.checkJoin(RFC3986_BASE, 'g?y#s','http://a/b/c/g?y#s')
261        self.checkJoin(RFC3986_BASE, ';x','http://a/b/c/;x')
262        self.checkJoin(RFC3986_BASE, 'g;x','http://a/b/c/g;x')
263        self.checkJoin(RFC3986_BASE, 'g;x?y#s','http://a/b/c/g;x?y#s')
264        self.checkJoin(RFC3986_BASE, '','http://a/b/c/d;p?q')
265        self.checkJoin(RFC3986_BASE, '.','http://a/b/c/')
266        self.checkJoin(RFC3986_BASE, './','http://a/b/c/')
267        self.checkJoin(RFC3986_BASE, '..','http://a/b/')
268        self.checkJoin(RFC3986_BASE, '../','http://a/b/')
269        self.checkJoin(RFC3986_BASE, '../g','http://a/b/g')
270        self.checkJoin(RFC3986_BASE, '../..','http://a/')
271        self.checkJoin(RFC3986_BASE, '../../','http://a/')
272        self.checkJoin(RFC3986_BASE, '../../g','http://a/g')
273
274        #Abnormal Examples
275
276        # The 'abnormal scenarios' are incompatible with RFC2986 parsing
277        # Tests are here for reference.
278
279        #self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
280        #self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
281        #self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
282        #self.checkJoin(RFC3986_BASE, '/../g','http://a/g')
283
284        self.checkJoin(RFC3986_BASE, 'g.','http://a/b/c/g.')
285        self.checkJoin(RFC3986_BASE, '.g','http://a/b/c/.g')
286        self.checkJoin(RFC3986_BASE, 'g..','http://a/b/c/g..')
287        self.checkJoin(RFC3986_BASE, '..g','http://a/b/c/..g')
288        self.checkJoin(RFC3986_BASE, './../g','http://a/b/g')
289        self.checkJoin(RFC3986_BASE, './g/.','http://a/b/c/g/')
290        self.checkJoin(RFC3986_BASE, 'g/./h','http://a/b/c/g/h')
291        self.checkJoin(RFC3986_BASE, 'g/../h','http://a/b/c/h')
292        self.checkJoin(RFC3986_BASE, 'g;x=1/./y','http://a/b/c/g;x=1/y')
293        self.checkJoin(RFC3986_BASE, 'g;x=1/../y','http://a/b/c/y')
294        self.checkJoin(RFC3986_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
295        self.checkJoin(RFC3986_BASE, 'g?y/../x','http://a/b/c/g?y/../x')
296        self.checkJoin(RFC3986_BASE, 'g#s/./x','http://a/b/c/g#s/./x')
297        self.checkJoin(RFC3986_BASE, 'g#s/../x','http://a/b/c/g#s/../x')
298        #self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser
299        self.checkJoin(RFC3986_BASE, 'http:g','http://a/b/c/g') # relaxed parser
300
301        # Test for issue9721
302        self.checkJoin('http://a/b/c/de', ';x','http://a/b/c/;x')
303
304    def test_urljoins(self):
305        self.checkJoin(SIMPLE_BASE, 'g:h','g:h')
306        self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
307        self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
308        self.checkJoin(SIMPLE_BASE, 'g','http://a/b/c/g')
309        self.checkJoin(SIMPLE_BASE, './g','http://a/b/c/g')
310        self.checkJoin(SIMPLE_BASE, 'g/','http://a/b/c/g/')
311        self.checkJoin(SIMPLE_BASE, '/g','http://a/g')
312        self.checkJoin(SIMPLE_BASE, '//g','http://g')
313        self.checkJoin(SIMPLE_BASE, '?y','http://a/b/c/d?y')
314        self.checkJoin(SIMPLE_BASE, 'g?y','http://a/b/c/g?y')
315        self.checkJoin(SIMPLE_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
316        self.checkJoin(SIMPLE_BASE, '.','http://a/b/c/')
317        self.checkJoin(SIMPLE_BASE, './','http://a/b/c/')
318        self.checkJoin(SIMPLE_BASE, '..','http://a/b/')
319        self.checkJoin(SIMPLE_BASE, '../','http://a/b/')
320        self.checkJoin(SIMPLE_BASE, '../g','http://a/b/g')
321        self.checkJoin(SIMPLE_BASE, '../..','http://a/')
322        self.checkJoin(SIMPLE_BASE, '../../g','http://a/g')
323        self.checkJoin(SIMPLE_BASE, '../../../g','http://a/../g')
324        self.checkJoin(SIMPLE_BASE, './../g','http://a/b/g')
325        self.checkJoin(SIMPLE_BASE, './g/.','http://a/b/c/g/')
326        self.checkJoin(SIMPLE_BASE, '/./g','http://a/./g')
327        self.checkJoin(SIMPLE_BASE, 'g/./h','http://a/b/c/g/h')
328        self.checkJoin(SIMPLE_BASE, 'g/../h','http://a/b/c/h')
329        self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
330        self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
331        self.checkJoin(SIMPLE_BASE, 'http:?y','http://a/b/c/d?y')
332        self.checkJoin(SIMPLE_BASE, 'http:g?y','http://a/b/c/g?y')
333        self.checkJoin(SIMPLE_BASE, 'http:g?y/./x','http://a/b/c/g?y/./x')
334
335    def test_RFC2732(self):
336        for url, hostname, port in [
337            ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
338            ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
339            ('http://[::1]:5432/foo/', '::1', 5432),
340            ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
341            ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
342            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
343             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
344            ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
345            ('http://[::ffff:12.34.56.78]:5432/foo/',
346             '::ffff:12.34.56.78', 5432),
347            ('http://Test.python.org/foo/', 'test.python.org', None),
348            ('http://12.34.56.78/foo/', '12.34.56.78', None),
349            ('http://[::1]/foo/', '::1', None),
350            ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
351            ('http://[dead:beef::]/foo/', 'dead:beef::', None),
352            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
353             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
354            ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
355            ('http://[::ffff:12.34.56.78]/foo/',
356             '::ffff:12.34.56.78', None),
357            ]:
358            urlparsed = urlparse.urlparse(url)
359            self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
360
361        for invalid_url in [
362                'http://::12.34.56.78]/',
363                'http://[::1/foo/',
364                'ftp://[::1/foo/bad]/bad',
365                'http://[::1/foo/bad]/bad',
366                'http://[::ffff:12.34.56.78']:
367            self.assertRaises(ValueError, urlparse.urlparse, invalid_url)
368
369    def test_urldefrag(self):
370        for url, defrag, frag in [
371            ('http://python.org#frag', 'http://python.org', 'frag'),
372            ('http://python.org', 'http://python.org', ''),
373            ('http://python.org/#frag', 'http://python.org/', 'frag'),
374            ('http://python.org/', 'http://python.org/', ''),
375            ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
376            ('http://python.org/?q', 'http://python.org/?q', ''),
377            ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
378            ('http://python.org/p?q', 'http://python.org/p?q', ''),
379            (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
380            (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
381            ]:
382            self.assertEqual(urlparse.urldefrag(url), (defrag, frag))
383
384    def test_urlsplit_attributes(self):
385        url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
386        p = urlparse.urlsplit(url)
387        self.assertEqual(p.scheme, "http")
388        self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
389        self.assertEqual(p.path, "/doc/")
390        self.assertEqual(p.query, "")
391        self.assertEqual(p.fragment, "frag")
392        self.assertEqual(p.username, None)
393        self.assertEqual(p.password, None)
394        self.assertEqual(p.hostname, "www.python.org")
395        self.assertEqual(p.port, None)
396        # geturl() won't return exactly the original URL in this case
397        # since the scheme is always case-normalized
398        #self.assertEqual(p.geturl(), url)
399
400        url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
401        p = urlparse.urlsplit(url)
402        self.assertEqual(p.scheme, "http")
403        self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
404        self.assertEqual(p.path, "/doc/")
405        self.assertEqual(p.query, "query=yes")
406        self.assertEqual(p.fragment, "frag")
407        self.assertEqual(p.username, "User")
408        self.assertEqual(p.password, "Pass")
409        self.assertEqual(p.hostname, "www.python.org")
410        self.assertEqual(p.port, 80)
411        self.assertEqual(p.geturl(), url)
412
413        # Addressing issue1698, which suggests Username can contain
414        # "@" characters.  Though not RFC compliant, many ftp sites allow
415        # and request email addresses as usernames.
416
417        url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
418        p = urlparse.urlsplit(url)
419        self.assertEqual(p.scheme, "http")
420        self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
421        self.assertEqual(p.path, "/doc/")
422        self.assertEqual(p.query, "query=yes")
423        self.assertEqual(p.fragment, "frag")
424        self.assertEqual(p.username, "User@example.com")
425        self.assertEqual(p.password, "Pass")
426        self.assertEqual(p.hostname, "www.python.org")
427        self.assertEqual(p.port, 80)
428        self.assertEqual(p.geturl(), url)
429
430
431    def test_attributes_bad_port(self):
432        """Check handling of non-integer ports."""
433        p = urlparse.urlsplit("http://www.example.net:foo")
434        self.assertEqual(p.netloc, "www.example.net:foo")
435        self.assertRaises(ValueError, lambda: p.port)
436
437        p = urlparse.urlparse("http://www.example.net:foo")
438        self.assertEqual(p.netloc, "www.example.net:foo")
439        self.assertRaises(ValueError, lambda: p.port)
440
441    def test_attributes_without_netloc(self):
442        # This example is straight from RFC 3261.  It looks like it
443        # should allow the username, hostname, and port to be filled
444        # in, but doesn't.  Since it's a URI and doesn't use the
445        # scheme://netloc syntax, the netloc and related attributes
446        # should be left empty.
447        uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
448        p = urlparse.urlsplit(uri)
449        self.assertEqual(p.netloc, "")
450        self.assertEqual(p.username, None)
451        self.assertEqual(p.password, None)
452        self.assertEqual(p.hostname, None)
453        self.assertEqual(p.port, None)
454        self.assertEqual(p.geturl(), uri)
455
456        p = urlparse.urlparse(uri)
457        self.assertEqual(p.netloc, "")
458        self.assertEqual(p.username, None)
459        self.assertEqual(p.password, None)
460        self.assertEqual(p.hostname, None)
461        self.assertEqual(p.port, None)
462        self.assertEqual(p.geturl(), uri)
463
464    def test_caching(self):
465        # Test case for bug #1313119
466        uri = "http://example.com/doc/"
467        unicode_uri = unicode(uri)
468
469        urlparse.urlparse(unicode_uri)
470        p = urlparse.urlparse(uri)
471        self.assertEqual(type(p.scheme), type(uri))
472        self.assertEqual(type(p.hostname), type(uri))
473        self.assertEqual(type(p.path), type(uri))
474
475    def test_noslash(self):
476        # Issue 1637: http://foo.com?query is legal
477        self.assertEqual(urlparse.urlparse("http://example.com?blahblah=/foo"),
478                         ('http', 'example.com', '', '', 'blahblah=/foo', ''))
479
480    def test_anyscheme(self):
481        # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
482        self.assertEqual(urlparse.urlparse("s3://foo.com/stuff"),
483                         ('s3','foo.com','/stuff','','',''))
484        self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff"),
485                         ('x-newscheme','foo.com','/stuff','','',''))
486
487    def test_withoutscheme(self):
488        # Test urlparse without scheme
489        # Issue 754016: urlparse goes wrong with IP:port without scheme
490        # RFC 1808 specifies that netloc should start with //, urlparse expects
491        # the same, otherwise it classifies the portion of url as path.
492        self.assertEqual(urlparse.urlparse("path"),
493                ('','','path','','',''))
494        self.assertEqual(urlparse.urlparse("//www.python.org:80"),
495                ('','www.python.org:80','','','',''))
496        self.assertEqual(urlparse.urlparse("http://www.python.org:80"),
497                ('http','www.python.org:80','','','',''))
498
499    def test_portseparator(self):
500        # Issue 754016 makes changes for port separator ':' from scheme separator
501        self.assertEqual(urlparse.urlparse("path:80"),
502                ('','','path:80','','',''))
503        self.assertEqual(urlparse.urlparse("http:"),('http','','','','',''))
504        self.assertEqual(urlparse.urlparse("https:"),('https','','','','',''))
505        self.assertEqual(urlparse.urlparse("http://www.python.org:80"),
506                ('http','www.python.org:80','','','',''))
507
508
def test_main():
    """Run all tests in UrlParseTestCase via test_support.run_unittest()."""
    test_classes = (UrlParseTestCase,)
    test_support.run_unittest(*test_classes)

# Allow the file to be executed directly as a script.
if __name__ == "__main__":
    test_main()
514