1from test import test_support
2import unittest
3import urlparse
4
# Base URLs that the urljoin tests below resolve relative references
# against; one per RFC whose examples are exercised, plus a plain one.
RFC1808_BASE = 'http://a/b/c/d;p?q#f'
RFC2396_BASE = 'http://a/b/c/d;p?q'
RFC3986_BASE = 'http://a/b/c/d;p?q'
SIMPLE_BASE = 'http://a/b/c/d'
9
# Lists of test cases for parse_qsl() and parse_qs().  Each test case is
# a two-tuple that contains a string with the query and the expected
# result when blank values are kept: a list of (name, value) pairs for
# parse_qsl(), a dictionary mapping names to lists of values for
# parse_qs().

parse_qsl_test_cases = [
    # '&'-separated queries
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
    # ';'-separated queries
    (";", []),
    (";;", []),
    (";a=b", [('a', 'b')]),
    ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1;a=2", [('a', '1'), ('a', '2')]),
    # ';'-separated queries written as bytes literals
    (b";", []),
    (b";;", []),
    (b";a=b", [(b'a', b'b')]),
    (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
    (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
]
36
# Test cases for parse_qs(): (query, expected dictionary) pairs, where
# the dictionary maps each name to the list of its values.
parse_qs_test_cases = [
    # '&'-separated queries
    ("", {}),
    ("&", {}),
    ("&&", {}),
    ("=", {"": [""]}),
    ("=a", {"": ["a"]}),
    ("a", {"a": [""]}),
    ("a=", {"a": [""]}),
    ("&a=b", {"a": ["b"]}),
    ("a=a+b&b=b+c", {"a": ["a b"], "b": ["b c"]}),
    ("a=1&a=2", {"a": ["1", "2"]}),

    # the same '&'-separated queries written as bytes literals
    (b"", {}),
    (b"&", {}),
    (b"&&", {}),
    (b"=", {b"": [b""]}),
    (b"=a", {b"": [b"a"]}),
    (b"a", {b"a": [b""]}),
    (b"a=", {b"a": [b""]}),
    (b"&a=b", {b"a": [b"b"]}),
    (b"a=a+b&b=b+c", {b"a": [b"a b"], b"b": [b"b c"]}),
    (b"a=1&a=2", {b"a": [b"1", b"2"]}),

    # ';'-separated queries
    (";", {}),
    (";;", {}),
    (";a=b", {"a": ["b"]}),
    ("a=a+b;b=b+c", {"a": ["a b"], "b": ["b c"]}),
    ("a=1;a=2", {"a": ["1", "2"]}),

    # ';'-separated queries written as bytes literals
    (b";", {}),
    (b";;", {}),
    (b";a=b", {b"a": [b"b"]}),
    (b"a=a+b;b=b+c", {b"a": [b"a b"], b"b": [b"b c"]}),
    (b"a=1;a=2", {b"a": [b"1", b"2"]}),
]
69
70class UrlParseTestCase(unittest.TestCase):
71
72    def checkRoundtrips(self, url, parsed, split):
73        result = urlparse.urlparse(url)
74        self.assertEqual(result, parsed)
75        t = (result.scheme, result.netloc, result.path,
76             result.params, result.query, result.fragment)
77        self.assertEqual(t, parsed)
78        # put it back together and it should be the same
79        result2 = urlparse.urlunparse(result)
80        self.assertEqual(result2, url)
81        self.assertEqual(result2, result.geturl())
82
83        # the result of geturl() is a fixpoint; we can always parse it
84        # again to get the same result:
85        result3 = urlparse.urlparse(result.geturl())
86        self.assertEqual(result3.geturl(), result.geturl())
87        self.assertEqual(result3,          result)
88        self.assertEqual(result3.scheme,   result.scheme)
89        self.assertEqual(result3.netloc,   result.netloc)
90        self.assertEqual(result3.path,     result.path)
91        self.assertEqual(result3.params,   result.params)
92        self.assertEqual(result3.query,    result.query)
93        self.assertEqual(result3.fragment, result.fragment)
94        self.assertEqual(result3.username, result.username)
95        self.assertEqual(result3.password, result.password)
96        self.assertEqual(result3.hostname, result.hostname)
97        self.assertEqual(result3.port,     result.port)
98
99        # check the roundtrip using urlsplit() as well
100        result = urlparse.urlsplit(url)
101        self.assertEqual(result, split)
102        t = (result.scheme, result.netloc, result.path,
103             result.query, result.fragment)
104        self.assertEqual(t, split)
105        result2 = urlparse.urlunsplit(result)
106        self.assertEqual(result2, url)
107        self.assertEqual(result2, result.geturl())
108
109        # check the fixpoint property of re-parsing the result of geturl()
110        result3 = urlparse.urlsplit(result.geturl())
111        self.assertEqual(result3.geturl(), result.geturl())
112        self.assertEqual(result3,          result)
113        self.assertEqual(result3.scheme,   result.scheme)
114        self.assertEqual(result3.netloc,   result.netloc)
115        self.assertEqual(result3.path,     result.path)
116        self.assertEqual(result3.query,    result.query)
117        self.assertEqual(result3.fragment, result.fragment)
118        self.assertEqual(result3.username, result.username)
119        self.assertEqual(result3.password, result.password)
120        self.assertEqual(result3.hostname, result.hostname)
121        self.assertEqual(result3.port,     result.port)
122
123    def test_qsl(self):
124        for orig, expect in parse_qsl_test_cases:
125            result = urlparse.parse_qsl(orig, keep_blank_values=True)
126            self.assertEqual(result, expect, "Error parsing %r" % orig)
127            expect_without_blanks = [v for v in expect if len(v[1])]
128            result = urlparse.parse_qsl(orig, keep_blank_values=False)
129            self.assertEqual(result, expect_without_blanks,
130                    "Error parsing %r" % orig)
131
132    def test_qs(self):
133        for orig, expect in parse_qs_test_cases:
134            result = urlparse.parse_qs(orig, keep_blank_values=True)
135            self.assertEqual(result, expect, "Error parsing %r" % orig)
136            expect_without_blanks = dict(
137                    [(v, expect[v]) for v in expect if len(expect[v][0])])
138            result = urlparse.parse_qs(orig, keep_blank_values=False)
139            self.assertEqual(result, expect_without_blanks,
140                    "Error parsing %r" % orig)
141
142    def test_roundtrips(self):
143        testcases = [
144            ('file:///tmp/junk.txt',
145             ('file', '', '/tmp/junk.txt', '', '', ''),
146             ('file', '', '/tmp/junk.txt', '', '')),
147            ('imap://mail.python.org/mbox1',
148             ('imap', 'mail.python.org', '/mbox1', '', '', ''),
149             ('imap', 'mail.python.org', '/mbox1', '', '')),
150            ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
151             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
152              '', '', ''),
153             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
154              '', '')),
155            ('nfs://server/path/to/file.txt',
156             ('nfs', 'server', '/path/to/file.txt',  '', '', ''),
157             ('nfs', 'server', '/path/to/file.txt', '', '')),
158            ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
159             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
160              '', '', ''),
161             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
162              '', '')),
163            ('git+ssh://git@github.com/user/project.git',
164            ('git+ssh', 'git@github.com','/user/project.git',
165             '','',''),
166            ('git+ssh', 'git@github.com','/user/project.git',
167             '', ''))
168            ]
169        for url, parsed, split in testcases:
170            self.checkRoundtrips(url, parsed, split)
171
172    def test_http_roundtrips(self):
173        # urlparse.urlsplit treats 'http:' as an optimized special case,
174        # so we test both 'http:' and 'https:' in all the following.
175        # Three cheers for white box knowledge!
176        testcases = [
177            ('://www.python.org',
178             ('www.python.org', '', '', '', ''),
179             ('www.python.org', '', '', '')),
180            ('://www.python.org#abc',
181             ('www.python.org', '', '', '', 'abc'),
182             ('www.python.org', '', '', 'abc')),
183            ('://www.python.org?q=abc',
184             ('www.python.org', '', '', 'q=abc', ''),
185             ('www.python.org', '', 'q=abc', '')),
186            ('://www.python.org/#abc',
187             ('www.python.org', '/', '', '', 'abc'),
188             ('www.python.org', '/', '', 'abc')),
189            ('://a/b/c/d;p?q#f',
190             ('a', '/b/c/d', 'p', 'q', 'f'),
191             ('a', '/b/c/d;p', 'q', 'f')),
192            ]
193        for scheme in ('http', 'https'):
194            for url, parsed, split in testcases:
195                url = scheme + url
196                parsed = (scheme,) + parsed
197                split = (scheme,) + split
198                self.checkRoundtrips(url, parsed, split)
199
200    def checkJoin(self, base, relurl, expected):
201        self.assertEqual(urlparse.urljoin(base, relurl), expected,
202                         (base, relurl, expected))
203
204    def test_unparse_parse(self):
205        for u in ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]:
206            self.assertEqual(urlparse.urlunsplit(urlparse.urlsplit(u)), u)
207            self.assertEqual(urlparse.urlunparse(urlparse.urlparse(u)), u)
208
209    def test_RFC1808(self):
210        # "normal" cases from RFC 1808:
211        self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
212        self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
213        self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
214        self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
215        self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
216        self.checkJoin(RFC1808_BASE, '//g', 'http://g')
217        self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
218        self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
219        self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
220        self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
221        self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
222        self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
223        self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
224        self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
225        self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
226        self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
227        self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
228        self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
229        self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
230        self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
231        self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
232        self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')
233
234        # "abnormal" cases from RFC 1808:
235        self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
236        self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
237        self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
238        self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
239        self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
240        self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
241        self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
242        self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
243        self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
244        self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
245        self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
246        self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
247        self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')
248
249        # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
250        # so we'll not actually run these tests (which expect 1808 behavior).
251        #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
252        #self.checkJoin(RFC1808_BASE, 'http:', 'http:')
253
254    def test_RFC2368(self):
255        # Issue 11467: path that starts with a number is not parsed correctly
256        self.assertEqual(urlparse.urlparse('mailto:1337@example.org'),
257                ('mailto', '', '1337@example.org', '', '', ''))
258
259    def test_RFC2396(self):
260        # cases from RFC 2396
261        self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
262        self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
263        self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
264        self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
265        self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
266        self.checkJoin(RFC2396_BASE, '//g', 'http://g')
267        self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
268        self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
269        self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
270        self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
271        self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
272        self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
273        self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
274        self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
275        self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
276        self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
277        self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
278        self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
279        self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
280        self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
281        self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
282        self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
283        self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
284        self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
285        self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
286        self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
287        self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
288        self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
289        self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
290        self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
291        self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
292        self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
293        self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
294        self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
295        self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
296        self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
297        self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
298        self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
299        self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')
300
301    def test_RFC3986(self):
302        # Test cases from RFC3986
303        self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
304        self.checkJoin(RFC2396_BASE, ';x', 'http://a/b/c/;x')
305        self.checkJoin(RFC3986_BASE, 'g:h','g:h')
306        self.checkJoin(RFC3986_BASE, 'g','http://a/b/c/g')
307        self.checkJoin(RFC3986_BASE, './g','http://a/b/c/g')
308        self.checkJoin(RFC3986_BASE, 'g/','http://a/b/c/g/')
309        self.checkJoin(RFC3986_BASE, '/g','http://a/g')
310        self.checkJoin(RFC3986_BASE, '//g','http://g')
311        self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
312        self.checkJoin(RFC3986_BASE, 'g?y','http://a/b/c/g?y')
313        self.checkJoin(RFC3986_BASE, '#s','http://a/b/c/d;p?q#s')
314        self.checkJoin(RFC3986_BASE, 'g#s','http://a/b/c/g#s')
315        self.checkJoin(RFC3986_BASE, 'g?y#s','http://a/b/c/g?y#s')
316        self.checkJoin(RFC3986_BASE, ';x','http://a/b/c/;x')
317        self.checkJoin(RFC3986_BASE, 'g;x','http://a/b/c/g;x')
318        self.checkJoin(RFC3986_BASE, 'g;x?y#s','http://a/b/c/g;x?y#s')
319        self.checkJoin(RFC3986_BASE, '','http://a/b/c/d;p?q')
320        self.checkJoin(RFC3986_BASE, '.','http://a/b/c/')
321        self.checkJoin(RFC3986_BASE, './','http://a/b/c/')
322        self.checkJoin(RFC3986_BASE, '..','http://a/b/')
323        self.checkJoin(RFC3986_BASE, '../','http://a/b/')
324        self.checkJoin(RFC3986_BASE, '../g','http://a/b/g')
325        self.checkJoin(RFC3986_BASE, '../..','http://a/')
326        self.checkJoin(RFC3986_BASE, '../../','http://a/')
327        self.checkJoin(RFC3986_BASE, '../../g','http://a/g')
328
329        #Abnormal Examples
330
331        # The 'abnormal scenarios' are incompatible with RFC2986 parsing
332        # Tests are here for reference.
333
334        #self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
335        #self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
336        #self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
337        #self.checkJoin(RFC3986_BASE, '/../g','http://a/g')
338
339        self.checkJoin(RFC3986_BASE, 'g.','http://a/b/c/g.')
340        self.checkJoin(RFC3986_BASE, '.g','http://a/b/c/.g')
341        self.checkJoin(RFC3986_BASE, 'g..','http://a/b/c/g..')
342        self.checkJoin(RFC3986_BASE, '..g','http://a/b/c/..g')
343        self.checkJoin(RFC3986_BASE, './../g','http://a/b/g')
344        self.checkJoin(RFC3986_BASE, './g/.','http://a/b/c/g/')
345        self.checkJoin(RFC3986_BASE, 'g/./h','http://a/b/c/g/h')
346        self.checkJoin(RFC3986_BASE, 'g/../h','http://a/b/c/h')
347        self.checkJoin(RFC3986_BASE, 'g;x=1/./y','http://a/b/c/g;x=1/y')
348        self.checkJoin(RFC3986_BASE, 'g;x=1/../y','http://a/b/c/y')
349        self.checkJoin(RFC3986_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
350        self.checkJoin(RFC3986_BASE, 'g?y/../x','http://a/b/c/g?y/../x')
351        self.checkJoin(RFC3986_BASE, 'g#s/./x','http://a/b/c/g#s/./x')
352        self.checkJoin(RFC3986_BASE, 'g#s/../x','http://a/b/c/g#s/../x')
353        #self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser
354        self.checkJoin(RFC3986_BASE, 'http:g','http://a/b/c/g') # relaxed parser
355
356        # Test for issue9721
357        self.checkJoin('http://a/b/c/de', ';x','http://a/b/c/;x')
358
359    def test_urljoins(self):
360        self.checkJoin(SIMPLE_BASE, 'g:h','g:h')
361        self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
362        self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
363        self.checkJoin(SIMPLE_BASE, 'g','http://a/b/c/g')
364        self.checkJoin(SIMPLE_BASE, './g','http://a/b/c/g')
365        self.checkJoin(SIMPLE_BASE, 'g/','http://a/b/c/g/')
366        self.checkJoin(SIMPLE_BASE, '/g','http://a/g')
367        self.checkJoin(SIMPLE_BASE, '//g','http://g')
368        self.checkJoin(SIMPLE_BASE, '?y','http://a/b/c/d?y')
369        self.checkJoin(SIMPLE_BASE, 'g?y','http://a/b/c/g?y')
370        self.checkJoin(SIMPLE_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
371        self.checkJoin(SIMPLE_BASE, '.','http://a/b/c/')
372        self.checkJoin(SIMPLE_BASE, './','http://a/b/c/')
373        self.checkJoin(SIMPLE_BASE, '..','http://a/b/')
374        self.checkJoin(SIMPLE_BASE, '../','http://a/b/')
375        self.checkJoin(SIMPLE_BASE, '../g','http://a/b/g')
376        self.checkJoin(SIMPLE_BASE, '../..','http://a/')
377        self.checkJoin(SIMPLE_BASE, '../../g','http://a/g')
378        self.checkJoin(SIMPLE_BASE, '../../../g','http://a/../g')
379        self.checkJoin(SIMPLE_BASE, './../g','http://a/b/g')
380        self.checkJoin(SIMPLE_BASE, './g/.','http://a/b/c/g/')
381        self.checkJoin(SIMPLE_BASE, '/./g','http://a/./g')
382        self.checkJoin(SIMPLE_BASE, 'g/./h','http://a/b/c/g/h')
383        self.checkJoin(SIMPLE_BASE, 'g/../h','http://a/b/c/h')
384        self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
385        self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
386        self.checkJoin(SIMPLE_BASE, 'http:?y','http://a/b/c/d?y')
387        self.checkJoin(SIMPLE_BASE, 'http:g?y','http://a/b/c/g?y')
388        self.checkJoin(SIMPLE_BASE, 'http:g?y/./x','http://a/b/c/g?y/./x')
389        self.checkJoin('http:///', '..','http:///')
390        self.checkJoin('', 'http://a/b/c/g?y/./x','http://a/b/c/g?y/./x')
391        self.checkJoin('', 'http://a/./g', 'http://a/./g')
392        self.checkJoin('svn://pathtorepo/dir1','dir2','svn://pathtorepo/dir2')
393        self.checkJoin('svn+ssh://pathtorepo/dir1','dir2','svn+ssh://pathtorepo/dir2')
394
395    def test_RFC2732(self):
396        for url, hostname, port in [
397            ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
398            ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
399            ('http://[::1]:5432/foo/', '::1', 5432),
400            ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
401            ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
402            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
403             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
404            ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
405            ('http://[::ffff:12.34.56.78]:5432/foo/',
406             '::ffff:12.34.56.78', 5432),
407            ('http://Test.python.org/foo/', 'test.python.org', None),
408            ('http://12.34.56.78/foo/', '12.34.56.78', None),
409            ('http://[::1]/foo/', '::1', None),
410            ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
411            ('http://[dead:beef::]/foo/', 'dead:beef::', None),
412            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
413             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
414            ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
415            ('http://[::ffff:12.34.56.78]/foo/',
416             '::ffff:12.34.56.78', None),
417            ('http://Test.python.org:/foo/', 'test.python.org', None),
418            ('http://12.34.56.78:/foo/', '12.34.56.78', None),
419            ('http://[::1]:/foo/', '::1', None),
420            ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
421            ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
422            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
423             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
424            ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
425            ('http://[::ffff:12.34.56.78]:/foo/',
426             '::ffff:12.34.56.78', None),
427            ]:
428            urlparsed = urlparse.urlparse(url)
429            self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
430
431        for invalid_url in [
432                'http://::12.34.56.78]/',
433                'http://[::1/foo/',
434                'ftp://[::1/foo/bad]/bad',
435                'http://[::1/foo/bad]/bad',
436                'http://[::ffff:12.34.56.78']:
437            self.assertRaises(ValueError, urlparse.urlparse, invalid_url)
438
439    def test_urldefrag(self):
440        for url, defrag, frag in [
441            ('http://python.org#frag', 'http://python.org', 'frag'),
442            ('http://python.org', 'http://python.org', ''),
443            ('http://python.org/#frag', 'http://python.org/', 'frag'),
444            ('http://python.org/', 'http://python.org/', ''),
445            ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
446            ('http://python.org/?q', 'http://python.org/?q', ''),
447            ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
448            ('http://python.org/p?q', 'http://python.org/p?q', ''),
449            (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
450            (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
451            ]:
452            self.assertEqual(urlparse.urldefrag(url), (defrag, frag))
453
454    def test_urlsplit_attributes(self):
455        url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
456        p = urlparse.urlsplit(url)
457        self.assertEqual(p.scheme, "http")
458        self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
459        self.assertEqual(p.path, "/doc/")
460        self.assertEqual(p.query, "")
461        self.assertEqual(p.fragment, "frag")
462        self.assertEqual(p.username, None)
463        self.assertEqual(p.password, None)
464        self.assertEqual(p.hostname, "www.python.org")
465        self.assertEqual(p.port, None)
466        # geturl() won't return exactly the original URL in this case
467        # since the scheme is always case-normalized
468        #self.assertEqual(p.geturl(), url)
469
470        url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
471        p = urlparse.urlsplit(url)
472        self.assertEqual(p.scheme, "http")
473        self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
474        self.assertEqual(p.path, "/doc/")
475        self.assertEqual(p.query, "query=yes")
476        self.assertEqual(p.fragment, "frag")
477        self.assertEqual(p.username, "User")
478        self.assertEqual(p.password, "Pass")
479        self.assertEqual(p.hostname, "www.python.org")
480        self.assertEqual(p.port, 80)
481        self.assertEqual(p.geturl(), url)
482
483        # Addressing issue1698, which suggests Username can contain
484        # "@" characters.  Though not RFC compliant, many ftp sites allow
485        # and request email addresses as usernames.
486
487        url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
488        p = urlparse.urlsplit(url)
489        self.assertEqual(p.scheme, "http")
490        self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
491        self.assertEqual(p.path, "/doc/")
492        self.assertEqual(p.query, "query=yes")
493        self.assertEqual(p.fragment, "frag")
494        self.assertEqual(p.username, "User@example.com")
495        self.assertEqual(p.password, "Pass")
496        self.assertEqual(p.hostname, "www.python.org")
497        self.assertEqual(p.port, 80)
498        self.assertEqual(p.geturl(), url)
499
500        # Verify an illegal port of value greater than 65535 is set as None
501        url = "http://www.python.org:65536"
502        p = urlparse.urlsplit(url)
503        self.assertEqual(p.port, None)
504
505    def test_issue14072(self):
506        p1 = urlparse.urlsplit('tel:+31-641044153')
507        self.assertEqual(p1.scheme, 'tel')
508        self.assertEqual(p1.path, '+31-641044153')
509
510        p2 = urlparse.urlsplit('tel:+31641044153')
511        self.assertEqual(p2.scheme, 'tel')
512        self.assertEqual(p2.path, '+31641044153')
513
514        # Assert for urlparse
515        p1 = urlparse.urlparse('tel:+31-641044153')
516        self.assertEqual(p1.scheme, 'tel')
517        self.assertEqual(p1.path, '+31-641044153')
518
519        p2 = urlparse.urlparse('tel:+31641044153')
520        self.assertEqual(p2.scheme, 'tel')
521        self.assertEqual(p2.path, '+31641044153')
522
523
524    def test_telurl_params(self):
525        p1 = urlparse.urlparse('tel:123-4;phone-context=+1-650-516')
526        self.assertEqual(p1.scheme, 'tel')
527        self.assertEqual(p1.path, '123-4')
528        self.assertEqual(p1.params, 'phone-context=+1-650-516')
529
530        p1 = urlparse.urlparse('tel:+1-201-555-0123')
531        self.assertEqual(p1.scheme, 'tel')
532        self.assertEqual(p1.path, '+1-201-555-0123')
533        self.assertEqual(p1.params, '')
534
535        p1 = urlparse.urlparse('tel:7042;phone-context=example.com')
536        self.assertEqual(p1.scheme, 'tel')
537        self.assertEqual(p1.path, '7042')
538        self.assertEqual(p1.params, 'phone-context=example.com')
539
540        p1 = urlparse.urlparse('tel:863-1234;phone-context=+1-914-555')
541        self.assertEqual(p1.scheme, 'tel')
542        self.assertEqual(p1.path, '863-1234')
543        self.assertEqual(p1.params, 'phone-context=+1-914-555')
544
545
546    def test_attributes_bad_port(self):
547        """Check handling of non-integer ports."""
548        p = urlparse.urlsplit("http://www.example.net:foo")
549        self.assertEqual(p.netloc, "www.example.net:foo")
550        self.assertRaises(ValueError, lambda: p.port)
551
552        p = urlparse.urlparse("http://www.example.net:foo")
553        self.assertEqual(p.netloc, "www.example.net:foo")
554        self.assertRaises(ValueError, lambda: p.port)
555
556    def test_attributes_without_netloc(self):
557        # This example is straight from RFC 3261.  It looks like it
558        # should allow the username, hostname, and port to be filled
559        # in, but doesn't.  Since it's a URI and doesn't use the
560        # scheme://netloc syntax, the netloc and related attributes
561        # should be left empty.
562        uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
563        p = urlparse.urlsplit(uri)
564        self.assertEqual(p.netloc, "")
565        self.assertEqual(p.username, None)
566        self.assertEqual(p.password, None)
567        self.assertEqual(p.hostname, None)
568        self.assertEqual(p.port, None)
569        self.assertEqual(p.geturl(), uri)
570
571        p = urlparse.urlparse(uri)
572        self.assertEqual(p.netloc, "")
573        self.assertEqual(p.username, None)
574        self.assertEqual(p.password, None)
575        self.assertEqual(p.hostname, None)
576        self.assertEqual(p.port, None)
577        self.assertEqual(p.geturl(), uri)
578
579    def test_caching(self):
580        # Test case for bug #1313119
581        uri = "http://example.com/doc/"
582        unicode_uri = unicode(uri)
583
584        urlparse.urlparse(unicode_uri)
585        p = urlparse.urlparse(uri)
586        self.assertEqual(type(p.scheme), type(uri))
587        self.assertEqual(type(p.hostname), type(uri))
588        self.assertEqual(type(p.path), type(uri))
589
590    def test_noslash(self):
591        # Issue 1637: http://foo.com?query is legal
592        self.assertEqual(urlparse.urlparse("http://example.com?blahblah=/foo"),
593                         ('http', 'example.com', '', '', 'blahblah=/foo', ''))
594
595    def test_anyscheme(self):
596        # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
597        self.assertEqual(urlparse.urlparse("s3://foo.com/stuff"),
598                         ('s3','foo.com','/stuff','','',''))
599        self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff"),
600                         ('x-newscheme','foo.com','/stuff','','',''))
601        self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
602                         ('x-newscheme','foo.com','/stuff','','query','fragment'))
603        self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff?query"),
604                         ('x-newscheme','foo.com','/stuff','','query',''))
605
606    def test_withoutscheme(self):
607        # Test urlparse without scheme
608        # Issue 754016: urlparse goes wrong with IP:port without scheme
609        # RFC 1808 specifies that netloc should start with //, urlparse expects
610        # the same, otherwise it classifies the portion of url as path.
611        self.assertEqual(urlparse.urlparse("path"),
612                ('','','path','','',''))
613        self.assertEqual(urlparse.urlparse("//www.python.org:80"),
614                ('','www.python.org:80','','','',''))
615        self.assertEqual(urlparse.urlparse("http://www.python.org:80"),
616                ('http','www.python.org:80','','','',''))
617
618    def test_portseparator(self):
619        # Issue 754016 makes changes for port separator ':' from scheme separator
620        self.assertEqual(urlparse.urlparse("path:80"),
621                ('','','path:80','','',''))
622        self.assertEqual(urlparse.urlparse("http:"),('http','','','','',''))
623        self.assertEqual(urlparse.urlparse("https:"),('https','','','','',''))
624        self.assertEqual(urlparse.urlparse("http://www.python.org:80"),
625                ('http','www.python.org:80','','','',''))
626
def test_main():
    """Run the UrlParseTestCase tests via test_support.run_unittest()."""
    test_support.run_unittest(UrlParseTestCase)


# Allow the module to be executed directly as a script.
if __name__ == "__main__":
    test_main()
632