import sys
import unicodedata
import unittest
import urllib.parse

RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
RFC3986_BASE = 'http://a/b/c/d;p?q'
SIMPLE_BASE  = 'http://a/b/c/d'

# Each parse_qsl testcase is a two-tuple that contains
# a string with the query and a list with the expected result.

parse_qsl_test_cases = [
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
    (b"", []),
    (b"&", []),
    (b"&&", []),
    (b"=", [(b'', b'')]),
    (b"=a", [(b'', b'a')]),
    (b"a", [(b'a', b'')]),
    (b"a=", [(b'a', b'')]),
    (b"&a=b", [(b'a', b'b')]),
    (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
    (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
    (";", []),
    (";;", []),
    (";a=b", [('a', 'b')]),
    ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1;a=2", [('a', '1'), ('a', '2')]),
    (b";", []),
    (b";;", []),
    (b";a=b", [(b'a', b'b')]),
    (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
    (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
]
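
# Note: the ';'-separated cases in these tables rely on parse_qsl() and
# parse_qs() accepting ';' as well as '&' as a pair separator, which is the
# behavior of the urllib.parse version these tests target.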

# Each parse_qs testcase is a two-tuple that contains
# a string with the query and a dictionary with the expected result.

parse_qs_test_cases = [
    ("", {}),
    ("&", {}),
    ("&&", {}),
    ("=", {'': ['']}),
    ("=a", {'': ['a']}),
    ("a", {'a': ['']}),
    ("a=", {'a': ['']}),
    ("&a=b", {'a': ['b']}),
    ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
    ("a=1&a=2", {'a': ['1', '2']}),
    (b"", {}),
    (b"&", {}),
    (b"&&", {}),
    (b"=", {b'': [b'']}),
    (b"=a", {b'': [b'a']}),
    (b"a", {b'a': [b'']}),
    (b"a=", {b'a': [b'']}),
    (b"&a=b", {b'a': [b'b']}),
    (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
    (b"a=1&a=2", {b'a': [b'1', b'2']}),
    (";", {}),
    (";;", {}),
    (";a=b", {'a': ['b']}),
    ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
    ("a=1;a=2", {'a': ['1', '2']}),
    (b";", {}),
    (b";;", {}),
    (b";a=b", {b'a': [b'b']}),
    (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
    (b"a=1;a=2", {b'a': [b'1', b'2']}),
]

class UrlParseTestCase(unittest.TestCase):

    def checkRoundtrips(self, url, parsed, split):
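        # Helper: `url` must urlparse() to `parsed` and urlsplit() to `split`,
        # urlunparse()/urlunsplit() must round-trip back to the original
        # string, and re-parsing geturl() must match field by field.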
        result = urllib.parse.urlparse(url)
        self.assertEqual(result, parsed)
        t = (result.scheme, result.netloc, result.path,
             result.params, result.query, result.fragment)
        self.assertEqual(t, parsed)
        # put it back together and it should be the same
        result2 = urllib.parse.urlunparse(result)
        self.assertEqual(result2, url)
        self.assertEqual(result2, result.geturl())

        # the result of geturl() is a fixpoint; we can always parse it
        # again to get the same result:
        result3 = urllib.parse.urlparse(result.geturl())
        self.assertEqual(result3.geturl(), result.geturl())
        self.assertEqual(result3,          result)
        self.assertEqual(result3.scheme,   result.scheme)
        self.assertEqual(result3.netloc,   result.netloc)
        self.assertEqual(result3.path,     result.path)
        self.assertEqual(result3.params,   result.params)
        self.assertEqual(result3.query,    result.query)
        self.assertEqual(result3.fragment, result.fragment)
        self.assertEqual(result3.username, result.username)
        self.assertEqual(result3.password, result.password)
        self.assertEqual(result3.hostname, result.hostname)
        self.assertEqual(result3.port,     result.port)

        # check the roundtrip using urlsplit() as well
        result = urllib.parse.urlsplit(url)
        self.assertEqual(result, split)
        t = (result.scheme, result.netloc, result.path,
             result.query, result.fragment)
        self.assertEqual(t, split)
        result2 = urllib.parse.urlunsplit(result)
        self.assertEqual(result2, url)
        self.assertEqual(result2, result.geturl())

        # check the fixpoint property of re-parsing the result of geturl()
        result3 = urllib.parse.urlsplit(result.geturl())
        self.assertEqual(result3.geturl(), result.geturl())
        self.assertEqual(result3,          result)
        self.assertEqual(result3.scheme,   result.scheme)
        self.assertEqual(result3.netloc,   result.netloc)
        self.assertEqual(result3.path,     result.path)
        self.assertEqual(result3.query,    result.query)
        self.assertEqual(result3.fragment, result.fragment)
        self.assertEqual(result3.username, result.username)
        self.assertEqual(result3.password, result.password)
        self.assertEqual(result3.hostname, result.hostname)
        self.assertEqual(result3.port,     result.port)

    def test_qsl(self):
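        # keep_blank_values=True must reproduce the expected lists verbatim;
        # with keep_blank_values=False, pairs whose value is empty are dropped.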
        for orig, expect in parse_qsl_test_cases:
            result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
            self.assertEqual(result, expect, "Error parsing %r" % orig)
            expect_without_blanks = [v for v in expect if len(v[1])]
            result = urllib.parse.parse_qsl(orig, keep_blank_values=False)
            self.assertEqual(result, expect_without_blanks,
                            "Error parsing %r" % orig)

    def test_qs(self):
        for orig, expect in parse_qs_test_cases:
            result = urllib.parse.parse_qs(orig, keep_blank_values=True)
            self.assertEqual(result, expect, "Error parsing %r" % orig)
            expect_without_blanks = {v: expect[v]
                                     for v in expect if len(expect[v][0])}
            result = urllib.parse.parse_qs(orig, keep_blank_values=False)
            self.assertEqual(result, expect_without_blanks,
                            "Error parsing %r" % orig)

    def test_roundtrips(self):
        str_cases = [
            ('file:///tmp/junk.txt',
             ('file', '', '/tmp/junk.txt', '', '', ''),
             ('file', '', '/tmp/junk.txt', '', '')),
            ('imap://mail.python.org/mbox1',
             ('imap', 'mail.python.org', '/mbox1', '', '', ''),
             ('imap', 'mail.python.org', '/mbox1', '', '')),
            ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
              '', '', ''),
             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
              '', '')),
            ('nfs://server/path/to/file.txt',
             ('nfs', 'server', '/path/to/file.txt', '', '', ''),
             ('nfs', 'server', '/path/to/file.txt', '', '')),
            ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
              '', '', ''),
             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
              '', '')),
            ('git+ssh://git@github.com/user/project.git',
             ('git+ssh', 'git@github.com', '/user/project.git',
              '', '', ''),
             ('git+ssh', 'git@github.com', '/user/project.git',
              '', '')),
            ]
        def _encode(t):
            return (t[0].encode('ascii'),
                    tuple(x.encode('ascii') for x in t[1]),
                    tuple(x.encode('ascii') for x in t[2]))
        bytes_cases = [_encode(x) for x in str_cases]
        for url, parsed, split in str_cases + bytes_cases:
            self.checkRoundtrips(url, parsed, split)

    def test_http_roundtrips(self):
        # urllib.parse.urlsplit treats 'http:' as an optimized special case,
        # so we test both 'http:' and 'https:' in all the following.
        # Three cheers for white box knowledge!
        str_cases = [
            ('://www.python.org',
             ('www.python.org', '', '', '', ''),
             ('www.python.org', '', '', '')),
            ('://www.python.org#abc',
             ('www.python.org', '', '', '', 'abc'),
             ('www.python.org', '', '', 'abc')),
            ('://www.python.org?q=abc',
             ('www.python.org', '', '', 'q=abc', ''),
             ('www.python.org', '', 'q=abc', '')),
            ('://www.python.org/#abc',
             ('www.python.org', '/', '', '', 'abc'),
             ('www.python.org', '/', '', 'abc')),
            ('://a/b/c/d;p?q#f',
             ('a', '/b/c/d', 'p', 'q', 'f'),
             ('a', '/b/c/d;p', 'q', 'f')),
            ]
        def _encode(t):
            return (t[0].encode('ascii'),
                    tuple(x.encode('ascii') for x in t[1]),
                    tuple(x.encode('ascii') for x in t[2]))
        bytes_cases = [_encode(x) for x in str_cases]
        str_schemes = ('http', 'https')
        bytes_schemes = (b'http', b'https')
        str_tests = str_schemes, str_cases
        bytes_tests = bytes_schemes, bytes_cases
        for schemes, test_cases in (str_tests, bytes_tests):
            for scheme in schemes:
                for url, parsed, split in test_cases:
                    url = scheme + url
                    parsed = (scheme,) + parsed
                    split = (scheme,) + split
                    self.checkRoundtrips(url, parsed, split)

    def checkJoin(self, base, relurl, expected):
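        # Helper: urljoin(base, relurl) must equal `expected`, both for the
        # str arguments and for their ASCII-encoded bytes counterparts.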
        str_components = (base, relurl, expected)
        self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
        bytes_components = baseb, relurlb, expectedb = [
                            x.encode('ascii') for x in str_components]
        self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)

    def test_unparse_parse(self):
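        # urlsplit()/urlparse() followed by urlunsplit()/urlunparse() should
        # reproduce these inputs exactly (relative references, scheme-only
        # URLs, and a bare '/').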
        str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
        bytes_cases = [x.encode('ascii') for x in str_cases]
        for u in str_cases + bytes_cases:
            self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
            self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)

    def test_RFC1808(self):
        # "normal" cases from RFC 1808:
        self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC1808_BASE, '//g', 'http://g')
        self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
        self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
        self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')

        # "abnormal" cases from RFC 1808:
        self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
        self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')

        # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
        # so we'll not actually run these tests (which expect 1808 behavior).
        #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
        #self.checkJoin(RFC1808_BASE, 'http:', 'http:')

        # XXX: The following tests are no longer compatible with RFC3986
        # self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
        # self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
        # self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
        # self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')


    def test_RFC2368(self):
        # Issue 11467: path that starts with a number is not parsed correctly
        self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
                ('mailto', '', '1337@example.org', '', '', ''))

    def test_RFC2396(self):
        # cases from RFC 2396

        self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC2396_BASE, '//g', 'http://g')
        self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
        self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
        self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
        self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
        self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
        self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
        self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
        self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
        self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')

        # XXX: The following tests are no longer compatible with RFC3986
        # self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
        # self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
        # self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
        # self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')

    def test_RFC3986(self):
        self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
        self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x')
        self.checkJoin(RFC3986_BASE, 'g:h','g:h')
        self.checkJoin(RFC3986_BASE, 'g','http://a/b/c/g')
        self.checkJoin(RFC3986_BASE, './g','http://a/b/c/g')
        self.checkJoin(RFC3986_BASE, 'g/','http://a/b/c/g/')
        self.checkJoin(RFC3986_BASE, '/g','http://a/g')
        self.checkJoin(RFC3986_BASE, '//g','http://g')
        self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
        self.checkJoin(RFC3986_BASE, 'g?y','http://a/b/c/g?y')
        self.checkJoin(RFC3986_BASE, '#s','http://a/b/c/d;p?q#s')
        self.checkJoin(RFC3986_BASE, 'g#s','http://a/b/c/g#s')
        self.checkJoin(RFC3986_BASE, 'g?y#s','http://a/b/c/g?y#s')
        self.checkJoin(RFC3986_BASE, ';x','http://a/b/c/;x')
        self.checkJoin(RFC3986_BASE, 'g;x','http://a/b/c/g;x')
        self.checkJoin(RFC3986_BASE, 'g;x?y#s','http://a/b/c/g;x?y#s')
        self.checkJoin(RFC3986_BASE, '','http://a/b/c/d;p?q')
        self.checkJoin(RFC3986_BASE, '.','http://a/b/c/')
        self.checkJoin(RFC3986_BASE, './','http://a/b/c/')
        self.checkJoin(RFC3986_BASE, '..','http://a/b/')
        self.checkJoin(RFC3986_BASE, '../','http://a/b/')
        self.checkJoin(RFC3986_BASE, '../g','http://a/b/g')
        self.checkJoin(RFC3986_BASE, '../..','http://a/')
        self.checkJoin(RFC3986_BASE, '../../','http://a/')
        self.checkJoin(RFC3986_BASE, '../../g','http://a/g')
        self.checkJoin(RFC3986_BASE, '../../../g', 'http://a/g')

        # Abnormal Examples

        # The 'abnormal scenarios' are incompatible with RFC 3986 parsing
        # Tests are here for reference.

        self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
        self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
        self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
        self.checkJoin(RFC3986_BASE, '/../g','http://a/g')
        self.checkJoin(RFC3986_BASE, 'g.','http://a/b/c/g.')
        self.checkJoin(RFC3986_BASE, '.g','http://a/b/c/.g')
        self.checkJoin(RFC3986_BASE, 'g..','http://a/b/c/g..')
        self.checkJoin(RFC3986_BASE, '..g','http://a/b/c/..g')
        self.checkJoin(RFC3986_BASE, './../g','http://a/b/g')
        self.checkJoin(RFC3986_BASE, './g/.','http://a/b/c/g/')
        self.checkJoin(RFC3986_BASE, 'g/./h','http://a/b/c/g/h')
        self.checkJoin(RFC3986_BASE, 'g/../h','http://a/b/c/h')
        self.checkJoin(RFC3986_BASE, 'g;x=1/./y','http://a/b/c/g;x=1/y')
        self.checkJoin(RFC3986_BASE, 'g;x=1/../y','http://a/b/c/y')
        self.checkJoin(RFC3986_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
        self.checkJoin(RFC3986_BASE, 'g?y/../x','http://a/b/c/g?y/../x')
        self.checkJoin(RFC3986_BASE, 'g#s/./x','http://a/b/c/g#s/./x')
        self.checkJoin(RFC3986_BASE, 'g#s/../x','http://a/b/c/g#s/../x')
        #self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser
        self.checkJoin(RFC3986_BASE, 'http:g','http://a/b/c/g') #relaxed parser

        # Test for issue9721
        self.checkJoin('http://a/b/c/de', ';x','http://a/b/c/;x')

    def test_urljoins(self):
        self.checkJoin(SIMPLE_BASE, 'g:h','g:h')
        self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
        self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
        self.checkJoin(SIMPLE_BASE, 'g','http://a/b/c/g')
        self.checkJoin(SIMPLE_BASE, './g','http://a/b/c/g')
        self.checkJoin(SIMPLE_BASE, 'g/','http://a/b/c/g/')
        self.checkJoin(SIMPLE_BASE, '/g','http://a/g')
        self.checkJoin(SIMPLE_BASE, '//g','http://g')
        self.checkJoin(SIMPLE_BASE, '?y','http://a/b/c/d?y')
        self.checkJoin(SIMPLE_BASE, 'g?y','http://a/b/c/g?y')
        self.checkJoin(SIMPLE_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
        self.checkJoin(SIMPLE_BASE, '.','http://a/b/c/')
        self.checkJoin(SIMPLE_BASE, './','http://a/b/c/')
        self.checkJoin(SIMPLE_BASE, '..','http://a/b/')
        self.checkJoin(SIMPLE_BASE, '../','http://a/b/')
        self.checkJoin(SIMPLE_BASE, '../g','http://a/b/g')
        self.checkJoin(SIMPLE_BASE, '../..','http://a/')
        self.checkJoin(SIMPLE_BASE, '../../g','http://a/g')
        self.checkJoin(SIMPLE_BASE, './../g','http://a/b/g')
        self.checkJoin(SIMPLE_BASE, './g/.','http://a/b/c/g/')
        self.checkJoin(SIMPLE_BASE, 'g/./h','http://a/b/c/g/h')
        self.checkJoin(SIMPLE_BASE, 'g/../h','http://a/b/c/h')
        self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
        self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
        self.checkJoin(SIMPLE_BASE, 'http:?y','http://a/b/c/d?y')
        self.checkJoin(SIMPLE_BASE, 'http:g?y','http://a/b/c/g?y')
        self.checkJoin(SIMPLE_BASE, 'http:g?y/./x','http://a/b/c/g?y/./x')
        self.checkJoin('http:///', '..','http:///')
        self.checkJoin('', 'http://a/b/c/g?y/./x','http://a/b/c/g?y/./x')
        self.checkJoin('', 'http://a/./g', 'http://a/./g')
        self.checkJoin('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2')
        self.checkJoin('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2')
        self.checkJoin('ws://a/b','g','ws://a/g')
        self.checkJoin('wss://a/b','g','wss://a/g')

        # XXX: The following tests are no longer compatible with RFC3986
        # self.checkJoin(SIMPLE_BASE, '../../../g','http://a/../g')
        # self.checkJoin(SIMPLE_BASE, '/./g','http://a/./g')

        # test for issue22118 duplicate slashes
        self.checkJoin(SIMPLE_BASE + '/', 'foo', SIMPLE_BASE + '/foo')

        # Non-RFC-defined tests, covering variations of base and trailing
        # slashes
        self.checkJoin('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/')
        self.checkJoin('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/')
        self.checkJoin('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/')
        self.checkJoin('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/')
        self.checkJoin('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g')
        self.checkJoin('http://a/b/', '../../f/g/', 'http://a/f/g/')

        # issue 23703: don't duplicate filename
        self.checkJoin('a', 'b', 'b')

    def test_RFC2732(self):
        str_cases = [
            ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
            ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
            ('http://[::1]:5432/foo/', '::1', 5432),
            ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
            ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
            ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
            ('http://[::ffff:12.34.56.78]:5432/foo/',
             '::ffff:12.34.56.78', 5432),
            ('http://Test.python.org/foo/', 'test.python.org', None),
            ('http://12.34.56.78/foo/', '12.34.56.78', None),
            ('http://[::1]/foo/', '::1', None),
            ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
            ('http://[dead:beef::]/foo/', 'dead:beef::', None),
            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
            ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
            ('http://[::ffff:12.34.56.78]/foo/',
             '::ffff:12.34.56.78', None),
            ('http://Test.python.org:/foo/', 'test.python.org', None),
            ('http://12.34.56.78:/foo/', '12.34.56.78', None),
            ('http://[::1]:/foo/', '::1', None),
            ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
            ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
            ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
            ('http://[::ffff:12.34.56.78]:/foo/',
             '::ffff:12.34.56.78', None),
            ]
        def _encode(t):
            return t[0].encode('ascii'), t[1].encode('ascii'), t[2]
        bytes_cases = [_encode(x) for x in str_cases]
        for url, hostname, port in str_cases + bytes_cases:
            urlparsed = urllib.parse.urlparse(url)
            self.assertEqual((urlparsed.hostname, urlparsed.port), (hostname, port))

        str_cases = [
                'http://::12.34.56.78]/',
                'http://[::1/foo/',
                'ftp://[::1/foo/bad]/bad',
                'http://[::1/foo/bad]/bad',
                'http://[::ffff:12.34.56.78']
        bytes_cases = [x.encode('ascii') for x in str_cases]
        for invalid_url in str_cases + bytes_cases:
            self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)

    def test_urldefrag(self):
        str_cases = [
            ('http://python.org#frag', 'http://python.org', 'frag'),
            ('http://python.org', 'http://python.org', ''),
            ('http://python.org/#frag', 'http://python.org/', 'frag'),
            ('http://python.org/', 'http://python.org/', ''),
            ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
            ('http://python.org/?q', 'http://python.org/?q', ''),
            ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
            ('http://python.org/p?q', 'http://python.org/p?q', ''),
            (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
            (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
        ]
        def _encode(t):
            return type(t)(x.encode('ascii') for x in t)
        bytes_cases = [_encode(x) for x in str_cases]
        for url, defrag, frag in str_cases + bytes_cases:
            result = urllib.parse.urldefrag(url)
            self.assertEqual(result.geturl(), url)
            self.assertEqual(result, (defrag, frag))
            self.assertEqual(result.url, defrag)
            self.assertEqual(result.fragment, frag)

    def test_urlsplit_scoped_IPv6(self):
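        # The bracketed address is lower-cased in .hostname, but the zone
        # identifier after '%' keeps its case, and .netloc is left untouched.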
        p = urllib.parse.urlsplit('http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
        self.assertEqual(p.hostname, "fe80::822a:a8ff:fe49:470c%tESt")
        self.assertEqual(p.netloc, '[FE80::822a:a8ff:fe49:470c%tESt]:1234')

        p = urllib.parse.urlsplit(b'http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
        self.assertEqual(p.hostname, b"fe80::822a:a8ff:fe49:470c%tESt")
        self.assertEqual(p.netloc, b'[FE80::822a:a8ff:fe49:470c%tESt]:1234')

    def test_urlsplit_attributes(self):
        url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, None)
        # geturl() won't return exactly the original URL in this case
        # since the scheme is always case-normalized
        # We handle this by ignoring the first 4 characters of the URL
        self.assertEqual(p.geturl()[4:], url[4:])

        url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "query=yes")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, "User")
        self.assertEqual(p.password, "Pass")
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

        # Addressing issue1698, which suggests Username can contain
        # "@" characters.  Though not RFC compliant, many ftp sites allow
        # and request email addresses as usernames.

        url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "query=yes")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, "User@example.com")
        self.assertEqual(p.password, "Pass")
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

        # And check them all again, only with bytes this time
        url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, b"http")
        self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
        self.assertEqual(p.path, b"/doc/")
        self.assertEqual(p.query, b"")
        self.assertEqual(p.fragment, b"frag")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, b"www.python.org")
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl()[4:], url[4:])

        url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, b"http")
        self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
        self.assertEqual(p.path, b"/doc/")
        self.assertEqual(p.query, b"query=yes")
        self.assertEqual(p.fragment, b"frag")
        self.assertEqual(p.username, b"User")
        self.assertEqual(p.password, b"Pass")
        self.assertEqual(p.hostname, b"www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

        url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, b"http")
        self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")
        self.assertEqual(p.path, b"/doc/")
        self.assertEqual(p.query, b"query=yes")
        self.assertEqual(p.fragment, b"frag")
        self.assertEqual(p.username, b"User@example.com")
        self.assertEqual(p.password, b"Pass")
        self.assertEqual(p.hostname, b"www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

        # Verify an illegal port raises ValueError
        url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"
        p = urllib.parse.urlsplit(url)
        with self.assertRaisesRegex(ValueError, "out of range"):
            p.port

    def test_attributes_bad_port(self):
        """Check handling of invalid ports."""
        for bytes in (False, True):
            for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
                for port in ("foo", "1.5", "-1", "0x10"):
                    with self.subTest(bytes=bytes, parse=parse, port=port):
                        netloc = "www.example.net:" + port
                        url = "http://" + netloc
                        if bytes:
                            netloc = netloc.encode("ascii")
                            url = url.encode("ascii")
                        p = parse(url)
                        self.assertEqual(p.netloc, netloc)
                        with self.assertRaises(ValueError):
                            p.port

    def test_attributes_without_netloc(self):
        # This example is straight from RFC 3261.  It looks like it
        # should allow the username, hostname, and port to be filled
        # in, but doesn't.  Since it's a URI and doesn't use the
        # scheme://netloc syntax, the netloc and related attributes
        # should be left empty.
        uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
        p = urllib.parse.urlsplit(uri)
        self.assertEqual(p.netloc, "")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, None)
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl(), uri)

        p = urllib.parse.urlparse(uri)
        self.assertEqual(p.netloc, "")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, None)
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl(), uri)

        # You guessed it, repeating the test with bytes input
        uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
        p = urllib.parse.urlsplit(uri)
        self.assertEqual(p.netloc, b"")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, None)
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl(), uri)

        p = urllib.parse.urlparse(uri)
        self.assertEqual(p.netloc, b"")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, None)
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl(), uri)

    def test_noslash(self):
        # Issue 1637: http://foo.com?query is legal
        self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
                         ('http', 'example.com', '', '', 'blahblah=/foo', ''))
        self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
                         (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))

    def test_withoutscheme(self):
        # Test urlparse without scheme
        # Issue 754016: urlparse goes wrong with IP:port without scheme
        # RFC 1808 specifies that a netloc must be introduced by '//'; urlparse
        # expects the same, and otherwise treats that part of the URL as a path.
        self.assertEqual(urllib.parse.urlparse("path"),
                ('','','path','','',''))
        self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
                ('','www.python.org:80','','','',''))
        self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
                ('http','www.python.org:80','','','',''))
        # Repeat for bytes input
        self.assertEqual(urllib.parse.urlparse(b"path"),
                (b'',b'',b'path',b'',b'',b''))
        self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
                (b'',b'www.python.org:80',b'',b'',b'',b''))
        self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
                (b'http',b'www.python.org:80',b'',b'',b'',b''))

    def test_portseparator(self):
        # Issue 754016: distinguish the port separator ':' from the scheme separator
        self.assertEqual(urllib.parse.urlparse("path:80"),
                ('','','path:80','','',''))
        self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
        self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
        self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
                ('http','www.python.org:80','','','',''))
        # As usual, need to check bytes input as well
        self.assertEqual(urllib.parse.urlparse(b"path:80"),
                (b'',b'',b'path:80',b'',b'',b''))
        self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
        self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
        self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
                (b'http',b'www.python.org:80',b'',b'',b'',b''))

    def test_usingsys(self):
        # Issue 3314: sys module is used in the error
        self.assertRaises(TypeError, urllib.parse.urlencode, "foo")

    def test_anyscheme(self):
        # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
        self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
                         ('s3', 'foo.com', '/stuff', '', '', ''))
        self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
                         ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
        self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
                         ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
        self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
                         ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))

        # And for bytes...
        self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
                         (b's3', b'foo.com', b'/stuff', b'', b'', b''))
        self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
                         (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
        self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
                         (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
        self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
                         (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))

    def test_default_scheme(self):
        # Exercise the scheme parameter of urlparse() and urlsplit()
        for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
            with self.subTest(function=func):
                result = func("http://example.net/", "ftp")
                self.assertEqual(result.scheme, "http")
                result = func(b"http://example.net/", b"ftp")
                self.assertEqual(result.scheme, b"http")
                self.assertEqual(func("path", "ftp").scheme, "ftp")
                self.assertEqual(func("path", scheme="ftp").scheme, "ftp")
                self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp")
                self.assertEqual(func("path").scheme, "")
                self.assertEqual(func(b"path").scheme, b"")
                self.assertEqual(func(b"path", "").scheme, b"")

    def test_parse_fragments(self):
        # Exercise the allow_fragments parameter of urlparse() and urlsplit()
        tests = (
            ("http:#frag", "path", "frag"),
            ("//example.net#frag", "path", "frag"),
            ("index.html#frag", "path", "frag"),
            (";a=b#frag", "params", "frag"),
            ("?a=b#frag", "query", "frag"),
            ("#frag", "path", "frag"),
            ("abc#@frag", "path", "@frag"),
            ("//abc#@frag", "path", "@frag"),
            ("//abc:80#@frag", "path", "@frag"),
            ("//abc#@frag:80", "path", "@frag:80"),
        )
        for url, attr, expected_frag in tests:
            for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
                if attr == "params" and func is urllib.parse.urlsplit:
                    attr = "path"
                with self.subTest(url=url, function=func):
                    result = func(url, allow_fragments=False)
                    self.assertEqual(result.fragment, "")
                    self.assertTrue(
                            getattr(result, attr).endswith("#" + expected_frag))
                    self.assertEqual(func(url, "", False).fragment, "")

                    result = func(url, allow_fragments=True)
                    self.assertEqual(result.fragment, expected_frag)
                    self.assertFalse(
                            getattr(result, attr).endswith(expected_frag))
                    self.assertEqual(func(url, "", True).fragment,
                                     expected_frag)
                    self.assertEqual(func(url).fragment, expected_frag)

    def test_mixed_types_rejected(self):
        # Several functions that process either strings or ASCII encoded bytes
        # accept multiple arguments. Check they reject mixed type input
        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
            urllib.parse.urlparse("www.python.org", b"http")
        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
            urllib.parse.urlparse(b"www.python.org", "http")
        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
            urllib.parse.urlsplit("www.python.org", b"http")
        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
            urllib.parse.urlsplit(b"www.python.org", "http")
        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
            urllib.parse.urlunparse((b"http", "www.python.org","","","",""))
        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
            urllib.parse.urlunparse(("http", b"www.python.org","","","",""))
        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
            urllib.parse.urlunsplit((b"http", "www.python.org","","",""))
        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
            urllib.parse.urlunsplit(("http", b"www.python.org","","",""))
        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
            urllib.parse.urljoin("http://python.org", b"http://python.org")
        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
            urllib.parse.urljoin(b"http://python.org", "http://python.org")

    def _check_result_type(self, str_type):
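        # Result types come in str/bytes pairs linked via _encoded_counterpart
        # and _decoded_counterpart; encode()/decode() should convert between
        # them field by field.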
        num_args = len(str_type._fields)
        bytes_type = str_type._encoded_counterpart
        self.assertIs(bytes_type._decoded_counterpart, str_type)
        str_args = ('',) * num_args
        bytes_args = (b'',) * num_args
        str_result = str_type(*str_args)
        bytes_result = bytes_type(*bytes_args)
        encoding = 'ascii'
        errors = 'strict'
        self.assertEqual(str_result, str_args)
        self.assertEqual(bytes_result.decode(), str_args)
        self.assertEqual(bytes_result.decode(), str_result)
        self.assertEqual(bytes_result.decode(encoding), str_args)
        self.assertEqual(bytes_result.decode(encoding), str_result)
        self.assertEqual(bytes_result.decode(encoding, errors), str_args)
        self.assertEqual(bytes_result.decode(encoding, errors), str_result)
        self.assertEqual(bytes_result, bytes_args)
        self.assertEqual(str_result.encode(), bytes_args)
        self.assertEqual(str_result.encode(), bytes_result)
        self.assertEqual(str_result.encode(encoding), bytes_args)
        self.assertEqual(str_result.encode(encoding), bytes_result)
        self.assertEqual(str_result.encode(encoding, errors), bytes_args)
        self.assertEqual(str_result.encode(encoding, errors), bytes_result)

    def test_result_pairs(self):
        # Check encoding and decoding between result pairs
        result_types = [
          urllib.parse.DefragResult,
          urllib.parse.SplitResult,
          urllib.parse.ParseResult,
        ]
        for result_type in result_types:
            self._check_result_type(result_type)

    def test_parse_qs_encoding(self):
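        # %E9 decodes to '\xe9' in latin-1 and %C3%A9 to '\xe9' in UTF-8;
        # bytes that cannot be decoded are handled per the `errors` argument.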
        result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
        self.assertEqual(result, {'key': ['\u0141\xE9']})
        result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")
        self.assertEqual(result, {'key': ['\u0141\xE9']})
        result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")
        self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})
        result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")
        self.assertEqual(result, {'key': ['\u0141\ufffd-']})
        result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",
                                                          errors="ignore")
        self.assertEqual(result, {'key': ['\u0141-']})

    def test_parse_qsl_encoding(self):
        result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
        self.assertEqual(result, [('key', '\u0141\xE9')])
        result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")
        self.assertEqual(result, [('key', '\u0141\xE9')])
        result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")
        self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
        result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")
        self.assertEqual(result, [('key', '\u0141\ufffd-')])
        result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",
                                                          errors="ignore")
        self.assertEqual(result, [('key', '\u0141-')])

    def test_parse_qsl_max_num_fields(self):
        with self.assertRaises(ValueError):
            urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
        with self.assertRaises(ValueError):
            urllib.parse.parse_qs(';'.join(['a=a']*11), max_num_fields=10)
        urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)

    def test_urlencode_sequences(self):
        # Other tests incidentally urlencode things; test non-covered cases:
        # Sequence and object values.
        result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
        # we cannot rely on ordering here
        self.assertEqual(set(result.split('&')), {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'})

        class Trivial:
            def __str__(self):
                return 'trivial'

        result = urllib.parse.urlencode({'a': Trivial()}, True)
        self.assertEqual(result, 'a=trivial')

    def test_urlencode_quote_via(self):
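        # urlencode() quotes with quote_plus() by default (spaces become '+');
        # quote_via lets callers substitute quote() or another quoting function.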
        result = urllib.parse.urlencode({'a': 'some value'})
        self.assertEqual(result, "a=some+value")
        result = urllib.parse.urlencode({'a': 'some value/another'},
                                        quote_via=urllib.parse.quote)
        self.assertEqual(result, "a=some%20value%2Fanother")
        result = urllib.parse.urlencode({'a': 'some value/another'},
                                        safe='/', quote_via=urllib.parse.quote)
        self.assertEqual(result, "a=some%20value/another")

    def test_quote_from_bytes(self):
        self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')
        result = urllib.parse.quote_from_bytes(b'archaeological arcana')
        self.assertEqual(result, 'archaeological%20arcana')
        result = urllib.parse.quote_from_bytes(b'')
        self.assertEqual(result, '')

    def test_unquote_to_bytes(self):
        result = urllib.parse.unquote_to_bytes('abc%20def')
        self.assertEqual(result, b'abc def')
        result = urllib.parse.unquote_to_bytes('')
        self.assertEqual(result, b'')

    def test_quote_errors(self):
        self.assertRaises(TypeError, urllib.parse.quote, b'foo',
                          encoding='utf-8')
        self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict')

    def test_issue14072(self):
        p1 = urllib.parse.urlsplit('tel:+31-641044153')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '+31-641044153')
        p2 = urllib.parse.urlsplit('tel:+31641044153')
        self.assertEqual(p2.scheme, 'tel')
        self.assertEqual(p2.path, '+31641044153')
        # assert the behavior for urlparse
        p1 = urllib.parse.urlparse('tel:+31-641044153')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '+31-641044153')
        p2 = urllib.parse.urlparse('tel:+31641044153')
        self.assertEqual(p2.scheme, 'tel')
        self.assertEqual(p2.path, '+31641044153')

    def test_telurl_params(self):
        p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '123-4')
        self.assertEqual(p1.params, 'phone-context=+1-650-516')

        p1 = urllib.parse.urlparse('tel:+1-201-555-0123')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '+1-201-555-0123')
        self.assertEqual(p1.params, '')

        p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '7042')
        self.assertEqual(p1.params, 'phone-context=example.com')

        p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '863-1234')
        self.assertEqual(p1.params, 'phone-context=+1-914-555')

    def test_Quoter_repr(self):
        quoter = urllib.parse.Quoter(urllib.parse._ALWAYS_SAFE)
        self.assertIn('Quoter', repr(quoter))

    def test_all(self):
        expected = []
        undocumented = {
            'splitattr', 'splithost', 'splitnport', 'splitpasswd',
            'splitport', 'splitquery', 'splittag', 'splittype', 'splituser',
            'splitvalue',
            'Quoter', 'ResultBase', 'clear_cache', 'to_bytes', 'unwrap',
        }
        for name in dir(urllib.parse):
            if name.startswith('_') or name in undocumented:
                continue
            object = getattr(urllib.parse, name)
            if getattr(object, '__module__', None) == 'urllib.parse':
                expected.append(name)
        self.assertCountEqual(urllib.parse.__all__, expected)

    def test_urlsplit_normalization(self):
        # Certain characters should never occur in the netloc,
        # including under normalization.
        # Ensure that ALL of them are detected and cause an error
        illegal_chars = '/:#?@'
        hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars}
        denorm_chars = [
            c for c in map(chr, range(128, sys.maxunicode))
            if (hex_chars & set(unicodedata.decomposition(c).split()))
            and c not in illegal_chars
        ]
        # Sanity check that we found at least one such character
        self.assertIn('\u2100', denorm_chars)
        self.assertIn('\uFF03', denorm_chars)

        for scheme in ["http", "https", "ftp"]:
            for c in denorm_chars:
                url = "{}://netloc{}false.netloc/path".format(scheme, c)
                with self.subTest(url=url, char='{:04X}'.format(ord(c))):
                    with self.assertRaises(ValueError):
                        urllib.parse.urlsplit(url)

class Utility_Tests(unittest.TestCase):
    """Tests for the various utility functions in urllib.parse."""
    # In Python 2 this test class was in test_urllib.

    def test_splittype(self):
        splittype = urllib.parse.splittype
        self.assertEqual(splittype('type:opaquestring'), ('type', 'opaquestring'))
        self.assertEqual(splittype('opaquestring'), (None, 'opaquestring'))
        self.assertEqual(splittype(':opaquestring'), (None, ':opaquestring'))
        self.assertEqual(splittype('type:'), ('type', ''))
        self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string'))

    def test_splithost(self):
        splithost = urllib.parse.splithost
        self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'),
                         ('www.example.org:80', '/foo/bar/baz.html'))
        self.assertEqual(splithost('//www.example.org:80'),
                         ('www.example.org:80', ''))
        self.assertEqual(splithost('/foo/bar/baz.html'),
                         (None, '/foo/bar/baz.html'))

        # bpo-30500: # starts a fragment.
        self.assertEqual(splithost('//127.0.0.1#@host.com'),
                         ('127.0.0.1', '/#@host.com'))
        self.assertEqual(splithost('//127.0.0.1#@host.com:80'),
                         ('127.0.0.1', '/#@host.com:80'))
        self.assertEqual(splithost('//127.0.0.1:80#@host.com'),
                         ('127.0.0.1:80', '/#@host.com'))

        # Empty host is returned as empty string.
        self.assertEqual(splithost("///file"),
                         ('', '/file'))

        # Trailing semicolon, question mark and hash symbol are kept.
        self.assertEqual(splithost("//example.net/file;"),
                         ('example.net', '/file;'))
        self.assertEqual(splithost("//example.net/file?"),
                         ('example.net', '/file?'))
        self.assertEqual(splithost("//example.net/file#"),
                         ('example.net', '/file#'))

    def test_splituser(self):
        splituser = urllib.parse.splituser
        self.assertEqual(splituser('User:Pass@www.python.org:080'),
                         ('User:Pass', 'www.python.org:080'))
        self.assertEqual(splituser('@www.python.org:080'),
                         ('', 'www.python.org:080'))
        self.assertEqual(splituser('www.python.org:080'),
                         (None, 'www.python.org:080'))
        self.assertEqual(splituser('User:Pass@'),
                         ('User:Pass', ''))
        self.assertEqual(splituser('User@example.com:Pass@www.python.org:080'),
                         ('User@example.com:Pass', 'www.python.org:080'))

    def test_splitpasswd(self):
        # Some of the password examples are not sensible, but they are included
        # to conform to RFC 2617 and to address issue4675.
        splitpasswd = urllib.parse.splitpasswd
        self.assertEqual(splitpasswd('user:ab'), ('user', 'ab'))
        self.assertEqual(splitpasswd('user:a\nb'), ('user', 'a\nb'))
        self.assertEqual(splitpasswd('user:a\tb'), ('user', 'a\tb'))
        self.assertEqual(splitpasswd('user:a\rb'), ('user', 'a\rb'))
        self.assertEqual(splitpasswd('user:a\fb'), ('user', 'a\fb'))
        self.assertEqual(splitpasswd('user:a\vb'), ('user', 'a\vb'))
        self.assertEqual(splitpasswd('user:a:b'), ('user', 'a:b'))
        self.assertEqual(splitpasswd('user:a b'), ('user', 'a b'))
        self.assertEqual(splitpasswd('user 2:ab'), ('user 2', 'ab'))
        self.assertEqual(splitpasswd('user+1:a+b'), ('user+1', 'a+b'))
        self.assertEqual(splitpasswd('user:'), ('user', ''))
        self.assertEqual(splitpasswd('user'), ('user', None))
        self.assertEqual(splitpasswd(':ab'), ('', 'ab'))

    def test_splitport(self):
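        # splitport() keeps the port as a string and returns None when the
        # port is missing, empty, or not numeric.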
        splitport = urllib.parse.splitport
        self.assertEqual(splitport('parrot:88'), ('parrot', '88'))
        self.assertEqual(splitport('parrot'), ('parrot', None))
        self.assertEqual(splitport('parrot:'), ('parrot', None))
        self.assertEqual(splitport('127.0.0.1'), ('127.0.0.1', None))
        self.assertEqual(splitport('parrot:cheese'), ('parrot:cheese', None))
        self.assertEqual(splitport('[::1]:88'), ('[::1]', '88'))
        self.assertEqual(splitport('[::1]'), ('[::1]', None))
        self.assertEqual(splitport(':88'), ('', '88'))

    def test_splitnport(self):
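        # splitnport() converts the port to an int, returns the default (-1
        # unless overridden) when no port is given, and None when the port is
        # not numeric.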
        splitnport = urllib.parse.splitnport
        self.assertEqual(splitnport('parrot:88'), ('parrot', 88))
        self.assertEqual(splitnport('parrot'), ('parrot', -1))
        self.assertEqual(splitnport('parrot', 55), ('parrot', 55))
        self.assertEqual(splitnport('parrot:'), ('parrot', -1))
        self.assertEqual(splitnport('parrot:', 55), ('parrot', 55))
        self.assertEqual(splitnport('127.0.0.1'), ('127.0.0.1', -1))
        self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55))
        self.assertEqual(splitnport('parrot:cheese'), ('parrot', None))
        self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None))

    def test_splitquery(self):
        # Normal cases are exercised by other tests; ensure that we also
        # catch cases with no query specified (testcase ensuring coverage)
        splitquery = urllib.parse.splitquery
        self.assertEqual(splitquery('http://python.org/fake?foo=bar'),
                         ('http://python.org/fake', 'foo=bar'))
        self.assertEqual(splitquery('http://python.org/fake?foo=bar?'),
                         ('http://python.org/fake?foo=bar', ''))
        self.assertEqual(splitquery('http://python.org/fake'),
                         ('http://python.org/fake', None))
        self.assertEqual(splitquery('?foo=bar'), ('', 'foo=bar'))

    def test_splittag(self):
        splittag = urllib.parse.splittag
        self.assertEqual(splittag('http://example.com?foo=bar#baz'),
                         ('http://example.com?foo=bar', 'baz'))
        self.assertEqual(splittag('http://example.com?foo=bar#'),
                         ('http://example.com?foo=bar', ''))
        self.assertEqual(splittag('#baz'), ('', 'baz'))
        self.assertEqual(splittag('http://example.com?foo=bar'),
                         ('http://example.com?foo=bar', None))
        self.assertEqual(splittag('http://example.com?foo=bar#baz#boo'),
                         ('http://example.com?foo=bar#baz', 'boo'))

    def test_splitattr(self):
        splitattr = urllib.parse.splitattr
        self.assertEqual(splitattr('/path;attr1=value1;attr2=value2'),
                         ('/path', ['attr1=value1', 'attr2=value2']))
        self.assertEqual(splitattr('/path;'), ('/path', ['']))
        self.assertEqual(splitattr(';attr1=value1;attr2=value2'),
                         ('', ['attr1=value1', 'attr2=value2']))
        self.assertEqual(splitattr('/path'), ('/path', []))

    def test_splitvalue(self):
        # Normal cases are exercised by other tests; test pathological cases
        # with no key/value pairs. (testcase ensuring coverage)
        splitvalue = urllib.parse.splitvalue
        self.assertEqual(splitvalue('foo=bar'), ('foo', 'bar'))
        self.assertEqual(splitvalue('foo='), ('foo', ''))
        self.assertEqual(splitvalue('=bar'), ('', 'bar'))
        self.assertEqual(splitvalue('foobar'), ('foobar', None))
        self.assertEqual(splitvalue('foo=bar=baz'), ('foo', 'bar=baz'))

    def test_to_bytes(self):
        result = urllib.parse.to_bytes('http://www.python.org')
        self.assertEqual(result, 'http://www.python.org')
        self.assertRaises(UnicodeError, urllib.parse.to_bytes,
                          'http://www.python.org/medi\u00e6val')

    def test_unwrap(self):
        url = urllib.parse.unwrap('<URL:type://host/path>')
        self.assertEqual(url, 'type://host/path')


if __name__ == "__main__":
    unittest.main()