1import cgi
2import os
3import sys
4import tempfile
5import unittest
6from collections import namedtuple
7from io import StringIO, BytesIO
8from test import support
9
class HackedSysModule:
    """Minimal stand-in for the ``sys`` module, installed on ``cgi`` below.

    The regression test run leaves real values in sys.argv, which would
    completely confuse the tests of the cgi module, so argv is empty here;
    stdin is passed through from the real ``sys`` module.
    """

    stdin = sys.stdin
    argv = []
15
# Rebind the name ``sys`` inside the cgi module, so that cgi reads argv and
# stdin from a HackedSysModule instance (empty argv) instead of the real sys.
cgi.sys = HackedSysModule()
17
class ComparableException:
    """Wrap an exception so that it can be compared by value.

    The wrapper compares equal to any exception that has the same class and
    the same ``args`` as the wrapped one, which lets a test put an expected
    exception instance on one side of ``assertEqual``.  Every attribute the
    wrapper does not define itself is looked up on the wrapped exception.
    """

    def __init__(self, err):
        # err: the exception instance to wrap.
        self.err = err

    def __repr__(self):
        # Makes assertion failure messages show the wrapped exception
        # instead of an anonymous "<... object at 0x...>".
        return "%s(%r)" % (type(self).__name__, self.err)

    def __str__(self):
        return str(self.err)

    def __eq__(self, anExc):
        # Only comparisons against real exceptions are handled here; for
        # anything else Python falls back to its default comparison.
        if not isinstance(anExc, Exception):
            return NotImplemented
        return (self.err.__class__ == anExc.__class__ and
                self.err.args == anExc.args)

    def __getattr__(self, attr):
        # __getattr__ only runs when normal lookup has failed.  If "err"
        # itself is what is missing (an instance built without __init__, as
        # copy and pickle do), reading self.err below would re-enter this
        # method without end, so fail with a normal AttributeError instead.
        if attr == "err":
            raise AttributeError(attr)
        return getattr(self.err, attr)
33
def do_test(buf, method):
    """Run cgi.parse() with strict parsing on *buf* as a GET or POST request.

    For "GET", buf is passed as the QUERY_STRING and no input stream is
    given.  For "POST", buf is latin-1 encoded and passed as a url-encoded
    request body.  Any other method raises ValueError.

    Returns whatever cgi.parse() returns; if it raises, the exception is
    returned wrapped in a ComparableException instead of propagating.
    """
    if method == "GET":
        fp = None
        env = {
            'REQUEST_METHOD': 'GET',
            'QUERY_STRING': buf,
        }
    elif method == "POST":
        # FieldStorage expects bytes
        fp = BytesIO(buf.encode('latin-1'))
        env = {
            'REQUEST_METHOD': 'POST',
            'CONTENT_TYPE': 'application/x-www-form-urlencoded',
            'CONTENT_LENGTH': str(len(buf)),
        }
    else:
        raise ValueError("unknown method: %s" % method)

    try:
        outcome = cgi.parse(fp, env, strict_parsing=1)
    except Exception as err:
        # Hand the error back as a value so callers can compare it.
        outcome = ComparableException(err)
    return outcome
51
# Table of (query string, expected outcome) pairs used by test_strict.  The
# expected outcome is either a dict mapping each field name to the list of its
# values, or the exception instance that strict parsing is expected to raise.
parse_strict_test_cases = [
    # An empty query, or an empty field between separators, is an error.
    ("", ValueError("bad query field: ''")),
    ("&", ValueError("bad query field: ''")),
    ("&&", ValueError("bad query field: ''")),
    (";", ValueError("bad query field: ''")),
    (";&;", ValueError("bad query field: ''")),
    # Should the next few really be valid?
    ("=", {}),
    ("=&=", {}),
    ("=;=", {}),
    # The rest seem to make sense
    ("=a", {'': ['a']}),
    ("&=a", ValueError("bad query field: ''")),
    ("=a&", ValueError("bad query field: ''")),
    ("=&a", ValueError("bad query field: 'a'")),
    ("b=a", {'b': ['a']}),
    ("b+=a", {'b ': ['a']}),
    ("a=b=a", {'a': ['b=a']}),
    ("a=+b=a", {'a': [' b=a']}),
    ("&b=a", ValueError("bad query field: ''")),
    ("b&=a", ValueError("bad query field: 'b'")),
    ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
    ("a=a+b&a=b+a", {'a': ['a b', 'b a']}),
    # The next three expect the same result whether '&' or ';' separates
    # the fields.
    ("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
    ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
    ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
    # Two longer query strings with many distinct fields.
    ("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env",
     {'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'],
      'cuyer': ['r'],
      'expire': ['964546263'],
      'kid': ['130003.300038'],
      'lobale': ['en-US'],
      'order_id': ['0bb2e248638833d48cb7fed300000f1b'],
      'ss': ['env'],
      'view': ['bustomer'],
      }),

    ("group_id=5470&set=custom&_assigned_to=31392&_status=1&_category=100&SUBMIT=Browse",
     {'SUBMIT': ['Browse'],
      '_assigned_to': ['31392'],
      '_category': ['100'],
      '_status': ['1'],
      'group_id': ['5470'],
      'set': ['custom'],
      })
    ]
98
def norm(seq):
    """Return the items of *seq* as a new list ordered by their repr()."""
    ordered = [item for item in seq]
    ordered.sort(key=repr)
    return ordered
101
def first_elts(list):
    """Return a new list holding element 0 of every item in *list*."""
    heads = []
    for item in list:
        heads.append(item[0])
    return heads
104
def first_second_elts(list):
    """Return (item[0], item[1][0]) for every item in *list*, as a new list."""
    pairs = []
    for item in list:
        head = item[0]
        first_of_second = item[1][0]
        pairs.append((head, first_of_second))
    return pairs
107
def gen_result(data, environ):
    """Parse *data* as a request body and return its fields as a plain dict.

    data (a str) is encoded as latin-1 and given to cgi.FieldStorage as the
    input stream, together with environ.  In the returned dict a field name
    maps to form.getlist(name) when the form holds a list for that name, and
    to the ``value`` attribute of the single field object otherwise.
    """
    encoding = 'latin-1'
    # A freshly created BytesIO is already positioned at offset 0.
    fake_stdin = BytesIO(data.encode(encoding))
    form = cgi.FieldStorage(fp=fake_stdin, environ=environ, encoding=encoding)

    result = {}
    for k, v in dict(form).items():
        # A conditional expression, unlike ``cond and a or b``, can never
        # fall through to ``v.value`` (which a list does not have) when the
        # list branch produces a falsy value.
        result[k] = form.getlist(k) if isinstance(v, list) else v.value

    return result
119
120class CgiTests(unittest.TestCase):
121
122    def test_parse_multipart(self):
123        fp = BytesIO(POSTDATA.encode('latin1'))
124        env = {'boundary': BOUNDARY.encode('latin1'),
125               'CONTENT-LENGTH': '558'}
126        result = cgi.parse_multipart(fp, env)
127        expected = {'submit': [' Add '], 'id': ['1234'],
128                    'file': [b'Testing 123.\n'], 'title': ['']}
129        self.assertEqual(result, expected)
130
131    def test_parse_multipart_without_content_length(self):
132        POSTDATA = '''--JfISa01
133Content-Disposition: form-data; name="submit-name"
134
135just a string
136
137--JfISa01--
138'''
139        fp = BytesIO(POSTDATA.encode('latin1'))
140        env = {'boundary': 'JfISa01'.encode('latin1')}
141        result = cgi.parse_multipart(fp, env)
142        expected = {'submit-name': ['just a string\n']}
143        self.assertEqual(result, expected)
144
145    def test_parse_multipart_invalid_encoding(self):
146        BOUNDARY = "JfISa01"
147        POSTDATA = """--JfISa01
148Content-Disposition: form-data; name="submit-name"
149Content-Length: 3
150
151\u2603
152--JfISa01"""
153        fp = BytesIO(POSTDATA.encode('utf8'))
154        env = {'boundary': BOUNDARY.encode('latin1'),
155               'CONTENT-LENGTH': str(len(POSTDATA.encode('utf8')))}
156        result = cgi.parse_multipart(fp, env, encoding="ascii",
157                                     errors="surrogateescape")
158        expected = {'submit-name': ["\udce2\udc98\udc83"]}
159        self.assertEqual(result, expected)
160        self.assertEqual("\u2603".encode('utf8'),
161                         result["submit-name"][0].encode('utf8', 'surrogateescape'))
162
163    def test_fieldstorage_properties(self):
164        fs = cgi.FieldStorage()
165        self.assertFalse(fs)
166        self.assertIn("FieldStorage", repr(fs))
167        self.assertEqual(list(fs), list(fs.keys()))
168        fs.list.append(namedtuple('MockFieldStorage', 'name')('fieldvalue'))
169        self.assertTrue(fs)
170
171    def test_fieldstorage_invalid(self):
172        self.assertRaises(TypeError, cgi.FieldStorage, "not-a-file-obj",
173                                                            environ={"REQUEST_METHOD":"PUT"})
174        self.assertRaises(TypeError, cgi.FieldStorage, "foo", "bar")
175        fs = cgi.FieldStorage(headers={'content-type':'text/plain'})
176        self.assertRaises(TypeError, bool, fs)
177
178    def test_strict(self):
179        for orig, expect in parse_strict_test_cases:
180            # Test basic parsing
181            d = do_test(orig, "GET")
182            self.assertEqual(d, expect, "Error parsing %s method GET" % repr(orig))
183            d = do_test(orig, "POST")
184            self.assertEqual(d, expect, "Error parsing %s method POST" % repr(orig))
185
186            env = {'QUERY_STRING': orig}
187            fs = cgi.FieldStorage(environ=env)
188            if isinstance(expect, dict):
189                # test dict interface
190                self.assertEqual(len(expect), len(fs))
191                self.assertCountEqual(expect.keys(), fs.keys())
192                ##self.assertEqual(norm(expect.values()), norm(fs.values()))
193                ##self.assertEqual(norm(expect.items()), norm(fs.items()))
194                self.assertEqual(fs.getvalue("nonexistent field", "default"), "default")
195                # test individual fields
196                for key in expect.keys():
197                    expect_val = expect[key]
198                    self.assertIn(key, fs)
199                    if len(expect_val) > 1:
200                        self.assertEqual(fs.getvalue(key), expect_val)
201                    else:
202                        self.assertEqual(fs.getvalue(key), expect_val[0])
203
204    def test_log(self):
205        cgi.log("Testing")
206
207        cgi.logfp = StringIO()
208        cgi.initlog("%s", "Testing initlog 1")
209        cgi.log("%s", "Testing log 2")
210        self.assertEqual(cgi.logfp.getvalue(), "Testing initlog 1\nTesting log 2\n")
211        if os.path.exists(os.devnull):
212            cgi.logfp = None
213            cgi.logfile = os.devnull
214            cgi.initlog("%s", "Testing log 3")
215            self.addCleanup(cgi.closelog)
216            cgi.log("Testing log 4")
217
218    def test_fieldstorage_readline(self):
219        # FieldStorage uses readline, which has the capacity to read all
220        # contents of the input file into memory; we use readline's size argument
221        # to prevent that for files that do not contain any newlines in
222        # non-GET/HEAD requests
223        class TestReadlineFile:
224            def __init__(self, file):
225                self.file = file
226                self.numcalls = 0
227
228            def readline(self, size=None):
229                self.numcalls += 1
230                if size:
231                    return self.file.readline(size)
232                else:
233                    return self.file.readline()
234
235            def __getattr__(self, name):
236                file = self.__dict__['file']
237                a = getattr(file, name)
238                if not isinstance(a, int):
239                    setattr(self, name, a)
240                return a
241
242        f = TestReadlineFile(tempfile.TemporaryFile("wb+"))
243        self.addCleanup(f.close)
244        f.write(b'x' * 256 * 1024)
245        f.seek(0)
246        env = {'REQUEST_METHOD':'PUT'}
247        fs = cgi.FieldStorage(fp=f, environ=env)
248        self.addCleanup(fs.file.close)
249        # if we're not chunking properly, readline is only called twice
250        # (by read_binary); if we are chunking properly, it will be called 5 times
251        # as long as the chunksize is 1 << 16.
252        self.assertGreater(f.numcalls, 2)
253        f.close()
254
255    def test_fieldstorage_multipart(self):
256        #Test basic FieldStorage multipart parsing
257        env = {
258            'REQUEST_METHOD': 'POST',
259            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
260            'CONTENT_LENGTH': '558'}
261        fp = BytesIO(POSTDATA.encode('latin-1'))
262        fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
263        self.assertEqual(len(fs.list), 4)
264        expect = [{'name':'id', 'filename':None, 'value':'1234'},
265                  {'name':'title', 'filename':None, 'value':''},
266                  {'name':'file', 'filename':'test.txt', 'value':b'Testing 123.\n'},
267                  {'name':'submit', 'filename':None, 'value':' Add '}]
268        for x in range(len(fs.list)):
269            for k, exp in expect[x].items():
270                got = getattr(fs.list[x], k)
271                self.assertEqual(got, exp)
272
273    def test_fieldstorage_multipart_leading_whitespace(self):
274        env = {
275            'REQUEST_METHOD': 'POST',
276            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
277            'CONTENT_LENGTH': '560'}
278        # Add some leading whitespace to our post data that will cause the
279        # first line to not be the innerboundary.
280        fp = BytesIO(b"\r\n" + POSTDATA.encode('latin-1'))
281        fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
282        self.assertEqual(len(fs.list), 4)
283        expect = [{'name':'id', 'filename':None, 'value':'1234'},
284                  {'name':'title', 'filename':None, 'value':''},
285                  {'name':'file', 'filename':'test.txt', 'value':b'Testing 123.\n'},
286                  {'name':'submit', 'filename':None, 'value':' Add '}]
287        for x in range(len(fs.list)):
288            for k, exp in expect[x].items():
289                got = getattr(fs.list[x], k)
290                self.assertEqual(got, exp)
291
292    def test_fieldstorage_multipart_non_ascii(self):
293        #Test basic FieldStorage multipart parsing
294        env = {'REQUEST_METHOD':'POST',
295            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
296            'CONTENT_LENGTH':'558'}
297        for encoding in ['iso-8859-1','utf-8']:
298            fp = BytesIO(POSTDATA_NON_ASCII.encode(encoding))
299            fs = cgi.FieldStorage(fp, environ=env,encoding=encoding)
300            self.assertEqual(len(fs.list), 1)
301            expect = [{'name':'id', 'filename':None, 'value':'\xe7\xf1\x80'}]
302            for x in range(len(fs.list)):
303                for k, exp in expect[x].items():
304                    got = getattr(fs.list[x], k)
305                    self.assertEqual(got, exp)
306
307    def test_fieldstorage_multipart_maxline(self):
308        # Issue #18167
309        maxline = 1 << 16
310        self.maxDiff = None
311        def check(content):
312            data = """---123
313Content-Disposition: form-data; name="upload"; filename="fake.txt"
314Content-Type: text/plain
315
316%s
317---123--
318""".replace('\n', '\r\n') % content
319            environ = {
320                'CONTENT_LENGTH':   str(len(data)),
321                'CONTENT_TYPE':     'multipart/form-data; boundary=-123',
322                'REQUEST_METHOD':   'POST',
323            }
324            self.assertEqual(gen_result(data, environ),
325                             {'upload': content.encode('latin1')})
326        check('x' * (maxline - 1))
327        check('x' * (maxline - 1) + '\r')
328        check('x' * (maxline - 1) + '\r' + 'y' * (maxline - 1))
329
330    def test_fieldstorage_multipart_w3c(self):
331        # Test basic FieldStorage multipart parsing (W3C sample)
332        env = {
333            'REQUEST_METHOD': 'POST',
334            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY_W3),
335            'CONTENT_LENGTH': str(len(POSTDATA_W3))}
336        fp = BytesIO(POSTDATA_W3.encode('latin-1'))
337        fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
338        self.assertEqual(len(fs.list), 2)
339        self.assertEqual(fs.list[0].name, 'submit-name')
340        self.assertEqual(fs.list[0].value, 'Larry')
341        self.assertEqual(fs.list[1].name, 'files')
342        files = fs.list[1].value
343        self.assertEqual(len(files), 2)
344        expect = [{'name': None, 'filename': 'file1.txt', 'value': b'... contents of file1.txt ...'},
345                  {'name': None, 'filename': 'file2.gif', 'value': b'...contents of file2.gif...'}]
346        for x in range(len(files)):
347            for k, exp in expect[x].items():
348                got = getattr(files[x], k)
349                self.assertEqual(got, exp)
350
351    def test_fieldstorage_part_content_length(self):
352        BOUNDARY = "JfISa01"
353        POSTDATA = """--JfISa01
354Content-Disposition: form-data; name="submit-name"
355Content-Length: 5
356
357Larry
358--JfISa01"""
359        env = {
360            'REQUEST_METHOD': 'POST',
361            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
362            'CONTENT_LENGTH': str(len(POSTDATA))}
363        fp = BytesIO(POSTDATA.encode('latin-1'))
364        fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
365        self.assertEqual(len(fs.list), 1)
366        self.assertEqual(fs.list[0].name, 'submit-name')
367        self.assertEqual(fs.list[0].value, 'Larry')
368
369    def test_field_storage_multipart_no_content_length(self):
370        fp = BytesIO(b"""--MyBoundary
371Content-Disposition: form-data; name="my-arg"; filename="foo"
372
373Test
374
375--MyBoundary--
376""")
377        env = {
378            "REQUEST_METHOD": "POST",
379            "CONTENT_TYPE": "multipart/form-data; boundary=MyBoundary",
380            "wsgi.input": fp,
381        }
382        fields = cgi.FieldStorage(fp, environ=env)
383
384        self.assertEqual(len(fields["my-arg"].file.read()), 5)
385
386    def test_fieldstorage_as_context_manager(self):
387        fp = BytesIO(b'x' * 10)
388        env = {'REQUEST_METHOD': 'PUT'}
389        with cgi.FieldStorage(fp=fp, environ=env) as fs:
390            content = fs.file.read()
391            self.assertFalse(fs.file.closed)
392        self.assertTrue(fs.file.closed)
393        self.assertEqual(content, 'x' * 10)
394        with self.assertRaisesRegex(ValueError, 'I/O operation on closed file'):
395            fs.file.read()
396
    # Expected gen_result() output shared by the testQSAnd* tests below.
    # key2 gets one value from the query string and one from the request
    # body, so it is the only entry that is a list.
    _qs_result = {
        'key1': 'value1',
        'key2': ['value2x', 'value2y'],
        'key3': 'value3',
        'key4': 'value4'
    }
403    def testQSAndUrlEncode(self):
404        data = "key2=value2x&key3=value3&key4=value4"
405        environ = {
406            'CONTENT_LENGTH':   str(len(data)),
407            'CONTENT_TYPE':     'application/x-www-form-urlencoded',
408            'QUERY_STRING':     'key1=value1&key2=value2y',
409            'REQUEST_METHOD':   'POST',
410        }
411        v = gen_result(data, environ)
412        self.assertEqual(self._qs_result, v)
413
414    def test_max_num_fields(self):
415        # For application/x-www-form-urlencoded
416        data = '&'.join(['a=a']*11)
417        environ = {
418            'CONTENT_LENGTH': str(len(data)),
419            'CONTENT_TYPE': 'application/x-www-form-urlencoded',
420            'REQUEST_METHOD': 'POST',
421        }
422
423        with self.assertRaises(ValueError):
424            cgi.FieldStorage(
425                fp=BytesIO(data.encode()),
426                environ=environ,
427                max_num_fields=10,
428            )
429
430        # For multipart/form-data
431        data = """---123
432Content-Disposition: form-data; name="a"
433
4343
435---123
436Content-Type: application/x-www-form-urlencoded
437
438a=4
439---123
440Content-Type: application/x-www-form-urlencoded
441
442a=5
443---123--
444"""
445        environ = {
446            'CONTENT_LENGTH':   str(len(data)),
447            'CONTENT_TYPE':     'multipart/form-data; boundary=-123',
448            'QUERY_STRING':     'a=1&a=2',
449            'REQUEST_METHOD':   'POST',
450        }
451
452        # 2 GET entities
453        # 1 top level POST entities
454        # 1 entity within the second POST entity
455        # 1 entity within the third POST entity
456        with self.assertRaises(ValueError):
457            cgi.FieldStorage(
458                fp=BytesIO(data.encode()),
459                environ=environ,
460                max_num_fields=4,
461            )
462        cgi.FieldStorage(
463            fp=BytesIO(data.encode()),
464            environ=environ,
465            max_num_fields=5,
466        )
467
468    def testQSAndFormData(self):
469        data = """---123
470Content-Disposition: form-data; name="key2"
471
472value2y
473---123
474Content-Disposition: form-data; name="key3"
475
476value3
477---123
478Content-Disposition: form-data; name="key4"
479
480value4
481---123--
482"""
483        environ = {
484            'CONTENT_LENGTH':   str(len(data)),
485            'CONTENT_TYPE':     'multipart/form-data; boundary=-123',
486            'QUERY_STRING':     'key1=value1&key2=value2x',
487            'REQUEST_METHOD':   'POST',
488        }
489        v = gen_result(data, environ)
490        self.assertEqual(self._qs_result, v)
491
492    def testQSAndFormDataFile(self):
493        data = """---123
494Content-Disposition: form-data; name="key2"
495
496value2y
497---123
498Content-Disposition: form-data; name="key3"
499
500value3
501---123
502Content-Disposition: form-data; name="key4"
503
504value4
505---123
506Content-Disposition: form-data; name="upload"; filename="fake.txt"
507Content-Type: text/plain
508
509this is the content of the fake file
510
511---123--
512"""
513        environ = {
514            'CONTENT_LENGTH':   str(len(data)),
515            'CONTENT_TYPE':     'multipart/form-data; boundary=-123',
516            'QUERY_STRING':     'key1=value1&key2=value2x',
517            'REQUEST_METHOD':   'POST',
518        }
519        result = self._qs_result.copy()
520        result.update({
521            'upload': b'this is the content of the fake file\n'
522        })
523        v = gen_result(data, environ)
524        self.assertEqual(result, v)
525
526    def test_parse_header(self):
527        self.assertEqual(
528            cgi.parse_header("text/plain"),
529            ("text/plain", {}))
530        self.assertEqual(
531            cgi.parse_header("text/vnd.just.made.this.up ; "),
532            ("text/vnd.just.made.this.up", {}))
533        self.assertEqual(
534            cgi.parse_header("text/plain;charset=us-ascii"),
535            ("text/plain", {"charset": "us-ascii"}))
536        self.assertEqual(
537            cgi.parse_header('text/plain ; charset="us-ascii"'),
538            ("text/plain", {"charset": "us-ascii"}))
539        self.assertEqual(
540            cgi.parse_header('text/plain ; charset="us-ascii"; another=opt'),
541            ("text/plain", {"charset": "us-ascii", "another": "opt"}))
542        self.assertEqual(
543            cgi.parse_header('attachment; filename="silly.txt"'),
544            ("attachment", {"filename": "silly.txt"}))
545        self.assertEqual(
546            cgi.parse_header('attachment; filename="strange;name"'),
547            ("attachment", {"filename": "strange;name"}))
548        self.assertEqual(
549            cgi.parse_header('attachment; filename="strange;name";size=123;'),
550            ("attachment", {"filename": "strange;name", "size": "123"}))
551        self.assertEqual(
552            cgi.parse_header('form-data; name="files"; filename="fo\\"o;bar"'),
553            ("form-data", {"name": "files", "filename": 'fo"o;bar'}))
554
555    def test_all(self):
556        blacklist = {"logfile", "logfp", "initlog", "dolog", "nolog",
557                     "closelog", "log", "maxlen", "valid_boundary"}
558        support.check__all__(self, cgi, blacklist=blacklist)
559
560
# Boundary string and matching sample multipart/form-data body used by the
# multipart tests.  The body has four parts: "id", "title" (empty value),
# "file" (an upload named test.txt) and "submit".  The \x20 escape spells the
# trailing space of the "submit" value explicitly.
BOUNDARY = "---------------------------721837373350705526688164684"

POSTDATA = """-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="id"

1234
-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="title"


-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="file"; filename="test.txt"
Content-Type: text/plain

Testing 123.

-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="submit"

 Add\x20
-----------------------------721837373350705526688164684--
"""
583
# Multipart body with a single "id" part whose value is three non-ASCII
# characters; it uses the same boundary string as POSTDATA.
POSTDATA_NON_ASCII = """-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="id"

\xe7\xf1\x80
-----------------------------721837373350705526688164684
"""
590
# Sample from the HTML 4.01 specification:
# http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4
# A "submit-name" text field followed by a "files" part that is itself a
# multipart/mixed body (boundary BbC04y) holding two files.
BOUNDARY_W3 = "AaB03x"
POSTDATA_W3 = """--AaB03x
Content-Disposition: form-data; name="submit-name"

Larry
--AaB03x
Content-Disposition: form-data; name="files"
Content-Type: multipart/mixed; boundary=BbC04y

--BbC04y
Content-Disposition: file; filename="file1.txt"
Content-Type: text/plain

... contents of file1.txt ...
--BbC04y
Content-Disposition: file; filename="file2.gif"
Content-Type: image/gif
Content-Transfer-Encoding: binary

...contents of file2.gif...
--BbC04y--
--AaB03x--
"""
615
# Run the tests in this file when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
618