1import cgi 2import os 3import sys 4import tempfile 5import unittest 6from collections import namedtuple 7from io import StringIO, BytesIO 8from test import support 9 10class HackedSysModule: 11 # The regression test will have real values in sys.argv, which 12 # will completely confuse the test of the cgi module 13 argv = [] 14 stdin = sys.stdin 15 16cgi.sys = HackedSysModule() 17 18class ComparableException: 19 def __init__(self, err): 20 self.err = err 21 22 def __str__(self): 23 return str(self.err) 24 25 def __eq__(self, anExc): 26 if not isinstance(anExc, Exception): 27 return NotImplemented 28 return (self.err.__class__ == anExc.__class__ and 29 self.err.args == anExc.args) 30 31 def __getattr__(self, attr): 32 return getattr(self.err, attr) 33 34def do_test(buf, method): 35 env = {} 36 if method == "GET": 37 fp = None 38 env['REQUEST_METHOD'] = 'GET' 39 env['QUERY_STRING'] = buf 40 elif method == "POST": 41 fp = BytesIO(buf.encode('latin-1')) # FieldStorage expects bytes 42 env['REQUEST_METHOD'] = 'POST' 43 env['CONTENT_TYPE'] = 'application/x-www-form-urlencoded' 44 env['CONTENT_LENGTH'] = str(len(buf)) 45 else: 46 raise ValueError("unknown method: %s" % method) 47 try: 48 return cgi.parse(fp, env, strict_parsing=1) 49 except Exception as err: 50 return ComparableException(err) 51 52parse_strict_test_cases = [ 53 ("", ValueError("bad query field: ''")), 54 ("&", ValueError("bad query field: ''")), 55 ("&&", ValueError("bad query field: ''")), 56 (";", ValueError("bad query field: ''")), 57 (";&;", ValueError("bad query field: ''")), 58 # Should the next few really be valid? 59 ("=", {}), 60 ("=&=", {}), 61 ("=;=", {}), 62 # This rest seem to make sense 63 ("=a", {'': ['a']}), 64 ("&=a", ValueError("bad query field: ''")), 65 ("=a&", ValueError("bad query field: ''")), 66 ("=&a", ValueError("bad query field: 'a'")), 67 ("b=a", {'b': ['a']}), 68 ("b+=a", {'b ': ['a']}), 69 ("a=b=a", {'a': ['b=a']}), 70 ("a=+b=a", {'a': [' b=a']}), 71 ("&b=a", ValueError("bad query field: ''")), 72 ("b&=a", ValueError("bad query field: 'b'")), 73 ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}), 74 ("a=a+b&a=b+a", {'a': ['a b', 'b a']}), 75 ("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), 76 ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), 77 ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), 78 ("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env", 79 {'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'], 80 'cuyer': ['r'], 81 'expire': ['964546263'], 82 'kid': ['130003.300038'], 83 'lobale': ['en-US'], 84 'order_id': ['0bb2e248638833d48cb7fed300000f1b'], 85 'ss': ['env'], 86 'view': ['bustomer'], 87 }), 88 89 ("group_id=5470&set=custom&_assigned_to=31392&_status=1&_category=100&SUBMIT=Browse", 90 {'SUBMIT': ['Browse'], 91 '_assigned_to': ['31392'], 92 '_category': ['100'], 93 '_status': ['1'], 94 'group_id': ['5470'], 95 'set': ['custom'], 96 }) 97 ] 98 99def norm(seq): 100 return sorted(seq, key=repr) 101 102def first_elts(list): 103 return [p[0] for p in list] 104 105def first_second_elts(list): 106 return [(p[0], p[1][0]) for p in list] 107 108def gen_result(data, environ): 109 encoding = 'latin-1' 110 fake_stdin = BytesIO(data.encode(encoding)) 111 fake_stdin.seek(0) 112 form = cgi.FieldStorage(fp=fake_stdin, environ=environ, encoding=encoding) 113 114 result = {} 115 for k, v in dict(form).items(): 116 result[k] = isinstance(v, list) and form.getlist(k) or v.value 117 118 return result 119 120class CgiTests(unittest.TestCase): 121 122 def test_parse_multipart(self): 123 fp = BytesIO(POSTDATA.encode('latin1')) 124 env = {'boundary': BOUNDARY.encode('latin1'), 125 'CONTENT-LENGTH': '558'} 126 result = cgi.parse_multipart(fp, env) 127 expected = {'submit': [' Add '], 'id': ['1234'], 128 'file': [b'Testing 123.\n'], 'title': ['']} 129 self.assertEqual(result, expected) 130 131 def test_parse_multipart_without_content_length(self): 132 POSTDATA = '''--JfISa01 133Content-Disposition: form-data; name="submit-name" 134 135just a string 136 137--JfISa01-- 138''' 139 fp = BytesIO(POSTDATA.encode('latin1')) 140 env = {'boundary': 'JfISa01'.encode('latin1')} 141 result = cgi.parse_multipart(fp, env) 142 expected = {'submit-name': ['just a string\n']} 143 self.assertEqual(result, expected) 144 145 def test_parse_multipart_invalid_encoding(self): 146 BOUNDARY = "JfISa01" 147 POSTDATA = """--JfISa01 148Content-Disposition: form-data; name="submit-name" 149Content-Length: 3 150 151\u2603 152--JfISa01""" 153 fp = BytesIO(POSTDATA.encode('utf8')) 154 env = {'boundary': BOUNDARY.encode('latin1'), 155 'CONTENT-LENGTH': str(len(POSTDATA.encode('utf8')))} 156 result = cgi.parse_multipart(fp, env, encoding="ascii", 157 errors="surrogateescape") 158 expected = {'submit-name': ["\udce2\udc98\udc83"]} 159 self.assertEqual(result, expected) 160 self.assertEqual("\u2603".encode('utf8'), 161 result["submit-name"][0].encode('utf8', 'surrogateescape')) 162 163 def test_fieldstorage_properties(self): 164 fs = cgi.FieldStorage() 165 self.assertFalse(fs) 166 self.assertIn("FieldStorage", repr(fs)) 167 self.assertEqual(list(fs), list(fs.keys())) 168 fs.list.append(namedtuple('MockFieldStorage', 'name')('fieldvalue')) 169 self.assertTrue(fs) 170 171 def test_fieldstorage_invalid(self): 172 self.assertRaises(TypeError, cgi.FieldStorage, "not-a-file-obj", 173 environ={"REQUEST_METHOD":"PUT"}) 174 self.assertRaises(TypeError, cgi.FieldStorage, "foo", "bar") 175 fs = cgi.FieldStorage(headers={'content-type':'text/plain'}) 176 self.assertRaises(TypeError, bool, fs) 177 178 def test_strict(self): 179 for orig, expect in parse_strict_test_cases: 180 # Test basic parsing 181 d = do_test(orig, "GET") 182 self.assertEqual(d, expect, "Error parsing %s method GET" % repr(orig)) 183 d = do_test(orig, "POST") 184 self.assertEqual(d, expect, "Error parsing %s method POST" % repr(orig)) 185 186 env = {'QUERY_STRING': orig} 187 fs = cgi.FieldStorage(environ=env) 188 if isinstance(expect, dict): 189 # test dict interface 190 self.assertEqual(len(expect), len(fs)) 191 self.assertCountEqual(expect.keys(), fs.keys()) 192 ##self.assertEqual(norm(expect.values()), norm(fs.values())) 193 ##self.assertEqual(norm(expect.items()), norm(fs.items())) 194 self.assertEqual(fs.getvalue("nonexistent field", "default"), "default") 195 # test individual fields 196 for key in expect.keys(): 197 expect_val = expect[key] 198 self.assertIn(key, fs) 199 if len(expect_val) > 1: 200 self.assertEqual(fs.getvalue(key), expect_val) 201 else: 202 self.assertEqual(fs.getvalue(key), expect_val[0]) 203 204 def test_log(self): 205 cgi.log("Testing") 206 207 cgi.logfp = StringIO() 208 cgi.initlog("%s", "Testing initlog 1") 209 cgi.log("%s", "Testing log 2") 210 self.assertEqual(cgi.logfp.getvalue(), "Testing initlog 1\nTesting log 2\n") 211 if os.path.exists(os.devnull): 212 cgi.logfp = None 213 cgi.logfile = os.devnull 214 cgi.initlog("%s", "Testing log 3") 215 self.addCleanup(cgi.closelog) 216 cgi.log("Testing log 4") 217 218 def test_fieldstorage_readline(self): 219 # FieldStorage uses readline, which has the capacity to read all 220 # contents of the input file into memory; we use readline's size argument 221 # to prevent that for files that do not contain any newlines in 222 # non-GET/HEAD requests 223 class TestReadlineFile: 224 def __init__(self, file): 225 self.file = file 226 self.numcalls = 0 227 228 def readline(self, size=None): 229 self.numcalls += 1 230 if size: 231 return self.file.readline(size) 232 else: 233 return self.file.readline() 234 235 def __getattr__(self, name): 236 file = self.__dict__['file'] 237 a = getattr(file, name) 238 if not isinstance(a, int): 239 setattr(self, name, a) 240 return a 241 242 f = TestReadlineFile(tempfile.TemporaryFile("wb+")) 243 self.addCleanup(f.close) 244 f.write(b'x' * 256 * 1024) 245 f.seek(0) 246 env = {'REQUEST_METHOD':'PUT'} 247 fs = cgi.FieldStorage(fp=f, environ=env) 248 self.addCleanup(fs.file.close) 249 # if we're not chunking properly, readline is only called twice 250 # (by read_binary); if we are chunking properly, it will be called 5 times 251 # as long as the chunksize is 1 << 16. 252 self.assertGreater(f.numcalls, 2) 253 f.close() 254 255 def test_fieldstorage_multipart(self): 256 #Test basic FieldStorage multipart parsing 257 env = { 258 'REQUEST_METHOD': 'POST', 259 'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY), 260 'CONTENT_LENGTH': '558'} 261 fp = BytesIO(POSTDATA.encode('latin-1')) 262 fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1") 263 self.assertEqual(len(fs.list), 4) 264 expect = [{'name':'id', 'filename':None, 'value':'1234'}, 265 {'name':'title', 'filename':None, 'value':''}, 266 {'name':'file', 'filename':'test.txt', 'value':b'Testing 123.\n'}, 267 {'name':'submit', 'filename':None, 'value':' Add '}] 268 for x in range(len(fs.list)): 269 for k, exp in expect[x].items(): 270 got = getattr(fs.list[x], k) 271 self.assertEqual(got, exp) 272 273 def test_fieldstorage_multipart_leading_whitespace(self): 274 env = { 275 'REQUEST_METHOD': 'POST', 276 'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY), 277 'CONTENT_LENGTH': '560'} 278 # Add some leading whitespace to our post data that will cause the 279 # first line to not be the innerboundary. 280 fp = BytesIO(b"\r\n" + POSTDATA.encode('latin-1')) 281 fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1") 282 self.assertEqual(len(fs.list), 4) 283 expect = [{'name':'id', 'filename':None, 'value':'1234'}, 284 {'name':'title', 'filename':None, 'value':''}, 285 {'name':'file', 'filename':'test.txt', 'value':b'Testing 123.\n'}, 286 {'name':'submit', 'filename':None, 'value':' Add '}] 287 for x in range(len(fs.list)): 288 for k, exp in expect[x].items(): 289 got = getattr(fs.list[x], k) 290 self.assertEqual(got, exp) 291 292 def test_fieldstorage_multipart_non_ascii(self): 293 #Test basic FieldStorage multipart parsing 294 env = {'REQUEST_METHOD':'POST', 295 'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY), 296 'CONTENT_LENGTH':'558'} 297 for encoding in ['iso-8859-1','utf-8']: 298 fp = BytesIO(POSTDATA_NON_ASCII.encode(encoding)) 299 fs = cgi.FieldStorage(fp, environ=env,encoding=encoding) 300 self.assertEqual(len(fs.list), 1) 301 expect = [{'name':'id', 'filename':None, 'value':'\xe7\xf1\x80'}] 302 for x in range(len(fs.list)): 303 for k, exp in expect[x].items(): 304 got = getattr(fs.list[x], k) 305 self.assertEqual(got, exp) 306 307 def test_fieldstorage_multipart_maxline(self): 308 # Issue #18167 309 maxline = 1 << 16 310 self.maxDiff = None 311 def check(content): 312 data = """---123 313Content-Disposition: form-data; name="upload"; filename="fake.txt" 314Content-Type: text/plain 315 316%s 317---123-- 318""".replace('\n', '\r\n') % content 319 environ = { 320 'CONTENT_LENGTH': str(len(data)), 321 'CONTENT_TYPE': 'multipart/form-data; boundary=-123', 322 'REQUEST_METHOD': 'POST', 323 } 324 self.assertEqual(gen_result(data, environ), 325 {'upload': content.encode('latin1')}) 326 check('x' * (maxline - 1)) 327 check('x' * (maxline - 1) + '\r') 328 check('x' * (maxline - 1) + '\r' + 'y' * (maxline - 1)) 329 330 def test_fieldstorage_multipart_w3c(self): 331 # Test basic FieldStorage multipart parsing (W3C sample) 332 env = { 333 'REQUEST_METHOD': 'POST', 334 'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY_W3), 335 'CONTENT_LENGTH': str(len(POSTDATA_W3))} 336 fp = BytesIO(POSTDATA_W3.encode('latin-1')) 337 fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1") 338 self.assertEqual(len(fs.list), 2) 339 self.assertEqual(fs.list[0].name, 'submit-name') 340 self.assertEqual(fs.list[0].value, 'Larry') 341 self.assertEqual(fs.list[1].name, 'files') 342 files = fs.list[1].value 343 self.assertEqual(len(files), 2) 344 expect = [{'name': None, 'filename': 'file1.txt', 'value': b'... contents of file1.txt ...'}, 345 {'name': None, 'filename': 'file2.gif', 'value': b'...contents of file2.gif...'}] 346 for x in range(len(files)): 347 for k, exp in expect[x].items(): 348 got = getattr(files[x], k) 349 self.assertEqual(got, exp) 350 351 def test_fieldstorage_part_content_length(self): 352 BOUNDARY = "JfISa01" 353 POSTDATA = """--JfISa01 354Content-Disposition: form-data; name="submit-name" 355Content-Length: 5 356 357Larry 358--JfISa01""" 359 env = { 360 'REQUEST_METHOD': 'POST', 361 'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY), 362 'CONTENT_LENGTH': str(len(POSTDATA))} 363 fp = BytesIO(POSTDATA.encode('latin-1')) 364 fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1") 365 self.assertEqual(len(fs.list), 1) 366 self.assertEqual(fs.list[0].name, 'submit-name') 367 self.assertEqual(fs.list[0].value, 'Larry') 368 369 def test_field_storage_multipart_no_content_length(self): 370 fp = BytesIO(b"""--MyBoundary 371Content-Disposition: form-data; name="my-arg"; filename="foo" 372 373Test 374 375--MyBoundary-- 376""") 377 env = { 378 "REQUEST_METHOD": "POST", 379 "CONTENT_TYPE": "multipart/form-data; boundary=MyBoundary", 380 "wsgi.input": fp, 381 } 382 fields = cgi.FieldStorage(fp, environ=env) 383 384 self.assertEqual(len(fields["my-arg"].file.read()), 5) 385 386 def test_fieldstorage_as_context_manager(self): 387 fp = BytesIO(b'x' * 10) 388 env = {'REQUEST_METHOD': 'PUT'} 389 with cgi.FieldStorage(fp=fp, environ=env) as fs: 390 content = fs.file.read() 391 self.assertFalse(fs.file.closed) 392 self.assertTrue(fs.file.closed) 393 self.assertEqual(content, 'x' * 10) 394 with self.assertRaisesRegex(ValueError, 'I/O operation on closed file'): 395 fs.file.read() 396 397 _qs_result = { 398 'key1': 'value1', 399 'key2': ['value2x', 'value2y'], 400 'key3': 'value3', 401 'key4': 'value4' 402 } 403 def testQSAndUrlEncode(self): 404 data = "key2=value2x&key3=value3&key4=value4" 405 environ = { 406 'CONTENT_LENGTH': str(len(data)), 407 'CONTENT_TYPE': 'application/x-www-form-urlencoded', 408 'QUERY_STRING': 'key1=value1&key2=value2y', 409 'REQUEST_METHOD': 'POST', 410 } 411 v = gen_result(data, environ) 412 self.assertEqual(self._qs_result, v) 413 414 def test_max_num_fields(self): 415 # For application/x-www-form-urlencoded 416 data = '&'.join(['a=a']*11) 417 environ = { 418 'CONTENT_LENGTH': str(len(data)), 419 'CONTENT_TYPE': 'application/x-www-form-urlencoded', 420 'REQUEST_METHOD': 'POST', 421 } 422 423 with self.assertRaises(ValueError): 424 cgi.FieldStorage( 425 fp=BytesIO(data.encode()), 426 environ=environ, 427 max_num_fields=10, 428 ) 429 430 # For multipart/form-data 431 data = """---123 432Content-Disposition: form-data; name="a" 433 4343 435---123 436Content-Type: application/x-www-form-urlencoded 437 438a=4 439---123 440Content-Type: application/x-www-form-urlencoded 441 442a=5 443---123-- 444""" 445 environ = { 446 'CONTENT_LENGTH': str(len(data)), 447 'CONTENT_TYPE': 'multipart/form-data; boundary=-123', 448 'QUERY_STRING': 'a=1&a=2', 449 'REQUEST_METHOD': 'POST', 450 } 451 452 # 2 GET entities 453 # 1 top level POST entities 454 # 1 entity within the second POST entity 455 # 1 entity within the third POST entity 456 with self.assertRaises(ValueError): 457 cgi.FieldStorage( 458 fp=BytesIO(data.encode()), 459 environ=environ, 460 max_num_fields=4, 461 ) 462 cgi.FieldStorage( 463 fp=BytesIO(data.encode()), 464 environ=environ, 465 max_num_fields=5, 466 ) 467 468 def testQSAndFormData(self): 469 data = """---123 470Content-Disposition: form-data; name="key2" 471 472value2y 473---123 474Content-Disposition: form-data; name="key3" 475 476value3 477---123 478Content-Disposition: form-data; name="key4" 479 480value4 481---123-- 482""" 483 environ = { 484 'CONTENT_LENGTH': str(len(data)), 485 'CONTENT_TYPE': 'multipart/form-data; boundary=-123', 486 'QUERY_STRING': 'key1=value1&key2=value2x', 487 'REQUEST_METHOD': 'POST', 488 } 489 v = gen_result(data, environ) 490 self.assertEqual(self._qs_result, v) 491 492 def testQSAndFormDataFile(self): 493 data = """---123 494Content-Disposition: form-data; name="key2" 495 496value2y 497---123 498Content-Disposition: form-data; name="key3" 499 500value3 501---123 502Content-Disposition: form-data; name="key4" 503 504value4 505---123 506Content-Disposition: form-data; name="upload"; filename="fake.txt" 507Content-Type: text/plain 508 509this is the content of the fake file 510 511---123-- 512""" 513 environ = { 514 'CONTENT_LENGTH': str(len(data)), 515 'CONTENT_TYPE': 'multipart/form-data; boundary=-123', 516 'QUERY_STRING': 'key1=value1&key2=value2x', 517 'REQUEST_METHOD': 'POST', 518 } 519 result = self._qs_result.copy() 520 result.update({ 521 'upload': b'this is the content of the fake file\n' 522 }) 523 v = gen_result(data, environ) 524 self.assertEqual(result, v) 525 526 def test_parse_header(self): 527 self.assertEqual( 528 cgi.parse_header("text/plain"), 529 ("text/plain", {})) 530 self.assertEqual( 531 cgi.parse_header("text/vnd.just.made.this.up ; "), 532 ("text/vnd.just.made.this.up", {})) 533 self.assertEqual( 534 cgi.parse_header("text/plain;charset=us-ascii"), 535 ("text/plain", {"charset": "us-ascii"})) 536 self.assertEqual( 537 cgi.parse_header('text/plain ; charset="us-ascii"'), 538 ("text/plain", {"charset": "us-ascii"})) 539 self.assertEqual( 540 cgi.parse_header('text/plain ; charset="us-ascii"; another=opt'), 541 ("text/plain", {"charset": "us-ascii", "another": "opt"})) 542 self.assertEqual( 543 cgi.parse_header('attachment; filename="silly.txt"'), 544 ("attachment", {"filename": "silly.txt"})) 545 self.assertEqual( 546 cgi.parse_header('attachment; filename="strange;name"'), 547 ("attachment", {"filename": "strange;name"})) 548 self.assertEqual( 549 cgi.parse_header('attachment; filename="strange;name";size=123;'), 550 ("attachment", {"filename": "strange;name", "size": "123"})) 551 self.assertEqual( 552 cgi.parse_header('form-data; name="files"; filename="fo\\"o;bar"'), 553 ("form-data", {"name": "files", "filename": 'fo"o;bar'})) 554 555 def test_all(self): 556 blacklist = {"logfile", "logfp", "initlog", "dolog", "nolog", 557 "closelog", "log", "maxlen", "valid_boundary"} 558 support.check__all__(self, cgi, blacklist=blacklist) 559 560 561BOUNDARY = "---------------------------721837373350705526688164684" 562 563POSTDATA = """-----------------------------721837373350705526688164684 564Content-Disposition: form-data; name="id" 565 5661234 567-----------------------------721837373350705526688164684 568Content-Disposition: form-data; name="title" 569 570 571-----------------------------721837373350705526688164684 572Content-Disposition: form-data; name="file"; filename="test.txt" 573Content-Type: text/plain 574 575Testing 123. 576 577-----------------------------721837373350705526688164684 578Content-Disposition: form-data; name="submit" 579 580 Add\x20 581-----------------------------721837373350705526688164684-- 582""" 583 584POSTDATA_NON_ASCII = """-----------------------------721837373350705526688164684 585Content-Disposition: form-data; name="id" 586 587\xe7\xf1\x80 588-----------------------------721837373350705526688164684 589""" 590 591# http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4 592BOUNDARY_W3 = "AaB03x" 593POSTDATA_W3 = """--AaB03x 594Content-Disposition: form-data; name="submit-name" 595 596Larry 597--AaB03x 598Content-Disposition: form-data; name="files" 599Content-Type: multipart/mixed; boundary=BbC04y 600 601--BbC04y 602Content-Disposition: file; filename="file1.txt" 603Content-Type: text/plain 604 605... contents of file1.txt ... 606--BbC04y 607Content-Disposition: file; filename="file2.gif" 608Content-Type: image/gif 609Content-Transfer-Encoding: binary 610 611...contents of file2.gif... 612--BbC04y-- 613--AaB03x-- 614""" 615 616if __name__ == '__main__': 617 unittest.main() 618