1#!/usr/bin/python3
2
3# Copyright 2020 Google LLC
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17import logging
18import sys
19import warnings
20import atheris
21
22from bs4 import BeautifulSoup
23
24
# Resolve the HTMLParseError name used by the fuzz target below: take it from
# the top-level `HTMLParser` module when that import succeeds, otherwise
# define a local stand-in so `except HTMLParseError` still names a real class.
try:
  import HTMLParser
except ImportError:
  # HTMLParseError was removed in Python 3.5. It can never be raised there,
  # so an empty placeholder exception class is sufficient.

  class HTMLParseError(Exception):
    """Placeholder for the HTMLParseError class that is unavailable here."""
else:
  HTMLParseError = HTMLParser.HTMLParseError
34
35
def TestOneInput(data):
  """Fuzz target: parse fuzzer-supplied input with one of bs4's parsers.

  The first element of `data` selects the parser (taken modulo the number of
  parser names); everything after it is passed to BeautifulSoup as markup.
  If parsing succeeds, the tree is walked with find_all(True) and rendered
  with prettify() so those code paths are exercised as well.

  Args:
    data: Input handed over by the fuzzer. Empty input is ignored.

  Returns:
    None. HTMLParseError and ValueError raised while building the soup are
    swallowed; any other exception propagates to the caller.
  """
  if not data:
    return

  backends = ('lxml-xml', 'html5lib', 'html.parser', 'lxml')

  # The leading element only chooses a parser; it is never parsed as markup.
  try:
    selector = int(data[0])
  except ValueError:
    return
  backend = backends[selector % len(backends)]

  markup = data[1:]
  try:
    document = BeautifulSoup(markup, features=backend)
  except (HTMLParseError, ValueError):
    # Treated as an expected rejection of the input, not a finding.
    return

  # Results are discarded; the calls exist only to drive traversal and
  # serialization over the parsed tree.
  list(document.find_all(True))
  document.prettify()
56
57
def main():
  """Silence logging and warnings, then run the atheris fuzzing loop.

  Registers TestOneInput with atheris (passing sys.argv through and
  enabling Python coverage) and starts fuzzing.
  """
  # Disable log records up to and including CRITICAL and ignore every
  # warning -- presumably to keep library noise out of the fuzzer output.
  logging.disable(level=logging.CRITICAL)
  warnings.filterwarnings(action='ignore')

  atheris.Setup(sys.argv, TestOneInput, enable_python_coverage=True)
  atheris.Fuzz()
63
64
# Start fuzzing only when this file is executed as a script, not on import.
if __name__ == "__main__":
  main()
67