1# Copyright 2021 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
"""Module for interacting with the ClusterFuzz deployment."""
import logging
import os
import shutil
import sys
import tempfile
import time
import urllib.error
import urllib.request
import zipfile
23
24# pylint: disable=wrong-import-position,import-error
25sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
26import utils
27
28
class BaseClusterFuzzDeployment:
  """Common interface shared by every ClusterFuzz deployment.

  This class only stores the config; both download methods raise
  NotImplementedError and are meant to be overridden by child classes.
  """

  # Names of the sub-directories used for downloaded corpora and builds.
  CORPUS_DIR_NAME = 'cifuzz-corpus'
  BUILD_DIR_NAME = 'cifuzz-latest-build'

  # Message raised by every method a child class has to provide.
  _UNIMPLEMENTED_MESSAGE = 'Child class must implement method.'

  def __init__(self, config):
    """Keeps |config| on the instance as |self.config|."""
    self.config = config

  def download_latest_build(self, out_dir):
    """Downloads the latest build from ClusterFuzz.

    Args:
      out_dir: The directory the build should be stored under.

    Returns:
      A path to where the OSS-Fuzz build was stored, or None if it wasn't.

    Raises:
      NotImplementedError: Always, unless overridden by a child class.
    """
    raise NotImplementedError(self._UNIMPLEMENTED_MESSAGE)

  def download_corpus(self, target_name, out_dir):
    """Downloads the corpus for |target_name| from ClusterFuzz to |out_dir|.

    Args:
      target_name: The name of the fuzz target whose corpus is wanted.
      out_dir: The directory the corpus should be stored under.

    Returns:
      A path to where the corpus was stored, or None if it wasn't.

    Raises:
      NotImplementedError: Always, unless overridden by a child class.
    """
    raise NotImplementedError(self._UNIMPLEMENTED_MESSAGE)
53
54
class ClusterFuzzLite(BaseClusterFuzzDeployment):
  """Class representing a deployment of ClusterFuzzLite.

  Neither download is implemented here: each method only logs that fact and
  returns None.
  """

  def download_latest_build(self, out_dir):
    """Logs that build download is unimplemented. Always returns None."""
    logging.info('download_latest_build not implemented for ClusterFuzzLite.')
    return None

  def download_corpus(self, target_name, out_dir):
    """Logs that corpus download is unimplemented. Always returns None."""
    logging.info('download_corpus not implemented for ClusterFuzzLite.')
    return None
63
64
class OSSFuzz(BaseClusterFuzzDeployment):
  """The OSS-Fuzz ClusterFuzz deployment.

  Builds and corpora are fetched over HTTP from URLs rooted at
  |utils.GCS_BASE_URL|.
  """

  # Location of clusterfuzz builds on GCS.
  CLUSTERFUZZ_BUILDS = 'clusterfuzz-builds'

  # Format string for the latest version of a project's build.
  VERSION_STRING = '{project_name}-{sanitizer}-latest.version'

  # Zip file name containing the corpus.
  CORPUS_ZIP_NAME = 'public.zip'

  def get_latest_build_name(self):
    """Gets the name of the latest OSS-Fuzz build of a project.

    The name is read from the project's "-latest.version" file for the
    configured sanitizer.

    Returns:
      A string with the latest build version or None if the version file
      could not be fetched because of an HTTP error.
    """
    version_file = self.VERSION_STRING.format(
        project_name=self.config.project_name, sanitizer=self.config.sanitizer)
    version_url = utils.url_join(utils.GCS_BASE_URL, self.CLUSTERFUZZ_BUILDS,
                                 self.config.project_name, version_file)
    try:
      response = urllib.request.urlopen(version_url)
    except urllib.error.HTTPError:
      logging.error('Error getting latest build version for %s from: %s.',
                    self.config.project_name, version_url)
      return None

    # Close the response once the body has been read so the connection is not
    # left open.
    with response:
      return response.read().decode()

  def download_latest_build(self, out_dir):
    """Downloads the latest OSS-Fuzz build from GCS.

    An already existing build directory is treated as a previously downloaded
    build and returned without downloading anything.

    Args:
      out_dir: The directory under which the build directory is created.

    Returns:
      A path to where the OSS-Fuzz build was stored, or None if it wasn't.
    """
    build_dir = os.path.join(out_dir, self.BUILD_DIR_NAME)
    if os.path.exists(build_dir):
      return build_dir

    # Look the build up before creating the directory: the existence check
    # above would otherwise report an empty directory as a valid build on the
    # next call.
    latest_build_name = self.get_latest_build_name()
    if not latest_build_name:
      return None

    os.makedirs(build_dir, exist_ok=True)

    oss_fuzz_build_url = utils.url_join(utils.GCS_BASE_URL,
                                        self.CLUSTERFUZZ_BUILDS,
                                        self.config.project_name,
                                        latest_build_name)
    if download_and_unpack_zip(oss_fuzz_build_url, build_dir):
      return build_dir

    # Don't leave an empty or partially extracted directory behind, for the
    # same reason as above.
    shutil.rmtree(build_dir, ignore_errors=True)
    return None

  def download_corpus(self, target_name, out_dir):
    """Downloads the latest OSS-Fuzz corpus for the target.

    The corpus directory is created before the download is attempted, so it
    can exist even when None is returned.

    Args:
      target_name: The name of the fuzz target, with or without the
        "<project_name>_" prefix.
      out_dir: The directory under which the corpus directory is created.

    Returns:
      The local path to the corpus or None if download failed.
    """
    corpus_dir = os.path.join(out_dir, self.CORPUS_DIR_NAME, target_name)
    os.makedirs(corpus_dir, exist_ok=True)
    # TODO(metzman): Clean up this code.
    # The corpus URL uses the project-qualified name ("<project>_<target>");
    # add the prefix unless |target_name| already carries it.
    project_qualified_fuzz_target_name = target_name
    qualified_name_prefix = self.config.project_name + '_'

    if not target_name.startswith(qualified_name_prefix):
      project_qualified_fuzz_target_name = qualified_name_prefix + target_name

    corpus_url = utils.url_join(
        utils.GCS_BASE_URL,
        '{0}-backup.clusterfuzz-external.appspot.com/corpus/libFuzzer/'.format(
            self.config.project_name), project_qualified_fuzz_target_name,
        self.CORPUS_ZIP_NAME)

    if download_and_unpack_zip(corpus_url, corpus_dir):
      return corpus_dir

    return None
145
146
def download_url(url, filename, num_attempts=3):
  """Downloads the file located at |url|, using HTTP to |filename|.

  Args:
    url: A url to a file to download.
    filename: The path the file should be downloaded to.
    num_attempts: The maximum number of times to try the download. Another
      attempt is only made after a ConnectionResetError.

  Returns:
    True on success. False if the download failed with an HTTP error or if
    every attempt ended in a ConnectionResetError.
  """
  sleep_time = 1

  # Don't use retry wrapper: HTTP errors and connection resets are reported
  # through the return value instead of being raised.
  for attempt in range(num_attempts):
    if attempt:
      # Only pause between attempts. Sleeping after the final attempt would
      # just delay reporting the failure.
      time.sleep(sleep_time)
    try:
      urllib.request.urlretrieve(url, filename)
      return True
    except urllib.error.HTTPError:
      # In these cases, retrying probably wont work since the error probably
      # means there is nothing at the URL to download.
      logging.error('Unable to download from: %s.', url)
      return False
    except ConnectionResetError:
      # These errors are more likely to be transient. Retry.
      continue

  logging.error('Failed to download %s, %d times.', url, num_attempts)

  return False
179
180
def download_and_unpack_zip(url, extract_directory):
  """Fetches the zip file at |url| and extracts it into |extract_directory|.

  Args:
    url: A url to the zip file to be downloaded and unpacked.
    extract_directory: The existing path the zip file should be extracted to.

  Returns:
    True on success. False if |extract_directory| does not exist, the
    download failed or the downloaded file is not a valid zip.
  """
  if not os.path.exists(extract_directory):
    logging.error('Extract directory: %s does not exist.', extract_directory)
    return False

  # A uniquely named temporary file keeps concurrent calls of this function
  # from writing to the same path.
  with tempfile.NamedTemporaryFile(suffix='.zip') as archive_file:
    archive_path = archive_file.name
    if download_url(url, archive_path):
      try:
        with zipfile.ZipFile(archive_path, 'r') as archive:
          archive.extractall(extract_directory)
      except zipfile.BadZipFile:
        logging.error('Error unpacking zip from %s. Bad Zipfile.', url)
      else:
        return True

  return False
209
210
def get_clusterfuzz_deployment(config):
  """Returns object representing deployment of ClusterFuzz used by |config|.

  OSSFuzz is used when |config.platform| is one of the two internal
  platforms; every other platform gets ClusterFuzzLite.
  """
  platform = config.platform
  is_internal = (platform == config.Platform.INTERNAL_GENERIC_CI or
                 platform == config.Platform.INTERNAL_GITHUB)
  if not is_internal:
    logging.info('Using ClusterFuzzLite as ClusterFuzz deployment.')
    return ClusterFuzzLite(config)

  logging.info('Using OSS-Fuzz as ClusterFuzz deployment.')
  return OSSFuzz(config)
219