# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module for interacting with the ClusterFuzz deployment."""
import logging
import os
import shutil
import sys
import tempfile
import time
import urllib.error
import urllib.request
import zipfile

# pylint: disable=wrong-import-position,import-error
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import utils


class BaseClusterFuzzDeployment:
  """Base class for ClusterFuzz deployments."""

  # Names of the subdirectories (under the caller-supplied |out_dir|) that
  # subclasses use for downloaded corpora and builds.
  CORPUS_DIR_NAME = 'cifuzz-corpus'
  BUILD_DIR_NAME = 'cifuzz-latest-build'

  def __init__(self, config):
    """Stores |config|, which subclasses read their settings from."""
    self.config = config

  def download_latest_build(self, out_dir):
    """Downloads the latest build from ClusterFuzz.

    Args:
      out_dir: The directory under which the build should be stored.

    Returns:
      A path to where the OSS-Fuzz build was stored, or None if it wasn't.
    """
    raise NotImplementedError('Child class must implement method.')

  def download_corpus(self, target_name, out_dir):
    """Downloads the corpus for |target_name| from ClusterFuzz to |out_dir|.

    Args:
      target_name: The name of the fuzz target whose corpus is wanted.
      out_dir: The directory under which the corpus should be stored.

    Returns:
      A path to where the corpus was stored, or None if it wasn't.
    """
    raise NotImplementedError('Child class must implement method.')


class ClusterFuzzLite(BaseClusterFuzzDeployment):
  """Class representing a deployment of ClusterFuzzLite."""

  def download_latest_build(self, out_dir):
    """Logs that build download is unsupported. Always returns None."""
    logging.info('download_latest_build not implemented for ClusterFuzzLite.')
    return None

  def download_corpus(self, target_name, out_dir):
    """Logs that corpus download is unsupported. Always returns None."""
    logging.info('download_corpus not implemented for ClusterFuzzLite.')
    return None


class OSSFuzz(BaseClusterFuzzDeployment):
  """The OSS-Fuzz ClusterFuzz deployment."""

  # Location of clusterfuzz builds on GCS.
  CLUSTERFUZZ_BUILDS = 'clusterfuzz-builds'

  # Format string for the latest version of a project's build.
  VERSION_STRING = '{project_name}-{sanitizer}-latest.version'

  # Zip file name containing the corpus.
  CORPUS_ZIP_NAME = 'public.zip'

  def get_latest_build_name(self):
    """Gets the name of the latest OSS-Fuzz build of a project.

    Returns:
      A string with the latest build version or None.
    """
    version_file = self.VERSION_STRING.format(
        project_name=self.config.project_name, sanitizer=self.config.sanitizer)
    version_url = utils.url_join(utils.GCS_BASE_URL, self.CLUSTERFUZZ_BUILDS,
                                 self.config.project_name, version_file)
    try:
      response = urllib.request.urlopen(version_url)
    except urllib.error.HTTPError:
      logging.error('Error getting latest build version for %s from: %s.',
                    self.config.project_name, version_url)
      return None

    # Close the HTTP response once its body has been read instead of leaving
    # the connection open until garbage collection.
    with response:
      return response.read().decode()

  def download_latest_build(self, out_dir):
    """Downloads the latest OSS-Fuzz build from GCS.

    A build directory that already exists under |out_dir| is reused without
    downloading again.

    Args:
      out_dir: The directory under which the build directory is created.

    Returns:
      A path to where the OSS-Fuzz build was stored, or None if it wasn't.
    """
    build_dir = os.path.join(out_dir, self.BUILD_DIR_NAME)
    if os.path.exists(build_dir):
      return build_dir

    latest_build_name = self.get_latest_build_name()
    if not latest_build_name:
      return None

    oss_fuzz_build_url = utils.url_join(utils.GCS_BASE_URL,
                                        self.CLUSTERFUZZ_BUILDS,
                                        self.config.project_name,
                                        latest_build_name)

    # Only create the directory once there is something to put in it:
    # download_and_unpack_zip requires the directory to exist.
    os.makedirs(build_dir, exist_ok=True)
    if download_and_unpack_zip(oss_fuzz_build_url, build_dir):
      return build_dir

    # Don't leave an empty or partially extracted directory behind, otherwise
    # the existence check above would report it as a valid build next time.
    shutil.rmtree(build_dir, ignore_errors=True)
    return None

  def download_corpus(self, target_name, out_dir):
    """Downloads the latest OSS-Fuzz corpus for the target.

    Args:
      target_name: The name of the fuzz target, with or without the
        '<project_name>_' prefix.
      out_dir: The directory under which the corpus directory is created.

    Returns:
      The local path to the corpus or None if download failed.
    """
    corpus_dir = os.path.join(out_dir, self.CORPUS_DIR_NAME, target_name)
    os.makedirs(corpus_dir, exist_ok=True)
    # TODO(metzman): Clean up this code.
    project_qualified_fuzz_target_name = target_name
    qualified_name_prefix = self.config.project_name + '_'

    # The backup URL uses the project-qualified target name, so add the
    # prefix when the caller passed the bare target name.
    if not target_name.startswith(qualified_name_prefix):
      project_qualified_fuzz_target_name = qualified_name_prefix + target_name

    corpus_url = utils.url_join(
        utils.GCS_BASE_URL,
        '{0}-backup.clusterfuzz-external.appspot.com/corpus/libFuzzer/'.format(
            self.config.project_name), project_qualified_fuzz_target_name,
        self.CORPUS_ZIP_NAME)

    if download_and_unpack_zip(corpus_url, corpus_dir):
      return corpus_dir

    return None


def download_url(url, filename, num_attempts=3):
  """Downloads the file located at |url|, using HTTP to |filename|.

  Args:
    url: A url to a file to download.
    filename: The path the file should be downloaded to.
    num_attempts: The number of times to attempt the download when it fails
      with ConnectionResetError.

  Returns:
    True on success. False if the server answered with an HTTP error or if
    every attempt failed with ConnectionResetError.
  """
  sleep_time = 1

  # Don't use retry wrapper since we don't want this to raise any exceptions.
  for _ in range(num_attempts):
    try:
      urllib.request.urlretrieve(url, filename)
      return True
    except urllib.error.HTTPError:
      # In these cases, retrying probably won't work since the error probably
      # means there is nothing at the URL to download.
      logging.error('Unable to download from: %s.', url)
      return False
    except ConnectionResetError:
      # These errors are more likely to be transient. Retry.
      pass
    time.sleep(sleep_time)

  logging.error('Failed to download %s, %d times.', url, num_attempts)

  return False


def download_and_unpack_zip(url, extract_directory):
  """Downloads and unpacks a zip file from an HTTP URL.

  Args:
    url: A url to the zip file to be downloaded and unpacked.
    extract_directory: The path where the zip file should be extracted to.
      It must already exist.

  Returns:
    True on success. False if |extract_directory| does not exist, the
    download failed, or the downloaded file is not a valid zip.
  """
  if not os.path.exists(extract_directory):
    logging.error('Extract directory: %s does not exist.', extract_directory)
    return False

  # Gives the temporary zip file a unique identifier in the case that
  # download_and_unpack_zip is done in parallel.
  with tempfile.NamedTemporaryFile(suffix='.zip') as tmp_file:
    if not download_url(url, tmp_file.name):
      return False

    try:
      with zipfile.ZipFile(tmp_file.name, 'r') as zip_file:
        zip_file.extractall(extract_directory)
    except zipfile.BadZipFile:
      logging.error('Error unpacking zip from %s. Bad Zipfile.', url)
      return False

  return True


def get_clusterfuzz_deployment(config):
  """Returns object representing deployment of ClusterFuzz used by |config|.

  OSSFuzz is used when |config.platform| is INTERNAL_GENERIC_CI or
  INTERNAL_GITHUB; every other platform gets ClusterFuzzLite.
  """
  if (config.platform == config.Platform.INTERNAL_GENERIC_CI or
      config.platform == config.Platform.INTERNAL_GITHUB):
    logging.info('Using OSS-Fuzz as ClusterFuzz deployment.')
    return OSSFuzz(config)
  logging.info('Using ClusterFuzzLite as ClusterFuzz deployment.')
  return ClusterFuzzLite(config)