1# Copyright 2019 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""Module to get the name of a git repo containing a specific commit
15inside of an OSS-Fuzz project.
16
17Example Usage:
18
19  python detect_repo.py --src_dir /src --example_commit
20    b534f03eecd8a109db2b085ab24d419b6486de97
21
22Prints the location of the git remote repo as well as the repo's name
23separated by a space.
24
25  https://github.com/VirusTotal/yara.git yara
26
27"""
28import argparse
29import logging
30import os
31import subprocess
32
# Directory tree that get_dirs_to_search() walks, in addition to the source
# directory, for directories whose name contains the requested repo name.
33GO_PATH = '/root/go/src/'
34
35
def main():
  """Finds the git repo matching a commit and/or repo name and prints it.

  Parses the command line, then inspects every candidate returned by
  get_dirs_to_search(). The first directory that either contains
  --example_commit or whose origin remote is named --repo_name is reported on
  stdout as 'Detected repo: <remote url> <path>'. If no candidate matches, an
  error naming the search criteria is logged.

  Raises:
    ValueError when a commit or a repo name is not provided.
  """
  parser = argparse.ArgumentParser(
      description=
      'Finds a specific git repo in an oss-fuzz project\'s docker file.')
  parser.add_argument('--repo_name', help='The name of the git repo.')
  parser.add_argument('--src_dir', help='The location of the possible repo.')
  parser.add_argument('--example_commit',
                      help='A commit SHA referencing the project\'s main repo.')

  args = parser.parse_args()
  if not args.repo_name and not args.example_commit:
    raise ValueError(
        'Requires an example commit or a repo name to find repo location.')

  # Fall back to $SRC (or /src) when no directory was given explicitly.
  src_dir = args.src_dir or os.environ.get('SRC', '/src')

  for single_dir in get_dirs_to_search(src_dir, args.repo_name):
    # Candidates that are already absolute paths pass through join unchanged.
    full_path = os.path.join(src_dir, single_dir)
    if not os.path.isdir(full_path):
      continue
    # The commit check is tried first; the name check is the fallback.
    matched = ((args.example_commit and
                check_for_commit(full_path, args.example_commit)) or
               (args.repo_name and
                check_for_repo_name(full_path, args.repo_name)))
    if matched:
      print('Detected repo:', get_repo(full_path), full_path)
      return

  # Report the criteria that were actually used, so a name-only search does
  # not claim to have looked for the commit "None".
  criteria = []
  if args.example_commit:
    criteria.append('specific commit: %s' % args.example_commit)
  if args.repo_name:
    criteria.append('repo name: %s' % args.repo_name)
  logging.error('No git repos with %s found in %s', ' or '.join(criteria),
                src_dir)
72
73
def get_dirs_to_search(src_dir, repo_name):
  """Gets a list of directories to search for the main git repo.

  The entries of |src_dir| are returned as bare names (relative to |src_dir|).
  When |repo_name| is given and GO_PATH exists, every directory below GO_PATH
  whose name contains |repo_name| is appended as a path rooted at GO_PATH.

  Args:
    src_dir: The location set for the projects SRC.
    repo_name: The name of the repo you are searching for (may be None).

  Returns:
    A list of directories to search. If |src_dir| cannot be listed, a warning
    is logged and only the GO_PATH candidates (if any) are returned.
  """
  try:
    dirs_to_search = os.listdir(src_dir)
  except OSError as error:
    # A missing or unreadable source directory simply yields no candidates;
    # the caller reports that nothing was found.
    logging.warning('Unable to list %s: %s', src_dir, error)
    dirs_to_search = []

  # Only touch GO_PATH when there is a repo name to match against.
  if repo_name and os.path.exists(GO_PATH):
    for root, dirs, _ in os.walk(GO_PATH):
      dirs_to_search.extend(
          os.path.join(root, test_dir)
          for test_dir in dirs
          if repo_name in test_dir)
  return dirs_to_search
91
92
def get_repo(repo_path):
  """Looks up the origin remote URL of the git checkout at |repo_path|.

  Args:
    repo_path: The directory on the image where the git repo exists.

  Returns:
    The origin remote URL with trailing whitespace removed, or None when git
    printed nothing.

  Raises:
    RuntimeError: propagated from execute() when the git command fails.
  """
  remote_url, status = execute(
      ['git', 'config', '--get', 'remote.origin.url'],
      location=repo_path,
      check_result=True)
  if status != 0 or not remote_url:
    return None
  return remote_url.rstrip()
108
109
def check_for_repo_name(repo_path, expected_repo_name):
  """Returns True if the repo at |repo_path| repo_name matches
  |expected_repo_name|.

  The repo name is the last path component of the origin remote URL, with a
  trailing '/' and a trailing '.git' removed.

  Args:
    repo_path: The directory of a git repo.
    expected_repo_name: The name of the target git repo.

  Returns:
    True if the names match; False if they differ or |repo_path| has no .git
    entry.
  """
  if not os.path.exists(os.path.join(repo_path, '.git')):
    return False

  repo_url, _ = execute(['git', 'config', '--get', 'remote.origin.url'],
                        location=repo_path)
  # Handle two common cases:
  # https://github.com/google/syzkaller/
  # https://github.com/google/syzkaller.git
  repo_url = repo_url.rstrip().rstrip('/')
  # Strip '.git' only as a suffix: removing it everywhere would corrupt names
  # that merely contain it, e.g. 'user.github.io'.
  git_suffix = '.git'
  if repo_url.endswith(git_suffix):
    repo_url = repo_url[:-len(git_suffix)].rstrip('/')
  actual_repo_name = repo_url.split('/')[-1]
  return actual_repo_name == expected_repo_name
129
130
def check_for_commit(repo_path, commit):
  """Tells whether the git checkout at |repo_path| contains |commit|.

  Args:
    repo_path: The name of the directory to test for the commit.
    commit: The commit SHA to check for.

  Returns:
    True if directory contains that commit, False otherwise (including when
    the directory has no .git entry).
  """
  git_dir = os.path.join(repo_path, '.git')

  # Anything without a .git entry cannot be a repo.
  if not os.path.exists(git_dir):
    return False

  # A shallow clone lacks history, so fetch the rest before looking.
  if os.path.exists(os.path.join(git_dir, 'shallow')):
    execute(['git', 'fetch', '--unshallow'], location=repo_path)

  # `git cat-file -e` exits with 0 only when the object exists.
  status = execute(['git', 'cat-file', '-e', commit], location=repo_path)[1]
  return status == 0
154
155
def execute(command, location, check_result=False):
  """Runs a command (directly, without a shell) in the specified directory.

  Args:
    command: The command as a list to be run.
    location: The directory the command is run in.
    check_result: Should an exception be thrown on failed command.

  Returns:
    The stdout of the command decoded to text, the return code.

  Raises:
    RuntimeError: |check_result| is set and the command exited with a
      non-zero return code.
  """
  process = subprocess.Popen(command, stdout=subprocess.PIPE, cwd=location)
  # Only stdout is piped: |err| is always None here and the command's stderr
  # goes straight to this process's stderr.
  output, err = process.communicate()
  if check_result and (process.returncode or err):
    raise RuntimeError(
        'Error: %s\n running command: %s\n return code: %s\n out %s\n' %
        (err, command, process.returncode, output))
  if output is not None:
    # Decode leniently so non-ASCII output (e.g. in a remote URL or a path)
    # cannot crash the caller; pure ASCII output decodes exactly as before.
    output = output.decode('utf-8', errors='replace')
  return output, process.returncode
179
180
# Entry point: run main() only when this file is executed as a script.
181if __name__ == '__main__':
182  main()
183