# Copyright 2020 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Provide some basic utility functions for libchrome tools."""

import collections
import enum
import os
import re
import subprocess


class DiffOperations(enum.Enum):
    """Describes operations on files."""
    ADD = 1
    DEL = 2
    REP = 3


# A file in a git tree: its path, file mode and blob id. The helpers in this
# module fill all three fields with bytes taken from git output.
GitFile = collections.namedtuple(
    'GitFile',
    ['path', 'mode', 'id',]
)

# One diff entry: a DiffOperations value and the GitFile it applies to.
GitDiffTree = collections.namedtuple(
    'GitDiffTree',
    ['op', 'file',]
)

# One blamed line: its content, the commit it is attributed to, and the two
# line numbers taken from the `git blame -p` header line.
GitBlameLine = collections.namedtuple(
    'GitBlameLine',
    ['data', 'commit', 'old_line', 'new_line',]
)


# Line looks like
# :oldmode<space>newmode<space>oldid<space>newid<space>status<tab>file name
GIT_DIFFTREE_RE_LINE = re.compile(
    rb'^:([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*)\t(.*)$')

# Line looks like
# mode<space>type<space>id<tab>file name
_GIT_LSTREE_RE_LINE = re.compile(rb'^([^ ]*) ([^ ]*) ([^ ]*)\t(.*)$')

# A `git blame -p` header line starts with a hex commit id and a space.
_GIT_BLAME_HEADER_RE = re.compile(rb'^[0-9a-f]+ ')

# Status letters understood by git_difftree and the operation each maps to.
_DIFFTREE_STATUS_TO_OP = {
    b'A': DiffOperations.ADD,
    b'D': DiffOperations.DEL,
    b'M': DiffOperations.REP,
    b'T': DiffOperations.REP,
}


def _reverse(files):
    """Creates a reverse map from file path to file.

    Args:
        files: list of files (objects with a `path` attribute).

    Returns:
        A dict mapping each file's path to the file itself.

    Raises:
        AssertionError: if the same path appears more than once in files.
    """
    files_map = {}
    for f in files:
        # Raised explicitly (rather than via `assert`) so that the duplicate
        # check is still performed under `python -O`.
        if f.path in files_map:
            raise AssertionError('duplicate file path: %r' % (f.path,))
        files_map[f.path] = f
    return files_map


def get_file_list(commit):
    """Gets a list of the files of the commit.

    Runs `git ls-tree -r` and parses every output line.

    Args:
        commit: commit hash or refs.

    Returns:
        A list of GitFile, one per blob entry. Entries whose type is
        `commit` are skipped.

    Raises:
        ValueError: if a line of output cannot be parsed.
        AssertionError: if an entry is neither a blob nor a commit.
        subprocess.CalledProcessError: if git exits with an error.
    """
    output = subprocess.check_output(['git', 'ls-tree', '-r',
                                      commit]).split(b'\n')
    files = []
    for line in output:
        if not line:
            continue
        match = _GIT_LSTREE_RE_LINE.match(line)
        if match is None:
            raise ValueError('Unparsable git ls-tree line: %r' % (line,))
        mode, gittype, blobhash, path = match.groups()
        if gittype == b'commit':
            continue
        if gittype != b'blob':
            raise AssertionError('%s\n\n%s' % (str(output), line))
        files.append(GitFile(path, mode, blobhash))
    return files


def git_difftree(treeish1, treeish2):
    """Gets diffs between treeish1 and treeish2.

    It returns a list of GitDiffTree, each GitDiffTree contains an ADD, DEL or
    REP operation and a GitFile. For DEL the GitFile carries the old mode and
    id; for ADD and REP it carries the new mode and id.

    Args:
        treeish1, treeish2: treeish to diff.
            treeish can be tree hash or commit hash. If treeish1 is None,
            `git diff-tree -r treeish2` is run on its own, which generates
            the diff of treeish2 against its parent.

    Raises:
        ValueError: if a line of output cannot be parsed.
        AssertionError: if the status of an entry is not one of A, D, M, T.
        subprocess.CalledProcessError: if git exits with an error.
    """
    if treeish1 is None:
        # With a single argument the first line of output is the id being
        # diffed rather than a diff entry, so drop it.
        out = subprocess.check_output(['git', 'diff-tree', '-r',
                                       treeish2]).split(b'\n')[1:]
    else:
        out = subprocess.check_output(['git', 'diff-tree', '-r',
                                       treeish1, treeish2]).split(b'\n')
    diff = []
    for line in out:
        if not line:
            continue
        match = GIT_DIFFTREE_RE_LINE.match(line)
        if match is None:
            raise ValueError('Unparsable git diff-tree line: %r' % (line,))
        oldmode, newmode, oldhash, newhash, typeofchange, path = match.groups()
        # Exact lookup: a bytes `in` test would be a substring match and
        # would let values such as b'' or b'AD' through.
        op = _DIFFTREE_STATUS_TO_OP.get(typeofchange)
        if op is None:
            raise AssertionError((treeish1, treeish2, line))
        if op is DiffOperations.DEL:
            diff.append(GitDiffTree(op, GitFile(path, oldmode, oldhash)))
        else:
            diff.append(GitDiffTree(op, GitFile(path, newmode, newhash)))
    return diff


def gen_op(current_files, target_files):
    """Returns an operation list to convert files to target_files.

    Generates list of operations (add/delete/replace files) if we want to
    convert current_files in directory to target_files.

    Args:
        current_files: list of files in current directory.
        target_files: list of files we want it to be in current directory.

    Returns:
        A list of (DiffOperations, file) tuples: first every deletion in
        sorted path order, then replacements and additions interleaved in
        sorted path order.

    Raises:
        AssertionError: if either list contains the same path twice.
    """
    current_file_map = _reverse(current_files)
    target_file_map = _reverse(target_files)
    op = [(DiffOperations.DEL, current_file_map[path])
          for path in sorted(current_file_map)
          if path not in target_file_map]
    for path in sorted(target_file_map):
        target = target_file_map[path]
        if path not in current_file_map:
            op.append((DiffOperations.ADD, target))
        elif current_file_map[path] != target:
            op.append((DiffOperations.REP, target))
    return op


def git_mktree(files):
    """Returns a git tree object hash after mktree recursively.

    Builds an in-memory directory tree out of the file paths, then calls
    `git mktree` once per directory, innermost directories first.

    Args:
        files: list of GitFile whose paths are bytes separated by b'/'.

    Returns:
        The output of `git mktree` for the top-level tree, as bytes with
        newlines stripped from both ends.

    Raises:
        AssertionError: if a path is used both as a file and as a directory,
            or if the same path appears twice.
        subprocess.CalledProcessError: if git exits with an error.
    """

    def recursive_default_dict():
        return collections.defaultdict(recursive_default_dict)

    tree = recursive_default_dict()
    for f in files:
        *directories, filename = f.path.split(b'/')
        cwd = tree
        for directory in directories:
            cwd = cwd[directory]
            # Check after descending: if the entry is a GitFile, a file and a
            # directory share the same name. This also covers the last
            # component, which must be a directory before we store into it.
            if not isinstance(cwd, collections.defaultdict):
                raise AssertionError(
                    'path is both a file and a directory: %r' % (f.path,))
        if filename in cwd:
            raise AssertionError('path already exists: %r' % (f.path,))
        cwd[filename] = f

    def _mktree(prefix, node):
        # prefix is the list of path components leading to node; it is
        # pushed/popped in place while walking.
        objects = []
        for name, val in node.items():
            prefix.append(name)
            if isinstance(val, collections.defaultdict):
                tree_hash = _mktree(prefix, val)
                objects.append(b'\t'.join(
                    [b' '.join([b'040000', b'tree', tree_hash]), name]))
            else:
                path = b'/'.join(prefix)
                assert path == val.path, '%s\n%s' % (str(path), str(val.path))
                objects.append(b'\t'.join(
                    [b' '.join([val.mode, b'blob', val.id]), name]))
            prefix.pop()
        return subprocess.check_output(['git', 'mktree'],
                                       input=b'\n'.join(objects)).strip(b'\n')

    return _mktree([], tree)


def git_commit(tree, parents, message=b"", extra_env=None):
    """Creates a commit.

    Args:
        tree: tree object id.
        parents: iterable of parent commit ids; each is passed with `-p`.
        message: commit message, fed to git on standard input.
        extra_env: optional dict of extra environment variables passed to
            git on top of the current environment.

    Returns:
        The output of `git commit-tree` as bytes with newlines stripped from
        both ends.

    Raises:
        subprocess.CalledProcessError: if git exits with an error.
    """
    parent_args = []
    for parent in parents:
        parent_args += ['-p', parent]
    env = dict(os.environ)
    if extra_env:
        env.update(extra_env)
    return subprocess.check_output(
        ['git', 'commit-tree', tree] + parent_args,
        input=message,
        env=env).strip(b'\n')


def git_revlist(from_commit, to_commit):
    """Returns a list of commits and their parents.

    Each item in the list is a tuple, containing two elements.
    The first element is the commit hash; the second element is a list of
    parent commits' hash. The list is in the reverse of the order printed by
    `git rev-list --topo-order`.

    Args:
        from_commit: start of the range, str or bytes, or None to list
            everything reachable from to_commit.
        to_commit: end of the range, str or bytes.

    Raises:
        subprocess.CalledProcessError: if git exits with an error.
    """
    if from_commit is None:
        revision = to_commit
    else:
        # b'...'.join() requires all variable to be binary-typed.
        if isinstance(from_commit, str):
            from_commit = from_commit.encode('ascii')
        if isinstance(to_commit, str):
            to_commit = to_commit.encode('ascii')
        # The range is spelled with three dots and handed to git unchanged.
        revision = b'...'.join([from_commit, to_commit])
    output = subprocess.check_output(['git', 'rev-list', revision,
                                      '--topo-order', '--parents'])
    commits = []
    for line in output.split(b'\n'):
        if not line:
            continue
        commit, *parents = line.split(b' ')
        commits.append((commit, parents))
    commits.reverse()
    return commits


def git_blame(commit, filepath):
    """Returns line-by-line git blame.

    Return value is represented by a list of GitBlameLine. All fields are
    bytes, including the two line numbers.

    Args:
        commit: commit hash to blame at.
        filepath: file to blame.

    Raises:
        AssertionError: if a content line shows up without a preceding
            header line.
        subprocess.CalledProcessError: if git exits with an error.
    """
    output = subprocess.check_output(['git', 'blame', '-p',
                                      commit, filepath])
    # State carried from the most recent header line to its content line.
    blamed_commit, old_line, new_line = None, None, None
    blames = []
    for line in output.split(b'\n'):
        if not line:
            continue
        if line.startswith(b'\t'):
            # Content lines are prefixed with a tab.
            if blamed_commit is None:
                raise AssertionError(
                    'content line without header: %r' % (line,))
            blames.append(
                GitBlameLine(line[1:], blamed_commit, old_line, new_line))
            blamed_commit, old_line, new_line = None, None, None
        elif _GIT_BLAME_HEADER_RE.match(line):
            blamed_commit, old_line, new_line = line.split(b' ', 3)[0:3]
    return blames