#!/usr/bin/python
# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Runs on autotest servers from a cron job to self update them.

This script is designed to run on all autotest servers to allow them to
automatically self-update based on the manifests used to create their (existing)
repos.
"""

from __future__ import print_function

import ConfigParser
import argparse
import os
import re
import socket
import subprocess
import sys
import time

import common

from autotest_lib.client.common_lib import global_config
from autotest_lib.server import utils as server_utils
from autotest_lib.server.cros.dynamic_suite import frontend_wrappers


# How long (in seconds) after restarting a service do we watch it to see if
# it's stable.
SERVICE_STABILITY_TIMER = 60

# A list of commands that only applies to primary server. For example,
# test_importer should only be run in primary master scheduler. If two servers
# are both running test_importer, there is a chance to fail as both try to
# update the same table.
PRIMARY_ONLY_COMMANDS = ['test_importer']
# A dict mapping update_commands defined in the config file to the repos or
# files that decide whether these commands need to be run. E.g. if there are
# no changes under the frontend repo, there is no need to update afe.
COMMANDS_TO_REPOS_DICT = {'afe': 'frontend/',
                          'tko': 'tko/'}
BUILD_EXTERNALS_COMMAND = 'build_externals'
# Services present on all hosts.
UNIVERSAL_SERVICES = ['sysmon']

AFE = frontend_wrappers.RetryingAFE(
        server=server_utils.get_global_afe_hostname(), timeout_min=5,
        delay_sec=10)


class DirtyTreeException(Exception):
    """Raised when the tree has been modified in an unexpected way."""


class UnknownCommandException(Exception):
    """Raised when we try to run a command name with no associated command."""


class UnstableServices(Exception):
    """Raised if a service appears unstable after restart."""


def strip_terminal_codes(text):
    """This function removes all terminal formatting codes from a string.

    Only sequences of the form ESC '[' ... 'm' are removed.

    @param text: String of text to cleanup.
    @returns String with format codes removed.
    """
    ESC = '\x1b'
    return re.sub(ESC + r'\[[^m]*m', '', text)


def verify_repo_clean():
    """This function cleans the current repo then verifies that it is valid.

    Runs 'git reset --hard' in the current directory, then checks the output
    of 'repo status'.

    @raises DirtyTreeException if the repo is still not clean.
    @raises subprocess.CalledProcessError on a repo command failure.
    """
    subprocess.check_output(['git', 'reset', '--hard'])
    out = subprocess.check_output(['repo', 'status'], stderr=subprocess.STDOUT)
    out = strip_terminal_codes(out).strip()

    if 'working directory clean' not in out:
        raise DirtyTreeException(out)


def repo_versions():
    """This function collects the versions of all git repos in the general repo.

    @returns A dictionary mapping project names to a tuple of
             (project directory, abbreviated git hash of HEAD).
    @raises subprocess.CalledProcessError on a repo command failure.
    @raises ValueError if the 'repo forall' output is not in the expected
            format.
    """
    cmd = ['repo', 'forall', '-p', '-c', 'pwd && git log -1 --format=%h']
    output = strip_terminal_codes(subprocess.check_output(cmd))

    # The expected output format is:

    # project chrome_build/
    # /dir/holding/chrome_build
    # 73dee9d
    #
    # project chrome_release/
    # /dir/holding/chrome_release
    # 9f3a5d8

    lines = output.splitlines()

    PROJECT_PREFIX = 'project '

    project_heads = {}
    # Each project occupies a block of four lines; the fourth is a blank
    # separator that doesn't exist for the final block.
    for n in range(0, len(lines), 4):
        block = lines[n:n + 3]
        # Validate explicitly instead of with 'assert', which is stripped
        # when Python runs with -O.
        if len(block) < 3 or not block[0].startswith(PROJECT_PREFIX):
            raise ValueError(
                    'Unexpected "repo forall" output near line %d: %r' %
                    (n + 1, block))
        project_line, project_dir, project_hash = block

        # Convert 'project chrome_build/' -> 'chrome_build'
        name = project_line[len(PROJECT_PREFIX):].rstrip('/')

        project_heads[name] = (project_dir, project_hash)

    return project_heads


def repo_versions_to_decide_whether_run_cmd_update():
    """Collect versions of repos/files defined in COMMANDS_TO_REPOS_DICT.

    For the update_commands defined in config files, no need to run the command
    every time. Only run it when the repos/files related to the commands have
    been changed.

    @returns A set of tuples: {(cmd, repo_version), ()...}
    @raises subprocess.CalledProcessError on a git command failure.
    """
    results = set()
    for cmd, repo in COMMANDS_TO_REPOS_DICT.items():
        version = subprocess.check_output(
                ['git', 'log', '-1', '--pretty=tformat:%h',
                 '%s/%s' % (common.autotest_dir, repo)])
        results.add((cmd, version.strip()))
    return results


def _remove_pyc_files():
    """Remove .pyc files under the current directory, on a best-effort basis.

    Uses pyclean, which is a package on all ubuntu servers. A failure is only
    reported as a warning so that it never aborts the update.
    """
    print('Removing .pyc files')
    try:
        subprocess.check_output(['pyclean', '.', '-q'])
    except (OSError, subprocess.CalledProcessError) as e:
        # OSError: pyclean can't be executed (e.g. it is not installed).
        # CalledProcessError: pyclean exited with a non-zero status.
        print('Warning: fail to remove .pyc! %s' % e)


def repo_sync(update_push_servers=False):
    """Perform a repo sync.

    @param update_push_servers: If True, then update test_push servers to ToT.
                                Otherwise, update server to prod branch.
    @raises subprocess.CalledProcessError on a repo command failure.
    """
    subprocess.check_output(['repo', 'sync'])
    if update_push_servers:
        print('Updating push servers, checkout cros/master')
        branch = 'cros/master'
    else:
        print('Updating server to prod branch')
        branch = 'cros/prod'
    subprocess.check_output(['git', 'checkout', branch],
                            stderr=subprocess.STDOUT)
    _remove_pyc_files()


def discover_update_commands():
    """Lookup the commands to run on this server.

    These commonly come from shadow_config.ini, since they vary by server type.

    @returns List of command names in string format. Empty if the config
             value can't be looked up.
    """
    try:
        return global_config.global_config.get_config_value(
                'UPDATE', 'commands', type=list)

    except (ConfigParser.NoSectionError, global_config.ConfigError):
        return []


def discover_restart_services():
    """Find the services that need restarting on the current server.

    These commonly come from shadow_config.ini, since they vary by server type.

    @returns List of service names in string format. Always starts with
             UNIVERSAL_SERVICES.
    """
    # Copy, so that the module level constant is never modified.
    services = list(UNIVERSAL_SERVICES)
    try:
        # Look up services from shadow_config.ini.
        extra_services = global_config.global_config.get_config_value(
                'UPDATE', 'services', type=list)
        services.extend(extra_services)
    except (ConfigParser.NoSectionError, global_config.ConfigError):
        # No server specific services are configured.
        pass
    return services


def update_command(cmd_tag, dryrun=False, use_chromite_master=False):
    """Restart a command.

    The command name is looked up in global_config.ini to find the full command
    to run, then it's executed.

    @param cmd_tag: Which command to restart.
    @param dryrun: If true print the command that would have been run.
    @param use_chromite_master: True if updating chromite to master, rather
                                than prod.

    @raises UnknownCommandException If cmd_tag can't be looked up.
    @raises subprocess.CalledProcessError on a command failure.
    """
    # Lookup the list of commands to consider. They are intended to be
    # in global_config.ini so that they can be shared everywhere.
    cmds = dict(global_config.global_config.config.items(
            'UPDATE_COMMANDS'))

    if cmd_tag not in cmds:
        raise UnknownCommandException(cmd_tag, cmds)

    expanded_command = cmds[cmd_tag].replace('AUTOTEST_REPO',
                                             common.autotest_dir)
    # When updating push servers, pass an arg to build_externals to update
    # chromite to master branch for testing
    if use_chromite_master and cmd_tag == BUILD_EXTERNALS_COMMAND:
        expanded_command += ' --use_chromite_master'

    print('Running: %s: %s' % (cmd_tag, expanded_command))
    if dryrun:
        print('Skip: %s' % expanded_command)
        return

    try:
        # The command is a shell string taken from the config file.
        subprocess.check_output(expanded_command, shell=True,
                                stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        print('FAILED:')
        print(e.output)
        raise


def restart_service(service_name, dryrun=False):
    """Restart a service.

    Restarts the standard service with "service <name> restart".

    @param service_name: The name of the service to restart.
    @param dryrun: Don't really run anything, just print out the command.

    @raises subprocess.CalledProcessError on a command failure.
    """
    cmd = ['sudo', 'service', service_name, 'restart']
    print('Restarting: %s' % service_name)
    if dryrun:
        print('Skip: %s' % ' '.join(cmd))
    else:
        subprocess.check_call(cmd, stderr=subprocess.STDOUT)


def service_status(service_name):
    """Return the results "status <name>" for a given service.

    This string is expected to contain the pid, and so to change if the service
    is shutdown or restarted for any reason.

    @param service_name: The name of the service to check on.

    @returns The output of the external command.
             Ex: autofs start/running, process 1931

    @raises subprocess.CalledProcessError on a command failure.
    """
    return subprocess.check_output(['sudo', 'status', service_name])


def restart_services(service_names, dryrun=False, skip_service_status=False):
    """Restart services as needed for the current server type.

    Restart the listed set of services, and watch to see if they are stable for
    at least SERVICE_STABILITY_TIMER. It restarts all services quickly,
    waits for that delay, then verifies the status of all of them.

    @param service_names: The list of service to restart and monitor.
    @param dryrun: Don't really restart the service, just print out the command.
    @param skip_service_status: Set to True to skip service status check.
                                Default is False.

    @raises subprocess.CalledProcessError on a command failure.
    @raises UnstableServices if any services are unstable after restart.
    """
    if dryrun:
        for name in service_names:
            restart_service(name, dryrun=True)
        return

    # Restart every service first; the status is sampled afterwards.
    for name in service_names:
        restart_service(name)

    # Skip service status check if --skip-service-status is specified. Used for
    # servers in backup status.
    if skip_service_status:
        print('--skip-service-status is specified, skip checking services.')
        return

    # Wait for a while to let the services settle, then record the status
    # (including pid) of each of them.
    time.sleep(SERVICE_STABILITY_TIMER)
    service_statuses = {name: service_status(name) for name in service_names}
    time.sleep(SERVICE_STABILITY_TIMER)
    # Look for any services that changed status.
    unstable_services = [n for n in service_names
                         if service_status(n) != service_statuses[n]]

    # Report any services having issues.
    if unstable_services:
        raise UnstableServices(unstable_services)


def run_deploy_actions(cmds_skip=None, dryrun=False,
                       skip_service_status=False, use_chromite_master=False):
    """Run arbitrary update commands specified in global.ini.

    @param cmds_skip: cmds no need to run since the corresponding repo/file
                      does not change. Defaults to skipping nothing.
    @param dryrun: Don't really restart the service, just print out the command.
    @param skip_service_status: Set to True to skip service status check.
                                Default is False.
    @param use_chromite_master: True if updating chromite to master, rather
                                than prod.

    @raises subprocess.CalledProcessError on a command failure.
    @raises UnstableServices if any services are unstable after restart.
    """
    # None is used as the default instead of a shared mutable set().
    if cmds_skip is None:
        cmds_skip = set()

    defined_cmds = set(discover_update_commands())
    cmds = defined_cmds - set(cmds_skip)
    if cmds:
        print('Running update commands:', ', '.join(cmds))
        for cmd in cmds:
            if (cmd in PRIMARY_ONLY_COMMANDS and
                not AFE.run('get_servers', hostname=socket.getfqdn(),
                            status='primary')):
                print('Command %s is only applicable to primary servers.' % cmd)
                continue
            update_command(cmd, dryrun=dryrun,
                           use_chromite_master=use_chromite_master)

    services = discover_restart_services()
    if services:
        print('Restarting Services:', ', '.join(services))
        restart_services(services, dryrun=dryrun,
                         skip_service_status=skip_service_status)


def report_changes(versions_before, versions_after):
    """Produce a report describing what changed in all repos.

    If versions_after is empty, the report only lists the version of each
    project in versions_before.

    @param versions_before: Results of repo_versions() from before the update.
    @param versions_after: Results of repo_versions() from after the update.

    @returns string containing a human friendly changes report.
    @raises subprocess.CalledProcessError on a git command failure.
    """
    result = []

    if versions_after:
        for project in sorted(set(versions_before) | set(versions_after)):
            result.append('%s:' % project)

            _, before_hash = versions_before.get(project, (None, None))
            after_dir, after_hash = versions_after.get(project, (None, None))

            if project not in versions_before:
                result.append('Added.')

            elif project not in versions_after:
                result.append('Removed.')

            elif before_hash == after_hash:
                result.append('No Change.')

            else:
                # List the commits between the two versions.
                hashes = '%s..%s' % (before_hash, after_hash)
                cmd = ['git', 'log', hashes, '--oneline']
                out = subprocess.check_output(cmd, cwd=after_dir,
                                              stderr=subprocess.STDOUT)
                result.append(out.strip())

            result.append('')
    else:
        for project in sorted(versions_before):
            _, before_hash = versions_before[project]
            result.append('%s: %s' % (project, before_hash))
        result.append('')

    return '\n'.join(result)


def parse_arguments(args):
    """Parse command line arguments.

    @param args: The command line arguments to parse. (usually sys.argv[1:])

    @returns An argparse.Namespace populated with argument values.
    """
    parser = argparse.ArgumentParser(
            description='Command to update an autotest server.')
    parser.add_argument('--skip-verify', action='store_false',
                        dest='verify', default=True,
                        help='Disable verification of a clean repository.')
    parser.add_argument('--skip-update', action='store_false',
                        dest='update', default=True,
                        help='Skip the repository source code update.')
    parser.add_argument('--skip-actions', action='store_false',
                        dest='actions', default=True,
                        help='Skip the post update actions.')
    parser.add_argument('--skip-report', action='store_false',
                        dest='report', default=True,
                        help='Skip the git version report.')
    parser.add_argument('--actions-only', action='store_true',
                        help='Run the post update actions (restart services).')
    parser.add_argument('--dryrun', action='store_true',
                        help='Don\'t actually run any commands, just log.')
    parser.add_argument('--skip-service-status', action='store_true',
                        help='Skip checking the service status.')
    parser.add_argument('--update_push_servers', action='store_true',
                        help='Indicate to update test_push server. If not '
                             'specify, then update server to production.')
    parser.add_argument('--force_update', action='store_true',
                        help='Force to run the update commands for afe, tko '
                             'and build_externals')

    results = parser.parse_args(args)

    # --actions-only turns off every step other than the actions.
    if results.actions_only:
        results.verify = False
        results.update = False
        results.report = False

    # TODO(dgarrett): Make these behaviors support dryrun.
    if results.dryrun:
        results.verify = False
        results.update = False

    return results


class ChangeDir(object):

    """Context manager for changing to a directory temporarily."""

    def __init__(self, dir):
        """Remember the directory to change into.

        @param dir: The directory to change to when the context is entered.
                    The name shadows the builtin, but is kept so that keyword
                    callers keep working.
        """
        self.new_dir = dir
        self.old_dir = None

    def __enter__(self):
        """Record the current directory, then change to the new one."""
        self.old_dir = os.getcwd()
        os.chdir(self.new_dir)

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Change back to the directory recorded by __enter__."""
        os.chdir(self.old_dir)


def _sync_chromiumos_repo():
    """Update ~chromeos-test/chromiumos repo.

    A failure of the sync is only reported, it does not raise.
    """
    print('Updating ~chromeos-test/chromiumos')
    with ChangeDir(os.path.expanduser('~chromeos-test/chromiumos')):
        ret = subprocess.call(['repo', 'sync'], stderr=subprocess.STDOUT)
        _remove_pyc_files()
    if ret != 0:
        print('Update failed, exited with status: %d' % ret)


def main(args):
    """Main method.

    @param args: The command line arguments, without the program name.
    """
    os.chdir(common.autotest_dir)
    global_config.global_config.parse_config_file()

    behaviors = parse_arguments(args)

    if behaviors.verify:
        print('Checking tree status:')
        verify_repo_clean()
        print('Tree status: clean')

    versions_before = repo_versions()
    # Stays empty when the update is skipped; report_changes() then only
    # lists the current versions.
    versions_after = {}
    cmd_versions_before = repo_versions_to_decide_whether_run_cmd_update()
    cmd_versions_after = set()

    if behaviors.update:
        print('Updating Repo.')
        repo_sync(behaviors.update_push_servers)
        versions_after = repo_versions()
        cmd_versions_after = repo_versions_to_decide_whether_run_cmd_update()

        _sync_chromiumos_repo()

    if behaviors.actions:
        # If the corresponding repo/file not change, no need to run the cmd.
        # A (cmd, version) pair present both before and after is unchanged.
        if behaviors.force_update:
            cmds_skip = set()
        else:
            unchanged = cmd_versions_before & cmd_versions_after
            cmds_skip = {cmd for cmd, _ in unchanged}
        run_deploy_actions(
                cmds_skip, behaviors.dryrun, behaviors.skip_service_status,
                use_chromite_master=behaviors.update_push_servers)

    if behaviors.report:
        print('Changes:')
        print(report_changes(versions_before, versions_after))


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))