1#!/usr/bin/python -u 2# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc. 3# Released under the GPL v2 4 5""" 6Run a control file through the server side engine 7""" 8 9import datetime 10import contextlib 11import getpass 12import logging 13import os 14import re 15import shutil 16import signal 17import socket 18import sys 19import traceback 20import time 21import urllib2 22 23import common 24from autotest_lib.client.bin.result_tools import utils as result_utils 25from autotest_lib.client.bin.result_tools import view as result_view 26from autotest_lib.client.common_lib import control_data 27from autotest_lib.client.common_lib import enum 28from autotest_lib.client.common_lib import error 29from autotest_lib.client.common_lib import global_config 30from autotest_lib.client.common_lib import host_queue_entry_states 31from autotest_lib.client.common_lib import host_states 32from autotest_lib.server import results_mocker 33from autotest_lib.server.cros.dynamic_suite import suite 34 35try: 36 from chromite.lib import metrics 37 from chromite.lib import cloud_trace 38except ImportError: 39 from autotest_lib.client.common_lib import utils as common_utils 40 metrics = common_utils.metrics_mock 41 import mock 42 cloud_trace = mock.MagicMock() 43 44_CONFIG = global_config.global_config 45 46# Number of seconds to wait before returning if testing mode is enabled 47TESTING_MODE_SLEEP_SECS = 1 48 49 50from autotest_lib.server import frontend 51from autotest_lib.server import server_logging_config 52from autotest_lib.server import server_job, utils, autoserv_parser, autotest 53from autotest_lib.server import utils as server_utils 54from autotest_lib.server import site_utils 55from autotest_lib.server.cros.dynamic_suite import frontend_wrappers 56from autotest_lib.site_utils import job_directories 57from autotest_lib.site_utils import lxc 58from autotest_lib.site_utils.lxc import utils as lxc_utils 59from autotest_lib.client.common_lib import pidfile, logging_manager 60 61 62# Control segment to stage server-side package. 63STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path( 64 'stage_server_side_package') 65 66# Command line to start servod in a moblab. 67START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s' 68STOP_SERVOD_CMD = 'sudo stop servod' 69 70_AUTOTEST_ROOT = os.path.realpath(os.path.join(os.path.dirname(__file__), '..')) 71_CONTROL_FILE_FROM_CONTROL_NAME = 'control.from_control_name' 72 73_LXC_JOB_FOLDER = 'lxc_job_folder' 74 75def log_alarm(signum, frame): 76 logging.error("Received SIGALARM. Ignoring and continuing on.") 77 sys.exit(1) 78 79 80def _get_machines(parser): 81 """Get a list of machine names from command line arg -m or a file. 82 83 @param parser: Parser for the command line arguments. 84 85 @return: A list of machine names from command line arg -m or the 86 machines file specified in the command line arg -M. 87 """ 88 if parser.options.machines: 89 machines = parser.options.machines.replace(',', ' ').strip().split() 90 else: 91 machines = [] 92 machines_file = parser.options.machines_file 93 if machines_file: 94 machines = [] 95 for m in open(machines_file, 'r').readlines(): 96 # remove comments, spaces 97 m = re.sub('#.*', '', m).strip() 98 if m: 99 machines.append(m) 100 logging.debug('Read list of machines from file: %s', machines_file) 101 logging.debug('Machines: %s', ','.join(machines)) 102 103 if machines: 104 for machine in machines: 105 if not machine or re.search('\s', machine): 106 parser.parser.error("Invalid machine: %s" % str(machine)) 107 machines = list(set(machines)) 108 machines.sort() 109 return machines 110 111 112def _stage_ssp(parser, resultsdir): 113 """Stage server-side package. 114 115 This function calls a control segment to stage server-side package based on 116 the job and autoserv command line option. The detail implementation could 117 be different for each host type. Currently, only CrosHost has 118 stage_server_side_package function defined. 119 The script returns None if no server-side package is available. However, 120 it may raise exception if it failed for reasons other than artifact (the 121 server-side package) not found. 122 123 @param parser: Command line arguments parser passed in the autoserv process. 124 @param resultsdir: Folder to store results. This could be different from 125 parser.options.results: parser.options.results can be set to None 126 for results to be stored in a temp folder. resultsdir can be None 127 for autoserv run requires no logging. 128 129 @return: url to the autotest server-side package. None in case of errors. 130 """ 131 machines_list = _get_machines(parser) 132 machines_list = server_job.get_machine_dicts( 133 machine_names=machines_list, 134 store_dir=os.path.join(resultsdir, parser.options.host_info_subdir), 135 in_lab=parser.options.lab, 136 use_shadow_store=not parser.options.local_only_host_info, 137 host_attributes=parser.options.host_attributes, 138 ) 139 140 namespace = {'machines': machines_list, 141 'isolate_hash': parser.options.isolate, 142 'image': parser.options.test_source_build} 143 script_locals = {} 144 execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, namespace, script_locals) 145 ssp_url = script_locals['ssp_url'] 146 if not ssp_url: 147 logging.error('Failed to stage SSP package: %s', 148 script_locals['error_msg']) 149 logging.error('This job will fail later, when attempting to run with' 150 ' SSP') 151 return ssp_url 152 153 154def _run_with_ssp(job, container_id, job_id, results, parser, ssp_url, 155 machines): 156 """Run the server job with server-side packaging. 157 158 @param job: The server job object. 159 @param container_id: ID of the container to run the test. 160 @param job_id: ID of the test job. 161 @param results: Folder to store results. This could be different from 162 parser.options.results: 163 parser.options.results can be set to None for results to be 164 stored in a temp folder. 165 results can be None for autoserv run requires no logging. 166 @param parser: Command line parser that contains the options. 167 @param ssp_url: url of the staged server-side package. 168 @param machines: A list of machines to run the test. 169 """ 170 if not ssp_url: 171 job.record('FAIL', None, None, 172 'Failed to stage server-side package') 173 raise error.AutoservError('Failed to stage server-side package') 174 175 bucket = lxc.ContainerBucket() 176 control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != '' 177 else None) 178 try: 179 dut_name = machines[0] if len(machines) >= 1 else None 180 test_container = bucket.setup_test(container_id, job_id, ssp_url, 181 results, control=control, 182 job_folder=_LXC_JOB_FOLDER, 183 dut_name=dut_name, 184 isolate_hash=parser.options.isolate) 185 except Exception as e: 186 job.record('FAIL', None, None, 187 'Failed to setup container for test: %s. Check logs in ' 188 'ssp_logs folder for more details.' % e) 189 raise 190 191 args = sys.argv[:] 192 args.remove('--require-ssp') 193 # --parent_job_id is only useful in autoserv running in host, not in 194 # container. Include this argument will cause test to fail for builds before 195 # CL 286265 was merged. 196 if '--parent_job_id' in args: 197 index = args.index('--parent_job_id') 198 args.remove('--parent_job_id') 199 # Remove the actual parent job id in command line arg. 200 del args[index] 201 202 # A dictionary of paths to replace in the command line. Key is the path to 203 # be replaced with the one in value. 204 paths_to_replace = {} 205 # Replace the control file path with the one in container. 206 if control: 207 container_control_filename = os.path.join( 208 lxc.CONTROL_TEMP_PATH, os.path.basename(control)) 209 paths_to_replace[control] = container_control_filename 210 # Update result directory with the one in container. 211 container_result_dir = os.path.join(lxc.RESULT_DIR_FMT % _LXC_JOB_FOLDER) 212 if parser.options.results: 213 paths_to_replace[parser.options.results] = container_result_dir 214 args = [paths_to_replace.get(arg, arg) for arg in args] 215 216 # Apply --use-existing-results, results directory is aready created and 217 # mounted in container. Apply this arg to avoid exception being raised. 218 if not '--use-existing-results' in args: 219 args.append('--use-existing-results') 220 221 # Make sure autoserv running in container using a different pid file. 222 if not '--pidfile-label' in args: 223 args.extend(['--pidfile-label', 'container_autoserv']) 224 225 cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args]) 226 logging.info('Run command in container: %s', cmd_line) 227 success = False 228 try: 229 test_container.attach_run(cmd_line) 230 success = True 231 except Exception as e: 232 # If the test run inside container fails without generating any log, 233 # write a message to status.log to help troubleshooting. 234 debug_files = os.listdir(os.path.join(results, 'debug')) 235 if not debug_files: 236 job.record('FAIL', None, None, 237 'Failed to run test inside the container: %s. Check ' 238 'logs in ssp_logs folder for more details.' % e) 239 raise 240 finally: 241 metrics.Counter( 242 'chromeos/autotest/experimental/execute_job_in_ssp').increment( 243 fields={'success': success}) 244 test_container.destroy() 245 246 247def correct_results_folder_permission(results): 248 """Make sure the results folder has the right permission settings. 249 250 For tests running with server-side packaging, the results folder has the 251 owner of root. This must be changed to the user running the autoserv 252 process, so parsing job can access the results folder. 253 TODO(dshi): crbug.com/459344 Remove this function when test container can be 254 unprivileged container. 255 256 @param results: Path to the results folder. 257 258 """ 259 if not results: 260 return 261 262 utils.run('sudo -n chown -R %s "%s"' % (os.getuid(), results)) 263 utils.run('sudo -n chgrp -R %s "%s"' % (os.getgid(), results)) 264 265 266def _start_servod(machine): 267 """Try to start servod in moblab if it's not already running or running with 268 different board or port. 269 270 @param machine: Name of the dut used for test. 271 """ 272 if not utils.is_moblab(): 273 return 274 275 logging.debug('Trying to start servod.') 276 try: 277 afe = frontend.AFE() 278 board = server_utils.get_board_from_afe(machine, afe) 279 hosts = afe.get_hosts(hostname=machine) 280 servo_host = hosts[0].attributes.get('servo_host', None) 281 servo_port = hosts[0].attributes.get('servo_port', 9999) 282 if not servo_host in ['localhost', '127.0.0.1']: 283 logging.warn('Starting servod is aborted. The dut\'s servo_host ' 284 'attribute is not set to localhost.') 285 return 286 except (urllib2.HTTPError, urllib2.URLError): 287 # Ignore error if RPC failed to get board 288 logging.error('Failed to get board name from AFE. Start servod is ' 289 'aborted') 290 return 291 292 try: 293 pid = utils.run('pgrep servod').stdout 294 cmd_line = utils.run('ps -fp %s' % pid).stdout 295 if ('--board %s' % board in cmd_line and 296 '--port %s' % servo_port in cmd_line): 297 logging.debug('Servod is already running with given board and port.' 298 ' There is no need to restart servod.') 299 return 300 logging.debug('Servod is running with different board or port. ' 301 'Stopping existing servod.') 302 utils.run('sudo stop servod') 303 except error.CmdError: 304 # servod is not running. 305 pass 306 307 try: 308 utils.run(START_SERVOD_CMD % (board, servo_port)) 309 logging.debug('Servod is started') 310 except error.CmdError as e: 311 logging.error('Servod failed to be started, error: %s', e) 312 313 314def _control_path_on_disk(control_name): 315 """Find the control file corresponding to the given control name, on disk. 316 317 @param control_name: NAME attribute of the control file to fetch. 318 @return: Path to the control file. 319 """ 320 cf_getter = suite.create_fs_getter(_AUTOTEST_ROOT) 321 control_name_predicate = suite.test_name_matches_pattern_predicate( 322 '^%s$' % control_name) 323 tests = suite.find_and_parse_tests(cf_getter, control_name_predicate) 324 if not tests: 325 raise error.AutoservError( 326 'Failed to find any control files with NAME %s' % control_name) 327 if len(tests) > 1: 328 logging.error('Found more than one control file with NAME %s: %s', 329 control_name, [t.path for t in tests]) 330 raise error.AutoservError( 331 'Found more than one control file with NAME %s' % control_name) 332 return tests[0].path 333 334 335def _stage_control_file(control_name, results_dir): 336 """Stage the control file to execute from local autotest checkout. 337 338 @param control_name: Name of the control file to stage. 339 @param results_dir: Results directory to stage the control file into. 340 @return: Absolute path to the staged control file. 341 """ 342 control_path = _control_path_on_disk(control_name) 343 new_control = os.path.join(results_dir, _CONTROL_FILE_FROM_CONTROL_NAME) 344 shutil.copy2(control_path, new_control) 345 return new_control 346 347 348def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp): 349 """Run server job with given options. 350 351 @param pid_file_manager: PidFileManager used to monitor the autoserv process 352 @param results: Folder to store results. 353 @param parser: Parser for the command line arguments. 354 @param ssp_url: Url to server-side package. 355 @param use_ssp: Set to True to run with server-side packaging. 356 """ 357 # send stdin to /dev/null 358 dev_null = os.open(os.devnull, os.O_RDONLY) 359 os.dup2(dev_null, sys.stdin.fileno()) 360 os.close(dev_null) 361 362 # Create separate process group if the process is not a process group 363 # leader. This allows autoserv process to keep running after the caller 364 # process (drone manager call) exits. 365 if os.getpid() != os.getpgid(0): 366 os.setsid() 367 368 # Container name is predefined so the container can be destroyed in 369 # handle_sigterm. 370 job_or_task_id = job_directories.get_job_id_or_task_id( 371 parser.options.results) 372 container_id = lxc.ContainerId(job_or_task_id, time.time(), os.getpid()) 373 374 # Implement SIGTERM handler 375 def handle_sigterm(signum, frame): 376 logging.debug('Received SIGTERM') 377 if pid_file_manager: 378 pid_file_manager.close_file(1, signal.SIGTERM) 379 logging.debug('Finished writing to pid_file. Killing process.') 380 381 # Update results folder's file permission. This needs to be done ASAP 382 # before the parsing process tries to access the log. 383 if use_ssp and results: 384 correct_results_folder_permission(results) 385 386 # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved. 387 # This sleep allows the pending output to be logged before the kill 388 # signal is sent. 389 time.sleep(.1) 390 if use_ssp: 391 logging.debug('Destroy container %s before aborting the autoserv ' 392 'process.', container_id) 393 try: 394 bucket = lxc.ContainerBucket() 395 container = bucket.get_container(container_id) 396 if container: 397 container.destroy() 398 else: 399 logging.debug('Container %s is not found.', container_id) 400 except: 401 # Handle any exception so the autoserv process can be aborted. 402 logging.exception('Failed to destroy container %s.', 403 container_id) 404 # Try to correct the result file permission again after the 405 # container is destroyed, as the container might have created some 406 # new files in the result folder. 407 if results: 408 correct_results_folder_permission(results) 409 410 os.killpg(os.getpgrp(), signal.SIGKILL) 411 412 # Set signal handler 413 signal.signal(signal.SIGTERM, handle_sigterm) 414 415 # faulthandler is only needed to debug in the Lab and is not avaliable to 416 # be imported in the chroot as part of VMTest, so Try-Except it. 417 try: 418 import faulthandler 419 faulthandler.register(signal.SIGTERM, all_threads=True, chain=True) 420 logging.debug('faulthandler registered on SIGTERM.') 421 except ImportError: 422 sys.exc_clear() 423 424 # Ignore SIGTTOU's generated by output from forked children. 425 signal.signal(signal.SIGTTOU, signal.SIG_IGN) 426 427 # If we received a SIGALARM, let's be loud about it. 428 signal.signal(signal.SIGALRM, log_alarm) 429 430 # Server side tests that call shell scripts often depend on $USER being set 431 # but depending on how you launch your autotest scheduler it may not be set. 432 os.environ['USER'] = getpass.getuser() 433 434 label = parser.options.label 435 group_name = parser.options.group_name 436 user = parser.options.user 437 client = parser.options.client 438 server = parser.options.server 439 verify = parser.options.verify 440 repair = parser.options.repair 441 cleanup = parser.options.cleanup 442 provision = parser.options.provision 443 reset = parser.options.reset 444 job_labels = parser.options.job_labels 445 no_tee = parser.options.no_tee 446 execution_tag = parser.options.execution_tag 447 ssh_user = parser.options.ssh_user 448 ssh_port = parser.options.ssh_port 449 ssh_pass = parser.options.ssh_pass 450 collect_crashinfo = parser.options.collect_crashinfo 451 control_filename = parser.options.control_filename 452 verify_job_repo_url = parser.options.verify_job_repo_url 453 skip_crash_collection = parser.options.skip_crash_collection 454 ssh_verbosity = int(parser.options.ssh_verbosity) 455 ssh_options = parser.options.ssh_options 456 no_use_packaging = parser.options.no_use_packaging 457 in_lab = bool(parser.options.lab) 458 459 # can't be both a client and a server side test 460 if client and server: 461 parser.parser.error("Can not specify a test as both server and client!") 462 463 if provision and client: 464 parser.parser.error("Cannot specify provisioning and client!") 465 466 is_special_task = (verify or repair or cleanup or collect_crashinfo or 467 provision or reset) 468 use_client_trampoline = False 469 if parser.options.control_name: 470 if use_ssp: 471 # When use_ssp is True, autoserv will be re-executed inside a 472 # container preserving the --control-name argument. Control file 473 # will be staged inside the rexecuted autoserv. 474 control = None 475 else: 476 try: 477 control = _stage_control_file(parser.options.control_name, 478 results) 479 except error.AutoservError as e: 480 logging.info("Using client trampoline because of: %s", e) 481 control = parser.options.control_name 482 use_client_trampoline = True 483 484 elif parser.args: 485 control = parser.args[0] 486 else: 487 if not is_special_task: 488 parser.parser.error("Missing argument: control file") 489 control = None 490 491 if ssh_verbosity > 0: 492 # ssh_verbosity is an integer between 0 and 3, inclusive 493 ssh_verbosity_flag = '-' + 'v' * ssh_verbosity 494 else: 495 ssh_verbosity_flag = '' 496 497 machines = _get_machines(parser) 498 if group_name and len(machines) < 2: 499 parser.parser.error('-G %r may only be supplied with more than one ' 500 'machine.' % group_name) 501 502 job_kwargs = { 503 'control': control, 504 'args': parser.args[1:], 505 'resultdir': results, 506 'label': label, 507 'user': user, 508 'machines': machines, 509 'machine_dict_list': server_job.get_machine_dicts( 510 machine_names=machines, 511 store_dir=os.path.join(results, 512 parser.options.host_info_subdir), 513 in_lab=in_lab, 514 use_shadow_store=not parser.options.local_only_host_info, 515 host_attributes=parser.options.host_attributes, 516 ), 517 'client': client, 518 'ssh_user': ssh_user, 519 'ssh_port': ssh_port, 520 'ssh_pass': ssh_pass, 521 'ssh_verbosity_flag': ssh_verbosity_flag, 522 'ssh_options': ssh_options, 523 'group_name': group_name, 524 'tag': execution_tag, 525 'disable_sysinfo': parser.options.disable_sysinfo, 526 'in_lab': in_lab, 527 'use_client_trampoline': use_client_trampoline, 528 } 529 if parser.options.parent_job_id: 530 job_kwargs['parent_job_id'] = int(parser.options.parent_job_id) 531 if control_filename: 532 job_kwargs['control_filename'] = control_filename 533 job = server_job.server_job(**job_kwargs) 534 535 job.logging.start_logging() 536 537 # perform checks 538 job.precheck() 539 540 # run the job 541 exit_code = 0 542 auto_start_servod = _CONFIG.get_config_value( 543 'AUTOSERV', 'auto_start_servod', type=bool, default=False) 544 545 site_utils.SetupTsMonGlobalState('autoserv', indirect=False, 546 short_lived=True) 547 try: 548 try: 549 if repair: 550 if auto_start_servod and len(machines) == 1: 551 _start_servod(machines[0]) 552 job.repair(job_labels) 553 elif verify: 554 job.verify(job_labels) 555 elif provision: 556 job.provision(job_labels) 557 elif reset: 558 job.reset(job_labels) 559 elif cleanup: 560 job.cleanup(job_labels) 561 else: 562 if auto_start_servod and len(machines) == 1: 563 _start_servod(machines[0]) 564 if use_ssp: 565 try: 566 _run_with_ssp(job, container_id, job_or_task_id, 567 results, parser, ssp_url, machines) 568 finally: 569 # Update the ownership of files in result folder. 570 correct_results_folder_permission(results) 571 else: 572 if collect_crashinfo: 573 # Update the ownership of files in result folder. If the 574 # job to collect crashinfo was running inside container 575 # (SSP) and crashed before correcting folder permission, 576 # the result folder might have wrong permission setting. 577 try: 578 correct_results_folder_permission(results) 579 except: 580 # Ignore any error as the user may not have root 581 # permission to run sudo command. 582 pass 583 metric_name = ('chromeos/autotest/experimental/' 584 'autoserv_job_run_duration') 585 f = {'in_container': utils.is_in_container(), 586 'success': False} 587 with metrics.SecondsTimer(metric_name, fields=f) as c: 588 job.run(verify_job_repo_url=verify_job_repo_url, 589 only_collect_crashinfo=collect_crashinfo, 590 skip_crash_collection=skip_crash_collection, 591 job_labels=job_labels, 592 use_packaging=(not no_use_packaging)) 593 c['success'] = True 594 595 finally: 596 job.close() 597 # Special task doesn't run parse, so result summary needs to be 598 # built here. 599 if results and (repair or verify or reset or cleanup or provision): 600 # Throttle the result on the server side. 601 try: 602 result_utils.execute( 603 results, control_data.DEFAULT_MAX_RESULT_SIZE_KB) 604 except: 605 logging.exception( 606 'Non-critical failure: Failed to throttle results ' 607 'in directory %s.', results) 608 # Build result view and report metrics for result sizes. 609 site_utils.collect_result_sizes(results) 610 except: 611 exit_code = 1 612 traceback.print_exc() 613 finally: 614 metrics.Flush() 615 616 sys.exit(exit_code) 617 618 619# Job breakdown statuses 620_hs = host_states.Status 621_qs = host_queue_entry_states.Status 622_status_list = [ 623 _qs.QUEUED, _qs.RESETTING, _qs.VERIFYING, 624 _qs.PROVISIONING, _hs.REPAIRING, _qs.CLEANING, 625 _qs.RUNNING, _qs.GATHERING, _qs.PARSING] 626_JOB_OVERHEAD_STATUS = enum.Enum(*_status_list, string_values=True) 627 628 629def get_job_status(options): 630 """Returns the HQE Status for this run. 631 632 @param options: parser options. 633 """ 634 s = _JOB_OVERHEAD_STATUS 635 task_mapping = { 636 'reset': s.RESETTING, 'verify': s.VERIFYING, 637 'provision': s.PROVISIONING, 'repair': s.REPAIRING, 638 'cleanup': s.CLEANING, 'collect_crashinfo': s.GATHERING} 639 match = [task for task in task_mapping if getattr(options, task, False)] 640 return task_mapping[match[0]] if match else s.RUNNING 641 642 643def _require_ssp_from_control(control_name): 644 """Read the value of REQUIRE_SSP from test control file. 645 646 This reads the control file from the prod checkout of autotest and uses that 647 to determine whether to even stage the SSP package on a devserver. 648 649 This means: 650 [1] Any change in REQUIRE_SSP directive in a test requires a prod-push to go 651 live. 652 [2] This function may find that the control file does not exist but the SSP 653 package may contain the test file. This function conservatively returns True 654 in that case. 655 656 This function is called very early in autoserv, before logging is setup. 657 """ 658 if not control_name: 659 return True 660 try: 661 path = _control_path_on_disk(control_name) 662 except error.AutoservError as e: 663 sys.stderr.write("autoserv: Could not determine control file path," 664 " assuming we need SSP: %s\n" % e) 665 sys.stderr.flush() 666 return True 667 if not os.path.isfile(path): 668 return True 669 control = control_data.parse_control(path) 670 # There must be explicit directive in the control file to disable SSP. 671 if not control or control.require_ssp is None: 672 return True 673 return control.require_ssp 674 675 676def main(): 677 start_time = datetime.datetime.now() 678 parser = autoserv_parser.autoserv_parser 679 parser.parse_args() 680 681 if len(sys.argv) == 1: 682 parser.parser.print_help() 683 sys.exit(1) 684 685 if parser.options.no_logging: 686 results = None 687 else: 688 results = parser.options.results 689 if not results: 690 results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S') 691 results = os.path.abspath(results) 692 resultdir_exists = False 693 for filename in ('control.srv', 'status.log', '.autoserv_execute'): 694 if os.path.exists(os.path.join(results, filename)): 695 resultdir_exists = True 696 if not parser.options.use_existing_results and resultdir_exists: 697 error = "Error: results directory already exists: %s\n" % results 698 sys.stderr.write(error) 699 sys.exit(1) 700 701 # Now that we certified that there's no leftover results dir from 702 # previous jobs, lets create the result dir since the logging system 703 # needs to create the log file in there. 704 if not os.path.isdir(results): 705 os.makedirs(results) 706 707 if parser.options.require_ssp: 708 # This is currently only used for skylab (i.e., when --control-name is 709 # used). 710 use_ssp = _require_ssp_from_control(parser.options.control_name) 711 else: 712 use_ssp = False 713 714 715 if use_ssp: 716 log_dir = os.path.join(results, 'ssp_logs') if results else None 717 if log_dir and not os.path.exists(log_dir): 718 os.makedirs(log_dir) 719 else: 720 log_dir = results 721 722 logging_manager.configure_logging( 723 server_logging_config.ServerLoggingConfig(), 724 results_dir=log_dir, 725 use_console=not parser.options.no_tee, 726 verbose=parser.options.verbose, 727 no_console_prefix=parser.options.no_console_prefix) 728 729 logging.debug('autoserv is running in drone %s.', socket.gethostname()) 730 logging.debug('autoserv command was: %s', ' '.join(sys.argv)) 731 logging.debug('autoserv parsed options: %s', parser.options) 732 733 if use_ssp: 734 ssp_url = _stage_ssp(parser, results) 735 else: 736 ssp_url = None 737 738 if results: 739 logging.info("Results placed in %s" % results) 740 741 # wait until now to perform this check, so it get properly logged 742 if (parser.options.use_existing_results and not resultdir_exists and 743 not utils.is_in_container()): 744 logging.error("No existing results directory found: %s", results) 745 sys.exit(1) 746 747 if parser.options.write_pidfile and results: 748 pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label, 749 results) 750 pid_file_manager.open_file() 751 else: 752 pid_file_manager = None 753 754 autotest.Autotest.set_install_in_tmpdir( 755 parser.options.install_in_tmpdir) 756 757 exit_code = 0 758 # TODO(beeps): Extend this to cover different failure modes. 759 # Testing exceptions are matched against labels sent to autoserv. Eg, 760 # to allow only the hostless job to run, specify 761 # testing_exceptions: test_suite in the shadow_config. To allow both 762 # the hostless job and dummy_Pass to run, specify 763 # testing_exceptions: test_suite,dummy_Pass. You can figure out 764 # what label autoserv is invoked with by looking through the logs of a test 765 # for the autoserv command's -l option. 766 testing_exceptions = _CONFIG.get_config_value( 767 'AUTOSERV', 'testing_exceptions', type=list, default=[]) 768 test_mode = _CONFIG.get_config_value( 769 'AUTOSERV', 'testing_mode', type=bool, default=False) 770 test_mode = (results_mocker and test_mode and not 771 any([ex in parser.options.label 772 for ex in testing_exceptions])) 773 is_task = (parser.options.verify or parser.options.repair or 774 parser.options.provision or parser.options.reset or 775 parser.options.cleanup or parser.options.collect_crashinfo) 776 777 trace_labels = { 778 'job_id': job_directories.get_job_id_or_task_id( 779 parser.options.results) 780 } 781 trace = cloud_trace.SpanStack( 782 labels=trace_labels, 783 global_context=parser.options.cloud_trace_context) 784 trace.enabled = parser.options.cloud_trace_context_enabled == 'True' 785 try: 786 try: 787 if test_mode: 788 # The parser doesn't run on tasks anyway, so we can just return 789 # happy signals without faking results. 790 if not is_task: 791 machine = parser.options.results.split('/')[-1] 792 793 # TODO(beeps): The proper way to do this would be to 794 # refactor job creation so we can invoke job.record 795 # directly. To do that one needs to pipe the test_name 796 # through run_autoserv and bail just before invoking 797 # the server job. See the comment in 798 # puppylab/results_mocker for more context. 799 results_mocker.ResultsMocker( 800 'unknown-test', parser.options.results, machine 801 ).mock_results() 802 return 803 else: 804 with trace.Span(get_job_status(parser.options)): 805 run_autoserv(pid_file_manager, results, parser, ssp_url, 806 use_ssp) 807 except SystemExit as e: 808 exit_code = e.code 809 if exit_code: 810 logging.exception('Uncaught SystemExit with code %s', exit_code) 811 except Exception: 812 # If we don't know what happened, we'll classify it as 813 # an 'abort' and return 1. 814 logging.exception('Uncaught Exception, exit_code = 1.') 815 exit_code = 1 816 finally: 817 if pid_file_manager: 818 pid_file_manager.close_file(exit_code) 819 sys.exit(exit_code) 820 821 822if __name__ == '__main__': 823 main() 824