1#!/usr/bin/python -u
2# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
3# Released under the GPL v2
4
5"""
6Run a control file through the server side engine
7"""
8
9import datetime
10import contextlib
11import getpass
12import logging
13import os
14import re
15import shutil
16import signal
17import socket
18import sys
19import traceback
20import time
21import urllib2
22
23import common
24from autotest_lib.client.bin.result_tools import utils as result_utils
25from autotest_lib.client.bin.result_tools import view as result_view
26from autotest_lib.client.common_lib import control_data
27from autotest_lib.client.common_lib import enum
28from autotest_lib.client.common_lib import error
29from autotest_lib.client.common_lib import global_config
30from autotest_lib.client.common_lib import host_queue_entry_states
31from autotest_lib.client.common_lib import host_states
32from autotest_lib.server import results_mocker
33from autotest_lib.server.cros.dynamic_suite import suite
34
# chromite provides the real metrics and cloud_trace implementations. When it
# cannot be imported, fall back to stand-ins so the rest of this module can
# use `metrics` and `cloud_trace` unconditionally.
try:
    from chromite.lib import metrics
    from chromite.lib import cloud_trace
except ImportError:
    from autotest_lib.client.common_lib import utils as common_utils
    metrics = common_utils.metrics_mock
    import mock
    cloud_trace = mock.MagicMock()

# Shared handle to the global autotest configuration; read below for the
# 'AUTOSERV' section values.
_CONFIG = global_config.global_config

# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 1
48
49
50from autotest_lib.server import frontend
51from autotest_lib.server import server_logging_config
52from autotest_lib.server import server_job, utils, autoserv_parser, autotest
53from autotest_lib.server import utils as server_utils
54from autotest_lib.server import site_utils
55from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
56from autotest_lib.site_utils import job_directories
57from autotest_lib.site_utils import lxc
58from autotest_lib.site_utils.lxc import utils as lxc_utils
59from autotest_lib.client.common_lib import pidfile, logging_manager
60
61
# Path of the control segment that stages the server-side package. It is
# executed by _stage_ssp().
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
        'stage_server_side_package')

# Command lines to start and stop servod in a moblab. The start command takes
# the board name and the servo port, in that order.
START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
STOP_SERVOD_CMD = 'sudo stop servod'

# Parent directory of the directory containing this file; used as the root
# when searching for control files on disk.
_AUTOTEST_ROOT = os.path.realpath(os.path.join(os.path.dirname(__file__), '..'))
# File name given to a control file that was located by its NAME and copied
# into the results directory.
_CONTROL_FILE_FROM_CONTROL_NAME = 'control.from_control_name'

# Job folder name passed to the container setup and used to build the results
# directory path inside the container.
_LXC_JOB_FOLDER = 'lxc_job_folder'
74
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event and terminate autoserv.

    @param signum: Number of the received signal (unused).
    @param frame: Stack frame that was interrupted (unused).

    @raise SystemExit: Always, with exit code 1.
    """
    # The handler does not ignore the alarm -- it exits -- so the log message
    # must say so instead of claiming that execution continues.
    logging.error("Received SIGALARM. Exiting with status 1.")
    sys.exit(1)
78
79
80def _get_machines(parser):
81    """Get a list of machine names from command line arg -m or a file.
82
83    @param parser: Parser for the command line arguments.
84
85    @return: A list of machine names from command line arg -m or the
86             machines file specified in the command line arg -M.
87    """
88    if parser.options.machines:
89        machines = parser.options.machines.replace(',', ' ').strip().split()
90    else:
91        machines = []
92    machines_file = parser.options.machines_file
93    if machines_file:
94        machines = []
95        for m in open(machines_file, 'r').readlines():
96            # remove comments, spaces
97            m = re.sub('#.*', '', m).strip()
98            if m:
99                machines.append(m)
100        logging.debug('Read list of machines from file: %s', machines_file)
101        logging.debug('Machines: %s', ','.join(machines))
102
103    if machines:
104        for machine in machines:
105            if not machine or re.search('\s', machine):
106                parser.parser.error("Invalid machine: %s" % str(machine))
107        machines = list(set(machines))
108        machines.sort()
109    return machines
110
111
def _stage_ssp(parser, resultsdir):
    """Stage the server-side package (SSP) for this autoserv run.

    The staging itself is delegated to the control segment
    STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, which is executed with the
    machine dictionaries, the isolate hash and the test source build in its
    namespace. The segment is expected to leave 'ssp_url' (and, on failure,
    'error_msg') in its local variables.

    @param parser: Command line arguments parser passed in the autoserv
            process.
    @param resultsdir: Folder to store results; joined with the host info
            sub-directory option to locate the host info store.

    @return: Url to the autotest server-side package, or a false value if the
            package could not be staged (the failure is logged).
    """
    options = parser.options
    machine_names = _get_machines(parser)
    host_info_dir = os.path.join(resultsdir, options.host_info_subdir)
    machine_dicts = server_job.get_machine_dicts(
            machine_names=machine_names,
            store_dir=host_info_dir,
            in_lab=options.lab,
            use_shadow_store=not options.local_only_host_info,
            host_attributes=options.host_attributes,
    )

    segment_globals = {'machines': machine_dicts,
                       'isolate_hash': options.isolate,
                       'image': options.test_source_build}
    segment_locals = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, segment_globals,
             segment_locals)

    ssp_url = segment_locals['ssp_url']
    if ssp_url:
        return ssp_url

    # Staging failed: report why, and warn that the job is doomed.
    logging.error('Failed to stage SSP package: %s',
                  segment_locals['error_msg'])
    logging.error('This job will fail later, when attempting to run with'
                  ' SSP')
    return ssp_url
152
153
def _run_with_ssp(job, container_id, job_id, results, parser, ssp_url,
                  machines):
    """Run the server job with server-side packaging.

    A test container is set up from the staged server-side package, and the
    current autoserv command line (with host-only arguments removed and paths
    translated to their in-container locations) is re-executed inside it. The
    container is always destroyed afterwards.

    @param job: The server job object.
    @param container_id: ID of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results  can be set to None for results to be
                    stored in a temp folder.
                    results can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    @param machines: A list of machines to run the test.

    @raise error.AutoservError: If ssp_url is empty.
    @raise Exception: Whatever container setup or the in-container run raised;
                      a FAIL entry is recorded on the job before re-raising.
    """
    if not ssp_url:
        job.record('FAIL', None, None,
                   'Failed to stage server-side package')
        raise error.AutoservError('Failed to stage server-side package')

    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        dut_name = machines[0] if len(machines) >= 1 else None
        test_container = bucket.setup_test(container_id, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=_LXC_JOB_FOLDER,
                                           dut_name=dut_name,
                                           isolate_hash=parser.options.isolate)
    except Exception as e:
        job.record('FAIL', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise

    args = sys.argv[:]
    # The autoserv inside the container must not try to use SSP again.
    args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        args.remove('--parent_job_id')
        # After the flag is removed, `index` points at its value (the actual
        # parent job id); remove that too.
        del args[index]

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    container_result_dir = lxc.RESULT_DIR_FMT % _LXC_JOB_FOLDER
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is already created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    # NOTE(review): only arguments containing a space are quoted, and only
    # with single quotes; arguments with other shell metacharacters are passed
    # through unquoted.
    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside container fails without generating any log,
        # write a message to status.log to help troubleshooting.
        # The debug folder may not exist (or results may be None) when the
        # run failed early. Check for that explicitly rather than letting
        # os.listdir raise: an exception here would replace the real failure
        # and skip the FAIL record. A nested try/except is avoided on purpose,
        # as on Python 2 it would change what the bare `raise` below re-raises.
        debug_dir = os.path.join(results, 'debug') if results else None
        has_debug_files = bool(debug_dir and os.path.isdir(debug_dir) and
                               os.listdir(debug_dir))
        if not has_debug_files:
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        metrics.Counter(
            'chromeos/autotest/experimental/execute_job_in_ssp').increment(
                fields={'success': success})
        test_container.destroy()
245
246
def correct_results_folder_permission(results):
    """Hand ownership of the results folder to the current user and group.

    Tests that run with server-side packaging leave the results folder owned
    by root. The parsing job runs as the autoserv user, so both the owner and
    the group of the folder are reset recursively (through non-interactive
    sudo) to those of the current process.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    @param results: Path to the results folder. Nothing is done if it is
                    empty or None.

    """
    if results:
        chown_cmd = 'sudo -n chown -R %s "%s"' % (os.getuid(), results)
        utils.run(chown_cmd)
        chgrp_cmd = 'sudo -n chgrp -R %s "%s"' % (os.getgid(), results)
        utils.run(chgrp_cmd)
264
265
def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or running with
    different board or port.

    This is a best-effort operation: nothing is done outside of moblab, when
    the AFE cannot be queried, when the dut is unknown to the AFE, or when the
    dut's servo_host attribute does not point at the local machine. A failure
    to start servod is logged, not raised.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        if not hosts:
            # Without a host entry there are no servo attributes to read.
            logging.error('Host %s is not found in AFE. Start servod is '
                          'aborted', machine)
            return
        servo_host = hosts[0].attributes.get('servo_host', None)
        servo_port = hosts[0].attributes.get('servo_port', 9999)
        if servo_host not in ['localhost', '127.0.0.1']:
            logging.warning('Starting servod is aborted. The dut\'s servo_host '
                            'attribute is not set to localhost.')
            return
    except (urllib2.HTTPError, urllib2.URLError):
        # Ignore error if RPC failed to get board
        logging.error('Failed to get board name from AFE. Start servod is '
                      'aborted')
        return

    try:
        # pgrep prints one pid per line. Join them with commas so the whole
        # list is passed to a single `ps` invocation instead of splicing raw
        # newlines into the command line.
        pids = utils.run('pgrep servod').stdout.split()
        cmd_line = utils.run('ps -fp %s' % ','.join(pids)).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with given board and port.'
                          ' There is no need to restart servod.')
            return
        logging.debug('Servod is running with different board or port. '
                      'Stopping existing servod.')
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started')
    except error.CmdError as e:
        logging.error('Servod failed to be started, error: %s', e)
312
313
def _control_path_on_disk(control_name):
    """Locate, under the autotest root, the control file with the given NAME.

    @param control_name: NAME attribute of the control file to fetch.
    @return: Path to the single matching control file.
    @raise error.AutoservError: If no control file, or more than one control
            file, matches the name.
    """
    getter = suite.create_fs_getter(_AUTOTEST_ROOT)
    name_matches = suite.test_name_matches_pattern_predicate(
            '^%s$' % control_name)
    found = suite.find_and_parse_tests(getter, name_matches)

    if not found:
        raise error.AutoservError(
                'Failed to find any control files with NAME %s' % control_name)
    if len(found) == 1:
        return found[0].path

    # Ambiguous name: list every candidate in the log before giving up.
    candidate_paths = [t.path for t in found]
    logging.error('Found more than one control file with NAME %s: %s',
                  control_name, candidate_paths)
    raise error.AutoservError(
            'Found more than one control file with NAME %s' % control_name)
333
334
def _stage_control_file(control_name, results_dir):
    """Copy the named control file from the local checkout into the results.

    @param control_name: Name of the control file to stage.
    @param results_dir: Results directory to stage the control file into.
    @return: Path to the staged copy, i.e. results_dir joined with the fixed
            staged-control file name.
    """
    source_path = _control_path_on_disk(control_name)
    staged_path = os.path.join(results_dir, _CONTROL_FILE_FROM_CONTROL_NAME)
    # copy2 preserves the file metadata along with the contents.
    shutil.copy2(source_path, staged_path)
    return staged_path
346
347
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Redirects stdin to /dev/null, detaches into its own session when needed,
    installs the signal handlers, builds a server_job from the parsed options
    and then runs either the requested special task (repair, verify,
    provision, reset, cleanup) or the test itself, optionally inside a
    server-side packaging (SSP) container. Once the job has been attempted the
    process exits through sys.exit(): 0 on success, 1 if running the job
    raised.

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.
    """
    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group if the process is not a process group
    # leader. This allows autoserv process to keep running after the caller
    # process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_id = lxc.ContainerId(job_or_task_id, time.time(), os.getpid())

    def correct_permission_best_effort():
        """Fix results folder ownership without letting a failure escape.

        Used from the SIGTERM handler, where an exception (e.g. sudo not
        permitted) must not prevent the container from being destroyed or the
        process group from being killed.
        """
        try:
            correct_results_folder_permission(results)
        except Exception:
            logging.exception('Failed to correct permission of results '
                              'folder %s.', results)

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')

        # Update results folder's file permission. This needs to be done ASAP
        # before the parsing process tries to access the log.
        if use_ssp and results:
            correct_permission_best_effort()

        # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if use_ssp:
            logging.debug('Destroy container %s before aborting the autoserv '
                          'process.', container_id)
            try:
                bucket = lxc.ContainerBucket()
                container = bucket.get_container(container_id)
                if container:
                    container.destroy()
                else:
                    logging.debug('Container %s is not found.', container_id)
            except:
                # Handle any exception so the autoserv process can be aborted.
                logging.exception('Failed to destroy container %s.',
                                  container_id)
            # Try to correct the result file permission again after the
            # container is destroyed, as the container might have created some
            # new files in the result folder.
            if results:
                correct_permission_best_effort()

        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    execution_tag = parser.options.execution_tag
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    in_lab = bool(parser.options.lab)

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    use_client_trampoline = False
    if parser.options.control_name:
        if use_ssp:
            # When use_ssp is True, autoserv will be re-executed inside a
            # container preserving the --control-name argument. Control file
            # will be staged inside the re-executed autoserv.
            control = None
        else:
            try:
                control = _stage_control_file(parser.options.control_name,
                                              results)
            except error.AutoservError as e:
                logging.info("Using client trampoline because of: %s", e)
                control = parser.options.control_name
                use_client_trampoline = True

    elif parser.args:
        control = parser.args[0]
    else:
        if not is_special_task:
            parser.parser.error("Missing argument: control file")
        control = None

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    job_kwargs = {
            'control': control,
            'args': parser.args[1:],
            'resultdir': results,
            'label': label,
            'user': user,
            'machines': machines,
            'machine_dict_list': server_job.get_machine_dicts(
                    machine_names=machines,
                    store_dir=os.path.join(results,
                                           parser.options.host_info_subdir),
                    in_lab=in_lab,
                    use_shadow_store=not parser.options.local_only_host_info,
                    host_attributes=parser.options.host_attributes,
            ),
            'client': client,
            'ssh_user': ssh_user,
            'ssh_port': ssh_port,
            'ssh_pass': ssh_pass,
            'ssh_verbosity_flag': ssh_verbosity_flag,
            'ssh_options': ssh_options,
            'group_name': group_name,
            'tag': execution_tag,
            'disable_sysinfo': parser.options.disable_sysinfo,
            'in_lab': in_lab,
            'use_client_trampoline': use_client_trampoline,
    }
    # Optional arguments are only passed along when they were supplied.
    if parser.options.parent_job_id:
        job_kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        job_kwargs['control_filename'] = control_filename
    job = server_job.server_job(**job_kwargs)

    job.logging.start_logging()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)

    site_utils.SetupTsMonGlobalState('autoserv', indirect=False,
                                     short_lived=True)
    try:
        try:
            if repair:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                job.repair(job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                if use_ssp:
                    try:
                        _run_with_ssp(job, container_id, job_or_task_id,
                                        results, parser, ssp_url, machines)
                    finally:
                        # Update the ownership of files in result folder.
                        correct_results_folder_permission(results)
                else:
                    if collect_crashinfo:
                        # Update the ownership of files in result folder. If the
                        # job to collect crashinfo was running inside container
                        # (SSP) and crashed before correcting folder permission,
                        # the result folder might have wrong permission setting.
                        try:
                            correct_results_folder_permission(results)
                        except Exception:
                            # Ignore any error as the user may not have root
                            # permission to run sudo command.
                            logging.debug('Ignoring failure to correct '
                                          'permission of results folder.',
                                          exc_info=True)
                    metric_name = ('chromeos/autotest/experimental/'
                                   'autoserv_job_run_duration')
                    f = {'in_container': utils.is_in_container(),
                         'success': False}
                    with metrics.SecondsTimer(metric_name, fields=f) as c:
                        job.run(verify_job_repo_url=verify_job_repo_url,
                                only_collect_crashinfo=collect_crashinfo,
                                skip_crash_collection=skip_crash_collection,
                                job_labels=job_labels,
                                use_packaging=(not no_use_packaging))
                        # Only reached when job.run() did not raise.
                        c['success'] = True

        finally:
            job.close()
            # Special task doesn't run parse, so result summary needs to be
            # built here.
            if results and (repair or verify or reset or cleanup or provision):
                # Throttle the result on the server side.
                try:
                    result_utils.execute(
                            results, control_data.DEFAULT_MAX_RESULT_SIZE_KB)
                except:
                    logging.exception(
                            'Non-critical failure: Failed to throttle results '
                            'in directory %s.', results)
                # Build result view and report metrics for result sizes.
                site_utils.collect_result_sizes(results)
    except:
        # Any failure while running the job is reported through the exit code.
        exit_code = 1
        traceback.print_exc()
    finally:
        metrics.Flush()

    sys.exit(exit_code)
617
618
# Job breakdown statuses
# Short aliases for the host status and host queue entry status enums.
_hs = host_states.Status
_qs = host_queue_entry_states.Status
# All statuses come from the queue entry states except REPAIRING, which is
# taken from the host states.
_status_list = [
        _qs.QUEUED, _qs.RESETTING, _qs.VERIFYING,
        _qs.PROVISIONING, _hs.REPAIRING, _qs.CLEANING,
        _qs.RUNNING, _qs.GATHERING, _qs.PARSING]
# Enum of the statuses above; get_job_status() returns its members.
_JOB_OVERHEAD_STATUS = enum.Enum(*_status_list, string_values=True)
627
628
def get_job_status(options):
    """Map the special-task flags in the options to a job overhead status.

    @param options: parser options.

    @return: The status of the first special-task option found set on
             options, or RUNNING when none of them is set.
    """
    statuses = _JOB_OVERHEAD_STATUS
    task_mapping = {
            'reset': statuses.RESETTING, 'verify': statuses.VERIFYING,
            'provision': statuses.PROVISIONING, 'repair': statuses.REPAIRING,
            'cleanup': statuses.CLEANING,
            'collect_crashinfo': statuses.GATHERING}
    for task in task_mapping:
        # Options lacking the attribute are treated as not set.
        if getattr(options, task, False):
            return task_mapping[task]
    return statuses.RUNNING
641
642
643def _require_ssp_from_control(control_name):
644    """Read the value of REQUIRE_SSP from test control file.
645
646    This reads the control file from the prod checkout of autotest and uses that
647    to determine whether to even stage the SSP package on a devserver.
648
649    This means:
650    [1] Any change in REQUIRE_SSP directive in a test requires a prod-push to go
651    live.
652    [2] This function may find that the control file does not exist but the SSP
653    package may contain the test file. This function conservatively returns True
654    in that case.
655
656    This function is called very early in autoserv, before logging is setup.
657    """
658    if not control_name:
659        return True
660    try:
661        path = _control_path_on_disk(control_name)
662    except error.AutoservError as e:
663        sys.stderr.write("autoserv: Could not determine control file path,"
664                         " assuming we need SSP: %s\n" % e)
665        sys.stderr.flush()
666        return True
667    if not os.path.isfile(path):
668        return True
669    control = control_data.parse_control(path)
670    # There must be explicit directive in the control file to disable SSP.
671    if not control or control.require_ssp is None:
672        return True
673    return control.require_ssp
674
675
def main():
    """Entry point of autoserv.

    Parses the command line, prepares the results directory and logging,
    stages the server-side package when required, and then runs the job
    through run_autoserv(). In testing mode the job is not run: mocked results
    are written for non-task runs and the function returns. Otherwise the
    process exits with the job's exit code; the pid file, if any, is closed
    with that exit code first.
    """
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    # Only computed when logging is enabled; defined up front so that it is
    # always bound.
    resultdir_exists = False
    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        # Any of these files marks the directory as used by a previous job.
        resultdir_exists = any(
                os.path.exists(os.path.join(results, filename))
                for filename in ('control.srv', 'status.log',
                                 '.autoserv_execute'))
        if not parser.options.use_existing_results and resultdir_exists:
            # Logging is not configured yet, so report directly on stderr.
            sys.stderr.write(
                    "Error: results directory already exists: %s\n" % results)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, lets create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    if parser.options.require_ssp:
        # This is currently only used for skylab (i.e., when --control-name is
        # used).
        use_ssp = _require_ssp_from_control(parser.options.control_name)
    else:
        use_ssp = False

    # With SSP the logs of this (outer) autoserv go to a separate ssp_logs
    # sub-folder of the results.
    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))
    logging.debug('autoserv parsed options: %s', parser.options)

    if use_ssp:
        ssp_url = _stage_ssp(parser, results)
    else:
        ssp_url = None

    if results:
        logging.info("Results placed in %s", results)

        # wait until now to perform this check, so it get properly logged
        if (parser.options.use_existing_results and not resultdir_exists and
            not utils.is_in_container()):
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.Autotest.set_install_in_tmpdir(
        parser.options.install_in_tmpdir)

    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. Eg,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a test
    # for the autoserv command's -l option.
    testing_exceptions = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_exceptions', type=list, default=[])
    test_mode = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_mode', type=bool, default=False)
    test_mode = (results_mocker and test_mode and not
                 any(ex in parser.options.label
                     for ex in testing_exceptions))
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)

    trace_labels = {
            'job_id': job_directories.get_job_id_or_task_id(
                    parser.options.results)
    }
    trace = cloud_trace.SpanStack(
            labels=trace_labels,
            global_context=parser.options.cloud_trace_context)
    # The option arrives as a string; only the literal 'True' enables tracing.
    trace.enabled = parser.options.cloud_trace_context_enabled == 'True'
    try:
        try:
            if test_mode:
                # The parser doesn't run on tasks anyway, so we can just return
                # happy signals without faking results.
                if not is_task:
                    machine = parser.options.results.split('/')[-1]

                    # TODO(beeps): The proper way to do this would be to
                    # refactor job creation so we can invoke job.record
                    # directly. To do that one needs to pipe the test_name
                    # through run_autoserv and bail just before invoking
                    # the server job. See the comment in
                    # puppylab/results_mocker for more context.
                    results_mocker.ResultsMocker(
                            'unknown-test', parser.options.results, machine
                            ).mock_results()
                return
            else:
                with trace.Span(get_job_status(parser.options)):
                    run_autoserv(pid_file_manager, results, parser, ssp_url,
                                 use_ssp)
        except SystemExit as e:
            # run_autoserv() finishes through sys.exit(); pick up its code.
            exit_code = e.code
            if exit_code:
                logging.exception('Uncaught SystemExit with code %s', exit_code)
        except Exception:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception('Uncaught Exception, exit_code = 1.')
            exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
    sys.exit(exit_code)


if __name__ == '__main__':
    main()
824