1# Copyright (c) 2013 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5
6import contextlib
7import grp
8import httplib
9import json
10import logging
11import os
12import random
13import re
14import time
15import traceback
16import urllib2
17
18import common
19from autotest_lib.client.bin.result_tools import utils as result_utils
20from autotest_lib.client.bin.result_tools import utils_lib as result_utils_lib
21from autotest_lib.client.bin.result_tools import view as result_view
22from autotest_lib.client.common_lib import lsbrelease_utils
23from autotest_lib.client.common_lib import utils
24from autotest_lib.client.common_lib import error
25from autotest_lib.client.common_lib import file_utils
26from autotest_lib.client.common_lib import global_config
27from autotest_lib.client.common_lib import host_queue_entry_states
28from autotest_lib.client.common_lib import host_states
29from autotest_lib.server.cros import provision
30from autotest_lib.server.cros.dynamic_suite import constants
31from autotest_lib.server.cros.dynamic_suite import job_status
32
33try:
34    from chromite.lib import metrics
35except ImportError:
36    metrics = utils.metrics_mock
37
38
# Shared global_config object used for every config lookup in this module.
CONFIG = global_config.global_config

# Values read from the NOTIFICATIONS section; each defaults to ''.
_SHERIFF_JS = CONFIG.get_config_value(
        'NOTIFICATIONS', 'sheriffs', default='')
_LAB_SHERIFF_JS = CONFIG.get_config_value(
        'NOTIFICATIONS', 'lab_sheriffs', default='')
_CHROMIUM_BUILD_URL = CONFIG.get_config_value(
        'NOTIFICATIONS', 'chromium_build_url', default='')

# Lab 'general_state' values under which tests may be scheduled.
LAB_GOOD_STATES = ('open', 'throttled')

ENABLE_DRONE_IN_RESTRICTED_SUBNET = CONFIG.get_config_value(
        'CROS', 'enable_drone_in_restricted_subnet', type=bool,
        default=False)

# Wait at most 10 minutes (expressed in seconds) for duts to go idle.
IDLE_DUT_WAIT_TIMEOUT = 10 * 60

# Mapping from build target to board name. This special-cases the Android
# boards whose board name does not match their build target name.
ANDROID_TARGET_TO_BOARD_MAP = {
        'seed_l8150': 'gm4g_sprout',
        'bat_land': 'bat',
        }
# Reverse mapping (board name to build target), derived from the map above.
ANDROID_BOARD_TO_TARGET_MAP = dict(
        (board, target)
        for target, board in ANDROID_TARGET_TO_BOARD_MAP.items())

# Prefix for the names of metrics that carry result size information.
RESULT_METRICS_PREFIX = 'chromeos/autotest/result_collection/'
69
class TestLabException(Exception):
    """Signals that the Test Lab is blocking a test or suite."""
73
74
class ParseBuildNameException(Exception):
    """Signals that ParseBuildName() was given a name it cannot parse."""
78
79
class Singleton(type):
    """Metaclass that hands out a single shared instance per class."""
    # Maps each class using this metaclass to its one instance.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Return the instance for cls, constructing it on the first call.

        The arguments are only used when the instance is first created;
        later calls return the existing instance and ignore them.
        """
        registry = cls._instances
        if cls in registry:
            return registry[cls]
        instance = super(Singleton, cls).__call__(*args, **kwargs)
        registry[cls] = instance
        return instance
90
class EmptyAFEHost(object):
    """Stand-in for an AFE host object when no AFE is available."""

    def __init__(self):
        """
        Create the placeholder with empty attributes and labels.

        Only `attributes` and `labels` are provided for now.  As more
        attributes of a real AFE Host object (see
        rpc_interfaces.get_hosts()) come into use they should be added
        here, so that code holding this placeholder does not trip over a
        missing attribute.
        """
        self.labels = []
        self.attributes = {}
105
106
def ParseBuildName(name):
    """Parse a build name into board, type, milestone, and manifest num.

    The pattern is only anchored at the start of |name|; text following a
    well-formed build name is ignored.

    @param name: a build name, e.g. 'x86-alex-release/R20-2015.0.0' or a
                 relative build name, e.g. 'x86-alex-release/LATEST'

    @return: a tuple (board, type, milestone, manifest) where
             board: board the manifest is for, e.g. 'x86-alex'.
             type: build type, e.g. 'release', 'factory', or 'firmware'.
             milestone: milestone the manifest was associated with, as a
                        string of digits (e.g. '20'). None for relative
                        build names.
             manifest: manifest number, e.g. '2015.0.0'. None for relative
                       build names.

    @raises ParseBuildNameException: if |name| does not look like a build
                                     name.

    """
    match = re.match(r'(trybot-)?(?P<board>[\w-]+?)(?:-chrome)?(?:-chromium)?'
                     r'-(?P<type>\w+)/(R(?P<milestone>\d+)-'
                     r'(?P<manifest>[\d.ab-]+)|LATEST)',
                     name)
    if match is None:
        raise ParseBuildNameException('%s is a malformed build name.' % name)
    # The group count is fixed by the pattern, so a successful match is all
    # that needs checking before reading the named groups.
    return match.group('board', 'type', 'milestone', 'manifest')
129
130
def get_labels_from_afe(hostname, label_prefix, afe):
    """Look up the values of a host's labels that share a prefix.

    Queries the AFE for labels on the host whose names start with
    |label_prefix| and strips that prefix from each name.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.

    @returns A list with the "<value>" part of every matching label, or
             None if the host has no matching label.

    """
    matching = afe.get_labels(name__startswith=label_prefix,
                              host__hostname__in=[hostname])
    if not matching:
        return None
    values = []
    for label in matching:
        values.append(label.name.split(label_prefix, 1)[1])
    return values
149
150
def get_label_from_afe(hostname, label_prefix, afe):
    """Look up the value of a host's single label with a given prefix.

    Uses get_labels_from_afe() and only answers when exactly one label
    matches.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.
    @returns the "<value>" part of the one matching label, or None when
             there is no matching label or more than one.

    """
    values = get_labels_from_afe(hostname, label_prefix, afe)
    if not values or len(values) != 1:
        return None
    return values[0]
167
168
def get_board_from_afe(hostname, afe):
    """Look up a host's board from its AFE labels.

    Searches for a label starting with constants.BOARD_PREFIX and returns
    what follows the prefix.  `None` is returned unless exactly one such
    label exists.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns board from label, or `None`.

    """
    board = get_label_from_afe(hostname, constants.BOARD_PREFIX, afe)
    return board
182
183
def get_build_from_afe(hostname, afe):
    """Look up the build currently on a host from its AFE labels.

    The build is read from the label whose name starts with
    provision.CROS_VERSION_PREFIX followed by ':'.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns The current build, or None if no build label was found, if
             several build labels are assigned to this host, or if the
             label carries an empty value.

    """
    build = get_label_from_afe(
            hostname, provision.CROS_VERSION_PREFIX + ':', afe)
    # Normalize any falsy answer (missing label or empty value) to None.
    return build or None
200
201
202# TODO(fdeng): fix get_sheriffs crbug.com/483254
def get_sheriffs(lab_only=False):
    """
    Polls the javascript files that hold the identity of the sheriffs and
    parses their output to return a list of chromium sheriff email addresses.
    A javascript file can contain the ldap of more than one sheriff, eg:
    document.write('sheriff_one, sheriff_two').

    Files that cannot be fetched or parsed are logged and skipped, so the
    result may be partial.

    @param lab_only: if True, only pulls lab sheriff.
    @return: A list of chromium.org sheriff email addresses to cc on the bug.
             An empty list if failed to parse the javascript.
    """
    sheriff_js_list = _LAB_SHERIFF_JS.split(',')
    if not lab_only:
        sheriff_js_list.extend(_SHERIFF_JS.split(','))

    sheriff_ids = []
    for sheriff_js in sheriff_js_list:
        # ''.split(',') yields [''], so an unset config option shows up
        # here as an empty entry; there is no file to fetch for it.
        if not sheriff_js.strip():
            continue
        url = '%s%s' % (_CHROMIUM_BUILD_URL, sheriff_js)
        try:
            url_content = utils.urlopen(url).read()
        # urllib2.URLError derives from IOError, so this clause has to come
        # before the broader IOError one to ever be selected.
        except (urllib2.URLError, httplib.HTTPException) as e:
            logging.warning('unexpected error reading from url "%s": %s',
                            url, str(e))
        except (ValueError, IOError) as e:
            logging.warning('could not parse sheriff from url %s: %s',
                            url, str(e))
        else:
            ldaps = re.search(r"document.write\('(.*)'\)", url_content)
            if not ldaps:
                logging.warning('Could not retrieve sheriff ldaps for: %s',
                                url_content)
                continue
            sheriff_ids += ['%s@chromium.org' % alias.replace(' ', '')
                            for alias in ldaps.group(1).split(',')]
    return sheriff_ids
238
239
def remote_wget(source_url, dest_path, ssh_cmd):
    """Fetch source_url locally and stream it over ssh into dest_path.

    The download runs on localhost; its output is piped through |ssh_cmd|
    into a remote `cat` that writes the destination file.

    NOTE(review): the arguments are interpolated into a shell command line
    without quoting, so they must already be safe for the shell.

    @param source_url: The complete url of the source of the package to send.
    @param dest_path: The path on the remote host's file system where we would
        like to store the package.
    @param ssh_cmd: The ssh command to use in performing the remote wget.
    """
    pipeline_args = (source_url, ssh_cmd, dest_path)
    utils.run("wget -O - %s | %s 'cat >%s'" % pipeline_args)
251
252
_MAX_LAB_STATUS_ATTEMPTS = 5


def _get_lab_status(status_url):
    """Grabs the current lab status and message.

    Makes up to _MAX_LAB_STATUS_ATTEMPTS requests, pausing between
    consecutive attempts.

    @param status_url: URL to fetch the lab status JSON from.

    @returns The JSON object obtained from the given URL, or None if no
             attempt produced a response with status code 200.

    """
    retry_waittime = 1
    for attempt in range(_MAX_LAB_STATUS_ATTEMPTS):
        # Pause between attempts only; sleeping after the final failure
        # would just delay the caller.
        if attempt:
            time.sleep(retry_waittime)
        try:
            response = urllib2.urlopen(status_url)
        except IOError as e:
            logging.debug('Error occurred when grabbing the lab status: %s.',
                          e)
            continue
        try:
            # Check for successful response code.
            if response.getcode() == 200:
                return json.load(response)
        finally:
            # Release the connection whether or not the response was usable.
            response.close()
    return None
274
275
def _decode_lab_status(lab_status, build):
    """Decode lab status, and report exceptions as needed.

    Take a deserialized JSON object from the lab status page, and
    interpret it to determine the actual lab status.  Raise
    exceptions as required to report when the lab is down.

    @param lab_status: deserialized lab status; its 'general_state' and
                       'message' entries are read.
    @param build: build name that we want to check the status of.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.
    """
    # First check if the lab is up.
    if lab_status['general_state'] not in LAB_GOOD_STATES:
        raise TestLabException('Chromium OS Test Lab is closed: '
                               '%s.' % lab_status['message'])

    # Check if the build we wish to use is disabled.
    # Lab messages should be in the format of:
    #    Lab is 'status' [regex ...] (comment)
    # If the build name matches any regex, it will be blocked.
    build_exceptions = re.search(r'\[(.*)\]', lab_status['message'])
    if not build_exceptions or not build:
        return
    for build_pattern in build_exceptions.group(1).split():
        try:
            blocked = re.match(build_pattern, build)
        except re.error as e:
            # The patterns come straight from the status message; a pattern
            # that does not compile should not abort the whole check.
            logging.warning('Ignoring invalid build pattern %r in lab '
                            'status message: %s', build_pattern, e)
            continue
        if blocked:
            raise TestLabException('Chromium OS Test Lab is closed: '
                                   '%s matches %s.' % (
                                           build, build_pattern))
    return
306
307
def is_in_lab():
    """Tell whether this Autotest instance is running in the lab.

    The decision is based on the configured SERVER/hostname value starting
    with 'cautotest'.

    @return: True if the Autotest instance is in lab.
    """
    server_hostname = CONFIG.get_config_value('SERVER', 'hostname')
    in_lab = server_hostname.startswith('cautotest')
    return in_lab
315
316
def check_lab_status(build):
    """Verify that the lab status permits scheduling tests for a build.

    Nothing is checked outside the lab.  Inside the lab the status page is
    downloaded and decoded; a status that cannot be downloaded is treated
    as "lab open".

    @param build: Name of the build to be scheduled for testing.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.

    """
    # Only the actual lab publishes a status worth checking.
    if is_in_lab():
        # Download the lab status from its home on the web.
        status_url = CONFIG.get_config_value('CROS', 'lab_status_url')
        json_status = _get_lab_status(status_url)
        if json_status is not None:
            _decode_lab_status(json_status, build)
        else:
            # We go ahead and say the lab is open if we can't get the status.
            logging.warning('Could not get a status from %s', status_url)
341
342
def host_in_lab(hostname):
    """Tell whether a host should be treated as a lab host.

    @param hostname: hostname to check.

    @return: False when utils.in_moblab_ssp() or
             lsbrelease_utils.is_moblab() reports true; otherwise the
             result of utils.host_is_in_lab_zone(hostname).
    """
    if utils.in_moblab_ssp() or lsbrelease_utils.is_moblab():
        return False
    return utils.host_is_in_lab_zone(hostname)
347
348
def lock_host_with_labels(afe, lock_manager, labels):
    """Find a host carrying all the given labels and lock it.

    Candidates are tried in random order until one can be locked.

    @param afe: An instance of the afe class, as defined in server.frontend.
    @param lock_manager: A lock manager capable of locking hosts, eg the
        one defined in server.cros.host_lock_manager.
    @param labels: A list of labels to look for on hosts.

    @return: The hostname of a host matching all labels, and locked through the
        lock_manager. The hostname will be as specified in the database the afe
        object is associated with, i.e if it exists in afe_hosts with a .cros
        suffix, the hostname returned will contain a .cros suffix.

    @raises: error.NoEligibleHostException: If no hosts matching the list of
        input labels are available.
    @raises: error.TestError: If unable to lock a host matching the labels.
    """
    candidates = afe.get_hosts(multiple_labels=labels)
    if not candidates:
        raise error.NoEligibleHostException(
                'No devices found with labels %s.' % labels)

    # Randomize the order: always locking, say, the same packet capturer
    # for each test run could make a fault look repeatable when it is not.
    random.shuffle(candidates)
    for candidate in candidates:
        name = candidate.hostname
        if not lock_manager.lock([name]):
            logging.info('Unable to lock device %s with labels %s.',
                         name, labels)
            continue
        logging.info('Locked device %s with labels %s.', name, labels)
        return name

    raise error.TestError('Could not lock a device with labels %s' % labels)
384
385
def get_test_views_from_tko(suite_job_id, tko):
    """Get test name and result for given suite job ID.

    Only views accepted by job_status.view_is_relevant() are reported.  If
    several relevant views share a test name, the last one wins.

    @param suite_job_id: ID of suite job.
    @param tko: an instance of TKO as defined in server/frontend.py.
    @return: A dictionary of test status keyed by test name, e.g.,
             {'dummy_Fail.Error': 'ERROR', 'dummy_Fail.NAError': 'TEST_NA'}
    @raise: Exception when there is no test view found.

    """
    views = tko.run('get_detailed_test_views', afe_job_id=suite_job_id)
    # Build a real list: the emptiness check below would never fire on a
    # lazy filter object.
    relevant_views = [view for view in views
                      if job_status.view_is_relevant(view)]
    if not relevant_views:
        raise Exception('Failed to retrieve job results.')

    return dict((view['test_name'], view['status'])
                for view in relevant_views)
406
407
def get_data_key(prefix, suite, build, board):
    """
    Builds a dotted key string out of the given parameters.

    @param prefix: Prefix for the generating key.
    @param suite: a suite name. e.g., bvt-cq, bvt-inline, dummy
    @param build: The build string. This string should have a consistent
        format eg: x86-mario-release/R26-3570.0.0. If the format of this
        string changes such that we can't determine build_type or branch
        we give up and use the parameters we're sure of instead (suite,
        board); build_type and branch then read 'Unknown'. eg:
            1. build = x86-alex-pgo-release/R26-3570.0.0
               branch = 26
               build_type = pgo-release
            2. build = lumpy-paladin/R28-3993.0.0-rc5
               branch = 28
               build_type = paladin
    @param board: The board that this suite ran on.
    @return: The key string used for a dictionary, laid out as
        prefix.board.build_type.branch.suite
    """
    try:
        parsed = ParseBuildName(build)
    except ParseBuildNameException as e:
        logging.error(str(e))
        build_type = branch = 'Unknown'
    else:
        parsed_board, build_type, branch = parsed[:3]
        # For x86 boards, whatever follows the 'x86-<name>-' part of the
        # parsed board name belongs to the build type (e.g. 'pgo').
        extra = re.search(r'x86-\w+-(.*)', parsed_board)
        if extra:
            build_type = extra.group(1) + '-' + build_type

    fields = dict(prefix=prefix, board=board, branch=branch,
                  build_type=build_type, suite=suite)
    return ('%(prefix)s.%(board)s.%(build_type)s.%(branch)s.%(suite)s'
            % fields)
448
449
def setup_logging(logfile=None, prefix=False):
    """Reset the root logger to plain, message-only logging.

    By default only the message is emitted; no severity is logged.  The
    root logger level is set to INFO.

    @param logfile: If specified dump output to a file as well.
    @param prefix: Flag for log prefix. Set to True to add prefix to log
        entries to include timestamp and log level. Default is False.
    """
    # TODO (xixuan): Delete this code when finishing replacing run_suite.py &
    # abort_suite.py in skylab.
    root_logger = logging.getLogger()

    # Drop all existing handlers. client/common_lib/logging_config adds
    # a StreamHandler to logger when modules are imported, e.g.,
    # autotest_lib.client.bin.utils. The StreamHandler installed below
    # replaces it so that only messages are logged, not severity.
    root_logger.handlers = []

    log_format = ('%(asctime)s %(levelname)-5s| %(message)s' if prefix
                  else '%(message)s')

    console = logging.StreamHandler()
    console.setFormatter(logging.Formatter(log_format))
    root_logger.addHandler(console)
    root_logger.setLevel(logging.INFO)

    if not logfile:
        return
    to_file = logging.FileHandler(logfile)
    to_file.setFormatter(logging.Formatter(log_format))
    to_file.setLevel(logging.DEBUG)
    root_logger.addHandler(to_file)
481
482
def is_shard():
    """Tell whether this instance is running as a shard.

    The answer comes from the global_config value shard_hostname in the
    section SHARD.

    @return True, if shard_hostname is set, False otherwise.
    """
    return bool(
            CONFIG.get_config_value('SHARD', 'shard_hostname', default=None))
492
493
def get_global_afe_hostname():
    """Return the global AFE hostname from SERVER/global_afe_hostname."""
    global_afe = CONFIG.get_config_value('SERVER', 'global_afe_hostname')
    return global_afe
497
498
def is_restricted_user(username):
    """Tell whether a user belongs to one of the restricted groups.

    Users in a restricted group only have access to master.  The group
    names come from the comma-separated AUTOTEST_WEB/restricted_groups
    config value; names that are not known groups are skipped.

    @param username: A string, representing a username.

    @returns: True if the user is in a restricted group.
    """
    if not username:
        return False

    group_names = CONFIG.get_config_value(
            'AUTOTEST_WEB', 'restricted_groups', default='').split(',')
    for group_name in group_names:
        if not group_name:
            continue
        try:
            members = grp.getgrnam(group_name).gr_mem
        except KeyError:
            logging.debug("%s is not a valid group.", group_name)
            continue
        if username in members:
            return True
    return False
520
521
def get_special_task_status(is_complete, success, is_active):
    """Map a special task's flags onto a host queue entry status.

    SpecialTasks are not HostQueueEntries, but presenting similar statuses
    is helpful to the user.

    @param is_complete    Boolean if the task is completed.
    @param success        Boolean if the task succeeded.
    @param is_active      Boolean if the task is active.

    @return The status of a special task: COMPLETED or FAILED once the task
            is complete, otherwise RUNNING when active and QUEUED when not.
    """
    statuses = host_queue_entry_states.Status
    if not is_complete:
        return statuses.RUNNING if is_active else statuses.QUEUED
    return statuses.COMPLETED if success else statuses.FAILED
542
543
def get_special_task_exec_path(hostname, task_id, task_name, time_requested):
    """Build the results path of a SpecialTask.

    The path depends on where the task ran:
        * Master: hosts/hostname/task_id-task_type
        * Shard: Master_path/time_requested
    A shard can fail independently of the master and be replaced by another
    shard that has the same hosts. Without the timestamp the logs of the
    tasks running on the second shard would clobber the logs from the first
    in google storage, because task ids are not globally unique.

    @param hostname        Hostname
    @param task_id         Special task id
    @param task_name       Special task name (e.g., Verify, Repair, etc)
    @param time_requested  Special task requested time.

    @return An execution path for the task.
    """
    base_path = 'hosts/%s/%s-%s' % (hostname, task_id, task_name.lower())

    # The timestamp is only appended on shards. Doing it on the master
    # would break backward compatibility, as there are tasks that currently
    # don't have timestamps. If a host or job has been sent to a shard, the
    # rpc for that host/job will be redirected to the shard, so this
    # global_config check will happen on the shard the logs are on.
    if is_shard():
        # A uid disambiguates special task result directories in case this
        # shard fails. The simplest uid would be the job_id, however in rare
        # cases tasks do not have jobs associated with them (eg: frontend
        # verify), so the request timestamp is used. The clocks between a
        # shard and master should always be in sync. Any discrepancies will
        # be brought to our attention in the form of job timeouts.
        # NOTE(review): the format is year-day-month; it is kept as is
        # because existing result paths were generated with it.
        stamp = time_requested.strftime('%Y%d%m%H%M%S')

        # TODO: This is a hack, however it is the easiest way to achieve
        # correctness. There is currently some debate over the future of
        # tasks in our infrastructure and refactoring everything right
        # now isn't worth the time.
        return '%s/%s' % (base_path, stamp)
    return base_path
587
588
def get_job_tag(id, owner):
    """Build the string tag of a job, of the form '<id>-<owner>'.

    @param id    Job id
    @param owner Job owner

    @return The job tag string.

    """
    tag = '%s-%s' % (id, owner)
    return tag
597
598
def get_hqe_exec_path(tag, execution_subdir):
    """Build the execution path to a HQE's results.

    @param tag               Tag string for a job associated with a HQE.
    @param execution_subdir  Execution sub-directory string of a HQE.

    @return The tag and the sub-directory joined as a path.

    """
    exec_path = os.path.join(tag, execution_subdir)
    return exec_path
607
608
def is_inside_chroot():
    """Check if the process is running inside chroot.

    This is a wrapper around chromite.lib.cros_build_lib.IsInsideChroot(). The
    method checks if cros_build_lib can be imported first.

    @return: True if the process is running inside chroot. False if it is
             not, or if cros_build_lib cannot be imported.

    """
    try:
        # TODO(crbug.com/739466) This module import is delayed because it adds
        # 1-2 seconds to the module import time and most users of site_utils
        # don't need it. The correct fix is to break apart site_utils into more
        # meaningful chunks.
        from chromite.lib import cros_build_lib
    except ImportError:
        logging.warning('Unable to import chromite. Can not detect chroot. '
                        'Defaulting to False')
        return False
    return cros_build_lib.IsInsideChroot()
630
631
def parse_job_name(name):
    """Parse job name to get information including build, board and suite etc.

    Suite job created by run_suite follows the naming convention of:
    [build]-test_suites/control.[suite]
    For example: lumpy-release/R46-7272.0.0-test_suites/control.bvt
    The naming convention is defined in rpc_interface.create_suite_job.

    Test job created by suite job follows the naming convention of:
    [build]/[suite]/[test name]
    For example: lumpy-release/R46-7272.0.0/bvt/login_LoginSuccess
    The naming convention is defined in
    server/cros/dynamic_suite/tools.create_job_name

    Note that pgo and chrome-perf builds will fail the method. Since lab does
    not run test for these builds, they can be ignored.
    Also, tests for Launch Control builds have different naming convention.
    The build ID will be used as build_version.

    @param name: Name of the job.

    @return: A dictionary containing the test information. It is empty when
             the name follows neither convention, and 'board' is absent
             when the build cannot be parsed. The keyvals include:
             build: Name of the build, e.g., lumpy-release/R46-7272.0.0
             build_version: The version of the build, e.g., R46-7272.0.0
             board: Name of the board, e.g., lumpy
             suite: Name of the test suite, e.g., bvt

    """
    info = {}
    suite_job_regex = r'([^/]*/[^/]*(?:/\d+)?)-test_suites/control\.(.*)'
    test_job_regex = r'([^/]*/[^/]*(?:/\d+)?)/([^/]+)/.*'
    # The suite job convention takes precedence over the test job one.
    match = re.match(suite_job_regex, name) or re.match(test_job_regex, name)
    if not match:
        return info

    info['build'], info['suite'] = match.group(1, 2)
    info['build_version'] = info['build'].split('/')[1]
    try:
        info['board'] = ParseBuildName(info['build'])[0]
    except ParseBuildNameException:
        # Try to parse it as Launch Control build
        # Launch Control builds have name format:
        # branch/build_target-build_type/build_id.
        try:
            _, target, build_id = utils.parse_launch_control_build(
                    info['build'])
            build_target, _ = utils.parse_launch_control_target(target)
        except ValueError:
            # Not a Launch Control build either; report what we have.
            pass
        else:
            if build_target:
                info['board'] = build_target
                info['build_version'] = build_id
    return info
686
687
def verify_not_root_user():
    """Raise error.IllegalUser when the process runs with uid == 0."""
    running_as_root = os.getuid() == 0
    if running_as_root:
        raise error.IllegalUser('This script can not be ran as root.')
692
693
def get_hostname_from_machine(machine):
    """Extract the hostname from a machine string or dict.

    @param machine: machine given either as a hostname string or as a
                    dict, as accepted by get_host_info_from_machine().

    @returns: Machine hostname in string format.
    """
    return get_host_info_from_machine(machine)[0]
701
702
def get_host_info_from_machine(machine):
    """Extract host information from a machine string or dict.

    @param machine: either a dict with 'hostname' and 'afe_host' entries,
                    or the hostname itself.

    @returns: Tuple of (hostname, afe_host). For a non-dict machine the
              afe_host is a fresh EmptyAFEHost.
    """
    if not isinstance(machine, dict):
        return (machine, EmptyAFEHost())
    return (machine['hostname'], machine['afe_host'])
712
713
def get_afe_host_from_machine(machine):
    """Extract the afe_host from a machine string or dict.

    @param machine: machine given either as a hostname string or as a
                    dict, as accepted by get_host_info_from_machine().

    @returns: AFE host object.
    """
    return get_host_info_from_machine(machine)[1]
721
722
def get_connection_pool_from_machine(machine):
    """Returns the ssh_multiplex.ConnectionPool from machine if possible.

    @param machine: machine given either as a string or as a dict.

    @returns: The dict's 'connection_pool' entry; None when the entry is
              missing or the machine is not a dict.
    """
    return (machine.get('connection_pool')
            if isinstance(machine, dict) else None)
728
729
def get_creds_abspath(creds_file):
    """Resolve the path of a credentials file.

    An absolute creds_file is returned untouched.  A relative one is
    looked up in the creds directory named by SERVER/creds_dir in
    global_config, falling back to common.autotest_dir when that
    directory is unset or does not exist.

    @param creds_file: a path to the credentials file.
    @return: The resolved path to the credentials file, or None when
             creds_file is empty.
    """
    if not creds_file:
        return None
    if os.path.isabs(creds_file):
        return creds_file
    base_dir = CONFIG.get_config_value('SERVER', 'creds_dir', default='')
    if not (base_dir and os.path.exists(base_dir)):
        base_dir = common.autotest_dir
    return os.path.join(base_dir, creds_file)
748
749
def SetupTsMonGlobalState(*args, **kwargs):
    """Import-safe wrap around chromite.lib.ts_mon_config's setup function.

    @param *args: Args to pass through.
    @param **kwargs: Kwargs to pass through.

    @return: The context manager returned by
             ts_mon_config.SetupTsMonGlobalState. A TrivialContextManager
             is returned instead when chromite cannot be imported, when
             the setup raises, or when its result has no __exit__.
    """
    try:
        # TODO(crbug.com/739466) This module import is delayed because it adds
        # 1-2 seconds to the module import time and most users of site_utils
        # don't need it. The correct fix is to break apart site_utils into more
        # meaningful chunks.
        from chromite.lib import ts_mon_config
    except ImportError:
        logging.warning('Unable to import chromite. Monarch is disabled.')
        return TrivialContextManager()

    try:
        context = ts_mon_config.SetupTsMonGlobalState(*args, **kwargs)
        if hasattr(context, '__exit__'):
            return context
    except Exception as e:
        # Monitoring is best effort; a failed setup must not stop the caller.
        logging.warning('Caught an exception trying to setup ts_mon, '
                        'monitoring is disabled: %s', e, exc_info=True)
    return TrivialContextManager()
774
775
@contextlib.contextmanager
def TrivialContextManager(*args, **kwargs):
    """No-op context manager; the `with` target is bound to None.

    @param *args: Ignored args
    @param **kwargs: Ignored kwargs.
    """
    yield None
784
785
def wait_for_idle_duts(duts, afe, max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Poll the AFE until every given host is idle or time runs out.

    The AFE is polled about once a second.  Hosts found in one of
    host_states.IDLE_STATES are dropped from later polls.

    @param duts: List of duts to check for idle state.
    @param afe: afe instance.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @returns Boolean True if all hosts are idle or False if any hosts did not
            go idle within max_wait.
    """
    started_at = time.time()
    # Work on a shallow copy; entries are removed as hosts go idle.
    pending = duts[:]
    while pending:
        # Let's rate-limit how often we hit the AFE.
        time.sleep(1)

        # Give up once we have waited too long.
        elapsed = time.time() - started_at
        if elapsed > max_wait:
            return False

        # Ask only about the hosts that were not idle so far.
        newly_idle = [host.hostname for host in afe.get_hosts(pending)
                      if host.status in host_states.IDLE_STATES]
        for hostname in newly_idle:
            pending.remove(hostname)

        logging.info('still waiting for following duts to go idle: %s',
                     pending)
    return True
821
822
@contextlib.contextmanager
def lock_duts_and_wait(duts, afe, lock_msg='default lock message',
                       max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Context manager to lock the duts and wait for them to go idle.

    Duts are locked one at a time in sorted order. A dut for which
    afe.lock_host() returns a false value is logged as already locked and
    skipped: it is neither waited on nor unlocked on exit. Every dut locked
    here is unlocked when the context exits, including when locking, waiting
    or the with-body raises.

    @param duts: List of duts to lock. The list is not modified.
    @param afe: afe instance.
    @param lock_msg: message for afe on locking this host.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @returns Yields the Boolean result of wait_for_idle_duts() for the duts
             locked here: True if they all went idle, or False if we timed
             out waiting too long for them to go idle. Duts skipped as
             already locked do not affect this value.
    """
    # Initialized before the try block so the finally clause can always
    # reference it.
    locked_duts = []
    try:
        # sorted() keeps the lock order deterministic without reordering the
        # caller's list in place.
        for dut in sorted(duts):
            if afe.lock_host(dut, lock_msg, fail_if_locked=True):
                locked_duts.append(dut)
            else:
                logging.info('%s already locked', dut)
        yield wait_for_idle_duts(locked_duts, afe, max_wait)
    finally:
        # Only release the locks taken here; hosts that were already locked
        # by someone else stay locked.
        afe.unlock_hosts(locked_duts)
847
848
def _get_default_size_info(path):
    """Build fallback result size information from the directory size.

    Meant for the case where the directory summary could not be built: the
    test result is assumed not to be throttled, and every size field is set
    to the total size of the existing results under the given path.

    @param path: Path of the result directory to measure.

    @return: A namedtuple of result size information, including:
            client_result_collected_KB: The total size (in KB) of test results
                    collected from test device. Here, the total size of the
                    given path.
            original_result_total_KB: The original size (in KB) of test results
                    before being trimmed. Here, the total size of the given
                    path.
            result_uploaded_KB: The total size (in KB) of test results to be
                    uploaded. Here, the total size of the given path.
            result_throttled: True if test results collection is throttled.
                    Always False for this fallback.
    """
    dir_size_kb = file_utils.get_directory_size_kibibytes(path)
    return result_utils_lib.ResultSizeInfo(
            result_throttled=False,
            client_result_collected_KB=dir_size_kb,
            original_result_total_KB=dir_size_kb,
            result_uploaded_KB=dir_size_kb)
873
874
def _report_result_size_metrics(result_size_info):
    """Send the result size numbers to metrics counters.

    One counter per size field is incremented by that field's value. Each
    increment carries the result_throttled flag as a metric field.

    @param result_size_info: A ResultSizeInfo namedtuple containing information
            of test result sizes.
    """
    # Each entry: (attribute of result_size_info, which is also the counter
    # name suffix, and the counter description).
    counters = (
            ('client_result_collected_KB',
             'The total size (in KB) of test results collected from test '
             'device. Set to be the total size of the given path.'),
            ('original_result_total_KB',
             'The original size (in KB) of test results before being '
             'trimmed.'),
            ('result_uploaded_KB',
             'The total size (in KB) of test results to be uploaded.'),
    )
    throttled_fields = {'result_throttled': result_size_info.result_throttled}
    for size_name, description in counters:
        counter = metrics.Counter(RESULT_METRICS_PREFIX + size_name,
                                  description=description)
        counter.increment_by(getattr(result_size_info, size_name),
                             fields=throttled_fields)
898
899
@metrics.SecondsTimerDecorator(
        'chromeos/autotest/result_collection/collect_result_sizes_duration')
def collect_result_sizes(path, log=logging.debug):
    """Collect the result sizes information and build result summary.

    It first tries to merge directory summaries and calculate the result sizes
    including:
    client_result_collected_KB: The volume in KB that's transferred from the
            test device.
    original_result_total_KB: The volume in KB that's the original size of the
            result files before being trimmed.
    result_uploaded_KB: The volume in KB that will be uploaded.
    result_throttled: Indicating if the result files were throttled.

    On success an html summary view is written into the result directory and
    the merged summary files are deleted.

    If any of those steps fails, fall back to use the total size of the given
    result directory. The size information is reported to metrics either way.

    @param path: Path of the result directory to get size information.
    @param log: The logging method, default to logging.debug. It is called
            with a single, already formatted message string.
    @return: A ResultSizeInfo namedtuple containing information of test result
             sizes.
    """
    try:
        client_collected_bytes, summary, files = result_utils.merge_summaries(
                path)
        result_size_info = result_utils_lib.get_result_size_info(
                client_collected_bytes, summary)
        html_file = os.path.join(path, result_view.DEFAULT_RESULT_SUMMARY_NAME)
        result_view.build(client_collected_bytes, summary, html_file)

        # Delete all summary files after final view is built.
        for summary_file in files:
            os.remove(summary_file)
    except Exception:
        # Best effort: any ordinary failure falls back to the plain directory
        # size. KeyboardInterrupt and SystemExit are deliberately not caught
        # so the process can still be interrupted or exit.
        log('Failed to calculate result sizes based on directory summaries for '
            'directory %s. Fall back to record the total size.\nException: %s' %
            (path, traceback.format_exc()))
        result_size_info = _get_default_size_info(path)

    _report_result_size_metrics(result_size_info)

    return result_size_info
942