1# Lint as: python2, python3
2# Copyright (c) 2013 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6
7from __future__ import absolute_import
8from __future__ import division
9from __future__ import print_function
10
11import collections
12import contextlib
13import grp
14import six.moves.http_client
15import json
16import logging
17import os
18import random
19import re
20import time
21import traceback
22from six.moves import filter
23from six.moves import range
24from six.moves import urllib
25
26import common
27from autotest_lib.client.bin.result_tools import utils as result_utils
28from autotest_lib.client.bin.result_tools import utils_lib as result_utils_lib
29from autotest_lib.client.bin.result_tools import view as result_view
30from autotest_lib.client.common_lib import lsbrelease_utils
31from autotest_lib.client.common_lib import utils
32from autotest_lib.client.common_lib import error
33from autotest_lib.client.common_lib import file_utils
34from autotest_lib.client.common_lib import global_config
35from autotest_lib.client.common_lib import host_queue_entry_states
36from autotest_lib.client.common_lib import host_states
37from autotest_lib.server.cros import provision
38from autotest_lib.server.cros.dynamic_suite import constants
39from autotest_lib.server.cros.dynamic_suite import job_status
40
41try:
42    from chromite.lib import metrics
43except ImportError:
44    metrics = utils.metrics_mock
45
46
# Handle on the global configuration object; every setting below is read
# from it once, at import time.
CONFIG = global_config.global_config

# NOTIFICATIONS settings consumed by get_sheriffs(): the two *_JS values are
# comma-separated lists, and each entry is appended to _CHROMIUM_BUILD_URL to
# form the URL that gets fetched. All three default to the empty string.
_SHERIFF_JS = CONFIG.get_config_value('NOTIFICATIONS', 'sheriffs', default='')
_LAB_SHERIFF_JS = CONFIG.get_config_value(
        'NOTIFICATIONS', 'lab_sheriffs', default='')
_CHROMIUM_BUILD_URL = CONFIG.get_config_value(
        'NOTIFICATIONS', 'chromium_build_url', default='')

# Values of the lab status 'general_state' field for which
# _decode_lab_status() does not treat the lab as closed.
LAB_GOOD_STATES = ('open', 'throttled')

# Boolean read from the CROS section; the default passed here is False.
ENABLE_DRONE_IN_RESTRICTED_SUBNET = CONFIG.get_config_value(
        'CROS', 'enable_drone_in_restricted_subnet', type=bool,
        default=False)

# Wait at most 10 mins for duts to go idle.
IDLE_DUT_WAIT_TIMEOUT = 600
63
# Build target -> board name. Special-case handling for the Android boards
# whose board name does not match their build target name.
ANDROID_TARGET_TO_BOARD_MAP = {
        'seed_l8150': 'gm4g_sprout',
        'bat_land': 'bat',
}
# Board name -> build target: the exact inverse of the map above.
ANDROID_BOARD_TO_TARGET_MAP = dict(
        (board, target)
        for target, board in ANDROID_TARGET_TO_BOARD_MAP.items())
# Prefix for the metrics name for result size information.
RESULT_METRICS_PREFIX = 'chromeos/autotest/result_collection/'
77
class TestLabException(Exception):
    """Signals that the Test Lab is refusing to run a test or suite."""
81
82
class ParseBuildNameException(Exception):
    """Signals that ParseBuildName() was given a name it cannot parse."""
86
87
class Singleton(type):
    """Metaclass that hands out one shared instance per class.

    The first call to a class using this metaclass constructs the instance;
    every later call returns that same object and ignores its arguments.
    """
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Return the cached instance of `cls`, building it on first use."""
        registry = cls._instances
        if cls in registry:
            return registry[cls]
        created = super(Singleton, cls).__call__(*args, **kwargs)
        registry[cls] = created
        return registry[cls]
98
class EmptyAFEHost(object):
    """Stand-in for an AFE host object when no AFE is available."""

    def __init__(self):
        """Create a host that has no attributes and no labels.

        Only `attributes` and `labels` are provided for now.  When code
        starts relying on further fields of a real AFE Host object (see
        rpc_interfaces.get_hosts()), add them here too so that users of a
        host's afe_host are not surprised by a missing attribute.
        """
        self.labels = []
        self.attributes = {}
113
114
def ParseBuildName(name):
    """Parse a build name into board, type, milestone, and manifest num.

    Matching is anchored at the start of `name` only; trailing text after
    the recognized portion is ignored.

    @param name: a build name, e.g. 'x86-alex-release/R20-2015.0.0' or a
                 relative build name, e.g. 'x86-alex-release/LATEST'

    @return A tuple (board, type, milestone, manifest):
            board: board the manifest is for, e.g. 'x86-alex'.
            type: build type, e.g. 'release', 'factory', or 'firmware'.
            milestone: milestone digits as a string, e.g. '20'.
                       None for relative build names.
            manifest: manifest number, e.g. '2015.0.0'.
                      None for relative build names.

    @raises ParseBuildNameException if `name` is not a recognized build name.

    """
    match = re.match(r'(trybot-)?(?P<board>[\w-]+?)(?:-chrome)?(?:-chromium)?'
                     r'-(?P<type>\w+)/(R(?P<milestone>\d+)-'
                     r'(?P<manifest>[\d.ab-]+)|LATEST)',
                     name)
    # The pattern always defines six groups, so a successful match is the
    # only condition that needs checking.
    if match is None:
        raise ParseBuildNameException('%s is a malformed build name.' % name)
    return (match.group('board'), match.group('type'),
            match.group('milestone'), match.group('manifest'))
137
138
def get_labels_from_afe(hostname, label_prefix, afe):
    """Fetch the values of a host's AFE labels that share a prefix.

    Queries the AFE for labels on `hostname` whose name starts with
    `label_prefix`, and returns what follows the prefix in each label name.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.

    @returns A list with the "<value>" part of every matching label, or
             None if no label matches.

    """
    matching = afe.get_labels(name__startswith=label_prefix,
                              host__hostname__in=[hostname])
    if not matching:
        return None
    values = []
    for label in matching:
        values.append(label.name.split(label_prefix, 1)[1])
    return values
157
158
def get_label_from_afe(hostname, label_prefix, afe):
    """Fetch the value of a host's single AFE label with a given prefix.

    Delegates to get_labels_from_afe() and only yields a value when exactly
    one label matched.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.
    @returns the "<value>" part of the matching label, or None when there
             is no match or more than one match.

    """
    found = get_labels_from_afe(hostname, label_prefix, afe)
    if not found or len(found) != 1:
        return None
    return found[0]
175
176
def get_board_from_afe(hostname, afe):
    """Look up a host's board via its AFE labels.

    Uses constants.BOARD_PREFIX as the label prefix and returns the part of
    the label that follows it.  `None` is returned unless exactly one such
    label exists on the host.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns board from label, or `None`.

    """
    board_prefix = constants.BOARD_PREFIX
    return get_label_from_afe(hostname, board_prefix, afe)
190
191
def get_build_from_afe(hostname, afe):
    """Look up a host's current build via its AFE labels.

    The build is taken from the label named
    "<provision.CROS_VERSION_PREFIX>:<build>".

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns The current build, or None if no build label was found, if
             several build labels are assigned to this host, or if the
             label's value is empty.

    """
    label_prefix = provision.CROS_VERSION_PREFIX + ':'
    build = get_label_from_afe(hostname, label_prefix, afe)
    return build if build else None
208
209
# TODO(fdeng): fix get_sheriffs crbug.com/483254
def get_sheriffs(lab_only=False):
    """
    Polls the javascript files that hold the identity of the sheriffs and
    parses their output to return a list of chromium sheriff email addresses.
    A javascript file can contain the ldap of more than one sheriff, eg:
    document.write('sheriff_one, sheriff_two').

    Files that cannot be fetched or parsed are logged and skipped.

    @param lab_only: if True, only pulls lab sheriff.
    @return: A list of chromium.org sheriff email addresses to cc on the bug.
             An empty list if failed to parse the javascript.
    """
    sheriff_ids = []
    sheriff_js_list = _LAB_SHERIFF_JS.split(',')
    if not lab_only:
        sheriff_js_list.extend(_SHERIFF_JS.split(','))

    for sheriff_js in sheriff_js_list:
        # An unset config value splits into [''], which would otherwise
        # make us fetch the bare base URL.
        if not sheriff_js:
            continue
        try:
            url_content = utils.urlopen('%s%s'% (
                _CHROMIUM_BUILD_URL, sheriff_js)).read()
        except (ValueError, IOError) as e:
            logging.warning('could not parse sheriff from url %s%s: %s',
                             _CHROMIUM_BUILD_URL, sheriff_js, str(e))
        except (urllib.error.URLError, six.moves.http_client.HTTPException) as e:
            logging.warning('unexpected error reading from url "%s%s": %s',
                             _CHROMIUM_BUILD_URL, sheriff_js, str(e))
        else:
            # On Python 3 read() returns bytes, which cannot be searched
            # with the str pattern below.
            if not isinstance(url_content, str):
                url_content = url_content.decode('utf-8', 'replace')
            ldaps = re.search(r"document.write\('(.*)'\)", url_content)
            if not ldaps:
                logging.warning('Could not retrieve sheriff ldaps for: %s',
                                 url_content)
                continue
            aliases = [alias.replace(' ', '')
                       for alias in ldaps.group(1).split(',')]
            # Drop empty aliases so we never emit a bare '@chromium.org'.
            sheriff_ids += ['%s@chromium.org' % alias
                            for alias in aliases if alias]
    return sheriff_ids
246
247
def remote_wget(source_url, dest_path, ssh_cmd):
    """Download source_url locally and stream it to dest_path over ssh.

    The download runs on localhost; its output is piped through `ssh_cmd`
    into a remote `cat` that writes the destination file.  The arguments are
    interpolated into the shell command as-is, without quoting.

    @param source_url: The complete url of the source of the package to send.
    @param dest_path: The path on the remote host's file system where we would
        like to store the package.
    @param ssh_cmd: The ssh command to use in performing the remote wget.
    """
    pipeline_args = (source_url, ssh_cmd, dest_path)
    pipeline = "wget -O - %s | %s 'cat >%s'" % pipeline_args
    utils.run(pipeline)
259
260
_MAX_LAB_STATUS_ATTEMPTS = 5


def _get_lab_status(status_url):
    """Grabs the current lab status and message.

    Tries up to _MAX_LAB_STATUS_ATTEMPTS times, sleeping one second after
    every failed attempt (an IOError from the request, or a response code
    other than 200).

    @param status_url: URL to fetch the lab status from.

    @returns The JSON object obtained from the given URL, or None if no
             attempt produced a 200 response.

    """
    retry_waittime = 1
    for _ in range(_MAX_LAB_STATUS_ATTEMPTS):
        try:
            response = urllib.request.urlopen(status_url)
        except IOError as e:
            logging.debug('Error occurred when grabbing the lab status: %s.',
                          e)
            time.sleep(retry_waittime)
            continue
        # Always release the connection, whether or not the response is
        # usable.
        with contextlib.closing(response):
            # Check for successful response code.
            if response.getcode() == 200:
                return json.load(response)
        time.sleep(retry_waittime)
    return None
282
283
def _decode_lab_status(lab_status, build):
    """Decode lab status, and report exceptions as needed.

    Take a deserialized JSON object from the lab status page, and
    interpret it to determine the actual lab status.  Raise
    exceptions as required to report when the lab is down.

    @param lab_status: deserialized lab status; its 'general_state' and
                       'message' entries are read.
    @param build: build name that we want to check the status of.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.
    """
    # First check if the lab is up.
    if lab_status['general_state'] not in LAB_GOOD_STATES:
        raise TestLabException('Chromium OS Test Lab is closed: '
                               '%s.' % lab_status['message'])

    # Check if the build we wish to use is disabled.
    # Lab messages should be in the format of:
    #    Lab is 'status' [regex ...] (comment)
    # If the build name matches any regex, it will be blocked.
    build_exceptions = re.search(r'\[(.*)\]', lab_status['message'])
    if not build_exceptions or not build:
        return
    # The bracketed text is a whitespace-separated list of regexes.
    for build_pattern in build_exceptions.group(1).split():
        if re.match(build_pattern, build):
            raise TestLabException('Chromium OS Test Lab is closed: '
                                   '%s matches %s.' % (
                                           build, build_pattern))
314
315
def is_in_lab():
    """Tell whether this Autotest instance is running in the lab.

    The decision is based on the SERVER/hostname config value starting with
    'cautotest'.

    @return: True if the Autotest instance is in lab.
    """
    server_hostname = CONFIG.get_config_value('SERVER', 'hostname')
    in_lab = server_hostname.startswith('cautotest')
    return in_lab
323
324
def check_lab_status(build):
    """Verify that the lab status permits scheduling tests for a build.

    Nothing is checked outside the lab.  Inside the lab, the status is
    downloaded from the CROS/lab_status_url config value; if it cannot be
    retrieved, the lab is treated as open.

    @param build: Name of the build to be scheduled for testing.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.

    """
    # Only the actual lab publishes a status worth checking.
    if is_in_lab():
        lab_status_url = CONFIG.get_config_value('CROS', 'lab_status_url')
        lab_status = _get_lab_status(lab_status_url)
        if lab_status is not None:
            _decode_lab_status(lab_status, build)
        else:
            # No status available: carry on as if the lab were open.
            logging.warning('Could not get a status from %s', lab_status_url)
349
350
def host_in_lab(hostname):
    """Tell whether the execution is against a host in the lab.

    Moblab environments (SSP or otherwise) never count as the lab; for
    anything else the answer comes from utils.host_is_in_lab_zone().

    @param hostname: hostname to check.
    """
    if utils.in_moblab_ssp():
        return False
    if lsbrelease_utils.is_moblab():
        return False
    return utils.host_is_in_lab_zone(hostname)
356
357
def lock_host_with_labels(afe, lock_manager, labels):
    """Find a host carrying all the given labels and lock it.

    Candidate hosts are tried in random order and the first one that can be
    locked is returned.

    @param afe: An instance of the afe class, as defined in server.frontend.
    @param lock_manager: A lock manager capable of locking hosts, eg the
        one defined in server.cros.host_lock_manager.
    @param labels: A list of labels to look for on hosts.

    @return: The hostname of a host matching all labels, and locked through the
        lock_manager. The hostname will be as specified in the database the afe
        object is associated with, i.e if it exists in afe_hosts with a .cros
        suffix, the hostname returned will contain a .cros suffix.

    @raises: error.NoEligibleHostException: If no hosts matching the list of
        input labels are available.
    @raises: error.TestError: If unable to lock a host matching the labels.
    """
    candidates = afe.get_hosts(multiple_labels=labels)
    if not candidates:
        raise error.NoEligibleHostException(
                'No devices found with labels %s.' % labels)

    # Randomize the order so that a fault does not merely look repeatable
    # because every run locked, say, the same packet capturer.
    random.shuffle(candidates)
    for candidate in candidates:
        name = candidate.hostname
        if not lock_manager.lock([name]):
            logging.info('Unable to lock device %s with labels %s.',
                         name, labels)
            continue
        logging.info('Locked device %s with labels %s.', name, labels)
        return name

    raise error.TestError('Could not lock a device with labels %s' % labels)
393
394
def get_test_views_from_tko(suite_job_id, tko):
    """Collect the statuses of every test run by a suite job.

    Only views accepted by job_status.view_is_relevant() are considered.

    @param suite_job_id: ID of suite job.
    @param tko: an instance of TKO as defined in server/frontend.py.
    @return: A defaultdict where keys are test names and values are
             lists of test statuses, e.g.,
             {'dummy_Fail.Error': ['ERROR', 'ERROR'],
              'dummy_Fail.NAError': ['TEST_NA'],
              'dummy_Fail.RetrySuccess': ['ERROR', 'GOOD'],
              }
    @raise: Exception when there is no test view found.

    """
    all_views = tko.run('get_detailed_test_views', afe_job_id=suite_job_id)
    relevant_views = [view for view in all_views
                      if job_status.view_is_relevant(view)]
    if not relevant_views:
        raise Exception('Failed to retrieve job results.')

    statuses_by_test = collections.defaultdict(list)
    for view in relevant_views:
        test_name = view['test_name']
        statuses_by_test[test_name].append(view['status'])
    return statuses_by_test
418
419
def get_data_key(prefix, suite, build, board):
    """
    Builds a dotted key string out of the parameters.

    @param prefix: Prefix for the generating key.
    @param suite: a suite name. e.g., bvt-cq, bvt-inline, dummy
    @param build: The build string. This string should have a consistent
        format eg: x86-mario-release/R26-3570.0.0. If the format of this
        string changes such that we can't determine build_type or branch
        we give up and use the parameters we're sure of instead (suite,
        board), with 'Unknown' standing in for the other two. eg:
            1. build = x86-alex-pgo-release/R26-3570.0.0
               branch = 26
               build_type = pgo-release
            2. build = lumpy-paladin/R28-3993.0.0-rc5
               branch = 28
               build_type = paladin
    @param board: The board that this suite ran on.
    @return: The key string used for a dictionary, in the form
        prefix.board.build_type.branch.suite
    """
    # Fallbacks used when the build name cannot be parsed.
    build_type = 'Unknown'
    branch = 'Unknown'
    try:
        parsed_board, build_type, branch = ParseBuildName(build)[:3]
    except ParseBuildNameException as e:
        logging.error(str(e))
    else:
        # For x86-<name>-<extra> boards, the trailing <extra> belongs to the
        # build type (e.g. 'pgo' + 'release' -> 'pgo-release').
        extra = re.search(r'x86-\w+-(.*)', parsed_board)
        if extra:
            build_type = extra.group(1) + '-' + build_type

    key_parts = {
        'prefix': prefix,
        'board': board,
        'branch': branch,
        'build_type': build_type,
        'suite': suite,
    }
    key_format = '%(prefix)s.%(board)s.%(build_type)s.%(branch)s.%(suite)s'
    return key_format % key_parts
460
461
def is_shard():
    """Tell whether this instance is running as a shard.

    The answer is derived from the SHARD/shard_hostname global_config value.

    @return True, if shard_hostname is set, False otherwise.
    """
    shard_hostname = CONFIG.get_config_value(
            'SHARD', 'shard_hostname', default=None)
    return True if shard_hostname else False
471
472
def get_global_afe_hostname():
    """Return the SERVER/global_afe_hostname value from the global config."""
    global_afe = CONFIG.get_config_value('SERVER', 'global_afe_hostname')
    return global_afe
476
477
def is_restricted_user(username):
    """Tell whether a user belongs to a restricted group.

    User in restricted group only have access to main.  The restricted
    groups are the comma-separated names in the
    AUTOTEST_WEB/restricted_groups config value; names that are not valid
    system groups are logged and ignored.

    @param username: A string, representing a username.

    @returns: True if the user is in a restricted group.
    """
    if not username:
        return False

    groups_setting = CONFIG.get_config_value(
            'AUTOTEST_WEB', 'restricted_groups', default='')
    for group_name in groups_setting.split(','):
        if not group_name:
            continue
        try:
            members = grp.getgrnam(group_name).gr_mem
        except KeyError:
            logging.debug("%s is not a valid group.", group_name)
            continue
        if username in members:
            return True
    return False
499
500
def get_special_task_status(is_complete, success, is_active):
    """Map a special task's flags onto a host queue entry status.

    SpecialTasks are not HostQueueEntries, but presenting them with the
    same statuses is helpful to the user.

    @param is_complete    Boolean if the task is completed.
    @param success        Boolean if the task succeeded.
    @param is_active      Boolean if the task is active.

    @return COMPLETED or FAILED for a completed task (depending on
            success), otherwise RUNNING if active, otherwise QUEUED.
    """
    status = host_queue_entry_states.Status
    if not is_complete:
        return status.RUNNING if is_active else status.QUEUED
    return status.COMPLETED if success else status.FAILED
521
522
def get_special_task_exec_path(hostname, task_id, task_name, time_requested):
    """Compute the results path of a SpecialTask.

    The path depends on where the task ran:
        * main: hosts/hostname/task_id-task_type
        * Shard: main_path/time_created
    A shard can fail independently of the main and be replaced by another
    shard that has the same hosts. Task ids are not globally unique, so
    without the timestamp the logs of tasks running on the second shard
    would clobber the logs from the first in google storage.

    @param hostname        Hostname
    @param task_id         Special task id
    @param task_name       Special task name (e.g., Verify, Repair, etc)
    @param time_requested  Special task requested time.

    @return An execution path for the task.
    """
    base_path = 'hosts/%s/%s-%s' % (hostname, task_id, task_name.lower())

    # The timestamp is only added on shards. Adding it on the main would
    # break backward compatibility, as there are tasks that currently don't
    # have timestamps. RPCs for a host/job that was sent to a shard are
    # redirected to that shard, so this check runs on the shard holding the
    # logs.
    if is_shard():
        # The job_id would be the simplest disambiguator, but in rare cases
        # tasks have no job (eg: frontend verify), so the creation timestamp
        # is used. Shard and main clocks should always be in sync; any
        # discrepancy will surface as job timeouts.
        # NOTE(review): the format is year-day-month, not year-month-day;
        # presumably unintentional, but changing it would alter the paths
        # of existing results -- confirm before touching.
        stamp = time_requested.strftime('%Y%d%m%H%M%S')

        # TODO: This is a hack, however it is the easiest way to achieve
        # correctness. There is currently some debate over the future of
        # tasks in our infrastructure and refactoring everything right
        # now isn't worth the time.
        return '%s/%s' % (base_path, stamp)

    return base_path
566
567
def get_job_tag(id, owner):
    """Build the string tag of a job, in the form "<id>-<owner>".

    @param id    Job id
    @param owner Job owner

    """
    tag = '%s-%s' % (id, owner)
    return tag
576
577
def get_hqe_exec_path(tag, execution_subdir):
    """Build the path to a HQE's results by joining tag and subdirectory.

    @param tag               Tag string for a job associated with a HQE.
    @param execution_subdir  Execution sub-directory string of a HQE.

    """
    exec_path = os.path.join(tag, execution_subdir)
    return exec_path
586
587
def is_inside_chroot():
    """Tell whether the process is running inside chroot.

    The check is the existence of /etc/cros_chroot_version.

    @return: True if the process is running inside chroot.

    """
    chroot_marker = '/etc/cros_chroot_version'
    return os.path.exists(chroot_marker)
595
596
def parse_job_name(name):
    """Parse job name to get information including build, board and suite etc.

    Suite job created by run_suite follows the naming convention of:
    [build]-test_suites/control.[suite]
    For example: lumpy-release/R46-7272.0.0-test_suites/control.bvt
    The naming convention is defined in rpc_interface.create_suite_job.

    Test job created by suite job follows the naming convention of:
    [build]/[suite]/[test name]
    For example: lumpy-release/R46-7272.0.0/bvt/login_LoginSuccess
    The naming convention is defined in
    server/cros/dynamic_suite/tools.create_job_name

    Note that pgo and chrome-perf builds will fail the method. Since lab does
    not run test for these builds, they can be ignored.
    Also, tests for Launch Control builds have different naming convention.
    The build ID will be used as build_version.

    @param name: Name of the job.

    @return: A dictionary containing the test information. It is empty when
             the name follows neither convention, and lacks 'board' when the
             build cannot be parsed. The keyvals include:
             build: Name of the build, e.g., lumpy-release/R46-7272.0.0
             build_version: The version of the build, e.g., R46-7272.0.0
             board: Name of the board, e.g., lumpy
             suite: Name of the test suite, e.g., bvt

    """
    info = {}
    # Raw strings: '\d' and '\.' are regex escapes, not string escapes.
    suite_job_regex = r'([^/]*/[^/]*(?:/\d+)?)-test_suites/control\.(.*)'
    test_job_regex = r'([^/]*/[^/]*(?:/\d+)?)/([^/]+)/.*'
    match = re.match(suite_job_regex, name)
    if not match:
        match = re.match(test_job_regex, name)
    if not match:
        return info

    info['build'], info['suite'] = match.groups()
    info['build_version'] = info['build'].split('/')[1]
    try:
        info['board'], _, _, _ = ParseBuildName(info['build'])
    except ParseBuildNameException:
        # Try to parse it as Launch Control build
        # Launch Control builds have name format:
        # branch/build_target-build_type/build_id.
        try:
            _, target, build_id = utils.parse_launch_control_build(
                    info['build'])
            build_target, _ = utils.parse_launch_control_target(target)
            if build_target:
                info['board'] = build_target
                info['build_version'] = build_id
        except ValueError:
            # Not a Launch Control build either; leave 'board' unset.
            pass
    return info
651
652
def verify_not_root_user():
    """Raise error.IllegalUser when the process runs with uid == 0."""
    running_as_root = (os.getuid() == 0)
    if running_as_root:
        raise error.IllegalUser('This script can not be ran as root.')
657
658
def get_hostname_from_machine(machine):
    """Extract the hostname from a machine string or dict.

    @param machine: Either a hostname string or a machine dict.

    @returns: Machine hostname in string format.
    """
    host_info = get_host_info_from_machine(machine)
    return host_info[0]
666
667
def get_host_info_from_machine(machine):
    """Extract host information from a machine string or dict.

    A dict supplies its own 'hostname' and 'afe_host' entries; anything else
    is taken to be the hostname itself and is paired with a fresh
    EmptyAFEHost.

    @param machine: Either a hostname string or a machine dict.

    @returns: Tuple of (hostname, afe_host)
    """
    if not isinstance(machine, dict):
        return (machine, EmptyAFEHost())
    return (machine['hostname'], machine['afe_host'])
677
678
def get_afe_host_from_machine(machine):
    """Extract the afe_host from a machine string or dict.

    @param machine: Either a hostname string or a machine dict.

    @returns: AFE host object.
    """
    host_info = get_host_info_from_machine(machine)
    return host_info[1]
686
687
def get_connection_pool_from_machine(machine):
    """Returns the machine's 'connection_pool' entry if possible.

    @param machine: Either a hostname string or a machine dict.

    @returns: The dict's 'connection_pool' value (expected to be an
              ssh_multiplex.ConnectionPool), or None when `machine` is not a
              dict or has no such entry.
    """
    if isinstance(machine, dict):
        return machine.get('connection_pool')
    return None
693
694
def get_creds_abspath(creds_file):
    """Resolve the path of a credentials file.

    An absolute creds_file is returned untouched.  A relative one is joined
    onto the SERVER/creds_dir config value, or onto common.autotest_dir when
    that value is unset or names a path that does not exist.

    @param creds_file: a path to the credentials.
    @return: The resolved path to the credentials file, or None if
             creds_file is empty.
    """
    if not creds_file:
        return None
    if os.path.isabs(creds_file):
        return creds_file

    base_dir = CONFIG.get_config_value('SERVER', 'creds_dir', default='')
    if not (base_dir and os.path.exists(base_dir)):
        base_dir = common.autotest_dir
    return os.path.join(base_dir, creds_file)
713
714
def SetupTsMonGlobalState(*args, **kwargs):
    """Import-safe wrap around chromite.lib.ts_mon_config's setup function.

    Never raises because of monitoring: if chromite cannot be imported, if
    the setup call raises, or if what it returns has no `__exit__`, a
    do-nothing context manager is returned instead.

    @param *args: Args to pass through.
    @param **kwargs: Kwargs to pass through.

    @return The context manager from ts_mon_config.SetupTsMonGlobalState, or
            a TrivialContextManager when monitoring is unavailable.
    """
    try:
        # TODO(crbug.com/739466) This module import is delayed because it adds
        # 1-2 seconds to the module import time and most users of site_utils
        # don't need it. The correct fix is to break apart site_utils into more
        # meaningful chunks.
        from chromite.lib import ts_mon_config
    except ImportError:
        logging.warning('Unable to import chromite. Monarch is disabled.')
        return TrivialContextManager()

    try:
        context = ts_mon_config.SetupTsMonGlobalState(*args, **kwargs)
        # Only hand back something callers can use in a `with` statement.
        if hasattr(context, '__exit__'):
            return context
    except Exception as e:
        logging.warning('Caught an exception trying to setup ts_mon, '
                        'monitoring is disabled: %s', e, exc_info=True)
    return TrivialContextManager()
739
740
@contextlib.contextmanager
def TrivialContextManager(*args, **kwargs):
    """No-op context manager: enters and exits without doing anything.

    The value bound by `with ... as` is None.

    @param *args: Ignored args
    @param **kwargs: Ignored kwargs.
    """
    yield None
749
750
def wait_for_idle_duts(duts, afe, max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Wait for the hosts to all go idle.

    Polls the AFE about once a second. The caller's `duts` sequence is not
    modified.

    @param duts: List of duts to check for idle state.
    @param afe: afe instance.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @returns Boolean True if all hosts are idle or False if any hosts did not
            go idle within max_wait.
    """
    start_time = time.time()
    # Work on a copy: duts are dropped from it as they go idle.
    active_dut_list = list(duts)
    while active_dut_list:
        # Let's rate-limit how often we hit the AFE.
        time.sleep(1)

        # Check if we've waited too long.
        if (time.time() - start_time) > max_wait:
            return False

        # Get the status for the duts and see if they're in the idle state.
        afe_hosts = afe.get_hosts(active_dut_list)
        idle_duts = set(afe_host.hostname for afe_host in afe_hosts
                        if afe_host.status in host_states.IDLE_STATES)

        # Take out idle duts so we don't needlessly check them next time
        # around. Filtering (rather than list.remove) tolerates hostnames
        # the AFE reports twice or that are not in our list.
        active_dut_list = [dut for dut in active_dut_list
                           if dut not in idle_duts]

        if active_dut_list:
            logging.info('still waiting for following duts to go idle: %s',
                         active_dut_list)
    return True
786
787
@contextlib.contextmanager
def lock_duts_and_wait(duts, afe, lock_msg='default lock message',
                       max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Context manager to lock the duts and wait for them to go idle.

    Duts are locked in sorted order; those that cannot be locked are logged
    and skipped. On exit, every dut locked here is unlocked again, even if
    the body of the `with` block raises.

    @param duts: List of duts to lock. The list itself is left unmodified.
    @param afe: afe instance.
    @param lock_msg: message for afe on locking this host.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @yields Boolean that is True if all locked duts went idle, or False if
            we timed out waiting too long for hosts to go idle.
    """
    locked_duts = []
    try:
        # sorted() keeps the locking order deterministic without reordering
        # the caller's list in place.
        for dut in sorted(duts):
            if afe.lock_host(dut, lock_msg, fail_if_locked=True):
                locked_duts.append(dut)
            else:
                logging.info('%s already locked', dut)
        yield wait_for_idle_duts(locked_duts, afe, max_wait)
    finally:
        afe.unlock_hosts(locked_duts)
812
813
def _get_default_size_info(path):
    """Build the fallback result size information for a directory.

    Used when the directory summary failed to build: the test result is
    assumed not to be throttled, and every size is reported as the current
    total size of `path`.

    @param path: Directory holding the test results.

    @return: A namedtuple of result size informations, including:
            client_result_collected_KB: The total size (in KB) of test results
                    collected from test device. Set to be the total size of the
                    given path.
            original_result_total_KB: The original size (in KB) of test results
                    before being trimmed. Set to be the total size of the given
                    path.
            result_uploaded_KB: The total size (in KB) of test results to be
                    uploaded. Set to be the total size of the given path.
            result_throttled: True if test results collection is throttled.
                    It's set to False in this default behavior.
    """
    size_kb = file_utils.get_directory_size_kibibytes(path)
    size_info = result_utils_lib.ResultSizeInfo(
            client_result_collected_KB=size_kb,
            original_result_total_KB=size_kb,
            result_uploaded_KB=size_kb,
            result_throttled=False)
    return size_info
838
839
def _report_result_size_metrics(result_size_info):
    """Send the result size figures to metrics.

    One counter per size field is incremented by that field's value. Every
    increment carries a 'result_throttled' field taken from the given info.

    @param result_size_info: A ResultSizeInfo namedtuple containing information
            of test result sizes.
    """
    # (field name, counter description) pairs, reported in this order. The
    # field name is also the suffix of the counter name.
    counter_specs = (
            ('client_result_collected_KB',
             'The total size (in KB) of test results '
             'collected from test device. Set to be the total size of '
             'the given path.'),
            ('original_result_total_KB',
             'The original size (in KB) of test results '
             'before being trimmed.'),
            ('result_uploaded_KB',
             'The total size (in KB) of test results to be '
             'uploaded.'),
    )
    throttle_fields = {'result_throttled': result_size_info.result_throttled}
    for field_name, description in counter_specs:
        counter = metrics.Counter(RESULT_METRICS_PREFIX + field_name,
                                  description=description)
        counter.increment_by(getattr(result_size_info, field_name),
                             fields=throttle_fields)
863
864
@metrics.SecondsTimerDecorator(
        'chromeos/autotest/result_collection/collect_result_sizes_duration')
def collect_result_sizes(path, log=logging.debug):
    """Collect the result sizes information and build result summary.

    It first tries to merge directory summaries and calculate the result sizes
    including:
    client_result_collected_KB: The volume in KB that's transferred from the
            test device.
    original_result_total_KB: The volume in KB that's the original size of the
            result files before being trimmed.
    result_uploaded_KB: The volume in KB that will be uploaded.
    result_throttled: Indicating if the result files were throttled.

    On success an html summary view is written into the result directory and
    the individual summary files that were merged are deleted.

    If directory summary merging failed for any reason, fall back to use the
    total size of the given result directory. Whichever size information was
    obtained is reported to metrics before being returned.

    @param path: Path of the result directory to get size information.
    @param log: The logging method, default to logging.debug. It is called
            with a single, already formatted message string.
    @return: A ResultSizeInfo namedtuple containing information of test result
             sizes.
    """
    try:
        client_collected_bytes, summary, files = result_utils.merge_summaries(
                path)
        result_size_info = result_utils_lib.get_result_size_info(
                client_collected_bytes, summary)
        html_file = os.path.join(path, result_view.DEFAULT_RESULT_SUMMARY_NAME)
        result_view.build(client_collected_bytes, summary, html_file)

        # Delete all summary files after final view is built.
        for summary_file in files:
            os.remove(summary_file)
    except Exception:
        # Best effort: any ordinary failure above degrades to the plain
        # directory size. Catching Exception (rather than everything) lets
        # KeyboardInterrupt and SystemExit propagate instead of being
        # swallowed by the fallback.
        log('Failed to calculate result sizes based on directory summaries for '
            'directory %s. Fall back to record the total size.\nException: %s' %
            (path, traceback.format_exc()))
        result_size_info = _get_default_size_info(path)

    _report_result_size_metrics(result_size_info)

    return result_size_info
907