1# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import logging
6import re
7import sys
8import time
9import urllib2
10
11from autotest_lib.client.common_lib import error
12from autotest_lib.client.common_lib import global_config
13from autotest_lib.client.common_lib.cros import dev_server
14from autotest_lib.server import afe_utils
15from autotest_lib.server import test
16from autotest_lib.server import utils
17from autotest_lib.server.cros import autoupdater
18from autotest_lib.server.cros import provision
19
20
# If chromite cannot be imported, fall back to utils.metrics_mock so that
# the `metrics` name used by the helpers below still resolves.
try:
    from chromite.lib import metrics
except ImportError:
    metrics = utils.metrics_mock
25
26
# Shared handle on the global autotest configuration.
_CONFIG = global_config.global_config
# Format string read from the 'image_url_pattern' option of the 'CROS'
# config section.  run_once() fills it in with (devserver URL, image name)
# to build the update URL.
# pylint: disable-msg=E1120
_IMAGE_URL_PATTERN = _CONFIG.get_config_value(
        'CROS', 'image_url_pattern', type=str)
31
32
33def _metric_name(base_name):
34    return 'chromeos/autotest/provision/' + base_name
35
36
def _get_build_metrics_fields(build_name):
    """Extract the leading two build-name components for metrics fields.

    The caller in this file unpacks the result as (board, build_type).

    @param build_name: Build name to hand to utils.ParseBuildName().

    @returns The first two items of the parsed build name, or a pair of
             empty strings when the name cannot be parsed.
    """
    try:
        leading_fields = utils.ParseBuildName(build_name)[:2]
    except utils.ParseBuildNameException:
        # A malformed name must not block metrics reporting; report
        # empty fields instead.
        logging.warning('Unable to parse build name %s for metrics. '
                        'Continuing anyway.', build_name)
        return ('', '')
    return leading_fields
44
45
def _emit_updater_metrics(name_prefix, build_name, failure_reason,
                          duration, fields):
    """Emit the build, failure-reason and duration metrics for an update.

    @param name_prefix: Prefix selecting the metric family; it is joined
                        with a fixed suffix for each metric.
    @param build_name: Value recorded in the build string metric.
    @param failure_reason: Value recorded in the failure-reason string
                           metric; that metric is skipped when this is
                           falsy.
    @param duration: Value added to the duration distribution.
    @param fields: Metric fields passed along with every emitted value.
    """
    def _prefixed(suffix):
        return _metric_name(name_prefix + suffix)

    # reset_after=True is required for String gauges events to ensure that
    # the metrics are not repeatedly emitted until the server restarts.
    build_gauge = metrics.String(_prefixed('_build_by_devserver_dut'),
                                 reset_after=True)
    build_gauge.set(build_name, fields=fields)

    if failure_reason:
        reason_gauge = metrics.String(
                _prefixed('_failure_reason_by_devserver_dut'),
                reset_after=True)
        reason_gauge.set(failure_reason, fields=fields)

    duration_distribution = metrics.SecondsDistribution(
            _prefixed('_duration_by_devserver_dut'))
    duration_distribution.add(duration, fields=fields)
59
60
def _emit_provision_metrics(update_url, dut_host_name,
                            exception, duration):
    """Report the outcome of one provisioning run to the metrics backend.

    @param update_url: Update URL used for the install; the image name and
                       the devserver hostname are derived from it.
    @param dut_host_name: Host name of the DUT, recorded as a field.
    @param exception: The exception raised by the install, or a falsy
                      value when the install succeeded.
    @param duration: Value reported as the duration of the install.
    """
    # The following is high cardinality, but sparse.
    # Each DUT is of a single board type, and likely build type.
    #
    # TODO(jrbarnette) The devserver-triggered provisioning code
    # includes retries in certain cases.  For that reason, the metrics
    # distinguish 'provision' metrics which summarizes across all
    # retries, and 'auto_update' which summarizes an individual update
    # attempt.  ChromiumOSUpdater doesn't do retries, so we just report
    # the same information twice.  We should replace the metrics with
    # something better tailored to the current implementation.
    image_name = autoupdater.url_to_image_name(update_url)
    board, build_type = _get_build_metrics_fields(image_name)
    devserver_host = dev_server.get_resolved_hostname(update_url)
    succeeded = not exception
    metric_fields = {
        'board': board,
        'build_type': build_type,
        'dut_host_name': dut_host_name,
        'dev_server': devserver_host,
        'success': succeeded,
    }
    reason = autoupdater.get_update_failure_reason(exception)

    _emit_updater_metrics('provision', image_name, reason,
                          duration, metric_fields)
    # The per-attempt metrics carry one extra field; there is always
    # exactly one attempt.
    metric_fields['attempt'] = 1
    _emit_updater_metrics('auto_update', image_name, reason,
                          duration, metric_fields)
88
89
90class provision_AutoUpdate(test.test):
91    """A test that can provision a machine to the correct ChromeOS version."""
92    version = 1
93
94    def initialize(self, host, value, is_test_na=False):
95        """Initialize.
96
97        @param host: The host object to update to |value|.
98        @param value: The build type and version to install on the host.
99        @param is_test_na: boolean, if True, will simply skip the test
100                           and emit TestNAError. The control file
101                           determines whether the test should be skipped
102                           and passes the decision via this argument. Note
103                           we can't raise TestNAError in control file as it won't
104                           be caught and handled properly.
105        """
106        if is_test_na:
107            raise error.TestNAError(
108                'Test not available for test_that. chroot detected, '
109                'you are probably using test_that.')
110        # We check value in initialize so that it fails faster.
111        if not value:
112            raise error.TestFail('No build version specified.')
113
114
115    def run_once(self, host, value, force_update_engine=False):
116        """The method called by the control file to start the test.
117
118        @param host: The host object to update to |value|.
119        @param value: The host object to provision with a build corresponding
120                      to |value|.
121        @param force_update_engine: When true, the update flow must
122                      perform the update unconditionally, using
123                      update_engine.  Optimizations that could suppress
124                      invoking update_engine, including quick-provision,
125                      mustn't be used.
126        """
127        with_cheets = False
128        logging.debug('Start provisioning %s to %s.', host, value)
129        if value.endswith(provision.CHEETS_SUFFIX):
130            image = re.sub(provision.CHEETS_SUFFIX + '$', '', value)
131            with_cheets = True
132        else:
133            image = value
134
135        # If the host is already on the correct build, we have nothing to do.
136        # Note that this means we're not doing any sort of stateful-only
137        # update, and that we're relying more on cleanup to do cleanup.
138        if not force_update_engine:
139            info = host.host_info_store.get()
140            if info.build == value:
141                # We can't raise a TestNA, as would make sense, as that makes
142                # job.run_test return False as if the job failed.  However, it'd
143                # still be nice to get this into the status.log, so we manually
144                # emit an INFO line instead.
145                self.job.record('INFO', None, None,
146                                'Host already running %s' % value)
147                return
148
149        # We're about to reimage a machine, so we need full_payload and
150        # stateful.  If something happened where the devserver doesn't have one
151        # of these, then it's also likely that it'll be missing autotest.
152        # Therefore, we require the devserver to also have autotest staged, so
153        # that the test that runs after this provision finishes doesn't error
154        # out because the devserver that its job_repo_url is set to is missing
155        # autotest test code.
156        # TODO(milleral): http://crbug.com/249426
157        # Add an asynchronous staging call so that we can ask the devserver to
158        # fetch autotest in the background here, and then wait on it after
159        # reimaging finishes or at some other point in the provisioning.
160        ds = None
161        use_quick_provision = False
162        try:
163            ds = dev_server.ImageServer.resolve(image, host.hostname)
164            ds.stage_artifacts(image, ['full_payload', 'stateful',
165                                       'autotest_packages'])
166            if not force_update_engine:
167                try:
168                    ds.stage_artifacts(image, ['quick_provision'])
169                    use_quick_provision = True
170                except dev_server.DevServerException as e:
171                    logging.warning('Unable to stage quick provision '
172                                    'payload: %s', e)
173        except dev_server.DevServerException as e:
174            raise error.TestFail, str(e), sys.exc_info()[2]
175        finally:
176            # If a devserver is resolved, Log what has been downloaded so far.
177            if ds:
178                try:
179                    ds.list_image_dir(image)
180                except (dev_server.DevServerException, urllib2.URLError) as e2:
181                    logging.warning('Failed to list_image_dir for build %s. '
182                                    'Error: %s', image, e2)
183
184        url = _IMAGE_URL_PATTERN % (ds.url(), image)
185
186        logging.debug('Installing image')
187        start_time = time.time()
188        failure = None
189        try:
190            afe_utils.machine_install_and_update_labels(
191                    host, url, use_quick_provision, with_cheets)
192        except BaseException as e:
193            failure = e
194            raise
195        finally:
196            _emit_provision_metrics(
197                url, host.hostname, failure, time.time() - start_time)
198        logging.debug('Finished provisioning %s to %s', host, value)
199