1# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4 5import logging 6import re 7import sys 8import time 9import urllib2 10 11from autotest_lib.client.common_lib import error 12from autotest_lib.client.common_lib import global_config 13from autotest_lib.client.common_lib.cros import dev_server 14from autotest_lib.server import afe_utils 15from autotest_lib.server import test 16from autotest_lib.server import utils 17from autotest_lib.server.cros import autoupdater 18from autotest_lib.server.cros import provision 19 20 21try: 22 from chromite.lib import metrics 23except ImportError: 24 metrics = utils.metrics_mock 25 26 27_CONFIG = global_config.global_config 28# pylint: disable-msg=E1120 29_IMAGE_URL_PATTERN = _CONFIG.get_config_value( 30 'CROS', 'image_url_pattern', type=str) 31 32 33def _metric_name(base_name): 34 return 'chromeos/autotest/provision/' + base_name 35 36 37def _get_build_metrics_fields(build_name): 38 try: 39 return utils.ParseBuildName(build_name)[0 : 2] 40 except utils.ParseBuildNameException: 41 logging.warning('Unable to parse build name %s for metrics. ' 42 'Continuing anyway.', build_name) 43 return ('', '') 44 45 46def _emit_updater_metrics(name_prefix, build_name, failure_reason, 47 duration, fields): 48 # reset_after=True is required for String gauges events to ensure that 49 # the metrics are not repeatedly emitted until the server restarts. 50 metrics.String(_metric_name(name_prefix + '_build_by_devserver_dut'), 51 reset_after=True).set(build_name, fields=fields) 52 if failure_reason: 53 metrics.String( 54 _metric_name(name_prefix + '_failure_reason_by_devserver_dut'), 55 reset_after=True).set(failure_reason, fields=fields) 56 metrics.SecondsDistribution( 57 _metric_name(name_prefix + '_duration_by_devserver_dut')).add( 58 duration, fields=fields) 59 60 61def _emit_provision_metrics(update_url, dut_host_name, 62 exception, duration): 63 # The following is high cardinality, but sparse. 64 # Each DUT is of a single board type, and likely build type. 65 # 66 # TODO(jrbarnette) The devserver-triggered provisioning code 67 # includes retries in certain cases. For that reason, the metrics 68 # distinguish 'provision' metrics which summarizes across all 69 # retries, and 'auto_update' which summarizes an individual update 70 # attempt. ChromiumOSUpdater doesn't do retries, so we just report 71 # the same information twice. We should replace the metrics with 72 # something better tailored to the current implementation. 73 build_name = autoupdater.url_to_image_name(update_url) 74 board, build_type = _get_build_metrics_fields(build_name) 75 fields = { 76 'board': board, 77 'build_type': build_type, 78 'dut_host_name': dut_host_name, 79 'dev_server': dev_server.get_resolved_hostname(update_url), 80 'success': not exception, 81 } 82 failure_reason = autoupdater.get_update_failure_reason(exception) 83 _emit_updater_metrics('provision', build_name, failure_reason, 84 duration, fields) 85 fields['attempt'] = 1 86 _emit_updater_metrics('auto_update', build_name, failure_reason, 87 duration, fields) 88 89 90class provision_AutoUpdate(test.test): 91 """A test that can provision a machine to the correct ChromeOS version.""" 92 version = 1 93 94 def initialize(self, host, value, is_test_na=False): 95 """Initialize. 96 97 @param host: The host object to update to |value|. 98 @param value: The build type and version to install on the host. 99 @param is_test_na: boolean, if True, will simply skip the test 100 and emit TestNAError. The control file 101 determines whether the test should be skipped 102 and passes the decision via this argument. Note 103 we can't raise TestNAError in control file as it won't 104 be caught and handled properly. 105 """ 106 if is_test_na: 107 raise error.TestNAError( 108 'Test not available for test_that. chroot detected, ' 109 'you are probably using test_that.') 110 # We check value in initialize so that it fails faster. 111 if not value: 112 raise error.TestFail('No build version specified.') 113 114 115 def run_once(self, host, value, force_update_engine=False): 116 """The method called by the control file to start the test. 117 118 @param host: The host object to update to |value|. 119 @param value: The host object to provision with a build corresponding 120 to |value|. 121 @param force_update_engine: When true, the update flow must 122 perform the update unconditionally, using 123 update_engine. Optimizations that could suppress 124 invoking update_engine, including quick-provision, 125 mustn't be used. 126 """ 127 with_cheets = False 128 logging.debug('Start provisioning %s to %s.', host, value) 129 if value.endswith(provision.CHEETS_SUFFIX): 130 image = re.sub(provision.CHEETS_SUFFIX + '$', '', value) 131 with_cheets = True 132 else: 133 image = value 134 135 # If the host is already on the correct build, we have nothing to do. 136 # Note that this means we're not doing any sort of stateful-only 137 # update, and that we're relying more on cleanup to do cleanup. 138 if not force_update_engine: 139 info = host.host_info_store.get() 140 if info.build == value: 141 # We can't raise a TestNA, as would make sense, as that makes 142 # job.run_test return False as if the job failed. However, it'd 143 # still be nice to get this into the status.log, so we manually 144 # emit an INFO line instead. 145 self.job.record('INFO', None, None, 146 'Host already running %s' % value) 147 return 148 149 # We're about to reimage a machine, so we need full_payload and 150 # stateful. If something happened where the devserver doesn't have one 151 # of these, then it's also likely that it'll be missing autotest. 152 # Therefore, we require the devserver to also have autotest staged, so 153 # that the test that runs after this provision finishes doesn't error 154 # out because the devserver that its job_repo_url is set to is missing 155 # autotest test code. 156 # TODO(milleral): http://crbug.com/249426 157 # Add an asynchronous staging call so that we can ask the devserver to 158 # fetch autotest in the background here, and then wait on it after 159 # reimaging finishes or at some other point in the provisioning. 160 ds = None 161 use_quick_provision = False 162 try: 163 ds = dev_server.ImageServer.resolve(image, host.hostname) 164 ds.stage_artifacts(image, ['full_payload', 'stateful', 165 'autotest_packages']) 166 if not force_update_engine: 167 try: 168 ds.stage_artifacts(image, ['quick_provision']) 169 use_quick_provision = True 170 except dev_server.DevServerException as e: 171 logging.warning('Unable to stage quick provision ' 172 'payload: %s', e) 173 except dev_server.DevServerException as e: 174 raise error.TestFail, str(e), sys.exc_info()[2] 175 finally: 176 # If a devserver is resolved, Log what has been downloaded so far. 177 if ds: 178 try: 179 ds.list_image_dir(image) 180 except (dev_server.DevServerException, urllib2.URLError) as e2: 181 logging.warning('Failed to list_image_dir for build %s. ' 182 'Error: %s', image, e2) 183 184 url = _IMAGE_URL_PATTERN % (ds.url(), image) 185 186 logging.debug('Installing image') 187 start_time = time.time() 188 failure = None 189 try: 190 afe_utils.machine_install_and_update_labels( 191 host, url, use_quick_provision, with_cheets) 192 except BaseException as e: 193 failure = e 194 raise 195 finally: 196 _emit_provision_metrics( 197 url, host.hostname, failure, time.time() - start_time) 198 logging.debug('Finished provisioning %s to %s', host, value) 199