1# Copyright 2018 The Chromium Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4 5"""Run the skylab staging test. 6 7This script runs a suite of autotest tests and some other sanity checks against 8a given Skylab instance. If all sanity checks and tests have the expected 9results, the script exits with success. 10 11This script is intended to be used for the Autotest staging lab's test_push. 12This script does not update any software before running the tests (i.e. caller 13is responsible for setting up the staging lab with the correct software 14beforehand), nor does it update any software refs on success (i.e. caller is 15responsible for blessing the newer version of software as needed). 16""" 17 18from __future__ import absolute_import 19from __future__ import division 20from __future__ import print_function 21 22import argparse 23import logging 24import sys 25import time 26 27from lucifer import autotest 28from lucifer import loglib 29from skylab_staging import errors 30from skylab_staging import swarming 31 32_METRICS_PREFIX = 'chromeos/autotest/test_push/skylab' 33_POLLING_INTERVAL_S = 10 34_WAIT_FOR_DUTS_TIMEOUT_S = 20 * 60 35 36# Dictionary of test results expected in suite:skylab_staging_test. 37_EXPECTED_TEST_RESULTS = {'login_LoginSuccess.*': 'GOOD', 38 'provision_AutoUpdate.double': 'GOOD', 39 'dummy_Pass.*': 'GOOD', 40 'dummy_Fail.Fail$': 'FAIL', 41 'dummy_Fail.Error$': 'ERROR', 42 'dummy_Fail.Warn$': 'WARN', 43 'dummy_Fail.NAError$': 'TEST_NA', 44 'dummy_Fail.Crash$': 'GOOD'} 45 46# Some test could be missing from the test results for various reasons. Add 47# such test in this list and explain the reason. 48_IGNORED_TESTS = [ 49 # TODO(pprabhu): Remove once R70 is stable. 50 'dummy_Fail.RetrySuccess', 51 'dummy_Fail.RetryFail', 52] 53 54_logger = logging.getLogger(__name__) 55 56 57def main(): 58 """Entry point of test_push.""" 59 autotest.monkeypatch() 60 metrics = autotest.chromite_load('metrics') 61 ts_mon_config = autotest.chromite_load('ts_mon_config') 62 63 parser = _get_parser() 64 loglib.add_logging_options(parser) 65 args = parser.parse_args() 66 loglib.configure_logging_with_args(parser, args) 67 68 with ts_mon_config.SetupTsMonGlobalState(service_name='skylab_test_push', 69 indirect=True): 70 success = False 71 try: 72 with metrics.SecondsTimer(_METRICS_PREFIX + '/durations/total', 73 add_exception_field=True): 74 _run_test_push(args) 75 success = True 76 finally: 77 metrics.Counter(_METRICS_PREFIX + '/tick').increment( 78 fields={'success': success}) 79 80def _get_parser(): 81 parser = argparse.ArgumentParser( 82 description='Run test_push against Skylab instance.') 83 parser.add_argument( 84 '--swarming-url', 85 required=True, 86 help='Full URL to the Swarming instance to use', 87 ) 88 parser.add_argument( 89 '--swarming-cli', 90 required=True, 91 help='Path to the Swarming cli tool.', 92 ) 93 # TODO(crbug.com/867969) Use model instead of board once skylab inventory has 94 # model information. 95 parser.add_argument( 96 '--dut-board', 97 required=True, 98 help='Label board of the DUTs to use for testing', 99 ) 100 parser.add_argument( 101 '--dut-pool', 102 required=True, 103 choices=('DUT_POOL_CQ', 'DUT_POOL_BVT', 'DUT_POOL_SUITES'), 104 help='Label pool of the DUTs to use for testing', 105 ) 106 parser.add_argument( 107 '--build', 108 required=True, 109 help='ChromeOS build to use for provisioning' 110 ' (e.g.: gandolf-release/R54-8743.25.0).', 111 ) 112 parser.add_argument( 113 '--timeout-mins', 114 type=int, 115 required=True, 116 help='(Optional) Overall timeout for the test_push. On timeout, test_push' 117 ' attempts to abort any in-flight test suites before quitting.', 118 ) 119 parser.add_argument( 120 '--num-min-duts', 121 type=int, 122 help='Minimum number of Ready DUTs required for test suite.', 123 ) 124 parser.add_argument( 125 '--service-account-json', 126 default=None, 127 help='(Optional) Path to the service account credentials file to' 128 ' authenticate with Swarming service.', 129 ) 130 return parser 131 132 133def _run_test_push(args): 134 """Meat of the test_push flow.""" 135 metrics = autotest.chromite_load('metrics') 136 137 deadline = time.time() + (args.timeout_mins * 60) 138 swclient = swarming.Client(args.swarming_cli, args.swarming_url, 139 args.service_account_json) 140 if args.num_min_duts: 141 _ensure_duts_ready( 142 swclient, 143 args.dut_board, 144 args.dut_pool, 145 args.num_min_duts, 146 min(deadline - time.time(), _WAIT_FOR_DUTS_TIMEOUT_S), 147 ) 148 149 # Just like the builders, first run a provision suite to provision required 150 # DUTs, then run the actual suite. 151 with metrics.SecondsTimer(_METRICS_PREFIX + '/durations/provision_suite', 152 add_exception_field=True): 153 task_id = swclient.trigger_suite( 154 args.dut_board, 155 args.dut_pool, 156 args.build, 157 'provision', 158 deadline - time.time(), 159 ) 160 _logger.info('Triggered provision suite. Task id: %s', task_id) 161 swclient.wait_for_suite( 162 task_id, 163 args.dut_board, 164 args.dut_pool, 165 args.build, 166 'provision', 167 deadline - time.time(), 168 ) 169 _logger.info('Finished provision suite.') 170 171 with metrics.SecondsTimer(_METRICS_PREFIX + '/durations/push_to_prod_suite', 172 add_exception_field=True): 173 task_id = swclient.trigger_suite( 174 args.dut_board, 175 args.dut_pool, 176 args.build, 177 'skylab_staging_test', 178 deadline - time.time(), 179 ) 180 _logger.info('Triggered skylab_staging_test suite. Task id: %s', task_id) 181 _verify_suite_creation(swclient, task_id) 182 _logger.info('Check push_to_prod suite on: \n %s', 183 swclient.task_url(task_id)) 184 swclient.wait_for_suite( 185 task_id, 186 args.dut_board, 187 args.dut_pool, 188 args.build, 189 'skylab_staging_test', 190 deadline - time.time(), 191 ) 192 _logger.info('Finished skylab_staging_test suite.') 193 194 _verify_test_results(task_id, _EXPECTED_TEST_RESULTS) 195 196 197def _verify_suite_creation(swclient, task_id): 198 """Verify the suite is created successfully.""" 199 result = swclient.query('task/%s/result' % task_id, []) 200 if result['state'] != 'COMPLETED' or result['failure']: 201 raise errors.TestPushError('Suite task %s is not successfully created.' 202 % task_id) 203 204 205def _verify_test_results(task_id, expected_results): 206 """Verify if test results are expected.""" 207 _logger.info('Comparing test results...') 208 test_views = _get_test_views(task_id) 209 available_views = [v for v in test_views if _view_is_preserved(v)] 210 logging.debug('Test results:') 211 for v in available_views: 212 logging.debug('%s%s', v['test_name'].ljust(30), v['status']) 213 214 summary = _verify_and_summarize(available_views, expected_results) 215 if summary: 216 logging.error('\n'.join(summary)) 217 raise errors.TestPushError('Test results are not consistent with ' 218 'expected results') 219 220 221def _get_test_views(task_id): 222 """Retrieve test views from TKO for skylab task id.""" 223 tko_db = autotest.load('tko.db') 224 db = tko_db.db() 225 return db.get_child_tests_by_parent_task_id(task_id) 226 227 228def _view_is_preserved(view): 229 """Detect whether to keep the test view for further comparison.""" 230 job_status = autotest.load('server.cros.dynamic_suite.job_status') 231 return (job_status.view_is_relevant(view) and 232 (not job_status.view_is_for_suite_job(view))) 233 234 235def _verify_and_summarize(available_views, expected_results): 236 """Verify and generate summaries for test_push results.""" 237 test_push_common = autotest.load('site_utils.test_push_common') 238 views = {v['test_name']:v['status'] for v in available_views} 239 return test_push_common.summarize_push(views, expected_results, 240 _IGNORED_TESTS) 241 242 243def _ensure_duts_ready(swclient, board, pool, min_duts, timeout_s): 244 """Ensure that at least num_duts are in the ready dut_state.""" 245 start_time = time.time() 246 while True: 247 _logger.debug('Checking whether %d DUTs are available', min_duts) 248 num_duts = swclient.num_ready_duts(board, pool) 249 if num_duts >= min_duts: 250 _logger.info( 251 '%d available DUTs satisfy the minimum requirement of %d DUTs', 252 num_duts, min_duts, 253 ) 254 return 255 if time.time() - start_time > timeout_s: 256 raise errors.TestPushError( 257 'Could not find %d ready DUTs with (board:%s, pool:%s) within %d' 258 ' seconds' % (min_duts, board, pool, timeout_s) 259 ) 260 time.sleep(_POLLING_INTERVAL_S) 261 262 263if __name__ == '__main__': 264 sys.exit(main()) 265