1# Copyright 2018 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Run the skylab staging test.
6
7This script runs a suite of autotest tests and some other sanity checks against
8a given Skylab instance. If all sanity checks and tests have the expected
9results, the script exits with success.
10
11This script is intended to be used for the Autotest staging lab's test_push.
12This script does not update any software before running the tests (i.e. caller
13is responsible for setting up the staging lab with the correct software
14beforehand), nor does it update any software refs on success (i.e. caller is
15responsible for blessing the newer version of software as needed).
16"""
17
18from __future__ import absolute_import
19from __future__ import division
20from __future__ import print_function
21
22import argparse
23import logging
24import sys
25import time
26
27from lucifer import autotest
28from lucifer import loglib
29from skylab_staging import errors
30from skylab_staging import swarming
31
_METRICS_PREFIX = 'chromeos/autotest/test_push/skylab'
_POLLING_INTERVAL_S = 10
_WAIT_FOR_DUTS_TIMEOUT_S = 20 * 60

# Expected status of each test in suite:skylab_staging_test, keyed by test
# name pattern.
_EXPECTED_TEST_RESULTS = {
    'login_LoginSuccess.*': 'GOOD',
    'provision_AutoUpdate.double': 'GOOD',
    'dummy_Pass.*': 'GOOD',
    'dummy_Fail.Fail$': 'FAIL',
    'dummy_Fail.Error$': 'ERROR',
    'dummy_Fail.Warn$': 'WARN',
    'dummy_Fail.NAError$': 'TEST_NA',
    'dummy_Fail.Crash$': 'GOOD',
}

# Tests that may be missing from the test results for various reasons. Every
# entry added to this list must come with a comment explaining the reason.
_IGNORED_TESTS = [
    # TODO(pprabhu): Remove once R70 is stable.
    'dummy_Fail.RetrySuccess',
    'dummy_Fail.RetryFail',
]

_logger = logging.getLogger(__name__)
55
56
def main():
  """Parse flags, set up monitoring and drive one test_push run.

  A tick counter is always emitted on the way out; its `success` field is
  True only if the whole run finished without raising.
  """
  autotest.monkeypatch()
  metrics = autotest.chromite_load('metrics')
  ts_mon_config = autotest.chromite_load('ts_mon_config')

  arg_parser = _get_parser()
  loglib.add_logging_options(arg_parser)
  parsed_args = arg_parser.parse_args()
  loglib.configure_logging_with_args(arg_parser, parsed_args)

  total_duration_metric = _METRICS_PREFIX + '/durations/total'
  tick_metric = _METRICS_PREFIX + '/tick'

  with ts_mon_config.SetupTsMonGlobalState(service_name='skylab_test_push',
                                           indirect=True):
    tick_fields = {'success': False}
    try:
      with metrics.SecondsTimer(total_duration_metric,
                                add_exception_field=True):
        _run_test_push(parsed_args)
      # Only reached when the push did not raise.
      tick_fields['success'] = True
    finally:
      metrics.Counter(tick_metric).increment(fields=tick_fields)
79
def _get_parser():
  """Build the command line parser for test_push.

  Returns:
    An argparse.ArgumentParser. --num-min-duts and --service-account-json are
    optional; every other flag is required.
  """
  parser = argparse.ArgumentParser(
      description='Run test_push against Skylab instance.')
  parser.add_argument(
      '--swarming-url',
      required=True,
      help='Full URL to the Swarming instance to use',
  )
  parser.add_argument(
      '--swarming-cli',
      required=True,
      help='Path to the Swarming cli tool.',
  )
  # TODO(crbug.com/867969) Use model instead of board once skylab inventory has
  # model information.
  parser.add_argument(
      '--dut-board',
      required=True,
      help='Label board of the DUTs to use for testing',
  )
  parser.add_argument(
      '--dut-pool',
      required=True,
      choices=('DUT_POOL_CQ', 'DUT_POOL_BVT', 'DUT_POOL_SUITES'),
      help='Label pool of the DUTs to use for testing',
  )
  parser.add_argument(
      '--build',
      required=True,
      help='ChromeOS build to use for provisioning'
           ' (e.g.: gandolf-release/R54-8743.25.0).',
  )
  # This flag is required: the overall deadline is computed from it, so the
  # help text must not advertise it as optional.
  parser.add_argument(
      '--timeout-mins',
      type=int,
      required=True,
      help='Overall timeout, in minutes, for the test_push. On timeout,'
           ' test_push attempts to abort any in-flight test suites before'
           ' quitting.',
  )
  parser.add_argument(
      '--num-min-duts',
      type=int,
      help='(Optional) Minimum number of Ready DUTs required for test suite.'
           ' When omitted, test_push does not wait for DUTs to become ready.',
  )
  parser.add_argument(
      '--service-account-json',
      default=None,
      help='(Optional) Path to the service account credentials file to'
           ' authenticate with Swarming service.',
  )
  return parser
131
132
def _run_test_push(args):
  """Run the provision suite, then the staging suite, then check results.

  Args:
    args: Parsed command line arguments, as produced by _get_parser().

  Every wait is bounded by one overall deadline derived from
  args.timeout_mins.
  """
  metrics = autotest.chromite_load('metrics')

  deadline = time.time() + (args.timeout_mins * 60)

  def remaining_s():
    """Return the seconds left until the overall deadline."""
    return deadline - time.time()

  client = swarming.Client(args.swarming_cli, args.swarming_url,
                           args.service_account_json)
  if args.num_min_duts:
    # Never wait for DUTs longer than the overall deadline allows.
    dut_wait_s = min(remaining_s(), _WAIT_FOR_DUTS_TIMEOUT_S)
    _ensure_duts_ready(client, args.dut_board, args.dut_pool,
                       args.num_min_duts, dut_wait_s)

  # Just like the builders, first run a provision suite to provision required
  # DUTs, then run the actual suite.
  provision_timer = metrics.SecondsTimer(
      _METRICS_PREFIX + '/durations/provision_suite',
      add_exception_field=True)
  with provision_timer:
    task_id = client.trigger_suite(args.dut_board, args.dut_pool, args.build,
                                   'provision', remaining_s())
    _logger.info('Triggered provision suite. Task id: %s', task_id)
    client.wait_for_suite(task_id, args.dut_board, args.dut_pool, args.build,
                          'provision', remaining_s())
    _logger.info('Finished provision suite.')

  staging_timer = metrics.SecondsTimer(
      _METRICS_PREFIX + '/durations/push_to_prod_suite',
      add_exception_field=True)
  with staging_timer:
    task_id = client.trigger_suite(args.dut_board, args.dut_pool, args.build,
                                   'skylab_staging_test', remaining_s())
    _logger.info('Triggered skylab_staging_test suite. Task id: %s', task_id)
    _verify_suite_creation(client, task_id)
    _logger.info('Check push_to_prod suite on: \n    %s',
                 client.task_url(task_id))
    client.wait_for_suite(task_id, args.dut_board, args.dut_pool, args.build,
                          'skylab_staging_test', remaining_s())
    _logger.info('Finished skylab_staging_test suite.')

  # task_id now refers to the skylab_staging_test suite.
  _verify_test_results(task_id, _EXPECTED_TEST_RESULTS)
195
196
def _verify_suite_creation(swclient, task_id):
  """Raise TestPushError unless the suite task completed without failure.

  Args:
    swclient: Client used to query the task result.
    task_id: Id of the suite task to check.

  Raises:
    errors.TestPushError: If the task state is not 'COMPLETED' or the task
        result reports a failure.
  """
  task_result = swclient.query('task/%s/result' % task_id, [])
  created_ok = (task_result['state'] == 'COMPLETED'
                and not task_result['failure'])
  if created_ok:
    return
  raise errors.TestPushError('Suite task %s is not successfully created.'
                             % task_id)
203
204
def _verify_test_results(task_id, expected_results):
  """Compare the suite's test results against the expected results.

  Args:
    task_id: Id of the suite task whose child test results are checked.
    expected_results: Dict mapping test name patterns to the expected status.

  Raises:
    errors.TestPushError: If comparing the results yields a non-empty summary.
  """
  _logger.info('Comparing test results...')
  test_views = _get_test_views(task_id)
  available_views = [v for v in test_views if _view_is_preserved(v)]
  # Log through the module-level logger, like the rest of this file, rather
  # than through the root logger.
  _logger.debug('Test results:')
  for v in available_views:
    _logger.debug('%s%s', v['test_name'].ljust(30), v['status'])

  summary = _verify_and_summarize(available_views, expected_results)
  if summary:
    # The summary is data, not a format string, so pass it as an argument.
    _logger.error('%s', '\n'.join(summary))
    raise errors.TestPushError('Test results are not consistent with '
                               'expected results')
219
220
def _get_test_views(task_id):
  """Return the child test views that TKO holds for the given parent task id."""
  tko_connection = autotest.load('tko.db').db()
  return tko_connection.get_child_tests_by_parent_task_id(task_id)
226
227
def _view_is_preserved(view):
  """Tell whether a test view should be kept for the result comparison.

  A view is kept when job_status considers it relevant and it does not belong
  to the suite job itself.
  """
  job_status = autotest.load('server.cros.dynamic_suite.job_status')
  relevant = job_status.view_is_relevant(view)
  if not relevant:
    # Hand back the falsy value unchanged.
    return relevant
  return not job_status.view_is_for_suite_job(view)
233
234
def _verify_and_summarize(available_views, expected_results):
  """Return the test_push summary for the given test views.

  Args:
    available_views: Iterable of test views, each with 'test_name' and
        'status' entries.
    expected_results: Expected results, passed through to summarize_push.

  Returns:
    Whatever test_push_common.summarize_push returns.
  """
  test_push_common = autotest.load('site_utils.test_push_common')
  status_by_test = {}
  for view in available_views:
    # A later view with the same test name overrides an earlier one.
    status_by_test[view['test_name']] = view['status']
  return test_push_common.summarize_push(status_by_test, expected_results,
                                         _IGNORED_TESTS)
241
242
def _ensure_duts_ready(swclient, board, pool, min_duts, timeout_s):
  """Poll until at least min_duts DUTs are ready, or give up on timeout.

  Args:
    swclient: Client used to count ready DUTs.
    board: Board label of the DUTs to count.
    pool: Pool label of the DUTs to count.
    min_duts: Minimum number of ready DUTs required.
    timeout_s: How long to keep polling, in seconds.

  Raises:
    errors.TestPushError: If too few DUTs are ready once timeout_s has
        elapsed.
  """
  began_at = time.time()
  while True:
    _logger.debug('Checking whether %d DUTs are available', min_duts)
    ready_count = swclient.num_ready_duts(board, pool)
    if ready_count >= min_duts:
      break
    elapsed_s = time.time() - began_at
    if elapsed_s > timeout_s:
      raise errors.TestPushError(
          'Could not find %d ready DUTs with (board:%s, pool:%s) within %d'
          ' seconds' % (min_duts, board, pool, timeout_s)
      )
    time.sleep(_POLLING_INTERVAL_S)

  _logger.info(
      '%d available DUTs satisfy the minimum requirement of %d DUTs',
      ready_count, min_duts,
  )
261
262
if __name__ == '__main__':
  # Pass main()'s return value through as the process exit status.
  exit_status = main()
  sys.exit(exit_status)
265