1#!/usr/bin/python
2#
3# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Tool to validate code in prod branch before pushing to lab.
8
9The script runs push_to_prod suite to verify code in prod branch is ready to be
10pushed. Link to design document:
11https://docs.google.com/a/google.com/document/d/1JMz0xS3fZRSHMpFkkKAL_rxsdbNZomhHbC3B8L71uuI/edit
12
To verify whether the prod branch can be pushed to the lab, run the following
command on the chromeos-autotest.cbf server:
/usr/local/autotest/site_utils/test_push.py -e someone@company.com

The script uses the latest gandof stable build as the test build by default.
18
19"""
20
21import argparse
22import ast
23from contextlib import contextmanager
24import getpass
25import multiprocessing
26import os
27import re
28import subprocess
29import sys
30import time
31import traceback
32import urllib2
33
34import common
35try:
36    from autotest_lib.frontend import setup_django_environment
37    from autotest_lib.frontend.afe import models
38    from autotest_lib.frontend.afe import rpc_utils
39except ImportError:
40    # Unittest may not have Django database configured and will fail to import.
41    pass
42from autotest_lib.client.common_lib import global_config
43from autotest_lib.client.common_lib import priorities
44from autotest_lib.client.common_lib.cros import retry
45from autotest_lib.server import site_utils
46from autotest_lib.server import utils
47from autotest_lib.server.cros import provision
48from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
49from autotest_lib.server.hosts import afe_store
50from autotest_lib.site_utils import gmail_lib
51from autotest_lib.site_utils.suite_scheduler import constants
52
# Root of the autotest checkout, as exposed by the local `common` module.
AUTOTEST_DIR=common.autotest_dir
# Shared global_config object; the module-level defaults in this file are read
# from it.
CONFIG = global_config.global_config

# AFE and TKO frontend wrappers, created once at import time and shared by
# every function in this file.
# NOTE(review): timeout_min/delay_sec are presumably the retry budget in
# minutes and the pause between attempts in seconds -- confirm against
# frontend_wrappers.
AFE = frontend_wrappers.RetryingAFE(timeout_min=0.5, delay_sec=2)
TKO = frontend_wrappers.RetryingTKO(timeout_min=0.1, delay_sec=10)
58
MAIL_FROM = 'chromeos-test@google.com'
# Matches the version part of a build name, e.g. R36-5881.0.0. Written as a
# raw string so the backslashes reach the regex engine untouched.
BUILD_REGEX = r'R[\d]+-[\d]+\.[\d]+\.[\d]+'
# Script launched (from this file's directory) to run each suite.
RUN_SUITE_COMMAND = 'run_suite.py'
# Names of the suites exercised by the push test.
PUSH_TO_PROD_SUITE = 'push_to_prod'
DUMMY_SUITE = 'dummy'
AU_SUITE = 'paygen_au_beta'
TESTBED_SUITE = 'testbed_push'
# TODO(shuqianz): Dynamically get android build after crbug.com/646068 fixed
# Default for --timeout_min: minutes to wait for an asynchronous suite job.
DEFAULT_TIMEOUT_MIN_FOR_SUITE_JOB = 30
IMAGE_BUCKET = CONFIG.get_config_value('CROS', 'image_storage_server')
# Default for --email; an empty string disables the notification mails.
DEFAULT_EMAIL = CONFIG.get_config_value(
        'SCHEDULER', 'notify_email', type=str, default='')
# Default for --num_duts: a dict literal (kept as a string so it can be
# overridden on the command line) of platform name to required DUT count.
DEFAULT_NUM_DUTS = "{'gandof': 4, 'quawks': 2, 'testbed': 1}"

# Matched against each run_suite output line; group 1 is the suite job id.
SUITE_JOB_START_INFO_REGEX = (r'^.*Created suite job:.*'
                              r'tab_id=view_job&object_id=(\d+)$')
75
# Each table below maps a regular expression, searched for in a test name, to
# the status that test is expected to report.

# Expected results of the push_to_prod suite.
EXPECTED_TEST_RESULTS = {
    '^SERVER_JOB$': 'GOOD',
    # This is related to dummy_Fail/control.dependency.
    'dummy_Fail.dependency$': 'TEST_NA',
    'login_LoginSuccess.*': 'GOOD',
    'provision_AutoUpdate.double': 'GOOD',
    'dummy_Pass.*': 'GOOD',
    'dummy_Fail.Fail$': 'FAIL',
    'dummy_Fail.RetryFail$': 'FAIL',
    'dummy_Fail.RetrySuccess': 'GOOD',
    'dummy_Fail.Error$': 'ERROR',
    'dummy_Fail.Warn$': 'WARN',
    'dummy_Fail.NAError$': 'TEST_NA',
    'dummy_Fail.Crash$': 'GOOD',
}

# Expected results of the dummy suite.
EXPECTED_TEST_RESULTS_DUMMY = {
    '^SERVER_JOB$': 'GOOD',
    'dummy_Pass.*': 'GOOD',
    'dummy_Fail.Fail': 'FAIL',
    'dummy_Fail.Warn': 'WARN',
    'dummy_Fail.Crash': 'GOOD',
    'dummy_Fail.Error': 'ERROR',
    'dummy_Fail.NAError': 'TEST_NA',
}

# Expected results of the paygen_au_beta suite.
EXPECTED_TEST_RESULTS_AU = {
    'SERVER_JOB$': 'GOOD',
    'autoupdate_EndToEndTest.paygen_au_beta_delta.*': 'GOOD',
    'autoupdate_EndToEndTest.paygen_au_beta_full.*': 'GOOD',
}

# Expected results of the testbed_push suite.
EXPECTED_TEST_RESULTS_TESTBED = {
    '^SERVER_JOB$': 'GOOD',
    'testbed_DummyTest': 'GOOD',
}

# Expected results of the powerwash job used to test the repair workflow.
EXPECTED_TEST_RESULTS_POWERWASH = {
    'platform_Powerwash': 'GOOD',
    'SERVER_JOB': 'GOOD',
}
110
# Host name and URL template used by verify_test_results() to build the link
# to a job's logs: URL_PATTERN % (URL_HOST, '<job id>-<user>').
URL_HOST = CONFIG.get_config_value('SERVER', 'hostname', type=str)
URL_PATTERN = CONFIG.get_config_value('CROS', 'log_url_pattern', type=str)

# Some tests could be missing from the test results for various reasons. Add
# such tests to this list and explain the reason.
# NOTE(review): verify_test_results() removes these entries from the set of
# missing expected-result keys by exact string comparison, so an entry only
# takes effect when it is spelled exactly like a key of the expected-results
# dict. 'login_LoginSuccess' below differs from the 'login_LoginSuccess.*' key
# in EXPECTED_TEST_RESULTS, so it currently ignores nothing -- confirm intent.
IGNORE_MISSING_TESTS = [
    # For latest build, npo_test_delta does not exist.
    'autoupdate_EndToEndTest.npo_test_delta.*',
    # For trybot build, nmo_test_delta does not exist.
    'autoupdate_EndToEndTest.nmo_test_delta.*',
    # Older build does not have login_LoginSuccess test in push_to_prod suite.
    # TODO(dshi): Remove following lines after R41 is stable.
    'login_LoginSuccess']

# Manager-backed lists, so the suite child processes started by main() and the
# parent process see the same data. run_suite_output saves all run_suite
# command output; all_suite_ids collects the id of every suite job created.
manager = multiprocessing.Manager()
run_suite_output = manager.list()
all_suite_ids = manager.list()
# Maps the name of each updated repo to the path of its checkout.
UPDATED_REPOS = {'autotest': AUTOTEST_DIR,
                 'chromite': '%s/site-packages/chromite/' % AUTOTEST_DIR}
# Account that push_prod_next_branch() switches to (via sudo su) to push.
PUSH_USER = 'chromeos-test-lab'
133
class TestPushException(Exception):
    """Raised when any step of the push-to-prod validation fails."""
137
138
@retry.retry(TestPushException, timeout_min=5, delay_sec=30)
def check_dut_inventory(required_num_duts, pool):
    """Check DUT inventory for each board in the pool specified.

    Only hosts that the AFE reports as Ready and unlocked, and that carry the
    pool's label, are counted.

    NOTE(review): the retry decorator presumably re-runs this check on
    TestPushException for up to 5 minutes, 30 seconds apart -- confirm against
    the retry module.

    @param required_num_duts: a dict specifying the number of DUT each platform
                              requires in order to finish push tests.
    @param pool: the pool used by test_push.
    @raise TestPushException: if number of DUTs are less than the requirement.
    """
    print('Checking DUT inventory...')
    pool_label = constants.Labels.POOL_PREFIX + pool
    hosts = AFE.run('get_hosts', status='Ready', locked=False)

    # Tally the pool's hosts per platform in a single pass.
    current_inventory = {}
    for host in hosts:
        if pool_label not in host.get('labels', []):
            continue
        platform = host['platform']
        current_inventory[platform] = current_inventory.get(platform, 0) + 1

    # Collect every shortage so one exception reports all of them.
    shortages = []
    for platform, req_num in required_num_duts.items():
        curr_num = current_inventory.get(platform, 0)
        if curr_num < req_num:
            shortages.append(
                    '\nRequire %d %s DUTs in pool: %s, only %d are Ready now' %
                    (req_num, platform, pool, curr_num))
    if shortages:
        raise TestPushException('Not enough DUTs to run push tests. %s' %
                                ''.join(shortages))
163
164
def powerwash_dut_to_test_repair(hostname, timeout):
    """Run platform_Powerwash on a DUT, then reverify it.

    The powerwash job is created at SUPER priority from the test's control
    file. Once TKO has results for the job they are checked against
    EXPECTED_TEST_RESULTS_POWERWASH, and a reverify of the host is requested.

    @param hostname: hostname of the dut.
    @param timeout: seconds to wait for powerwash results before giving up.
    @raise TestPushException: if no results show up within the timeout (the
                              job is aborted first) or the results are not the
                              expected ones.
    """
    powerwash_test = models.Test.objects.get(name='platform_Powerwash')
    control_path = os.path.join(common.autotest_dir, powerwash_test.path)
    control_file = utils.read_file(control_path)
    job_id = rpc_utils.create_job_common(
            'powerwash', priority=priorities.Priority.SUPER,
            control_type='Server', control_file=control_file,
            hosts=[hostname])

    deadline = time.time() + timeout
    while True:
        if TKO.get_job_test_statuses_from_db(job_id):
            break
        if time.time() >= deadline:
            AFE.run('abort_host_queue_entries', job=job_id)
            raise TestPushException(
                    'Powerwash test on %s timeout after %ds, abort it.' %
                    (hostname, timeout))
        time.sleep(10)

    verify_test_results(job_id, EXPECTED_TEST_RESULTS_POWERWASH)
    # Kick off verify, verify will fail and a repair should be triggered.
    AFE.reverify_hosts(hostnames=[hostname])
190
def reverify_all_push_duts():
    """Request a reverify of every host returned by the AFE."""
    print('Reverifying all DUTs.')
    hostnames = []
    for host in AFE.get_hosts():
        hostnames.append(host.hostname)
    AFE.reverify_hosts(hostnames=hostnames)
196
197
def get_default_build(board='gandof', server='chromeos-autotest.hot'):
    """Get the default build to be used for test.

    Asks `atest stable_version list` on the given web server for the board's
    stable version. If the output contains no build version, the version
    configured as CROS/stable_cros_version is used instead.

    @param board: Name of board to be tested, default is gandof.
    @param server: Web server to query for the stable version.
    @return: Build to be tested, e.g., gandof-release/R36-5881.0.0
    @raise subprocess.CalledProcessError: if atest exits with non-zero status.
    """
    # An argument list (no shell) keeps board/server from ever being
    # interpreted by a shell, whatever characters they contain.
    cmd = ['%s/cli/atest' % AUTOTEST_DIR, 'stable_version', 'list',
           '--board=%s' % board, '-w', server]
    result = subprocess.check_output(cmd).strip()
    match = re.search(BUILD_REGEX, result)
    if match:
        version = match.group(0)
    else:
        # If fail to get stable version from cautotest, use that defined in
        # config.
        version = CONFIG.get_config_value('CROS', 'stable_cros_version')
    return '%s-release/%s' % (board, version)
215
def parse_arguments():
    """Parse arguments for test_push tool.

    Besides plain parsing, this converts --num_duts from its dict-literal
    string into a dict, and fills in --build / --shard_build with the stable
    build of the respective board when they are not given.

    @return: Parsed arguments.

    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-b', '--board', dest='board', default='gandof',
                        help='Default is gandof.')
    parser.add_argument('-sb', '--shard_board', dest='shard_board',
                        default='quawks',
                        help='Default is quawks.')
    parser.add_argument('-i', '--build', dest='build', default=None,
                        help='Default is the latest stable build of given '
                             'board. Must be a stable build, otherwise AU test '
                             'will fail. (ex: gandof-release/R54-8743.25.0)')
    parser.add_argument('-si', '--shard_build', dest='shard_build', default=None,
                        help='Default is the latest stable build of given '
                             'board. Must be a stable build, otherwise AU test '
                             'will fail.')
    parser.add_argument('-w', '--web', default='chromeos-autotest.hot',
                        help='Specify web server to grab stable version from.')
    parser.add_argument('-ab', '--android_board', dest='android_board',
                        default='shamu-2', help='Android board to test.')
    parser.add_argument('-ai', '--android_build', dest='android_build',
                        help='Android build to test.')
    parser.add_argument('-p', '--pool', dest='pool', default='bvt')
    parser.add_argument('-u', '--num', dest='num', type=int, default=3,
                        help='Run on at most NUM machines.')
    parser.add_argument('-e', '--email', dest='email', default=DEFAULT_EMAIL,
                        help='Email address for the notification to be sent to '
                             'after the script finished running.')
    parser.add_argument('-t', '--timeout_min', dest='timeout_min', type=int,
                        default=DEFAULT_TIMEOUT_MIN_FOR_SUITE_JOB,
                        help='Time in mins to wait before abort the jobs we '
                             'are waiting on. Only for the asynchronous suites '
                             'triggered by create_and_return flag.')
    parser.add_argument('-ud', '--num_duts', dest='num_duts',
                        default=DEFAULT_NUM_DUTS,
                        help="String of dict that indicates the required number"
                             " of DUTs for each board. E.g {'gandof':4}")
    parser.add_argument('-c', '--continue_on_failure', action='store_true',
                        dest='continue_on_failure',
                        help='All tests continue to run when there is failure')

    arguments = parser.parse_args(sys.argv[1:])

    # Validate --num_duts before the stable-version lookups below, so a typo
    # is reported as a usage error right away instead of surfacing later as
    # a traceback. parser.error() prints the message and exits.
    try:
        num_duts = ast.literal_eval(arguments.num_duts)
    except (ValueError, SyntaxError) as e:
        parser.error("--num_duts must be a dict literal such as "
                     "{'gandof': 4}: %s" % e)
    if not isinstance(num_duts, dict):
        parser.error("--num_duts must be a dict literal such as "
                     "{'gandof': 4}, got: %r" % (num_duts,))
    arguments.num_duts = num_duts

    # Get latest stable build as default build.
    if not arguments.build:
        arguments.build = get_default_build(arguments.board, arguments.web)
    if not arguments.shard_build:
        arguments.shard_build = get_default_build(arguments.shard_board,
                                                  arguments.web)

    return arguments
273
274
def do_run_suite(suite_name, arguments, use_shard=False,
                 create_and_return=False, testbed_test=False):
    """Call run_suite to run a suite job, and return the suite job id.

    Before run_suite is launched, the provisioned-version labels are removed
    from every unlocked host of the board so that the suite has to provision;
    for a synchronous shard run each of those hosts is also powerwashed to
    exercise the repair workflow.

    The script waits the suite job to finish before returning the suite job id.
    Also it will echo the run_suite output to stdout and save it in
    run_suite_output.

    @param suite_name: Name of a suite, e.g., dummy.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.

    @return: Suite job ID.
    @raise TestPushException: if the suite job id can not be found in the
                              run_suite output, or an asynchronous suite does
                              not finish within arguments.timeout_min.

    """
    # testbed_test takes precedence over use_shard.
    if testbed_test:
        board = arguments.android_board
        build = arguments.android_build
    elif use_shard:
        board = arguments.shard_board
        build = arguments.shard_build
    else:
        board = arguments.board
        build = arguments.build

    # Remove cros-version label to force provision.
    version_prefixes = (provision.CROS_VERSION_PREFIX,
                        provision.TESTBED_BUILD_VERSION_PREFIX)
    hosts = AFE.get_hosts(label=constants.Labels.BOARD_PREFIX+board,
                          locked=False)
    for host in hosts:
        labels_to_remove = [l for l in host.labels
                            if l.startswith(version_prefixes)]
        if labels_to_remove:
            AFE.run('host_remove_labels', id=host.id, labels=labels_to_remove)

        # Test repair work flow on shards, powerwash test will timeout after 7m.
        if use_shard and not create_and_return:
            powerwash_dut_to_test_repair(host.hostname, timeout=420)

    current_dir = os.path.dirname(os.path.realpath(__file__))
    cmd = [os.path.join(current_dir, RUN_SUITE_COMMAND),
           '-s', suite_name,
           '-b', board,
           '-i', build,
           '-p', arguments.pool,
           '-u', str(arguments.num)]
    if create_and_return:
        cmd += ['-c']
    if testbed_test:
        cmd += ['--run_prod_code']

    suite_job_id = None

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)

    # Read until EOF, then wait for run_suite to exit. Polling the process
    # after every read would spin -- echoing and recording empty lines -- in
    # the window between the pipe reaching EOF and the process being reaped.
    for line in iter(proc.stdout.readline, ''):
        stripped_line = line.rstrip()
        print(stripped_line)
        run_suite_output.append(stripped_line)

        if not suite_job_id:
            m = re.match(SUITE_JOB_START_INFO_REGEX, line)
            if m and m.group(1):
                suite_job_id = int(m.group(1))
                all_suite_ids.append(suite_job_id)
    # The exit status is deliberately not checked: the result of the suite is
    # judged from its test results by the caller.
    proc.wait()

    if not suite_job_id:
        raise TestPushException('Failed to retrieve suite job ID.')

    # If create_and_return specified, wait for the suite to finish.
    if create_and_return:
        end = time.time() + arguments.timeout_min * 60
        while not AFE.get_jobs(id=suite_job_id, finished=True):
            if time.time() >= end:
                AFE.run('abort_host_queue_entries', job=suite_job_id)
                raise TestPushException(
                        'Asynchronous suite triggered by create_and_return '
                        'flag has timed out after %d mins. Aborting it.' %
                        arguments.timeout_min)
            time.sleep(10)

    print('Suite job %s is completed.' % suite_job_id)
    return suite_job_id
367
368
def check_dut_image(build, suite_job_id):
    """Confirm all DUTs used for the suite are imaged to expected build.

    The DUTs are found through the first host queue entry of every child job
    of the suite job; the build of each is read from its AFE host info.

    @param build: Expected build to be imaged.
    @param suite_job_id: job ID of the suite job.
    @raise TestPushException: If a DUT does not have expected build imaged.
    """
    print('Checking image installed in DUTs...')
    child_jobs = models.Job.objects.filter(parent_job_id=suite_job_id)
    job_ids = [child.id for child in child_jobs]

    first_hqes = []
    for job_id in job_ids:
        entries = models.HostQueueEntry.objects.filter(job_id=job_id)
        first_hqes.append(entries[0])

    hostnames = {hqe.host.hostname for hqe in first_hqes}
    for hostname in hostnames:
        info = afe_store.AfeStore(hostname, AFE).get()
        if info.build == build:
            continue
        raise TestPushException('DUT is not imaged properly. Host %s has '
                                'build %s, while build %s is expected.' %
                                (hostname, info.build, build))
389
390
def test_suite(suite_name, expected_results, arguments, use_shard=False,
               create_and_return=False, testbed_test=False):
    """Run a suite through run_suite, then check DUT images and test results.

    @param suite_name: Name of a suite, e.g., dummy
    @param expected_results: A dictionary of test name to test result.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.
    """
    suite_job_id = do_run_suite(suite_name, arguments, use_shard,
                                create_and_return, testbed_test)

    if testbed_test:
        build_expected = arguments.android_build
    else:
        build_expected = arguments.build

    # The image check is skipped for the AU suite and for testbed runs. It is
    # also skipped for jobs running in shard: hqe.host_id of those jobs is not
    # synced back to master db, so their DUTs can not be looked up.
    skip_image_check = (suite_name == AU_SUITE or use_shard or testbed_test)
    if not skip_image_check:
        check_dut_image(build_expected, suite_job_id)

    # Verify test results are the expected results.
    verify_test_results(suite_job_id, expected_results)
416
417
def verify_test_results(job_id, expected_results):
    """Verify the test results with the expected results.

    Three kinds of problems are collected and reported together: tests whose
    status differs from the expected one, tests that match no expected key,
    and expected keys that no test matched. Finally the link to the job's logs
    is fetched to confirm that it loads.

    @param job_id: id of the running jobs. For suite job, it is suite_job_id.
    @param expected_results: A dictionary of test name to test result. Keys
                             are regular expressions searched for in the test
                             names.
    @raise TestPushException: If verify fails.
    """
    print('Comparing test results...')
    test_views = site_utils.get_test_views_from_tko(job_id, TKO)

    mismatch_errors = []
    extra_test_errors = []

    found_keys = set()
    for test_name, test_status in test_views.items():
        print("%s%s" % (test_name.ljust(30), test_status))
        # platform_InstallTestImage test may exist in old builds.
        if re.search('platform_InstallTestImage_SERVER_JOB$', test_name):
            continue
        matched_keys = [key for key in expected_results
                        if re.search(key, test_name)]
        if not matched_keys:
            extra_test_errors.append(test_name)
            continue
        found_keys.update(matched_keys)
        for key in matched_keys:
            expected_status = expected_results[key]
            if expected_status != test_status:
                mismatch_errors.append(
                        '%s Expected: [%s], Actual: [%s]' %
                        (test_name, expected_status, test_status))

    # Entries of IGNORE_MISSING_TESTS are dropped by exact comparison with
    # the expected-result keys, so an entry must be spelled exactly like the
    # key it is meant to excuse.
    missing_test_errors = set(expected_results) - found_keys
    missing_test_errors.difference_update(IGNORE_MISSING_TESTS)

    summary = []
    if mismatch_errors:
        summary.append(('Results of %d test(s) do not match expected '
                        'values:') % len(mismatch_errors))
        summary.extend(mismatch_errors)
        summary.append('\n')

    if extra_test_errors:
        summary.append('%d test(s) are not expected to be run:' %
                       len(extra_test_errors))
        summary.extend(extra_test_errors)
        summary.append('\n')

    if missing_test_errors:
        summary.append('%d test(s) are missing from the results:' %
                       len(missing_test_errors))
        # Sorted so that the report reads the same from run to run.
        summary.extend(sorted(missing_test_errors))
        summary.append('\n')

    # Test link to log can be loaded.
    job_name = '%s-%s' % (job_id, getpass.getuser())
    log_link = URL_PATTERN % (URL_HOST, job_name)
    try:
        response = urllib2.urlopen(log_link)
        try:
            response.read()
        finally:
            # Release the connection whether or not the read succeeded.
            response.close()
    except urllib2.URLError:
        summary.append('Failed to load page for link to log: %s.' % log_link)

    if summary:
        raise TestPushException('\n'.join(summary))
485
486
487def test_suite_wrapper(queue, suite_name, expected_results, arguments,
488                       use_shard=False, create_and_return=False,
489                       testbed_test=False):
490    """Wrapper to call test_suite. Handle exception and pipe it to parent
491    process.
492
493    @param queue: Queue to save exception to be accessed by parent process.
494    @param suite_name: Name of a suite, e.g., dummy
495    @param expected_results: A dictionary of test name to test result.
496    @param arguments: Arguments for run_suite command.
497    @param use_shard: If true, suite is scheduled for shard board.
498    @param create_and_return: If True, run_suite just creates the suite, print
499                              the job id, then finish immediately.
500    @param testbed_test: True to run testbed test. Default is False.
501    """
502    try:
503        test_suite(suite_name, expected_results, arguments, use_shard,
504                   create_and_return, testbed_test)
505    except:
506        # Store the whole exc_info leads to a PicklingError.
507        except_type, except_value, tb = sys.exc_info()
508        queue.put((except_type, except_value, traceback.extract_tb(tb)))
509
510
def check_queue(queue):
    """Check the queue for any exception being raised.

    Items are expected in the form put by test_suite_wrapper: a tuple of
    (exception type, exception value, extracted traceback entries). At most
    one item is consumed per call.

    @param queue: Queue used to store exception for parent process to access.
    @raise: Any exception found in the queue.
    """
    if queue.empty():
        return
    exc_type, exc_value, tb_entries = queue.get()
    # The traceback object itself can not cross the process boundary, so
    # print the extracted entries in the usual traceback layout.
    print('Original stack trace of the exception:\n%s' %
          ''.join(traceback.format_list(tb_entries)))
    # Re-raise the original instance. Building a new exc_type(exc_value)
    # fails with a TypeError for exception types whose constructor needs
    # more than one argument (e.g. subprocess.CalledProcessError), which
    # would hide the real failure.
    if isinstance(exc_value, BaseException):
        raise exc_value
    raise exc_type(exc_value)
523
524
def get_head_of_repos(repos):
    """Get HEAD of updated repos, currently are autotest and chromite repos

    @param repos: a map of repo name to the path of the repo. E.g.
                  {'autotest': '/usr/local/autotest'}
    @return: a map of repo names to the current HEAD of that repo.
    @raise subprocess.CalledProcessError: if git fails in one of the repos.
    """
    updated_repo_heads = {}
    for repo_name, path_to_repo in repos.items():
        # Run git inside the repo through cwd= instead of changing the
        # working directory of this whole process, and without a shell.
        head = subprocess.check_output(
                ['git', 'rev-parse', 'HEAD'],
                cwd=os.path.expanduser(path_to_repo)).strip()
        updated_repo_heads[repo_name] = head
    return updated_repo_heads
552
553
def push_prod_next_branch(updated_repo_heads):
    """Move the prod-next branch of every repo to its tested HEAD and push it.

    The push command must be ran as PUSH_USER, since only PUSH_USER has the
    right to push branches. The output of each push command is printed.

    @param updated_repo_heads: a map of repo names to tested HEAD of that repo.
    """
    # prod-next branch for every repo is downloaded under PUSH_USER home dir.
    git_steps = ('cd ~/{repo}; git pull; git rebase {hash} prod-next;'
                 'git push origin prod-next')
    # {repo} and {hash} stay as placeholders here; they are filled in per repo.
    command_template = "sudo su - %s -c '%s'" % (PUSH_USER, git_steps)

    for repo_name, test_hash in updated_repo_heads.items():
        push_cmd = command_template.format(repo=repo_name, hash=test_hash)
        print('Pushing %s prod-next branch to %s' % (repo_name, test_hash))
        push_output = subprocess.check_output(
                push_cmd, stderr=subprocess.STDOUT, shell=True)
        print(push_output)
572
573
def main():
    """Entry point for test_push script.

    Reverifies the DUTs, checks the DUT inventory, runs the push_to_prod, AU,
    dummy (asynchronous) and testbed suites in parallel child processes and,
    when all of them pass, pushes the prod-next branches. Any failure is
    printed, optionally mailed, and re-raised. The DUTs are reverified again
    at the end in either case.
    """
    arguments = parse_arguments()
    updated_repo_heads = get_head_of_repos(UPDATED_REPOS)
    updated_repo_msg = '\n'.join(
        ['%s: %s' % (k, v) for k, v in updated_repo_heads.items()])

    try:
        # Use daemon flag will kill child processes when parent process fails.
        use_daemon = not arguments.continue_on_failure
        # Verify all the DUTs at the beginning of testing push.
        reverify_all_push_duts()
        time.sleep(15) # Wait 15 secs for the verify test to start.
        check_dut_inventory(arguments.num_duts, arguments.pool)
        queue = multiprocessing.Queue()

        # One entry per suite, started in this order:
        # (suite name, expected results, test_suite_wrapper options, daemon)
        suite_specs = [
            (PUSH_TO_PROD_SUITE, EXPECTED_TEST_RESULTS, {}, use_daemon),
            # TODO(dshi): Remove following line after crbug.com/267644 is
            # fixed. Also, merge EXPECTED_TEST_RESULTS_AU to
            # EXPECTED_TEST_RESULTS.
            # AU suite will be on shard until crbug.com/634049 is fixed.
            (AU_SUITE, EXPECTED_TEST_RESULTS_AU, {'use_shard': True},
             use_daemon),
            # suite test with --create_and_return flag; always a daemon.
            (DUMMY_SUITE, EXPECTED_TEST_RESULTS_DUMMY,
             {'create_and_return': True}, True),
            # Test suite for testbed
            (TESTBED_SUITE, EXPECTED_TEST_RESULTS_TESTBED,
             {'testbed_test': True}, use_daemon),
        ]
        suite_processes = []
        for suite_name, expected_results, options, daemon in suite_specs:
            suite_process = multiprocessing.Process(
                    target=test_suite_wrapper,
                    args=(queue, suite_name, expected_results, arguments),
                    kwargs=options)
            suite_process.daemon = daemon
            suite_process.start()
            suite_processes.append(suite_process)

        # Surface a child's failure as soon as it is reported, without
        # waiting for the remaining suites.
        while any(p.is_alive() for p in suite_processes):
            check_queue(queue)
            time.sleep(5)

        # A failure may have been queued right before the last child exited.
        check_queue(queue)

        for suite_process in suite_processes:
            suite_process.join()

        # All tests pass, push prod-next branch for UPDATED_REPOS.
        push_prod_next_branch(updated_repo_heads)
    except Exception as e:
        print('Test for pushing to prod failed:\n')
        print(str(e))
        # Abort running jobs when choose not to continue when there is failure.
        if not arguments.continue_on_failure:
            for suite_id in all_suite_ids:
                if AFE.get_jobs(id=suite_id, finished=False):
                    AFE.run('abort_host_queue_entries', job=suite_id)
        # Send out email about the test failure.
        if arguments.email:
            # The run_suite output is appended after a blank line so that it
            # does not run into the last sentence of the message.
            failure_body = (
                    'Test CLs of the following repos failed. Below are the '
                    'repos and the corresponding test HEAD.\n\n%s\n\n'
                    'Error occurred during test:\n\n%s\n\n'
                    'All logs have been saved to '
                    '/var/log/test_push/test_push.log on push master. Detail '
                    'debugging info can be found at go/push-to-prod\n\n' %
                    (updated_repo_msg, str(e)))
            gmail_lib.send_email(
                    arguments.email,
                    'Test for pushing to prod failed. Do NOT push!',
                    failure_body + '\n'.join(run_suite_output))
        raise
    finally:
        # Reverify all the hosts
        reverify_all_push_duts()

    message = ('\nAll tests are completed successfully, the prod branch of the '
               'following repos ready to be pushed to the hash list below.\n'
               '%s\n\n\nInstructions for pushing to prod are available at '
               'https://goto.google.com/autotest-to-prod' % updated_repo_msg)
    print(message)
    # Send out email about test completed successfully.
    if arguments.email:
        gmail_lib.send_email(
                arguments.email,
                'Test for pushing to prod completed successfully',
                message)
673
674
# main() has no return statement, so a successful run exits with status 0;
# a failed run leaves main() through the exception it re-raises.
if __name__ == '__main__':
    sys.exit(main())
677