1#!/usr/bin/python
2# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Runs on autotest servers from a cron job to self update them.
7
8This script is designed to run on all autotest servers to allow them to
9automatically self-update based on the manifests used to create their (existing)
10repos.
11"""
12
13from __future__ import print_function
14
15import ConfigParser
16import argparse
17import os
18import re
19import socket
20import subprocess
21import sys
22import time
23
24import common
25
26from autotest_lib.client.common_lib import global_config
27from autotest_lib.server import utils as server_utils
28from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
29
30
# How long after restarting a service do we watch it to see if it's stable.
# The value is passed to time.sleep(), so it is in seconds.
SERVICE_STABILITY_TIMER = 60

# Commands that only apply to the primary server. For example, test_importer
# should only be run on the primary master scheduler: if two servers both run
# test_importer, it may fail because both try to update the same table.
PRIMARY_ONLY_COMMANDS = ['test_importer']
# Maps update_commands defined in the config file to the repos or files
# (paths under common.autotest_dir) whose changes decide whether the command
# needs to be re-run. E.g. if nothing changed under the frontend repo, there
# is no need to update afe.
COMMANDS_TO_REPOS_DICT = {'afe': 'frontend/',
                          'tko': 'tko/'}
# Name of the update command that receives the extra --use_chromite_master
# argument when chromite should be updated to master.
BUILD_EXTERNALS_COMMAND = 'build_externals'
# Services present on all hosts.
UNIVERSAL_SERVICES = ['sysmon']

# AFE client constructed at import time; it is queried to find out whether
# this host is registered as a primary server.
AFE = frontend_wrappers.RetryingAFE(
        server=server_utils.get_global_afe_hostname(), timeout_min=5,
        delay_sec=10)
51
class DirtyTreeException(Exception):
    """Signals that the checkout is not clean when it was expected to be."""
54
55
class UnknownCommandException(Exception):
    """Signals that a command name has no command line configured for it."""
58
59
class UnstableServices(Exception):
    """Signals that one or more services look unstable after a restart."""
62
63
# Matches one ANSI CSI control sequence: ESC '[', optional parameter bytes
# (0x30-0x3F), optional intermediate bytes (0x20-0x2F), then exactly one
# final byte (0x40-0x7E). Colour/attribute (SGR) codes are the case where the
# final byte is 'm'.
_CSI_SEQUENCE_RE = re.compile(r'\x1b\[[0-?]*[ -/]*[@-~]')


def strip_terminal_codes(text):
    """Remove terminal control sequences from a string.

    Every CSI sequence is removed, not only the ones that end in 'm'. A
    pattern that scans ahead for the next 'm' would delete ordinary text that
    follows a non-colour sequence such as ESC[K (erase line).

    @param text: String of text to cleanup.
    @returns String with the control sequences removed.
    """
    return _CSI_SEQUENCE_RE.sub('', text)
72
73
def verify_repo_clean():
    """Reset the current checkout, then confirm that repo reports it clean.

    Runs "git reset --hard" in the current directory and inspects the output
    of "repo status" (with terminal codes stripped).

    @raises DirtyTreeException if the status output does not report a clean
                               working directory; it carries that output.
    @raises subprocess.CalledProcessError on a git or repo command failure.
    """
    subprocess.check_output(['git', 'reset', '--hard'])
    raw_status = subprocess.check_output(['repo', 'status'],
                                         stderr=subprocess.STDOUT)
    status = strip_terminal_codes(raw_status).strip()

    if 'working directory clean' in status:
        return
    raise DirtyTreeException(status)
86
87
def repo_versions():
    """Collect the HEAD version of every git project in the repo checkout.

    Runs "repo forall" and expects its output as four-line blocks:

        project chrome_build/
        /dir/holding/chrome_build
        73dee9d
        <blank line, missing after the final block>

    @returns A dictionary mapping each project name (without the trailing
             '/') to a (project directory, abbreviated HEAD hash) tuple.
    @raises subprocess.CalledProcessError on a repo command failure.
    """
    forall_cmd = ['repo', 'forall', '-p', '-c',
                  'pwd && git log -1 --format=%h']
    raw_output = subprocess.check_output(forall_cmd)
    lines = strip_terminal_codes(raw_output).splitlines()

    prefix = 'project '
    heads = {}

    # Walk the output one block at a time; the blank separator line at
    # offset 3 is never read, so its absence after the last block is fine.
    index = 0
    while index < len(lines):
        header = lines[index]
        checkout_dir = lines[index + 1]
        head_hash = lines[index + 2]

        # Convert 'project chrome_build/' -> 'chrome_build'
        assert header.startswith(prefix)
        heads[header[len(prefix):].rstrip('/')] = (checkout_dir, head_hash)

        index += 4

    return heads
125
126
def repo_versions_to_decide_whether_run_cmd_update():
    """Collect versions of the repos/files named in COMMANDS_TO_REPOS_DICT.

    The update_commands defined in config files do not need to run on every
    update, only when the repos/files they depend on have changed. Comparing
    the result of this function from before and after a sync shows which
    commands are affected.

    @returns A set of tuples: {(cmd, repo_version), ...} where repo_version
             is the abbreviated hash of the last commit touching the path.
    @raises subprocess.CalledProcessError on a git command failure.
    """
    versions = set()
    for cmd, repo in COMMANDS_TO_REPOS_DICT.items():
        watched_path = '%s/%s' % (common.autotest_dir, repo)
        git_cmd = ['git', 'log', '-1', '--pretty=tformat:%h', watched_path]
        last_change = subprocess.check_output(git_cmd).strip()
        versions.add((cmd, last_change))
    return versions
143
144
def repo_sync(update_push_servers=False):
    """Sync the repo and check out the branch this server should run.

    @param update_push_servers: If True, check out cros/master (test_push
                                servers track ToT). Otherwise check out
                                cros/prod.
    @raises subprocess.CalledProcessError on a repo or git command failure.
    """
    subprocess.check_output(['repo', 'sync'])

    if update_push_servers:
        announcement = 'Updating push servers, checkout cros/master'
        branch = 'cros/master'
    else:
        announcement = 'Updating server to prod branch'
        branch = 'cros/prod'
    print(announcement)
    subprocess.check_output(['git', 'checkout', branch],
                            stderr=subprocess.STDOUT)

    # Remove .pyc files via pyclean, which is a package on all ubuntu server.
    # This is best effort: a failure is reported but does not abort the sync.
    print('Removing .pyc files')
    try:
        subprocess.check_output(['pyclean', '.', '-q'])
    except Exception as err:
        print('Warning: fail to remove .pyc! %s' % err)
167
def discover_update_commands():
    """Look up the update commands configured for this server.

    Reads the 'commands' value of the UPDATE config section. These commonly
    come from shadow_config.ini, since they vary by server type.

    @returns List of command names in string format; empty if the value
             cannot be looked up.
    """
    config = global_config.global_config
    try:
        commands = config.get_config_value('UPDATE', 'commands', type=list)
    except (ConfigParser.NoSectionError, global_config.ConfigError):
        commands = []
    return commands
181
182
def discover_restart_services():
    """Work out which services need restarting on the current server.

    The result always starts with UNIVERSAL_SERVICES, followed by the
    'services' value of the UPDATE config section when it can be looked up.
    That value commonly comes from shadow_config.ini, since it varies by
    server type.

    @returns List of service names in string format.
    """
    try:
        # Look up services from shadow_config.ini.
        configured = global_config.global_config.get_config_value(
                'UPDATE', 'services', type=list)
    except (ConfigParser.NoSectionError, global_config.ConfigError):
        configured = []

    # Copy so the module-level list is never modified.
    services = list(UNIVERSAL_SERVICES)
    services.extend(configured)
    return services
199
200
def update_command(cmd_tag, dryrun=False, use_chromite_master=False):
    """Run one named update command.

    The command name is looked up in the UPDATE_COMMANDS config section to
    find the full command line, every 'AUTOTEST_REPO' in it is replaced with
    the autotest directory, and the result is run through the shell.

    @param cmd_tag: Name of the command to run.
    @param dryrun: If true print the command that would have been run.
    @param use_chromite_master: True if updating chromite to master, rather
                                than prod. Only affects build_externals.

    @raises UnknownCommandException If cmd_tag can't be looked up.
    @raises subprocess.CalledProcessError on a command failure.
    """
    # The available commands are intended to live in global_config.ini so
    # that they can be shared everywhere.
    available = dict(
            global_config.global_config.config.items('UPDATE_COMMANDS'))
    if cmd_tag not in available:
        raise UnknownCommandException(cmd_tag, available)

    command_line = available[cmd_tag].replace('AUTOTEST_REPO',
                                              common.autotest_dir)

    # When updating push servers, pass an arg to build_externals to update
    # chromite to master branch for testing.
    if cmd_tag == BUILD_EXTERNALS_COMMAND and use_chromite_master:
        command_line += ' --use_chromite_master'

    print('Running: %s: %s' % (cmd_tag, command_line))
    if dryrun:
        print('Skip: %s' % command_line)
        return

    try:
        subprocess.check_output(command_line, shell=True,
                                stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as failure:
        # Show the captured output before letting the failure propagate.
        print('FAILED:')
        print(failure.output)
        raise
241
242
def restart_service(service_name, dryrun=False):
    """Restart a single service.

    Restarts the standard service with "sudo service <name> restart".

    @param service_name: The name of the service to restart.
    @param dryrun: Don't really run anything, just print out the command.

    @raises subprocess.CalledProcessError on a command failure.
    """
    restart_cmd = ['sudo', 'service', service_name, 'restart']
    print('Restarting: %s' % service_name)
    if not dryrun:
        subprocess.check_call(restart_cmd, stderr=subprocess.STDOUT)
        return
    print('Skip: %s' % ' '.join(restart_cmd))
259
260
def service_status(service_name):
    """Return the output of "sudo status <name>" for a given service.

    The output is expected to contain the pid, and so to change if the
    service is shut down or restarted for any reason.

    @param service_name: The name of the service to check on.

    @returns The output of the external command.
             Ex: autofs start/running, process 1931

    @raises subprocess.CalledProcessError on a command failure.
    """
    status_cmd = ['sudo', 'status', service_name]
    return subprocess.check_output(status_cmd)
275
276
def restart_services(service_names, dryrun=False, skip_service_status=False):
    """Restart services and check that they stay up.

    All services are restarted first. After SERVICE_STABILITY_TIMER seconds
    the status of each one is recorded, and after a second wait of the same
    length the status is read again. A service whose status changed between
    the two readings is considered unstable.

    @param service_names: The list of service to restart and monitor.
    @param dryrun: Don't really restart the service, just print out the command.
    @param skip_service_status: Set to True to skip service status check.
                                Default is False.

    @raises subprocess.CalledProcessError on a command failure.
    @raises UnstableServices if any services are unstable after restart.
    """
    for name in service_names:
        restart_service(name, dryrun=dryrun)
    if dryrun:
        # Nothing was really restarted, so there is nothing to monitor.
        return

    # Skip service status check if --skip-service-status is specified. Used for
    # servers in backup status.
    if skip_service_status:
        print('--skip-service-status is specified, skip checking services.')
        return

    # Wait for a while to let the services settle, then take a baseline.
    time.sleep(SERVICE_STABILITY_TIMER)
    baseline = {}
    for name in service_names:
        baseline[name] = service_status(name)

    # Wait again and look for any services that changed status.
    time.sleep(SERVICE_STABILITY_TIMER)
    changed = []
    for name in service_names:
        if service_status(name) != baseline[name]:
            changed.append(name)

    # Report any services having issues.
    if changed:
        raise UnstableServices(changed)
320
321
def run_deploy_actions(cmds_skip=None, dryrun=False,
                       skip_service_status=False, use_chromite_master=False):
    """Run the configured update commands, then restart services.

    Update commands run in the order in which the configuration lists them
    (duplicates are run once), so commands that depend on an earlier one
    behave the same way on every run.

    @param cmds_skip: Collection of cmds no need to run since the
                      corresponding repo/file does not change. None (the
                      default) or an empty collection skips nothing.
    @param dryrun: Don't really restart the service, just print out the command.
    @param skip_service_status: Set to True to skip service status check.
                                Default is False.
    @param use_chromite_master: True if updating chromite to master, rather
                                than prod.

    @raises subprocess.CalledProcessError on a command failure.
    @raises UnstableServices if any services are unstable after restart.
    """
    skipped = frozenset(cmds_skip or ())

    # Filter into a list rather than using set arithmetic: a set would throw
    # away the order given in the configuration.
    cmds = []
    for cmd in discover_update_commands():
        if cmd not in skipped and cmd not in cmds:
            cmds.append(cmd)

    if cmds:
        print('Running update commands:', ', '.join(cmds))
        for cmd in cmds:
            # Primary-only commands are skipped unless the AFE lists this
            # host as a primary server.
            if (cmd in PRIMARY_ONLY_COMMANDS and
                not AFE.run('get_servers', hostname=socket.getfqdn(),
                            status='primary')):
                print('Command %s is only applicable to primary servers.' % cmd)
                continue
            update_command(cmd, dryrun=dryrun,
                           use_chromite_master=use_chromite_master)

    services = discover_restart_services()
    if services:
        print('Restarting Services:', ', '.join(services))
        restart_services(services, dryrun=dryrun,
                         skip_service_status=skip_service_status)
355
356
def report_changes(versions_before, versions_after):
    """Produce a report describing what changed in all repos.

    With a non-empty versions_after, every project found in either snapshot
    gets a section saying whether it was added, removed, unchanged, or
    listing the one-line git log between the two HEADs. With an empty
    versions_after (no update was performed) the report just lists the HEAD
    hash of every project in versions_before.

    @param versions_before: Results of repo_versions() from before the update.
    @param versions_after: Results of repo_versions() from after the update,
                           or an empty collection if there was no update.

    @returns string containing a human friendly changes report.
    @raises subprocess.CalledProcessError if git log fails for a project.
    """
    result = []

    if not versions_after:
        for project in sorted(versions_before):
            _, before_hash = versions_before[project]
            result.append('%s: %s' % (project, before_hash))
        result.append('')
        return '\n'.join(result)

    # A set union works on both lists and dict views, unlike keys() + keys().
    for project in sorted(set(versions_before) | set(versions_after)):
        result.append('%s:' % project)

        if project not in versions_before:
            result.append('Added.')
        elif project not in versions_after:
            result.append('Removed.')
        else:
            _, before_hash = versions_before[project]
            after_dir, after_hash = versions_after[project]

            if before_hash == after_hash:
                result.append('No Change.')
            else:
                hashes = '%s..%s' % (before_hash, after_hash)
                cmd = ['git', 'log', hashes, '--oneline']
                out = subprocess.check_output(cmd, cwd=after_dir,
                                              stderr=subprocess.STDOUT)
                # Keep the report a single text type so the join below
                # cannot mix bytes and str.
                if not isinstance(out, str):
                    out = out.decode('utf-8', 'replace')
                result.append(out.strip())

        result.append('')

    return '\n'.join(result)
398
399
def parse_arguments(args):
    """Parse command line arguments.

    Besides plain parsing, two flags imply others: --actions-only turns off
    verify, update and report; --dryrun turns off verify and update.

    @param args: The command line arguments to parse. (usually sys.argv[1:])

    @returns An argparse.Namespace populated with argument values.
    """
    parser = argparse.ArgumentParser(
            description='Command to update an autotest server.')

    # Flags that switch off a behavior which is enabled by default:
    # (flag, destination attribute, help text).
    skip_flags = (
        ('--skip-verify', 'verify',
         'Disable verification of a clean repository.'),
        ('--skip-update', 'update',
         'Skip the repository source code update.'),
        ('--skip-actions', 'actions',
         'Skip the post update actions.'),
        ('--skip-report', 'report',
         'Skip the git version report.'),
    )
    for flag, dest, help_text in skip_flags:
        parser.add_argument(flag, action='store_false', dest=dest,
                            default=True, help=help_text)

    # Flags that switch on a behavior which is disabled by default:
    # (flag, help text).
    enable_flags = (
        ('--actions-only',
         'Run the post update actions (restart services).'),
        ('--dryrun',
         "Don't actually run any commands, just log."),
        ('--skip-service-status',
         'Skip checking the service status.'),
        ('--update_push_servers',
         'Indicate to update test_push server. If not '
         'specify, then update server to production.'),
        ('--force_update',
         'Force to run the update commands for afe, tko '
         'and build_externals'),
    )
    for flag, help_text in enable_flags:
        parser.add_argument(flag, action='store_true', help=help_text)

    behaviors = parser.parse_args(args)

    # TODO(dgarrett): Make these behaviors support dryrun.
    if behaviors.actions_only or behaviors.dryrun:
        behaviors.verify = False
        behaviors.update = False

    if behaviors.actions_only:
        behaviors.report = False

    return behaviors
447
448
class ChangeDir(object):

    """Context manager that runs its body with another working directory.

    On entry the process changes into the requested directory; on exit,
    whether or not the body raised, it changes back to the directory that
    was current at entry. Exceptions from the body are not suppressed.
    """

    def __init__(self, dir):
        # old_dir stays None until the context is entered.
        self.old_dir = None
        self.new_dir = dir

    def __enter__(self):
        # Remember where we came from before moving.
        self.old_dir = os.getcwd()
        os.chdir(self.new_dir)

    def __exit__(self, exc_type, exc_val, exc_tb):
        os.chdir(self.old_dir)
463
464
def _sync_chromiumos_repo():
    """Update the ~chromeos-test/chromiumos repo.

    Both steps are best effort: a failed sync or a failed .pyc cleanup is
    printed but never raised.
    """
    print('Updating ~chromeos-test/chromiumos')
    checkout = os.path.expanduser('~chromeos-test/chromiumos')
    with ChangeDir(checkout):
        sync_status = subprocess.call(['repo', 'sync'],
                                      stderr=subprocess.STDOUT)
        # Remove .pyc files via pyclean, which is a package on all ubuntu server
        print('Removing .pyc files')
        try:
            subprocess.check_output(['pyclean', '.', '-q'])
        except Exception as err:
            print('Warning: fail to remove .pyc! %s' % err)

    # Reported only after the cleanup so the cleanup runs either way.
    if sync_status:
        print('Update failed, exited with status: %d' % sync_status)
478
479
def main(args):
    """Update this server: verify, sync, run deploy actions, then report.

    Each stage can be switched off from the command line; see
    parse_arguments().

    @param args: Command line arguments, without the program name.
    """
    os.chdir(common.autotest_dir)
    global_config.global_config.parse_config_file()

    behaviors = parse_arguments(args)

    if behaviors.verify:
        print('Checking tree status:')
        verify_repo_clean()
        print('Tree status: clean')

    # Snapshot the state before the update. The "after" values stay empty
    # unless an update is actually performed.
    versions_before = repo_versions()
    cmd_versions_before = repo_versions_to_decide_whether_run_cmd_update()
    versions_after = set()
    cmd_versions_after = set()

    if behaviors.update:
        print('Updating Repo.')
        repo_sync(behaviors.update_push_servers)
        versions_after = repo_versions()
        cmd_versions_after = repo_versions_to_decide_whether_run_cmd_update()

        _sync_chromiumos_repo()

    if behaviors.actions:
        if behaviors.force_update:
            cmds_skip = set()
        else:
            # A (cmd, version) pair present in both snapshots means the
            # repo/file behind that cmd did not change: no need to run it.
            unchanged = cmd_versions_before & cmd_versions_after
            cmds_skip = {cmd for cmd, _ in unchanged}
        run_deploy_actions(
                cmds_skip, behaviors.dryrun, behaviors.skip_service_status,
                use_chromite_master=behaviors.update_push_servers)

    if behaviors.report:
        print('Changes:')
        print(report_changes(versions_before, versions_after))
516
517
# Script entry point. main() has no return statement, so a run that does not
# raise exits with status 0.
if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))
520