1#!/usr/bin/python
2# Copyright 2017 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Utility to check the replication delay of the slave databases.
7
8The utility checks the value of Seconds_Behind_Master of slave databases,
9including:
10Slave databases of AFE database, retrieved from server database.
11Readonly replicas of TKO database, passed in by option --replicas.
12"""
13
14import argparse
15import logging
16import os
17import re
18
19import common
20from autotest_lib.client.bin import utils
21from autotest_lib.client.common_lib import error
22from autotest_lib.client.common_lib import global_config
23from autotest_lib.client.common_lib import logging_config
24from autotest_lib.frontend import setup_django_environment
25from autotest_lib.server import site_utils
26from autotest_lib.site_utils import server_manager_utils
27
28from chromite.lib import metrics
29
30
# Module-level alias of the global config object; main() reads the
# AUTOTEST_WEB database credentials through it.
CONFIG = global_config.global_config
32
# MySQL command used to query the replication status of a slave database.
# Written as a raw string so that the trailing `\G` (vertical output in the
# mysql client) is not treated as a Python string escape.
SLAVE_STATUS_CMD = r'show slave status\G'
# Regex capturing the integer value of Seconds_Behind_Master from the slave
# status output. It does not match when the value is NULL.
DELAY_TIME_REGEX = r'Seconds_Behind_Master:\s(\d+)'
# Name of the metric the replication delay is reported to.
DELAY_METRICS = 'chromeos/autotest/afe_db/slave_delay_seconds'
# A large delay to report to metrics indicating the replica is in error.
LARGE_DELAY = 1000000
39
def check_delay(server, user, password):
    """Measure the replication delay of a slave database and report it.

    Runs SLAVE_STATUS_CMD on the given server and searches the output for
    the Seconds_Behind_Master value. The value found is sent to the
    DELAY_METRICS metric with the server name as the `slave` field. When no
    numeric value is found, LARGE_DELAY is sent instead and an error is
    logged. A failure to run the SQL command (error.CmdError) is logged and
    no metric is sent.

    @param server: Hostname or IP address of the MySQL server.
    @param user: User name to log in the MySQL server.
    @param password: Password to log in the MySQL server.
    """
    try:
        slave_status = utils.run_sql_cmd(
                server, user, password, SLAVE_STATUS_CMD)
        match = re.search(DELAY_TIME_REGEX, slave_status, re.MULTILINE)
        delay_metric = metrics.Float(DELAY_METRICS)
        metric_fields = {'slave': server}

        if not match:
            # Seconds_Behind_Master may be NULL, in which case the regex
            # finds nothing; send a large number to flag a database error.
            delay_metric.set(LARGE_DELAY, fields=metric_fields)
            logging.error('Failed to get Seconds_Behind_Master of server %s '
                          'from slave status:\n %s', server, slave_status)
            return

        seconds_behind = float(match.group(1))
        delay_metric.set(seconds_behind, fields=metric_fields)
        logging.debug('Seconds_Behind_Master of server %s is %d.', server,
                      seconds_behind)
    except error.CmdError:
        logging.exception('Failed to get slave status of server %s.', server)
65
66
def parse_options():
    """Build the argument parser and parse the command line.

    Recognized options:
      -r/--replicas: one or more replica addresses; defaults to an empty
                     list when the option is not given.
      -l/--logfile:  path of a file to save logs to; defaults to None.

    @return: The argparse namespace holding `replicas` and `logfile`.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
            '-r', '--replicas',
            default=[],
            nargs='+',
            help='IP addresses of readonly replicas of TKO.')
    arg_parser.add_argument(
            '-l', '--logfile',
            default=None,
            type=str,
            help='Path to the log file to save logs.')
    options = arg_parser.parse_args()
    return options
80
81
def main():
    """Check the replication delay of all replicas and slave databases.

    Inside the ts_mon context for 'check_slave_db_delay', this parses the
    command line, optionally attaches a DEBUG-level log file handler, and
    reads the database credentials from the AUTOTEST_WEB config section.
    It then calls check_delay() for:
      * each replica given by --replicas, using the global_db_user /
        global_db_password credentials (falling back to user / password);
      * each server returned by server_manager_utils.get_servers() for
        role 'database_slave' and status 'primary', using user / password.
    A warning is logged for either group that turns out to be empty.
    """
    with site_utils.SetupTsMonGlobalState('check_slave_db_delay',
                                          indirect=True):
        options = parse_options()

        log_config = logging_config.LoggingConfig()
        if options.logfile:
            logfile_path = os.path.abspath(options.logfile)
            log_config.add_file_handler(file_path=logfile_path,
                                        level=logging.DEBUG)

        slave_user = CONFIG.get_config_value('AUTOTEST_WEB', 'user')
        slave_password = CONFIG.get_config_value('AUTOTEST_WEB', 'password')

        # Replica credentials fall back to the regular ones when the
        # global_db_* values are not configured.
        replica_user = CONFIG.get_config_value(
                'AUTOTEST_WEB', 'global_db_user', default=slave_user)
        replica_password = CONFIG.get_config_value(
                'AUTOTEST_WEB', 'global_db_password', default=slave_password)

        logging.info('Start checking Seconds_Behind_Master of slave databases')

        if options.replicas:
            for replica_host in options.replicas:
                check_delay(replica_host, replica_user, replica_password)
        else:
            logging.warning('No replicas checked.')

        slave_servers = server_manager_utils.get_servers(
                role='database_slave', status='primary')
        if slave_servers:
            for slave_server in slave_servers:
                check_delay(slave_server.hostname, slave_user, slave_password)
        else:
            logging.warning('No slaves checked.')

        logging.info('Finished checking.')
116
117
# Run the check only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()
120