#!/usr/bin/python
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Utility to check the replication delay of the slave databases.

The utility checks the value of Seconds_Behind_Master of slave databases,
including:
Slave databases of AFE database, retrieved from server database.
Readonly replicas of TKO database, passed in by option --replicas.
"""

import argparse
import logging
import os
import re

import common
from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import logging_config
from autotest_lib.frontend import setup_django_environment
from autotest_lib.server import site_utils
from autotest_lib.site_utils import server_manager_utils

from chromite.lib import metrics


CONFIG = global_config.global_config

# SQL command to query the replication status of a slave database. The
# trailing \G asks the MySQL client for vertical (one field per line) output,
# which is the layout DELAY_TIME_REGEX expects.
# Raw strings are used so that the backslashes reach MySQL / the regex engine
# untouched instead of being treated as (invalid) Python escape sequences.
SLAVE_STATUS_CMD = r'show slave status\G'
# Captures the integer value of Seconds_Behind_Master. A NULL value does not
# match, which check_delay treats as an error.
DELAY_TIME_REGEX = r'Seconds_Behind_Master:\s(\d+)'
DELAY_METRICS = 'chromeos/autotest/afe_db/slave_delay_seconds'
# A large delay to report to metrics indicating the replica is in error.
LARGE_DELAY = 1000000


def check_delay(server, user, password):
    """Check the delay of a given slave database server.

    Runs SLAVE_STATUS_CMD on the server and reports the parsed
    Seconds_Behind_Master value to the DELAY_METRICS metric, with the server
    recorded in the 'slave' field. If the value cannot be parsed from the
    command output, LARGE_DELAY is reported instead. If the SQL command itself
    fails with error.CmdError, the failure is logged and no metric is
    reported.

    @param server: Hostname or IP address of the MySQL server.
    @param user: User name to log in the MySQL server.
    @param password: Password to log in the MySQL server.
    """
    # Only the SQL command is expected to raise CmdError, so keep the try
    # body limited to it.
    try:
        result = utils.run_sql_cmd(server, user, password, SLAVE_STATUS_CMD)
    except error.CmdError:
        logging.exception('Failed to get slave status of server %s.', server)
        return

    search = re.search(DELAY_TIME_REGEX, result, re.MULTILINE)
    m = metrics.Float(DELAY_METRICS)
    f = {'slave': server}
    if search:
        delay = float(search.group(1))
        m.set(delay, fields=f)
        logging.debug('Seconds_Behind_Master of server %s is %d.', server,
                      delay)
    else:
        # The value of Seconds_Behind_Master could be NULL, report a large
        # number to indicate database error.
        m.set(LARGE_DELAY, fields=f)
        logging.error('Failed to get Seconds_Behind_Master of server %s '
                      'from slave status:\n %s', server, result)


def parse_options():
    """Parse command line inputs.

    Supported options:
      -r/--replicas: one or more addresses of readonly TKO replicas to check;
                     defaults to an empty list.
      -l/--logfile:  path of a file to save logs to; defaults to None.

    @return: Options to run the script.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--replicas', nargs='+',
                        default=[],
                        help='IP addresses of readonly replicas of TKO.')
    parser.add_argument('-l', '--logfile', type=str,
                        default=None,
                        help='Path to the log file to save logs.')
    return parser.parse_args()


def main():
    """Main script.

    Checks the replication delay of every replica passed via --replicas
    (using the global DB credentials), then of every server returned by
    server_manager_utils.get_servers for role 'database_slave' with status
    'primary' (using the AUTOTEST_WEB credentials). A warning is logged for
    either group that turns out to be empty.
    """
    with site_utils.SetupTsMonGlobalState('check_slave_db_delay',
                                          indirect=True):
        options = parse_options()
        log_config = logging_config.LoggingConfig()
        if options.logfile:
            log_config.add_file_handler(
                    file_path=os.path.abspath(options.logfile),
                    level=logging.DEBUG
            )
        db_user = CONFIG.get_config_value('AUTOTEST_WEB', 'user')
        db_password = CONFIG.get_config_value('AUTOTEST_WEB', 'password')

        # The global DB credentials fall back to the regular AUTOTEST_WEB
        # credentials when they are not configured.
        global_db_user = CONFIG.get_config_value(
                'AUTOTEST_WEB', 'global_db_user', default=db_user)
        global_db_password = CONFIG.get_config_value(
                'AUTOTEST_WEB', 'global_db_password', default=db_password)

        logging.info('Start checking Seconds_Behind_Master of slave databases')

        for replica in options.replicas:
            check_delay(replica, global_db_user, global_db_password)
        if not options.replicas:
            logging.warning('No replicas checked.')

        slaves = server_manager_utils.get_servers(
                role='database_slave', status='primary')
        for slave in slaves:
            check_delay(slave.hostname, db_user, db_password)
        if not slaves:
            logging.warning('No slaves checked.')

        logging.info('Finished checking.')


if __name__ == '__main__':
    main()