1#!/usr/bin/python2
2
3# Copyright 2016 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Queries a MySQL database and emits status metrics to Monarch.
8
9Note: confusingly, 'Innodb_buffer_pool_reads' is actually the cache-misses, not
10the number of reads to the buffer pool.  'Innodb_buffer_pool_read_requests'
11corresponds to the number of reads the the buffer pool.
12"""
13import logging
14import sys
15
16import MySQLdb
17import time
18
19import common
20
21from autotest_lib.client.common_lib import global_config
22from autotest_lib.client.common_lib.cros import retry
23
24from chromite.lib import metrics
25from chromite.lib import ts_mon_config
26
27AT_DIR='/usr/local/autotest'
28DEFAULT_USER = global_config.global_config.get_config_value(
29        'CROS', 'db_backup_user', type=str, default='')
30DEFAULT_PASSWD = global_config.global_config.get_config_value(
31        'CROS', 'db_backup_password', type=str, default='')
32
33LOOP_INTERVAL = 60
34
35EMITTED_STATUSES_COUNTERS = [
36    'bytes_received',
37    'bytes_sent',
38    'connections',
39    'Innodb_buffer_pool_read_requests',
40    'Innodb_buffer_pool_reads',
41    'Innodb_row_lock_waits',
42    'questions',
43    'slow_queries',
44    'threads_created',
45]
46
47EMITTED_STATUS_GAUGES = [
48    'Innodb_row_lock_time_avg',
49    'Innodb_row_lock_current_waits',
50    'threads_running',
51    'threads_connected',
52]
53
54
55class RetryingConnection(object):
56    """Maintains a db connection and a cursor."""
57    INITIAL_SLEEP_SECONDS = 20
58    MAX_TIMEOUT_SECONDS = 60 * 60
59
60    def __init__(self, *args, **kwargs):
61        self.args = args
62        self.kwargs = kwargs
63        self.db = None
64        self.cursor = None
65
66    def Connect(self):
67        """Establishes a MySQL connection and creates a cursor."""
68        self.db = MySQLdb.connect(*self.args, **self.kwargs)
69        self.cursor = self.db.cursor()
70
71    def Reconnect(self):
72        """Attempts to close the connection, then reconnects."""
73        try:
74            self.cursor.close()
75            self.db.close()
76        except MySQLdb.Error:
77            pass
78        self.Connect()
79
80    def RetryWith(self, func):
81        """Run a function, retrying on OperationalError."""
82        return retry.retry(
83            MySQLdb.OperationalError,
84            delay_sec=self.INITIAL_SLEEP_SECONDS,
85            timeout_min=self.MAX_TIMEOUT_SECONDS,
86            callback=self.Reconnect
87        )(func)()
88
89    def Execute(self, *args, **kwargs):
90        """Runs .execute on the cursor, reconnecting on failure."""
91        def _Execute():
92            return self.cursor.execute(*args, **kwargs)
93        return self.RetryWith(_Execute)
94
95    def Fetchall(self):
96        """Runs .fetchall on the cursor."""
97        return self.cursor.fetchall()
98
99
100def GetStatus(connection, status):
101    """Get the status variable from the database, retrying on failure.
102
103    @param connection: MySQLdb cursor to query with.
104    @param status: Name of the status variable.
105    @returns The mysql query result.
106    """
107    connection.Execute('SHOW GLOBAL STATUS LIKE "%s";' % status)
108    output = connection.Fetchall()[0][1]
109
110    if not output:
111        logging.error('Cannot find any global status like %s', status)
112
113    return int(output)
114
115
116def QueryAndEmit(baselines, conn):
117    """Queries MySQL for important stats and emits Monarch metrics
118
119    @param baselines: A dict containing the initial values for the cumulative
120                      metrics.
121    @param conn: The mysql connection object.
122    """
123    for status in EMITTED_STATUSES_COUNTERS:
124        metric_name = 'chromeos/autotest/afe_db/%s' % status.lower()
125        delta = GetStatus(conn, status) - baselines[status]
126        metrics.Counter(metric_name).set(delta)
127
128    for status in EMITTED_STATUS_GAUGES:
129        metric_name = 'chromeos/autotest/afe_db/%s' % status.lower()
130        metrics.Gauge(metric_name).set(GetStatus(conn, status))
131
132    pages_free = GetStatus(conn, 'Innodb_buffer_pool_pages_free')
133    pages_total = GetStatus(conn, 'Innodb_buffer_pool_pages_total')
134
135    metrics.Gauge('chromeos/autotest/afe_db/buffer_pool_pages').set(
136        pages_free, fields={'used': False})
137
138    metrics.Gauge('chromeos/autotest/afe_db/buffer_pool_pages').set(
139        pages_total - pages_free, fields={'used': True})
140
141
142def main():
143    """Sets up ts_mon and repeatedly queries MySQL stats"""
144    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
145    conn = RetryingConnection('localhost', DEFAULT_USER, DEFAULT_PASSWD)
146    conn.Connect()
147
148    # TODO(crbug.com/803566) Use indirect=False to mitigate orphan mysql_stats
149    # processes overwhelming shards.
150    with ts_mon_config.SetupTsMonGlobalState('mysql_stats', indirect=False):
151      QueryLoop(conn)
152
153
154def QueryLoop(conn):
155    """Queries and emits metrics every LOOP_INTERVAL seconds.
156
157    @param conn: The mysql connection object.
158    """
159    # Get the baselines for cumulative metrics. Otherwise the windowed rate at
160    # the very beginning will be extremely high as it shoots up from 0 to its
161    # current value.
162    baselines = dict((s, GetStatus(conn, s))
163                     for s in EMITTED_STATUSES_COUNTERS)
164
165    while True:
166        now = time.time()
167        QueryAndEmit(baselines, conn)
168        time_spent = time.time() - now
169        sleep_duration = LOOP_INTERVAL - time_spent
170        time.sleep(max(0, sleep_duration))
171
172
173if __name__ == '__main__':
174  main()
175