# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Provides an interface for debugging the anomaly detection function."""

import json
import urllib

from dashboard import datastore_hooks
from dashboard import find_anomalies
from dashboard import find_change_points
from dashboard import request_handler
from dashboard import utils
from dashboard.models import anomaly
from dashboard.models import anomaly_config
from dashboard.models import graph_data

# Default number of points before and after a point to analyze.
_NUM_BEFORE = 40
_NUM_AFTER = 10


class QueryParameterError(Exception):
  pass


class DebugAlertHandler(request_handler.RequestHandler):
  """Request handler for the /debug_alert page."""

  def get(self):
    """Displays UI for debugging the anomaly detection function.

    Request parameters:
      test_path: Full test path (Master/bot/suite/chart) for test with alert.
      rev: A revision (Row id number) to center the graph on.
      num_before: Maximum number of points before the given revision to get.
      num_after: Maximum number of points at and after the given revision.
      config: Anomaly threshold config parameters in JSON form.

    Outputs:
      An HTML page with a chart (if test_path is given) and a form.
    """
    try:
      test = self._GetTest()
      num_before, num_after = self._GetNumBeforeAfter()
      config_name = self._GetConfigName(test)
      config_dict = anomaly_config.CleanConfigDict(self._GetConfigDict(test))
    except QueryParameterError as e:
      self.RenderHtml('debug_alert.html', {'error': e.message})
      return

    revision = self.request.get('rev')
    if revision:
      rows = _FetchRowsAroundRev(test, int(revision), num_before, num_after)
    else:
      rows = _FetchLatestRows(test, num_before)

    chart_series = _ChartSeries(rows)
    lookup = _RevisionList(rows)

    # Get the anomaly data from the new anomaly detection module. This will
    # also be passed to the template so that it can be shown on the page.
    change_points = SimulateAlertProcessing(chart_series, **config_dict)
    anomaly_indexes = [c.x_value for c in change_points]
    anomaly_points = [(i, chart_series[i][1]) for i in anomaly_indexes]
    anomaly_segments = _AnomalySegmentSeries(change_points)

    plot_data = _GetPlotData(chart_series, anomaly_points, anomaly_segments)

    # Render the debug_alert page with all of the parameters filled in.
    self.RenderHtml('debug_alert.html', {
        'test_path': test.test_path,
        'rev': revision or '',
        'num_before': num_before,
        'num_after': num_after,
        'sheriff_name': 'None' if not test.sheriff else test.sheriff.id(),
        'config_name': config_name,
        'config_json': json.dumps(config_dict, indent=2, sort_keys=True),
        'plot_data': json.dumps(plot_data),
        'lookup': json.dumps(lookup),
        'anomalies': json.dumps([c.AsDict() for c in change_points]),
        'csv_url': _CsvUrl(test.test_path, rows),
        'graph_url': _GraphUrl(test, revision),
        'stored_anomalies': _FetchStoredAnomalies(test, lookup),
    })

  def post(self):
    """A POST request to this endpoint does the same thing as a GET request."""
    return self.get()

  def _GetTest(self):
    test_path = self.request.get('test_path')
    if not test_path:
      raise QueryParameterError('No test specified.')
    test = utils.TestKey(test_path).get()
    if not test:
      raise QueryParameterError('Test "%s" not found.' % test_path)
    return test

  def _GetNumBeforeAfter(self):
    try:
      num_before = int(self.request.get('num_before', _NUM_BEFORE))
      num_after = int(self.request.get('num_after', _NUM_AFTER))
    except ValueError:
      raise QueryParameterError('Invalid "num_before" or "num_after".')
    return num_before, num_after

  def _GetConfigName(self, test):
    """Gets the name of the custom anomaly threshold config, for display."""
    if test.overridden_anomaly_config:
      return test.overridden_anomaly_config.string_id()
    if self.request.get('config'):
      return 'Custom config'
    return 'Default config'

  def _GetConfigDict(self, test):
    """Gets the anomaly threshold config dict to use."""
    input_config_json = self.request.get('config')
    if not input_config_json:
      return anomaly_config.GetAnomalyConfigDict(test)
    try:
      return json.loads(input_config_json)
    except ValueError:
      raise QueryParameterError('Invalid JSON.')


def SimulateAlertProcessing(chart_series, **config_dict):
  """Finds the same alerts as would be found normally as points are added.

  Each time a new point is added to a data series on the dashboard, the
  FindChangePoints function is called with some points from that series.
  In order to simulate this here, we need to repeatedly call FindChangePoints.

  Args:
    chart_series: A list of (x, y) pairs.
    **config_dict: An alert threshold config dict.

  Returns:
    A list of find_change_points.ChangePoint objects, one for each alert found.
  """
  all_change_points = []
  highest_x = None  # This is used to avoid finding duplicate alerts.
  # The number of points that are passed in to FindChangePoints normally may
  # depend on either the specific "max_window_size" value or another default
  # used in find_anomalies.
  window = config_dict.get('max_window_size', find_anomalies.DEFAULT_NUM_POINTS)
  for end in range(1, len(chart_series)):
    start = max(0, end - window)
    series = chart_series[start:end]
    change_points = find_change_points.FindChangePoints(series, **config_dict)
    change_points = [c for c in change_points if c.x_value > highest_x]
    if change_points:
      highest_x = max(c.x_value for c in change_points)
      all_change_points.extend(change_points)
  return all_change_points


def _AnomalySegmentSeries(change_points):
  """Makes a list of data series for showing segments next to anomalies.

  Args:
    change_points: A list of find_change_points.ChangePoint objects.

  Returns:
    A list of data series (lists of pairs) to be graphed by Flot.
  """
  # We make a separate series for each anomaly, since segments may overlap.
  anomaly_series_list = []

  for change_point in change_points:
    anomaly_series = []

    # In a Flot data series, null is treated as a special value which
    # indicates a discontinuity. We want to end each segment with null
    # so that they show up as separate segments on the graph.
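    # As an illustration (assumed values, not taken from real data): for a
    # ChangePoint with window_start=10, x_value=13, window_end=16,
    # median_before=5 and median_after=9, the appends below build:
    #   [[10, None], [11, 5], [12, 5], [13, None],
    #    [14, 9], [15, 9], [16, 9], [16, None]]
    # The trailing [16, None] terminates the segment so Flot draws each
    # anomaly's segments separately.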
    anomaly_series.append([change_point.window_start, None])

    for x in range(change_point.window_start + 1, change_point.x_value):
      anomaly_series.append([x, change_point.median_before])
    anomaly_series.append([change_point.x_value, None])

    for x in range(change_point.x_value + 1, change_point.window_end + 1):
      anomaly_series.append([x, change_point.median_after])
    anomaly_series.append([change_point.window_end, None])
    anomaly_series_list.append(anomaly_series)

  return anomaly_series_list


def _GetPlotData(chart_series, anomaly_points, anomaly_segments):
  """Returns data to embed on the front-end for the chart.

  Args:
    chart_series: A series, i.e. a list of (index, value) pairs.
    anomaly_points: A series which contains the list of points where the
        anomalies were detected.
    anomaly_segments: A list of series, each of which represents one segment,
        which is a horizontal line across a range of values used in finding
        an anomaly.

  Returns:
    A list of data series, in the format accepted by Flot, which can be
    serialized as JSON and embedded on the page.
  """
  data = [
      {
          'data': chart_series,
          'color': '#666',
          'lines': {'show': True},
          'points': {'show': False},
      },
      {
          'data': anomaly_points,
          'color': '#f90',
          'lines': {'show': False},
          'points': {'show': True, 'radius': 4}
      },
  ]
  for series in anomaly_segments:
    data.append({
        'data': series,
        'color': '#f90',
        'lines': {'show': True},
        'points': {'show': False},
    })
  return data


def _ChartSeries(rows):
  """Returns a data series of (index, value) pairs for the given rows."""
  return [(i, r.value) for i, r in enumerate(rows)]


def _RevisionList(rows):
  """Returns a list of revisions."""
  return [r.revision for r in rows]


def _FetchLatestRows(test, num_points):
  """Does a query for the latest Row entities in the given test.

  Args:
    test: A Test entity to fetch Row entities for.
    num_points: Number of points to fetch.

  Returns:
    A list of Row entities, ordered by revision. The number to fetch is limited
    to the number that is expected to be processed at once by GASP.
  """
  assert utils.IsInternalUser() or not test.internal_only
  datastore_hooks.SetSinglePrivilegedRequest()
  q = graph_data.Row.query(projection=['revision', 'value'])
  q = q.filter(graph_data.Row.parent_test == test.key)
  q = q.order(-graph_data.Row.revision)
  rows = list(reversed(q.fetch(limit=num_points)))
  return rows


def _FetchRowsAroundRev(test, revision, num_before, num_after):
  """Fetches Row entities before and after a given revision.

  Args:
    test: A Test entity.
    revision: A Row ID.
    num_before: Maximum number of Rows before |revision| to fetch.
    num_after: Maximum number of Rows starting from |revision| to fetch.

  Returns:
    A list of Row entities ordered by ID. The Row entities will have at least
    the "revision" and "value" properties, which are the only ones relevant
    to their use in this module.
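
    For example (illustrative values): a call with revision=2000, num_before=3
    and num_after=2 would return up to three Rows with revision less than 2000,
    followed by up to two Rows with revision greater than or equal to 2000,
    all ordered by revision.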
273 """ 274 assert utils.IsInternalUser() or not test.internal_only 275 query = graph_data.Row.query(projection=['revision', 'value']) 276 query = query.filter(graph_data.Row.parent_test == test.key) 277 278 before_query = query.filter(graph_data.Row.revision < revision) 279 before_query = before_query.order(-graph_data.Row.revision) 280 datastore_hooks.SetSinglePrivilegedRequest() 281 rows_before = list(reversed(before_query.fetch(limit=num_before))) 282 283 after_query = query.filter(graph_data.Row.revision >= revision) 284 after_query = after_query.order(graph_data.Row.revision) 285 datastore_hooks.SetSinglePrivilegedRequest() 286 rows_at_and_after = after_query.fetch(num_after) 287 288 return rows_before + rows_at_and_after 289 290 291def _FetchStoredAnomalies(test, revisions): 292 """Makes a list of data about Anomaly entities for a Test.""" 293 stored_anomalies = anomaly.Anomaly.query().filter( 294 anomaly.Anomaly.test == test.key).fetch() 295 296 stored_anomaly_dicts = [] 297 for a in stored_anomalies: 298 if a.end_revision > revisions[0]: 299 stored_anomaly_dicts.append({ 300 'revision': a.end_revision, 301 'median_before': a.median_before_anomaly, 302 'median_after': a.median_after_anomaly, 303 'percent_changed': a.percent_changed, 304 'bug_id': _GetDisplayBugId(a.bug_id), 305 'timestamp': a.timestamp, 306 }) 307 return stored_anomaly_dicts 308 309 310def _CsvUrl(test_path, rows): 311 """Constructs an URL for requesting data from /graph_csv for |rows|.""" 312 # Using a list of pairs ensures a predictable order for the parameters. 313 params = [('test_path', test_path)] 314 if rows: 315 params += [ 316 ('num_points', len(rows)), 317 ('rev', rows[-1].revision), 318 ] 319 return '/graph_csv?%s' % urllib.urlencode(params) 320 321 322def _GraphUrl(test, revision): 323 """Constructs an URL for requesting data from /graph_csv for |rows|.""" 324 params = [ 325 ('masters', test.master_name), 326 ('bots', test.bot_name), 327 ('tests', '/'.join(test.test_path.split('/')[2:])), 328 ] 329 if revision: 330 params.append(('rev', revision)) 331 return '/report?%s' % urllib.urlencode(params) 332 333 334def _GetDisplayBugId(bug_id): 335 """Returns a display string for the given bug ID property of an anomaly.""" 336 special_ids = {-1: 'INVALID', -2: 'IGNORE', None: 'NONE'} 337 return special_ids.get(bug_id, str(bug_id)) 338