1# Copyright 2015 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Provides an interface for debugging the anomaly detection function."""
6
7import json
8import urllib
9
10from dashboard import datastore_hooks
11from dashboard import find_anomalies
12from dashboard import find_change_points
13from dashboard import request_handler
14from dashboard import utils
15from dashboard.models import anomaly
16from dashboard.models import anomaly_config
17from dashboard.models import graph_data
18
# Default numbers of points to fetch before and after the requested revision.
# These are used when the "num_before" / "num_after" request parameters are
# not given.
_NUM_BEFORE = 40
_NUM_AFTER = 10
22
23
class QueryParameterError(Exception):
  """Raised when a request parameter is missing or cannot be used."""
26
27
class DebugAlertHandler(request_handler.RequestHandler):
  """Request handler for the /debug_alert page."""

  def get(self):
    """Displays UI for debugging the anomaly detection function.

    Request parameters:
      test_path: Full test path (Master/bot/suite/chart) for test with alert.
      rev: A revision (Row id number) to center the graph on.
      num_before: Maximum number of points before the given revision to get.
      num_after: Maximum number of points to get starting from the given
          revision.
      config: Config parameters in JSON form.

    Outputs:
      A HTML page with a chart (if test_path is given) and a form. If any
      request parameter is invalid, the page is rendered with only an error
      message.
    """
    try:
      test = self._GetTest()
      num_before, num_after = self._GetNumBeforeAfter()
      revision_number = self._GetRevisionNumber()
      config_name = self._GetConfigName(test)
      config_dict = anomaly_config.CleanConfigDict(self._GetConfigDict(test))
    except QueryParameterError as e:
      # e.args[0] is the message given to the constructor; the older
      # BaseException.message attribute is deprecated.
      error_message = e.args[0] if e.args else ''
      self.RenderHtml('debug_alert.html', {'error': error_message})
      return

    # The raw string is kept for echoing back into the form and URLs; the
    # validated integer is what is used for querying.
    revision = self.request.get('rev')
    if revision_number is not None:
      rows = _FetchRowsAroundRev(test, revision_number, num_before, num_after)
    else:
      rows = _FetchLatestRows(test, num_before)

    chart_series = _ChartSeries(rows)
    lookup = _RevisionList(rows)

    # Get the anomaly data from the new anomaly detection module. This will
    # also be passed to the template so that it can be shown on the page.
    change_points = SimulateAlertProcessing(chart_series, **config_dict)
    anomaly_indexes = [c.x_value for c in change_points]
    anomaly_points = [(i, chart_series[i][1]) for i in anomaly_indexes]
    anomaly_segments = _AnomalySegmentSeries(change_points)

    plot_data = _GetPlotData(chart_series, anomaly_points, anomaly_segments)

    # Render the debug_alert page with all of the parameters filled in.
    self.RenderHtml('debug_alert.html', {
        'test_path': test.test_path,
        'rev': revision or '',
        'num_before': num_before,
        'num_after': num_after,
        'sheriff_name': 'None' if not test.sheriff else test.sheriff.id(),
        'config_name': config_name,
        'config_json': json.dumps(config_dict, indent=2, sort_keys=True),
        'plot_data': json.dumps(plot_data),
        'lookup': json.dumps(lookup),
        'anomalies': json.dumps([c.AsDict() for c in change_points]),
        'csv_url': _CsvUrl(test.test_path, rows),
        'graph_url': _GraphUrl(test, revision),
        'stored_anomalies': _FetchStoredAnomalies(test, lookup),
    })

  def post(self):
    """A POST request to this endpoint does the same thing as a GET request."""
    return self.get()

  def _GetTest(self):
    """Gets the Test entity named by the "test_path" parameter.

    Returns:
      The entity that utils.TestKey(test_path).get() yields.

    Raises:
      QueryParameterError: No test path was given, or no entity was found.
    """
    test_path = self.request.get('test_path')
    if not test_path:
      raise QueryParameterError('No test specified.')
    test = utils.TestKey(test_path).get()
    if not test:
      raise QueryParameterError('Test "%s" not found.' % test_path)
    return test

  def _GetNumBeforeAfter(self):
    """Gets the "num_before" and "num_after" parameters as integers.

    Returns:
      A pair (num_before, num_after); each falls back to the module default
      when the parameter is absent.

    Raises:
      QueryParameterError: Either value could not be parsed as an integer.
    """
    try:
      num_before = int(self.request.get('num_before', _NUM_BEFORE))
      num_after = int(self.request.get('num_after', _NUM_AFTER))
    except ValueError:
      raise QueryParameterError('Invalid "num_before" or "num_after".')
    return num_before, num_after

  def _GetRevisionNumber(self):
    """Gets the "rev" parameter as an integer.

    Returns:
      The revision as an int, or None if no revision was given.

    Raises:
      QueryParameterError: The value could not be parsed as an integer.
    """
    revision = self.request.get('rev')
    if not revision:
      return None
    try:
      return int(revision)
    except ValueError:
      raise QueryParameterError('Invalid "rev".')

  def _GetConfigName(self, test):
    """Gets the name of the custom anomaly threshold, just for display."""
    # A config overridden on the test takes precedence for the displayed
    # name, even when a "config" parameter is also present in the request.
    if test.overridden_anomaly_config:
      return test.overridden_anomaly_config.string_id()
    if self.request.get('config'):
      return 'Custom config'
    return 'Default config'

  def _GetConfigDict(self, test):
    """Gets the anomaly threshold config dict to use.

    Args:
      test: The Test entity whose stored config is used when the request
          does not supply one.

    Returns:
      The dict parsed from the "config" request parameter if it is given,
      otherwise the result of anomaly_config.GetAnomalyConfigDict(test).

    Raises:
      QueryParameterError: The "config" parameter is not valid JSON, or it
          is valid JSON but not an object.
    """
    input_config_json = self.request.get('config')
    if not input_config_json:
      return anomaly_config.GetAnomalyConfigDict(test)
    try:
      config_dict = json.loads(input_config_json)
    except ValueError:
      raise QueryParameterError('Invalid JSON.')
    # Valid JSON may still be a list, number, string or null, none of which
    # can be unpacked as keyword arguments by the caller.
    if not isinstance(config_dict, dict):
      raise QueryParameterError('Config must be a JSON object.')
    return config_dict
126
127
def SimulateAlertProcessing(chart_series, **config_dict):
  """Finds the same alerts as would be found normally as points are added.

  Each time a new point is added to a data series on dashboard, the
  FindChangePoints function is called with some points from that series.
  In order to simulate this here, we need to repeatedly call FindChangePoints,
  once for every prefix of the series (limited to the most recent points that
  fit in the window).

  Args:
    chart_series: A list of (x, y) pairs.
    **config_dict: An alert threshold config dict.

  Returns:
    A list of find_change_points.ChangePoint objects, one for each alert found.
    A change point is only kept if its x-value is higher than that of every
    change point kept so far.
  """
  all_change_points = []
  highest_x = None  # This is used to avoid finding duplicate alerts.
  # The number of points that are passed in to FindChangePoints normally may
  # depend on either the specific "max_window_size" value or another default
  # used in find_anomalies.
  window = config_dict.get('max_window_size', find_anomalies.DEFAULT_NUM_POINTS)
  # |end| is an exclusive slice bound, so it has to reach len(chart_series)
  # for the run in which the final point has just been added to be simulated.
  for end in range(1, len(chart_series) + 1):
    start = max(0, end - window)
    series = chart_series[start:end]
    change_points = find_change_points.FindChangePoints(series, **config_dict)
    # Compare against None explicitly rather than relying on how None orders
    # relative to numbers.
    new_change_points = [
        c for c in change_points
        if highest_x is None or c.x_value > highest_x]
    if new_change_points:
      highest_x = max(c.x_value for c in new_change_points)
      all_change_points.extend(new_change_points)
  return all_change_points
157
158
def _AnomalySegmentSeries(change_points):
  """Builds the before/after median segments to draw for each change point.

  Args:
    change_points: A list of find_change_points.ChangePoint objects.

  Returns:
    A list with one data series (a list of [x, y] pairs) per change point,
    to be graphed by Flot. Each anomaly gets its own series because the
    segments of different anomalies may overlap.
  """
  series_list = []
  for cp in change_points:
    # Horizontal line at the "before" median, strictly between the window
    # start and the change point.
    before = [[x, cp.median_before]
              for x in range(cp.window_start + 1, cp.x_value)]
    # Horizontal line at the "after" median, from just past the change point
    # through the window end.
    after = [[x, cp.median_after]
             for x in range(cp.x_value + 1, cp.window_end + 1)]
    # In a Flot data series a null y-value marks a discontinuity; the None
    # entries keep the two segments from being joined up on the graph.
    series_list.append(
        [[cp.window_start, None]] +
        before +
        [[cp.x_value, None]] +
        after +
        [[cp.window_end, None]])
  return series_list
189
190
def _GetPlotData(chart_series, anomaly_points, anomaly_segments):
  """Assembles the chart data that is embedded on the front-end.

  Args:
    chart_series: A series, i.e. a list of (index, value) pairs.
    anomaly_points: A series containing the points where anomalies were
        detected.
    anomaly_segments: A list of series, each of which represents one segment,
        which is a horizontal line across a range of values used in finding
        an anomaly.

  Returns:
    A list of data series, in the format accepted by Flot, which can be
    serialized as JSON and embedded on the page. The first entry is the chart
    line, the second the anomaly markers, followed by one line per segment.
  """
  # The full data series, drawn as a grey line without point markers.
  value_line = {
      'data': chart_series,
      'color': '#666',
      'lines': {'show': True},
      'points': {'show': False},
  }
  # The detected anomalies, drawn as orange markers without a line.
  anomaly_markers = {
      'data': anomaly_points,
      'color': '#f90',
      'lines': {'show': False},
      'points': {'show': True, 'radius': 4}
  }
  # Each segment series is drawn as an orange line.
  segment_lines = [
      {
          'data': segment,
          'color': '#f90',
          'lines': {'show': True},
          'points': {'show': False},
      }
      for segment in anomaly_segments
  ]
  return [value_line, anomaly_markers] + segment_lines
228
229
def _ChartSeries(rows):
  """Returns a list of (index, value) pairs, one for each of the given rows."""
  return list(enumerate(row.value for row in rows))
233
234
def _RevisionList(rows):
  """Returns the revision of each of the given rows, in the same order."""
  revisions = []
  for row in rows:
    revisions.append(row.revision)
  return revisions
238
239
def _FetchLatestRows(test, num_points):
  """Does a query for the latest Row entities in the given test.

  Args:
    test: A Test entity to fetch Row entities for.
    num_points: Maximum number of points to fetch.

  Returns:
    A list of at most |num_points| Row entities, ordered by ascending
    revision. Only the "revision" and "value" properties are fetched.

  Raises:
    AssertionError: The test is internal-only and the current user is not an
        internal user.
  """
  # This check guards the privileged query below, so it is raised explicitly
  # instead of using an assert statement, which is removed when Python runs
  # with optimizations (-O) enabled.
  if not utils.IsInternalUser() and test.internal_only:
    raise AssertionError('Internal-only test requested by external user.')
  # NOTE(review): presumably this allows the next query to bypass the
  # datastore access hooks -- confirm against datastore_hooks.
  datastore_hooks.SetSinglePrivilegedRequest()
  q = graph_data.Row.query(projection=['revision', 'value'])
  q = q.filter(graph_data.Row.parent_test == test.key)
  # Query newest-first so that the limit keeps the latest rows, then flip the
  # result back into ascending order.
  q = q.order(-graph_data.Row.revision)
  rows = list(reversed(q.fetch(limit=num_points)))
  return rows
258
259
def _FetchRowsAroundRev(test, revision, num_before, num_after):
  """Fetches Row entities before and after a given revision.

  Args:
    test: A Test entity.
    revision: A Row ID.
    num_before: Maximum number of Rows before |revision| to fetch.
    num_after: Max number of Rows starting from |revision| to fetch.

  Returns:
    A list of Row entities ordered by ID. The Row entities will have at least
    the "revision" and "value" properties, which are the only ones relevant
    to their use in this module.

  Raises:
    AssertionError: The test is internal-only and the current user is not an
        internal user.
  """
  # This check guards the privileged queries below, so it is raised explicitly
  # instead of using an assert statement, which is removed when Python runs
  # with optimizations (-O) enabled.
  if not utils.IsInternalUser() and test.internal_only:
    raise AssertionError('Internal-only test requested by external user.')
  query = graph_data.Row.query(projection=['revision', 'value'])
  query = query.filter(graph_data.Row.parent_test == test.key)

  # Rows strictly before |revision|: query newest-first so the limit keeps the
  # ones closest to |revision|, then flip back into ascending order.
  before_query = query.filter(graph_data.Row.revision < revision)
  before_query = before_query.order(-graph_data.Row.revision)
  # NOTE(review): called before each fetch; presumably the privilege applies
  # to a single query only -- confirm against datastore_hooks.
  datastore_hooks.SetSinglePrivilegedRequest()
  rows_before = list(reversed(before_query.fetch(limit=num_before)))

  # Rows at or after |revision|, already in ascending order.
  after_query = query.filter(graph_data.Row.revision >= revision)
  after_query = after_query.order(graph_data.Row.revision)
  datastore_hooks.SetSinglePrivilegedRequest()
  rows_at_and_after = after_query.fetch(num_after)

  return rows_before + rows_at_and_after
289
290
def _FetchStoredAnomalies(test, revisions):
  """Makes a list of data about Anomaly entities for a Test.

  Args:
    test: A Test entity.
    revisions: A list of revisions of the rows being displayed; only the
        first one is used, as the lower bound for anomalies to include.

  Returns:
    A list of dicts, one for each stored Anomaly of |test| whose end revision
    is greater than the first entry of |revisions|. The list is empty if
    |revisions| is empty.
  """
  # With no rows on display there is no revision range to match anomalies
  # against, and indexing revisions[0] below would fail.
  if not revisions:
    return []
  first_revision = revisions[0]

  stored_anomalies = anomaly.Anomaly.query().filter(
      anomaly.Anomaly.test == test.key).fetch()

  stored_anomaly_dicts = []
  for a in stored_anomalies:
    if a.end_revision > first_revision:
      stored_anomaly_dicts.append({
          'revision': a.end_revision,
          'median_before': a.median_before_anomaly,
          'median_after': a.median_after_anomaly,
          'percent_changed': a.percent_changed,
          'bug_id': _GetDisplayBugId(a.bug_id),
          'timestamp': a.timestamp,
      })
  return stored_anomaly_dicts
308
309
def _CsvUrl(test_path, rows):
  """Constructs an URL for requesting data from /graph_csv for |rows|.

  Args:
    test_path: The full test path, sent as the "test_path" parameter.
    rows: A list of Row entities; may be empty.

  Returns:
    A relative URL string. When |rows| is non-empty it also carries the
    number of rows as "num_points" and the last row's revision as "rev".
  """
  # urlencode is found in urllib on Python 2 but in urllib.parse on Python 3,
  # so it is resolved here in a way that works on both.
  try:
    from urllib import urlencode
  except ImportError:
    from urllib.parse import urlencode

  # Using a list of pairs ensures a predictable order for the parameters.
  params = [('test_path', test_path)]
  if rows:
    params += [
        ('num_points', len(rows)),
        ('rev', rows[-1].revision),
    ]
  return '/graph_csv?%s' % urlencode(params)
320
321
def _GraphUrl(test, revision):
  """Constructs an URL for viewing the given test on the /report page.

  Args:
    test: A Test entity; its master name, bot name and test path are used.
    revision: A revision to include as the "rev" parameter, or a false value
        (e.g. None or an empty string) to leave it out.

  Returns:
    A relative URL string.
  """
  # urlencode is found in urllib on Python 2 but in urllib.parse on Python 3,
  # so it is resolved here in a way that works on both.
  try:
    from urllib import urlencode
  except ImportError:
    from urllib.parse import urlencode

  # The first two parts of the test path are dropped for the "tests"
  # parameter; master and bot are passed as their own parameters.
  params = [
      ('masters', test.master_name),
      ('bots', test.bot_name),
      ('tests', '/'.join(test.test_path.split('/')[2:])),
  ]
  if revision:
    params.append(('rev', revision))
  return '/report?%s' % urlencode(params)
332
333
def _GetDisplayBugId(bug_id):
  """Returns the string to show for the bug ID property of an anomaly.

  The values -1, -2 and None are shown as 'INVALID', 'IGNORE' and 'NONE'
  respectively; any other value is shown as its string form.
  """
  labels = {None: 'NONE', -1: 'INVALID', -2: 'IGNORE'}
  if bug_id in labels:
    return labels[bug_id]
  return str(bug_id)
338