1# Copyright 2014 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import copy
6import json
7import logging
8import os
9import shutil
10import subprocess
11import tempfile
12
13from telemetry.core import util
14
15
16_TRACE2HTML_PATH = os.path.join(
17    util.GetCatapultDir(), 'tracing', 'bin', 'trace2html')
18
19
class NonSerializableTraceData(Exception):
  """Signals that raw trace data could not be turned into a TraceData."""
23
24
class TraceDataPart(object):
  """Identifies one kind of trace ("part") that a TraceData can hold.

  TraceData can have a variety of events. These are called "parts" and are
  accessed by fixed field names; a TraceDataPart wraps one such field name.
  Two parts compare equal, and hash alike, when their field names are equal.
  """
  def __init__(self, raw_field_name):
    self._raw_field_name = raw_field_name

  def __repr__(self):
    return 'TraceDataPart("%s")' % self._raw_field_name

  @property
  def raw_field_name(self):
    """The field name under which traces of this part are stored."""
    return self._raw_field_name

  def __eq__(self, other):
    # Let Python fall back to its default handling for foreign types instead
    # of failing with an AttributeError on |other.raw_field_name|.
    if not isinstance(other, TraceDataPart):
      return NotImplemented
    return self.raw_field_name == other.raw_field_name

  def __ne__(self, other):
    # Python 2 does not derive != from __eq__, so define it explicitly to keep
    # the two operators consistent.
    equal = self.__eq__(other)
    if equal is NotImplemented:
      return equal
    return not equal

  def __hash__(self):
    return hash(self.raw_field_name)
45
46
# The fixed parts a trace may contain. Each one wraps the raw field name under
# which the traces of that part are stored.
ATRACE_PART = TraceDataPart('systemTraceEvents')
BATTOR_TRACE_PART = TraceDataPart('powerTraceAsString')
CHROME_TRACE_PART = TraceDataPart('traceEvents')
CPU_TRACE_DATA = TraceDataPart('cpuSnapshots')
INSPECTOR_TRACE_PART = TraceDataPart('inspectorTimelineEvents')
SURFACE_FLINGER_PART = TraceDataPart('surfaceFlinger')
TAB_ID_PART = TraceDataPart('tabIds')
TELEMETRY_PART = TraceDataPart('telemetry')

# Every part declared above.
ALL_TRACE_PARTS = {
    ATRACE_PART,
    BATTOR_TRACE_PART,
    CHROME_TRACE_PART,
    CPU_TRACE_DATA,
    INSPECTOR_TRACE_PART,
    SURFACE_FLINGER_PART,
    TAB_ID_PART,
    TELEMETRY_PART,
}

# Raw field names of every part declared above.
ALL_TRACE_PARTS_RAW_NAMES = {part.raw_field_name for part in ALL_TRACE_PARTS}
66
def _HasTraceFor(part, raw):
  """Returns True if |raw| has a non-empty entry for |part|'s field name."""
  assert isinstance(part, TraceDataPart)
  field_name = part.raw_field_name
  return field_name in raw and len(raw[field_name]) > 0
72
73
def _GetFilePathForTrace(trace, dir_path):
  """Return the path to a file that contains |trace|.

  Note: if |trace| is an instance of TraceFileHandle, this reuses the trace path
  that the trace file handle holds. Otherwise, it creates a new trace file
  in the |dir_path| directory: strings are written as is, dictionaries and
  lists are dumped as JSON.

  Args:
    trace: a TraceFileHandle, a string, a dictionary or a list.
    dir_path: directory in which a new trace file is created when needed.

  Returns:
    The path of the file that holds |trace|.

  Raises:
    TypeError: if |trace| is of none of the supported types.
  """
  if isinstance(trace, TraceFileHandle):
    return trace.file_path
  # Reject unsupported types before creating the file: it is created with
  # delete=False, so raising afterwards would leave an empty file behind.
  if not isinstance(trace, (basestring, dict, list)):
    raise TypeError('Trace is of unknown type.')
  with tempfile.NamedTemporaryFile(mode='w', dir=dir_path, delete=False) as fp:
    if isinstance(trace, basestring):
      fp.write(trace)
    else:
      json.dump(trace, fp)
    return fp.name
91
92
class TraceData(object):
  """TraceData holds a collection of traces from multiple sources.

  A TraceData can have multiple active parts. Each part represents traces
  collected from a different trace agent. Internally the traces are kept in a
  dictionary that maps a part's raw field name to the list of its traces.
  """
  def __init__(self):
    """Creates an empty TraceData."""
    self._raw_data = {}
    self._events_are_safely_mutable = False

  def _SetFromBuilder(self, d):
    """Adopts |d| as the raw data and marks the events as safely mutable."""
    self._raw_data = d
    self._events_are_safely_mutable = True

  @property
  def events_are_safely_mutable(self):
    """Returns true if the events in this value are completely sealed.

    Some importers want to take complex fields out of the TraceData and add
    them to the model, changing them subtly as they do so. If the TraceData
    was constructed with data that is shared with something outside the trace
    data, for instance a test harness, then this mutation is unexpected. But,
    if the values are sealed, then mutating the events is a lot faster.

    We know if events are sealed if the value came from a string, or if the
    value came from a TraceDataBuilder.
    """
    return self._events_are_safely_mutable

  @property
  def active_parts(self):
    """The set of known parts whose field name is present in the raw data."""
    return {p for p in ALL_TRACE_PARTS if p.raw_field_name in self._raw_data}

  def HasTracesFor(self, part):
    """Returns True if there is a non-empty entry for |part|."""
    return _HasTraceFor(part, self._raw_data)

  def GetTracesFor(self, part):
    """Return the list of traces for |part| in string or dictionary forms.

    Note: since this API returns the traces that can be directly accessed in
    memory, it may require lots of memory usage as some of the traces can be
    very big.
    For references, we have cases where Telemetry is OOM'ed because the memory
    required for processing the trace in Python is too big (crbug.com/672097).

    Returns:
      The stored list of traces for |part|, or an empty list if there are none.
    """
    assert isinstance(part, TraceDataPart)
    if not self.HasTracesFor(part):
      return []
    traces_list = self._raw_data[part.raw_field_name]
    # Since this API returns the traces in memory form, and since the memory
    # bottleneck of Telemetry is for keeping traces in memory, there is no use
    # in keeping the on-disk form of tracing beyond this point. Hence we
    # convert, in place, all traces of form TraceFileHandle to the JSON form.
    for i, data in enumerate(traces_list):
      if isinstance(data, TraceFileHandle):
        traces_list[i] = data.AsTraceData()
    return traces_list

  def GetTraceFor(self, part):
    """Returns the single trace of |part|; asserts there is exactly one."""
    assert isinstance(part, TraceDataPart)
    traces = self.GetTracesFor(part)
    assert len(traces) == 1
    return traces[0]

  def CleanUpAllTraces(self):
    """Remove all the traces that this has handles to.

    Those include traces stored in memory & on disk. After invoking this,
    one can no longer use this object for collecting the traces.
    """
    # values() instead of itervalues(): the latter only exists on Python 2.
    for traces_list in self._raw_data.values():
      for trace in traces_list:
        if isinstance(trace, TraceFileHandle):
          trace.Clean()
    self._raw_data = {}

  def Serialize(self, file_path, trace_title=''):
    """Serializes the trace result to |file_path|.

    Every trace is made available as a file (traces that are not backed by a
    TraceFileHandle are written into a temporary directory) and the files are
    handed to trace2html. When there are no traces this only logs a warning.

    Args:
      file_path: path passed to trace2html as --output.
      trace_title: title passed to trace2html as --title.

    Raises:
      subprocess.CalledProcessError: if trace2html exits with a non-zero code.
    """
    if not self._raw_data:
      logging.warning('No traces to convert to html.')
      return
    temp_dir = tempfile.mkdtemp()
    trace_files = []
    try:
      trace_size_data = {}
      # items() instead of iteritems(): the latter only exists on Python 2.
      for part, traces_list in self._raw_data.items():
        for trace in traces_list:
          path = _GetFilePathForTrace(trace, temp_dir)
          trace_size_data.setdefault(part, 0)
          trace_size_data[part] += os.path.getsize(path)
          trace_files.append(path)
      logging.info('Trace sizes in bytes: %s', trace_size_data)

      cmd = (['python', _TRACE2HTML_PATH] + trace_files +
             ['--output', file_path] + ['--title', trace_title])
      subprocess.check_output(cmd)
    finally:
      # Only the files created here live in |temp_dir|; files owned by a
      # TraceFileHandle are left untouched.
      shutil.rmtree(temp_dir)
194
195
class TraceFileHandle(object):
  """A trace file handle object allows storing trace data on disk.

  TraceFileHandle API allows one to collect traces from Chrome into disk instead
  of keeping them in memory. This is important for keeping memory usage of
  Telemetry low to avoid OOM (see:
  https://github.com/catapult-project/catapult/issues/3119).

  The fact that this uses a file underneath to store tracing data means the
  callsite is responsible for discarding the file when they no longer need the
  tracing data. Call TraceFileHandle.Clean when you are done using this object.

  Expected usage: Open(), any number of AppendTraceData() calls, Close(), then
  either read |file_path| or call AsTraceData().
  """
  def __init__(self):
    self._backing_file = None
    self._file_path = None
    self._trace_data = None

  def Open(self):
    """Creates the temporary backing file and opens it for appending."""
    assert not self._backing_file and not self._file_path
    self._backing_file = tempfile.NamedTemporaryFile(delete=False, mode='a')

  def AppendTraceData(self, partial_trace_data):
    """Writes the string |partial_trace_data| to the backing file."""
    assert isinstance(partial_trace_data, basestring)
    self._backing_file.write(partial_trace_data)

  @property
  def file_path(self):
    """Path of the backing file; only available between Close and Clean."""
    assert self._file_path, (
        'Either the handle need to be closed first or this handle is cleaned')
    return self._file_path

  def Close(self):
    """Closes the backing file and makes its path available."""
    assert self._backing_file
    self._backing_file.close()
    self._file_path = self._backing_file.name
    self._backing_file = None

  def AsTraceData(self):
    """Get the object form of trace data that this handle manages.

    *Warning: this can have large memory footprint if the trace data is big.

    Since this requires the in-memory form of the trace, it is no longer useful
    to still keep the backing file underneath, invoking this will also discard
    the file to avoid the risk of leaking the backing trace file.

    Returns:
      The JSON-decoded content of the backing file. The result is cached, so
      repeated calls return the same object.
    """
    # Compare against None rather than relying on truthiness: an empty list or
    # dictionary is a valid cached result, and the backing file is already
    # gone by the time a second call is made.
    if self._trace_data is not None:
      return self._trace_data
    assert self._file_path
    with open(self._file_path) as f:
      self._trace_data = json.load(f)
    self.Clean()
    return self._trace_data

  def Clean(self):
    """Remove the backing file used for storing trace on disk.

    This should be called when and only when you no longer need to use
    TraceFileHandle.
    """
    assert self._file_path
    os.remove(self._file_path)
    self._file_path = None
259
260
class TraceDataBuilder(object):
  """TraceDataBuilder helps build up a trace from multiple trace agents.

  TraceData is supposed to be immutable, but it is useful during recording to
  have a mutable version. That is TraceDataBuilder. Once AsData() has been
  called the builder has handed over its data and can no longer be added to.
  """
  def __init__(self):
    # Maps a part's raw field name to the list of traces added for it. Set to
    # None once AsData() has handed the dictionary over to a TraceData.
    self._raw_data = {}

  def AsData(self):
    """Returns a TraceData that takes over the traces added so far.

    Raises:
      Exception: if AsData() was already called on this builder.
    """
    if self._raw_data is None:
      raise Exception('Can only AsData once')
    data = TraceData()
    data._SetFromBuilder(self._raw_data)
    self._raw_data = None
    return data

  def AddTraceFor(self, part, trace):
    """Appends |trace| to the traces recorded for |part|.

    Args:
      part: a TraceDataPart.
      trace: for CHROME_TRACE_PART a dictionary or a TraceFileHandle; for any
          other part a string, a dictionary or a list.

    Raises:
      Exception: if AsData() was already called on this builder.
    """
    assert isinstance(part, TraceDataPart)
    if part == CHROME_TRACE_PART:
      assert isinstance(trace, (dict, TraceFileHandle))
    else:
      assert isinstance(trace, (basestring, dict, list))

    if self._raw_data is None:
      raise Exception('Already called AsData() on this builder.')

    self._raw_data.setdefault(part.raw_field_name, []).append(trace)

  def HasTracesFor(self, part):
    """Returns True if a non-empty entry was recorded for |part|."""
    return _HasTraceFor(part, self._raw_data)
296
297
def CreateTraceDataFromRawData(raw_data):
  """Convenience method for creating a TraceData object from |raw_data|.
     This is mostly used for testing.

     Args:
        raw_data can be:
            + A dictionary that represents multiple trace parts. Keys of the
            dictionary must always contain 'traceEvents', as the chrome trace
            must always be present.
            + A list that represents Chrome trace events.
            + JSON string of either above.

     Returns:
        A TraceData built from a deep copy of |raw_data|; it is empty when
        |raw_data| is empty.

     Raises:
        NonSerializableTraceData: if the data is neither a dictionary nor a
        list.
  """
  raw_data = copy.deepcopy(raw_data)
  json_data = raw_data
  if isinstance(raw_data, basestring):
    json_data = json.loads(raw_data)

  builder = TraceDataBuilder()
  if not json_data:
    return builder.AsData()

  if isinstance(json_data, list):
    builder.AddTraceFor(CHROME_TRACE_PART, {'traceEvents': json_data})
    return builder.AsData()

  if not isinstance(json_data, dict):
    raise NonSerializableTraceData('Unrecognized data format.')

  assert 'traceEvents' in json_data, 'Only raw chrome trace is supported'
  extra_part_names = [
      name for name in json_data
      if name != 'traceEvents' and name in ALL_TRACE_PARTS_RAW_NAMES]
  # Move the data of every other known part out of the dictionary so that what
  # remains forms the trace data for the Chrome part only.
  for name in extra_part_names:
    builder.AddTraceFor(TraceDataPart(name), json_data[name])
  for name in extra_part_names:
    del json_data[name]
  builder.AddTraceFor(CHROME_TRACE_PART, json_data)
  return builder.AsData()
336