1# Copyright 2016 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4import atexit
5import json
6import os
7import sys
8import time
9import threading
10import multiprocessing
11import multiprocessing_shim
12
13from py_trace_event.trace_event_impl import perfetto_trace_writer
14from py_trace_event import trace_time
15
16from py_utils import lock
17
18
19# Trace file formats:
20
21# Legacy format: json list of events.
22# Events can be written from multiple processes, but since no process
23# can be sure that it is the last one, nobody writes the closing ']'.
24# So the resulting file is not technically correct json.
25JSON = "json"
26
27# Full json with events and metadata.
28# This format produces correct json ready to feed into TraceDataBuilder.
29# Note that it is the responsibility of the user of py_trace_event to make sure
30# that trace_disable() is called after all child processes have finished.
31JSON_WITH_METADATA = "json_with_metadata"
32
33# Perfetto protobuf trace format.
34PROTOBUF = "protobuf"
35
36
37_lock = threading.Lock()
38
39_enabled = False
40_log_file = None
41
42_cur_events = [] # events that have yet to be buffered
43_benchmark_metadata = {}
44
45# Default timestamp values for clock snapshot.
46# If a ClockSnapshot message with these default values is emitted, Telemetry
47# events' time will not be translated by trace processor, because both
48# TELEMETRY and BOOTTIME timestamps are the same. This allows the old-style
49# synchronization (using clock_sync events) to take place in catapult.
50# If we want to actually synchronize Telemetry with other trace producers
51# via clock snapshots in trace processor, we should set _boottime_ts to the
52# actual BOOTTIME of the device and _emit_clock_sync to False. In this case,
53# trace processor will translate both Chrome's and telemetry's timestamps
54# to the device time (BOOTTIME) during proto-to-json conversion, and catapult's
55# clock synchronization will not take place because we do not emit the
56# clock_sync event.
57# Note that we can't use both synchronization methods at the same time
58# because that will lead to wrong results.
59_telemetry_ts = trace_time.Now()
60_boottime_ts = _telemetry_ts
61_emit_clock_sync = True
62
63_tls = threading.local() # tls used to detect forking/etc
64_atexit_regsitered_for_pid = None
65
66_control_allowed = True
67
68_original_multiprocessing_process = multiprocessing.Process
69
70class TraceException(Exception):
71  pass
72
73def _note(msg, *args):
74  pass
75#  print "%i: %s" % (os.getpid(), msg)
76
77
78def _locked(fn):
79  def locked_fn(*args,**kwargs):
80    _lock.acquire()
81    try:
82      ret = fn(*args,**kwargs)
83    finally:
84      _lock.release()
85    return ret
86  return locked_fn
87
88def _disallow_tracing_control():
89  global _control_allowed
90  _control_allowed = False
91
92def trace_enable(log_file=None, format=None):
93  """Enable tracing.
94
95  Args:
96    log_file: file to write trace into. Can be a file-like object,
97      a name of file, or None. If None, file name is constructed
98      from executable name.
99    format: trace file format. See trace_event.py for available options.
100  """
101  global _emit_clock_sync
102  if format is None:
103    format = JSON
104  # Can only write clock snapshots in protobuf format. In all other formats
105  # should use clock_syncs.
106  if format != PROTOBUF:
107    _emit_clock_sync = True
108  _trace_enable(log_file, format)
109
110def _write_header():
111  if _format == PROTOBUF:
112    tid = threading.current_thread().ident
113    perfetto_trace_writer.write_clock_snapshot(
114        output=_log_file,
115        tid=tid,
116        telemetry_ts=_telemetry_ts,
117        boottime_ts=_boottime_ts,
118    )
119    perfetto_trace_writer.write_thread_descriptor_event(
120        output=_log_file,
121        pid=os.getpid(),
122        tid=tid,
123        ts=trace_time.Now(),
124    )
125    perfetto_trace_writer.write_event(
126        output=_log_file,
127        ph="M",
128        category="process_argv",
129        name="process_argv",
130        ts=trace_time.Now(),
131        args={"argv": sys.argv},
132        tid=tid,
133    )
134  else:
135    if _format == JSON:
136      _log_file.write('[')
137    elif _format == JSON_WITH_METADATA:
138      _log_file.write('{"traceEvents": [\n')
139    else:
140      raise TraceException("Unknown format: %s" % _format)
141    json.dump({
142        "ph": "M",
143        "category": "process_argv",
144        "pid": os.getpid(),
145        "tid": threading.current_thread().ident,
146        "ts": trace_time.Now(),
147        "name": "process_argv",
148        "args": {"argv": sys.argv},
149    }, _log_file)
150    _log_file.write('\n')
151
152
153@_locked
154def _trace_enable(log_file=None, format=None):
155  global _format
156  _format = format
157  global _enabled
158  if _enabled:
159    raise TraceException("Already enabled")
160  if not _control_allowed:
161    raise TraceException("Tracing control not allowed in child processes.")
162  _enabled = True
163  global _log_file
164  if log_file == None:
165    if sys.argv[0] == '':
166      n = 'trace_event'
167    else:
168      n = sys.argv[0]
169    if _format == PROTOBUF:
170      log_file = open("%s.pb" % n, "ab", False)
171    else:
172      log_file = open("%s.json" % n, "ab", False)
173  elif isinstance(log_file, basestring):
174    log_file = open("%s" % log_file, "ab", False)
175  elif not hasattr(log_file, 'fileno'):
176    raise TraceException(
177        "Log file must be None, a string, or file-like object with a fileno()")
178
179  _note("trace_event: tracelog name is %s" % log_file)
180
181  _log_file = log_file
182  with lock.FileLock(_log_file, lock.LOCK_EX):
183    _log_file.seek(0, os.SEEK_END)
184
185    lastpos = _log_file.tell()
186    creator = lastpos == 0
187    if creator:
188      _note("trace_event: Opened new tracelog, lastpos=%i", lastpos)
189      _write_header()
190    else:
191      _note("trace_event: Opened existing tracelog")
192    _log_file.flush()
193  # Monkeypatch in our process replacement for the multiprocessing.Process class
194  if multiprocessing.Process != multiprocessing_shim.ProcessShim:
195      multiprocessing.Process = multiprocessing_shim.ProcessShim
196
197@_locked
198def trace_flush():
199  if _enabled:
200    _flush()
201
202@_locked
203def trace_disable():
204  global _enabled
205  if not _control_allowed:
206    raise TraceException("Tracing control not allowed in child processes.")
207  if not _enabled:
208    return
209  _enabled = False
210  _flush(close=True)
211  # Clear the collected interned data so that the next trace session
212  # could start from a clean state.
213  perfetto_trace_writer.reset_global_state()
214  multiprocessing.Process = _original_multiprocessing_process
215
216def _write_cur_events():
217  if _format == PROTOBUF:
218    for e in _cur_events:
219      perfetto_trace_writer.write_event(
220          output=_log_file,
221          ph=e["ph"],
222          category=e["category"],
223          name=e["name"],
224          ts=e["ts"],
225          args=e["args"],
226          tid=threading.current_thread().ident,
227      )
228  elif _format in (JSON, JSON_WITH_METADATA):
229    for e in _cur_events:
230      _log_file.write(",\n")
231      json.dump(e, _log_file)
232  else:
233    raise TraceException("Unknown format: %s" % _format)
234  del _cur_events[:]
235
236def _write_footer():
237  if _format in [JSON, PROTOBUF]:
238    # In JSON format we might not be the only process writing to this logfile.
239    # So, we will simply close the file rather than writing the trailing ] that
240    # it technically requires. The trace viewer understands this and
241    # will insert a trailing ] during loading.
242    # In PROTOBUF format there's no need for a footer. The metadata has already
243    # been written in a special proto message.
244    pass
245  elif _format == JSON_WITH_METADATA:
246    _log_file.write('],\n"metadata": ')
247    json.dump(_benchmark_metadata, _log_file)
248    _log_file.write('}')
249  else:
250    raise TraceException("Unknown format: %s" % _format)
251
252def _flush(close=False):
253  global _log_file
254  with lock.FileLock(_log_file, lock.LOCK_EX):
255    _log_file.seek(0, os.SEEK_END)
256    if len(_cur_events):
257      _write_cur_events()
258    if close:
259      _write_footer()
260    _log_file.flush()
261
262  if close:
263    _note("trace_event: Closed")
264    _log_file.close()
265    _log_file = None
266  else:
267    _note("trace_event: Flushed")
268
269@_locked
270def trace_is_enabled():
271  return _enabled
272
273@_locked
274def add_trace_event(ph, ts, category, name, args=None):
275  global _enabled
276  if not _enabled:
277    return
278  if not hasattr(_tls, 'pid') or _tls.pid != os.getpid():
279    _tls.pid = os.getpid()
280    global _atexit_regsitered_for_pid
281    if _tls.pid != _atexit_regsitered_for_pid:
282      _atexit_regsitered_for_pid = _tls.pid
283      atexit.register(_trace_disable_atexit)
284      _tls.pid = os.getpid()
285      del _cur_events[:] # we forked, clear the event buffer!
286    tid = threading.current_thread().ident
287    if not tid:
288      tid = os.getpid()
289    _tls.tid = tid
290
291  _cur_events.append({
292      "ph": ph,
293      "category": category,
294      "pid": _tls.pid,
295      "tid": _tls.tid,
296      "ts": ts,
297      "name": name,
298      "args": args or {},
299  });
300
301def trace_begin(name, args=None):
302  add_trace_event("B", trace_time.Now(), "python", name, args)
303
304def trace_end(name, args=None):
305  add_trace_event("E", trace_time.Now(), "python", name, args)
306
307def trace_set_thread_name(thread_name):
308  add_trace_event("M", trace_time.Now(), "__metadata", "thread_name",
309                  {"name": thread_name})
310
311def trace_add_benchmark_metadata(
312    benchmark_start_time_us,
313    story_run_time_us,
314    benchmark_name,
315    benchmark_description,
316    story_name,
317    story_tags,
318    story_run_index,
319    label=None,
320):
321  """Add benchmark metadata to be written to trace file.
322
323  Args:
324    benchmark_start_time_us: Benchmark start time in microseconds.
325    story_run_time_us: Story start time in microseconds.
326    benchmark_name: Name of the benchmark.
327    benchmark_description: Description of the benchmark.
328    story_name: Name of the story.
329    story_tags: List of story tags.
330    story_run_index: Index of the story run.
331    label: Optional label.
332    had_failures: Whether this story run failed.
333  """
334  global _benchmark_metadata
335  if _format == PROTOBUF:
336    # Write metadata immediately.
337    perfetto_trace_writer.write_metadata(
338        output=_log_file,
339        benchmark_start_time_us=benchmark_start_time_us,
340        story_run_time_us=story_run_time_us,
341        benchmark_name=benchmark_name,
342        benchmark_description=benchmark_description,
343        story_name=story_name,
344        story_tags=story_tags,
345        story_run_index=story_run_index,
346        label=label,
347    )
348    if _emit_clock_sync:
349      perfetto_trace_writer.write_chrome_metadata(
350          output=_log_file,
351          clock_domain="TELEMETRY",
352      )
353  elif _format == JSON_WITH_METADATA:
354    # Store metadata to write it in the footer.
355    telemetry_metadata_for_json = {
356        "benchmarkStart": benchmark_start_time_us / 1000.0,
357        "traceStart": story_run_time_us / 1000.0,
358        "benchmarks": [benchmark_name],
359        "benchmarkDescriptions": [benchmark_description],
360        "stories": [story_name],
361        "storyTags": story_tags,
362        "storysetRepeats": [story_run_index],
363    }
364    if label:
365      telemetry_metadata_for_json["labels"] = [label]
366
367    assert _emit_clock_sync
368    _benchmark_metadata = {
369        # TODO(crbug.com/948633): For right now, we use "TELEMETRY" as the
370        # clock domain to guarantee that Telemetry is given its own clock
371        # domain. Telemetry isn't really a clock domain, though: it's a
372        # system that USES a clock domain like LINUX_CLOCK_MONOTONIC or
373        # WIN_QPC. However, there's a chance that a Telemetry controller
374        # running on Linux (using LINUX_CLOCK_MONOTONIC) is interacting
375        # with an Android phone (also using LINUX_CLOCK_MONOTONIC, but
376        # on a different machine). The current logic collapses clock
377        # domains based solely on the clock domain string, but we really
378        # should to collapse based on some (device ID, clock domain ID)
379        # tuple. Giving Telemetry its own clock domain is a work-around
380        # for this.
381        "clock-domain": "TELEMETRY",
382        "telemetry": telemetry_metadata_for_json,
383    }
384  elif _format == JSON:
385    raise TraceException("Can't write metadata in JSON format")
386  else:
387    raise TraceException("Unknown format: %s" % _format)
388
389def trace_set_clock_snapshot(telemetry_ts, boottime_ts):
390  """Set timestamps to be written in a ClockSnapshot message.
391
392  This function must be called before the trace start. When trace starts,
393  a ClockSnapshot message with given timestamps will be written in protobuf
394  format. In json format, nothing will happen. Use clock_sync function
395  for clock synchronization in json format.
396
397  Args:
398    telemetry_ts: BOOTTIME of the device where Telemetry runs.
399    boottime_ts: BOOTTIME of the device where the tested browser runs.
400  """
401  global _telemetry_ts
402  global _boottime_ts
403  global _emit_clock_sync
404  global _enabled
405  if _enabled:
406    raise TraceException("Can't set the clock snapshot after trace started.")
407  _telemetry_ts = telemetry_ts
408  _boottime_ts = boottime_ts
409  _emit_clock_sync = False
410
411def clock_sync(sync_id, issue_ts=None):
412  """Add a clock sync event to the trace log.
413
414  Adds a clock sync event if the TBMv2-style synchronization is enabled.
415  It's enabled in two cases:
416    1) Trace format is json.
417    2) The clock snapshot was not set before the trace start.
418
419  Args:
420    sync_id: ID of clock sync event.
421    issue_ts: Time at which clock sync was issued, in microseconds.
422  """
423  if _emit_clock_sync:
424    time_stamp = trace_time.Now()
425    args_to_log = {"sync_id": sync_id}
426    if issue_ts: # Issuer if issue_ts is set, else reciever.
427      assert issue_ts <= time_stamp
428      args_to_log["issue_ts"] = issue_ts
429    add_trace_event("c", time_stamp, "python", "clock_sync", args_to_log)
430
431def _trace_disable_atexit():
432  trace_disable()
433
434def is_tracing_controllable():
435  global _control_allowed
436  return _control_allowed
437