1# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""TensorFlow 2.0 Profiler for both Eager Mode and Graph Mode.
16
The profiler has two modes:
- Programmatic Mode: start(), stop() and Profiler class. It will perform
                    profiling when calling start() or entering a Profiler
                    context, and will stop when calling stop() or exiting the
                    Profiler context.
- On-demand Mode: start_profiler_server(). It will perform profiling when
                  receiving a profiling request.
23
24NOTE: Only one active profiler session is allowed. Use of simultaneous
25Programmatic Mode and On-demand Mode is undefined and will likely fail.
26
NOTE: The Keras TensorBoard callback will automatically perform sampled
profiling. Before enabling customized profiling, set the callback flag
"profile_batches=[]" to disable automatic sampled profiling.
31"""
32
33from __future__ import absolute_import
34from __future__ import division
35from __future__ import print_function
36
37import datetime
38import os
39import threading
40
41from tensorflow.python import pywrap_tensorflow
42from tensorflow.python.eager import context
43from tensorflow.python.framework import c_api_util
44from tensorflow.python.platform import gfile
45from tensorflow.python.platform import tf_logging as logging
46from tensorflow.python.util import compat
47
# Profiler object of the active programmatic session. Assigned by start() and
# reset to None by stop(); None means no session is running.
_profiler = None
# Held by start() and stop() while they inspect and update `_profiler`.
_profiler_lock = threading.Lock()
# Incremented by stop() each time a profiling session is stopped.
_run_num = 0
# Suffix of the empty event file created by maybe_create_event_file().
# This suffix should be kept in sync with kProfileEmptySuffix in
# tensorflow/core/profiler/rpc/client/capture_profile.cc.
_EVENT_FILE_SUFFIX = '.profile-empty'
54
55
class ProfilerAlreadyRunningError(Exception):
  """Raised by start() when a profiling session is already active."""
58
59
class ProfilerNotRunningError(Exception):
  """Raised by stop() when there is no active profiling session."""
62
63
def start():
  """Start a programmatic profiling session.

  A temporary profiler context is created (and bound to the eager context when
  the default execution mode is eager), used to construct the profiler, and
  then released. The profiler is kept in the module-level `_profiler` until
  stop() is called.

  If the newly created profiler reports that it is not OK, a warning is logged;
  the profiler is still recorded as the active session.

  Raises:
    ProfilerAlreadyRunningError: If another profiling session is running.
  """
  global _profiler
  with _profiler_lock:
    if _profiler is not None:
      raise ProfilerAlreadyRunningError('Another profiler is running.')
    profiler_context = pywrap_tensorflow.TFE_NewProfilerContext()
    try:
      if context.default_execution_mode == context.EAGER_MODE:
        pywrap_tensorflow.TFE_ProfilerContextSetEagerContext(
            profiler_context,
            context.context()._handle)  # pylint: disable=protected-access
      _profiler = pywrap_tensorflow.TFE_NewProfiler(profiler_context)
    finally:
      # The context is only needed while constructing the profiler. Release it
      # even when construction raises so the native object is not leaked.
      pywrap_tensorflow.TFE_DeleteProfilerContext(profiler_context)
    if not pywrap_tensorflow.TFE_ProfilerIsOk(_profiler):
      logging.warning('Another profiler session is running which is probably '
                      'created by profiler server. Please avoid using profiler '
                      'server and profiler APIs at the same time.')
85
86
def stop():
  """Finish the active profiling session and hand back the collected trace.

  Returns:
    A binary string of tensorflow.tpu.Trace. It can be written to a file for
    offline analysis by TensorBoard.

  Raises:
    ProfilerNotRunningError: If there is no active profiling session.
  """
  global _profiler, _run_num
  with _profiler_lock:
    active_profiler = _profiler
    if active_profiler is None:
      raise ProfilerNotRunningError(
          'Cannot stop profiling. No profiler is running.')
    # Serialize the trace into a temporary buffer and copy it out before the
    # buffer is released.
    with c_api_util.tf_buffer() as trace_buffer:
      pywrap_tensorflow.TFE_ProfilerSerializeToString(
          context.context()._handle,  # pylint: disable=protected-access
          active_profiler,
          trace_buffer)
      trace = pywrap_tensorflow.TF_GetBuffer(trace_buffer)
    pywrap_tensorflow.TFE_DeleteProfiler(active_profiler)
    # Mark the session as finished so that start() may be called again.
    _profiler = None
    _run_num += 1
    return trace
113
114
def maybe_create_event_file(logdir):
  """Create an empty event file in `logdir` unless one already exists.

  This event file indicates that we have a plugins/profile/ directory in the
  current logdir. Nothing is created when `logdir` already contains a file
  whose name ends with the profile event-file suffix.

  Args:
    logdir: log directory.
  """
  already_present = any(
      entry.endswith(_EVENT_FILE_SUFFIX)
      for entry in gfile.ListDirectory(logdir))
  if already_present:
    return
  # TODO(b/127330388): Use summary_ops_v2.create_file_writer instead.
  events_prefix = compat.as_bytes(os.path.join(logdir, 'events'))
  writer = pywrap_tensorflow.EventsWriter(events_prefix)
  writer.InitWithSuffix(compat.as_bytes(_EVENT_FILE_SUFFIX))
131
132
def save(logdir, result):
  """Write a profile result into a TensorBoard log directory.

  The result is stored as `local.trace` inside
  `<logdir>/plugins/profile/<timestamp>/`, where the timestamp is the current
  time formatted as `%Y-%m-%d_%H-%M-%S`. The empty profile event file is
  created in `logdir` if it is missing.

  Args:
    logdir: log directory read by TensorBoard.
    result: profiling result returned by stop().
  """
  run_name = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
  run_dir = os.path.join(logdir, 'plugins', 'profile', run_name)
  gfile.MakeDirs(run_dir)
  maybe_create_event_file(logdir)
  trace_path = os.path.join(run_dir, 'local.trace')
  with gfile.Open(trace_path, 'wb') as trace_file:
    trace_file.write(result)
147
148
def start_profiler_server(port):
  """Start a profiler grpc server that listens to given port.

  The profiler server will keep the program running even after the training
  finishes. Please shut down the server with CTRL-C. It can be used in both
  eager mode and graph mode. The service is defined in
  tensorflow/core/profiler/profiler_service.proto. Please use
  tensorflow/contrib/tpu/profiler/capture_tpu_profile to capture a traceable
  file following https://cloud.google.com/tpu/docs/cloud-tpu-tools#capture_trace

  Args:
    port: port profiler server listens to.
  """
  profiler_context = pywrap_tensorflow.TFE_NewProfilerContext()
  try:
    if context.default_execution_mode == context.EAGER_MODE:
      pywrap_tensorflow.TFE_ProfilerContextSetEagerContext(
          profiler_context,
          context.context()._handle)  # pylint: disable=protected-access
    pywrap_tensorflow.TFE_StartProfilerServer(profiler_context, port)
  finally:
    # The context is only needed for the calls above. Release it even when one
    # of them raises so the native object is not leaked.
    pywrap_tensorflow.TFE_DeleteProfilerContext(profiler_context)
169
170
class Profiler(object):
  """Context-manager eager profiler api.

  Profiling starts when the `with` block is entered. When the block exits, the
  session is stopped and its result is saved to `logdir`.

  Example usage:
  ```python
  with Profiler("/path/to/logdir"):
    # do some work
  ```
  """

  def __init__(self, logdir):
    """Creates the context manager.

    Args:
      logdir: log directory the profile result is saved to on exit.
    """
    self._logdir = logdir

  def __enter__(self):
    """Starts profiling and returns this object as the `as` target."""
    start()
    # Return self so `with Profiler(...) as p:` binds the profiler, not None.
    return self

  def __exit__(self, typ, value, tb):
    """Stops profiling and saves the result; exceptions are not suppressed."""
    result = stop()
    save(self._logdir, result)
190