1# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Utilities for building profiler options."""
16from __future__ import absolute_import
17from __future__ import division
18from __future__ import print_function
19
20import copy
21
22from tensorflow.python.profiler import tfprof_logger
23from tensorflow.python.util.tf_export import tf_export
24
25
@tf_export(v1=['profiler.ProfileOptionBuilder'])
class ProfileOptionBuilder(object):
  # pylint: disable=line-too-long
  """Option Builder for Profiling API.

  The builder holds a dict of profiling options. Every `with_*`/setter method
  mutates that dict and returns the builder itself so that calls can be
  chained; `build()` returns an independent copy of the resulting dict.

  For tutorial on the options, see
  https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler/g3doc/options.md

  ```python
  # Users can use pre-built options:
  opts = (
      tf.compat.v1.profiler.ProfileOptionBuilder.trainable_variables_parameter())

  # Or, build your own options:
  opts = (tf.compat.v1.profiler.ProfileOptionBuilder()
      .with_max_depth(10)
      .with_min_execution_time(min_micros=1000)
      .select(['accelerator_micros'])
      .with_stdout_output()
      .build())

  # Or customize the pre-built options:
  opts = (tf.compat.v1.profiler.ProfileOptionBuilder(
      tf.compat.v1.profiler.ProfileOptionBuilder.time_and_memory())
      .with_node_names(show_name_regexes=['.*rnn.*'])
      .build())

  # Finally, profiling with the options:
  _ = tf.compat.v1.profiler.profile(tf.compat.v1.get_default_graph(),
                          run_meta=run_meta,
                          cmd='scope',
                          options=opts)
  ```
  """
  # pylint: enable=line-too-long

  def __init__(self, options=None):
    """Constructor.

    Args:
      options: Optional initial option dict to start with. The dict is
          deep-copied, so the builder never mutates the caller's object.
          If None, the builder starts from a default set of options
          (max depth 100, no minimum thresholds, ordered by name,
          selecting 'micros', printing to stdout).
    """
    if options is not None:
      self._options = copy.deepcopy(options)
    else:
      self._options = {'max_depth': 100,
                       'min_bytes': 0,
                       'min_micros': 0,
                       'min_params': 0,
                       'min_float_ops': 0,
                       'min_occurrence': 0,
                       'order_by': 'name',
                       'account_type_regexes': ['.*'],
                       'start_name_regexes': ['.*'],
                       'trim_name_regexes': [],
                       'show_name_regexes': ['.*'],
                       'hide_name_regexes': [],
                       'account_displayed_op_only': False,
                       'select': ['micros'],
                       'step': -1,
                       'output': 'stdout'}

  @staticmethod
  def trainable_variables_parameter():
    """Options used to profile trainable variable parameters.

    Normally used together with 'scope' view.

    Returns:
      A dict of profiling options. A new dict is created on every call.
    """
    return {'max_depth': 10000,
            'min_bytes': 0,
            'min_micros': 0,
            'min_params': 0,
            'min_float_ops': 0,
            'min_occurrence': 0,
            'order_by': 'name',
            'account_type_regexes': [tfprof_logger.TRAINABLE_VARIABLES],
            'start_name_regexes': ['.*'],
            'trim_name_regexes': [],
            'show_name_regexes': ['.*'],
            'hide_name_regexes': [],
            'account_displayed_op_only': True,
            'select': ['params'],
            'step': -1,
            'output': 'stdout'}

  @staticmethod
  def float_operation():
    # pylint: disable=line-too-long
    """Options used to profile float operations.

    Please see https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler/g3doc/profile_model_architecture.md
    on the caveats of calculating float operations.

    Returns:
      A dict of profiling options. A new dict is created on every call.
    """
    # pylint: enable=line-too-long
    return {'max_depth': 10000,
            'min_bytes': 0,
            'min_micros': 0,
            'min_params': 0,
            'min_float_ops': 1,
            'min_occurrence': 0,
            'order_by': 'float_ops',
            'account_type_regexes': ['.*'],
            'start_name_regexes': ['.*'],
            'trim_name_regexes': [],
            'show_name_regexes': ['.*'],
            'hide_name_regexes': [],
            'account_displayed_op_only': True,
            'select': ['float_ops'],
            'step': -1,
            'output': 'stdout'}

  @staticmethod
  def time_and_memory(min_micros=1, min_bytes=1, min_accelerator_micros=0,
                      min_cpu_micros=0, min_peak_bytes=0, min_residual_bytes=0,
                      min_output_bytes=0):
    """Show operation time and memory consumptions.

    Args:
      min_micros: Only show profiler nodes with execution time
          no less than this. It sums accelerator and cpu times.
      min_bytes: Only show profiler nodes requested to allocate no less bytes
          than this.
      min_accelerator_micros: Only show profiler nodes that spend no less
          than this time on accelerator (e.g. GPU).
      min_cpu_micros: Only show profiler nodes that spend no less than
          this time on cpu.
      min_peak_bytes: Only show profiler nodes using no less than this many
          bytes at peak (high watermark). For profiler nodes consisting of
          multiple graph nodes, it sums the graph nodes' peak_bytes.
      min_residual_bytes: Only show profiler nodes that have no less than
          this many bytes not being de-allocated after Compute() ends. For
          profiler nodes consisting of multiple graph nodes, it sums the
          graph nodes' residual_bytes.
      min_output_bytes: Only show profiler nodes that have no less than this
          many bytes of output. The output is not necessarily allocated by
          this profiler node.
    Returns:
      A dict of profiling options. A new dict is created on every call.
    """
    return {'max_depth': 10000,
            'min_bytes': min_bytes,
            'min_peak_bytes': min_peak_bytes,
            'min_residual_bytes': min_residual_bytes,
            'min_output_bytes': min_output_bytes,
            'min_micros': min_micros,
            'min_accelerator_micros': min_accelerator_micros,
            'min_cpu_micros': min_cpu_micros,
            'min_params': 0,
            'min_float_ops': 0,
            'min_occurrence': 0,
            'order_by': 'micros',
            'account_type_regexes': ['.*'],
            'start_name_regexes': ['.*'],
            'trim_name_regexes': [],
            'show_name_regexes': ['.*'],
            'hide_name_regexes': [],
            'account_displayed_op_only': True,
            'select': ['micros', 'bytes'],
            'step': -1,
            'output': 'stdout'}

  def build(self):
    """Build a profiling option.

    The returned dict is a deep copy of the builder's state, so the builder
    can keep being modified (or built again) without affecting it.

    Returns:
      A dict of profiling options.
    """
    return copy.deepcopy(self._options)

  def with_max_depth(self, max_depth):
    """Set the maximum depth of display.

    The depth depends on profiling view. For 'scope' view, it's the
    depth of name scope hierarchy (tree), for 'op' view, it's the number
    of operation types (list), etc.

    Args:
      max_depth: Maximum depth of the data structure to display.
    Returns:
      self
    """
    self._options['max_depth'] = max_depth
    return self

  def with_min_memory(self,
                      min_bytes=0,
                      min_peak_bytes=0,
                      min_residual_bytes=0,
                      min_output_bytes=0):
    """Only show profiler nodes consuming no less than 'min_bytes'.

    Note: every call sets all four memory thresholds. Arguments that are
    not passed take their default of 0 and overwrite any value previously
    stored in the builder (e.g. one coming from a pre-built option dict).

    Args:
      min_bytes: Only show profiler nodes requested to allocate no less bytes
          than this.
      min_peak_bytes: Only show profiler nodes using no less than this many
          bytes at peak (high watermark). For profiler nodes consisting of
          multiple graph nodes, it sums the graph nodes' peak_bytes.
      min_residual_bytes: Only show profiler nodes that have no less than
          this many bytes not being de-allocated after Compute() ends. For
          profiler nodes consisting of multiple graph nodes, it sums the
          graph nodes' residual_bytes.
      min_output_bytes: Only show profiler nodes that have no less than this
          many bytes of output. The output is not necessarily allocated by
          this profiler node.
    Returns:
      self
    """
    self._options['min_bytes'] = min_bytes
    self._options['min_peak_bytes'] = min_peak_bytes
    self._options['min_residual_bytes'] = min_residual_bytes
    self._options['min_output_bytes'] = min_output_bytes
    return self

  def with_min_execution_time(self,
                              min_micros=0,
                              min_accelerator_micros=0,
                              min_cpu_micros=0):
    """Only show profiler nodes consuming no less than 'min_micros'.

    Note: every call sets all three time thresholds. Arguments that are
    not passed take their default of 0 and overwrite any value previously
    stored in the builder (e.g. one coming from a pre-built option dict).

    Args:
      min_micros: Only show profiler nodes with execution time
          no less than this. It sums accelerator and cpu times.
      min_accelerator_micros: Only show profiler nodes that spend no less
          than this time on accelerator (e.g. GPU).
      min_cpu_micros: Only show profiler nodes that spend no less than
          this time on cpu.
    Returns:
      self
    """
    self._options['min_micros'] = min_micros
    self._options['min_accelerator_micros'] = min_accelerator_micros
    self._options['min_cpu_micros'] = min_cpu_micros
    return self

  def with_min_parameters(self, min_params):
    """Only show profiler nodes holding no less than 'min_params' parameters.

    'Parameters' normally refers to the weights in TensorFlow variables.
    It reflects the 'capacity' of models.

    Args:
      min_params: Only show profiler nodes holding a number of parameters
          no less than this.
    Returns:
      self
    """
    self._options['min_params'] = min_params
    return self

  def with_min_occurrence(self, min_occurrence):
    # pylint: disable=line-too-long
    """Only show profiler nodes including no less than 'min_occurrence' graph nodes.

    A "node" means a profiler output node, which can be a python line
    (code view), an operation type (op view), or a graph node
    (graph/scope view). A python line includes all graph nodes created by that
    line, while an operation type includes all graph nodes of that type.

    Args:
      min_occurrence: Only show nodes including no less than this.
    Returns:
      self
    """
    # pylint: enable=line-too-long
    self._options['min_occurrence'] = min_occurrence
    return self

  def with_min_float_operations(self, min_float_ops):
    # pylint: disable=line-too-long
    """Only show profiler nodes consuming no less than 'min_float_ops'.

    Please see https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler/g3doc/profile_model_architecture.md
    on the caveats of calculating float operations.

    Args:
      min_float_ops: Only show profiler nodes with float operations
          no less than this.
    Returns:
      self
    """
    # pylint: enable=line-too-long
    self._options['min_float_ops'] = min_float_ops
    return self

  def with_accounted_types(self, account_type_regexes):
    """Selectively counting statistics based on node types.

    Here, 'types' means the profiler nodes' properties. Profiler by default
    considers device name (e.g. /job:xx/.../device:GPU:0) and operation type
    (e.g. MatMul) as profiler nodes' properties. User can also associate
    customized 'types' to profiler nodes through OpLogProto proto.

    For example, user can select profiler nodes placed on gpu:0 with:
    `account_type_regexes=['.*gpu:0.*']`

    If none of a node's properties match the specified regexes, the node is
    not displayed nor accounted.

    Args:
      account_type_regexes: A list of regexes specifying the types. A shallow
          copy of the list is stored.
    Returns:
      self
    """
    self._options['account_type_regexes'] = copy.copy(account_type_regexes)
    return self

  def with_node_names(self,
                      start_name_regexes=None,
                      show_name_regexes=None,
                      hide_name_regexes=None,
                      trim_name_regexes=None):
    """Regular expressions used to select profiler nodes to display.

    After 'with_accounted_types' is evaluated, 'with_node_names' are
    evaluated as follows:

      For a profile data structure, profiler first finds the profiler
      nodes matching 'start_name_regexes', and starts displaying profiler
      nodes from there. Then, if a node matches 'show_name_regexes' and
      doesn't match 'hide_name_regexes', it's displayed. If a node matches
      'trim_name_regexes', profiler stops further searching that branch.

    Only the arguments that are not None are applied; options whose argument
    is left as None keep their current value. A shallow copy of each given
    list is stored.

    Args:
      start_name_regexes: list of node name regexes to start displaying.
      show_name_regexes: list of node names regexes to display.
      hide_name_regexes: list of node_names regexes that should be hidden.
      trim_name_regexes: list of node name regexes from where to stop.
    Returns:
      self
    """
    if start_name_regexes is not None:
      self._options['start_name_regexes'] = copy.copy(start_name_regexes)
    if show_name_regexes is not None:
      self._options['show_name_regexes'] = copy.copy(show_name_regexes)
    if hide_name_regexes is not None:
      self._options['hide_name_regexes'] = copy.copy(hide_name_regexes)
    if trim_name_regexes is not None:
      self._options['trim_name_regexes'] = copy.copy(trim_name_regexes)
    return self

  def account_displayed_op_only(self, is_true):
    """Whether only account the statistics of displayed profiler nodes.

    Args:
      is_true: If true, only account statistics of nodes eventually
          displayed by the outputs.
          Otherwise, a node's statistics are accounted by its parents
          as long as its types match 'account_type_regexes', even if
          it is hidden from the output, say, by hide_name_regexes.
    Returns:
      self
    """
    self._options['account_displayed_op_only'] = is_true
    return self

  def with_empty_output(self):
    """Do not generate side-effect outputs.

    Returns:
      self
    """
    self._options['output'] = 'none'
    return self

  def with_stdout_output(self):
    """Print the result to stdout.

    Returns:
      self
    """
    self._options['output'] = 'stdout'
    return self

  def with_file_output(self, outfile):
    """Print the result to a file.

    Args:
      outfile: filename for output.
    Returns:
      self
    """
    self._options['output'] = 'file:outfile=%s' % outfile
    return self

  def with_timeline_output(self, timeline_file):
    """Generate a timeline json file.

    Args:
      timeline_file: filename for output.
    Returns:
      self
    """
    self._options['output'] = 'timeline:outfile=%s' % timeline_file
    return self

  def with_pprof_output(self, pprof_file):
    """Generate a pprof profile gzip file.

    To use the pprof file:
      pprof -png --nodecount=100 --sample_index=1 <pprof_file>

    Args:
      pprof_file: filename for output, usually suffixed with .pb.gz.
    Returns:
      self
    """
    self._options['output'] = 'pprof:outfile=%s' % pprof_file
    return self

  def order_by(self, attribute):
    # pylint: disable=line-too-long
    """Order the displayed profiler nodes based on an attribute.

    Supported attributes include micros, bytes, occurrence, params, etc.
    https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler/g3doc/options.md

    Args:
      attribute: An attribute the profiler node has.
    Returns:
      self
    """
    # pylint: enable=line-too-long
    self._options['order_by'] = attribute
    return self

  def select(self, attributes):
    # pylint: disable=line-too-long
    """Select the attributes to display.

    See https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler/g3doc/options.md
    for supported attributes.

    Args:
      attributes: A list of attributes the profiler node has. A shallow copy
          of the list is stored.
    Returns:
      self
    """
    # pylint: enable=line-too-long
    self._options['select'] = copy.copy(attributes)
    return self

  def with_step(self, step):
    """Which profile step to use for profiling.

    The 'step' here refers to the step defined by `Profiler.add_step()` API.

    Args:
      step: When multiple steps of profiles are available, select which step's
         profile to use. If -1, use average of all available steps.
    Returns:
      self
    """
    self._options['step'] = step
    return self
466