# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities for building profiler options."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import copy

from tensorflow.python.profiler import tfprof_logger
from tensorflow.python.util.tf_export import tf_export


@tf_export(v1=['profiler.ProfileOptionBuilder'])
class ProfileOptionBuilder(object):
  # pylint: disable=line-too-long
  """Fluent builder for the option dict consumed by the profiling API.

  A tutorial on the individual options is available at
  https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler/g3doc/options.md

  Every `with_*` / `select` / `order_by` method mutates the builder and
  returns it, so calls can be chained; `build()` hands back an independent
  copy of the accumulated options.

  ```python
  # Use one of the pre-built option dicts directly:
  opts = (
      tf.compat.v1.profiler.ProfileOptionBuilder.trainable_variables_parameter())

  # Or assemble options from scratch:
  opts = (tf.compat.v1.profiler.ProfileOptionBuilder()
      .with_max_depth(10)
      .with_min_micros(1000)
      .select(['accelerator_micros'])
      .with_stdout_output()
      .build())

  # Or start from a pre-built dict and adjust it:
  opts = (tf.compat.v1.profiler.ProfileOptionBuilder(
      tf.compat.v1.profiler.ProfileOptionBuilder.time_and_memory())
      .with_displaying_options(show_name_regexes=['.*rnn.*'])
      .build())

  # Finally, run the profiler with the options:
  _ = tf.compat.v1.profiler.profile(tf.compat.v1.get_default_graph(),
                                    run_meta=run_meta,
                                    cmd='scope',
                                    options=opts)
  ```
  """
  # pylint: enable=line-too-long

  def __init__(self, options=None):
    """Creates a builder, optionally seeded with existing options.

    Args:
      options: Optional option dict to start from. It is deep-copied, so the
        caller's dict is never modified by the builder. When omitted, a
        default set of options is used.
    """
    if options is None:
      self._options = {'max_depth': 100,
                       'min_bytes': 0,
                       'min_micros': 0,
                       'min_params': 0,
                       'min_float_ops': 0,
                       'min_occurrence': 0,
                       'order_by': 'name',
                       'account_type_regexes': ['.*'],
                       'start_name_regexes': ['.*'],
                       'trim_name_regexes': [],
                       'show_name_regexes': ['.*'],
                       'hide_name_regexes': [],
                       'account_displayed_op_only': False,
                       'select': ['micros'],
                       'step': -1,
                       'output': 'stdout'}
    else:
      self._options = copy.deepcopy(options)

  @staticmethod
  def trainable_variables_parameter():
    """Pre-built options for profiling trainable variable parameters.

    Normally used together with the 'scope' view.

    Returns:
      A freshly created dict of profiling options.
    """
    preset = {'max_depth': 10000,
              'min_bytes': 0,
              'min_micros': 0,
              'min_params': 0,
              'min_float_ops': 0,
              'min_occurrence': 0,
              'order_by': 'name',
              'account_type_regexes': [tfprof_logger.TRAINABLE_VARIABLES],
              'start_name_regexes': ['.*'],
              'trim_name_regexes': [],
              'show_name_regexes': ['.*'],
              'hide_name_regexes': [],
              'account_displayed_op_only': True,
              'select': ['params'],
              'step': -1,
              'output': 'stdout'}
    return preset

  @staticmethod
  def float_operation():
    # pylint: disable=line-too-long
    """Pre-built options for profiling float operations.

    See https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler/g3doc/profile_model_architecture.md
    for the caveats of calculating float operations.

    Returns:
      A freshly created dict of profiling options.
    """
    # pylint: enable=line-too-long
    preset = {'max_depth': 10000,
              'min_bytes': 0,
              'min_micros': 0,
              'min_params': 0,
              'min_float_ops': 1,
              'min_occurrence': 0,
              'order_by': 'float_ops',
              'account_type_regexes': ['.*'],
              'start_name_regexes': ['.*'],
              'trim_name_regexes': [],
              'show_name_regexes': ['.*'],
              'hide_name_regexes': [],
              'account_displayed_op_only': True,
              'select': ['float_ops'],
              'step': -1,
              'output': 'stdout'}
    return preset

  @staticmethod
  def time_and_memory(min_micros=1, min_bytes=1, min_accelerator_micros=0,
                      min_cpu_micros=0, min_peak_bytes=0, min_residual_bytes=0,
                      min_output_bytes=0):
    """Pre-built options showing operation time and memory consumption.

    Args:
      min_micros: Only show profiler nodes with execution time no less than
        this. It sums accelerator and cpu times.
      min_bytes: Only show profiler nodes requested to allocate no fewer
        bytes than this.
      min_accelerator_micros: Only show profiler nodes that spend no less
        than this time on accelerator (e.g. GPU).
      min_cpu_micros: Only show profiler nodes that spend no less than this
        time on cpu.
      min_peak_bytes: Only show profiler nodes using no fewer than this many
        bytes at peak (high watermark). For profiler nodes consisting of
        multiple graph nodes, it sums the graph nodes' peak_bytes.
      min_residual_bytes: Only show profiler nodes that have no fewer than
        this many bytes not de-allocated after Compute() ends. For profiler
        nodes consisting of multiple graph nodes, it sums the graph nodes'
        residual_bytes.
      min_output_bytes: Only show profiler nodes that have no fewer than this
        many bytes of output. The output is not necessarily allocated by
        this profiler node.

    Returns:
      A freshly created dict of profiling options.
    """
    preset = {'max_depth': 10000,
              'min_bytes': min_bytes,
              'min_peak_bytes': min_peak_bytes,
              'min_residual_bytes': min_residual_bytes,
              'min_output_bytes': min_output_bytes,
              'min_micros': min_micros,
              'min_accelerator_micros': min_accelerator_micros,
              'min_cpu_micros': min_cpu_micros,
              'min_params': 0,
              'min_float_ops': 0,
              'min_occurrence': 0,
              'order_by': 'micros',
              'account_type_regexes': ['.*'],
              'start_name_regexes': ['.*'],
              'trim_name_regexes': [],
              'show_name_regexes': ['.*'],
              'hide_name_regexes': [],
              'account_displayed_op_only': True,
              'select': ['micros', 'bytes'],
              'step': -1,
              'output': 'stdout'}
    return preset

  def build(self):
    """Builds the profiling options.

    Returns:
      A dict of profiling options. It is a deep copy, so later changes to the
      builder do not affect a dict that was already built (and vice versa).
    """
    snapshot = copy.deepcopy(self._options)
    return snapshot

  def with_max_depth(self, max_depth):
    """Sets the maximum depth of display.

    The meaning of depth depends on the profiling view. For the 'scope' view
    it is the depth of the name scope hierarchy (tree); for the 'op' view it
    is the number of operation types (list), etc.

    Args:
      max_depth: Maximum depth of the data structure to display.

    Returns:
      self
    """
    self._options['max_depth'] = max_depth
    return self

  def with_min_memory(self,
                      min_bytes=0,
                      min_peak_bytes=0,
                      min_residual_bytes=0,
                      min_output_bytes=0):
    """Only shows profiler nodes consuming no less than the given memory.

    All four thresholds are written on every call, so any argument left out
    is reset to its default of 0.

    Args:
      min_bytes: Only show profiler nodes requested to allocate no fewer
        bytes than this.
      min_peak_bytes: Only show profiler nodes using no fewer than this many
        bytes at peak (high watermark). For profiler nodes consisting of
        multiple graph nodes, it sums the graph nodes' peak_bytes.
      min_residual_bytes: Only show profiler nodes that have no fewer than
        this many bytes not de-allocated after Compute() ends. For profiler
        nodes consisting of multiple graph nodes, it sums the graph nodes'
        residual_bytes.
      min_output_bytes: Only show profiler nodes that have no fewer than this
        many bytes of output. The output is not necessarily allocated by
        this profiler node.

    Returns:
      self
    """
    thresholds = (
        ('min_bytes', min_bytes),
        ('min_peak_bytes', min_peak_bytes),
        ('min_residual_bytes', min_residual_bytes),
        ('min_output_bytes', min_output_bytes),
    )
    for option_name, limit in thresholds:
      self._options[option_name] = limit
    return self

  def with_min_execution_time(self,
                              min_micros=0,
                              min_accelerator_micros=0,
                              min_cpu_micros=0):
    """Only shows profiler nodes consuming no less than the given time.

    All three thresholds are written on every call, so any argument left out
    is reset to its default of 0.

    Args:
      min_micros: Only show profiler nodes with execution time no less than
        this. It sums accelerator and cpu times.
      min_accelerator_micros: Only show profiler nodes that spend no less
        than this time on accelerator (e.g. GPU).
      min_cpu_micros: Only show profiler nodes that spend no less than this
        time on cpu.

    Returns:
      self
    """
    thresholds = (
        ('min_micros', min_micros),
        ('min_accelerator_micros', min_accelerator_micros),
        ('min_cpu_micros', min_cpu_micros),
    )
    for option_name, limit in thresholds:
      self._options[option_name] = limit
    return self

  def with_min_parameters(self, min_params):
    """Only shows profiler nodes holding no less than 'min_params' parameters.

    'Parameters' normally refers to the weights in TensorFlow variables.
    It reflects the 'capacity' of models.

    Args:
      min_params: Only show profiler nodes holding a number of parameters
        no less than this.

    Returns:
      self
    """
    self._options['min_params'] = min_params
    return self

  def with_min_occurrence(self, min_occurrence):
    # pylint: disable=line-too-long
    """Only shows profiler nodes including no less than 'min_occurrence' graph nodes.

    A "node" means a profiler output node, which can be a python line
    (code view), an operation type (op view), or a graph node
    (graph/scope view). A python line includes all graph nodes created by
    that line, while an operation type includes all graph nodes of that type.

    Args:
      min_occurrence: Only show nodes including no less than this.

    Returns:
      self
    """
    # pylint: enable=line-too-long
    self._options['min_occurrence'] = min_occurrence
    return self

  def with_min_float_operations(self, min_float_ops):
    # pylint: disable=line-too-long
    """Only shows profiler nodes consuming no less than 'min_float_ops'.

    See https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler/g3doc/profile_model_architecture.md
    for the caveats of calculating float operations.

    Args:
      min_float_ops: Only show profiler nodes with float operations
        no less than this.

    Returns:
      self
    """
    # pylint: enable=line-too-long
    self._options['min_float_ops'] = min_float_ops
    return self

  def with_accounted_types(self, account_type_regexes):
    """Selectively counts statistics based on node types.

    Here, 'types' means the profiler nodes' properties. By default the
    profiler considers the device name (e.g. /job:xx/.../device:GPU:0) and
    the operation type (e.g. MatMul) as profiler nodes' properties. Users
    can also associate customized 'types' with profiler nodes through the
    OpLogProto proto.

    For example, profiler nodes placed on gpu:0 can be selected with:
      `account_type_regexes=['.*gpu:0.*']`

    If none of a node's properties match the specified regexes, the node is
    neither displayed nor accounted.

    Args:
      account_type_regexes: A list of regexes specifying the types. A shallow
        copy of it is stored.

    Returns:
      self
    """
    type_patterns = copy.copy(account_type_regexes)
    self._options['account_type_regexes'] = type_patterns
    return self

  def with_node_names(self,
                      start_name_regexes=None,
                      show_name_regexes=None,
                      hide_name_regexes=None,
                      trim_name_regexes=None):
    """Sets the regular expressions used to select profiler nodes to display.

    After 'with_accounted_types' is evaluated, 'with_node_names' is
    evaluated as follows:

      For a profile data structure, the profiler first finds the profiler
      nodes matching 'start_name_regexes', and starts displaying profiler
      nodes from there. Then, if a node matches 'show_name_regexes' and
      doesn't match 'hide_name_regexes', it's displayed. If a node matches
      'trim_name_regexes', the profiler stops further searching that branch.

    Arguments left as None keep their current setting; every supplied list
    is stored as a shallow copy.

    Args:
      start_name_regexes: list of node name regexes to start displaying.
      show_name_regexes: list of node name regexes to display.
      hide_name_regexes: list of node name regexes that should be hidden.
      trim_name_regexes: list of node name regexes from where to stop.

    Returns:
      self
    """
    requested = (
        ('start_name_regexes', start_name_regexes),
        ('show_name_regexes', show_name_regexes),
        ('hide_name_regexes', hide_name_regexes),
        ('trim_name_regexes', trim_name_regexes),
    )
    for option_name, patterns in requested:
      if patterns is None:
        # Not supplied by the caller: leave the existing setting untouched.
        continue
      self._options[option_name] = copy.copy(patterns)
    return self

  def account_displayed_op_only(self, is_true):
    """Sets whether to only account the statistics of displayed nodes.

    Args:
      is_true: If true, only account statistics of nodes eventually
        displayed by the outputs. Otherwise, a node's statistics are
        accounted by its parents as long as its types match
        'account_type_regexes', even if it is hidden from the output, say,
        by hide_name_regexes.

    Returns:
      self
    """
    self._options['account_displayed_op_only'] = is_true
    return self

  def with_empty_output(self):
    """Does not generate side-effect outputs.

    Returns:
      self
    """
    self._options['output'] = 'none'
    return self

  def with_stdout_output(self):
    """Prints the result to stdout.

    Returns:
      self
    """
    self._options['output'] = 'stdout'
    return self

  def with_file_output(self, outfile):
    """Prints the result to a file.

    Args:
      outfile: name of the file to write the result to.

    Returns:
      self
    """
    output_spec = 'file:outfile=%s' % outfile
    self._options['output'] = output_spec
    return self

  def with_timeline_output(self, timeline_file):
    """Generates a timeline json file.

    Args:
      timeline_file: name of the timeline file to generate.

    Returns:
      self
    """
    output_spec = 'timeline:outfile=%s' % timeline_file
    self._options['output'] = output_spec
    return self

  def with_pprof_output(self, pprof_file):
    """Generates a pprof profile gzip file.

    To use the pprof file:
      pprof -png --nodecount=100 --sample_index=1 <pprof_file>

    Args:
      pprof_file: filename for output, usually suffixed with .pb.gz.

    Returns:
      self
    """
    output_spec = 'pprof:outfile=%s' % pprof_file
    self._options['output'] = output_spec
    return self

  def order_by(self, attribute):
    # pylint: disable=line-too-long
    """Orders the displayed profiler nodes based on an attribute.

    Supported attributes include micros, bytes, occurrence, params, etc.
    https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler/g3doc/options.md

    Args:
      attribute: An attribute the profiler node has.

    Returns:
      self
    """
    # pylint: enable=line-too-long
    self._options['order_by'] = attribute
    return self

  def select(self, attributes):
    # pylint: disable=line-too-long
    """Selects the attributes to display.

    See https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler/g3doc/options.md
    for supported attributes.

    Args:
      attributes: A list of attributes the profiler node has. A shallow copy
        of it is stored.

    Returns:
      self
    """
    # pylint: enable=line-too-long
    chosen = copy.copy(attributes)
    self._options['select'] = chosen
    return self

  def with_step(self, step):
    """Chooses which profile step to use for profiling.

    The 'step' here refers to the step defined by the `Profiler.add_step()`
    API.

    Args:
      step: When multiple steps of profiles are available, select which
        step's profile to use. If -1, use the average of all available steps.

    Returns:
      self
    """
    self._options['step'] = step
    return self