1#! /usr/bin/env python3
2#
3# Class for profiling python code. rev 1.0  6/2/94
4#
5# Written by James Roskind
6# Based on prior profile module by Sjoerd Mullender...
7#   which was hacked somewhat by: Guido van Rossum
8
9"""Class for profiling Python code."""
10
11# Copyright Disney Enterprises, Inc.  All Rights Reserved.
12# Licensed to PSF under a Contributor Agreement
13#
14# Licensed under the Apache License, Version 2.0 (the "License");
15# you may not use this file except in compliance with the License.
16# You may obtain a copy of the License at
17#
18# http://www.apache.org/licenses/LICENSE-2.0
19#
20# Unless required by applicable law or agreed to in writing, software
21# distributed under the License is distributed on an "AS IS" BASIS,
22# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
23# either express or implied.  See the License for the specific language
24# governing permissions and limitations under the License.
25
26
27import sys
28import time
29import marshal
30
31__all__ = ["run", "runctx", "Profile"]
32
33# Sample timer for use with
34#i_count = 0
35#def integer_timer():
36#       global i_count
37#       i_count = i_count + 1
38#       return i_count
39#itimes = integer_timer # replace with C coded timer returning integers
40
41class _Utils:
42    """Support class for utility functions which are shared by
43    profile.py and cProfile.py modules.
44    Not supposed to be used directly.
45    """
46
47    def __init__(self, profiler):
48        self.profiler = profiler
49
50    def run(self, statement, filename, sort):
51        prof = self.profiler()
52        try:
53            prof.run(statement)
54        except SystemExit:
55            pass
56        finally:
57            self._show(prof, filename, sort)
58
59    def runctx(self, statement, globals, locals, filename, sort):
60        prof = self.profiler()
61        try:
62            prof.runctx(statement, globals, locals)
63        except SystemExit:
64            pass
65        finally:
66            self._show(prof, filename, sort)
67
68    def _show(self, prof, filename, sort):
69        if filename is not None:
70            prof.dump_stats(filename)
71        else:
72            prof.print_stats(sort)
73
74
75#**************************************************************************
76# The following are the static member functions for the profiler class
77# Note that an instance of Profile() is *not* needed to call them.
78#**************************************************************************
79
80def run(statement, filename=None, sort=-1):
81    """Run statement under profiler optionally saving results in filename
82
83    This function takes a single argument that can be passed to the
84    "exec" statement, and an optional file name.  In all cases this
85    routine attempts to "exec" its first argument and gather profiling
86    statistics from the execution. If no file name is present, then this
87    function automatically prints a simple profiling report, sorted by the
88    standard name string (file/line/function-name) that is presented in
89    each line.
90    """
91    return _Utils(Profile).run(statement, filename, sort)
92
93def runctx(statement, globals, locals, filename=None, sort=-1):
94    """Run statement under profiler, supplying your own globals and locals,
95    optionally saving results in filename.
96
97    statement and filename have the same semantics as profile.run
98    """
99    return _Utils(Profile).runctx(statement, globals, locals, filename, sort)
100
101
102class Profile:
103    """Profiler class.
104
105    self.cur is always a tuple.  Each such tuple corresponds to a stack
106    frame that is currently active (self.cur[-2]).  The following are the
107    definitions of its members.  We use this external "parallel stack" to
108    avoid contaminating the program that we are profiling. (old profiler
109    used to write into the frames local dictionary!!) Derived classes
110    can change the definition of some entries, as long as they leave
111    [-2:] intact (frame and previous tuple).  In case an internal error is
112    detected, the -3 element is used as the function name.
113
114    [ 0] = Time that needs to be charged to the parent frame's function.
115           It is used so that a function call will not have to access the
116           timing data for the parent frame.
117    [ 1] = Total time spent in this frame's function, excluding time in
118           subfunctions (this latter is tallied in cur[2]).
119    [ 2] = Total time spent in subfunctions, excluding time executing the
120           frame's function (this latter is tallied in cur[1]).
121    [-3] = Name of the function that corresponds to this frame.
122    [-2] = Actual frame that we correspond to (used to sync exception handling).
123    [-1] = Our parent 6-tuple (corresponds to frame.f_back).
124
125    Timing data for each function is stored as a 5-tuple in the dictionary
126    self.timings[].  The index is always the name stored in self.cur[-3].
127    The following are the definitions of the members:
128
129    [0] = The number of times this function was called, not counting direct
130          or indirect recursion,
131    [1] = Number of times this function appears on the stack, minus one
132    [2] = Total time spent internal to this function
133    [3] = Cumulative time that this function was present on the stack.  In
134          non-recursive functions, this is the total execution time from start
135          to finish of each invocation of a function, including time spent in
136          all subfunctions.
137    [4] = A dictionary indicating for each function name, the number of times
138          it was called by us.
139    """
140
141    bias = 0  # calibration constant
142
143    def __init__(self, timer=None, bias=None):
144        self.timings = {}
145        self.cur = None
146        self.cmd = ""
147        self.c_func_name = ""
148
149        if bias is None:
150            bias = self.bias
151        self.bias = bias     # Materialize in local dict for lookup speed.
152
153        if not timer:
154            self.timer = self.get_time = time.process_time
155            self.dispatcher = self.trace_dispatch_i
156        else:
157            self.timer = timer
158            t = self.timer() # test out timer function
159            try:
160                length = len(t)
161            except TypeError:
162                self.get_time = timer
163                self.dispatcher = self.trace_dispatch_i
164            else:
165                if length == 2:
166                    self.dispatcher = self.trace_dispatch
167                else:
168                    self.dispatcher = self.trace_dispatch_l
169                # This get_time() implementation needs to be defined
170                # here to capture the passed-in timer in the parameter
171                # list (for performance).  Note that we can't assume
172                # the timer() result contains two values in all
173                # cases.
174                def get_time_timer(timer=timer, sum=sum):
175                    return sum(timer())
176                self.get_time = get_time_timer
177        self.t = self.get_time()
178        self.simulate_call('profiler')
179
180    # Heavily optimized dispatch routine for time.process_time() timer
181
182    def trace_dispatch(self, frame, event, arg):
183        timer = self.timer
184        t = timer()
185        t = t[0] + t[1] - self.t - self.bias
186
187        if event == "c_call":
188            self.c_func_name = arg.__name__
189
190        if self.dispatch[event](self, frame,t):
191            t = timer()
192            self.t = t[0] + t[1]
193        else:
194            r = timer()
195            self.t = r[0] + r[1] - t # put back unrecorded delta
196
197    # Dispatch routine for best timer program (return = scalar, fastest if
198    # an integer but float works too -- and time.process_time() relies on that).
199
200    def trace_dispatch_i(self, frame, event, arg):
201        timer = self.timer
202        t = timer() - self.t - self.bias
203
204        if event == "c_call":
205            self.c_func_name = arg.__name__
206
207        if self.dispatch[event](self, frame, t):
208            self.t = timer()
209        else:
210            self.t = timer() - t  # put back unrecorded delta
211
212    # Dispatch routine for macintosh (timer returns time in ticks of
213    # 1/60th second)
214
215    def trace_dispatch_mac(self, frame, event, arg):
216        timer = self.timer
217        t = timer()/60.0 - self.t - self.bias
218
219        if event == "c_call":
220            self.c_func_name = arg.__name__
221
222        if self.dispatch[event](self, frame, t):
223            self.t = timer()/60.0
224        else:
225            self.t = timer()/60.0 - t  # put back unrecorded delta
226
227    # SLOW generic dispatch routine for timer returning lists of numbers
228
229    def trace_dispatch_l(self, frame, event, arg):
230        get_time = self.get_time
231        t = get_time() - self.t - self.bias
232
233        if event == "c_call":
234            self.c_func_name = arg.__name__
235
236        if self.dispatch[event](self, frame, t):
237            self.t = get_time()
238        else:
239            self.t = get_time() - t # put back unrecorded delta
240
241    # In the event handlers, the first 3 elements of self.cur are unpacked
242    # into vrbls w/ 3-letter names.  The last two characters are meant to be
243    # mnemonic:
244    #     _pt  self.cur[0] "parent time"   time to be charged to parent frame
245    #     _it  self.cur[1] "internal time" time spent directly in the function
246    #     _et  self.cur[2] "external time" time spent in subfunctions
247
248    def trace_dispatch_exception(self, frame, t):
249        rpt, rit, ret, rfn, rframe, rcur = self.cur
250        if (rframe is not frame) and rcur:
251            return self.trace_dispatch_return(rframe, t)
252        self.cur = rpt, rit+t, ret, rfn, rframe, rcur
253        return 1
254
255
256    def trace_dispatch_call(self, frame, t):
257        if self.cur and frame.f_back is not self.cur[-2]:
258            rpt, rit, ret, rfn, rframe, rcur = self.cur
259            if not isinstance(rframe, Profile.fake_frame):
260                assert rframe.f_back is frame.f_back, ("Bad call", rfn,
261                                                       rframe, rframe.f_back,
262                                                       frame, frame.f_back)
263                self.trace_dispatch_return(rframe, 0)
264                assert (self.cur is None or \
265                        frame.f_back is self.cur[-2]), ("Bad call",
266                                                        self.cur[-3])
267        fcode = frame.f_code
268        fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
269        self.cur = (t, 0, 0, fn, frame, self.cur)
270        timings = self.timings
271        if fn in timings:
272            cc, ns, tt, ct, callers = timings[fn]
273            timings[fn] = cc, ns + 1, tt, ct, callers
274        else:
275            timings[fn] = 0, 0, 0, 0, {}
276        return 1
277
278    def trace_dispatch_c_call (self, frame, t):
279        fn = ("", 0, self.c_func_name)
280        self.cur = (t, 0, 0, fn, frame, self.cur)
281        timings = self.timings
282        if fn in timings:
283            cc, ns, tt, ct, callers = timings[fn]
284            timings[fn] = cc, ns+1, tt, ct, callers
285        else:
286            timings[fn] = 0, 0, 0, 0, {}
287        return 1
288
289    def trace_dispatch_return(self, frame, t):
290        if frame is not self.cur[-2]:
291            assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
292            self.trace_dispatch_return(self.cur[-2], 0)
293
294        # Prefix "r" means part of the Returning or exiting frame.
295        # Prefix "p" means part of the Previous or Parent or older frame.
296
297        rpt, rit, ret, rfn, frame, rcur = self.cur
298        rit = rit + t
299        frame_total = rit + ret
300
301        ppt, pit, pet, pfn, pframe, pcur = rcur
302        self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur
303
304        timings = self.timings
305        cc, ns, tt, ct, callers = timings[rfn]
306        if not ns:
307            # This is the only occurrence of the function on the stack.
308            # Else this is a (directly or indirectly) recursive call, and
309            # its cumulative time will get updated when the topmost call to
310            # it returns.
311            ct = ct + frame_total
312            cc = cc + 1
313
314        if pfn in callers:
315            callers[pfn] = callers[pfn] + 1  # hack: gather more
316            # stats such as the amount of time added to ct courtesy
317            # of this specific call, and the contribution to cc
318            # courtesy of this call.
319        else:
320            callers[pfn] = 1
321
322        timings[rfn] = cc, ns - 1, tt + rit, ct, callers
323
324        return 1
325
326
327    dispatch = {
328        "call": trace_dispatch_call,
329        "exception": trace_dispatch_exception,
330        "return": trace_dispatch_return,
331        "c_call": trace_dispatch_c_call,
332        "c_exception": trace_dispatch_return,  # the C function returned
333        "c_return": trace_dispatch_return,
334        }
335
336
337    # The next few functions play with self.cmd. By carefully preloading
338    # our parallel stack, we can force the profiled result to include
339    # an arbitrary string as the name of the calling function.
340    # We use self.cmd as that string, and the resulting stats look
341    # very nice :-).
342
343    def set_cmd(self, cmd):
344        if self.cur[-1]: return   # already set
345        self.cmd = cmd
346        self.simulate_call(cmd)
347
348    class fake_code:
349        def __init__(self, filename, line, name):
350            self.co_filename = filename
351            self.co_line = line
352            self.co_name = name
353            self.co_firstlineno = 0
354
355        def __repr__(self):
356            return repr((self.co_filename, self.co_line, self.co_name))
357
358    class fake_frame:
359        def __init__(self, code, prior):
360            self.f_code = code
361            self.f_back = prior
362
363    def simulate_call(self, name):
364        code = self.fake_code('profile', 0, name)
365        if self.cur:
366            pframe = self.cur[-2]
367        else:
368            pframe = None
369        frame = self.fake_frame(code, pframe)
370        self.dispatch['call'](self, frame, 0)
371
372    # collect stats from pending stack, including getting final
373    # timings for self.cmd frame.
374
375    def simulate_cmd_complete(self):
376        get_time = self.get_time
377        t = get_time() - self.t
378        while self.cur[-1]:
379            # We *can* cause assertion errors here if
380            # dispatch_trace_return checks for a frame match!
381            self.dispatch['return'](self, self.cur[-2], t)
382            t = 0
383        self.t = get_time() - t
384
385
386    def print_stats(self, sort=-1):
387        import pstats
388        pstats.Stats(self).strip_dirs().sort_stats(sort). \
389                  print_stats()
390
391    def dump_stats(self, file):
392        with open(file, 'wb') as f:
393            self.create_stats()
394            marshal.dump(self.stats, f)
395
396    def create_stats(self):
397        self.simulate_cmd_complete()
398        self.snapshot_stats()
399
400    def snapshot_stats(self):
401        self.stats = {}
402        for func, (cc, ns, tt, ct, callers) in self.timings.items():
403            callers = callers.copy()
404            nc = 0
405            for callcnt in callers.values():
406                nc += callcnt
407            self.stats[func] = cc, nc, tt, ct, callers
408
409
410    # The following two methods can be called by clients to use
411    # a profiler to profile a statement, given as a string.
412
413    def run(self, cmd):
414        import __main__
415        dict = __main__.__dict__
416        return self.runctx(cmd, dict, dict)
417
418    def runctx(self, cmd, globals, locals):
419        self.set_cmd(cmd)
420        sys.setprofile(self.dispatcher)
421        try:
422            exec(cmd, globals, locals)
423        finally:
424            sys.setprofile(None)
425        return self
426
427    # This method is more useful to profile a single function call.
428    def runcall(self, func, /, *args, **kw):
429        self.set_cmd(repr(func))
430        sys.setprofile(self.dispatcher)
431        try:
432            return func(*args, **kw)
433        finally:
434            sys.setprofile(None)
435
436
437    #******************************************************************
438    # The following calculates the overhead for using a profiler.  The
439    # problem is that it takes a fair amount of time for the profiler
440    # to stop the stopwatch (from the time it receives an event).
441    # Similarly, there is a delay from the time that the profiler
442    # re-starts the stopwatch before the user's code really gets to
443    # continue.  The following code tries to measure the difference on
444    # a per-event basis.
445    #
446    # Note that this difference is only significant if there are a lot of
447    # events, and relatively little user code per event.  For example,
448    # code with small functions will typically benefit from having the
449    # profiler calibrated for the current platform.  This *could* be
450    # done on the fly during init() time, but it is not worth the
451    # effort.  Also note that if too large a value specified, then
452    # execution time on some functions will actually appear as a
453    # negative number.  It is *normal* for some functions (with very
454    # low call counts) to have such negative stats, even if the
455    # calibration figure is "correct."
456    #
457    # One alternative to profile-time calibration adjustments (i.e.,
458    # adding in the magic little delta during each event) is to track
459    # more carefully the number of events (and cumulatively, the number
460    # of events during sub functions) that are seen.  If this were
461    # done, then the arithmetic could be done after the fact (i.e., at
462    # display time).  Currently, we track only call/return events.
463    # These values can be deduced by examining the callees and callers
464    # vectors for each functions.  Hence we *can* almost correct the
465    # internal time figure at print time (note that we currently don't
466    # track exception event processing counts).  Unfortunately, there
467    # is currently no similar information for cumulative sub-function
468    # time.  It would not be hard to "get all this info" at profiler
469    # time.  Specifically, we would have to extend the tuples to keep
470    # counts of this in each frame, and then extend the defs of timing
471    # tuples to include the significant two figures. I'm a bit fearful
472    # that this additional feature will slow the heavily optimized
473    # event/time ratio (i.e., the profiler would run slower, fur a very
474    # low "value added" feature.)
475    #**************************************************************
476
477    def calibrate(self, m, verbose=0):
478        if self.__class__ is not Profile:
479            raise TypeError("Subclasses must override .calibrate().")
480
481        saved_bias = self.bias
482        self.bias = 0
483        try:
484            return self._calibrate_inner(m, verbose)
485        finally:
486            self.bias = saved_bias
487
488    def _calibrate_inner(self, m, verbose):
489        get_time = self.get_time
490
491        # Set up a test case to be run with and without profiling.  Include
492        # lots of calls, because we're trying to quantify stopwatch overhead.
493        # Do not raise any exceptions, though, because we want to know
494        # exactly how many profile events are generated (one call event, +
495        # one return event, per Python-level call).
496
497        def f1(n):
498            for i in range(n):
499                x = 1
500
501        def f(m, f1=f1):
502            for i in range(m):
503                f1(100)
504
505        f(m)    # warm up the cache
506
507        # elapsed_noprofile <- time f(m) takes without profiling.
508        t0 = get_time()
509        f(m)
510        t1 = get_time()
511        elapsed_noprofile = t1 - t0
512        if verbose:
513            print("elapsed time without profiling =", elapsed_noprofile)
514
515        # elapsed_profile <- time f(m) takes with profiling.  The difference
516        # is profiling overhead, only some of which the profiler subtracts
517        # out on its own.
518        p = Profile()
519        t0 = get_time()
520        p.runctx('f(m)', globals(), locals())
521        t1 = get_time()
522        elapsed_profile = t1 - t0
523        if verbose:
524            print("elapsed time with profiling =", elapsed_profile)
525
526        # reported_time <- "CPU seconds" the profiler charged to f and f1.
527        total_calls = 0.0
528        reported_time = 0.0
529        for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
530                p.timings.items():
531            if funcname in ("f", "f1"):
532                total_calls += cc
533                reported_time += tt
534
535        if verbose:
536            print("'CPU seconds' profiler reported =", reported_time)
537            print("total # calls =", total_calls)
538        if total_calls != m + 1:
539            raise ValueError("internal error: total calls = %d" % total_calls)
540
541        # reported_time - elapsed_noprofile = overhead the profiler wasn't
542        # able to measure.  Divide by twice the number of calls (since there
543        # are two profiler events per call in this test) to get the hidden
544        # overhead per event.
545        mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
546        if verbose:
547            print("mean stopwatch overhead per profile event =", mean)
548        return mean
549
550#****************************************************************************
551
552def main():
553    import os
554    from optparse import OptionParser
555
556    usage = "profile.py [-o output_file_path] [-s sort] [-m module | scriptfile] [arg] ..."
557    parser = OptionParser(usage=usage)
558    parser.allow_interspersed_args = False
559    parser.add_option('-o', '--outfile', dest="outfile",
560        help="Save stats to <outfile>", default=None)
561    parser.add_option('-m', dest="module", action="store_true",
562        help="Profile a library module.", default=False)
563    parser.add_option('-s', '--sort', dest="sort",
564        help="Sort order when printing to stdout, based on pstats.Stats class",
565        default=-1)
566
567    if not sys.argv[1:]:
568        parser.print_usage()
569        sys.exit(2)
570
571    (options, args) = parser.parse_args()
572    sys.argv[:] = args
573
574    # The script that we're profiling may chdir, so capture the absolute path
575    # to the output file at startup.
576    if options.outfile is not None:
577        options.outfile = os.path.abspath(options.outfile)
578
579    if len(args) > 0:
580        if options.module:
581            import runpy
582            code = "run_module(modname, run_name='__main__')"
583            globs = {
584                'run_module': runpy.run_module,
585                'modname': args[0]
586            }
587        else:
588            progname = args[0]
589            sys.path.insert(0, os.path.dirname(progname))
590            with open(progname, 'rb') as fp:
591                code = compile(fp.read(), progname, 'exec')
592            globs = {
593                '__file__': progname,
594                '__name__': '__main__',
595                '__package__': None,
596                '__cached__': None,
597            }
598        runctx(code, globs, None, options.outfile, options.sort)
599    else:
600        parser.print_usage()
601    return parser
602
603# When invoked as main program, invoke the profiler on a script
604if __name__ == '__main__':
605    main()
606