1#!/usr/bin/env python
2# Copyright 2017 The PDFium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Compares the performance of two versions of the pdfium code."""
7
8import argparse
9import functools
10import json
11import multiprocessing
12import os
13import re
14import shutil
15import subprocess
16import sys
17import tempfile
18
19from common import GetBooleanGnArg
20from common import PrintErr
21from common import RunCommandPropagateErr
22from githelper import GitHelper
23from safetynet_conclusions import ComparisonConclusions
24from safetynet_conclusions import PrintConclusionsDictHumanReadable
25from safetynet_conclusions import RATING_IMPROVEMENT
26from safetynet_conclusions import RATING_REGRESSION
27
28
def RunSingleTestCaseParallel(this, run_label, build_dir, test_case):
  """Runs one test case on behalf of a multiprocessing worker.

  This is a module-level function (rather than a method) so that it can be
  wrapped with functools.partial and handed to a multiprocessing pool.

  Args:
    this: Object exposing RunSingleTestCase(run_label, build_dir, test_case).
    run_label: String to differentiate this version of the code in output
        files from other versions.
    build_dir: String with path to build directory.
    test_case: Path to the test case.

  Returns:
    A tuple (test_case, result), pairing the test case with whatever
    this.RunSingleTestCase() returned for it.
  """
  return (test_case,
          this.RunSingleTestCase(run_label, build_dir, test_case))
32
33
class CompareRun(object):
  """A comparison between two branches of pdfium.

  Attributes set by the constructor:
    git: GitHelper used for all git operations.
    args: Parsed command-line arguments.
    safe_script_dir: Directory holding the measuring script that is used.
    safe_measure_script_path: Absolute path to the measuring script.
    test_cases: List of paths to the test cases to profile, without
        duplicates.
    after_build_dir: Build directory for the "after" version.
    before_build_dir: Build directory for the "before" version.
  """

  def __init__(self, args):
    """Stores the arguments and resolves the paths derived from them.

    Args:
      args: Namespace with the parsed command-line arguments.
    """
    self.git = GitHelper()
    self.args = args
    self._InitPaths()

  def _InitPaths(self):
    """Resolves script, test case and build directory paths from the args."""
    if self.args.this_repo:
      # The measuring script gets copied into the build dir by
      # _FreezeMeasureScript(), so that is where it is run from.
      self.safe_script_dir = self.args.build_dir
    else:
      self.safe_script_dir = os.path.join('testing', 'tools')

    self.safe_measure_script_path = os.path.abspath(
        os.path.join(self.safe_script_dir,
                     'safetynet_measure.py'))

    self.test_cases = self._CollectTestCases(self.args.input_paths)

    self.after_build_dir = self.args.build_dir
    if self.args.build_dir_before:
      self.before_build_dir = self.args.build_dir_before
    else:
      self.before_build_dir = self.after_build_dir

  @staticmethod
  def _CollectTestCases(input_paths):
    """Expands files and directories into a list of unique test cases.

    Files are taken as they are. Directories are searched recursively for
    files whose name ends in ".pdf". Paths that are neither an existing file
    nor an existing directory are ignored.

    A test case reachable through more than one input path (e.g. a file and
    the directory that contains it) is listed only once, so that it is not
    measured and reported several times nor has its profile output file
    written by more than one worker.

    Args:
      input_paths: Iterable of paths to files or directories.

    Returns:
      A list of test case paths in the order they were first found.
    """
    input_file_re = re.compile('^.+[.]pdf$')
    test_cases = []
    seen_paths = set()

    def AddTestCase(path):
      # Compare normalized paths so "dir/a.pdf" and "dir//a.pdf" are the same
      # test case, but keep the spelling that was found first.
      key = os.path.normpath(path)
      if key not in seen_paths:
        seen_paths.add(key)
        test_cases.append(path)

    for input_path in input_paths:
      if os.path.isfile(input_path):
        AddTestCase(input_path)
      elif os.path.isdir(input_path):
        for file_dir, _, filename_list in os.walk(input_path):
          for input_filename in filename_list:
            if input_file_re.match(input_filename):
              file_path = os.path.join(file_dir, input_filename)
              if os.path.isfile(file_path):
                AddTestCase(file_path)

    return test_cases

  def Run(self):
    """Runs comparison by checking out branches, building and measuring them.

    Which versions get compared depends on which of --branch-after and
    --branch-before were given; whether this happens in the local repository
    or in temporary clones depends on --this-repo.

    Returns:
      Exit code for the script.
    """
    if self.args.this_repo:
      self._FreezeMeasureScript()

    if self.args.branch_after:
      if self.args.this_repo:
        before, after = self._ProfileTwoOtherBranchesInThisRepo(
            self.args.branch_before,
            self.args.branch_after)
      else:
        before, after = self._ProfileTwoOtherBranches(
            self.args.branch_before,
            self.args.branch_after)
    elif self.args.branch_before:
      if self.args.this_repo:
        before, after = self._ProfileCurrentAndOtherBranchInThisRepo(
            self.args.branch_before)
      else:
        before, after = self._ProfileCurrentAndOtherBranch(
            self.args.branch_before)
    else:
      if self.args.this_repo:
        before, after = self._ProfileLocalChangesAndCurrentBranchInThisRepo()
      else:
        before, after = self._ProfileLocalChangesAndCurrentBranch()

    conclusions = self._DrawConclusions(before, after)
    conclusions_dict = conclusions.GetOutputDict()
    conclusions_dict.setdefault('metadata', {})['profiler'] = self.args.profiler

    self._PrintConclusions(conclusions_dict)

    self._CleanUp(conclusions)

    return 0

  def _FreezeMeasureScript(self):
    """Freezes a version of the measuring script.

    This is needed to make sure we are comparing the pdfium library changes and
    not script changes that may happen between the two branches.
    """
    self.__FreezeFile(os.path.join('testing', 'tools', 'safetynet_measure.py'))
    self.__FreezeFile(os.path.join('testing', 'tools', 'common.py'))

  def __FreezeFile(self, file):
    """Copies a file into self.safe_script_dir.

    Args:
      file: Path to the file to copy.
    """
    RunCommandPropagateErr(['cp', file, self.safe_script_dir],
                           exit_status_on_error=1)

  def _ProfileTwoOtherBranchesInThisRepo(self, before_branch, after_branch):
    """Profiles two branches that are not the current branch.

    This is done in the local repository. Restoring the original branch and
    the stashed local changes is attempted even if building or measuring
    fails, but cannot happen if the script is killed.

    after_branch does not need to descend from before_branch, they will be
    measured the same way

    Args:
      before_branch: One branch to profile.
      after_branch: Other branch to profile.

    Returns:
      A tuple (before, after), where each of before and after is a dict
      mapping a test case name to the profiling values for that test case
      in the given branch.
    """
    branch_to_restore = self.git.GetCurrentBranchName()

    self._StashLocalChanges()
    try:
      self._CheckoutBranch(after_branch)
      self._BuildCurrentBranch(self.after_build_dir)
      after = self._MeasureCurrentBranch('after', self.after_build_dir)

      self._CheckoutBranch(before_branch)
      self._BuildCurrentBranch(self.before_build_dir)
      before = self._MeasureCurrentBranch('before', self.before_build_dir)
    finally:
      # Also runs on errors and on sys.exit(), so the user is not left on
      # another branch with the local changes stashed.
      self._CheckoutBranch(branch_to_restore)
      self._RestoreLocalChanges()

    return before, after

  def _ProfileTwoOtherBranches(self, before_branch, after_branch):
    """Profiles two branches that are not the current branch.

    This is done in new, cloned repositories, therefore it is safer but slower
    and requires downloads.

    after_branch does not need to descend from before_branch, they will be
    measured the same way

    Args:
      before_branch: One branch to profile.
      after_branch: Other branch to profile.

    Returns:
      A tuple (before, after), where each of before and after is a dict
      mapping a test case name to the profiling values for that test case
      in the given branch.
    """
    after = self._ProfileSeparateRepo('after',
                                      self.after_build_dir,
                                      after_branch)
    before = self._ProfileSeparateRepo('before',
                                       self.before_build_dir,
                                       before_branch)
    return before, after

  def _ProfileCurrentAndOtherBranchInThisRepo(self, other_branch):
    """Profiles the current branch (with uncommitted changes) and another one.

    This is done in the local repository. Restoring the original branch and
    the stashed local changes is attempted even if building or measuring
    fails, but cannot happen if the script is killed.

    The current branch does not need to descend from other_branch.

    Args:
      other_branch: Other branch to profile that is not the current.

    Returns:
      A tuple (before, after), where each of before and after is a dict
      mapping a test case name to the profiling values for that test case
      in the given branch. The current branch is considered to be "after" and
      the other branch is considered to be "before".
    """
    branch_to_restore = self.git.GetCurrentBranchName()

    self._BuildCurrentBranch(self.after_build_dir)
    after = self._MeasureCurrentBranch('after', self.after_build_dir)

    self._StashLocalChanges()
    try:
      self._CheckoutBranch(other_branch)
      self._BuildCurrentBranch(self.before_build_dir)
      before = self._MeasureCurrentBranch('before', self.before_build_dir)
    finally:
      # Also runs on errors and on sys.exit(), so the user is not left on
      # another branch with the local changes stashed.
      self._CheckoutBranch(branch_to_restore)
      self._RestoreLocalChanges()

    return before, after

  def _ProfileCurrentAndOtherBranch(self, other_branch):
    """Profiles the current branch (with uncommitted changes) and another one.

    This is done in new, cloned repositories, therefore it is safer but slower
    and requires downloads.

    The current branch does not need to descend from other_branch.

    Args:
      other_branch: Other branch to profile that is not the current. None will
          compare to the same branch.

    Returns:
      A tuple (before, after), where each of before and after is a dict
      mapping a test case name to the profiling values for that test case
      in the given branch. The current branch is considered to be "after" and
      the other branch is considered to be "before".
    """
    self._BuildCurrentBranch(self.after_build_dir)
    after = self._MeasureCurrentBranch('after', self.after_build_dir)

    before = self._ProfileSeparateRepo('before',
                                       self.before_build_dir,
                                       other_branch)

    return before, after

  def _ProfileLocalChangesAndCurrentBranchInThisRepo(self):
    """Profiles the current branch with and without uncommitted changes.

    This is done in the local repository. Restoring the stashed local changes
    is attempted even if building or measuring fails, but cannot happen if the
    script is killed.

    Returns:
      A tuple (before, after), where each of before and after is a dict
      mapping a test case name to the profiling values for that test case
      using the given version. The current branch without uncommitted changes is
      considered to be "before" and with uncommitted changes is considered to be
      "after".
    """
    self._BuildCurrentBranch(self.after_build_dir)
    after = self._MeasureCurrentBranch('after', self.after_build_dir)

    pushed = self._StashLocalChanges()
    try:
      # With nothing stashed and the same build dir, "before" and "after" are
      # the very same build, so the comparison is not meaningful.
      if not pushed and not self.args.build_dir_before:
        PrintErr('Warning: No local changes to compare')

      self._BuildCurrentBranch(self.before_build_dir)
      before = self._MeasureCurrentBranch('before', self.before_build_dir)
    finally:
      # Also runs on errors and on sys.exit(), so the local changes do not
      # remain stashed.
      self._RestoreLocalChanges()

    return before, after

  def _ProfileLocalChangesAndCurrentBranch(self):
    """Profiles the current branch with and without uncommitted changes.

    This is done in new, cloned repositories, therefore it is safer but slower
    and requires downloads.

    Returns:
      A tuple (before, after), where each of before and after is a dict
      mapping a test case name to the profiling values for that test case
      using the given version. The current branch without uncommitted changes is
      considered to be "before" and with uncommitted changes is considered to be
      "after".
    """
    return self._ProfileCurrentAndOtherBranch(other_branch=None)

  def _ProfileSeparateRepo(self, run_label, relative_build_dir, branch):
    """Profiles a branch in a temporary git repository.

    Args:
      run_label: String to differentiate this version of the code in output
          files from other versions.
      relative_build_dir: Path to the build dir in the current working dir to
          clone build args from.
      branch: Branch to checkout in the new repository. None will
          profile the same branch checked out in the original repo.
    Returns:
      A dict mapping each test case name to the profiling values for that
      test case.
    """
    build_dir = self._CreateTempRepo('repo_%s' % run_label,
                                     relative_build_dir,
                                     branch)

    self._BuildCurrentBranch(build_dir)
    return self._MeasureCurrentBranch(run_label, build_dir)

  def _CreateTempRepo(self, dir_name, relative_build_dir, branch):
    """Clones a temporary git repository out of the current working dir.

    The working directory is changed while the repository is being set up and
    is always restored before returning, including on failure. The temporary
    directory itself is not removed by this method.

    Args:
      dir_name: Name for the temporary repository directory
      relative_build_dir: Path to the build dir in the current working dir to
          clone build args from.
      branch: Branch to checkout in the new repository. None will keep checked
          out the same branch as the local repo.
    Returns:
      Path to the build directory of the new repository.
    """
    cwd = os.getcwd()

    repo_dir = tempfile.mkdtemp(suffix='-%s' % dir_name)
    src_dir = os.path.join(repo_dir, 'pdfium')

    self.git.CloneLocal(cwd, src_dir)

    try:
      if branch is not None:
        os.chdir(src_dir)
        self.git.Checkout(branch)

      # gclient is configured and synced from the directory that contains the
      # pdfium checkout.
      os.chdir(repo_dir)
      PrintErr('Syncing...')

      cmd = ['gclient', 'config', '--unmanaged',
             'https://pdfium.googlesource.com/pdfium.git']
      if self.args.cache_dir:
        cmd.append('--cache-dir=%s' % self.args.cache_dir)
      RunCommandPropagateErr(cmd, exit_status_on_error=1)

      RunCommandPropagateErr(['gclient', 'sync'], exit_status_on_error=1)

      PrintErr('Done.')

      build_dir = os.path.join(src_dir, relative_build_dir)
      os.makedirs(build_dir)
      os.chdir(src_dir)

      # Build the clone with the same GN args as the original build dir.
      source_gn_args = os.path.join(cwd, relative_build_dir, 'args.gn')
      dest_gn_args = os.path.join(build_dir, 'args.gn')
      shutil.copy(source_gn_args, dest_gn_args)

      RunCommandPropagateErr(['gn', 'gen', relative_build_dir],
                             exit_status_on_error=1)
    finally:
      os.chdir(cwd)

    return build_dir

  def _CheckoutBranch(self, branch):
    """Checks out a branch in the current repository.

    Args:
      branch: Name of the branch to check out.
    """
    PrintErr("Checking out branch '%s'" % branch)
    self.git.Checkout(branch)

  def _StashLocalChanges(self):
    """Stashes uncommitted changes.

    Returns:
      Whatever GitHelper.StashPush() returns; callers treat a falsy value as
      "there was nothing to stash".
    """
    PrintErr('Stashing local changes')
    return self.git.StashPush()

  def _RestoreLocalChanges(self):
    """Restores the changes stashed through _StashLocalChanges()."""
    PrintErr('Restoring local changes')
    self.git.StashPopAll()

  def _BuildCurrentBranch(self, build_dir):
    """Synchronizes and builds the current version of pdfium.

    Args:
      build_dir: String with path to build directory
    """
    PrintErr('Syncing...')
    RunCommandPropagateErr(['gclient', 'sync'], exit_status_on_error=1)
    PrintErr('Done.')

    PrintErr('Building...')
    cmd = ['ninja', '-C', build_dir, 'pdfium_test']
    # Builds with the use_goma GN arg enabled get a much higher job count.
    if GetBooleanGnArg('use_goma', build_dir):
      cmd.extend(['-j', '250'])
    RunCommandPropagateErr(cmd, stdout_has_errors=True, exit_status_on_error=1)
    PrintErr('Done.')

  def _MeasureCurrentBranch(self, run_label, build_dir):
    """Profiles all test cases, in parallel when that is possible.

    Test cases are profiled in parallel only when more than one worker is
    configured and there is more than one test case.

    Args:
      run_label: String to differentiate this version of the code in output
          files from other versions.
      build_dir: String with path to build directory

    Returns:
      A dict mapping each test case name to the profiling values for that
      test case.
    """
    PrintErr('Measuring...')
    if self.args.num_workers > 1 and len(self.test_cases) > 1:
      results = self._RunAsync(run_label, build_dir)
    else:
      results = self._RunSync(run_label, build_dir)
    PrintErr('Done.')

    return results

  def _RunSync(self, run_label, build_dir):
    """Profiles the test cases synchronously.

    Args:
      run_label: String to differentiate this version of the code in output
          files from other versions.
      build_dir: String with path to build directory

    Returns:
      A dict mapping each test case name to the profiling values for that
      test case. Test cases without a result are left out.
    """
    results = {}

    for test_case in self.test_cases:
      result = self.RunSingleTestCase(run_label, build_dir, test_case)
      if result is not None:
        results[test_case] = result

    return results

  def _RunAsync(self, run_label, build_dir):
    """Profiles the test cases asynchronously.

    Uses as many workers as configured by --num-workers.

    Args:
      run_label: String to differentiate this version of the code in output
          files from other versions.
      build_dir: String with path to build directory

    Returns:
      A dict mapping each test case name to the profiling values for that
      test case. Test cases without a result are left out.
    """
    pool = multiprocessing.Pool(self.args.num_workers)
    worker_func = functools.partial(
        RunSingleTestCaseParallel, self, run_label, build_dir)

    try:
      # The timeout is a workaround for http://bugs.python.org/issue8296
      # which prevents KeyboardInterrupt from working.
      one_year_in_seconds = 3600 * 24 * 365
      worker_results = (pool.map_async(worker_func, self.test_cases)
                        .get(one_year_in_seconds))
    except KeyboardInterrupt:
      pool.terminate()
      sys.exit(1)
    except Exception:
      # Do not leave worker processes behind when a worker fails.
      pool.terminate()
      pool.join()
      raise
    else:
      pool.close()
      pool.join()

    results = {}
    for test_case, result in worker_results:
      if result is not None:
        results[test_case] = result

    return results

  def RunSingleTestCase(self, run_label, build_dir, test_case):
    """Profiles a single test case.

    Args:
      run_label: String to differentiate this version of the code in output
          files from other versions.
      build_dir: String with path to build directory
      test_case: Path to the test case.

    Returns:
      The measured profiling value for that test case as an int, or None if
      the measuring script gave no output or its output was not just a number.
    """
    command = [self.safe_measure_script_path, test_case,
               '--build-dir=%s' % build_dir]

    if self.args.interesting_section:
      command.append('--interesting-section')

    if self.args.profiler:
      command.append('--profiler=%s' % self.args.profiler)

    profile_file_path = self._GetProfileFilePath(run_label, test_case)
    if profile_file_path:
      command.append('--output-path=%s' % profile_file_path)

    output = RunCommandPropagateErr(command)

    if output is None:
      return None

    # Get the time number as output, making sure it's just a number
    output = output.strip()
    if re.match('^[0-9]+$', output):
      return int(output)

    return None

  def _GetProfileFilePath(self, run_label, test_case):
    """Gets the path of the profile output file for a test case.

    Args:
      run_label: String to differentiate this version of the code in output
          files from other versions.
      test_case: Path to the test case.

    Returns:
      Path to the output file inside --output-dir, or None if no output
      directory was given.
    """
    if self.args.output_dir:
      # Flatten the test case path into a single file name.
      output_filename = ('callgrind.out.%s.%s'
                         % (test_case.replace('/', '_'),
                            run_label))
      return os.path.join(self.args.output_dir, output_filename)
    else:
      return None

  def _DrawConclusions(self, times_before_branch, times_after_branch):
    """Draws conclusions comparing results of test runs in two branches.

    Args:
      times_before_branch: A dict mapping each test case name to the
          profiling values for that test case in the branch to be considered
          as the baseline.
      times_after_branch: A dict mapping each test case name to the
          profiling values for that test case in the branch to be considered
          as the new version.

    Returns:
      ComparisonConclusions with all test cases processed.
    """
    conclusions = ComparisonConclusions(self.args.threshold_significant)

    for test_case in sorted(self.test_cases):
      # A test case missing from either dict is passed on as None.
      before = times_before_branch.get(test_case)
      after = times_after_branch.get(test_case)
      conclusions.ProcessCase(test_case, before, after)

    return conclusions

  def _PrintConclusions(self, conclusions_dict):
    """Prints the conclusions as the script output.

    Depending on the script args, this can output a human or a machine-readable
    version of the conclusions.

    Args:
      conclusions_dict: Dict to print returned from
          ComparisonConclusions.GetOutputDict().
    """
    if self.args.machine_readable:
      # Parenthesized single argument: valid and equivalent as both the
      # Python 2 print statement and the Python 3 print function.
      print(json.dumps(conclusions_dict))
    else:
      PrintConclusionsDictHumanReadable(
          conclusions_dict, colored=True, key=self.args.case_order)

  def _CleanUp(self, conclusions):
    """Removes profile output files for uninteresting cases.

    Cases without significant regressions or improvements are considered
    uninteresting.

    Nothing is removed if no output directory was given or if the profiler is
    not callgrind.

    Args:
      conclusions: A ComparisonConclusions.
    """
    if not self.args.output_dir:
      return

    if self.args.profiler != 'callgrind':
      return

    for case_result in conclusions.GetCaseResults().values():
      if case_result.rating not in [RATING_REGRESSION, RATING_IMPROVEMENT]:
        self._CleanUpOutputFile('before', case_result.case_name)
        self._CleanUpOutputFile('after', case_result.case_name)

  def _CleanUpOutputFile(self, run_label, case_name):
    """Removes one profile output file.

    If the output file does not exist, fails silently.

    Args:
      run_label: String to differentiate a version of the code in output
          files from other versions.
      case_name: String identifying test case for which to remove the output
          file.
    """
    try:
      os.remove(self._GetProfileFilePath(run_label, case_name))
    except OSError:
      pass
582
583
def _BuildArgumentParser():
  """Creates the parser for the command-line arguments of this script."""
  parser = argparse.ArgumentParser()
  parser.add_argument(
      'input_paths',
      nargs='+',
      help='pdf files or directories to search for pdf files '
           'to run as test cases')
  parser.add_argument(
      '--branch-before',
      help='git branch to use as "before" for comparison. '
           'Omitting this will use the current branch '
           'without uncommitted changes as the baseline.')
  parser.add_argument(
      '--branch-after',
      help='git branch to use as "after" for comparison. '
           'Omitting this will use the current branch '
           'with uncommitted changes.')
  parser.add_argument(
      '--build-dir',
      default=os.path.join('out', 'Release'),
      help='relative path from the base source directory '
           'to the build directory')
  parser.add_argument(
      '--build-dir-before',
      help='relative path from the base source directory '
           'to the build directory for the "before" branch, if '
           'different from the build directory for the '
           '"after" branch')
  parser.add_argument(
      '--cache-dir',
      default=None,
      help='directory with a new or preexisting cache for '
           'downloads. Default is to not use a cache.')
  parser.add_argument(
      '--this-repo',
      action='store_true',
      help='use the repository where the script is instead of '
           'checking out a temporary one. This is faster and '
           'does not require downloads, but although it '
           'restores the state of the local repo, if the '
           'script is killed or crashes the changes can remain '
           'stashed and you may be on another branch.')
  parser.add_argument(
      '--profiler',
      default='callgrind',
      help='which profiler to use. Supports callgrind and '
           'perfstat for now. Default is callgrind.')
  parser.add_argument(
      '--interesting-section',
      action='store_true',
      help='whether to measure just the interesting section or '
           'the whole test harness. Limiting to only the '
           'interesting section does not work on Release since '
           'the delimiters are optimized out')
  parser.add_argument(
      '--num-workers',
      default=multiprocessing.cpu_count(),
      type=int,
      help='run NUM_WORKERS jobs in parallel')
  parser.add_argument(
      '--output-dir',
      help='directory to write the profile data output files')
  parser.add_argument(
      '--threshold-significant',
      default=0.02,
      type=float,
      help='variations in performance above this factor are '
           'considered significant')
  parser.add_argument(
      '--machine-readable',
      action='store_true',
      help='whether to get output for machines. If enabled the '
           'output will be a json with the format specified in '
           'ComparisonConclusions.GetOutputDict(). Default is '
           'human-readable.')
  parser.add_argument(
      '--case-order',
      default=None,
      help='what key to use when sorting test cases in the '
           'output. Accepted values are "after", "before", '
           '"ratio" and "rating". Default is sorting by test '
           'case path.')
  return parser


def main():
  """Parses and validates the arguments, then runs the comparison.

  Returns:
    Exit code for the script: 1 if an argument is invalid, otherwise whatever
    CompareRun.Run() returns.
  """
  args = _BuildArgumentParser().parse_args()

  # Always start at the pdfium src dir, which is assumed to be two level above
  # this script.
  script_dir = os.path.dirname(__file__)
  os.chdir(os.path.join(script_dir, os.path.pardir, os.path.pardir))

  git = GitHelper()

  before_branch = args.branch_before
  after_branch = args.branch_after

  if after_branch:
    if not before_branch:
      PrintErr('--branch-after requires --branch-before to be specified.')
      return 1
    if not git.BranchExists(after_branch):
      PrintErr('Branch "%s" does not exist' % after_branch)
      return 1

  if before_branch and not git.BranchExists(before_branch):
    PrintErr('Branch "%s" does not exist' % before_branch)
    return 1

  if args.output_dir:
    # The expanded path is stored back since CompareRun reads it from args.
    expanded_output_dir = os.path.expanduser(args.output_dir)
    args.output_dir = expanded_output_dir
    if not os.path.isdir(expanded_output_dir):
      PrintErr('"%s" is not a directory' % expanded_output_dir)
      return 1

  if args.threshold_significant <= 0.0:
    PrintErr('--threshold-significant should receive a positive float')
    return 1

  return CompareRun(args).Run()
677
678
# When run as a script, use the value returned by main() as the exit code.
if __name__ == '__main__':
  sys.exit(main())
681