1# Copyright 2017 The PDFium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4"""Compares pairs of page images and generates an HTML to look at differences.
5"""
6
7import functools
8import glob
9import multiprocessing
10import os
11import re
12import subprocess
13import sys
14import webbrowser
15
16# pylint: disable=relative-import
17from common import DirectoryFinder
18
19
def GenerateOneDiffParallel(image_comparison, image):
  """Module-level trampoline that diffs one image on a comparison object.

  Args:
    image_comparison: Object exposing a GenerateOneDiff(image) method.
    image: Page image name, forwarded unchanged to GenerateOneDiff.

  Returns:
    Whatever image_comparison.GenerateOneDiff(image) returns.
  """
  one_diff_result = image_comparison.GenerateOneDiff(image)
  return one_diff_result
22
23
class ImageComparison(object):
  """Compares pairs of page images and generates an HTML to look at differences.

  The images are all assumed to have the same name and be in two directories:
  [output_path]/[two_labels[0]] and [output_path]/[two_labels[1]]. For example,
  if output_path is "/tmp/images" and two_labels is ("before", "after"),
  images in /tmp/images/before will be compared to /tmp/images/after. The HTML
  produced will be in /tmp/images/compare.html and have relative links to these
  images, so /tmp/images is self-contained and can be moved around or shared.
  """

  def __init__(self, build_dir, output_path, two_labels, num_workers,
               threshold_fraction):
    """Constructor.

    Args:
      build_dir: Path to the build directory.
      output_path: Path with the pngs and where the html will be created.
      two_labels: Tuple of two strings that name the subdirectories in
          output_path containing the images.
      num_workers: Number of worker processes to start.
      threshold_fraction: Fraction (0.0 to 1.0) of changed pixels at or below
          which an image is considered to have only small changes. Such
          images will not be displayed on the HTML, only listed.
    """
    self.build_dir = build_dir
    self.output_path = output_path
    self.two_labels = two_labels
    self.num_workers = num_workers
    # The diff tool reports percentages, so keep the threshold in that unit.
    self.threshold = threshold_fraction * 100

  def Run(self, open_in_browser):
    """Runs the comparison and generates an HTML with the results.

    Args:
      open_in_browser: If true, the generated HTML is opened with the
          webbrowser module once it has been written.

    Returns:
        Exit status: 1 if two_labels does not have exactly two entries,
        0 otherwise.
    """

    # Running a test defines a number of attributes on the fly.
    # pylint: disable=attribute-defined-outside-init

    if len(self.two_labels) != 2:
      sys.stderr.write('two_labels must be a tuple of length 2\n')
      return 1

    finder = DirectoryFinder(self.build_dir)
    self.img_diff_bin = finder.ExecutablePath('pdfium_diff')

    html_path = os.path.join(self.output_path, 'compare.html')

    self.diff_path = os.path.join(self.output_path, 'diff')
    if not os.path.exists(self.diff_path):
      os.makedirs(self.diff_path)

    self.image_locations = ImageLocations(self.output_path, self.diff_path,
                                          self.two_labels)

    difference = self._GenerateDiffs()

    small_changes = []

    with open(html_path, 'w') as f:
      f.write('<html><body>')
      f.write('<table>')
      for image in self.image_locations.Images():
        diff = difference.get(image)
        if diff is None:
          # GenerateOneDiff reports None when no percentage could be obtained.
          sys.stderr.write('Failed to compare image %s\n' % image)
        elif diff > self.threshold:
          self._WriteImageRows(f, image, diff)
        else:
          small_changes.append((image, diff))
      # Small changes are listed together after all the full comparisons.
      self._WriteSmallChanges(f, small_changes)
      f.write('</table>')
      f.write('</body></html>')

    if open_in_browser:
      webbrowser.open(html_path)

    return 0

  def _GenerateDiffs(self):
    """Runs a diff over all pairs of page images, producing diff images.

    As a side effect, the diff images will be saved to [output_path]/diff
    with the same image name.

    Returns:
      A dict mapping image names to percentage of pixels changed, or to None
      for images whose comparison did not yield a percentage.
    """
    pool = multiprocessing.Pool(self.num_workers)
    worker_func = functools.partial(GenerateOneDiffParallel, self)

    try:
      # The timeout is a workaround for http://bugs.python.org/issue8296
      # which prevents KeyboardInterrupt from working.
      one_year_in_seconds = 3600 * 24 * 365
      worker_results = (
          pool.map_async(
              worker_func,
              self.image_locations.Images()).get(one_year_in_seconds))
    except KeyboardInterrupt:
      pool.terminate()
      sys.exit(1)
    except Exception:
      # Do not leave worker processes behind when a worker raised.
      pool.terminate()
      pool.join()
      raise
    else:
      pool.close()

    pool.join()

    # Each worker result is an (image, percentage) pair.
    return dict(worker_results)

  def GenerateOneDiff(self, image):
    """Runs a diff over one pair of images, producing a diff image.

    As a side effect, the diff image will be saved to [output_path]/diff
    with the same image name.

    Args:
      image: Page image to compare.

    Returns:
      A tuple (image, diff), where image is the parameter and diff is the
      percentage of pixels changed. diff is 0 when the diff tool exits
      successfully and None when the tool fails without printing a
      percentage.
    """
    left_path = self.image_locations.Left(image)
    right_path = self.image_locations.Right(image)

    try:
      # universal_newlines makes the captured output text in both Python 2
      # and Python 3, so it can be searched with a str pattern.
      subprocess.check_output(
          [self.img_diff_bin, left_path, right_path], universal_newlines=True)
    except subprocess.CalledProcessError as e:
      percentages = re.findall(r'\d+\.\d+', e.output or '')
      if not percentages:
        # The tool failed for a reason other than differing pixels.
        return image, None
      percentage_change = float(percentages[0])
    else:
      return image, 0

    try:
      # Second pass writes the diff image; its output is captured and dropped.
      subprocess.check_output([
          self.img_diff_bin, '--diff', left_path, right_path,
          self.image_locations.Diff(image)
      ])
    except subprocess.CalledProcessError:
      # A non-zero exit is the expected outcome for images that differ.
      return image, percentage_change

    sys.stderr.write('Warning: Should have failed the previous diff.\n')
    return image, 0

  def _GetRelativePath(self, absolute_path):
    """Returns absolute_path expressed relative to output_path."""
    return os.path.relpath(absolute_path, start=self.output_path)

  def _WriteImageRows(self, f, image, diff):
    """Write table rows for a page image comparing its two versions.

    Args:
      f: Open HTML file to write to.
      image: Image file name.
      diff: Percentage of different pixels.
    """
    f.write('<tr><td colspan="2">')
    f.write('%s (%.4f%% changed)' % (image, diff))
    f.write('</td></tr>')

    f.write('<tr>')
    self._WritePageCompareTd(
        f, self._GetRelativePath(self.image_locations.Left(image)),
        self._GetRelativePath(self.image_locations.Right(image)))
    self._WritePageTd(f, self._GetRelativePath(
        self.image_locations.Diff(image)))
    f.write('</tr>')

  def _WritePageTd(self, f, image_path):
    """Write table column with a single image.

    Args:
      f: Open HTML file to write to.
      image_path: Path to image file.
    """
    f.write('<td>')
    f.write('<img src="%s">' % image_path)
    f.write('</td>')

  def _WritePageCompareTd(self, f, normal_image_path, hover_image_path):
    """Write table column for an image comparing its two versions.

    Args:
      f: Open HTML file to write to.
      normal_image_path: Path to image to be used in the "normal" state.
      hover_image_path: Path to image to be used in the "hover" state.
    """
    f.write('<td>')
    f.write('<img src="%s" '
            'onmouseover="this.src=\'%s\';" '
            'onmouseout="this.src=\'%s\';">' %
            (normal_image_path, hover_image_path, normal_image_path))
    f.write('</td>')

  def _WriteSmallChanges(self, f, small_changes):
    """Write table rows for all images considered to have only small changes.

    Args:
      f: Open HTML file to write to.
      small_changes: List of (image, change) tuples, where image is the page
          image and change is the percentage of pixels changed.
    """
    for image, change in small_changes:
      f.write('<tr><td colspan="2">')
      if not change:
        f.write('No change for: %s' % image)
      else:
        f.write('Small change of %.4f%% for: %s' % (change, image))
      f.write('</td></tr>')
240
241
class ImageLocations(object):
  """Contains the locations of input and output image files.

  Only images whose file name is present in both labelled subdirectories are
  tracked; each of them is also assigned a path for its diff image.
  """

  def __init__(self, output_path, diff_path, two_labels):
    """Constructor.

    Args:
      output_path: Path to directory with the pngs.
      diff_path: Path to directory where the diffs will be generated.
      two_labels: Tuple of two strings that name the subdirectories in
          output_path containing the images.
    """
    self.output_path = output_path
    self.diff_path = diff_path
    self.two_labels = two_labels

    self.left = self._FindImages(self.two_labels[0])
    self.right = self._FindImages(self.two_labels[1])

    # Plain set intersection behaves the same on Python 2 and Python 3,
    # unlike dict.viewkeys() which only exists on Python 2.
    common_images = set(self.left) & set(self.right)

    # Sort by pdf filename, then page number
    self.images = sorted(common_images, key=self._SortKey)
    self.diff = {
        image: os.path.join(self.diff_path, image) for image in self.images
    }

  @staticmethod
  def _SortKey(image):
    """Returns the sort key for an image named [pdf name].[page].png.

    Args:
      image: Image file name containing at least two dots.

    Returns:
      A tuple (name, page). When the second-to-last component is not an
      integer, the name without its extension is used with page -1 instead
      of raising.
    """
    pieces = image.rsplit('.', 2)
    try:
      return (pieces[0], int(pieces[1]))
    except ValueError:
      # The glob '*.*.png' also matches names without a numeric page.
      return (image.rsplit('.', 1)[0], -1)

  def _FindImages(self, label):
    """Traverses a dir and builds a dict of all page images to compare in it.

    Args:
      label: name of subdirectory of output_path to traverse.

    Returns:
      Dict mapping page image names to the path of the image file.
    """
    image_path_matcher = os.path.join(self.output_path, label, '*.*.png')

    return {
        os.path.basename(image_path): image_path
        for image_path in glob.glob(image_path_matcher)
    }

  def Images(self):
    """Returns a list of all page images present in both directories."""
    return self.images

  def Left(self, test_case):
    """Returns the path for a page image in the first subdirectory."""
    return self.left[test_case]

  def Right(self, test_case):
    """Returns the path for a page image in the second subdirectory."""
    return self.right[test_case]

  def Diff(self, test_case):
    """Returns the path for a page diff image."""
    return self.diff[test_case]
307