1# Copyright 2015 The PDFium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5
6import json
7import os
8import shlex
9import shutil
10
# This module collects and writes output in the format expected by the
# Gold baseline tool. Based on metadata provided explicitly and on a
# series of added test results, it can be used to produce a JSON file
# that is uploaded to Google Storage and ingested by Gold.
#
# The output will look similar to this:
17#
18# {
19#    "build_number" : "2",
20#    "gitHash" : "a4a338179013b029d6dd55e737b5bd648a9fb68c",
21#    "key" : {
22#       "arch" : "arm64",
23#       "compiler" : "Clang",
24#    },
25#    "results" : [
26#       {
27#          "key" : {
28#             "config" : "vk",
29#             "name" : "yuv_nv12_to_rgb_effect",
30#             "source_type" : "gm"
31#          },
32#          "md5" : "7db34da246868d50ab9ddd776ce6d779",
33#          "options" : {
34#             "ext" : "png",
35#             "gamma_correct" : "no"
36#          }
37#       },
38#       {
39#          "key" : {
40#             "config" : "vk",
41#             "name" : "yuv_to_rgb_effect",
42#             "source_type" : "gm"
43#          },
44#          "md5" : "0b955f387740c66eb23bf0e253c80d64",
45#          "options" : {
46#             "ext" : "png",
47#             "gamma_correct" : "no"
48#          }
49#       }
50#    ],
51# }
52#
class GoldResults(object):
  """Accumulates image test results and writes them out as a dm.json file.

  Typical use: construct an instance, call AddTestResult() once per test
  image, then call WriteResults() to produce <outputDir>/dm.json.
  """

  def __init__(self, source_type, outputDir, propertiesStr, keyStr,
               ignore_hashes_file):
    """
    source_type is the source_type (=corpus) field used for all results.
    outputDir is the directory where the resulting images are copied and
               the dm.json file is written. It is created if missing.
    propertiesStr is a string with space separated key/value pairs that
               is used to set the top level fields in the output JSON file.
    keyStr is a string with space separated key/value pairs that
               is used to set the 'key' field in the output JSON file.
    ignore_hashes_file is a file that contains a list of image hashes
               (one per line) that should be ignored. May be None or empty
               to ignore nothing.

    Raises ValueError if propertiesStr or keyStr holds an odd number of
    tokens.
    """
    self._source_type = source_type
    self._properties = self._parseKeyValuePairs(propertiesStr)
    self._properties["key"] = self._parseKeyValuePairs(keyStr)
    self._results = []
    self._outputDir = outputDir

    # Make sure the output directory exists.
    if not os.path.exists(outputDir):
      os.makedirs(outputDir)

    self._ignore_hashes = set()
    if ignore_hashes_file:
      with open(ignore_hashes_file, 'r') as ig_file:
        # Blank lines are skipped; surrounding whitespace is not significant.
        stripped = (line.strip() for line in ig_file)
        self._ignore_hashes = set(h for h in stripped if h)

  def AddTestResult(self, testName, md5Hash, outputImagePath):
    """
    Records one test result and, unless its hash is ignored, copies the image.

    testName is stored as the 'name' key of the result.
    md5Hash is stored as the 'md5' field and used as the copied file's name.
    outputImagePath is the image to copy; its extension becomes the 'ext'
               option of the result.

    Raises ValueError if the image has to be copied but its path has no
    file extension.
    """
    imgExt = os.path.splitext(outputImagePath)[1].lstrip(".")

    # If the hash is in the list of hashes to ignore then we don't
    # make a copy, but we still add it to the results.
    if md5Hash not in self._ignore_hashes:
      # Copy the image to <outputDir>/<md5Hash>.<image_extension>
      if not imgExt:
        raise ValueError("File %s does not have an extension" % outputImagePath)
      newFilePath = os.path.join(self._outputDir, md5Hash + '.' + imgExt)
      shutil.copy2(outputImagePath, newFilePath)

    # Add an entry to the list of test results
    self._results.append({
      "key": {
        "name": testName,
        "source_type": self._source_type,
      },
      "md5": md5Hash,
      "options": {
        "ext": imgExt,
        "gamma_correct": "no"
      }
    })

  def _parseKeyValuePairs(self, kvStr):
    """
    Parses 'k1 v1 k2 v2 ...' into a dict, honoring shell-style quoting.

    Raises ValueError if the number of tokens is odd.
    """
    kvPairs = shlex.split(kvStr)
    if len(kvPairs) % 2:
      raise ValueError("Uneven number of key/value pairs. Got %s" % kvStr)
    # Even positions are keys, odd positions are the matching values.
    return dict(zip(kvPairs[0::2], kvPairs[1::2]))

  def WriteResults(self):
    """
    Writes the properties and all added results to <outputDir>/dm.json.
    """
    self._properties["results"] = self._results

    outputFileName = os.path.join(self._outputDir, "dm.json")
    # json emits text (str). Writing that straight into a binary-mode file
    # raises TypeError on Python 3, so serialize first and encode explicitly.
    # Binary mode is kept so line endings stay '\n' on every platform.
    serialized = json.dumps(self._properties, indent=1) + "\n"
    with open(outputFileName, 'wb') as outfile:
      outfile.write(serialized.encode("utf-8"))
122
# Produce example output for manual testing.
if __name__ == "__main__":
  # Create a test directory with three empty 'image' files.
  testDir = "./testdirectory"
  if not os.path.exists(testDir):
    os.makedirs(testDir)
  for imageName in ("image1.png", "image2.png", "image3.png"):
    open(os.path.join(testDir, imageName), 'wb').close()

  # Create an instance and add results.
  propStr = """build_number 2 "builder name" Builder-Name gitHash a4a338179013b029d6dd55e737b5bd648a9fb68c"""

  keyStr = "arch arm64 compiler Clang configuration Debug"

  # "hash-1" matches the first result below, so image1.png is not copied;
  # "hash-4" matches nothing.
  hash_file = os.path.join(testDir, "ignore_hashes.txt")
  # Text mode: the payload is a str, which a binary-mode file rejects on
  # Python 3. GoldResults reads this file back in text mode as well.
  with open(hash_file, 'w') as f:
    f.write("\n".join(["hash-1","hash-4"]) + "\n")

  gr = GoldResults("pdfium", testDir, propStr, keyStr, hash_file)
  gr.AddTestResult("test-1", "hash-1", os.path.join(testDir, "image1.png"))
  gr.AddTestResult("test-2", "hash-2", os.path.join(testDir, "image2.png"))
  gr.AddTestResult("test-3", "hash-3", os.path.join(testDir, "image3.png"))
  gr.WriteResults()
147