1#!/usr/bin/env python
2
3"""Validate compact unwind info by cross checking the llvm-objdump
4reports of the input object file vs final linked output.
5"""
6from __future__ import print_function
7import sys
8import argparse
9import re
10from pprint import pprint
11
def main():
  """Cross-check compact unwind encodings of object files vs linked output.

  Reads llvm-objdump text (from the FILES arguments, or from stdin when no
  files are given), extracts the per-symbol compact encodings reported for
  the object file(s) and for the linked program, and verifies that:

    * the object-file encodings, after folding entries that are adjacent in
      sorted symbol order and share an encoding, equal the program's
      per-function encodings; and
    * the program's common-encodings table lists the folded encodings in
      descending (frequency, encoding) order.

  With --debug, the fold decisions are printed, and on a mismatch the two
  values being compared are pretty-printed before exiting.

  Raises:
    SystemExit: with a diagnostic message when a required section cannot be
      found in the input or when either comparison fails.
    KeyError: when a program function offset has no entry among the parsed
      program symbols.
  """
  # Raw string so that "\d" reaches the regex engine untouched; the name also
  # avoids shadowing the hex() builtin.
  hex_digit = r"[a-f\d]"
  hex8 = hex_digit + "{8}"

  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument('files', metavar='FILES', nargs='*',
                      help='output of (llvm-objdump --unwind-info --syms) for '
                           'object file(s) plus final linker output')
  parser.add_argument('--debug', action='store_true')
  args = parser.parse_args()

  if args.files:
    # Read every input through a context manager so each handle is closed
    # deterministically.
    chunks = []
    for path in args.files:
      with open(path) as objdump_file:
        chunks.append(objdump_file.read())
    objdump_string = ''.join(chunks)
  else:
    objdump_string = sys.stdin.read()

  # Object-file entries: (symbol, encoding, personality, lsda).  The
  # personality/LSDA pair is optional and yields empty strings when absent.
  object_encodings_list = re.findall(
      r"start:\s+0x%s+\s+(\w+)\s+" % hex_digit +
      r"length:\s+0x%s+\s+" % hex_digit +
      r"compact encoding:\s+0x(%s+)(?:\s+" % hex_digit +
      r"personality function:\s+0x(%s+)\s+\w+\s+" % hex_digit +
      r"LSDA:\s+0x(%s+)\s+\w+(?: \+ 0x%s+)?)?" % (hex_digit, hex_digit),
      objdump_string, re.DOTALL)
  object_encodings_map = {symbol: encoding
                          for symbol, encoding, _, _ in object_encodings_list}
  if not object_encodings_map:
    sys.exit("no object encodings found in input")

  # Program symbol table: low 8 hex digits of the address -> symbol.  Only
  # symbols spelled "x<those same 8 digits>" are accepted (the \1 backref).
  program_symbols_map = {
      address: symbol
      for address, symbol in
      re.findall(r"^%s(%s) g\s+F __TEXT,__text (x\1)$" % (hex8, hex8),
                 objdump_string, re.MULTILINE)}
  if not program_symbols_map:
    sys.exit("no program symbols found in input")

  program_common_encodings = re.findall(
      r"^\s+encoding\[\d+\]: 0x(%s+)$" % hex_digit,
      objdump_string, re.MULTILINE)
  if not program_common_encodings:
    sys.exit("no common encodings found in input")

  program_encodings_map = {
      program_symbols_map[address]: encoding
      for address, encoding in
      re.findall(r"^\s+\[\d+\]: function offset=0x(%s+), " % hex_digit +
                 r"encoding\[\d+\]=0x(%s+)$" % hex_digit,
                 objdump_string, re.MULTILINE)}
  # Check the map that was just built (the object map was validated above).
  if not program_encodings_map:
    sys.exit("no program encodings found in input")

  # Fold adjacent entries from the object file that have matching encodings
  # TODO(gkm) add check for personality+lsda
  previous_encoding = None
  # sorted() returns a new list, so deleting from the dict while looping is
  # safe.
  for symbol in sorted(object_encodings_map):
    encoding = object_encodings_map[symbol]
    fold = (encoding == previous_encoding)
    if fold:
      del object_encodings_map[symbol]
    if args.debug:
      print("%s %s with %s" % (
              'delete' if fold else 'retain', symbol, encoding))
    previous_encoding = encoding

  if program_encodings_map != object_encodings_map:
    if args.debug:
      # Print the label separately: concatenating str with a dict raises
      # TypeError.
      print("program encodings map:")
      pprint(program_encodings_map)
      print("object encodings map:")
      pprint(object_encodings_map)
    sys.exit("encoding maps differ")

  # Count frequency of object-file folded encodings
  # and compare with the program-file common encodings table
  encoding_frequency_map = {}
  for encoding in object_encodings_map.values():
    encoding_frequency_map[encoding] = (
        1 + encoding_frequency_map.get(encoding, 0))
  # Most frequent first; ties broken by the encoding string, also descending.
  encoding_frequencies = sorted(
      encoding_frequency_map,
      key=lambda x: (encoding_frequency_map.get(x), x),
      reverse=True)

  if program_common_encodings != encoding_frequencies:
    if args.debug:
      print("program common encodings:")
      pprint(program_common_encodings)
      print("object encoding frequencies:")
      pprint(encoding_frequencies)
    sys.exit("encoding frequencies differ")
95
96
# Run the validation only when executed as a script, not when imported.
if __name__ == "__main__":
  main()
99