#!/usr/bin/env python
# Copyright (c) 2012 Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Normalizes and de-duplicates paths within Breakpad symbol files.

When using DWARF for storing debug symbols, some file information will be
stored relative to the current working directory of the current compilation
unit, and may be further relativized based upon how the file was #included.

This helper can be used to parse the Breakpad symbol file generated from such
DWARF files and normalize and de-duplicate the FILE records found within,
updating any references to the FILE records in the other record types.
"""

try:
    # Not used by this script. The module was removed from the standard
    # library in Python 3.8, so the import is guarded to keep the script
    # importable there while preserving the name where it still exists.
    import macpath
except ImportError:
    macpath = None
import ntpath
import optparse
import os
import posixpath
import sys


class BreakpadParseError(Exception):
    """Unsupported Breakpad symbol record exception class."""


class SymbolFileParser(object):
    """Parser for Breakpad symbol files.

    The format of these files is documented at
    https://code.google.com/p/google-breakpad/wiki/SymbolFiles
    """

    def __init__(self, input_stream, output_stream, ignored_prefixes=None,
                 path_handler=os.path):
        """Inits a SymbolFileParser to read symbol records from |input_stream|
        and write the processed output to |output_stream|.

        |ignored_prefixes| contains a list of optional path prefixes that
        should be stripped from the final, normalized path outputs.

        For example, if the Breakpad symbol file had all paths starting with a
        common prefix, such as:
          FILE 1 /b/build/src/foo.cc
          FILE 2 /b/build/src/bar.cc
        Then adding "/b/build/src" as an ignored prefix would result in an
        output file that contained:
          FILE 1 foo.cc
          FILE 2 bar.cc

        Note that |ignored_prefixes| does not necessarily contain file system
        paths, as the contents of the DWARF DW_AT_comp_dir attribute is
        dependent upon the host system and compiler, and may contain
        additional information such as hostname or compiler version.

        |path_handler| is the module used to normalize paths (it must provide
        normpath()); it defaults to os.path.
        """
        # Maps a normalized file path to the index of the first FILE record
        # that used it.
        self.unique_files = {}
        # Maps the index of a dropped (duplicate) FILE record to the index of
        # the FILE record that was kept for the same normalized path.
        self.duplicate_files = {}
        self.input_stream = input_stream
        self.output_stream = output_stream
        self.ignored_prefixes = ignored_prefixes or []
        self.path_handler = path_handler

    def Process(self):
        """Processes the Breakpad symbol file.

        Reads |input_stream| line by line and writes every record that
        survives processing to |output_stream|, one per line.

        Raises:
          BreakpadParseError: if a FILE or Line record cannot be parsed.
        """
        for line in self.input_stream:
            parsed = self._ParseRecord(line.rstrip())
            # Dropped duplicates (None) and empty lines are not written.
            if parsed:
                self.output_stream.write(parsed + '\n')

    def _ParseRecord(self, record):
        """Parses a single Breakpad symbol record - a single line from the
        symbol file.

        Returns:
          The modified string to write to the output file, or None if no line
          should be written.
        """
        record_type = record.partition(' ')[0]
        if record_type == 'FILE':
            return self._ParseFileRecord(record)
        if self._IsLineRecord(record_type):
            return self._ParseLineRecord(record)
        # Simply pass the record through unaltered.
        return record

    def _NormalizePath(self, path):
        """Normalizes a file path to its canonical form.

        As this may not execute on the machine or file system originally
        responsible for compilation, it may be necessary to further correct
        paths for symlinks, junctions, or other such file system indirections.

        Returns:
          A unique, canonical representation for the file path.
        """
        return self.path_handler.normpath(path)

    def _AdjustPath(self, path):
        """Adjusts the supplied path after performing path de-duplication.

        This may be used to perform secondary adjustments, such as removing a
        common prefix, such as "/D/build", or replacing the file system path
        with information from the version control system.

        Returns:
          The actual path to use when writing the FILE record: |path| with the
          first matching entry of |ignored_prefixes| removed, or |path|
          unchanged when no prefix matches.
        """
        # An explicit loop is used instead of indexing the result of filter(),
        # which is an iterator (not a list) on Python 3.
        for prefix in self.ignored_prefixes:
            if path.startswith(prefix):
                return path[len(prefix):]
        return path

    def _ParseFileRecord(self, file_record):
        """Parses and corrects a FILE record.

        Returns:
          The rewritten FILE record, or None if the record's normalized path
          duplicates an earlier FILE record and should be dropped.

        Raises:
          BreakpadParseError: if the record is not of the form
            "FILE <number> <name>" with a space-free name.
        """
        # Skip the leading "FILE " and split into index and name; any extra
        # field means the name contained a space, which is not supported.
        file_info = file_record[5:].split(' ', 3)
        if len(file_info) != 2:
            raise BreakpadParseError('Unsupported FILE record: ' + file_record)
        try:
            file_index = int(file_info[0])
        except ValueError:
            raise BreakpadParseError('Unsupported FILE record: ' + file_record)
        file_name = self._NormalizePath(file_info[1])
        existing_file_index = self.unique_files.get(file_name)
        if existing_file_index is None:
            self.unique_files[file_name] = file_index
            file_info[1] = self._AdjustPath(file_name)
            return 'FILE ' + ' '.join(file_info)
        # Remember the mapping so Line records can be redirected to the
        # FILE record that was kept.
        self.duplicate_files[file_index] = existing_file_index
        return None

    def _IsLineRecord(self, record_type):
        """Determines if the current record type is a Line record.

        Line records have no keyword; they start with a hexadecimal address.
        """
        try:
            int(record_type, 16)
        except (ValueError, TypeError):
            return False
        return True

    def _ParseLineRecord(self, line_record):
        """Parses and corrects a Line record.

        Returns:
          The Line record with its file index replaced by the index of the
          surviving FILE record, if the original index was de-duplicated.

        Raises:
          BreakpadParseError: if the record does not have exactly four fields
            or its file index is not a decimal number.
        """
        line_info = line_record.split(' ', 5)
        if len(line_info) != 4:
            raise BreakpadParseError('Unsupported Line record: ' + line_record)
        try:
            file_index = int(line_info[3])
        except ValueError:
            raise BreakpadParseError('Unsupported Line record: ' + line_record)
        line_info[3] = str(self.duplicate_files.get(file_index, file_index))
        return ' '.join(line_info)


def main():
    """Filters a Breakpad symbol file from stdin to stdout.

    Returns:
      0 on success, 1 if the symbol file could not be processed.
    """
    option_parser = optparse.OptionParser()
    option_parser.add_option("-p", "--prefix",
                             action="append", dest="prefixes", type="string",
                             default=[],
                             help="A path prefix that should be removed from "
                                  "all FILE lines. May be repeated to specify "
                                  "multiple prefixes.")
    option_parser.add_option("-t", "--path_type",
                             action="store", type="choice",
                             dest="path_handler",
                             choices=['win32', 'posix'],
                             help="Indicates how file paths should be "
                                  "interpreted. The default is to treat paths "
                                  "the same as the OS running Python (eg: "
                                  "os.path)")
    options, args = option_parser.parse_args()
    if args:
        option_parser.error('Unknown argument: %s' % args)

    path_handler = {'win32': ntpath,
                    'posix': posixpath}.get(options.path_handler, os.path)
    try:
        symbol_parser = SymbolFileParser(sys.stdin, sys.stdout,
                                         options.prefixes, path_handler)
        symbol_parser.Process()
    except BreakpadParseError as e:
        # sys.stderr.write works unchanged on both Python 2 and Python 3.
        sys.stderr.write('Got an error while processing symbol file\n')
        sys.stderr.write(str(e) + '\n')
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main())