#!/usr/bin/env python2
##########################################################################
#
# Copyright 2008 VMware, Inc.
# All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sub license, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice (including the
# next paragraph) shall be included in all copies or substantial portions
# of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
# IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
##########################################################################


import sys
import xml.parsers.expat
import optparse

from model import *


# Token kinds produced by XmlTokenizer.
ELEMENT_START, ELEMENT_END, CHARACTER_DATA, EOF = range(4)

# Numeric expat error code for XML_ERROR_NO_ELEMENTS.  It is kept as a number
# because xml.parsers.expat.errors.XML_ERROR_NO_ELEMENTS is the error
# *message* string and therefore cannot be compared with ExpatError.code.
_XML_ERROR_NO_ELEMENTS = 3


class XmlToken:
    """A single token of an XML stream.

    Attributes:
        type: one of ELEMENT_START, ELEMENT_END, CHARACTER_DATA or EOF.
        name_or_data: the element name for start/end tokens, the text for
            character data tokens; the tokenizer passes None for EOF.
        attrs: the attributes expat reported for a start tag, otherwise None.
        line, column: position recorded by the tokenizer; None for tokens
            that are only built to describe what was expected.
    """

    def __init__(self, type, name_or_data, attrs=None, line=None, column=None):
        assert type in (ELEMENT_START, ELEMENT_END, CHARACTER_DATA, EOF)
        self.type = type
        self.name_or_data = name_or_data
        self.attrs = attrs
        self.line = line
        self.column = column

    def __str__(self):
        """Return a short human readable form, as used in error messages."""
        if self.type == ELEMENT_START:
            return '<' + self.name_or_data + ' ...>'
        if self.type == ELEMENT_END:
            return '</' + self.name_or_data + '>'
        if self.type == CHARACTER_DATA:
            return self.name_or_data
        if self.type == EOF:
            return 'end of file'
        # Unreachable as long as the constructor assertion holds.
        assert 0


class XmlTokenizer:
    """Expat based XML tokenizer.

    The input is read from ``fp`` in chunks and fed to an expat parser whose
    callbacks append XmlToken objects to ``self.tokens``; next() hands those
    tokens out one at a time.
    """

    # Amount of data requested from the stream per read() call.
    chunk_size = 16*1024

    def __init__(self, fp, skip_ws=True):
        """Create a tokenizer reading from ``fp``.

        fp must provide read(size).  When skip_ws is true, character data
        consisting only of whitespace produces no token.
        """
        self.fp = fp
        self.tokens = []
        self.index = 0
        self.final = False
        self.skip_ws = skip_ws

        # Position and text of the character data run being accumulated.
        self.character_pos = 0, 0
        self.character_data = ''

        self.parser = xml.parsers.expat.ParserCreate()
        self.parser.StartElementHandler = self.handle_element_start
        self.parser.EndElementHandler = self.handle_element_end
        self.parser.CharacterDataHandler = self.handle_character_data

    def handle_element_start(self, name, attributes):
        """Expat callback: queue an ELEMENT_START token."""
        self.finish_character_data()
        line, column = self.pos()
        self.tokens.append(
            XmlToken(ELEMENT_START, name, attributes, line, column))

    def handle_element_end(self, name):
        """Expat callback: queue an ELEMENT_END token."""
        self.finish_character_data()
        line, column = self.pos()
        self.tokens.append(XmlToken(ELEMENT_END, name, None, line, column))

    def handle_character_data(self, data):
        """Expat callback: accumulate text until the next element boundary.

        Consecutive callbacks are merged into one run; the position of the
        first piece is remembered for the resulting token.
        """
        if not self.character_data:
            self.character_pos = self.pos()
        self.character_data += data

    def finish_character_data(self):
        """Flush accumulated text as one CHARACTER_DATA token.

        Whitespace-only text is dropped when skip_ws is set.  The buffer is
        reset in either case.
        """
        if not self.character_data:
            return
        if not self.skip_ws or not self.character_data.isspace():
            line, column = self.character_pos
            self.tokens.append(XmlToken(
                CHARACTER_DATA, self.character_data, None, line, column))
        self.character_data = ''

    def next(self):
        """Return the next token, reading more input when needed.

        Once the input is exhausted an EOF token is returned on every call.

        Raises:
            xml.parsers.expat.ExpatError: for any parse error other than
                XML_ERROR_NO_ELEMENTS.
        """
        size = self.chunk_size
        while self.index >= len(self.tokens) and not self.final:
            self.tokens = []
            self.index = 0
            data = self.fp.read(size)
            # A short read is taken as the end of the input.
            self.final = len(data) < size
            data = data.rstrip('\0')
            try:
                self.parser.Parse(data, self.final)
            except xml.parsers.expat.ExpatError as e:
                # XML_ERROR_NO_ELEMENTS is deliberately tolerated --
                # presumably so that empty or truncated traces still yield
                # the tokens seen so far (TraceParser.parse accepts EOF in
                # place of </trace>).  Anything else is a real error.
                if e.code != _XML_ERROR_NO_ELEMENTS:
                    # Bare raise keeps the original traceback.
                    raise
        if self.index >= len(self.tokens):
            line, column = self.pos()
            token = XmlToken(EOF, None, None, line, column)
        else:
            token = self.tokens[self.index]
            self.index += 1
        return token

    def pos(self):
        """Return the parser's current (line, column)."""
        return self.parser.CurrentLineNumber, self.parser.CurrentColumnNumber


class TokenMismatch(Exception):
    """Raised when the current token is not what the parser expected.

    ``expected`` is either an XmlToken or a plain description string (both
    are used in this file); ``found`` is the offending token and must carry
    a line and column.
    """

    def __init__(self, expected, found):
        self.expected = expected
        self.found = found

    def __str__(self):
        return '%u:%u: %s expected, %s found' % (
            self.found.line, self.found.column,
            str(self.expected), str(self.found))


class XmlParser:
    """Base XML document parser.

    Keeps one token of look-ahead in ``self.token`` and offers helpers to
    consume start tags, end tags and character data.
    """

    def __init__(self, fp):
        self.tokenizer = XmlTokenizer(fp)
        # Prime the look-ahead token.
        self.consume()

    def consume(self):
        """Advance to the next token."""
        self.token = self.tokenizer.next()

    def match_element_start(self, name):
        """Return whether the current token is the start tag ``name``."""
        return (self.token.type == ELEMENT_START
                and self.token.name_or_data == name)

    def match_element_end(self, name):
        """Return whether the current token is the end tag ``name``."""
        return (self.token.type == ELEMENT_END
                and self.token.name_or_data == name)

    def _skip_character_data(self):
        """Discard any character data tokens at the current position."""
        while self.token.type == CHARACTER_DATA:
            self.consume()

    def element_start(self, name):
        """Consume the start tag ``name`` and return its attributes.

        Character data in front of the tag is skipped.

        Raises:
            TokenMismatch: if the next non-text token is not that start tag.
        """
        self._skip_character_data()
        if not self.match_element_start(name):
            raise TokenMismatch(XmlToken(ELEMENT_START, name), self.token)
        attrs = self.token.attrs
        self.consume()
        return attrs

    def element_end(self, name):
        """Consume the end tag ``name``.

        Character data in front of the tag is skipped.

        Raises:
            TokenMismatch: if the next non-text token is not that end tag.
        """
        self._skip_character_data()
        if not self.match_element_end(name):
            raise TokenMismatch(XmlToken(ELEMENT_END, name), self.token)
        self.consume()

    def character_data(self, strip=True):
        """Consume consecutive character data tokens and return their text.

        Returns '' when the current token is not character data.  Leading
        and trailing whitespace is removed unless ``strip`` is false.
        """
        pieces = []
        while self.token.type == CHARACTER_DATA:
            pieces.append(self.token.name_or_data)
            self.consume()
        data = ''.join(pieces)
        if strip:
            data = data.strip()
        return data


class TraceParser(XmlParser):
    """Parser for a <trace> document made of <call> elements.

    Every top-level call is turned into a Call object and passed to
    handle_call(), which subclasses override.  Call, Literal, NamedConstant,
    Blob, Array, Struct and Pointer are not defined in this file; they are
    expected to be provided by ``from model import *``.
    """

    # Element names accepted by parse_value(); each one is handled by the
    # method named 'parse_' + name.
    value_elements = ('null', 'bool', 'int', 'uint', 'float', 'string',
                      'enum', 'array', 'struct', 'ptr', 'bytes')

    def __init__(self, fp):
        XmlParser.__init__(self, fp)
        # Number of the most recently parsed call; used to number calls that
        # have no explicit 'no' attribute.
        self.last_call_no = 0

    def parse(self):
        """Parse the whole trace, invoking handle_call() for each call.

        The closing </trace> tag is optional: reaching the end of the input
        instead is accepted.
        """
        self.element_start('trace')
        while self.token.type not in (ELEMENT_END, EOF):
            call = self.parse_call()
            self.handle_call(call)
        if self.token.type != EOF:
            self.element_end('trace')

    def parse_call(self):
        """Parse one <call> element and return it as a Call.

        The 'no' attribute is optional; without it the call receives the
        previous call number plus one.  'class' and 'method' are required
        (a missing one raises KeyError).  Nested <call> elements are parsed
        and discarded.

        Raises:
            TokenMismatch: on a child element other than arg, ret, call
                or time.
        """
        attrs = self.element_start('call')
        try:
            no = int(attrs['no'])
        except KeyError:
            self.last_call_no += 1
            no = self.last_call_no
        else:
            self.last_call_no = no
        klass = attrs['class']
        method = attrs['method']
        args = []
        ret = None
        time = None
        while self.token.type == ELEMENT_START:
            child = self.token.name_or_data
            if child == 'arg':
                args.append(self.parse_arg())
            elif child == 'ret':
                ret = self.parse_ret()
            elif child == 'call':
                # ignore nested function calls
                self.parse_call()
            elif child == 'time':
                time = self.parse_time()
            else:
                raise TokenMismatch("<arg ...> or <ret ...>", self.token)
        self.element_end('call')

        return Call(no, klass, method, args, ret, time)

    def parse_arg(self):
        """Parse an <arg name=...> element into a (name, value) pair."""
        attrs = self.element_start('arg')
        name = attrs['name']
        value = self.parse_value()
        self.element_end('arg')

        return name, value

    def parse_ret(self):
        """Parse a <ret> element and return the value it wraps."""
        self.element_start('ret')
        value = self.parse_value()
        self.element_end('ret')

        return value

    def parse_time(self):
        """Parse a <time> element and return the value it wraps."""
        self.element_start('time')
        time = self.parse_value()
        self.element_end('time')
        return time

    def parse_value(self):
        """Parse one value element by dispatching on its element name.

        Raises:
            TokenMismatch: if the current token is not the start tag of one
                of the names in value_elements.
        """
        if self.token.type == ELEMENT_START:
            if self.token.name_or_data in self.value_elements:
                method = getattr(self, 'parse_' + self.token.name_or_data)
                return method()
        raise TokenMismatch(" or " .join(self.value_elements), self.token)

    def _element_text(self, name):
        """Parse <name>text</name> and return the stripped text."""
        self.element_start(name)
        text = self.character_data()
        self.element_end(name)
        return text

    def parse_null(self):
        """Parse <null/> into Literal(None)."""
        self.element_start('null')
        self.element_end('null')
        return Literal(None)

    def parse_bool(self):
        """Parse <bool> into a Literal holding the integer value."""
        return Literal(int(self._element_text('bool')))

    def parse_int(self):
        """Parse <int> into a Literal holding an int."""
        return Literal(int(self._element_text('int')))

    def parse_uint(self):
        """Parse <uint> into a Literal holding an int."""
        return Literal(int(self._element_text('uint')))

    def parse_float(self):
        """Parse <float> into a Literal holding a float."""
        return Literal(float(self._element_text('float')))

    def parse_enum(self):
        """Parse <enum> into a NamedConstant with the enclosed name."""
        return NamedConstant(self._element_text('enum'))

    def parse_string(self):
        """Parse <string> into a Literal holding the stripped text."""
        return Literal(self._element_text('string'))

    def parse_bytes(self):
        """Parse <bytes> into a Blob built from the stripped text."""
        return Blob(self._element_text('bytes'))

    def parse_array(self):
        """Parse <array> with its <elem> children into an Array."""
        self.element_start('array')
        elems = []
        while self.token.type != ELEMENT_END:
            elems.append(self.parse_elem())
        self.element_end('array')
        return Array(elems)

    def parse_elem(self):
        """Parse an <elem> element and return the value it wraps."""
        self.element_start('elem')
        value = self.parse_value()
        self.element_end('elem')
        return value

    def parse_struct(self):
        """Parse <struct name=...> with its <member> children into a Struct."""
        attrs = self.element_start('struct')
        name = attrs['name']
        members = []
        while self.token.type != ELEMENT_END:
            members.append(self.parse_member())
        self.element_end('struct')
        return Struct(name, members)

    def parse_member(self):
        """Parse a <member name=...> element into a (name, value) pair."""
        attrs = self.element_start('member')
        name = attrs['name']
        value = self.parse_value()
        self.element_end('member')

        return name, value

    def parse_ptr(self):
        """Parse <ptr> into a Pointer built from the stripped text."""
        return Pointer(self._element_text('ptr'))

    def handle_call(self, call):
        """Hook invoked by parse() for every top-level call; no-op here."""
        pass


class TraceDumper(TraceParser):
    """Trace parser that pretty-prints every call to an output stream.

    ``format`` and ``PrettyPrinter`` are not defined in this file; they are
    expected to be provided by ``from model import *``.
    """

    def __init__(self, fp, outStream = sys.stdout):
        TraceParser.__init__(self, fp)
        self.formatter = format.DefaultFormatter(outStream)
        self.pretty_printer = PrettyPrinter(self.formatter)

    def handle_call(self, call):
        """Visit the call with the pretty printer, then end the line."""
        call.visit(self.pretty_printer)
        self.formatter.newline()


class Main:
    '''Common main class for all retrace command line utilities.'''

    def __init__(self):
        pass

    def main(self):
        """Parse the command line and process every trace file named on it.

        Files ending in '.gz' or '.bz2' are opened through GzipFile or
        BZ2File respectively; everything else is opened as a plain file.
        """
        optparser = self.get_optparser()
        (options, args) = optparser.parse_args(sys.argv[1:])

        if not args:
            optparser.error('insufficient number of arguments')

        for arg in args:
            if arg.endswith('.gz'):
                from gzip import GzipFile
                stream = GzipFile(arg, 'rt')
            elif arg.endswith('.bz2'):
                from bz2 import BZ2File
                stream = BZ2File(arg, 'rU')
            else:
                stream = open(arg, 'rt')
            # Close the stream even when processing fails, so that handling
            # many files does not leak file descriptors.
            try:
                self.process_arg(stream, options)
            finally:
                stream.close()

    def get_optparser(self):
        """Return the option parser; subclasses may extend it."""
        optparser = optparse.OptionParser(
            usage="\n\t%prog [options] TRACE  [...]")
        return optparser

    def process_arg(self, stream, options):
        """Process one opened trace stream; the default dumps it."""
        parser = TraceDumper(stream)
        parser.parse()


if __name__ == '__main__':
    Main().main()