1#!/usr/bin/env python 2 3# 4# Copyright 2012 the V8 project authors. All rights reserved. 5# Redistribution and use in source and binary forms, with or without 6# modification, are permitted provided that the following conditions are 7# met: 8# 9# * Redistributions of source code must retain the above copyright 10# notice, this list of conditions and the following disclaimer. 11# * Redistributions in binary form must reproduce the above 12# copyright notice, this list of conditions and the following 13# disclaimer in the documentation and/or other materials provided 14# with the distribution. 15# * Neither the name of Google Inc. nor the names of its 16# contributors may be used to endorse or promote products derived 17# from this software without specific prior written permission. 18# 19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30# 31 32# 33# Emits a C++ file to be compiled and linked into libv8 to support postmortem 34# debugging tools. 
# Most importantly, this tool emits constants describing V8
# internals:
#
#    v8dbg_type_CLASS__TYPE = VALUE             Describes class type values
#    v8dbg_class_CLASS__FIELD__TYPE = OFFSET    Describes class fields
#    v8dbg_parent_CLASS__PARENT                 Describes class hierarchy
#    v8dbg_frametype_NAME = VALUE               Describes stack frame values
#    v8dbg_off_fp_NAME = OFFSET                 Frame pointer offsets
#    v8dbg_prop_NAME = OFFSET                   Object property offsets
#    v8dbg_NAME = VALUE                         Miscellaneous values
#
# These constants are declared as global integers so that they'll be present
# in the generated libv8 binary.
#

import re
import sys

#
# Miscellaneous constants, tags, and masks used for object identification.
#
# Each entry pairs the suffix of the emitted symbol ('name', written out as
# "v8dbg_<name>") with the C++ expression that initializes it ('value').
#
consts_misc = [
    {'name': 'FirstNonstringType', 'value': 'FIRST_NONSTRING_TYPE'},

    # String / not-string tags.
    {'name': 'IsNotStringMask', 'value': 'kIsNotStringMask'},
    {'name': 'StringTag', 'value': 'kStringTag'},
    {'name': 'NotStringTag', 'value': 'kNotStringTag'},

    # String encoding tags.
    {'name': 'StringEncodingMask', 'value': 'kStringEncodingMask'},
    {'name': 'TwoByteStringTag', 'value': 'kTwoByteStringTag'},
    {'name': 'OneByteStringTag', 'value': 'kOneByteStringTag'},

    # String representation tags.
    {'name': 'StringRepresentationMask',
     'value': 'kStringRepresentationMask'},
    {'name': 'SeqStringTag', 'value': 'kSeqStringTag'},
    {'name': 'ConsStringTag', 'value': 'kConsStringTag'},
    {'name': 'ExternalStringTag', 'value': 'kExternalStringTag'},
    {'name': 'SlicedStringTag', 'value': 'kSlicedStringTag'},

    # Failure / heap object / smi tags and sizes.
    {'name': 'FailureTag', 'value': 'kFailureTag'},
    {'name': 'FailureTagMask', 'value': 'kFailureTagMask'},
    {'name': 'HeapObjectTag', 'value': 'kHeapObjectTag'},
    {'name': 'HeapObjectTagMask', 'value': 'kHeapObjectTagMask'},
    {'name': 'SmiTag', 'value': 'kSmiTag'},
    {'name': 'SmiTagMask', 'value': 'kSmiTagMask'},
    {'name': 'SmiValueShift', 'value': 'kSmiTagSize'},
    {'name': 'SmiShiftSize', 'value': 'kSmiShiftSize'},
    {'name': 'PointerSizeLog2', 'value': 'kPointerSizeLog2'},

    # Oddball::k* values.
    {'name': 'OddballFalse', 'value': 'Oddball::kFalse'},
    {'name': 'OddballTrue', 'value': 'Oddball::kTrue'},
    {'name': 'OddballTheHole', 'value': 'Oddball::kTheHole'},
    {'name': 'OddballNull', 'value': 'Oddball::kNull'},
    {'name': 'OddballArgumentMarker', 'value': 'Oddball::kArgumentMarker'},
    {'name': 'OddballUndefined', 'value': 'Oddball::kUndefined'},
    {'name': 'OddballUninitialized', 'value': 'Oddball::kUninitialized'},
    {'name': 'OddballOther', 'value': 'Oddball::kOther'},
    {'name': 'OddballException', 'value': 'Oddball::kException'},

    # Property details (v8dbg_prop_*).
    {'name': 'prop_idx_first',
     'value': 'DescriptorArray::kFirstIndex'},
    {'name': 'prop_type_field',
     'value': 'FIELD'},
    {'name': 'prop_type_first_phantom',
     'value': 'TRANSITION'},
    {'name': 'prop_type_mask',
     'value': 'PropertyDetails::TypeField::kMask'},
    {'name': 'prop_index_mask',
     'value': 'PropertyDetails::FieldIndexField::kMask'},
    {'name': 'prop_index_shift',
     'value': 'PropertyDetails::FieldIndexField::kShift'},

    # DescriptorArray::kDescriptor* values.
    {'name': 'prop_desc_key',
     'value': 'DescriptorArray::kDescriptorKey'},
    {'name': 'prop_desc_details',
     'value': 'DescriptorArray::kDescriptorDetails'},
    {'name': 'prop_desc_value',
     'value': 'DescriptorArray::kDescriptorValue'},
    {'name': 'prop_desc_size',
     'value': 'DescriptorArray::kDescriptorSize'},

    # Elements kinds.
    {'name': 'elements_fast_holey_elements',
     'value': 'FAST_HOLEY_ELEMENTS'},
    {'name': 'elements_fast_elements',
     'value': 'FAST_ELEMENTS'},
    {'name': 'elements_dictionary_elements',
     'value': 'DICTIONARY_ELEMENTS'},

    # Map bit-field masks and shifts.
    {'name': 'bit_field2_elements_kind_mask',
     'value': 'Map::kElementsKindMask'},
    {'name': 'bit_field2_elements_kind_shift',
     'value': 'Map::kElementsKindShift'},
    {'name': 'bit_field3_dictionary_map_shift',
     'value': 'Map::DictionaryMap::kShift'},

    # Frame pointer offsets (v8dbg_off_fp_*).
    {'name': 'off_fp_context',
     'value': 'StandardFrameConstants::kContextOffset'},
    {'name': 'off_fp_constant_pool',
     'value': 'StandardFrameConstants::kConstantPoolOffset'},
    {'name': 'off_fp_marker',
     'value': 'StandardFrameConstants::kMarkerOffset'},
    {'name': 'off_fp_function',
     'value': 'JavaScriptFrameConstants::kFunctionOffset'},
    {'name': 'off_fp_args',
     'value': 'JavaScriptFrameConstants::kLastParameterOffset'},
]

#
# The following useful fields are missing accessors, so we define fake ones.
#
# Each string is the argument list of a synthetic ACCESSORS(...) invocation:
# "Class, field, type, offset-constant".  See load_fields().
#
extras_accessors = [
    'HeapObject, map, Map, kMapOffset',
    'JSObject, elements, Object, kElementsOffset',
    'FixedArray, data, uintptr_t, kHeaderSize',
    'Map, instance_attributes, int, kInstanceAttributesOffset',
    'Map, inobject_properties, int, kInObjectPropertiesOffset',
    'Map, instance_size, int, kInstanceSizeOffset',
    'Map, bit_field, char, kBitFieldOffset',
    'Map, bit_field2, char, kBitField2Offset',
    'Map, bit_field3, SMI, kBitField3Offset',
    'Map, prototype, Object, kPrototypeOffset',
    'NameDictionaryShape, prefix_size, int, kPrefixSize',
    'NameDictionaryShape, entry_size, int, kEntrySize',
    'SeededNumberDictionaryShape, prefix_size, int, kPrefixSize',
    'UnseededNumberDictionaryShape, prefix_size, int, kPrefixSize',
    'NumberDictionaryShape, entry_size, int, kEntrySize',
    'Oddball, kind_offset, int, kKindOffset',
    'HeapNumber, value, double, kValueOffset',
    'ConsString, first, String, kFirstOffset',
    'ConsString, second, String, kSecondOffset',
    'ExternalString, resource, Object, kResourceOffset',
    'SeqOneByteString, chars, char, kHeaderSize',
    'SeqTwoByteString, chars, char, kHeaderSize',
    'SharedFunctionInfo, code, Code, kCodeOffset',
    'SlicedString, parent, String, kParentOffset',
    'Code, instruction_start, uintptr_t, kHeaderSize',
    'Code, instruction_size, int, kInstructionSizeOffset',
]

#
# The following is a whitelist of classes we expect to find when
# scanning the source code. This list is not exhaustive, but it's still useful
# to identify when this script gets out of sync with the source. See
# load_objects().
#
expected_classes = [
    'ConsString', 'FixedArray', 'HeapNumber', 'JSArray', 'JSFunction',
    'JSObject', 'JSRegExp', 'JSValue', 'Map', 'Oddball', 'Script',
    'SeqOneByteString', 'SharedFunctionInfo'
]


#
# The following structures store high-level representations of the structures
# for which we're going to emit descriptive constants.
#
types = {}          # set of all type names
typeclasses = {}    # maps type names to corresponding class names
klasses = {}        # known classes, including parents
fields = []         # field declarations

#
# Text written at the top of the generated C++ file.  Note that the
# backslash-newline inside this (non-raw) string literal is consumed by
# Python, so the FRAME_CONST macro is written out on a single line.
#
header = '''
/*
 * This file is generated by %s. Do not edit directly.
 */

#include "v8.h"
#include "frames.h"
#include "frames-inl.h" /* for architecture-specific frame constants */

using namespace v8::internal;

extern "C" {

/* stack frame constants */
#define FRAME_CONST(value, klass) \
    int v8dbg_frametype_##klass = StackFrame::value;

STACK_FRAME_TYPE_LIST(FRAME_CONST)

#undef FRAME_CONST

''' % sys.argv[0]

#
# Text written at the bottom of the generated file (closes extern "C").
#
footer = '''
}
'''


#
# Infer the class name for an instance type based on a systematic
# transformation.  For example, "JS_FUNCTION_TYPE" becomes "JSFunction".
#
def _class_name_for_type(typename, known_classes):
    """Return the candidate class name for the enumerator `typename`.

    Returns None when `typename` does not end in "_TYPE".  The returned name
    is only a candidate: the caller must still check that it is a key of
    `known_classes`, because some types have no corresponding class.
    """
    # Symbols and Strings are implemented using the same classes.
    usetype = typename.replace('SYMBOL_', 'STRING_')

    # REGEXP behaves like REG_EXP, as in JS_REGEXP_TYPE => JSRegExp.
    usetype = usetype.replace('_REGEXP_', '_REG_EXP_')

    if not usetype.endswith('_TYPE'):
        return None

    #
    # Remove the "_TYPE" suffix and then convert to camel case, except that
    # a "JS" prefix remains uppercase (as in "JS_FUNCTION_TYPE" =>
    # "JSFunction").  capitalize() also copes with empty components.
    #
    parts = usetype[:-len('_TYPE')].split('_')
    cctype = ''
    if parts[0] == 'JS':
        cctype = 'JS'
        parts = parts[1:]
    cctype += ''.join(part.capitalize() for part in parts)

    if not cctype.endswith('String'):
        return cctype

    #
    # Mapping string types is more complicated.  Both types and class names
    # for Strings specify a representation (e.g., Seq, Cons, External, or
    # Sliced) and an encoding (TwoByte/OneByte).  In the simplest case, both
    # of these are explicit in both names, as in:
    #
    #       EXTERNAL_ONE_BYTE_STRING_TYPE => ExternalOneByteString
    #
    # However, either the representation or encoding can be omitted from the
    # type name, in which case "Seq" and "TwoByte" are assumed, as in:
    #
    #       STRING_TYPE => SeqTwoByteString
    #
    # Additionally, sometimes the type name has more information than the
    # class, as in:
    #
    #       CONS_ONE_BYTE_STRING_TYPE => ConsString
    #
    # To figure this out dynamically, we first check for a representation
    # and encoding and add them if they're not present.  If that doesn't
    # yield a valid class name, then we strip out the encoding.
    #
    if ('Cons' not in cctype and 'External' not in cctype and
            'Sliced' not in cctype):
        if 'OneByte' in cctype:
            cctype = re.sub(r'OneByteString$', 'SeqOneByteString', cctype)
        else:
            cctype = re.sub(r'String$', 'SeqString', cctype)

    if 'OneByte' not in cctype:
        cctype = re.sub(r'String$', 'TwoByteString', cctype)

    if cctype not in known_classes:
        cctype = cctype.replace('OneByte', '').replace('TwoByte', '')

    return cctype


#
# Loads class hierarchy and type information from "objects.h".
#
def load_objects():
    """Populate `types`, `klasses` and `typeclasses` from sys.argv[2].

    Prints an error for every entry of `expected_classes` that could not be
    matched to an instance type and then exits with status 1.
    """
    class_decl = re.compile(r'class (\w[^\s:]*)(: public (\w[^\s{]*))?\s*{')
    in_insttype = False
    typestr = ''

    #
    # Iterate objects.h line-by-line to collect type and class information.
    # For types, we accumulate a string representing the entire InstanceType
    # enum definition and parse it later because it's easier to do so
    # without the embedded newlines.
    #
    with open(sys.argv[2], 'r') as objfile:
        for line in objfile:
            if line.startswith('enum InstanceType {'):
                in_insttype = True
                continue

            if in_insttype and line.startswith('};'):
                in_insttype = False
                continue

            # Drop surrounding whitespace and any trailing "//" comment.
            line = re.sub(r'//.*', '', line.strip())

            if in_insttype:
                typestr += line
                continue

            match = class_decl.match(line)
            if match:
                klasses[match.group(1)] = {'parent': match.group(3)}

    #
    # Process the instance type declaration.  Empty entries (for example
    # the one produced by a trailing comma) are not type names.
    #
    for entry in typestr.split(','):
        name = re.sub(r'\s*=.*', '', entry).strip()
        if name:
            types[name] = True

    #
    # We find the class for each type rather than the other way around
    # because there are fewer cases where one type maps to more than one
    # class than the other way around.
    #
    missing = set(expected_classes)
    for typename in types:
        cctype = _class_name_for_type(typename, klasses)

        # Despite all that, some types have no corresponding class.
        if cctype is None or cctype not in klasses:
            continue

        typeclasses[typename] = cctype
        missing.discard(cctype)

    if missing:
        for klass in sorted(missing):
            print('error: expected class \"%s\" not found' % klass)

        sys.exit(1)


#
# For a given macro call, pick apart the arguments and return an object
# describing the corresponding output constant. See load_fields().
#
def parse_field(call):
    """Turn one accessor-macro invocation into an output constant.

    `call` is the full text of the invocation, e.g.
    "ACCESSORS(Map, prototype, Object, kPrototypeOffset)"; it may span
    several lines.  The last character is assumed to be the closing ')'.

    Returns a dict with 'name' ("class_<Class>__<field>__<type>") and
    'value' ("<Class>::<offset>").  The SMI accessor macros carry no type
    argument, so their type is reported as "SMI".

    Raises ValueError if the macro name is not one of the supported kinds.
    """
    # Collapse a multi-line invocation onto one line.  (Strings are
    # immutable, so this has to build a new string.)
    call = call.replace('\n', ' ')

    idx = call.find('(')
    kind = call[:idx]
    rest = call[idx + 1:len(call) - 1]
    args = [arg.strip() for arg in re.split(r'\s*,\s*', rest)]

    if kind in ('ACCESSORS', 'ACCESSORS_GCSAFE'):
        klass = args[0]
        field = args[1]
        dtype = args[2]
        offset = args[3]
    elif kind in ('SMI_ACCESSORS', 'ACCESSORS_TO_SMI'):
        klass = args[0]
        field = args[1]
        dtype = 'SMI'
        offset = args[2]
    else:
        # Raised explicitly rather than asserted so that the check is not
        # stripped when Python runs with -O.
        raise ValueError('unsupported accessor macro: %r' % kind)

    return {
        'name': 'class_%s__%s__%s' % (klass, field, dtype),
        'value': '%s::%s' % (klass, offset)
    }


#
# Scan line[start:] while tracking parenthesis depth, stopping right after
# the character that brings the depth back to zero.
#
def _scan_parens(line, start, opens):
    """Return (depth, end) after scanning `line` from index `start`.

    `opens` is the depth on entry.  `end` is one past the last character
    consumed: just after the balancing ')' if the depth reached zero,
    otherwise the end of the line.
    """
    end = start
    for end in range(start, len(line)):
        if line[end] == '(':
            opens += 1
        elif line[end] == ')':
            opens -= 1

        if opens == 0:
            break

    return opens, end + 1


#
# Load field offset information from objects-inl.h.
#
def load_fields():
    """Append one constant per accessor macro in sys.argv[3] to `fields`.

    Each class's fields and the corresponding offsets are described in the
    source by calls to macros like "ACCESSORS" (and friends).  All we do
    here is extract these macro invocations, taking into account that they
    may span multiple lines and may contain nested parentheses.  We also
    call parse_field() to pick apart the invocation.  The synthetic
    entries in `extras_accessors` are appended last.
    """
    prefixes = ['ACCESSORS', 'ACCESSORS_GCSAFE',
                'SMI_ACCESSORS', 'ACCESSORS_TO_SMI']
    current = ''    # text of the invocation collected so far
    opens = 0       # parenthesis depth inside that invocation

    with open(sys.argv[3], 'r') as inlfile:
        for line in inlfile:
            if opens > 0:
                # Continuation line
                opens, end = _scan_parens(line, 0, opens)
                current += line[:end]
                continue

            for prefix in prefixes:
                if not line.startswith(prefix + '('):
                    continue

                # A new invocation begins; flush the one already collected.
                if current:
                    fields.append(parse_field(current))
                    current = ''

                opens, end = _scan_parens(line, len(prefix), opens)
                current += line[:end]

                # The prefixes are mutually exclusive once '(' is appended.
                break

    if current:
        fields.append(parse_field(current))

    for body in extras_accessors:
        fields.append(parse_field('ACCESSORS(%s)' % body))


#
# Emit a block of constants.
#
def emit_set(out, consts):
    """Write "int v8dbg_<name> = <value>;" to `out` for each constant.

    `consts` is a sequence of dicts with 'name' and 'value' keys; `out` only
    needs a write() method.  A blank line is written after the block.
    """
    # Fix up overzealous parses.  This could be done inside the
    # parsers but as there are several, it's easiest to do it here.
    ws = re.compile(r'\s+')
    for const in consts:
        name = ws.sub('', const['name'])
        value = ws.sub('', str(const['value']))  # Can be a number.
        out.write('int v8dbg_%s = %s;\n' % (name, value))
    out.write('\n')


#
# Emit the whole output file.
#
def emit_config():
    """Write the generated C++ file to the path given in sys.argv[1].

    The file consists of `header`, one block each for the miscellaneous
    constants, the class type information, the class hierarchy information
    and the field information, and finally `footer`.  load_objects() and
    load_fields() must have run first so that `typeclasses`, `klasses` and
    `fields` are populated.
    """
    # open() rather than the Python-2-only file() builtin, and a context
    # manager so the output is flushed and closed even on error.
    with open(sys.argv[1], 'w') as out:
        out.write(header)

        out.write('/* miscellaneous constants */\n')
        emit_set(out, consts_misc)

        # sorted() works on both Python 2 lists and Python 3 dict views,
        # and keeps the output order deterministic.
        out.write('/* class type information */\n')
        consts = []
        for typename in sorted(typeclasses):
            klass = typeclasses[typename]
            consts.append({
                'name': 'type_%s__%s' % (klass, typename),
                'value': typename
            })

        emit_set(out, consts)

        out.write('/* class hierarchy information */\n')
        consts = []
        for klassname in sorted(klasses):
            pklass = klasses[klassname]['parent']
            if pklass is None:
                # Classes without a recorded parent get no entry.
                continue

            # The parent relationship is encoded in the symbol name
            # itself; the value is a placeholder.
            consts.append({
                'name': 'parent_%s__%s' % (klassname, pklass),
                'value': 0
            })

        emit_set(out, consts)

        out.write('/* field information */\n')
        emit_set(out, fields)

        out.write(footer)


# Only run the generator when executed as a script, so that importing this
# module has no side effects.
if __name__ == '__main__':
    if len(sys.argv) < 4:
        print('usage: %s output.cc objects.h objects-inl.h' % sys.argv[0])
        sys.exit(2)

    load_objects()
    load_fields()
    emit_config()