#!/usr/bin/env python
#
# Copyright 2012 the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of Google Inc. nor the names of its
#       contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# This is a utility for converting JavaScript source code into C-style
# char arrays. It is used for embedded JavaScript code in the V8
# library.

import os
import re
import sys
import string
import optparse
import jsmin
import bz2
import textwrap


class Error(Exception):
    """Error raised for problems found while processing the natives sources."""

    def __init__(self, msg):
        Exception.__init__(self, msg)


def ToCArray(byte_sequence):
    """Format a sequence of bytes as the body of a C array initializer.

    Args:
      byte_sequence: A string of one-character items, or any sequence that
          yields integer byte values when iterated (e.g. a bytearray).

    Returns:
      The decimal byte values joined by ", " and wrapped at 80 columns.
    """
    # Items that are already integers are used as-is; one-character strings
    # are converted with ord().
    values = [str(item if isinstance(item, int) else ord(item))
              for item in byte_sequence]
    return textwrap.fill(", ".join(values), 80)


def RemoveCommentsAndTrailingWhitespace(lines):
    """Strip JavaScript comments and trailing whitespace from a source string.

    The stripping is purely regex based: everything from '//' up to the end
    of a line and everything between '/*' and '*/' is removed, regardless of
    the context it appears in.

    Args:
      lines: The source text as a single string.

    Returns:
      The stripped source text.
    """
    lines = re.sub(r'//.*\n', '\n', lines)  # end-of-line comments
    lines = re.sub(re.compile(r'/\*.*?\*/', re.DOTALL), '', lines)  # comments.
    lines = re.sub(r'\s+\n+', '\n', lines)  # trailing whitespace
    return lines


def ReadFile(filename):
    """Return the complete contents of the text file named filename."""
    with open(filename, "rt") as source_file:
        return source_file.read()


EVAL_PATTERN = re.compile(r'\beval\s*\(')
WITH_PATTERN = re.compile(r'\bwith\s*\(')


def Validate(lines):
    """Check that the source uses neither eval nor with.

    Args:
      lines: The source text as a single string.

    Returns:
      lines, unchanged.

    Raises:
      Error: If the text contains an eval call or a with statement.
    """
    # Because of simplified context setup, eval and with is not
    # allowed in the natives files.
    if EVAL_PATTERN.search(lines):
        raise Error("Eval disallowed in natives.")
    if WITH_PATTERN.search(lines):
        raise Error("With statements disallowed in natives.")

    # Pass lines through unchanged.
    return lines


def ExpandConstants(lines, constants):
    """Replace every occurrence of each constant by its value.

    Args:
      lines: The source text as a single string.
      constants: A list of (compiled name pattern, value) pairs, as returned
          by ReadMacros.

    Returns:
      The source text with all constants substituted.
    """
    # NOTE: the value is used as a regex replacement template, so backslash
    # escapes inside it are interpreted by re.
    for key, value in constants:
        lines = key.sub(str(value), lines)
    return lines


def ExpandMacroDefinition(lines, pos, name_pattern, macro, expander):
    """Expand all invocations of one macro, starting at offset pos.

    Args:
      lines: The source text as a single string.
      pos: Offset in lines at which to start searching for invocations.
      name_pattern: Compiled regex matching the macro name followed by '('.
      macro: Object with an 'args' list and an 'expand(mapping)' method.
      expander: Function applied to every (stripped) argument text before it
          is bound to its parameter name.

    Returns:
      The source text with the macro invocations replaced.
    """
    pattern_match = name_pattern.search(lines, pos)
    while pattern_match is not None:
        # Scan over the arguments
        height = 1
        start = pattern_match.start()
        end = pattern_match.end()
        assert lines[end - 1] == '('
        last_match = end
        # Wrap state into a list so the nested function can update it
        # (Python 2 has no 'nonlocal').
        arg_index = [0]
        mapping = {}

        def add_arg(text):
            # Remember to expand recursively in the arguments
            replacement = expander(text.strip())
            mapping[macro.args[arg_index[0]]] = replacement
            arg_index[0] += 1

        while end < len(lines) and height > 0:
            # We don't count commas at higher nesting levels.
            if lines[end] == ',' and height == 1:
                add_arg(lines[last_match:end])
                last_match = end + 1
            elif lines[end] in ['(', '{', '[']:
                height = height + 1
            elif lines[end] in [')', '}', ']']:
                height = height - 1
            end = end + 1
        # Remember to add the last match.
        add_arg(lines[last_match:end - 1])
        result = macro.expand(mapping)
        # Replace the occurrence of the macro with the expansion
        lines = lines[:start] + result + lines[end:]
        # Continue after the expansion so that it is not scanned again.
        pattern_match = name_pattern.search(lines, start + len(result))
    return lines


def ExpandMacros(lines, macros):
    """Expand all macros from the macro file in the given source text.

    Args:
      lines: The source text as a single string.
      macros: A list of (compiled name pattern, macro) pairs, as returned by
          ReadMacros.

    Returns:
      The source text with all macro invocations expanded.
    """
    def expander(s):
        return ExpandMacros(s, macros)

    # We allow macros to depend on the previously declared macros, but
    # we don't allow self-dependencies or recursion.
    for name_pattern, macro in reversed(macros):
        lines = ExpandMacroDefinition(lines, 0, name_pattern, macro, expander)
    return lines


class TextMacro:
    """A macro whose expansion is a textual substitution into its body."""

    def __init__(self, args, body):
        self.args = args
        self.body = body

    def expand(self, mapping):
        """Return the body with every parameter name replaced by its value.

        Args:
          mapping: Dictionary from parameter name to replacement text.
        """
        if not mapping:
            return self.body
        # A replacement value may itself contain a parameter name. Apply all
        # replacements in a single pass so that already substituted text is
        # never rewritten and the result does not depend on dictionary order.
        # Longer names go first so a name that is a prefix of another cannot
        # shadow it.
        keys = sorted(mapping, key=len, reverse=True)
        any_key_pattern = "|".join(re.escape(key) for key in keys)
        return re.sub(any_key_pattern,
                      lambda match: mapping[match.group(0)],
                      self.body)


class PythonMacro:
    """A macro whose expansion is computed by a Python function."""

    def __init__(self, args, fun):
        self.args = args
        self.fun = fun

    def expand(self, mapping):
        """Call the function with the mapped arguments; return str(result)."""
        args = [mapping[arg] for arg in self.args]
        return str(self.fun(*args))


CONST_PATTERN = re.compile(r'^const\s+([a-zA-Z0-9_]+)\s*=\s*([^;]*);$')
MACRO_PATTERN = re.compile(r'^macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$')
PYTHON_MACRO_PATTERN = re.compile(r'^python\s+macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$')


def ReadMacros(lines):
    """Parse the contents of a macro file.

    Everything from a '#' to the end of a line is ignored, as are empty
    lines. Every remaining line must be a const, macro or python macro
    definition.

    Args:
      lines: The contents of the macro file as a single string.

    Returns:
      A (constants, macros) tuple. constants is a list of
      (compiled name pattern, value) pairs and macros is a list of
      (compiled name pattern, TextMacro or PythonMacro) pairs.

    Raises:
      Error: If a line matches none of the definition patterns.
    """
    constants = []
    macros = []
    for line in lines.split('\n'):
        comment_start = line.find('#')
        if comment_start != -1:
            line = line[:comment_start]
        line = line.strip()
        if not line:
            continue

        const_match = CONST_PATTERN.match(line)
        if const_match:
            name = const_match.group(1)
            value = const_match.group(2).strip()
            constants.append((re.compile("\\b%s\\b" % name), value))
            continue

        macro_match = MACRO_PATTERN.match(line)
        if macro_match:
            name = macro_match.group(1)
            args = [arg.strip() for arg in macro_match.group(2).split(',')]
            body = macro_match.group(3).strip()
            macros.append((re.compile("\\b%s\\(" % name), TextMacro(args, body)))
            continue

        python_match = PYTHON_MACRO_PATTERN.match(line)
        if python_match:
            name = python_match.group(1)
            args = [arg.strip() for arg in python_match.group(2).split(',')]
            body = python_match.group(3).strip()
            # NOTE(review): the macro body is executed with eval(); the macro
            # file must therefore come from a trusted source.
            fun = eval("lambda " + ",".join(args) + ': ' + body)
            macros.append((re.compile("\\b%s\\(" % name), PythonMacro(args, fun)))
            continue

        raise Error("Illegal line: " + line)
    return (constants, macros)


INLINE_MACRO_PATTERN = re.compile(r'macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*\n')
INLINE_MACRO_END_PATTERN = re.compile(r'endmacro\s*\n')


def ExpandInlineMacros(lines):
    """Expand the macros defined inside the source text itself.

    Each 'macro NAME(ARGS) ... endmacro' definition is removed from the text
    and its invocations following the definition are expanded.

    Args:
      lines: The source text as a single string.

    Returns:
      The source text without inline macro definitions and with their
      invocations expanded.

    Raises:
      Error: If a macro definition has no matching endmacro.
    """
    def non_expander(s):
        return s

    pos = 0
    while True:
        macro_match = INLINE_MACRO_PATTERN.search(lines, pos)
        if macro_match is None:
            # no more macros
            return lines
        name = macro_match.group(1)
        args = [arg.strip() for arg in macro_match.group(2).split(',')]
        end_macro_match = INLINE_MACRO_END_PATTERN.search(lines, macro_match.end())
        if end_macro_match is None:
            raise Error("Macro %s unclosed" % name)
        body = lines[macro_match.end():end_macro_match.start()]

        # remove macro definition
        lines = lines[:macro_match.start()] + lines[end_macro_match.end():]
        name_pattern = re.compile("\\b%s\\(" % name)
        macro = TextMacro(args, body)

        # advance position to where the macro definition was
        pos = macro_match.start()

        lines = ExpandMacroDefinition(lines, pos, name_pattern, macro, non_expander)


INLINE_CONSTANT_PATTERN = re.compile(r'const\s+([a-zA-Z0-9_]+)\s*=\s*([^;\n]+)[;\n]')


def ExpandInlineConstants(lines):
    """Expand the constants defined inside the source text itself.

    Each 'const NAME = VALUE;' definition is removed from the text and NAME
    is replaced by VALUE in the text following the definition.

    Args:
      lines: The source text as a single string.

    Returns:
      The source text without constant definitions and with the constants
      substituted.
    """
    pos = 0
    while True:
        const_match = INLINE_CONSTANT_PATTERN.search(lines, pos)
        if const_match is None:
            # no more constants
            return lines
        name = const_match.group(1)
        replacement = const_match.group(2)
        name_pattern = re.compile("\\b%s\\b" % name)

        # remove constant definition and replace
        # NOTE: the value is used as a regex replacement template, so
        # backslash escapes inside it are interpreted by re.
        lines = (lines[:const_match.start()] +
                 re.sub(name_pattern, replacement, lines[const_match.end():]))

        # advance position to where the constant definition was
        pos = const_match.start()


HEADER_TEMPLATE = """\
// Copyright 2011 Google Inc. All Rights Reserved.

// This file was generated from .js source files by GYP. If you
// want to make changes to this file you should either change the
// javascript source files or the GYP script.

#include "src/v8.h"
#include "src/natives.h"
#include "src/utils.h"

namespace v8 {
namespace internal {

%(sources_declaration)s\

%(raw_sources_declaration)s\

  template <>
  int NativesCollection<%(type)s>::GetBuiltinsCount() {
    return %(builtin_count)i;
  }

  template <>
  int NativesCollection<%(type)s>::GetDebuggerCount() {
    return %(debugger_count)i;
  }

  template <>
  int NativesCollection<%(type)s>::GetIndex(const char* name) {
%(get_index_cases)s\
    return -1;
  }

  template <>
  int NativesCollection<%(type)s>::GetRawScriptsSize() {
    return %(raw_total_length)i;
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetRawScriptSource(int index) {
%(get_raw_script_source_cases)s\
    return Vector<const char>("", 0);
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptName(int index) {
%(get_script_name_cases)s\
    return Vector<const char>("", 0);
  }

  template <>
  Vector<const byte> NativesCollection<%(type)s>::GetScriptsSource() {
    return Vector<const byte>(sources, %(total_length)i);
  }

  template <>
  void NativesCollection<%(type)s>::SetRawScriptsSource(Vector<const char> raw_source) {
    DCHECK(%(raw_total_length)i == raw_source.length());
    raw_sources = raw_source.start();
  }

} // internal
} // v8
"""

SOURCES_DECLARATION = """\
  static const byte sources[] = { %s };
"""


RAW_SOURCES_COMPRESSION_DECLARATION = """\
  static const char* raw_sources = NULL;
"""


RAW_SOURCES_DECLARATION = """\
  static const char* raw_sources = reinterpret_cast<const char*>(sources);
"""


GET_INDEX_CASE = """\
    if (strcmp(name, "%(id)s") == 0) return %(i)i;
"""


GET_RAW_SCRIPT_SOURCE_CASE = """\
    if (index == %(i)i) return Vector<const char>(raw_sources + %(offset)i, %(raw_length)i);
"""


GET_SCRIPT_NAME_CASE = """\
    if (index == %(i)i) return Vector<const char>("%(name)s", %(length)i);
"""


def _ToBytes(value):
    """Return value as a byte string, ASCII-encoding it if it is text."""
    if isinstance(value, (bytes, bytearray)):
        return value
    return value.encode("ascii")


def BuildFilterChain(macro_filename):
    """Build the chain of filter functions to be applied to the sources.

    Args:
      macro_filename: Name of the macro file, if any.

    Returns:
      A function (string -> string) that reads a source file and processes it.
    """
    filter_chain = [ReadFile]

    if macro_filename:
        (consts, macros) = ReadMacros(ReadFile(macro_filename))
        filter_chain.append(lambda l: ExpandConstants(l, consts))
        filter_chain.append(lambda l: ExpandMacros(l, macros))

    filter_chain.extend([
        RemoveCommentsAndTrailingWhitespace,
        ExpandInlineMacros,
        ExpandInlineConstants,
        Validate,
        jsmin.JavaScriptMinifier().JSMinify
    ])

    def apply_filters(filename):
        # Feed the output of each filter into the next one, in list order.
        result = filename
        for source_filter in filter_chain:
            result = source_filter(result)
        return result

    return apply_filters


class Sources:
    """Parallel lists describing the prepared source modules."""

    def __init__(self):
        self.names = []           # Module names.
        self.modules = []         # Processed source text of each module.
        self.is_debugger_id = []  # Whether each module is a debugger file.


def IsDebuggerFile(filename):
    """Return True if filename ends with "-debugger.js"."""
    return filename.endswith("-debugger.js")


def IsMacroFile(filename):
    """Return True if filename ends with "macros.py"."""
    return filename.endswith("macros.py")


def PrepareSources(source_files):
    """Read, prepare and assemble the list of source files.

    Args:
      source_files: List of Javascript-ish source files. A file named
          macros.py will be treated as a list of macros. The list itself is
          not modified.

    Returns:
      An instance of Sources.

    Raises:
      Error: If processing one of the source files fails; the message names
          the offending file.
    """
    macro_files = [f for f in source_files if IsMacroFile(f)]
    assert len(macro_files) in [0, 1]
    macro_file = macro_files[0] if macro_files else None
    # Work on a copy so that the caller's list is left untouched.
    js_files = [f for f in source_files if not IsMacroFile(f)]

    filters = BuildFilterChain(macro_file)

    # Sort 'debugger' sources first. The sort is stable, so the relative
    # order inside each group is preserved.
    js_files = sorted(js_files, key=lambda f: not IsDebuggerFile(f))

    result = Sources()
    for source in js_files:
        try:
            lines = filters(source)
        except Error as e:
            raise Error("In file %s:\n%s" % (source, str(e)))

        result.modules.append(lines)

        is_debugger = IsDebuggerFile(source)
        result.is_debugger_id.append(is_debugger)

        # Drop the three-character ".js" suffix and, for debugger files, the
        # nine-character "-debugger" suffix as well.
        name = os.path.basename(source)[:-3]
        result.names.append(name if not is_debugger else name[:-9])
    return result


def BuildMetadata(sources, source_bytes, native_type):
    """Build the meta data required to generate a libraries file.

    Args:
      sources: A Sources instance with the prepared sources.
      source_bytes: A list of source bytes.
          (The concatenation of all sources; might be compressed.)
      native_type: The parameter for the NativesCollection template.

    Returns:
      A dictionary for use with HEADER_TEMPLATE.
    """
    total_length = len(source_bytes)
    raw_sources = "".join(sources.modules)

    # The sources are expected to be ASCII-only.
    assert all(ord(char) < 128 for char in raw_sources)

    # Loop over modules and build up indices into the source blob:
    get_index_cases = []
    get_script_name_cases = []
    get_raw_script_source_cases = []
    offset = 0
    for i, (name, module) in enumerate(zip(sources.names, sources.modules)):
        native_name = "native %s.js" % name
        d = {
            "i": i,
            "id": name,
            "name": native_name,
            "length": len(native_name),
            "offset": offset,
            "raw_length": len(module),
        }
        get_index_cases.append(GET_INDEX_CASE % d)
        get_script_name_cases.append(GET_SCRIPT_NAME_CASE % d)
        get_raw_script_source_cases.append(GET_RAW_SCRIPT_SOURCE_CASE % d)
        offset += len(module)
    assert offset == len(raw_sources)

    # If we have the raw sources we can declare them accordingly.
    have_raw_sources = source_bytes == raw_sources
    raw_sources_declaration = (RAW_SOURCES_DECLARATION
        if have_raw_sources else RAW_SOURCES_COMPRESSION_DECLARATION)

    metadata = {
        "builtin_count": len(sources.modules),
        "debugger_count": sum(sources.is_debugger_id),
        "sources_declaration": SOURCES_DECLARATION % ToCArray(source_bytes),
        "raw_sources_declaration": raw_sources_declaration,
        "raw_total_length": sum(map(len, sources.modules)),
        "total_length": total_length,
        "get_index_cases": "".join(get_index_cases),
        "get_raw_script_source_cases": "".join(get_raw_script_source_cases),
        "get_script_name_cases": "".join(get_script_name_cases),
        "type": native_type,
    }
    return metadata


def CompressMaybe(sources, compression_type):
    """Take the prepared sources and generate a sequence of bytes.

    Args:
      sources: A Sources instance with the prepared sources.
      compression_type: string, describing the desired compression.
          Either "off" or "bz2".

    Returns:
      A sequence of bytes: the concatenated sources for "off", the
      bz2-compressed concatenation for "bz2".

    Raises:
      Error: If compression_type is neither "off" nor "bz2".
    """
    sources_bytes = "".join(sources.modules)
    if compression_type == "off":
        return sources_bytes
    elif compression_type == "bz2":
        # bz2 operates on byte strings; encode first if the sources are text.
        return bz2.compress(_ToBytes(sources_bytes))
    else:
        raise Error("Unknown compression type %s." % compression_type)


def PutInt(blob_file, value):
    """Write value to blob_file as a length-prefixed little-endian integer.

    The two low bits of the first byte hold the number of bytes used (1-3);
    the remaining bits hold the value.

    Args:
      blob_file: A file object opened for binary writing.
      value: The integer to write; must be in the range [0, 1 << 20).
    """
    assert(value >= 0 and value < (1 << 20))
    size = 1 if (value < 1 << 6) else (2 if (value < 1 << 14) else 3)
    value_with_length = (value << 2) | size

    byte_sequence = bytearray()
    for _ in range(size):
        byte_sequence.append(value_with_length & 255)
        value_with_length >>= 8
    blob_file.write(byte_sequence)


def PutStr(blob_file, value):
    """Write value to blob_file as its length (see PutInt) plus its bytes.

    Args:
      blob_file: A file object opened for binary writing.
      value: The string to write; text is encoded as ASCII.
    """
    data = _ToBytes(value)
    PutInt(blob_file, len(data))
    blob_file.write(data)


def WriteStartupBlob(sources, startup_blob):
    """Write a startup blob, as expected by V8 Initialize ...
      TODO(vogelheim): Add proper method name.

    The blob holds the number of debugger sources followed by a (name,
    source) string pair for each of them, then the same for the remaining
    sources.

    Args:
      sources: A Sources instance with the prepared sources.
      startup_blob: Name of file to write the blob to.
    """
    debug_sources = sum(sources.is_debugger_id)
    # 'with' makes sure the file is closed even if writing fails.
    with open(startup_blob, "wb") as output:
        PutInt(output, debug_sources)
        for i in range(debug_sources):
            PutStr(output, sources.names[i])
            PutStr(output, sources.modules[i])

        PutInt(output, len(sources.names) - debug_sources)
        for i in range(debug_sources, len(sources.names)):
            PutStr(output, sources.names[i])
            PutStr(output, sources.modules[i])


def JS2C(source, target, native_type, compression_type, raw_file, startup_blob):
    """Process the JavaScript sources and emit the generated output files.

    Args:
      source: List of source file names (may include one macros.py).
      target: Name of the C++ file to generate.
      native_type: The parameter for the NativesCollection template.
      compression_type: "off" or "bz2".
      raw_file: Name of a file to receive the (possibly compressed) source
          bytes, or a false value to skip it.
      startup_blob: Name of a file to receive the startup blob, or a false
          value to skip it.
    """
    sources = PrepareSources(source)
    sources_bytes = CompressMaybe(sources, compression_type)
    metadata = BuildMetadata(sources, sources_bytes, native_type)

    # Optionally emit raw file.
    if raw_file:
        # Binary mode: the data may be bz2-compressed and its length must
        # match the byte counts baked into the generated file.
        with open(raw_file, "wb") as output:
            output.write(_ToBytes(sources_bytes))

    if startup_blob:
        WriteStartupBlob(sources, startup_blob)

    # Emit resulting source file.
    with open(target, "w") as output:
        output.write(HEADER_TEMPLATE % metadata)


def main():
    """Parse the command line and run JS2C."""
    parser = optparse.OptionParser()
    parser.add_option("--raw", action="store",
                      help="file to write the processed sources array to.")
    parser.add_option("--startup_blob", action="store",
                      help="file to write the startup blob to.")
    parser.set_usage("""js2c out.cc type compression sources.js ...
      out.cc: C code to be generated.
      type: type parameter for NativesCollection template.
      compression: type of compression used. [off|bz2]
      sources.js: JS internal sources or macros.py.""")
    (options, args) = parser.parse_args()

    # Report missing positional arguments as a usage error instead of
    # failing with an IndexError.
    if len(args) < 3:
        parser.error("expected at least 3 arguments: out.cc type compression")

    JS2C(args[3:], args[0], args[1], args[2], options.raw, options.startup_blob)


if __name__ == "__main__":
    main()