1#!/usr/bin/env python
2#
3# Copyright 2012 the V8 project authors. All rights reserved.
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met:
7#
8#     * Redistributions of source code must retain the above copyright
9#       notice, this list of conditions and the following disclaimer.
10#     * Redistributions in binary form must reproduce the above
11#       copyright notice, this list of conditions and the following
12#       disclaimer in the documentation and/or other materials provided
13#       with the distribution.
14#     * Neither the name of Google Inc. nor the names of its
15#       contributors may be used to endorse or promote products derived
16#       from this software without specific prior written permission.
17#
18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30# This is a utility for converting JavaScript source code into C-style
31# char arrays. It is used for embedded JavaScript code in the V8
32# library.
33
34import os, re
35import optparse
36import jsmin
37import textwrap
38
39
class Error(Exception):
  """Exception raised for problems detected while processing the sources."""

  def __init__(self, msg):
    # The message is the only argument and is handed to Exception unchanged.
    super(Error, self).__init__(msg)
43
44
def ToCArray(byte_sequence):
  """Render a byte sequence as a comma separated list of decimal values.

  Args:
    byte_sequence: The data to render. Either a text string, whose
        characters are converted with ord(), or a bytes/bytearray object
        whose items may already be integers.

  Returns:
    The decimal values joined by ", " and wrapped with textwrap.fill to a
    width of 80 columns. An empty input gives an empty string.
  """
  values = []
  for item in byte_sequence:
    # Iterating bytes/bytearray can yield integers directly; ord() only
    # applies to one-character strings.
    values.append(str(item if isinstance(item, int) else ord(item)))
  return textwrap.fill(", ".join(values), 80)
51
52
def RemoveCommentsAndTrailingWhitespace(lines):
  """Strip JavaScript comments and trailing whitespace from source text.

  The substitutions work on the raw text and are not aware of string
  literals, so comment-like sequences inside strings are removed as well.

  Args:
    lines: The source text as a single string.

  Returns:
    The text without '//' comments (only those terminated by a newline),
    without '/* ... */' comments, and with whitespace in front of newlines
    collapsed.
  """
  substitutions = (
      (re.compile(r'//.*\n'), '\n'),              # end-of-line comments
      (re.compile(r'/\*.*?\*/', re.DOTALL), ''),  # block comments
      (re.compile(r'\s+\n+'), '\n'),              # trailing whitespace
  )
  for pattern, replacement in substitutions:
    lines = pattern.sub(replacement, lines)
  return lines
58
59
def ReadFile(filename):
  """Return the complete contents of a file opened in text mode.

  Args:
    filename: Path of the file to read.

  Returns:
    The file contents as one string. The file is closed before returning,
    also when reading fails.
  """
  with open(filename, "rt") as source:
    return source.read()
67
68
# A call to eval: the word "eval" followed by an opening parenthesis.
EVAL_PATTERN = re.compile(r'\beval\s*\(')
# A with statement: the word "with" followed by an opening parenthesis.
WITH_PATTERN = re.compile(r'\bwith\s*\(')
# A Make...Error( call (other than MakeGeneric...) whose first argument
# starts with 'k' or an upper case letter; group 1 is that argument.
INVALID_ERROR_MESSAGE_PATTERN = re.compile(
    r'Make(?!Generic)\w*Error\(([kA-Z]\w+)')
# "new $...Error(" that is not immediately followed by ")".
NEW_ERROR_PATTERN = re.compile(r'new \$\w*Error\((?!\))')

def Validate(lines):
  """Reject source text that uses constructs not permitted in natives.

  Because of the simplified context setup, eval and with statements are not
  allowed in the natives files.

  Args:
    lines: The source text as a single string.

  Returns:
    The text, unchanged.

  Raises:
    Error: If the text matches any of the patterns above. The checks run in
        the order eval, with, unknown message template, error constructor.
  """
  forbidden = (
      (EVAL_PATTERN, "Eval disallowed in natives."),
      (WITH_PATTERN, "With statements disallowed in natives."),
  )
  for pattern, message in forbidden:
    if pattern.search(lines) is not None:
      raise Error(message)

  unknown_template = INVALID_ERROR_MESSAGE_PATTERN.search(lines)
  if unknown_template is not None:
    raise Error("Unknown error message template '%s'" % unknown_template.group(1))

  if NEW_ERROR_PATTERN.search(lines) is not None:
    raise Error("Error constructed without message template.")

  return lines
89
90
def ExpandConstants(lines, constants):
  """Substitute constants into the source text.

  Args:
    lines: The source text as a single string.
    constants: Iterable of (compiled pattern, value) pairs. Each match of a
        pattern is replaced by str(value); the pairs are applied in order,
        each one to the result of the previous substitution.

  Returns:
    The text after all substitutions.
  """
  expanded = lines
  for pattern, replacement in constants:
    expanded = pattern.sub(str(replacement), expanded)
  return expanded
95
96
def ExpandMacroDefinition(lines, pos, name_pattern, macro, expander):
  """Expand every call of one macro found in the text at or after pos.

  Args:
    lines: The source text as a single string.
    pos: Index at which the search for macro calls starts.
    name_pattern: Compiled pattern matching the macro name together with the
        opening parenthesis of the call; a match must end with '('.
    macro: Object with an `args` list of parameter names and an
        `expand(mapping)` method returning the replacement text.
    expander: Function applied to each argument text (after stripping
        whitespace) before it is bound to its parameter, e.g. to expand
        macro calls nested in the arguments.

  Returns:
    The text with each macro call replaced by its expansion. The search
    continues behind each inserted expansion.

  Raises:
    Error: If a call has more arguments than the macro declares.
  """
  pattern_match = name_pattern.search(lines, pos)
  while pattern_match is not None:
    start = pattern_match.start()
    end = pattern_match.end()
    assert lines[end - 1] == '('
    last_match = end
    expanded_args = []

    def add_arg(arg_text):
      if len(expanded_args) >= len(macro.args):
        # Count '\n' rather than os.linesep: the text uses '\n' line breaks
        # and os.linesep would miscount where it is '\r\n'.
        lineno = lines.count('\n', 0, start) + 1
        raise Error('line %s: Too many arguments for macro "%s"' % (lineno, name_pattern.pattern))
      # Remember to expand recursively in the arguments.
      expanded_args.append(expander(arg_text.strip()))

    # Scan over the arguments up to the parenthesis closing the call.
    height = 1
    while end < len(lines) and height > 0:
      # We don't count commas at higher nesting levels.
      if lines[end] == ',' and height == 1:
        add_arg(lines[last_match:end])
        last_match = end + 1
      elif lines[end] in ['(', '{', '[']:
        height = height + 1
      elif lines[end] in [')', '}', ']']:
        height = height - 1
      end = end + 1
    # Remember to add the last argument (the text before the closing ')').
    add_arg(lines[last_match:end-1])

    # Bind arguments to parameter names by position; surplus parameters stay
    # unbound.
    mapping = dict(zip(macro.args, expanded_args))
    result = macro.expand(mapping)
    # Replace the occurrence of the macro with the expansion.
    lines = lines[:start] + result + lines[end:]
    pattern_match = name_pattern.search(lines, start + len(result))
  return lines
133
def ExpandMacros(lines, macros):
  """Expand all macros in the source text.

  Macros are processed in reverse declaration order, so the expansion of a
  macro may use macros declared before it. Self-dependencies and recursion
  are not allowed.

  Args:
    lines: The source text as a single string.
    macros: Sequence of (compiled name pattern, macro) pairs in declaration
        order.

  Returns:
    The text with all macro calls expanded.
  """
  def expand_argument(text):
    # Arguments of a macro call may contain macro calls themselves.
    return ExpandMacros(text, macros)

  for name_pattern, macro in reversed(macros):
    lines = ExpandMacroDefinition(lines, 0, name_pattern, macro,
                                  expand_argument)
  return lines
142
class TextMacro:
  """A macro whose expansion is its body with the parameter names replaced."""

  def __init__(self, args, body):
    # args: list of parameter names; body: the replacement text.
    self.args = args
    self.body = body

  def expand(self, mapping):
    """Return the body with parameter names replaced by their values.

    Args:
      mapping: Dictionary from parameter name to the text to insert.

    Returns:
      The body in which every occurrence of a key of `mapping` is replaced
      by the corresponding value. The replacement is purely textual. An
      empty mapping returns the body unchanged.
    """
    if not mapping:
      return self.body
    # Keys could be substrings of earlier values. To avoid unintended
    # clobbering, apply all replacements simultaneously. Longer names come
    # first in the alternation so that a name which is a prefix of another
    # one cannot shadow it.
    names = sorted(mapping, key=len, reverse=True)
    any_key_pattern = "|".join(re.escape(name) for name in names)
    def replace(match):
      return mapping[match.group(0)]
    return re.sub(any_key_pattern, replace, self.body)
154
class PythonMacro:
  """A macro whose expansion is computed by a Python function."""

  def __init__(self, args, fun):
    # args: list of parameter names; fun: callable taking one positional
    # argument per parameter.
    self.args = args
    self.fun = fun

  def expand(self, mapping):
    """Call the function with the mapped arguments.

    Args:
      mapping: Dictionary from parameter name to argument text. Every
          parameter of the macro must be present.

    Returns:
      str() of the value returned by the function.
    """
    positional = [mapping[name] for name in self.args]
    return str(self.fun(*positional))
164
# "define NAME = value;"
CONST_PATTERN = re.compile(r'^define\s+([a-zA-Z0-9_]+)\s*=\s*([^;]*);$')
# "macro NAME(args) = body;"
MACRO_PATTERN = re.compile(r'^macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$')
# "python macro NAME(args) = expression;"
PYTHON_MACRO_PATTERN = re.compile(r'^python\s+macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$')


def ReadMacros(lines):
  """Parse the contents of a macro definition file.

  Everything from a '#' to the end of the line is treated as a comment.
  Blank lines are ignored. Every other line has to be a constant, a text
  macro or a Python macro definition.

  Args:
    lines: The contents of the macro file as a single string.

  Returns:
    A tuple (constants, macros). `constants` is a list of
    (compiled name pattern, value string) pairs, `macros` a list of
    (compiled call pattern, TextMacro or PythonMacro) pairs, both in file
    order.

  Raises:
    Error: If a line is not one of the recognized definitions.
  """
  constants = []
  macros = []
  for line in lines.split('\n'):
    comment_start = line.find('#')
    if comment_start != -1:
      line = line[:comment_start]
    line = line.strip()
    if not line:
      continue

    const_match = CONST_PATTERN.match(line)
    if const_match:
      name = const_match.group(1)
      value = const_match.group(2).strip()
      constants.append((re.compile("\\b%s\\b" % name), value))
      continue

    macro_match = MACRO_PATTERN.match(line)
    if macro_match:
      name = macro_match.group(1)
      args = [match.strip() for match in macro_match.group(2).split(',')]
      body = macro_match.group(3).strip()
      macros.append((re.compile("\\b%s\\(" % name), TextMacro(args, body)))
      continue

    python_match = PYTHON_MACRO_PATTERN.match(line)
    if python_match:
      name = python_match.group(1)
      args = [match.strip() for match in python_match.group(2).split(',')]
      body = python_match.group(3).strip()
      # NOTE: eval() runs code taken from the macro file. The macro file
      # must therefore be a trusted input.
      fun = eval("lambda " + ",".join(args) + ': ' + body)
      macros.append((re.compile("\\b%s\\(" % name), PythonMacro(args, fun)))
      continue

    raise Error("Illegal line: " + line)
  return (constants, macros)
201
202
# An indented "T(Name," entry; group 1 is the template name.
TEMPLATE_PATTERN = re.compile(r'^\s+T\(([A-Z][a-zA-Z0-9]*),')

def ReadMessageTemplates(lines):
  """Extract the message template constants from a template listing.

  Args:
    lines: The contents of the message template file as a single string.

  Returns:
    A list of (compiled pattern, index) pairs, one per line matching
    TEMPLATE_PATTERN. The pattern matches the template name prefixed with
    'k' as a whole word; the index counts the matching lines from 0.
  """
  matches = (TEMPLATE_PATTERN.match(line) for line in lines.split('\n'))
  names = ["k%s" % found.group(1) for found in matches if found]
  return [(re.compile("\\b%s\\b" % name), index)
          for index, name in enumerate(names)]
216
# Header line of an inline macro: "macro NAME(args)" up to the line break.
INLINE_MACRO_PATTERN = re.compile(r'macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*\n')
# Line that terminates an inline macro body.
INLINE_MACRO_END_PATTERN = re.compile(r'endmacro\s*\n')

def ExpandInlineMacros(lines):
  """Expand macros that are defined inside the source text itself.

  Each definition is removed from the text and its calls are expanded from
  the position of the definition onwards. The body of a macro is the text
  between its header line and the following "endmacro". Call arguments are
  inserted as written, without further expansion.

  Args:
    lines: The source text as a single string.

  Returns:
    The text without macro definitions and with their calls expanded.

  Raises:
    Error: If a macro header has no terminating "endmacro".
  """
  def keep_argument(text):
    return text

  search_from = 0
  definition = INLINE_MACRO_PATTERN.search(lines, search_from)
  while definition is not None:
    name = definition.group(1)
    params = [param.strip() for param in definition.group(2).split(',')]
    terminator = INLINE_MACRO_END_PATTERN.search(lines, definition.end())
    if terminator is None:
      raise Error("Macro %s unclosed" % name)
    body = lines[definition.end():terminator.start()]

    # Cut the definition out and continue where it used to be.
    search_from = definition.start()
    lines = lines[:search_from] + lines[terminator.end():]

    call_pattern = re.compile("\\b%s\\(" % name)
    lines = ExpandMacroDefinition(lines, search_from, call_pattern,
                                  TextMacro(params, body), keep_argument)
    definition = INLINE_MACRO_PATTERN.search(lines, search_from)
  # No more macros.
  return lines
245
246
# "define NAME = value;" followed directly by a line break.
INLINE_CONSTANT_PATTERN = re.compile(r'define\s+([a-zA-Z0-9_]+)\s*=\s*([^;\n]+);\n')

def ExpandInlineConstants(lines):
  """Expand constants that are defined inside the source text itself.

  Each definition is removed and the constant is replaced by its value in
  the text following the definition. Text in front of the definition is
  left alone.

  Args:
    lines: The source text as a single string.

  Returns:
    The text without constant definitions and with their uses replaced.
  """
  search_from = 0
  definition = INLINE_CONSTANT_PATTERN.search(lines, search_from)
  while definition is not None:
    name, replacement = definition.group(1, 2)
    usage_pattern = re.compile("\\b%s\\b" % name)

    # Drop the definition and substitute in the remainder of the text, then
    # continue where the definition used to be.
    search_from = definition.start()
    remainder = lines[definition.end():]
    lines = lines[:search_from] + usage_pattern.sub(replacement, remainder)
    definition = INLINE_CONSTANT_PATTERN.search(lines, search_from)
  # No more constants.
  return lines
266
267
# Template of the generated C++ file. It is filled in with the dictionary
# returned by BuildMetadata, using %-formatting with named placeholders:
# sources_declaration, type, builtin_count, debugger_count, get_index_cases,
# get_script_source_cases, get_script_name_cases and total_length.
# A trailing backslash inside the template suppresses the line break that
# would otherwise follow the placeholder.
HEADER_TEMPLATE = """\
// Copyright 2011 Google Inc. All Rights Reserved.

// This file was generated from .js source files by GYP.  If you
// want to make changes to this file you should either change the
// javascript source files or the GYP script.

#include "src/v8.h"
#include "src/snapshot/natives.h"
#include "src/utils.h"

namespace v8 {
namespace internal {

%(sources_declaration)s\

  template <>
  int NativesCollection<%(type)s>::GetBuiltinsCount() {
    return %(builtin_count)i;
  }

  template <>
  int NativesCollection<%(type)s>::GetDebuggerCount() {
    return %(debugger_count)i;
  }

  template <>
  int NativesCollection<%(type)s>::GetIndex(const char* name) {
%(get_index_cases)s\
    return -1;
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptSource(int index) {
%(get_script_source_cases)s\
    return Vector<const char>("", 0);
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptName(int index) {
%(get_script_name_cases)s\
    return Vector<const char>("", 0);
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptsSource() {
    return Vector<const char>(sources, %(total_length)i);
  }
}  // internal
}  // v8
"""
319
# Declaration of the C array holding all sources; %s is the array contents
# as produced by ToCArray.
SOURCES_DECLARATION = """\
  static const char sources[] = { %s };
"""


# One line of GetIndex per module: maps the module id to its index.
GET_INDEX_CASE = """\
    if (strcmp(name, "%(id)s") == 0) return %(i)i;
"""


# One line of GetScriptSource per module: the slice of the sources array
# that belongs to the module.
GET_SCRIPT_SOURCE_CASE = """\
    if (index == %(i)i) return Vector<const char>(sources + %(offset)i, %(source_length)i);
"""


# One line of GetScriptName per module: the script name and its length.
GET_SCRIPT_NAME_CASE = """\
    if (index == %(i)i) return Vector<const char>("%(name)s", %(length)i);
"""
338
339
def BuildFilterChain(macro_filename, message_template_file):
  """Build the chain of filter functions to be applied to the sources.

  The filters run in this order: macro expansion and constant expansion
  (only with a macro file), message template expansion (only with a message
  template file), comment removal, inline macro expansion, inline constant
  expansion, validation and minification.

  Args:
    macro_filename: Name of the macro file, if any.
    message_template_file: Name of the message template file, if any.

  Returns:
    A function (string -> string) that processes a source file.
  """
  filter_chain = []

  if macro_filename:
    (consts, macros) = ReadMacros(ReadFile(macro_filename))
    filter_chain.append(lambda l: ExpandMacros(l, macros))
    filter_chain.append(lambda l: ExpandConstants(l, consts))

  if message_template_file:
    message_templates = ReadMessageTemplates(ReadFile(message_template_file))
    filter_chain.append(lambda l: ExpandConstants(l, message_templates))

  filter_chain.extend([
    RemoveCommentsAndTrailingWhitespace,
    ExpandInlineMacros,
    ExpandInlineConstants,
    Validate,
    jsmin.JavaScriptMinifier().JSMinify
  ])

  def apply_filters(source):
    # A plain loop instead of reduce(): reduce is only a builtin in
    # Python 2, the loop works with every Python version.
    for source_filter in filter_chain:
      source = source_filter(source)
    return source

  return apply_filters
372
def BuildExtraFilterChain():
  """Build the filter function used for the extras source types.

  Returns:
    A function (string -> string) that validates the source first and then
    removes comments and trailing whitespace. No macro or constant
    expansion and no minification takes place.
  """
  def extra_filter(source):
    validated = Validate(source)
    return RemoveCommentsAndTrailingWhitespace(validated)

  return extra_filter
375
class Sources:
  """The prepared source modules, stored as parallel lists."""

  def __init__(self):
    # PrepareSources appends one entry per module to each list:
    # the module name, the processed source text, and whether the module
    # is a debugger source.
    self.names, self.modules, self.is_debugger_id = [], [], []
381
382
def IsDebuggerFile(filename):
  """Return True if "debug" occurs anywhere in the given file name."""
  return filename.find("debug") != -1
385
def IsMacroFile(filename):
  """Return True if the file name ends with "macros.py"."""
  suffix = "macros.py"
  return filename.endswith(suffix)
388
def IsMessageTemplateFile(filename):
  """Return True if the file name ends with "messages.h"."""
  suffix = "messages.h"
  return filename.endswith(suffix)
391
392
def PrepareSources(source_files, native_type, emit_js):
  """Read, prepare and assemble the list of source files.

  Args:
    source_files: List of JavaScript-ish source files. A file named macros.py
        will be treated as a list of macros, a file named messages.h as the
        list of message templates. Both are removed from the passed-in list.
    native_type: String corresponding to a NativeType enum value, allowing us
        to treat different types of sources differently.
    emit_js: True if we should skip the byte conversion and just leave the
        sources as JS strings. Not used by this function.

  Returns:
    An instance of Sources.

  Raises:
    Error: If processing a source file fails; the message names the file.
  """
  # List comprehensions instead of filter(): the result needs len() and
  # indexing, which a Python 3 filter object does not support.
  macro_file = None
  macro_files = [f for f in source_files if IsMacroFile(f)]
  assert len(macro_files) in [0, 1]
  if macro_files:
    source_files.remove(macro_files[0])
    macro_file = macro_files[0]

  message_template_file = None
  message_template_files = [f for f in source_files
                            if IsMessageTemplateFile(f)]
  assert len(message_template_files) in [0, 1]
  if message_template_files:
    source_files.remove(message_template_files[0])
    message_template_file = message_template_files[0]

  if native_type in ("EXTRAS", "EXPERIMENTAL_EXTRAS"):
    filters = BuildExtraFilterChain()
  else:
    filters = BuildFilterChain(macro_file, message_template_file)

  # Sort 'debugger' sources first. The sort is stable, so the relative order
  # within the debugger and the other sources is kept. A key function is
  # used because comparison functions are not supported by Python 3.
  source_files = sorted(source_files, key=lambda f: not IsDebuggerFile(f))

  source_files_and_contents = [(f, ReadFile(f)) for f in source_files]

  # Have a single not-quite-empty source file if there are none present;
  # otherwise you get errors trying to compile an empty C++ array.
  # It cannot be empty (or whitespace, which gets trimmed to empty), as
  # the deserialization code assumes each file is nonempty.
  if not source_files_and_contents:
    source_files_and_contents = [("dummy.js", "(function() {})")]

  result = Sources()

  for (source, contents) in source_files_and_contents:
    try:
      lines = filters(contents)
    except Error as e:
      raise Error("In file %s:\n%s" % (source, str(e)))

    result.modules.append(lines)
    result.is_debugger_id.append(IsDebuggerFile(source))

    # The module name is the base name without its last three characters,
    # i.e. a ".js" extension is assumed.
    result.names.append(os.path.basename(source)[:-3])

  return result
457
458
def BuildMetadata(sources, source_bytes, native_type):
  """Build the meta data required to generate a libraries file.

  Args:
    sources: A Sources instance with the prepared sources.
    source_bytes: A list of source bytes.
        (The concatenation of all sources; might be compressed.)
    native_type: The parameter for the NativesCollection template.

  Returns:
    A dictionary for use with HEADER_TEMPLATE.
  """
  total_length = len(source_bytes)
  raw_sources = "".join(sources.modules)

  # The sources are expected to be ASCII-only. all() is used rather than the
  # truth value of filter(), which is always true on Python 3.
  assert all(ord(value) < 128 for value in raw_sources)

  # Loop over modules and build up indices into the source blob:
  get_index_cases = []
  get_script_name_cases = []
  get_script_source_cases = []
  offset = 0
  for i, module in enumerate(sources.modules):
    native_name = "native %s.js" % sources.names[i]
    d = {
        "i": i,
        "id": sources.names[i],
        "name": native_name,
        "length": len(native_name),
        "offset": offset,
        "source_length": len(module),
    }
    get_index_cases.append(GET_INDEX_CASE % d)
    get_script_name_cases.append(GET_SCRIPT_NAME_CASE % d)
    get_script_source_cases.append(GET_SCRIPT_SOURCE_CASE % d)
    offset += len(module)
  assert offset == len(raw_sources)

  metadata = {
    # Counts all modules, the debugger sources included.
    "builtin_count": len(sources.modules),
    "debugger_count": sum(sources.is_debugger_id),
    "sources_declaration": SOURCES_DECLARATION % ToCArray(source_bytes),
    "total_length": total_length,
    "get_index_cases": "".join(get_index_cases),
    "get_script_source_cases": "".join(get_script_source_cases),
    "get_script_name_cases": "".join(get_script_name_cases),
    "type": native_type,
  }
  return metadata
509
510
def PutInt(blob_file, value):
  """Write an integer to the blob in a variable length encoding.

  The value is shifted left by two bits and the number of bytes minus one is
  stored in the two lowest bits. The result is written with the least
  significant byte first, using 1 to 4 bytes.

  Args:
    blob_file: File-like object opened for binary writing.
    value: The integer to write; must satisfy 0 <= value < 2**28.
  """
  assert 0 <= value < (1 << 28)
  if value < 1 << 6:
    size = 1
  elif value < 1 << 14:
    size = 2
  elif value < 1 << 22:
    size = 3
  else:
    size = 4
  value_with_length = (value << 2) | (size - 1)

  byte_sequence = bytearray()
  # range() works with every Python version; size is at most 4.
  for _ in range(size):
    byte_sequence.append(value_with_length & 255)
    value_with_length >>= 8
  blob_file.write(byte_sequence)
528
529
def PutStr(blob_file, value):
  """Write a length-prefixed string to the blob.

  Args:
    blob_file: File-like object opened for binary writing.
    value: The string to write. Text that is not already a byte string is
        encoded as UTF-8 first, so that the length prefix counts bytes and
        the data can be written to a binary file.
  """
  if not isinstance(value, (bytes, bytearray)):
    value = value.encode("utf-8")
  PutInt(blob_file, len(value))
  blob_file.write(value)
533
534
def WriteStartupBlob(sources, startup_blob):
  """Write a startup blob, as expected by V8 Initialize ...
    TODO(vogelheim): Add proper method name.

  The blob consists of the number of debugger sources, followed by name and
  source of each of them, then the number of remaining sources, followed by
  their names and sources.

  Args:
    sources: A Sources instance with the prepared sources. The debugger
        sources are taken to be the first entries, which is the order
        PrepareSources produces.
    startup_blob: Name of file to write the blob to.
  """
  debug_sources = sum(sources.is_debugger_id)

  # The with statement closes the file also when writing fails.
  with open(startup_blob, "wb") as output:
    PutInt(output, debug_sources)
    for i in range(debug_sources):
      PutStr(output, sources.names[i])
      PutStr(output, sources.modules[i])

    PutInt(output, len(sources.names) - debug_sources)
    for i in range(debug_sources, len(sources.names)):
      PutStr(output, sources.names[i])
      PutStr(output, sources.modules[i])
557
558
def JS2C(sources, target, native_type, raw_file, startup_blob, emit_js):
  """Process the source files and write all requested output files.

  Args:
    sources: List of source file names, see PrepareSources.
    target: Name of the file to generate.
    native_type: The parameter for the NativesCollection template.
    raw_file: Name of a file to write the processed sources to, if any.
    startup_blob: Name of a file to write the startup blob to, if any.
    emit_js: If true, the target receives the processed sources instead of
        the generated C++ code.
  """
  prepared_sources = PrepareSources(sources, native_type, emit_js)
  sources_output = "".join(prepared_sources.modules)
  metadata = BuildMetadata(prepared_sources, sources_output, native_type)

  # Optionally emit raw file.
  if raw_file:
    with open(raw_file, "w") as output:
      output.write(sources_output)

  if startup_blob:
    WriteStartupBlob(prepared_sources, startup_blob)

  # Emit resulting source file. The with statement closes the file also when
  # writing fails.
  with open(target, "w") as output:
    if emit_js:
      output.write(sources_output)
    else:
      output.write(HEADER_TEMPLATE % metadata)
580
581
def main():
  """Parse the command line and run the conversion.

  The positional arguments are the output file, the native type and the
  source files. With fewer than two positional arguments the usage is
  printed and the program exits with an error.
  """
  parser = optparse.OptionParser()
  parser.add_option("--raw",
                    help="file to write the processed sources array to.")
  parser.add_option("--startup_blob",
                    help="file to write the startup blob to.")
  parser.add_option("--js",
                    help="writes a JS file output instead of a C file",
                    action="store_true", default=False, dest='js')
  parser.add_option("--nojs", action="store_false", default=False, dest='js')
  parser.set_usage("""js2c out.cc type sources.js ...
        out.cc: C code to be generated.
        type: type parameter for NativesCollection template.
        sources.js: JS internal sources or macros.py.""")
  (options, args) = parser.parse_args()
  if len(args) < 2:
    # Report missing arguments with the usage text instead of failing with
    # an IndexError below.
    parser.error("expected at least the output file and the type")
  JS2C(args[2:],
       args[0],
       args[1],
       options.raw,
       options.startup_blob,
       options.js)


if __name__ == "__main__":
  main()
607