1# Copyright 2013 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Utility function for stripping comments out of JavaScript source code."""
6
7import re
8
9
10def _TokenizeJS(text):
11  """Splits source code text into segments in preparation for comment stripping.
12
13  Note that this doesn't tokenize for parsing. There is no notion of statements,
14  variables, etc. The only tokens of interest are comment-related tokens.
15
16  Args:
17    text: The contents of a JavaScript file.
18
19  Yields:
20    A succession of strings in the file, including all comment-related symbols.
21  """
22  rest = text
23  tokens = ['//', '/*', '*/', '\n']
24  next_tok = re.compile('|'.join(re.escape(x) for x in tokens))
25  while len(rest):
26    m = next_tok.search(rest)
27    if not m:
28      # end of string
29      yield rest
30      return
31    min_index = m.start()
32    end_index = m.end()
33
34    if min_index > 0:
35      yield rest[:min_index]
36
37    yield rest[min_index:end_index]
38    rest = rest[end_index:]
39
40
def StripJSComments(text):
  """Strips comments out of JavaScript source code.

  Line comments ('// ...') are removed together with the newline that ends
  them. Block comments ('/* ... */') are removed up to and including the first
  closing '*/'; JavaScript block comments do not nest, so a '/*' appearing
  inside a block comment is just comment text. A comment that is still open at
  the end of the input is removed through the end of the input.

  String and regular-expression literals are not recognized, so comment
  markers appearing inside them are treated as the start of a comment.

  Args:
    text: JavaScript source text.

  Returns:
    JavaScript source text with comments stripped out.
  """
  result_tokens = []
  # A single shared iterator: the inner loops consume the comment body and the
  # outer loop then resumes right after the comment terminator.
  token_stream = iter(_TokenizeJS(text))
  for t in token_stream:
    if t == '//':
      # Discard everything through the end of the line (newline included).
      for t2 in token_stream:
        if t2 == '\n':
          break
    elif t == '/*':
      # Discard everything through the first '*/'. No nesting count is kept,
      # because a nested '/*' does not open a new comment in JavaScript.
      for t2 in token_stream:
        if t2 == '*/':
          break
    else:
      result_tokens.append(t)
  return ''.join(result_tokens)
82