1#!/usr/bin/env python3
2#  Copyright 2016 Google Inc. All Rights Reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS-IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16from concurrent import futures
17
18import itertools
19import sys
20import re
21import pygraphviz as gv
22import ply.lex as lex
23import ply.yacc as yacc
24from functools import lru_cache as memoize
25
26diagnostic_header_pattern = re.compile('[^ ]+\.[^ ]+:[0-9]+:[0-9]+: ([^ ]*): (.*)')
27in_file_included_from_pattern = re.compile('In file included from .*:')
28in_instantiation_of_template_pattern = re.compile('in instantiation of (.*) (?:requested|required) here')
29static_warning_marked_deprecated_here_pattern = re.compile('\'static_warning\' has been explicitly marked deprecated here')
30
31class Diagnostic:
32    def __init__(self, kind, message):
33        self.kind = kind
34        self.message = message
35        self.template_instantiation_trace = []
36
37tokens = (
38    'LPAREN',
39    'RPAREN',
40    'LBRACKET',
41    'RBRACKET',
42    'LBRACE',
43    'RBRACE',
44    'LESS_THAN',
45    'GREATER_THAN',
46    'DOUBLE_COLON',
47    'COMMA',
48    'IDENTIFIER',
49    'ASTERISK',
50    'AMPERSAND',
51)
52
53t_LPAREN = r'\('
54t_RPAREN = r'\)'
55t_LBRACKET = r'\['
56t_RBRACKET = r'\]'
57t_LBRACE =  r'}'
58t_RBRACE = r'{'
59t_LESS_THAN = r'<'
60t_GREATER_THAN = r'>'
61t_DOUBLE_COLON = r'::'
62t_COMMA = r','
63t_ASTERISK = r'\*'
64t_AMPERSAND = r'&'
65# We conflate numbers as identifiers too, we don't care about the difference.
66t_IDENTIFIER = r'[a-zA-Z0-9_]+'
67
68t_ignore = ' \t'
69
70def t_error(t):
71    raise Exception("Illegal character '%s' followed by %s" % (t.value[0], t.value[1:]))
72
73class LayoutNeedsMultipleLinesException(Exception):
74    pass
75
76class AstNode:
77    def __str__(self):
78        return ''.join(self)
79
80class TerminalAstNode(AstNode):
81    def __init__(self, s):
82        self.s = s
83        self.is_multiline = (s == '\n')
84        # last_line_length is the string length if s is not a multiline string.
85        # For multiline strings ending in a newline, this is 0.
86        if self.is_multiline:
87            self.first_line_length = 0
88            self.last_line_length = 0
89            self.max_line_length = 0
90        else:
91            # This never happens ATM, so we don't handle it.
92            assert not '\n' in s
93
94            self.first_line_length = len(s)
95            self.last_line_length = len(s)
96            self.max_line_length = len(s)
97
98    def __iter__(self):
99        return iter((self.s,))
100
101class NonTerminalAstNode(AstNode):
102    def __init__(self, children_ast_nodes):
103        self.children_ast_nodes = children_ast_nodes
104        first_line_length = 0
105        last_line_length = 0
106        is_multiline = False
107        max_line_length = 0
108        for node in children_ast_nodes:
109            if node.is_multiline:
110                last_line_length = node.last_line_length
111                max_line_length = max(max_line_length, last_line_length + node.first_line_length, node.max_line_length)
112                is_multiline = True
113            else:
114                last_line_length += node.last_line_length
115                max_line_length = max(max_line_length, last_line_length)
116
117        self.first_line_length = first_line_length
118        self.last_line_length = last_line_length
119        self.is_multiline = is_multiline
120        self.max_line_length = max_line_length
121
122    def __iter__(self):
123        return itertools.chain(*self.children_ast_nodes)
124
125max_line_length = 80
126# Size of an indent in spaces.
127single_indent_length = 4
128
129class TerminalNodeFactory():
130    def __init__(self, s):
131        self.s = s
132
133    def __call__(self, current_indent, current_line_length, inside_meta_type, last_token_was_type_wrapper, accept_single_line_only):
134        return TerminalAstNode(self.s)
135
136# 'balanced_string' nodes evaluate to a function (or a callable object) taking these parameters:
137#     current_indent (integer): the indentation in the current line (spaces only)
138#     current_line_length (integer): the number of preceding characters in the current line (>=current_indent)
139#     inside_meta_type (boolean): whether we're inside a Type<...>
140#     last_token_was_type_wrapper (boolean): whether the immediately-preceding token was the identifier 'Type'
141#  and returning an AstNode
142# 'comma_separated_balanced_string' nodes evaluate to a tuple of such functions
143
144def p_comma_separated_balanced_string_empty(p):
145    'comma_separated_balanced_string : '
146    p[0] = tuple()
147
148def p_comma_separated_balanced_string_not_empty(p):
149    'comma_separated_balanced_string : COMMA balanced_string comma_separated_balanced_string'
150    p[0] = (
151        p[2],
152        *(p[3])
153    )
154
155def p_optional_balanced_string_empty(p):
156    'optional_balanced_string : '
157    p[0] = TerminalNodeFactory('')
158
159def p_optional_balanced_string_not_empty(p):
160    'optional_balanced_string : balanced_string'
161    p[0] = p[1]
162
163class BalancedStringTerminalNodeFactory():
164    def __init__(self, first_token, node_factory):
165        self.first_token = first_token
166        self.node_factory = node_factory
167
168    def __call__(self, current_indent, current_line_length, inside_meta_type, last_token_was_type_wrapper, accept_single_line_only):
169        terminal_node = TerminalAstNode(self.first_token)
170        non_terminal_node = self.node_factory(
171            current_indent,
172            current_line_length + len(self.first_token),
173            inside_meta_type,
174            self.first_token == 'Type',
175            accept_single_line_only)
176        if non_terminal_node is None:
177            return None
178        return NonTerminalAstNode((terminal_node, non_terminal_node))
179
180def p_balanced_string_terminal(p):
181    '''balanced_string : DOUBLE_COLON balanced_string
182                       | IDENTIFIER optional_balanced_string
183                       | ASTERISK optional_balanced_string
184                       | AMPERSAND optional_balanced_string
185    '''
186    first_token = p[1]
187    node_factory = p[2]
188
189    p[0] = BalancedStringTerminalNodeFactory(first_token, node_factory)
190
191def create_composite_node_from_factories(node_factory_inside_meta_type_pairs, current_line_length, accept_single_line_only):
192    nodes = []
193    for node_factory, current_indent, inside_meta_type in node_factory_inside_meta_type_pairs:
194        node = node_factory(current_indent, current_line_length, inside_meta_type, False, accept_single_line_only)
195        if node is None:
196            return None
197        nodes.append(node)
198        if node.is_multiline:
199            if accept_single_line_only:
200                raise Exception('Unexpected multiline, due to factory: ' + node_factory)
201            # Note that due to the way we break lines, the last line will have the same indent as the first.
202            # So we don't need to update current_indent here.
203            current_line_length = node.last_line_length
204        else:
205            current_line_length += node.last_line_length
206    return NonTerminalAstNode(nodes)
207
208def compute_layout(left_token, intermediate_node_factories, right_token, rhs_node_factory, current_indent, current_line_length, inside_meta_type, last_token_was_type_wrapper, accept_single_line_only):
209    # We lay out the result in one of two ways:
210    #
211    # $previousIndent $previousContent LPAREN x1, x2, x3 RPAREN balanced_string
212    #
213    # Or:
214    #
215    # $previousIndent $previousContent LPAREN
216    # $previousIndent $indent x1 ,
217    # $previousIndent $indent x2 ,
218    # $previousIndent $indent x3 RPAREN balanced_string
219
220    entering_meta_type = last_token_was_type_wrapper
221
222    # First, we try to use the first format if possible
223    node_factory_inside_meta_type_pairs = [
224        (TerminalNodeFactory(left_token), current_indent, inside_meta_type),
225        *((intermediate_node_factory, current_indent, (inside_meta_type or entering_meta_type))
226          for intermediate_node_factory in intermediate_node_factories),
227        (TerminalNodeFactory(right_token), current_indent, inside_meta_type),
228        (rhs_node_factory, current_indent, inside_meta_type),
229    ]
230    node_with_single_line_layout = create_composite_node_from_factories(node_factory_inside_meta_type_pairs, current_line_length, True)
231    if node_with_single_line_layout is not None and node_with_single_line_layout.max_line_length <= max_line_length:
232        assert not node_with_single_line_layout.is_multiline
233        return node_with_single_line_layout
234
235    if accept_single_line_only:
236        return None
237
238    # The result exceeds the line length, let's switch to the second one.
239    node_factory_inside_meta_type_pairs = [
240        (TerminalNodeFactory(left_token),
241         current_indent,
242         inside_meta_type)
243    ]
244    new_indent_length = current_indent + single_indent_length
245    comma_node_factory_inside_meta_type_pair = (TerminalNodeFactory(','), current_indent, inside_meta_type or entering_meta_type)
246    newline_node_factory_inside_meta_type_pair = (TerminalNodeFactory('\n'), current_indent, inside_meta_type or entering_meta_type)
247    indent_node_factory_inside_meta_type_pair = (TerminalNodeFactory(' ' * new_indent_length), current_indent, inside_meta_type or entering_meta_type)
248    for inner_node_factory in intermediate_node_factories:
249        node_factory_inside_meta_type_pairs.append(newline_node_factory_inside_meta_type_pair)
250        node_factory_inside_meta_type_pairs.append(indent_node_factory_inside_meta_type_pair)
251        node_factory_inside_meta_type_pairs.append((inner_node_factory, new_indent_length, inside_meta_type or entering_meta_type))
252        node_factory_inside_meta_type_pairs.append(comma_node_factory_inside_meta_type_pair)
253    node_factory_inside_meta_type_pairs.pop()
254    node_factory_inside_meta_type_pairs.append((TerminalNodeFactory(right_token), current_indent, inside_meta_type))
255    node_factory_inside_meta_type_pairs.append((rhs_node_factory, current_indent, inside_meta_type))
256    return create_composite_node_from_factories(node_factory_inside_meta_type_pairs, current_line_length, accept_single_line_only)
257
258
259def p_balanced_string_with_balanced_token_no_comma_separated_elems(p):
260    '''balanced_string : LPAREN    RPAREN       optional_balanced_string
261                       | LBRACKET  RBRACKET     optional_balanced_string
262                       | LBRACE    RBRACE       optional_balanced_string
263                       | LESS_THAN GREATER_THAN optional_balanced_string
264    '''
265    p_1 = p[1]
266    p_2 = p[2]
267    p_3 = p[3]
268    def result(current_indent, current_line_length, inside_meta_type, last_token_was_type_wrapper, accept_single_line_only):
269        return compute_layout(p_1, [], p_2, p_3, current_indent, current_line_length, inside_meta_type, last_token_was_type_wrapper, accept_single_line_only)
270
271    p[0] = result
272
273def p_balanced_string_with_balanced_token_some_comma_separated_elems(p):
274    '''balanced_string : LPAREN    balanced_string comma_separated_balanced_string RPAREN       optional_balanced_string
275                       | LBRACKET  balanced_string comma_separated_balanced_string RBRACKET     optional_balanced_string
276                       | LBRACE    balanced_string comma_separated_balanced_string RBRACE       optional_balanced_string
277                       | LESS_THAN balanced_string comma_separated_balanced_string GREATER_THAN optional_balanced_string
278    '''
279    p_1 = p[1]
280    p_2 = p[2]
281    p_3 = p[3]
282    p_4 = p[4]
283    p_5 = p[5]
284    def result(current_indent, current_line_length, inside_meta_type, last_token_was_type_wrapper, accept_single_line_only):
285        if not inside_meta_type:
286            if p_1 == '(' and p_4 == ')':
287                if len(p_3) == 0:
288                    if isinstance(p_2, BalancedStringTerminalNodeFactory) and p_2.first_token == '*':
289                        if isinstance(p_2.node_factory, TerminalNodeFactory) and p_2.node_factory.s == '':
290                            # Special case: we're not inside a Type<...> and we've encountered a '(*)'.
291                            # Discard it and just print the rhs.
292                            return p_5(current_indent, current_line_length, inside_meta_type, False, accept_single_line_only)
293
294        return compute_layout(p_1, (p_2, *(p_3)), p_4, p_5, current_indent, current_line_length, inside_meta_type, last_token_was_type_wrapper, accept_single_line_only)
295
296    p[0] = result
297
298def p_error(p):
299    raise Exception("Syntax error when parsing meta type: ", p[:])
300
301lexer = lex.lex()
302parser = yacc.yacc(start='balanced_string')
303
304strings_to_remove = re.compile(r'template class |template type alias |function template specialization |member class |member function |default argument for |fruit::impl::meta::|fruit::impl::|fruit::')
305
306def do_simplify_template_trace_element(element):
307    element, _ = re.subn(strings_to_remove, '', element)
308    element = element.strip()
309    if element[0] != '\'' or element[-1] != '\'':
310        raise Exception('Expected single quotes in: ' + element)
311    element = element[1:-1]
312    if element.startswith('DoEval<') and element[-1] == '>':
313        element = element[7:-1]
314    result = ''.join(parser.parse(element, lexer)(0, 0, False, False, False))
315    return result
316
317@memoize(maxsize=1000)
318def simplify_template_trace_element(element, executor):
319    return executor.submit(do_simplify_template_trace_element, element)
320
321def to_dot_left_justified_string(s):
322    return '\\l'.join(s.splitlines() + [''])
323
324def main():
325    diagnostics = []
326
327    with futures.ProcessPoolExecutor() as executor:
328        lines = sys.stdin.readlines()
329        for line_number, line in enumerate(lines):
330            # Remove the newline
331            line = line[:-1]
332
333            matches = in_file_included_from_pattern.search(line)
334            if matches:
335                continue
336
337            matches = diagnostic_header_pattern.search(line)
338            if matches:
339                diagnostic_kind, diagnostic_message = matches.groups()
340                if diagnostic_kind == 'error':
341                    diagnostics.append(Diagnostic(diagnostic_kind, diagnostic_message))
342                    print('Processing diagnostic. (%s / %s) ' % (line_number, len(lines)), file=sys.stderr)
343                elif diagnostic_kind == 'note':
344                    matches = in_instantiation_of_template_pattern.search(diagnostic_message)
345                    if matches:
346                        if not diagnostics:
347                            raise Exception('Found template instantiation note before any error diagnostic: %s' % diagnostic_message)
348                        if 'in instantiation of template type alias' in line:
349                            pass
350                        else:
351                            group = matches.groups()[0]
352                            trace_element_future = simplify_template_trace_element(group, executor)
353                            diagnostics[-1].template_instantiation_trace.append(trace_element_future)
354                        continue
355
356                    matches = static_warning_marked_deprecated_here_pattern.search(diagnostic_message)
357                    if matches:
358                        continue
359
360                    raise Exception('Found unknown note: %s' % diagnostic_message)
361
362        call_graph = {}
363        graph = gv.AGraph(directed=True)
364
365        for diagnostic_index, diagnostic in enumerate(diagnostics):
366            if diagnostic_index % 10 == 0:
367                print('Constructing dep graph: iteration %s/%s' % (diagnostic_index, len(diagnostics)), file=sys.stderr)
368
369            template_instantiation_trace = [trace_element_future.result() for trace_element_future in diagnostic.template_instantiation_trace]
370            for called, caller in zip(template_instantiation_trace[1:], template_instantiation_trace[2:]):
371                if called in call_graph and call_graph[called] != caller:
372                    # Avoid this edge, so that the resulting graph is a tree
373                    continue
374                graph.add_edge(to_dot_left_justified_string(caller), to_dot_left_justified_string(called))
375                call_graph[called] = caller
376
377        print(graph)
378
379if __name__ == '__main__':
380    main()
381