1#------------------------------------------------------------------------------
2# pycparser: c_json.py
3#
4# by Michael White (@mypalmike)
5#
6# This example includes functions to serialize and deserialize an ast
7# to and from json format. Serializing involves walking the ast and converting
8# each node from a python Node object into a python dict. Deserializing
9# involves the opposite conversion, walking the tree formed by the
10# dict and converting each dict into the specific Node object it represents.
11# The dict itself is serialized and deserialized using the python json module.
12#
13# The dict representation is a fairly direct transformation of the object
14# attributes. Each node in the dict gets one metadata field referring to the
15# specific node class name, _nodetype. Each local attribute (i.e. not linking
16# to child nodes) has a string value or array of string values. Each child
17# attribute is either another dict or an array of dicts, exactly as in the
18# Node object representation. The "coord" attribute, representing the
19# node's location within the source code, is serialized/deserialized from
20# a Coord object into a string of the format "filename:line[:column]".
21#
22# Example TypeDecl node, with IdentifierType child node, represented as a dict:
23#     "type": {
24#         "_nodetype": "TypeDecl",
25#         "coord": "c_files/funky.c:8",
26#         "declname": "o",
27#         "quals": [],
28#         "type": {
29#             "_nodetype": "IdentifierType",
30#             "coord": "c_files/funky.c:8",
31#             "names": [
32#                 "char"
33#             ]
34#         }
35#     }
36#------------------------------------------------------------------------------
37from __future__ import print_function
38
39import json
40import sys
41import re
42
43# This is not required if you've installed pycparser into
44# your site-packages/ with setup.py
45#
46sys.path.extend(['.', '..'])
47
48from pycparser import parse_file, c_ast
49from pycparser.plyparser import Coord
50
51
# Matches the name of a child stored in a list, e.g. 'block_items[1]',
# capturing the list name and the text between the brackets.
RE_CHILD_ARRAY = re.compile(r'(.*)\[(.*)\]')
# Matches double-underscore names such as '__weakref__'.
RE_INTERNAL_ATTR = re.compile(r'__.*__')
54
55
class CJsonError(Exception):
    """ Error raised by this module when an ast cannot be serialized. """
58
59
def memodict(fn):
    """
    Memoize a single-argument function.

    The cache is a dict subclass: the first lookup of an argument falls
    through to __missing__, which calls fn and stores the result; later
    lookups are plain dict hits. The returned callable is the cache's
    bound __getitem__, so arguments must be hashable.

    """
    class _Cache(dict):
        def __missing__(self, arg):
            value = fn(arg)
            self[arg] = value
            return value

    cache = _Cache()
    return cache.__getitem__
67
68
@memodict
def child_attrs_of(klass):
    """
    Return the set of attribute names of a Node class that hold children.

    These are the names in the class's __slots__ that are neither
    double-underscore internals nor listed in attr_names.
    Memoized so the computation runs once per class.

    """
    slot_names = {
        name for name in klass.__slots__
        if not RE_INTERNAL_ATTR.match(name)
    }
    return slot_names - set(klass.attr_names)
79
80
def to_dict(node):
    """
    Recursively convert an ast into dict representation.

    The resulting dict holds the node class name under '_nodetype', every
    name in the class's attr_names, the coord as a string (or None), and
    each child as a nested dict or a list of nested dicts.

    Raises CJsonError if the entries of a child list do not arrive in
    index order.

    """
    node_class = node.__class__

    # Metadata first, then the plain (non-child) attributes.
    out = {'_nodetype': node_class.__name__}
    out.update((name, getattr(node, name)) for name in node_class.attr_names)

    # The Coord object is stored in its string form.
    out['coord'] = str(node.coord) if node.coord else None

    for child_name, child in node.children():
        # Child names are either simple (e.g. 'value') or indexed
        # (e.g. 'block_items[1]').
        indexed = RE_CHILD_ARRAY.match(child_name)
        if indexed is None:
            out[child_name] = to_dict(child)
            continue

        list_name, position = indexed.groups()
        position = int(position)
        # List entries are expected in order, so check the index and append.
        siblings = out.setdefault(list_name, [])
        if position != len(siblings):
            raise CJsonError('Internal ast error. Array {} out of order. '
                'Expected index {}, got {}'.format(
                list_name, len(siblings), position))
        siblings.append(to_dict(child))

    # Child attributes that were not visited still need a None entry.
    for name in child_attrs_of(node_class):
        out.setdefault(name, None)

    return out
123
124
def to_json(node, **kwargs):
    """
    Serialize an ast node to a json string.

    Any keyword arguments are passed through to json.dumps.

    """
    as_dict = to_dict(node)
    return json.dumps(as_dict, **kwargs)
128
129
def file_to_dict(filename):
    """ Parse a C file (with use_cpp=True) and return its ast as a dict. """
    return to_dict(parse_file(filename, use_cpp=True))
134
135
def file_to_json(filename, **kwargs):
    """
    Parse a C file (with use_cpp=True) and return its ast as a json string.

    Keyword arguments are forwarded to to_json.

    """
    return to_json(parse_file(filename, use_cpp=True), **kwargs)
140
141
def _parse_coord(coord_str):
    """
    Parse coord string (file:line[:column]) into Coord object.

    Returns None when coord_str is None. Otherwise returns a Coord whose
    line and column are ints, or None for whichever of them the string
    does not carry.

    """
    if coord_str is None:
        return None

    # Anchor the numeric fields at the end of the string, so colons that
    # belong to the filename (e.g. a Windows drive prefix such as C:\dir\f.c)
    # stay in the filename instead of being treated as separators.
    match = re.match(
        r'(.*?)(?::([0-9]+))?(?::([0-9]+))?\Z', coord_str, re.DOTALL)
    filename, line, column = match.groups()

    # Restore the numeric fields as ints rather than leaving them as strings.
    line = int(line) if line is not None else None
    column = int(column) if column is not None else None
    return Coord(filename, line, column)
151
152
153def _convert_to_obj(value):
154    """
155    Convert an object in the dict representation into an object.
156    Note: Mutually recursive with from_dict.
157
158    """
159    value_type = type(value)
160    if value_type == dict:
161        return from_dict(value)
162    elif value_type == list:
163        return [_convert_to_obj(item) for item in value]
164    else:
165        # String
166        return value
167
168
def from_dict(node_dict):
    """
    Recursively build an ast from dict representation.

    node_dict must have a '_nodetype' entry naming a class in c_ast; every
    other entry is passed to that class's constructor as a keyword
    argument. The 'coord' entry is parsed into a Coord, and nested dicts
    and lists are converted recursively.

    The input dict is left unmodified, so it can be reused after the call.

    Raises KeyError if '_nodetype' is missing and AttributeError if it does
    not name an attribute of c_ast.

    """
    # Read the metadata without popping it, so the caller's dict is intact.
    class_name = node_dict['_nodetype']

    klass = getattr(c_ast, class_name)

    # Create a new dict containing the key-value pairs which we can pass
    # to node constructors.
    objs = {}
    for key, value in node_dict.items():
        if key == '_nodetype':
            # Metadata only; not a constructor argument.
            continue
        if key == 'coord':
            objs[key] = _parse_coord(value)
        else:
            objs[key] = _convert_to_obj(value)

    # Every remaining key is used as a keyword parameter of the Node
    # initializer.
    return klass(**objs)
187
188
def from_json(ast_json):
    """ Decode a json string and build the ast it represents. """
    decoded = json.loads(ast_json)
    return from_dict(decoded)
192
193
194#------------------------------------------------------------------------------
if __name__ == "__main__":
    if len(sys.argv) <= 1:
        print("Please provide a filename as argument")
    else:
        # Some test code...
        # Round trip C -> ast -> dict -> ast -> json, then print the json.
        ast_dict = file_to_dict(sys.argv[1])
        ast = from_dict(ast_dict)
        print(to_json(ast, sort_keys=True, indent=4))
204