1#!/usr/bin/env python3
2"""A glorified C pre-processor parser."""
3
4import ctypes
5import logging
6import os
7import re
8import site
9import unittest
10import utils
11
12top = os.getenv('ANDROID_BUILD_TOP')
13if top is None:
14    utils.panic('ANDROID_BUILD_TOP not set.\n')
15
16# Set up the env vars for libclang.
17site.addsitedir(os.path.join(top, 'prebuilts/clang/host/linux-x86/clang-stable/lib/python3/site-packages/'))
18
19import clang.cindex
20from clang.cindex import conf
21from clang.cindex import Cursor
22from clang.cindex import CursorKind
23from clang.cindex import SourceLocation
24from clang.cindex import SourceRange
25from clang.cindex import TokenGroup
26from clang.cindex import TokenKind
27from clang.cindex import TranslationUnit
28
29# Set up LD_LIBRARY_PATH to include libclang.so, libLLVM.so, and etc.
30# Note that setting LD_LIBRARY_PATH with os.putenv() sometimes doesn't help.
31clang.cindex.Config.set_library_file(os.path.join(top, 'prebuilts/clang/host/linux-x86/clang-stable/lib/libclang.so'))
32
33from defaults import *
34
35
36debugBlockParser = False
37debugCppExpr = False
38debugOptimIf01 = False
39
40###############################################################################
41###############################################################################
42#####                                                                     #####
43#####           C P P   T O K E N S                                       #####
44#####                                                                     #####
45###############################################################################
46###############################################################################
47
48# the list of supported C-preprocessor tokens
49# plus a couple of C tokens as well
50tokEOF = "\0"
51tokLN = "\n"
52tokSTRINGIFY = "#"
53tokCONCAT = "##"
54tokLOGICAND = "&&"
55tokLOGICOR = "||"
56tokSHL = "<<"
57tokSHR = ">>"
58tokEQUAL = "=="
59tokNEQUAL = "!="
60tokLT = "<"
61tokLTE = "<="
62tokGT = ">"
63tokGTE = ">="
64tokELLIPSIS = "..."
65tokSPACE = " "
66tokDEFINED = "defined"
67tokLPAREN = "("
68tokRPAREN = ")"
69tokNOT = "!"
70tokPLUS = "+"
71tokMINUS = "-"
72tokMULTIPLY = "*"
73tokDIVIDE = "/"
74tokMODULUS = "%"
75tokBINAND = "&"
76tokBINOR = "|"
77tokBINXOR = "^"
78tokCOMMA = ","
79tokLBRACE = "{"
80tokRBRACE = "}"
81tokARROW = "->"
82tokINCREMENT = "++"
83tokDECREMENT = "--"
84tokNUMBER = "<number>"
85tokIDENT = "<ident>"
86tokSTRING = "<string>"
87
88
89class Token(clang.cindex.Token):
90    """A class that represents one token after parsing.
91
92    It inherits the class in libclang, with an extra id property to hold the
93    new spelling of the token. The spelling property in the base class is
94    defined as read-only. New names after macro instantiation are saved in
95    their ids now. It also facilitates the renaming of directive optimizations
96    like replacing 'ifndef X' with 'if !defined(X)'.
97
98    It also overrides the cursor property of the base class. Because the one
99    in libclang always queries based on a single token, which usually doesn't
100    hold useful information. The cursor in this class can be set by calling
101    CppTokenizer.getTokensWithCursors(). Otherwise it returns the one in the
102    base class.
103    """
104
105    def __init__(self, tu=None, group=None, int_data=None, ptr_data=None,
106                 cursor=None):
107        clang.cindex.Token.__init__(self)
108        self._id = None
109        self._tu = tu
110        self._group = group
111        self._cursor = cursor
112        # self.int_data and self.ptr_data are from the base class. But
113        # self.int_data doesn't accept a None value.
114        if int_data is not None:
115            self.int_data = int_data
116        self.ptr_data = ptr_data
117
118    @property
119    def id(self):
120        """Name of the token."""
121        if self._id is None:
122            return self.spelling
123        else:
124            return self._id
125
126    @id.setter
127    def id(self, new_id):
128        """Setting name of the token."""
129        self._id = new_id
130
131    @property
132    def cursor(self):
133        if self._cursor is None:
134            self._cursor = clang.cindex.Token.cursor
135        return self._cursor
136
137    @cursor.setter
138    def cursor(self, new_cursor):
139        self._cursor = new_cursor
140
141    def __repr__(self):
142        if self.id == 'defined':
143            return self.id
144        elif self.kind == TokenKind.IDENTIFIER:
145            return "(ident %s)" % self.id
146
147        return self.id
148
149    def __str__(self):
150        return self.id
151
152
153class BadExpectedToken(Exception):
154    """An exception that will be raised for unexpected tokens."""
155    pass
156
157
158class UnparseableStruct(Exception):
159    """An exception that will be raised for structs that cannot be parsed."""
160    pass
161
162
163# The __contains__ function in libclang SourceRange class contains a bug. It
164# gives wrong result when dealing with single line range.
165# Bug filed with upstream:
166# http://llvm.org/bugs/show_bug.cgi?id=22243, http://reviews.llvm.org/D7277
167def SourceRange__contains__(self, other):
168    """Determine if a given location is inside the range."""
169    if not isinstance(other, SourceLocation):
170        return False
171    if other.file is None and self.start.file is None:
172        pass
173    elif (self.start.file.name != other.file.name or
174          other.file.name != self.end.file.name):
175        # same file name
176        return False
177    # same file, in between lines
178    if self.start.line < other.line < self.end.line:
179        return True
180    # same file, same line
181    elif self.start.line == other.line == self.end.line:
182        if self.start.column <= other.column <= self.end.column:
183            return True
184    elif self.start.line == other.line:
185        # same file first line
186        if self.start.column <= other.column:
187            return True
188    elif other.line == self.end.line:
189        # same file last line
190        if other.column <= self.end.column:
191            return True
192    return False
193
194
195SourceRange.__contains__ = SourceRange__contains__
196
197
198################################################################################
199################################################################################
200#####                                                                      #####
201#####           C P P   T O K E N I Z E R                                  #####
202#####                                                                      #####
203################################################################################
204################################################################################
205
206
207class CppTokenizer(object):
208    """A tokenizer that converts some input text into a list of tokens.
209
210    It calls libclang's tokenizer to get the parsed tokens. In addition, it
211    updates the cursor property in each token after parsing, by calling
212    getTokensWithCursors().
213    """
214
215    clang_flags = ['-E', '-x', 'c']
216    options = TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD
217
218    def __init__(self):
219        """Initialize a new CppTokenizer object."""
220        self._indexer = clang.cindex.Index.create()
221        self._tu = None
222        self._index = 0
223        self.tokens = None
224
225    def _getTokensWithCursors(self):
226        """Helper method to return all tokens with their cursors.
227
228        The cursor property in a clang Token doesn't provide enough
229        information. Because it is queried based on single token each time
230        without any context, i.e. via calling conf.lib.clang_annotateTokens()
231        with only one token given. So we often see 'INVALID_FILE' in one
232        token's cursor. In this function it passes all the available tokens
233        to get more informative cursors.
234        """
235
236        tokens_memory = ctypes.POINTER(clang.cindex.Token)()
237        tokens_count = ctypes.c_uint()
238
239        conf.lib.clang_tokenize(self._tu, self._tu.cursor.extent,
240                                ctypes.byref(tokens_memory),
241                                ctypes.byref(tokens_count))
242
243        count = int(tokens_count.value)
244
245        # If we get no tokens, no memory was allocated. Be sure not to return
246        # anything and potentially call a destructor on nothing.
247        if count < 1:
248            return
249
250        cursors = (Cursor * count)()
251        cursors_memory = ctypes.cast(cursors, ctypes.POINTER(Cursor))
252
253        conf.lib.clang_annotateTokens(self._tu, tokens_memory, count,
254                                      cursors_memory)
255
256        tokens_array = ctypes.cast(
257            tokens_memory,
258            ctypes.POINTER(clang.cindex.Token * count)).contents
259        token_group = TokenGroup(self._tu, tokens_memory, tokens_count)
260
261        tokens = []
262        for i in range(0, count):
263            token = Token(self._tu, token_group,
264                          int_data=tokens_array[i].int_data,
265                          ptr_data=tokens_array[i].ptr_data,
266                          cursor=cursors[i])
267            # We only want non-comment tokens.
268            if token.kind != TokenKind.COMMENT:
269                tokens.append(token)
270
271        return tokens
272
273    def parseString(self, lines):
274        """Parse a list of text lines into a BlockList object."""
275        file_ = 'no-filename-available.c'
276        self._tu = self._indexer.parse(file_, self.clang_flags,
277                                       unsaved_files=[(file_, lines)],
278                                       options=self.options)
279        self.tokens = self._getTokensWithCursors()
280
281    def parseFile(self, file_):
282        """Parse a file into a BlockList object."""
283        self._tu = self._indexer.parse(file_, self.clang_flags,
284                                       options=self.options)
285        self.tokens = self._getTokensWithCursors()
286
287    def nextToken(self):
288        """Return next token from the list."""
289        if self._index < len(self.tokens):
290            t = self.tokens[self._index]
291            self._index += 1
292            return t
293        else:
294            return None
295
296
297class CppStringTokenizer(CppTokenizer):
298    """A CppTokenizer derived class that accepts a string of text as input."""
299
300    def __init__(self, line):
301        CppTokenizer.__init__(self)
302        self.parseString(line)
303
304
305class CppFileTokenizer(CppTokenizer):
306    """A CppTokenizer derived class that accepts a file as input."""
307
308    def __init__(self, file_):
309        CppTokenizer.__init__(self)
310        self.parseFile(file_)
311
312
313# Unit testing
314#
315class CppTokenizerTests(unittest.TestCase):
316    """CppTokenizer tests."""
317
318    def get_tokens(self, token_string, line_col=False):
319        tokens = CppStringTokenizer(token_string)
320        token_list = []
321        while True:
322            token = tokens.nextToken()
323            if not token:
324                break
325            if line_col:
326                token_list.append((token.id, token.location.line,
327                                   token.location.column))
328            else:
329                token_list.append(token.id)
330        return token_list
331
332    def test_hash(self):
333        self.assertEqual(self.get_tokens("#an/example  && (01923_xy)"),
334                         ["#", "an", "/", "example", tokLOGICAND, tokLPAREN,
335                          "01923_xy", tokRPAREN])
336
337    def test_parens(self):
338        self.assertEqual(self.get_tokens("FOO(BAR) && defined(BAZ)"),
339                         ["FOO", tokLPAREN, "BAR", tokRPAREN, tokLOGICAND,
340                          "defined", tokLPAREN, "BAZ", tokRPAREN])
341
342    def test_comment(self):
343        self.assertEqual(self.get_tokens("/*\n#\n*/"), [])
344
345    def test_line_cross(self):
346        self.assertEqual(self.get_tokens("first\nsecond"), ["first", "second"])
347
348    def test_line_cross_line_col(self):
349        self.assertEqual(self.get_tokens("first second\n  third", True),
350                         [("first", 1, 1), ("second", 1, 7), ("third", 2, 3)])
351
352    def test_comment_line_col(self):
353        self.assertEqual(self.get_tokens("boo /* what the\nhell */", True),
354                         [("boo", 1, 1)])
355
356    def test_escapes(self):
357        self.assertEqual(self.get_tokens("an \\\n example", True),
358                         [("an", 1, 1), ("example", 2, 2)])
359
360
361################################################################################
362################################################################################
363#####                                                                      #####
364#####           C P P   E X P R E S S I O N S                              #####
365#####                                                                      #####
366################################################################################
367################################################################################
368
369
370class CppExpr(object):
371    """A class that models the condition of #if directives into an expr tree.
372
373    Each node in the tree is of the form (op, arg) or (op, arg1, arg2) where
374    "op" is a string describing the operation
375    """
376
377    unaries = ["!", "~"]
378    binaries = ["+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%",
379                "&", "|", "^", "<<", ">>", "==", "!=", "?", ":"]
380    precedences = {
381        "?": 1, ":": 1,
382        "||": 2,
383        "&&": 3,
384        "|": 4,
385        "^": 5,
386        "&": 6,
387        "==": 7, "!=": 7,
388        "<": 8, "<=": 8, ">": 8, ">=": 8,
389        "<<": 9, ">>": 9,
390        "+": 10, "-": 10,
391        "*": 11, "/": 11, "%": 11,
392        "!": 12, "~": 12
393    }
394
395    def __init__(self, tokens):
396        """Initialize a CppExpr. 'tokens' must be a CppToken list."""
397        self.tokens = tokens
398        self._num_tokens = len(tokens)
399        self._index = 0
400
401        if debugCppExpr:
402            print("CppExpr: trying to parse %s" % repr(tokens))
403        self.expr = self.parseExpression(0)
404        if debugCppExpr:
405            print("CppExpr: got " + repr(self.expr))
406        if self._index != self._num_tokens:
407            self.throw(BadExpectedToken, "crap at end of input (%d != %d): %s"
408                       % (self._index, self._num_tokens, repr(tokens)))
409
410    def throw(self, exception, msg):
411        if self._index < self._num_tokens:
412            tok = self.tokens[self._index]
413            print("%d:%d: %s" % (tok.location.line, tok.location.column, msg))
414        else:
415            print("EOF: %s" % msg)
416        raise exception(msg)
417
418    def expectId(self, id):
419        """Check that a given token id is at the current position."""
420        token = self.tokens[self._index]
421        if self._index >= self._num_tokens or token.id != id:
422            self.throw(BadExpectedToken,
423                       "### expecting '%s' in expression, got '%s'" % (
424                           id, token.id))
425        self._index += 1
426
427    def is_decimal(self):
428        token = self.tokens[self._index].id
429        if token[-1] in "ULul":
430            token = token[:-1]
431        try:
432            val = int(token, 10)
433            self._index += 1
434            return ('int', val)
435        except ValueError:
436            return None
437
438    def is_octal(self):
439        token = self.tokens[self._index].id
440        if token[-1] in "ULul":
441            token = token[:-1]
442        if len(token) < 2 or token[0] != '0':
443            return None
444        try:
445            val = int(token, 8)
446            self._index += 1
447            return ('oct', val)
448        except ValueError:
449            return None
450
451    def is_hexadecimal(self):
452        token = self.tokens[self._index].id
453        if token[-1] in "ULul":
454            token = token[:-1]
455        if len(token) < 3 or (token[:2] != '0x' and token[:2] != '0X'):
456            return None
457        try:
458            val = int(token, 16)
459            self._index += 1
460            return ('hex', val)
461        except ValueError:
462            return None
463
464    def is_integer(self):
465        if self.tokens[self._index].kind != TokenKind.LITERAL:
466            return None
467
468        c = self.is_hexadecimal()
469        if c:
470            return c
471
472        c = self.is_octal()
473        if c:
474            return c
475
476        c = self.is_decimal()
477        if c:
478            return c
479
480        return None
481
482    def is_number(self):
483        t = self.tokens[self._index]
484        if t.id == tokMINUS and self._index + 1 < self._num_tokens:
485            self._index += 1
486            c = self.is_integer()
487            if c:
488                op, val = c
489                return (op, -val)
490        if t.id == tokPLUS and self._index + 1 < self._num_tokens:
491            self._index += 1
492            c = self.is_integer()
493            if c:
494                return c
495
496        return self.is_integer()
497
498    def is_defined(self):
499        t = self.tokens[self._index]
500        if t.id != tokDEFINED:
501            return None
502
503        # We have the defined keyword, check the rest.
504        self._index += 1
505        used_parens = False
506        if (self._index < self._num_tokens and
507            self.tokens[self._index].id == tokLPAREN):
508            used_parens = True
509            self._index += 1
510
511        if self._index >= self._num_tokens:
512            self.throw(BadExpectedToken,
513                       "### 'defined' must be followed by macro name or left "
514                       "paren")
515
516        t = self.tokens[self._index]
517        if t.kind != TokenKind.IDENTIFIER:
518            self.throw(BadExpectedToken,
519                       "### 'defined' must be followed by macro name")
520
521        self._index += 1
522        if used_parens:
523            self.expectId(tokRPAREN)
524
525        return ("defined", t.id)
526
527    def is_call_or_ident(self):
528        if self._index >= self._num_tokens:
529            return None
530
531        t = self.tokens[self._index]
532        if t.kind != TokenKind.IDENTIFIER:
533            return None
534
535        name = t.id
536
537        self._index += 1
538        if (self._index >= self._num_tokens or
539            self.tokens[self._index].id != tokLPAREN):
540            return ("ident", name)
541
542        params = []
543        depth = 1
544        self._index += 1
545        j = self._index
546        while self._index < self._num_tokens:
547            id = self.tokens[self._index].id
548            if id == tokLPAREN:
549                depth += 1
550            elif depth == 1 and (id == tokCOMMA or id == tokRPAREN):
551                k = self._index
552                param = self.tokens[j:k]
553                params.append(param)
554                if id == tokRPAREN:
555                    break
556                j = self._index + 1
557            elif id == tokRPAREN:
558                depth -= 1
559            self._index += 1
560
561        if self._index >= self._num_tokens:
562            return None
563
564        self._index += 1
565        return ("call", (name, params))
566
567    # Implements the "precedence climbing" algorithm from
568    # http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm.
569    # The "classic" algorithm would be fine if we were using a tool to
570    # generate the parser, but we're not. Dijkstra's "shunting yard"
571    # algorithm hasn't been necessary yet.
572
573    def parseExpression(self, minPrecedence):
574        if self._index >= self._num_tokens:
575            return None
576
577        node = self.parsePrimary()
578        while (self.token() and self.isBinary(self.token()) and
579               self.precedence(self.token()) >= minPrecedence):
580            op = self.token()
581            self.nextToken()
582            rhs = self.parseExpression(self.precedence(op) + 1)
583            node = (op.id, node, rhs)
584
585        return node
586
587    def parsePrimary(self):
588        op = self.token()
589        if self.isUnary(op):
590            self.nextToken()
591            return (op.id, self.parseExpression(self.precedence(op)))
592
593        primary = None
594        if op.id == tokLPAREN:
595            self.nextToken()
596            primary = self.parseExpression(0)
597            self.expectId(tokRPAREN)
598        elif op.id == "?":
599            self.nextToken()
600            primary = self.parseExpression(0)
601            self.expectId(":")
602        elif op.id == '+' or op.id == '-' or op.kind == TokenKind.LITERAL:
603            primary = self.is_number()
604        # Checking for 'defined' needs to come first now because 'defined' is
605        # recognized as IDENTIFIER.
606        elif op.id == tokDEFINED:
607            primary = self.is_defined()
608        elif op.kind == TokenKind.IDENTIFIER:
609            primary = self.is_call_or_ident()
610        else:
611            self.throw(BadExpectedToken,
612                       "didn't expect to see a %s in factor" % (
613                           self.tokens[self._index].id))
614        return primary
615
616    def isBinary(self, token):
617        return token.id in self.binaries
618
619    def isUnary(self, token):
620        return token.id in self.unaries
621
622    def precedence(self, token):
623        return self.precedences.get(token.id)
624
625    def token(self):
626        if self._index >= self._num_tokens:
627            return None
628        return self.tokens[self._index]
629
630    def nextToken(self):
631        self._index += 1
632        if self._index >= self._num_tokens:
633            return None
634        return self.tokens[self._index]
635
636    def dump_node(self, e):
637        op = e[0]
638        line = "(" + op
639        if op == "int":
640            line += " %d)" % e[1]
641        elif op == "oct":
642            line += " 0%o)" % e[1]
643        elif op == "hex":
644            line += " 0x%x)" % e[1]
645        elif op == "ident":
646            line += " %s)" % e[1]
647        elif op == "defined":
648            line += " %s)" % e[1]
649        elif op == "call":
650            arg = e[1]
651            line += " %s [" % arg[0]
652            prefix = ""
653            for param in arg[1]:
654                par = ""
655                for tok in param:
656                    par += str(tok)
657                line += "%s%s" % (prefix, par)
658                prefix = ","
659            line += "])"
660        elif op in CppExpr.unaries:
661            line += " %s)" % self.dump_node(e[1])
662        elif op in CppExpr.binaries:
663            line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2]))
664        else:
665            line += " ?%s)" % repr(e[1])
666
667        return line
668
669    def __repr__(self):
670        return self.dump_node(self.expr)
671
672    def source_node(self, e):
673        op = e[0]
674        if op == "int":
675            return "%d" % e[1]
676        if op == "hex":
677            return "0x%x" % e[1]
678        if op == "oct":
679            return "0%o" % e[1]
680        if op == "ident":
681            # XXX: should try to expand
682            return e[1]
683        if op == "defined":
684            return "defined(%s)" % e[1]
685
686        prec = CppExpr.precedences.get(op, 1000)
687        arg = e[1]
688        if op in CppExpr.unaries:
689            arg_src = self.source_node(arg)
690            arg_op = arg[0]
691            arg_prec = CppExpr.precedences.get(arg_op, 1000)
692            if arg_prec < prec:
693                return "!(" + arg_src + ")"
694            else:
695                return "!" + arg_src
696        if op in CppExpr.binaries:
697            arg2 = e[2]
698            arg1_op = arg[0]
699            arg2_op = arg2[0]
700            arg1_src = self.source_node(arg)
701            arg2_src = self.source_node(arg2)
702            if CppExpr.precedences.get(arg1_op, 1000) < prec:
703                arg1_src = "(%s)" % arg1_src
704            if CppExpr.precedences.get(arg2_op, 1000) < prec:
705                arg2_src = "(%s)" % arg2_src
706
707            return "%s %s %s" % (arg1_src, op, arg2_src)
708        return "???"
709
710    def __str__(self):
711        return self.source_node(self.expr)
712
713    @staticmethod
714    def int_node(e):
715        if e[0] in ["int", "oct", "hex"]:
716            return e[1]
717        else:
718            return None
719
720    def toInt(self):
721        return self.int_node(self.expr)
722
723    def optimize_node(self, e, macros=None):
724        if macros is None:
725            macros = {}
726        op = e[0]
727
728        if op == "defined":
729            op, name = e
730            if name in macros:
731                if macros[name] == kCppUndefinedMacro:
732                    return ("int", 0)
733                else:
734                    try:
735                        value = int(macros[name])
736                        return ("int", value)
737                    except ValueError:
738                        return ("defined", macros[name])
739
740            if kernel_remove_config_macros and name.startswith("CONFIG_"):
741                return ("int", 0)
742
743            return e
744
745        elif op == "ident":
746            op, name = e
747            if name in macros:
748                try:
749                    value = int(macros[name])
750                    expanded = ("int", value)
751                except ValueError:
752                    expanded = ("ident", macros[name])
753                return self.optimize_node(expanded, macros)
754            return e
755
756        elif op == "!":
757            op, v = e
758            v = self.optimize_node(v, macros)
759            if v[0] == "int":
760                if v[1] == 0:
761                    return ("int", 1)
762                else:
763                    return ("int", 0)
764            return ('!', v)
765
766        elif op == "&&":
767            op, l, r = e
768            l = self.optimize_node(l, macros)
769            r = self.optimize_node(r, macros)
770            li = self.int_node(l)
771            ri = self.int_node(r)
772            if li is not None:
773                if li == 0:
774                    return ("int", 0)
775                else:
776                    return r
777            elif ri is not None:
778                if ri == 0:
779                    return ("int", 0)
780                else:
781                    return l
782            return (op, l, r)
783
784        elif op == "||":
785            op, l, r = e
786            l = self.optimize_node(l, macros)
787            r = self.optimize_node(r, macros)
788            li = self.int_node(l)
789            ri = self.int_node(r)
790            if li is not None:
791                if li == 0:
792                    return r
793                else:
794                    return ("int", 1)
795            elif ri is not None:
796                if ri == 0:
797                    return l
798                else:
799                    return ("int", 1)
800            return (op, l, r)
801
802        else:
803            return e
804
805    def optimize(self, macros=None):
806        if macros is None:
807            macros = {}
808        self.expr = self.optimize_node(self.expr, macros)
809
810class CppExprTest(unittest.TestCase):
811    """CppExpr unit tests."""
812
813    def get_expr(self, expr):
814        return repr(CppExpr(CppStringTokenizer(expr).tokens))
815
816    def test_cpp_expr(self):
817        self.assertEqual(self.get_expr("0"), "(int 0)")
818        self.assertEqual(self.get_expr("1"), "(int 1)")
819        self.assertEqual(self.get_expr("-5"), "(int -5)")
820        self.assertEqual(self.get_expr("+1"), "(int 1)")
821        self.assertEqual(self.get_expr("0U"), "(int 0)")
822        self.assertEqual(self.get_expr("015"), "(oct 015)")
823        self.assertEqual(self.get_expr("015l"), "(oct 015)")
824        self.assertEqual(self.get_expr("0x3e"), "(hex 0x3e)")
825        self.assertEqual(self.get_expr("(0)"), "(int 0)")
826        self.assertEqual(self.get_expr("1 && 1"), "(&& (int 1) (int 1))")
827        self.assertEqual(self.get_expr("1 && 0"), "(&& (int 1) (int 0))")
828        self.assertEqual(self.get_expr("EXAMPLE"), "(ident EXAMPLE)")
829        self.assertEqual(self.get_expr("EXAMPLE - 3"),
830                         "(- (ident EXAMPLE) (int 3))")
831        self.assertEqual(self.get_expr("defined(EXAMPLE)"),
832                         "(defined EXAMPLE)")
833        self.assertEqual(self.get_expr("defined ( EXAMPLE ) "),
834                         "(defined EXAMPLE)")
835        self.assertEqual(self.get_expr("!defined(EXAMPLE)"),
836                         "(! (defined EXAMPLE))")
837        self.assertEqual(self.get_expr("defined(ABC) || defined(BINGO)"),
838                         "(|| (defined ABC) (defined BINGO))")
839        self.assertEqual(self.get_expr("FOO(BAR,5)"), "(call FOO [BAR,5])")
840        self.assertEqual(self.get_expr("A == 1 || defined(B)"),
841                         "(|| (== (ident A) (int 1)) (defined B))")
842
843    def get_expr_optimize(self, expr, macros=None):
844        if macros is None:
845            macros = {}
846        e = CppExpr(CppStringTokenizer(expr).tokens)
847        e.optimize(macros)
848        return repr(e)
849
850    def test_cpp_expr_optimize(self):
851        self.assertEqual(self.get_expr_optimize("0"), "(int 0)")
852        self.assertEqual(self.get_expr_optimize("1"), "(int 1)")
853        self.assertEqual(self.get_expr_optimize("1 && 1"), "(int 1)")
854        self.assertEqual(self.get_expr_optimize("1 && +1"), "(int 1)")
855        self.assertEqual(self.get_expr_optimize("0x1 && 01"), "(oct 01)")
856        self.assertEqual(self.get_expr_optimize("1 && 0"), "(int 0)")
857        self.assertEqual(self.get_expr_optimize("0 && 1"), "(int 0)")
858        self.assertEqual(self.get_expr_optimize("0 && 0"), "(int 0)")
859        self.assertEqual(self.get_expr_optimize("1 || 1"), "(int 1)")
860        self.assertEqual(self.get_expr_optimize("1 || 0"), "(int 1)")
861        self.assertEqual(self.get_expr_optimize("0 || 1"), "(int 1)")
862        self.assertEqual(self.get_expr_optimize("0 || 0"), "(int 0)")
863        self.assertEqual(self.get_expr_optimize("A"), "(ident A)")
864        self.assertEqual(self.get_expr_optimize("A", {"A": 1}), "(int 1)")
865        self.assertEqual(self.get_expr_optimize("A || B", {"A": 1}), "(int 1)")
866        self.assertEqual(self.get_expr_optimize("A || B", {"B": 1}), "(int 1)")
867        self.assertEqual(self.get_expr_optimize("A && B", {"A": 1}), "(ident B)")
868        self.assertEqual(self.get_expr_optimize("A && B", {"B": 1}), "(ident A)")
869        self.assertEqual(self.get_expr_optimize("A && B"), "(&& (ident A) (ident B))")
870        self.assertEqual(self.get_expr_optimize("EXAMPLE"), "(ident EXAMPLE)")
871        self.assertEqual(self.get_expr_optimize("EXAMPLE - 3"), "(- (ident EXAMPLE) (int 3))")
872        self.assertEqual(self.get_expr_optimize("defined(EXAMPLE)"), "(defined EXAMPLE)")
873        self.assertEqual(self.get_expr_optimize("defined(EXAMPLE)",
874                                                {"EXAMPLE": "XOWOE"}),
875                         "(defined XOWOE)")
876        self.assertEqual(self.get_expr_optimize("defined(EXAMPLE)",
877                                                {"EXAMPLE": kCppUndefinedMacro}),
878                         "(int 0)")
879        self.assertEqual(self.get_expr_optimize("!defined(EXAMPLE)"), "(! (defined EXAMPLE))")
880        self.assertEqual(self.get_expr_optimize("!defined(EXAMPLE)",
881                                                {"EXAMPLE": "XOWOE"}),
882                         "(! (defined XOWOE))")
883        self.assertEqual(self.get_expr_optimize("!defined(EXAMPLE)",
884                                                {"EXAMPLE": kCppUndefinedMacro}),
885                         "(int 1)")
886        self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)"),
887                        "(|| (defined A) (defined B))")
888        self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)",
889                                                {"A": "1"}),
890                         "(int 1)")
891        self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)",
892                                                {"B": "1"}),
893                         "(int 1)")
894        self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)",
895                                                {"B": kCppUndefinedMacro}),
896                         "(defined A)")
897        self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)",
898                                                {"A": kCppUndefinedMacro,
899                                                 "B": kCppUndefinedMacro}),
900                         "(int 0)")
901        self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)"),
902                         "(&& (defined A) (defined B))")
903        self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)",
904                                                {"A": "1"}),
905                         "(defined B)")
906        self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)",
907                                                {"B": "1"}),
908                         "(defined A)")
909        self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)",
910                                                {"B": kCppUndefinedMacro}),
911                        "(int 0)")
912        self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)",
913                                                {"A": kCppUndefinedMacro}),
914                        "(int 0)")
915        self.assertEqual(self.get_expr_optimize("A == 1 || defined(B)"),
916                         "(|| (== (ident A) (int 1)) (defined B))")
917        self.assertEqual(self.get_expr_optimize(
918              "defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)",
919              {"__KERNEL__": kCppUndefinedMacro}),
920              "(|| (! (defined __GLIBC__)) (< (ident __GLIBC__) (int 2)))")
921
922    def get_expr_string(self, expr):
923        return str(CppExpr(CppStringTokenizer(expr).tokens))
924
925    def test_cpp_expr_string(self):
926        self.assertEqual(self.get_expr_string("0"), "0")
927        self.assertEqual(self.get_expr_string("1"), "1")
928        self.assertEqual(self.get_expr_string("1 && 1"), "1 && 1")
929        self.assertEqual(self.get_expr_string("1 && 0"), "1 && 0")
930        self.assertEqual(self.get_expr_string("0 && 1"), "0 && 1")
931        self.assertEqual(self.get_expr_string("0 && 0"), "0 && 0")
932        self.assertEqual(self.get_expr_string("1 || 1"), "1 || 1")
933        self.assertEqual(self.get_expr_string("1 || 0"), "1 || 0")
934        self.assertEqual(self.get_expr_string("0 || 1"), "0 || 1")
935        self.assertEqual(self.get_expr_string("0 || 0"), "0 || 0")
936        self.assertEqual(self.get_expr_string("EXAMPLE"), "EXAMPLE")
937        self.assertEqual(self.get_expr_string("EXAMPLE - 3"), "EXAMPLE - 3")
938        self.assertEqual(self.get_expr_string("defined(EXAMPLE)"), "defined(EXAMPLE)")
939        self.assertEqual(self.get_expr_string("defined EXAMPLE"), "defined(EXAMPLE)")
940        self.assertEqual(self.get_expr_string("A == 1 || defined(B)"), "A == 1 || defined(B)")
941
942
943################################################################################
944################################################################################
945#####                                                                      #####
946#####          C P P   B L O C K                                           #####
947#####                                                                      #####
948################################################################################
949################################################################################
950
951
952class Block(object):
953    """A class used to model a block of input source text.
954
955    There are two block types:
956      - directive blocks: contain the tokens of a single pre-processor
957        directive (e.g. #if)
958      - text blocks, contain the tokens of non-directive blocks
959
960    The cpp parser class below will transform an input source file into a list
961    of Block objects (grouped in a BlockList object for convenience)
962    """
963
964    def __init__(self, tokens, directive=None, lineno=0, identifier=None):
965        """Initialize a new block, if 'directive' is None, it is a text block.
966
967        NOTE: This automatically converts '#ifdef MACRO' into
968        '#if defined(MACRO)' and '#ifndef MACRO' into '#if !defined(MACRO)'.
969        """
970
971        if directive == "ifdef":
972            tok = Token()
973            tok.id = tokDEFINED
974            tokens = [tok] + tokens
975            directive = "if"
976
977        elif directive == "ifndef":
978            tok1 = Token()
979            tok2 = Token()
980            tok1.id = tokNOT
981            tok2.id = tokDEFINED
982            tokens = [tok1, tok2] + tokens
983            directive = "if"
984
985        self.tokens = tokens
986        self.directive = directive
987        self.define_id = identifier
988        if lineno > 0:
989            self.lineno = lineno
990        else:
991            self.lineno = self.tokens[0].location.line
992
993        if self.isIf():
994            self.expr = CppExpr(self.tokens)
995
996    def isDirective(self):
997        """Return True iff this is a directive block."""
998        return self.directive is not None
999
1000    def isConditional(self):
1001        """Return True iff this is a conditional directive block."""
1002        return self.directive in ["if", "ifdef", "ifndef", "else", "elif",
1003                                  "endif"]
1004
1005    def isDefine(self):
1006        """Return the macro name in a #define directive, or None otherwise."""
1007        if self.directive != "define":
1008            return None
1009        return self.define_id
1010
1011    def isIf(self):
1012        """Return True iff this is an #if-like directive block."""
1013        return self.directive in ["if", "ifdef", "ifndef", "elif"]
1014
1015    def isEndif(self):
1016        """Return True iff this is an #endif directive block."""
1017        return self.directive == "endif"
1018
1019    def isInclude(self):
1020        """Check whether this is a #include directive.
1021
1022        If true, returns the corresponding file name (with brackets or
1023        double-qoutes). None otherwise.
1024        """
1025
1026        if self.directive != "include":
1027            return None
1028        return ''.join([str(x) for x in self.tokens])
1029
1030    @staticmethod
1031    def format_blocks(tokens, indent=0):
1032        """Return the formatted lines of strings with proper indentation."""
1033        newline = True
1034        result = []
1035        buf = ''
1036        i = 0
1037        while i < len(tokens):
1038            t = tokens[i]
1039            if t.id == '{':
1040                buf += ' {'
1041                result.append(strip_space(buf))
1042                # Do not indent if this is extern "C" {
1043                if i < 2 or tokens[i-2].id != 'extern' or tokens[i-1].id != '"C"':
1044                    indent += 2
1045                buf = ''
1046                newline = True
1047            elif t.id == '}':
1048                if indent >= 2:
1049                    indent -= 2
1050                if not newline:
1051                    result.append(strip_space(buf))
1052                # Look ahead to determine if it's the end of line.
1053                if (i + 1 < len(tokens) and
1054                    (tokens[i+1].id == ';' or
1055                     tokens[i+1].id in ['else', '__attribute__',
1056                                        '__attribute', '__packed'] or
1057                     tokens[i+1].kind == TokenKind.IDENTIFIER)):
1058                    buf = ' ' * indent + '}'
1059                    newline = False
1060                else:
1061                    result.append(' ' * indent + '}')
1062                    buf = ''
1063                    newline = True
1064            elif t.id == ';':
1065                result.append(strip_space(buf) + ';')
1066                buf = ''
1067                newline = True
1068            # We prefer a new line for each constant in enum.
1069            elif t.id == ',' and t.cursor.kind == CursorKind.ENUM_DECL:
1070                result.append(strip_space(buf) + ',')
1071                buf = ''
1072                newline = True
1073            else:
1074                if newline:
1075                    buf += ' ' * indent + str(t)
1076                else:
1077                    buf += ' ' + str(t)
1078                newline = False
1079            i += 1
1080
1081        if buf:
1082            result.append(strip_space(buf))
1083
1084        return result, indent
1085
1086    def write(self, out, indent):
1087        """Dump the current block."""
1088        # removeWhiteSpace() will sometimes creates non-directive blocks
1089        # without any tokens. These come from blocks that only contained
1090        # empty lines and spaces. They should not be printed in the final
1091        # output, and then should not be counted for this operation.
1092        #
1093        if self.directive is None and not self.tokens:
1094            return indent
1095
1096        if self.directive:
1097            out.write(str(self) + '\n')
1098        else:
1099            lines, indent = self.format_blocks(self.tokens, indent)
1100            for line in lines:
1101                out.write(line + '\n')
1102
1103        return indent
1104
1105    def __repr__(self):
1106        """Generate the representation of a given block."""
1107        if self.directive:
1108            result = "#%s " % self.directive
1109            if self.isIf():
1110                result += repr(self.expr)
1111            else:
1112                for tok in self.tokens:
1113                    result += repr(tok)
1114        else:
1115            result = ""
1116            for tok in self.tokens:
1117                result += repr(tok)
1118
1119        return result
1120
1121    def __str__(self):
1122        """Generate the string representation of a given block."""
1123        if self.directive:
1124            # "#if"
1125            if self.directive == "if":
1126                # small optimization to re-generate #ifdef and #ifndef
1127                e = self.expr.expr
1128                op = e[0]
1129                if op == "defined":
1130                    result = "#ifdef %s" % e[1]
1131                elif op == "!" and e[1][0] == "defined":
1132                    result = "#ifndef %s" % e[1][1]
1133                else:
1134                    result = "#if " + str(self.expr)
1135
1136            # "#define"
1137            elif self.isDefine():
1138                result = "#%s %s" % (self.directive, self.define_id)
1139                if self.tokens:
1140                    result += " "
1141                expr = strip_space(' '.join([tok.id for tok in self.tokens]))
1142                # remove the space between name and '(' in function call
1143                result += re.sub(r'(\w+) \(', r'\1(', expr)
1144
1145            # "#error"
1146            # Concatenating tokens with a space separator, because they may
1147            # not be quoted and broken into several tokens
1148            elif self.directive == "error":
1149                result = "#error %s" % ' '.join([tok.id for tok in self.tokens])
1150
1151            else:
1152                result = "#%s" % self.directive
1153                if self.tokens:
1154                    result += " "
1155                result += ''.join([tok.id for tok in self.tokens])
1156        else:
1157            lines, _ = self.format_blocks(self.tokens)
1158            result = '\n'.join(lines)
1159
1160        return result
1161
1162
1163class BlockList(object):
1164    """A convenience class used to hold and process a list of blocks.
1165
1166    It calls the cpp parser to get the blocks.
1167    """
1168
1169    def __init__(self, blocks):
1170        self.blocks = blocks
1171
1172    def __len__(self):
1173        return len(self.blocks)
1174
1175    def __getitem__(self, n):
1176        return self.blocks[n]
1177
1178    def __repr__(self):
1179        return repr(self.blocks)
1180
1181    def __str__(self):
1182        result = '\n'.join([str(b) for b in self.blocks])
1183        return result
1184
1185    def dump(self):
1186        """Dump all the blocks in current BlockList."""
1187        print('##### BEGIN #####')
1188        for i, b in enumerate(self.blocks):
1189            print('### BLOCK %d ###' % i)
1190            print(b)
1191        print('##### END #####')
1192
1193    def optimizeIf01(self):
1194        """Remove the code between #if 0 .. #endif in a BlockList."""
1195        self.blocks = optimize_if01(self.blocks)
1196
1197    def optimizeMacros(self, macros):
1198        """Remove known defined and undefined macros from a BlockList."""
1199        for b in self.blocks:
1200            if b.isIf():
1201                b.expr.optimize(macros)
1202
1203    def removeStructs(self, structs):
1204        """Remove structs."""
1205        extra_includes = set()
1206        block_num = 0
1207        num_blocks = len(self.blocks)
1208        while block_num < num_blocks:
1209            b = self.blocks[block_num]
1210            block_num += 1
1211            # Have to look in each block for a top-level struct definition.
1212            if b.directive:
1213                continue
1214            num_tokens = len(b.tokens)
1215            # A struct definition usually looks like:
1216            #   struct
1217            #   ident
1218            #   {
1219            #   }
1220            #   ;
1221            # However, the structure might be spread across multiple blocks
1222            # if the structure looks like this:
1223            #   struct ident
1224            #   {
1225            #   #ifdef VARIABLE
1226            #     pid_t pid;
1227            #   #endif
1228            #   }:
1229            # So the total number of tokens in the block might be less than
1230            # five but assume at least three.
1231            if num_tokens < 3:
1232                continue
1233
1234            # This is a simple struct finder, it might fail if a top-level
1235            # structure has an #if type directives that confuses the algorithm
1236            # for finding the end of the structure. Or if there is another
1237            # structure definition embedded in the structure.
1238            i = 0
1239            while i < num_tokens - 2:
1240                if (b.tokens[i].kind != TokenKind.KEYWORD or
1241                    b.tokens[i].id != "struct"):
1242                    i += 1
1243                    continue
1244                if (b.tokens[i + 1].kind == TokenKind.IDENTIFIER and
1245                    b.tokens[i + 2].kind == TokenKind.PUNCTUATION and
1246                    b.tokens[i + 2].id == "{" and b.tokens[i + 1].id in structs):
1247                    # Add an include for the structure to be removed of the form:
1248                    #  #include <bits/STRUCT_NAME.h>
1249                    struct_token = b.tokens[i + 1]
1250                    if struct_token.id in structs and structs[struct_token.id]:
1251                        extra_includes.add("<%s>" % structs[struct_token.id])
1252
1253                    # Search forward for the end of the structure.
1254                    # Very simple search, look for } and ; tokens.
1255                    # If we hit the end of the block, we'll need to start
1256                    # looking at the next block.
1257                    j = i + 3
1258                    depth = 1
1259                    struct_removed = False
1260                    while not struct_removed:
1261                        while j < num_tokens:
1262                            if b.tokens[j].kind == TokenKind.PUNCTUATION:
1263                                if b.tokens[j].id == '{':
1264                                    depth += 1
1265                                elif b.tokens[j].id == '}':
1266                                    depth -= 1
1267                                elif b.tokens[j].id == ';' and depth == 0:
1268                                    b.tokens = b.tokens[0:i] + b.tokens[j + 1:num_tokens]
1269                                    num_tokens = len(b.tokens)
1270                                    struct_removed = True
1271                                    break
1272                            j += 1
1273                        if not struct_removed:
1274                            b.tokens = b.tokens[0:i]
1275
1276                            # Skip directive blocks.
1277                            start_block = block_num
1278                            while block_num < num_blocks:
1279                                if not self.blocks[block_num].directive:
1280                                    break
1281                                block_num += 1
1282                            if block_num >= num_blocks:
1283                                # Unparsable struct, error out.
1284                                raise UnparseableStruct("Cannot remove struct %s: %s" % (struct_token.id, struct_token.location))
1285                            self.blocks = self.blocks[0:start_block] + self.blocks[block_num:num_blocks]
1286                            num_blocks = len(self.blocks)
1287                            b = self.blocks[start_block]
1288                            block_num = start_block + 1
1289                            num_tokens = len(b.tokens)
1290                            i = 0
1291                            j = 0
1292                    continue
1293                i += 1
1294
1295        for extra_include in sorted(extra_includes):
1296            replacement = CppStringTokenizer(extra_include)
1297            self.blocks.insert(2, Block(replacement.tokens, directive='include'))
1298
1299    def optimizeAll(self, macros):
1300        self.optimizeMacros(macros)
1301        self.optimizeIf01()
1302        return
1303
1304    def findIncludes(self):
1305        """Return the list of included files in a BlockList."""
1306        result = []
1307        for b in self.blocks:
1308            i = b.isInclude()
1309            if i:
1310                result.append(i)
1311        return result
1312
1313    def write(self, out):
1314        indent = 0
1315        for b in self.blocks:
1316            indent = b.write(out, indent)
1317
1318    def removeVarsAndFuncs(self, keep):
1319        """Remove variable and function declarations.
1320
1321        All extern and static declarations corresponding to variable and
1322        function declarations are removed. We only accept typedefs and
1323        enum/structs/union declarations.
1324
1325        In addition, remove any macros expanding in the headers. Usually,
1326        these macros are static inline functions, which is why they are
1327        removed.
1328
1329        However, we keep the definitions corresponding to the set of known
1330        static inline functions in the set 'keep', which is useful
1331        for optimized byteorder swap functions and stuff like that.
1332        """
1333
1334        # state = NORMAL => normal (i.e. LN + spaces)
1335        # state = OTHER_DECL => typedef/struct encountered, ends with ";"
1336        # state = VAR_DECL => var declaration encountered, ends with ";"
1337        # state = FUNC_DECL => func declaration encountered, ends with "}"
1338        NORMAL = 0
1339        OTHER_DECL = 1
1340        VAR_DECL = 2
1341        FUNC_DECL = 3
1342
1343        state = NORMAL
1344        depth = 0
1345        blocksToKeep = []
1346        blocksInProgress = []
1347        blocksOfDirectives = []
1348        ident = ""
1349        state_token = ""
1350        macros = set()
1351        for block in self.blocks:
1352            if block.isDirective():
1353                # Record all macros.
1354                if block.directive == 'define':
1355                    macro_name = block.define_id
1356                    paren_index = macro_name.find('(')
1357                    if paren_index == -1:
1358                        macros.add(macro_name)
1359                    else:
1360                        macros.add(macro_name[0:paren_index])
1361                blocksInProgress.append(block)
1362                # If this is in a function/variable declaration, we might need
1363                # to emit the directives alone, so save them separately.
1364                blocksOfDirectives.append(block)
1365                continue
1366
1367            numTokens = len(block.tokens)
1368            lastTerminatorIndex = 0
1369            i = 0
1370            while i < numTokens:
1371                token_id = block.tokens[i].id
1372                terminator = False
1373                if token_id == '{':
1374                    depth += 1
1375                    if (i >= 2 and block.tokens[i-2].id == 'extern' and
1376                        block.tokens[i-1].id == '"C"'):
1377                        # For an extern "C" { pretend as though this is depth 0.
1378                        depth -= 1
1379                elif token_id == '}':
1380                    if depth > 0:
1381                        depth -= 1
1382                    if depth == 0:
1383                        if state == OTHER_DECL:
1384                            # Loop through until we hit the ';'
1385                            i += 1
1386                            while i < numTokens:
1387                                if block.tokens[i].id == ';':
1388                                    token_id = ';'
1389                                    break
1390                                i += 1
1391                            # If we didn't hit the ';', just consider this the
1392                            # terminator any way.
1393                        terminator = True
1394                elif depth == 0:
1395                    if token_id == ';':
1396                        if state == NORMAL:
1397                            blocksToKeep.extend(blocksInProgress)
1398                            blocksInProgress = []
1399                            blocksOfDirectives = []
1400                            state = FUNC_DECL
1401                        terminator = True
1402                    elif (state == NORMAL and token_id == '(' and i >= 1 and
1403                          block.tokens[i-1].kind == TokenKind.IDENTIFIER and
1404                          block.tokens[i-1].id in macros):
1405                        # This is a plain macro being expanded in the header
1406                        # which needs to be removed.
1407                        blocksToKeep.extend(blocksInProgress)
1408                        if lastTerminatorIndex < i - 1:
1409                            blocksToKeep.append(Block(block.tokens[lastTerminatorIndex:i-1]))
1410                        blocksInProgress = []
1411                        blocksOfDirectives = []
1412
1413                        # Skip until we see the terminating ')'
1414                        i += 1
1415                        paren_depth = 1
1416                        while i < numTokens:
1417                            if block.tokens[i].id == ')':
1418                                paren_depth -= 1
1419                                if paren_depth == 0:
1420                                    break
1421                            elif block.tokens[i].id == '(':
1422                                paren_depth += 1
1423                            i += 1
1424                        lastTerminatorIndex = i + 1
1425                    elif (state != FUNC_DECL and token_id == '(' and
1426                          state_token != 'typedef'):
1427                        blocksToKeep.extend(blocksInProgress)
1428                        blocksInProgress = []
1429                        blocksOfDirectives = []
1430                        state = VAR_DECL
1431                    elif state == NORMAL and token_id in ['struct', 'typedef',
1432                                                          'enum', 'union',
1433                                                          '__extension__']:
1434                        state = OTHER_DECL
1435                        state_token = token_id
1436                    elif block.tokens[i].kind == TokenKind.IDENTIFIER:
1437                        if state != VAR_DECL or ident == "":
1438                            ident = token_id
1439
1440                if terminator:
1441                    if state != VAR_DECL and state != FUNC_DECL or ident in keep:
1442                        blocksInProgress.append(Block(block.tokens[lastTerminatorIndex:i+1]))
1443                        blocksToKeep.extend(blocksInProgress)
1444                    else:
1445                        # Only keep the directives found.
1446                        blocksToKeep.extend(blocksOfDirectives)
1447                    lastTerminatorIndex = i + 1
1448                    blocksInProgress = []
1449                    blocksOfDirectives = []
1450                    state = NORMAL
1451                    ident = ""
1452                    state_token = ""
1453                i += 1
1454            if lastTerminatorIndex < numTokens:
1455                blocksInProgress.append(Block(block.tokens[lastTerminatorIndex:numTokens]))
1456        if len(blocksInProgress) > 0:
1457            blocksToKeep.extend(blocksInProgress)
1458        self.blocks = blocksToKeep
1459
1460    def replaceTokens(self, replacements):
1461        """Replace tokens according to the given dict."""
1462        for b in self.blocks:
1463            made_change = False
1464            if b.isInclude() is None:
1465                i = 0
1466                while i < len(b.tokens):
1467                    tok = b.tokens[i]
1468                    if tok.kind == TokenKind.IDENTIFIER:
1469                        if tok.id in replacements:
1470                            tok.id = replacements[tok.id]
1471                            made_change = True
1472                    i += 1
1473
1474                if b.isDefine():
1475                    tokens = CppStringTokenizer(b.define_id).tokens
1476                    id_change = False
1477                    for tok in tokens:
1478                        if tok.kind == TokenKind.IDENTIFIER:
1479                            if tok.id in replacements:
1480                                tok.id = replacements[tok.id]
1481                                id_change = True
1482                    if id_change:
1483                        b.define_id = ''.join([tok.id for tok in tokens])
1484                        made_change = True
1485
1486
1487            if made_change and b.isIf():
1488                # Keep 'expr' in sync with 'tokens'.
1489                b.expr = CppExpr(b.tokens)
1490
1491
1492
1493def strip_space(s):
1494    """Strip out redundant space in a given string."""
1495
1496    # NOTE: It ought to be more clever to not destroy spaces in string tokens.
1497    replacements = {' . ': '.',
1498                    ' [': '[',
1499                    '[ ': '[',
1500                    ' ]': ']',
1501                    '( ': '(',
1502                    ' )': ')',
1503                    ' ,': ',',
1504                    '# ': '#',
1505                    ' ;': ';',
1506                    '~ ': '~',
1507                    ' -> ': '->'}
1508    result = s
1509    for r in replacements:
1510        result = result.replace(r, replacements[r])
1511
1512    # Remove the space between function name and the parenthesis.
1513    result = re.sub(r'(\w+) \(', r'\1(', result)
1514    return result
1515
1516
1517class BlockParser(object):
1518    """A class that converts an input source file into a BlockList object."""
1519
1520    def __init__(self, tokzer=None):
1521        """Initialize a block parser.
1522
1523        The input source is provided through a Tokenizer object.
1524        """
1525        self._tokzer = tokzer
1526        self._parsed = False
1527
1528    @property
1529    def parsed(self):
1530        return self._parsed
1531
1532    @staticmethod
1533    def _short_extent(extent):
1534        return '%d:%d - %d:%d' % (extent.start.line, extent.start.column,
1535                                  extent.end.line, extent.end.column)
1536
1537    def getBlocks(self, tokzer=None):
1538        """Return all the blocks parsed."""
1539
1540        def consume_extent(i, tokens, extent=None, detect_change=False):
1541            """Return tokens that belong to the given extent.
1542
1543            It parses all the tokens that follow tokens[i], until getting out
1544            of the extent. When detect_change is True, it may terminate early
1545            when detecting preprocessing directives inside the extent.
1546            """
1547
1548            result = []
1549            if extent is None:
1550                extent = tokens[i].cursor.extent
1551
1552            while i < len(tokens) and tokens[i].location in extent:
1553                t = tokens[i]
1554                if debugBlockParser:
1555                    print(' ' * 2, t.id, t.kind, t.cursor.kind)
1556                if (detect_change and t.cursor.extent != extent and
1557                    t.cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE):
1558                    break
1559                result.append(t)
1560                i += 1
1561            return (i, result)
1562
1563        def consume_line(i, tokens):
1564            """Return tokens that follow tokens[i] in the same line."""
1565            result = []
1566            line = tokens[i].location.line
1567            while i < len(tokens) and tokens[i].location.line == line:
1568                if tokens[i].cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE:
1569                    break
1570                result.append(tokens[i])
1571                i += 1
1572            return (i, result)
1573
1574        if tokzer is None:
1575            tokzer = self._tokzer
1576        tokens = tokzer.tokens
1577
1578        blocks = []
1579        buf = []
1580        i = 0
1581
1582        while i < len(tokens):
1583            t = tokens[i]
1584            cursor = t.cursor
1585
1586            if debugBlockParser:
1587                print ("%d: Processing [%s], kind=[%s], cursor=[%s], "
1588                       "extent=[%s]" % (t.location.line, t.spelling, t.kind,
1589                                        cursor.kind,
1590                                        self._short_extent(cursor.extent)))
1591
1592            if cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE:
1593                if buf:
1594                    blocks.append(Block(buf))
1595                    buf = []
1596
1597                j = i
1598                if j + 1 >= len(tokens):
1599                    raise BadExpectedToken("### BAD TOKEN at %s" % (t.location))
1600                directive = tokens[j+1].id
1601
1602                if directive == 'define':
1603                    if i+2 >= len(tokens):
1604                        raise BadExpectedToken("### BAD TOKEN at %s" %
1605                                               (tokens[i].location))
1606
1607                    # Skip '#' and 'define'.
1608                    extent = tokens[i].cursor.extent
1609                    i += 2
1610                    id = ''
1611                    # We need to separate the id from the remaining of
1612                    # the line, especially for the function-like macro.
1613                    if (i + 1 < len(tokens) and tokens[i+1].id == '(' and
1614                        (tokens[i].location.column + len(tokens[i].spelling) ==
1615                         tokens[i+1].location.column)):
1616                        while i < len(tokens):
1617                            id += tokens[i].id
1618                            if tokens[i].spelling == ')':
1619                                i += 1
1620                                break
1621                            i += 1
1622                    else:
1623                        id += tokens[i].id
1624                        # Advance to the next token that follows the macro id
1625                        i += 1
1626
1627                    (i, ret) = consume_extent(i, tokens, extent=extent)
1628                    blocks.append(Block(ret, directive=directive,
1629                                        lineno=t.location.line, identifier=id))
1630
1631                else:
1632                    (i, ret) = consume_extent(i, tokens)
1633                    blocks.append(Block(ret[2:], directive=directive,
1634                                        lineno=t.location.line))
1635
1636            elif cursor.kind == CursorKind.INCLUSION_DIRECTIVE:
1637                if buf:
1638                    blocks.append(Block(buf))
1639                    buf = []
1640                directive = tokens[i+1].id
1641                (i, ret) = consume_extent(i, tokens)
1642
1643                blocks.append(Block(ret[2:], directive=directive,
1644                                    lineno=t.location.line))
1645
1646            elif cursor.kind == CursorKind.VAR_DECL:
1647                if buf:
1648                    blocks.append(Block(buf))
1649                    buf = []
1650
1651                (i, ret) = consume_extent(i, tokens, detect_change=True)
1652                buf += ret
1653
1654            elif cursor.kind == CursorKind.FUNCTION_DECL:
1655                if buf:
1656                    blocks.append(Block(buf))
1657                    buf = []
1658
1659                (i, ret) = consume_extent(i, tokens, detect_change=True)
1660                buf += ret
1661
1662            else:
1663                (i, ret) = consume_line(i, tokens)
1664                buf += ret
1665
1666        if buf:
1667            blocks.append(Block(buf))
1668
1669        # _parsed=True indicates a successful parsing, although may result an
1670        # empty BlockList.
1671        self._parsed = True
1672
1673        return BlockList(blocks)
1674
1675    def parse(self, tokzer):
1676        return self.getBlocks(tokzer)
1677
1678    def parseFile(self, path):
1679        return self.getBlocks(CppFileTokenizer(path))
1680
1681
1682class BlockParserTests(unittest.TestCase):
1683    """BlockParser unit tests."""
1684
1685    def get_blocks(self, lines):
1686        blocks = BlockParser().parse(CppStringTokenizer('\n'.join(lines)))
1687        return list(map(lambda a: str(a), blocks))
1688
1689    def test_hash(self):
1690        self.assertEqual(self.get_blocks(["#error hello"]), ["#error hello"])
1691
1692    def test_empty_line(self):
1693        self.assertEqual(self.get_blocks(["foo", "", "bar"]), ["foo bar"])
1694
1695    def test_hash_with_space(self):
1696        # We currently cannot handle the following case with libclang properly.
1697        # Fortunately it doesn't appear in current headers.
1698        #self.assertEqual(self.get_blocks(["foo", "  #  ", "bar"]), ["foo", "bar"])
1699        pass
1700
1701    def test_with_comment(self):
1702        self.assertEqual(self.get_blocks(["foo",
1703                                          "  #  /* ahah */ if defined(__KERNEL__) /* more */",
1704                                          "bar", "#endif"]),
1705                         ["foo", "#ifdef __KERNEL__", "bar", "#endif"])
1706
1707
1708################################################################################
1709################################################################################
1710#####                                                                      #####
1711#####        B L O C K   L I S T   O P T I M I Z A T I O N                 #####
1712#####                                                                      #####
1713################################################################################
1714################################################################################
1715
1716
1717def find_matching_endif(blocks, i):
1718    """Traverse the blocks to find out the matching #endif."""
1719    n = len(blocks)
1720    depth = 1
1721    while i < n:
1722        if blocks[i].isDirective():
1723            dir_ = blocks[i].directive
1724            if dir_ in ["if", "ifndef", "ifdef"]:
1725                depth += 1
1726            elif depth == 1 and dir_ in ["else", "elif"]:
1727                return i
1728            elif dir_ == "endif":
1729                depth -= 1
1730                if depth == 0:
1731                    return i
1732        i += 1
1733    return i
1734
1735
1736def optimize_if01(blocks):
1737    """Remove the code between #if 0 .. #endif in a list of CppBlocks."""
1738    i = 0
1739    n = len(blocks)
1740    result = []
1741    while i < n:
1742        j = i
1743        while j < n and not blocks[j].isIf():
1744            j += 1
1745        if j > i:
1746            logging.debug("appending lines %d to %d", blocks[i].lineno,
1747                          blocks[j-1].lineno)
1748            result += blocks[i:j]
1749        if j >= n:
1750            break
1751        expr = blocks[j].expr
1752        r = expr.toInt()
1753        if r is None:
1754            result.append(blocks[j])
1755            i = j + 1
1756            continue
1757
1758        if r == 0:
1759            # if 0 => skip everything until the corresponding #endif
1760            start_dir = blocks[j].directive
1761            j = find_matching_endif(blocks, j + 1)
1762            if j >= n:
1763                # unterminated #if 0, finish here
1764                break
1765            dir_ = blocks[j].directive
1766            if dir_ == "endif":
1767                logging.debug("remove 'if 0' .. 'endif' (lines %d to %d)",
1768                              blocks[i].lineno, blocks[j].lineno)
1769                if start_dir == "elif":
1770                    # Put an endif since we started with an elif.
1771                    result += blocks[j:j+1]
1772                i = j + 1
1773            elif dir_ == "else":
1774                # convert 'else' into 'if 1'
1775                logging.debug("convert 'if 0' .. 'else' into 'if 1' (lines %d "
1776                              "to %d)", blocks[i].lineno, blocks[j-1].lineno)
1777                if start_dir == "elif":
1778                    blocks[j].directive = "elif"
1779                else:
1780                    blocks[j].directive = "if"
1781                blocks[j].expr = CppExpr(CppStringTokenizer("1").tokens)
1782                i = j
1783            elif dir_ == "elif":
1784                # convert 'elif' into 'if'
1785                logging.debug("convert 'if 0' .. 'elif' into 'if'")
1786                if start_dir == "elif":
1787                    blocks[j].directive = "elif"
1788                else:
1789                    blocks[j].directive = "if"
1790                i = j
1791            continue
1792
1793        # if 1 => find corresponding endif and remove/transform them
1794        k = find_matching_endif(blocks, j + 1)
1795        if k >= n:
1796            # unterminated #if 1, finish here
1797            logging.debug("unterminated 'if 1'")
1798            result += blocks[j+1:k]
1799            break
1800
1801        start_dir = blocks[j].directive
1802        dir_ = blocks[k].directive
1803        if dir_ == "endif":
1804            logging.debug("convert 'if 1' .. 'endif' (lines %d to %d)",
1805                          blocks[j].lineno, blocks[k].lineno)
1806            if start_dir == "elif":
1807                # Add the elif in to the results and convert it to an elif 1.
1808                blocks[j].tokens = CppStringTokenizer("1").tokens
1809                result += blocks[j:j+1]
1810            result += optimize_if01(blocks[j+1:k])
1811            if start_dir == "elif":
1812                # Add the endif in to the results.
1813                result += blocks[k:k+1]
1814            i = k + 1
1815        elif dir_ == "else":
1816            # convert 'else' into 'if 0'
1817            logging.debug("convert 'if 1' .. 'else' (lines %d to %d)",
1818                          blocks[j].lineno, blocks[k].lineno)
1819            if start_dir == "elif":
1820                # Add the elif in to the results and convert it to an elif 1.
1821                blocks[j].tokens = CppStringTokenizer("1").tokens
1822                result += blocks[j:j+1]
1823            result += optimize_if01(blocks[j+1:k])
1824            if start_dir == "elif":
1825                blocks[k].directive = "elif"
1826            else:
1827                blocks[k].directive = "if"
1828            blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
1829            i = k
1830        elif dir_ == "elif":
1831            # convert 'elif' into 'if 0'
1832            logging.debug("convert 'if 1' .. 'elif' (lines %d to %d)",
1833                          blocks[j].lineno, blocks[k].lineno)
1834            result += optimize_if01(blocks[j+1:k])
1835            blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
1836            i = k
1837    return result
1838
1839class OptimizerTests(unittest.TestCase):
1840    def parse(self, text, macros=None):
1841        out = utils.StringOutput()
1842        blocks = BlockParser().parse(CppStringTokenizer(text))
1843        blocks.optimizeAll(macros)
1844        blocks.write(out)
1845        return out.get()
1846
1847    def test_if1(self):
1848        text = """\
1849#if 1
1850#define  GOOD
1851#endif
1852"""
1853        expected = """\
1854#define GOOD
1855"""
1856        self.assertEqual(self.parse(text), expected)
1857
1858    def test_if0(self):
1859        text = """\
1860#if 0
1861#define  SHOULD_SKIP1
1862#define  SHOULD_SKIP2
1863#endif
1864"""
1865        expected = ""
1866        self.assertEqual(self.parse(text), expected)
1867
1868    def test_if1_else(self):
1869        text = """\
1870#if 1
1871#define  GOOD
1872#else
1873#define  BAD
1874#endif
1875"""
1876        expected = """\
1877#define GOOD
1878"""
1879        self.assertEqual(self.parse(text), expected)
1880
1881    def test_if0_else(self):
1882        text = """\
1883#if 0
1884#define  BAD
1885#else
1886#define  GOOD
1887#endif
1888"""
1889        expected = """\
1890#define GOOD
1891"""
1892        self.assertEqual(self.parse(text), expected)
1893
1894    def test_if_elif1(self):
1895        text = """\
1896#if defined(something)
1897#define EXISTS
1898#elif 1
1899#define GOOD
1900#endif
1901"""
1902        expected = """\
1903#ifdef something
1904#define EXISTS
1905#elif 1
1906#define GOOD
1907#endif
1908"""
1909        self.assertEqual(self.parse(text), expected)
1910
1911    def test_if_elif1_macro(self):
1912        text = """\
1913#if defined(something)
1914#define EXISTS
1915#elif defined(WILL_BE_ONE)
1916#define GOOD
1917#endif
1918"""
1919        expected = """\
1920#ifdef something
1921#define EXISTS
1922#elif 1
1923#define GOOD
1924#endif
1925"""
1926        self.assertEqual(self.parse(text, {"WILL_BE_ONE": "1"}), expected)
1927
1928
1929    def test_if_elif1_else(self):
1930        text = """\
1931#if defined(something)
1932#define EXISTS
1933#elif 1
1934#define GOOD
1935#else
1936#define BAD
1937#endif
1938"""
1939        expected = """\
1940#ifdef something
1941#define EXISTS
1942#elif 1
1943#define GOOD
1944#endif
1945"""
1946        self.assertEqual(self.parse(text), expected)
1947
1948    def test_if_elif1_else_macro(self):
1949        text = """\
1950#if defined(something)
1951#define EXISTS
1952#elif defined(WILL_BE_ONE)
1953#define GOOD
1954#else
1955#define BAD
1956#endif
1957"""
1958        expected = """\
1959#ifdef something
1960#define EXISTS
1961#elif 1
1962#define GOOD
1963#endif
1964"""
1965        self.assertEqual(self.parse(text, {"WILL_BE_ONE": "1"}), expected)
1966
1967
1968    def test_if_elif1_else_macro(self):
1969        text = """\
1970#if defined(something)
1971#define EXISTS
1972#elif defined(WILL_BE_ONE)
1973#define GOOD
1974#else
1975#define BAD
1976#endif
1977"""
1978        expected = """\
1979#ifdef something
1980#define EXISTS
1981#elif 1
1982#define GOOD
1983#endif
1984"""
1985        self.assertEqual(self.parse(text, {"WILL_BE_ONE": "1"}), expected)
1986
1987    def test_macro_set_to_undefined_single(self):
1988        text = """\
1989#if defined(__KERNEL__)
1990#define BAD_KERNEL
1991#endif
1992"""
1993        expected = ""
1994        macros = {"__KERNEL__": kCppUndefinedMacro}
1995        self.assertEqual(self.parse(text, macros), expected)
1996
1997    def test_macro_set_to_undefined_if(self):
1998        text = """\
1999#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
2000#define CHECK
2001#endif
2002"""
2003        expected = """\
2004#if !defined(__GLIBC__) || __GLIBC__ < 2
2005#define CHECK
2006#endif
2007"""
2008        macros = {"__KERNEL__": kCppUndefinedMacro}
2009        self.assertEqual(self.parse(text, macros), expected)
2010
2011    def test_endif_comment_removed(self):
2012        text = """\
2013#ifndef SIGRTMAX
2014#define SIGRTMAX 123
2015#endif /* SIGRTMAX */
2016"""
2017        expected = """\
2018#ifndef SIGRTMAX
2019#define SIGRTMAX 123
2020#endif
2021"""
2022        self.assertEqual(self.parse(text), expected)
2023
2024    def test_multilevel_if0(self):
2025        text = """\
2026#if 0
2027#if 1
2028#define  BAD_6
2029#endif
2030#endif
2031"""
2032        expected = ""
2033        self.assertEqual(self.parse(text), expected)
2034
2035class RemoveStructsTests(unittest.TestCase):
2036    def parse(self, text, structs):
2037        out = utils.StringOutput()
2038        blocks = BlockParser().parse(CppStringTokenizer(text))
2039        blocks.removeStructs(structs)
2040        blocks.write(out)
2041        return out.get()
2042
2043    def test_remove_struct_from_start(self):
2044        text = """\
2045struct remove {
2046  int val1;
2047  int val2;
2048};
2049struct something {
2050  struct timeval val1;
2051  struct timeval val2;
2052};
2053"""
2054        expected = """\
2055struct something {
2056  struct timeval val1;
2057  struct timeval val2;
2058};
2059"""
2060        self.assertEqual(self.parse(text, {"remove": True}), expected)
2061
2062    def test_remove_struct_from_end(self):
2063        text = """\
2064struct something {
2065  struct timeval val1;
2066  struct timeval val2;
2067};
2068struct remove {
2069  int val1;
2070  int val2;
2071};
2072"""
2073        expected = """\
2074struct something {
2075  struct timeval val1;
2076  struct timeval val2;
2077};
2078"""
2079        self.assertEqual(self.parse(text, {"remove": True}), expected)
2080
2081    def test_remove_minimal_struct(self):
2082        text = """\
2083struct remove {
2084};
2085"""
2086        expected = "";
2087        self.assertEqual(self.parse(text, {"remove": True}), expected)
2088
2089    def test_remove_struct_with_struct_fields(self):
2090        text = """\
2091struct something {
2092  struct remove val1;
2093  struct remove val2;
2094};
2095struct remove {
2096  int val1;
2097  struct something val3;
2098  int val2;
2099};
2100"""
2101        expected = """\
2102struct something {
2103  struct remove val1;
2104  struct remove val2;
2105};
2106"""
2107        self.assertEqual(self.parse(text, {"remove": True}), expected)
2108
2109    def test_remove_consecutive_structs(self):
2110        text = """\
2111struct keep1 {
2112  struct timeval val1;
2113  struct timeval val2;
2114};
2115struct remove1 {
2116  int val1;
2117  int val2;
2118};
2119struct remove2 {
2120  int val1;
2121  int val2;
2122  int val3;
2123};
2124struct keep2 {
2125  struct timeval val1;
2126  struct timeval val2;
2127};
2128"""
2129        expected = """\
2130struct keep1 {
2131  struct timeval val1;
2132  struct timeval val2;
2133};
2134struct keep2 {
2135  struct timeval val1;
2136  struct timeval val2;
2137};
2138"""
2139        self.assertEqual(self.parse(text, {"remove1": True, "remove2": True}), expected)
2140
2141    def test_remove_multiple_structs(self):
2142        text = """\
2143struct keep1 {
2144  int val;
2145};
2146struct remove1 {
2147  int val1;
2148  int val2;
2149};
2150struct keep2 {
2151  int val;
2152};
2153struct remove2 {
2154  struct timeval val1;
2155  struct timeval val2;
2156};
2157struct keep3 {
2158  int val;
2159};
2160"""
2161        expected = """\
2162struct keep1 {
2163  int val;
2164};
2165struct keep2 {
2166  int val;
2167};
2168struct keep3 {
2169  int val;
2170};
2171"""
2172        self.assertEqual(self.parse(text, {"remove1": True, "remove2": True}), expected)
2173
2174    def test_remove_struct_with_inline_structs(self):
2175        text = """\
2176struct remove {
2177  int val1;
2178  int val2;
2179  struct {
2180    int val1;
2181    struct {
2182      int val1;
2183    } level2;
2184  } level1;
2185};
2186struct something {
2187  struct timeval val1;
2188  struct timeval val2;
2189};
2190"""
2191        expected = """\
2192struct something {
2193  struct timeval val1;
2194  struct timeval val2;
2195};
2196"""
2197        self.assertEqual(self.parse(text, {"remove": True}), expected)
2198
2199    def test_remove_struct_across_blocks(self):
2200        text = """\
2201struct remove {
2202  int val1;
2203  int val2;
2204#ifdef PARAMETER1
2205  PARAMETER1
2206#endif
2207#ifdef PARAMETER2
2208  PARAMETER2
2209#endif
2210};
2211struct something {
2212  struct timeval val1;
2213  struct timeval val2;
2214};
2215"""
2216        expected = """\
2217struct something {
2218  struct timeval val1;
2219  struct timeval val2;
2220};
2221"""
2222        self.assertEqual(self.parse(text, {"remove": True}), expected)
2223
2224    def test_remove_struct_across_blocks_multiple_structs(self):
2225        text = """\
2226struct remove1 {
2227  int val1;
2228  int val2;
2229#ifdef PARAMETER1
2230  PARAMETER1
2231#endif
2232#ifdef PARAMETER2
2233  PARAMETER2
2234#endif
2235};
2236struct remove2 {
2237};
2238struct something {
2239  struct timeval val1;
2240  struct timeval val2;
2241};
2242"""
2243        expected = """\
2244struct something {
2245  struct timeval val1;
2246  struct timeval val2;
2247};
2248"""
2249        self.assertEqual(self.parse(text, {"remove1": True, "remove2": True}), expected)
2250
2251    def test_remove_multiple_struct_and_add_includes(self):
2252        text = """\
2253struct remove1 {
2254  int val1;
2255  int val2;
2256};
2257struct remove2 {
2258  struct timeval val1;
2259  struct timeval val2;
2260};
2261"""
2262        expected = """\
2263#include <bits/remove1.h>
2264#include <bits/remove2.h>
2265"""
2266        self.assertEqual(self.parse(text, {"remove1": False, "remove2": False}), expected)
2267
2268
2269class FullPathTest(unittest.TestCase):
2270    """Test of the full path parsing."""
2271
2272    def parse(self, text, keep=None):
2273        if not keep:
2274            keep = set()
2275        out = utils.StringOutput()
2276        blocks = BlockParser().parse(CppStringTokenizer(text))
2277
2278        blocks.removeStructs(kernel_structs_to_remove)
2279        blocks.removeVarsAndFuncs(keep)
2280        blocks.replaceTokens(kernel_token_replacements)
2281        blocks.optimizeAll(None)
2282
2283        blocks.write(out)
2284        return out.get()
2285
2286    def test_function_removed(self):
2287        text = """\
2288static inline __u64 function()
2289{
2290}
2291"""
2292        expected = ""
2293        self.assertEqual(self.parse(text), expected)
2294
2295    def test_function_removed_with_struct(self):
2296        text = """\
2297static inline struct something* function()
2298{
2299}
2300"""
2301        expected = ""
2302        self.assertEqual(self.parse(text), expected)
2303
2304    def test_function_kept(self):
2305        text = """\
2306static inline __u64 function()
2307{
2308}
2309"""
2310        expected = """\
2311static inline __u64 function() {
2312}
2313"""
2314        self.assertEqual(self.parse(text, set(["function"])), expected)
2315
2316    def test_var_removed(self):
2317        text = "__u64 variable;"
2318        expected = ""
2319        self.assertEqual(self.parse(text), expected)
2320
2321    def test_var_kept(self):
2322        text = "__u64 variable;"
2323        expected = "__u64 variable;\n"
2324        self.assertEqual(self.parse(text, set(["variable"])), expected)
2325
2326    def test_keep_function_typedef(self):
2327        text = "typedef void somefunction_t(void);"
2328        expected = "typedef void somefunction_t(void);\n"
2329        self.assertEqual(self.parse(text), expected)
2330
2331    def test_struct_keep_attribute(self):
2332        text = """\
2333struct something_s {
2334  __u32 s1;
2335  __u32 s2;
2336} __attribute__((packed));
2337"""
2338        expected = """\
2339struct something_s {
2340  __u32 s1;
2341  __u32 s2;
2342} __attribute__((packed));
2343"""
2344        self.assertEqual(self.parse(text), expected)
2345
2346    def test_function_keep_attribute_structs(self):
2347        text = """\
2348static __inline__ struct some_struct1 * function(struct some_struct2 * e) {
2349}
2350"""
2351        expected = """\
2352static __inline__ struct some_struct1 * function(struct some_struct2 * e) {
2353}
2354"""
2355        self.assertEqual(self.parse(text, set(["function"])), expected)
2356
2357    def test_struct_after_struct(self):
2358        text = """\
2359struct first {
2360};
2361
2362struct second {
2363  unsigned short s1;
2364#define SOMETHING 8
2365  unsigned short s2;
2366};
2367"""
2368        expected = """\
2369struct first {
2370};
2371struct second {
2372  unsigned short s1;
2373#define SOMETHING 8
2374  unsigned short s2;
2375};
2376"""
2377        self.assertEqual(self.parse(text), expected)
2378
2379    def test_other_not_removed(self):
2380        text = """\
2381typedef union {
2382  __u64 tu1;
2383  __u64 tu2;
2384} typedef_name;
2385
2386union {
2387  __u64 u1;
2388  __u64 u2;
2389};
2390
2391struct {
2392  __u64 s1;
2393  __u64 s2;
2394};
2395
2396enum {
2397  ENUM1 = 0,
2398  ENUM2,
2399};
2400
2401__extension__ typedef __signed__ long long __s64;
2402"""
2403        expected = """\
2404typedef union {
2405  __u64 tu1;
2406  __u64 tu2;
2407} typedef_name;
2408union {
2409  __u64 u1;
2410  __u64 u2;
2411};
2412struct {
2413  __u64 s1;
2414  __u64 s2;
2415};
2416enum {
2417  ENUM1 = 0,
2418  ENUM2,
2419};
2420__extension__ typedef __signed__ long long __s64;
2421"""
2422
2423        self.assertEqual(self.parse(text), expected)
2424
2425    def test_semicolon_after_function(self):
2426        text = """\
2427static inline __u64 function()
2428{
2429};
2430
2431struct should_see {
2432        __u32                           field;
2433};
2434"""
2435        expected = """\
2436struct should_see {
2437  __u32 field;
2438};
2439"""
2440        self.assertEqual(self.parse(text), expected)
2441
2442    def test_define_in_middle_keep(self):
2443        text = """\
2444enum {
2445  ENUM0 = 0x10,
2446  ENUM1 = 0x20,
2447#define SOMETHING SOMETHING_ELSE
2448  ENUM2 = 0x40,
2449};
2450"""
2451        expected = """\
2452enum {
2453  ENUM0 = 0x10,
2454  ENUM1 = 0x20,
2455#define SOMETHING SOMETHING_ELSE
2456  ENUM2 = 0x40,
2457};
2458"""
2459        self.assertEqual(self.parse(text), expected)
2460
2461    def test_define_in_middle_remove(self):
2462        text = """\
2463static inline function() {
2464#define SOMETHING1 SOMETHING_ELSE1
2465  i = 0;
2466  {
2467    i = 1;
2468  }
2469#define SOMETHING2 SOMETHING_ELSE2
2470}
2471"""
2472        expected = """\
2473#define SOMETHING1 SOMETHING_ELSE1
2474#define SOMETHING2 SOMETHING_ELSE2
2475"""
2476        self.assertEqual(self.parse(text), expected)
2477
2478    def test_define_in_middle_force_keep(self):
2479        text = """\
2480static inline function() {
2481#define SOMETHING1 SOMETHING_ELSE1
2482  i = 0;
2483  {
2484    i = 1;
2485  }
2486#define SOMETHING2 SOMETHING_ELSE2
2487}
2488"""
2489        expected = """\
2490static inline function() {
2491#define SOMETHING1 SOMETHING_ELSE1
2492  i = 0;
2493 {
2494    i = 1;
2495  }
2496#define SOMETHING2 SOMETHING_ELSE2
2497}
2498"""
2499        self.assertEqual(self.parse(text, set(["function"])), expected)
2500
2501    def test_define_before_remove(self):
2502        text = """\
2503#define SHOULD_BE_KEPT NOTHING1
2504#define ANOTHER_TO_KEEP NOTHING2
2505static inline function() {
2506#define SOMETHING1 SOMETHING_ELSE1
2507  i = 0;
2508  {
2509    i = 1;
2510  }
2511#define SOMETHING2 SOMETHING_ELSE2
2512}
2513"""
2514        expected = """\
2515#define SHOULD_BE_KEPT NOTHING1
2516#define ANOTHER_TO_KEEP NOTHING2
2517#define SOMETHING1 SOMETHING_ELSE1
2518#define SOMETHING2 SOMETHING_ELSE2
2519"""
2520        self.assertEqual(self.parse(text), expected)
2521
2522    def test_extern_C(self):
2523        text = """\
2524#if defined(__cplusplus)
2525extern "C" {
2526#endif
2527
2528struct something {
2529};
2530
2531#if defined(__cplusplus)
2532}
2533#endif
2534"""
2535        expected = """\
2536#ifdef __cplusplus
2537extern "C" {
2538#endif
2539struct something {
2540};
2541#ifdef __cplusplus
2542}
2543#endif
2544"""
2545        self.assertEqual(self.parse(text), expected)
2546
2547    def test_macro_definition_removed(self):
2548        text = """\
2549#define MACRO_FUNCTION_NO_PARAMS static inline some_func() {}
2550MACRO_FUNCTION_NO_PARAMS()
2551
2552#define MACRO_FUNCTION_PARAMS(a) static inline some_func() { a; }
2553MACRO_FUNCTION_PARAMS(a = 1)
2554
2555something that should still be kept
2556MACRO_FUNCTION_PARAMS(b)
2557"""
2558        expected = """\
2559#define MACRO_FUNCTION_NO_PARAMS static inline some_func() { }
2560#define MACRO_FUNCTION_PARAMS(a) static inline some_func() { a; }
2561something that should still be kept
2562"""
2563        self.assertEqual(self.parse(text), expected)
2564
2565    def test_verify_timeval_itemerval(self):
2566        text = """\
2567struct __kernel_old_timeval {
2568  struct something val;
2569};
2570struct __kernel_old_itimerval {
2571  struct __kernel_old_timeval val;
2572};
2573struct fields {
2574  struct __kernel_old_timeval timeval;
2575  struct __kernel_old_itimerval itimerval;
2576};
2577"""
2578        expected = """\
2579struct fields {
2580  struct timeval timeval;
2581  struct itimerval itimerval;
2582};
2583"""
2584        self.assertEqual(self.parse(text), expected)
2585
2586    def test_token_replacement(self):
2587        text = """\
2588#define SIGRTMIN 32
2589#define SIGRTMAX _NSIG
2590#define SIGRTMAX(a,class) some_func(a, class)
2591"""
2592        expected = """\
2593#define __SIGRTMIN 32
2594#define __SIGRTMAX _KERNEL__NSIG
2595#define __SIGRTMAX(a,__linux_class) some_func(a, __linux_class)
2596"""
2597        self.assertEqual(self.parse(text), expected)
2598
2599
2600if __name__ == '__main__':
2601    unittest.main()
2602