1#!/usr/bin/python
2"""A glorified C pre-processor parser."""
3
4import ctypes
5import logging
6import os
7import re
8import site
9import utils
10
# This tool must run inside a configured Android build environment: the tree
# root is needed below to locate the in-tree libclang Python bindings.
top = os.getenv('ANDROID_BUILD_TOP')
if top is None:
    # NOTE(review): assumes utils.panic() does not return — confirm.
    utils.panic('ANDROID_BUILD_TOP not set.\n')

# Set up the env vars for libclang.
site.addsitedir(os.path.join(top, 'external/clang/bindings/python'))
17
18import clang.cindex
19from clang.cindex import conf
20from clang.cindex import Cursor
21from clang.cindex import CursorKind
22from clang.cindex import SourceLocation
23from clang.cindex import SourceRange
24from clang.cindex import TokenGroup
25from clang.cindex import TokenKind
26from clang.cindex import TranslationUnit
27
# Set up LD_LIBRARY_PATH to include libclang.so, libLLVM.so, and etc.
# Note that setting LD_LIBRARY_PATH with os.putenv() sometimes doesn't help.
# Point the cindex bindings at the prebuilt libclang shipped with the SDK
# tools instead, so no environment mutation is required.
clang.cindex.Config.set_library_path(os.path.join(top, 'prebuilts/sdk/tools/linux/lib64'))
31
32from defaults import kCppUndefinedMacro
33from defaults import kernel_remove_config_macros
34from defaults import kernel_token_replacements
35
36
# Debug switches: flip to True to trace the corresponding stage on stdout.
debugBlockParser = False
debugCppExpr = False
debugOptimIf01 = False

###############################################################################
###############################################################################
#####                                                                     #####
#####           C P P   T O K E N S                                       #####
#####                                                                     #####
###############################################################################
###############################################################################

# the list of supported C-preprocessor tokens
# plus a couple of C tokens as well
tokEOF = "\0"
tokLN = "\n"
tokSTRINGIFY = "#"
tokCONCAT = "##"
tokLOGICAND = "&&"
tokLOGICOR = "||"
tokSHL = "<<"
tokSHR = ">>"
tokEQUAL = "=="
tokNEQUAL = "!="
tokLT = "<"
tokLTE = "<="
tokGT = ">"
tokGTE = ">="
tokELLIPSIS = "..."
tokSPACE = " "
tokDEFINED = "defined"
tokLPAREN = "("
tokRPAREN = ")"
tokNOT = "!"
tokPLUS = "+"
tokMINUS = "-"
tokMULTIPLY = "*"
tokDIVIDE = "/"
tokMODULUS = "%"
tokBINAND = "&"
tokBINOR = "|"
tokBINXOR = "^"
tokCOMMA = ","
tokLBRACE = "{"
tokRBRACE = "}"
tokARROW = "->"
tokINCREMENT = "++"
tokDECREMENT = "--"
# Pseudo-token ids: placeholders for token *classes* rather than literal
# spellings (they can never collide with real C tokens).
tokNUMBER = "<number>"
tokIDENT = "<ident>"
tokSTRING = "<string>"
88
89
class Token(clang.cindex.Token):
    """A class that represents one token after parsing.

    It inherits the class in libclang, with an extra id property to hold the
    new spelling of the token. The spelling property in the base class is
    defined as read-only. New names after macro instantiation are saved in
    their ids now. It also facilitates the renaming of directive optimizations
    like replacing 'ifndef X' with 'if !defined(X)'.

    It also overrides the cursor property of the base class. Because the one
    in libclang always queries based on a single token, which usually doesn't
    hold useful information. The cursor in this class can be set by calling
    CppTokenizer.getTokensWithCursors(). Otherwise it returns the one in the
    base class.
    """

    def __init__(self, tu=None, group=None, int_data=None, ptr_data=None,
                 cursor=None):
        """Wrap a libclang token, optionally with a pre-computed cursor."""
        clang.cindex.Token.__init__(self)
        self._id = None  # overriding spelling; None means "not renamed yet"
        self._tu = tu
        self._group = group
        self._cursor = cursor
        # self.int_data and self.ptr_data are from the base class. But
        # self.int_data doesn't accept a None value.
        if int_data is not None:
            self.int_data = int_data
        self.ptr_data = ptr_data

    @property
    def id(self):
        """Name of the token (the new spelling if renamed, else original)."""
        if self._id is None:
            return self.spelling
        return self._id

    @id.setter
    def id(self, new_id):
        """Setting name of the token."""
        self._id = new_id

    @property
    def cursor(self):
        if self._cursor is None:
            # Fix: the base-class 'cursor' is a property descriptor, and the
            # old code cached the descriptor object itself. Invoke its getter
            # so the documented fallback to the base-class cursor works.
            self._cursor = clang.cindex.Token.cursor.fget(self)
        return self._cursor

    @cursor.setter
    def cursor(self, new_cursor):
        self._cursor = new_cursor

    def __repr__(self):
        if self.id == 'defined':
            return self.id
        elif self.kind == TokenKind.IDENTIFIER:
            return "(ident %s)" % self.id

        return self.id

    def __str__(self):
        return self.id
152
153
class BadExpectedToken(Exception):
    """Raised when the token stream does not match what was expected."""
157
158
159# The __contains__ function in libclang SourceRange class contains a bug. It
160# gives wrong result when dealing with single line range.
161# Bug filed with upstream:
162# http://llvm.org/bugs/show_bug.cgi?id=22243, http://reviews.llvm.org/D7277
# The __contains__ function in libclang SourceRange class contains a bug. It
# gives wrong result when dealing with single line range.
# Bug filed with upstream:
# http://llvm.org/bugs/show_bug.cgi?id=22243, http://reviews.llvm.org/D7277
def SourceRange__contains__(self, other):
    """Determine if a given location is inside the range.

    Replacement for clang.cindex.SourceRange.__contains__ (see bug links
    above); monkey-patched onto SourceRange below.
    """
    if not isinstance(other, SourceLocation):
        return False
    if other.file is None and self.start.file is None:
        # Both locations are file-less (e.g. built-in); compare lines only.
        pass
    elif (other.file is None or self.start.file is None or
          # Fix: if exactly one side had no file, the old code dereferenced
          # None.name and raised AttributeError instead of returning False.
          self.start.file.name != other.file.name or
          other.file.name != self.end.file.name):
        # Not the same file on both ends of the range.
        return False
    # same file, in between lines
    if self.start.line < other.line < self.end.line:
        return True
    # same file, same line
    elif self.start.line == other.line == self.end.line:
        if self.start.column <= other.column <= self.end.column:
            return True
    elif self.start.line == other.line:
        # same file first line
        if self.start.column <= other.column:
            return True
    elif other.line == self.end.line:
        # same file last line
        if other.column <= self.end.column:
            return True
    return False


# Install the corrected containment test on the libclang class.
SourceRange.__contains__ = SourceRange__contains__
192
193
194################################################################################
195################################################################################
196#####                                                                      #####
197#####           C P P   T O K E N I Z E R                                  #####
198#####                                                                      #####
199################################################################################
200################################################################################
201
202
class CppTokenizer(object):
    """A tokenizer that converts some input text into a list of tokens.

    It calls libclang's tokenizer to get the parsed tokens. In addition, it
    updates the cursor property in each token after parsing, by calling
    getTokensWithCursors().
    """

    # Preprocessor-only C mode; comments are kept by the parse option below
    # and filtered out again in _getTokensWithCursors().
    clang_flags = ['-E', '-x', 'c']
    options = TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD

    def __init__(self):
        """Initialize a new CppTokenizer object."""
        self._indexer = clang.cindex.Index.create()
        self._tu = None      # translation unit of the most recent parse
        self._index = 0      # read position for nextToken()
        self.tokens = None   # list of Token, filled in by parseString/File

    def _getTokensWithCursors(self):
        """Helper method to return all tokens with their cursors.

        The cursor property in a clang Token doesn't provide enough
        information. Because it is queried based on single token each time
        without any context, i.e. via calling conf.lib.clang_annotateTokens()
        with only one token given. So we often see 'INVALID_FILE' in one
        token's cursor. In this function it passes all the available tokens
        to get more informative cursors.
        """

        # Out-parameters filled in by clang_tokenize below.
        tokens_memory = ctypes.POINTER(clang.cindex.Token)()
        tokens_count = ctypes.c_uint()

        conf.lib.clang_tokenize(self._tu, self._tu.cursor.extent,
                                ctypes.byref(tokens_memory),
                                ctypes.byref(tokens_count))

        count = int(tokens_count.value)

        # If we get no tokens, no memory was allocated. Be sure not to return
        # anything and potentially call a destructor on nothing.
        if count < 1:
            return

        # Annotate all tokens in one call so each gets a context-aware cursor.
        cursors = (Cursor * count)()
        cursors_memory = ctypes.cast(cursors, ctypes.POINTER(Cursor))

        conf.lib.clang_annotateTokens(self._tu, tokens_memory, count,
                                      cursors_memory)

        # Reinterpret the raw pointer as an array of 'count' tokens. The
        # TokenGroup takes ownership of the clang-allocated memory so it is
        # released when the tokens are garbage-collected.
        tokens_array = ctypes.cast(
            tokens_memory,
            ctypes.POINTER(clang.cindex.Token * count)).contents
        token_group = TokenGroup(self._tu, tokens_memory, tokens_count)

        tokens = []
        for i in xrange(0, count):
            token = Token(self._tu, token_group,
                          int_data=tokens_array[i].int_data,
                          ptr_data=tokens_array[i].ptr_data,
                          cursor=cursors[i])
            # We only want non-comment tokens.
            if token.kind != TokenKind.COMMENT:
                tokens.append(token)

        return tokens

    def parseString(self, lines):
        """Parse a list of text lines into a BlockList object."""
        # Use an unsaved in-memory file so no file needs to exist on disk.
        file_ = 'dummy.c'
        self._tu = self._indexer.parse(file_, self.clang_flags,
                                       unsaved_files=[(file_, lines)],
                                       options=self.options)
        self.tokens = self._getTokensWithCursors()

    def parseFile(self, file_):
        """Parse a file into a BlockList object."""
        self._tu = self._indexer.parse(file_, self.clang_flags,
                                       options=self.options)
        self.tokens = self._getTokensWithCursors()

    def nextToken(self):
        """Return next token from the list, or None when exhausted."""
        if self._index < len(self.tokens):
            t = self.tokens[self._index]
            self._index += 1
            return t
        else:
            return None
291
292
class CppStringTokenizer(CppTokenizer):
    """Tokenizer variant whose input is an in-memory string."""

    def __init__(self, line):
        super(CppStringTokenizer, self).__init__()
        self.parseString(line)
299
300
class CppFileTokenizer(CppTokenizer):
    """Tokenizer variant whose input is a file on disk."""

    def __init__(self, file_):
        super(CppFileTokenizer, self).__init__()
        self.parseFile(file_)
307
308
309# Unit testing
310#
class CppTokenizerTester(object):
    """A class used to test CppTokenizer classes.

    Feed it a tokenizer, then assert the expected token ids (and optionally
    their source positions) one at a time; mismatches raise BadExpectedToken.
    """

    def __init__(self, tokenizer=None):
        self._tokenizer = tokenizer
        self._token = None  # last token returned by the tokenizer

    def setTokenizer(self, tokenizer):
        """Replace the tokenizer used by subsequent expectations."""
        self._tokenizer = tokenizer

    def expect(self, id):
        """Consume the next token and check its id; raise on mismatch."""
        self._token = self._tokenizer.nextToken()
        if self._token is None:
            tokid = ''
        else:
            tokid = self._token.id
        if tokid == id:
            return
        raise BadExpectedToken("###  BAD TOKEN: '%s' expecting '%s'" % (
            tokid, id))

    def expectToken(self, id, line, col):
        """Consume the next token and check id, line and column."""
        self.expect(id)
        if self._token.location.line != line:
            # Fix: the old message read self._token.lineno, which doesn't
            # exist on clang tokens and raised AttributeError while reporting.
            raise BadExpectedToken(
                "###  BAD LINENO: token '%s' got '%d' expecting '%d'" % (
                    id, self._token.location.line, line))
        if self._token.location.column != col:
            # Fix: same problem with the nonexistent 'colno' attribute.
            raise BadExpectedToken("###  BAD COLNO: '%d' expecting '%d'" % (
                self._token.location.column, col))

    def expectTokens(self, tokens):
        """Check a sequence of (id, line, col) triples."""
        for id, line, col in tokens:
            self.expectToken(id, line, col)

    def expectList(self, list_):
        """Check a sequence of token ids."""
        for item in list_:
            self.expect(item)
349
350
def test_CppTokenizer():
    """Self-test for CppStringTokenizer: ids, comments, and positions."""
    checker = CppTokenizerTester()

    # Mixed punctuation and identifiers.
    checker.setTokenizer(CppStringTokenizer("#an/example  && (01923_xy)"))
    checker.expectList(["#", "an", "/", "example", tokLOGICAND, tokLPAREN,
                        "01923_xy", tokRPAREN])

    # Macro-style call combined with a 'defined' expression.
    checker.setTokenizer(CppStringTokenizer("FOO(BAR) && defined(BAZ)"))
    checker.expectList(["FOO", tokLPAREN, "BAR", tokRPAREN, tokLOGICAND,
                        "defined", tokLPAREN, "BAZ", tokRPAREN])

    # Comments are dropped entirely, even when they contain directives.
    checker.setTokenizer(CppStringTokenizer("/*\n#\n*/"))
    checker.expectList([])

    # Newlines separate tokens.
    checker.setTokenizer(CppStringTokenizer("first\nsecond"))
    checker.expectList(["first", "second"])

    # Locations are 1-based in both line and column.
    checker.setTokenizer(CppStringTokenizer("first second\n  third"))
    checker.expectTokens([("first", 1, 1),
                          ("second", 1, 7),
                          ("third", 2, 3)])

    # A multi-line comment swallows everything after 'boo'.
    checker.setTokenizer(CppStringTokenizer("boo /* what the\nhell */"))
    checker.expectTokens([("boo", 1, 1)])

    # Backslash-newline continuation: the next token lands on line 2.
    checker.setTokenizer(CppStringTokenizer("an \\\n example"))
    checker.expectTokens([("an", 1, 1),
                          ("example", 2, 2)])
    return True
380
381
382################################################################################
383################################################################################
384#####                                                                      #####
385#####           C P P   E X P R E S S I O N S                              #####
386#####                                                                      #####
387################################################################################
388################################################################################
389
390
class CppExpr(object):
    """A class that models the condition of #if directives into an expr tree.

    Each node in the tree is of the form (op, arg) or (op, arg1, arg2) where
    "op" is a string describing the operation
    """

    unaries = ["!", "~"]
    binaries = ["+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%",
                "&", "|", "^", "<<", ">>", "==", "!=", "?", ":"]
    # Binding strength for precedence climbing; larger binds tighter.
    precedences = {
        "?": 1, ":": 1,
        "||": 2,
        "&&": 3,
        "|": 4,
        "^": 5,
        "&": 6,
        "==": 7, "!=": 7,
        "<": 8, "<=": 8, ">": 8, ">=": 8,
        "<<": 9, ">>": 9,
        "+": 10, "-": 10,
        "*": 11, "/": 11, "%": 11,
        "!": 12, "~": 12
    }

    def __init__(self, tokens):
        """Initialize a CppExpr. 'tokens' must be a CppToken list."""
        self.tokens = tokens
        self._num_tokens = len(tokens)
        self._index = 0

        if debugCppExpr:
            print("CppExpr: trying to parse %s" % repr(tokens))
        self.expr = self.parseExpression(0)
        if debugCppExpr:
            print("CppExpr: got " + repr(self.expr))
        if self._index != self._num_tokens:
            self.throw(BadExpectedToken, "crap at end of input (%d != %d): %s"
                       % (self._index, self._num_tokens, repr(tokens)))

    def throw(self, exception, msg):
        """Print 'msg' with the current token's location, then raise."""
        if self._index < self._num_tokens:
            tok = self.tokens[self._index]
            print("%d:%d: %s" % (tok.location.line, tok.location.column, msg))
        else:
            print("EOF: %s" % msg)
        raise exception(msg)

    def expectId(self, id):
        """Check that a given token id is at the current position."""
        # Fix: check the bounds *before* indexing. The old order indexed
        # first, so running out of tokens raised a bare IndexError instead
        # of a diagnosable BadExpectedToken.
        if self._index >= self._num_tokens:
            self.throw(BadExpectedToken,
                       "### expecting '%s' in expression, got end of input"
                       % id)
        token = self.tokens[self._index]
        if token.id != id:
            self.throw(BadExpectedToken,
                       "### expecting '%s' in expression, got '%s'" % (
                           id, token.id))
        self._index += 1

    @staticmethod
    def _strip_int_suffix(token):
        """Strip C integer-constant suffixes (u/U/l/L, possibly combined).

        Fix: the old code removed at most one suffix character, so literals
        such as "1UL" or "0x10ull" failed to parse as integers.
        """
        while token and token[-1] in "ULul":
            token = token[:-1]
        return token

    def is_decimal(self):
        """Parse a decimal literal at the cursor; return node or None."""
        token = self._strip_int_suffix(self.tokens[self._index].id)
        try:
            val = int(token, 10)
            self._index += 1
            return ('int', val)
        except ValueError:
            return None

    def is_octal(self):
        """Parse an octal literal (leading '0'); return node or None."""
        token = self._strip_int_suffix(self.tokens[self._index].id)
        if len(token) < 2 or token[0] != '0':
            return None
        try:
            val = int(token, 8)
            self._index += 1
            return ('oct', val)
        except ValueError:
            return None

    def is_hexadecimal(self):
        """Parse a hex literal ('0x'/'0X'); return node or None."""
        token = self._strip_int_suffix(self.tokens[self._index].id)
        if len(token) < 3 or (token[:2] != '0x' and token[:2] != '0X'):
            return None
        try:
            val = int(token, 16)
            self._index += 1
            return ('hex', val)
        except ValueError:
            return None

    def is_integer(self):
        """Parse any integer literal. Hex must be tried before octal and
        decimal since '0x..' also has a leading '0'."""
        if self.tokens[self._index].kind != TokenKind.LITERAL:
            return None

        c = self.is_hexadecimal()
        if c:
            return c

        c = self.is_octal()
        if c:
            return c

        c = self.is_decimal()
        if c:
            return c

        return None

    def is_number(self):
        """Parse an optionally signed ('+'/'-') integer literal."""
        t = self.tokens[self._index]
        if t.id == tokMINUS and self._index + 1 < self._num_tokens:
            self._index += 1
            c = self.is_integer()
            if c:
                op, val = c
                return (op, -val)
        if t.id == tokPLUS and self._index + 1 < self._num_tokens:
            self._index += 1
            c = self.is_integer()
            if c:
                return c

        return self.is_integer()

    def is_defined(self):
        """Parse 'defined X' or 'defined(X)'; return ("defined", name)."""
        t = self.tokens[self._index]
        if t.id != tokDEFINED:
            return None

        # We have the defined keyword, check the rest.
        self._index += 1
        used_parens = False
        if (self._index < self._num_tokens and
            self.tokens[self._index].id == tokLPAREN):
            used_parens = True
            self._index += 1

        if self._index >= self._num_tokens:
            self.throw(BadExpectedToken,
                       "### 'defined' must be followed by macro name or left "
                       "paren")

        t = self.tokens[self._index]
        if t.kind != TokenKind.IDENTIFIER:
            self.throw(BadExpectedToken,
                       "### 'defined' must be followed by macro name")

        self._index += 1
        if used_parens:
            self.expectId(tokRPAREN)

        return ("defined", t.id)

    def is_call_or_ident(self):
        """Parse an identifier or a macro call 'FOO(a, b)' at the cursor."""
        if self._index >= self._num_tokens:
            return None

        t = self.tokens[self._index]
        if t.kind != TokenKind.IDENTIFIER:
            return None

        name = t.id

        self._index += 1
        if (self._index >= self._num_tokens or
            self.tokens[self._index].id != tokLPAREN):
            return ("ident", name)

        params = []
        depth = 1
        self._index += 1
        j = self._index  # start of the current parameter's token slice
        while self._index < self._num_tokens:
            id = self.tokens[self._index].id
            if id == tokLPAREN:
                depth += 1
            elif depth == 1 and (id == tokCOMMA or id == tokRPAREN):
                # A top-level ',' or ')' terminates the current parameter.
                k = self._index
                param = self.tokens[j:k]
                params.append(param)
                if id == tokRPAREN:
                    break
                j = self._index + 1
            elif id == tokRPAREN:
                depth -= 1
            self._index += 1

        if self._index >= self._num_tokens:
            # Ran out of tokens before the closing ')'.
            return None

        self._index += 1
        return ("call", (name, params))

    # Implements the "precedence climbing" algorithm from
    # http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm.
    # The "classic" algorithm would be fine if we were using a tool to
    # generate the parser, but we're not. Dijkstra's "shunting yard"
    # algorithm hasn't been necessary yet.

    def parseExpression(self, minPrecedence):
        """Parse a (sub-)expression whose operators bind with strength of at
        least 'minPrecedence'; return the expression tree or None."""
        if self._index >= self._num_tokens:
            return None

        node = self.parsePrimary()
        while (self.token() and self.isBinary(self.token()) and
               self.precedence(self.token()) >= minPrecedence):
            op = self.token()
            self.nextToken()
            rhs = self.parseExpression(self.precedence(op) + 1)
            node = (op.id, node, rhs)

        return node

    def parsePrimary(self):
        """Parse a primary: unary op, parenthesized expr, number, defined,
        identifier or macro call."""
        op = self.token()
        if self.isUnary(op):
            self.nextToken()
            return (op.id, self.parseExpression(self.precedence(op)))

        primary = None
        if op.id == tokLPAREN:
            self.nextToken()
            primary = self.parseExpression(0)
            self.expectId(tokRPAREN)
        elif op.id == "?":
            self.nextToken()
            primary = self.parseExpression(0)
            self.expectId(":")
        elif op.id == '+' or op.id == '-' or op.kind == TokenKind.LITERAL:
            primary = self.is_number()
        # Checking for 'defined' needs to come first now because 'defined' is
        # recognized as IDENTIFIER.
        elif op.id == tokDEFINED:
            primary = self.is_defined()
        elif op.kind == TokenKind.IDENTIFIER:
            primary = self.is_call_or_ident()
        else:
            self.throw(BadExpectedToken,
                       "didn't expect to see a %s in factor" % (
                           self.tokens[self._index].id))
        return primary

    def isBinary(self, token):
        """Return True if 'token' is a binary operator."""
        return token.id in self.binaries

    def isUnary(self, token):
        """Return True if 'token' is a unary operator."""
        return token.id in self.unaries

    def precedence(self, token):
        """Return the binding strength of 'token' (None if not an op)."""
        return self.precedences.get(token.id)

    def token(self):
        """Return the current token, or None at end of input."""
        if self._index >= self._num_tokens:
            return None
        return self.tokens[self._index]

    def nextToken(self):
        """Advance and return the new current token (None at end)."""
        self._index += 1
        if self._index >= self._num_tokens:
            return None
        return self.tokens[self._index]

    def dump_node(self, e):
        """Render node 'e' as an s-expression string (used by __repr__)."""
        op = e[0]
        line = "(" + op
        if op == "int":
            line += " %d)" % e[1]
        elif op == "oct":
            line += " 0%o)" % e[1]
        elif op == "hex":
            line += " 0x%x)" % e[1]
        elif op == "ident":
            line += " %s)" % e[1]
        elif op == "defined":
            line += " %s)" % e[1]
        elif op == "call":
            arg = e[1]
            line += " %s [" % arg[0]
            prefix = ""
            for param in arg[1]:
                par = ""
                for tok in param:
                    par += str(tok)
                line += "%s%s" % (prefix, par)
                prefix = ","
            line += "])"
        elif op in CppExpr.unaries:
            line += " %s)" % self.dump_node(e[1])
        elif op in CppExpr.binaries:
            line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2]))
        else:
            # Unknown node kind: dump it verbatim for debugging.
            line += " ?%s)" % repr(e[1])

        return line

    def __repr__(self):
        return self.dump_node(self.expr)

    def source_node(self, e):
        """Render node 'e' back into C source text (used by __str__)."""
        op = e[0]
        if op == "int":
            return "%d" % e[1]
        if op == "hex":
            return "0x%x" % e[1]
        if op == "oct":
            return "0%o" % e[1]
        if op == "ident":
            # XXX: should try to expand
            return e[1]
        if op == "defined":
            return "defined(%s)" % e[1]

        prec = CppExpr.precedences.get(op, 1000)
        arg = e[1]
        if op in CppExpr.unaries:
            arg_src = self.source_node(arg)
            arg_op = arg[0]
            arg_prec = CppExpr.precedences.get(arg_op, 1000)
            # Fix: emit the actual operator. The old code hard-coded "!",
            # which rendered "~x" incorrectly as "!x".
            if arg_prec < prec:
                return op + "(" + arg_src + ")"
            else:
                return op + arg_src
        if op in CppExpr.binaries:
            arg2 = e[2]
            arg1_op = arg[0]
            arg2_op = arg2[0]
            arg1_src = self.source_node(arg)
            arg2_src = self.source_node(arg2)
            # Parenthesize operands that bind more loosely than this op.
            if CppExpr.precedences.get(arg1_op, 1000) < prec:
                arg1_src = "(%s)" % arg1_src
            if CppExpr.precedences.get(arg2_op, 1000) < prec:
                arg2_src = "(%s)" % arg2_src

            return "%s %s %s" % (arg1_src, op, arg2_src)
        return "???"

    def __str__(self):
        return self.source_node(self.expr)

    @staticmethod
    def int_node(e):
        """Return the value of an integer-literal node, else None."""
        if e[0] in ["int", "oct", "hex"]:
            return e[1]
        else:
            return None

    def toInt(self):
        """Return the constant value of the whole expression, or None."""
        return self.int_node(self.expr)

    def optimize_node(self, e, macros=None):
        """Simplify node 'e' given a {name: value} macro dict.

        Folds defined()/identifier lookups through 'macros', collapses
        constant '!', '&&' and '||' subtrees, and (when
        kernel_remove_config_macros is set) treats undefined CONFIG_*
        macros as 0.
        """
        if macros is None:
            macros = {}
        op = e[0]

        if op == "defined":
            op, name = e
            if name in macros:  # 'in' replaces deprecated dict.has_key()
                if macros[name] == kCppUndefinedMacro:
                    return ("int", 0)
                else:
                    try:
                        value = int(macros[name])
                        return ("int", value)
                    except ValueError:
                        # Macro mapped to another macro name: keep symbolic.
                        return ("defined", macros[name])

            if kernel_remove_config_macros and name.startswith("CONFIG_"):
                return ("int", 0)

            return e

        elif op == "ident":
            op, name = e
            if name in macros:
                try:
                    value = int(macros[name])
                    expanded = ("int", value)
                except ValueError:
                    expanded = ("ident", macros[name])
                # Re-optimize in case the replacement is itself known.
                return self.optimize_node(expanded, macros)
            return e

        elif op == "!":
            op, v = e
            v = self.optimize_node(v, macros)
            if v[0] == "int":
                if v[1] == 0:
                    return ("int", 1)
                else:
                    return ("int", 0)
            return ('!', v)

        elif op == "&&":
            op, l, r = e
            l = self.optimize_node(l, macros)
            r = self.optimize_node(r, macros)
            li = self.int_node(l)
            ri = self.int_node(r)
            # A constant operand either decides the result or drops out.
            if li is not None:
                if li == 0:
                    return ("int", 0)
                else:
                    return r
            elif ri is not None:
                if ri == 0:
                    return ("int", 0)
                else:
                    return l
            return (op, l, r)

        elif op == "||":
            op, l, r = e
            l = self.optimize_node(l, macros)
            r = self.optimize_node(r, macros)
            li = self.int_node(l)
            ri = self.int_node(r)
            if li is not None:
                if li == 0:
                    return r
                else:
                    return ("int", 1)
            elif ri is not None:
                if ri == 0:
                    return l
                else:
                    return ("int", 1)
            return (op, l, r)

        else:
            # Other operators are left untouched.
            return e

    def optimize(self, macros=None):
        """Simplify self.expr in place using 'macros'."""
        if macros is None:
            macros = {}
        self.expr = self.optimize_node(self.expr, macros)
830
831
def test_cpp_expr(expr, expected):
    """Parse 'expr' and compare its dump form against 'expected'."""
    global failure_count
    actual = repr(CppExpr(CppStringTokenizer(expr).tokens))
    if actual == expected:
        return
    print ("[FAIL]: expression '%s' generates '%s', should be "
           "'%s'" % (expr, actual, expected))
    failure_count += 1
840
841
def test_cpp_expr_optim(expr, expected, macros=None):
    """Parse 'expr', optimize it with 'macros', and compare the dump."""
    global failure_count
    macros = {} if macros is None else macros
    parsed = CppExpr(CppStringTokenizer(expr).tokens)
    parsed.optimize(macros)
    actual = repr(parsed)
    if actual == expected:
        return
    print ("[FAIL]: optimized expression '%s' generates '%s' with "
           "macros %s, should be '%s'" % (expr, actual, macros, expected))
    failure_count += 1
853
854
def test_cpp_expr_source(expr, expected):
    """Parse 'expr' and compare its C-source rendering with 'expected'."""
    global failure_count
    actual = str(CppExpr(CppStringTokenizer(expr).tokens))
    if actual == expected:
        return
    print ("[FAIL]: source expression '%s' generates '%s', should "
           "be '%s'" % (expr, actual, expected))
    failure_count += 1
863
864
865def test_CppExpr():
866    test_cpp_expr("0", "(int 0)")
867    test_cpp_expr("1", "(int 1)")
868    test_cpp_expr("-5", "(int -5)")
869    test_cpp_expr("+1", "(int 1)")
870    test_cpp_expr("0U", "(int 0)")
871    test_cpp_expr("015", "(oct 015)")
872    test_cpp_expr("015l", "(oct 015)")
873    test_cpp_expr("0x3e", "(hex 0x3e)")
874    test_cpp_expr("(0)", "(int 0)")
875    test_cpp_expr("1 && 1", "(&& (int 1) (int 1))")
876    test_cpp_expr("1 && 0", "(&& (int 1) (int 0))")
877    test_cpp_expr("EXAMPLE", "(ident EXAMPLE)")
878    test_cpp_expr("EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))")
879    test_cpp_expr("defined(EXAMPLE)", "(defined EXAMPLE)")
880    test_cpp_expr("defined ( EXAMPLE ) ", "(defined EXAMPLE)")
881    test_cpp_expr("!defined(EXAMPLE)", "(! (defined EXAMPLE))")
882    test_cpp_expr("defined(ABC) || defined(BINGO)",
883                  "(|| (defined ABC) (defined BINGO))")
884    test_cpp_expr("FOO(BAR,5)", "(call FOO [BAR,5])")
885    test_cpp_expr("A == 1 || defined(B)",
886                  "(|| (== (ident A) (int 1)) (defined B))")
887
888    test_cpp_expr_optim("0", "(int 0)")
889    test_cpp_expr_optim("1", "(int 1)")
890    test_cpp_expr_optim("1 && 1", "(int 1)")
891    test_cpp_expr_optim("1 && +1", "(int 1)")
892    test_cpp_expr_optim("0x1 && 01", "(oct 01)")
893    test_cpp_expr_optim("1 && 0", "(int 0)")
894    test_cpp_expr_optim("0 && 1", "(int 0)")
895    test_cpp_expr_optim("0 && 0", "(int 0)")
896    test_cpp_expr_optim("1 || 1", "(int 1)")
897    test_cpp_expr_optim("1 || 0", "(int 1)")
898    test_cpp_expr_optim("0 || 1", "(int 1)")
899    test_cpp_expr_optim("0 || 0", "(int 0)")
900    test_cpp_expr_optim("A", "(ident A)")
901    test_cpp_expr_optim("A", "(int 1)", {"A": 1})
902    test_cpp_expr_optim("A || B", "(int 1)", {"A": 1})
903    test_cpp_expr_optim("A || B", "(int 1)", {"B": 1})
904    test_cpp_expr_optim("A && B", "(ident B)", {"A": 1})
905    test_cpp_expr_optim("A && B", "(ident A)", {"B": 1})
906    test_cpp_expr_optim("A && B", "(&& (ident A) (ident B))")
907    test_cpp_expr_optim("EXAMPLE", "(ident EXAMPLE)")
908    test_cpp_expr_optim("EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))")
909    test_cpp_expr_optim("defined(EXAMPLE)", "(defined EXAMPLE)")
910    test_cpp_expr_optim("defined(EXAMPLE)", "(defined XOWOE)",
911                        {"EXAMPLE": "XOWOE"})
912    test_cpp_expr_optim("defined(EXAMPLE)", "(int 0)",
913                        {"EXAMPLE": kCppUndefinedMacro})
914    test_cpp_expr_optim("!defined(EXAMPLE)", "(! (defined EXAMPLE))")
915    test_cpp_expr_optim("!defined(EXAMPLE)", "(! (defined XOWOE))",
916                        {"EXAMPLE": "XOWOE"})
917    test_cpp_expr_optim("!defined(EXAMPLE)", "(int 1)",
918                        {"EXAMPLE": kCppUndefinedMacro})
919    test_cpp_expr_optim("defined(A) || defined(B)",
920                        "(|| (defined A) (defined B))")
921    test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", {"A": "1"})
922    test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", {"B": "1"})
923    test_cpp_expr_optim("defined(A) || defined(B)", "(defined A)",
924                        {"B": kCppUndefinedMacro})
925    test_cpp_expr_optim("defined(A) || defined(B)", "(int 0)",
926                        {"A": kCppUndefinedMacro, "B": kCppUndefinedMacro})
927    test_cpp_expr_optim("defined(A) && defined(B)",
928                        "(&& (defined A) (defined B))")
929    test_cpp_expr_optim("defined(A) && defined(B)",
930                        "(defined B)", {"A": "1"})
931    test_cpp_expr_optim("defined(A) && defined(B)",
932                        "(defined A)", {"B": "1"})
933    test_cpp_expr_optim("defined(A) && defined(B)", "(int 0)",
934                        {"B": kCppUndefinedMacro})
935    test_cpp_expr_optim("defined(A) && defined(B)",
936                        "(int 0)", {"A": kCppUndefinedMacro})
937    test_cpp_expr_optim("A == 1 || defined(B)",
938                        "(|| (== (ident A) (int 1)) (defined B))")
939    test_cpp_expr_optim(
940        "defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)",
941        "(|| (! (defined __GLIBC__)) (< (ident __GLIBC__) (int 2)))",
942        {"__KERNEL__": kCppUndefinedMacro})
943
944    test_cpp_expr_source("0", "0")
945    test_cpp_expr_source("1", "1")
946    test_cpp_expr_source("1 && 1", "1 && 1")
947    test_cpp_expr_source("1 && 0", "1 && 0")
948    test_cpp_expr_source("0 && 1", "0 && 1")
949    test_cpp_expr_source("0 && 0", "0 && 0")
950    test_cpp_expr_source("1 || 1", "1 || 1")
951    test_cpp_expr_source("1 || 0", "1 || 0")
952    test_cpp_expr_source("0 || 1", "0 || 1")
953    test_cpp_expr_source("0 || 0", "0 || 0")
954    test_cpp_expr_source("EXAMPLE", "EXAMPLE")
955    test_cpp_expr_source("EXAMPLE - 3", "EXAMPLE - 3")
956    test_cpp_expr_source("defined(EXAMPLE)", "defined(EXAMPLE)")
957    test_cpp_expr_source("defined EXAMPLE", "defined(EXAMPLE)")
958    test_cpp_expr_source("A == 1 || defined(B)", "A == 1 || defined(B)")
959
960
961################################################################################
962################################################################################
963#####                                                                      #####
964#####          C P P   B L O C K                                           #####
965#####                                                                      #####
966################################################################################
967################################################################################
968
969
class Block(object):
    """A class used to model a block of input source text.

    There are two block types:
      - directive blocks: contain the tokens of a single pre-processor
        directive (e.g. #if)
      - text blocks, contain the tokens of non-directive blocks

    The cpp parser class below will transform an input source file into a list
    of Block objects (grouped in a BlockList object for convenience)
    """

    def __init__(self, tokens, directive=None, lineno=0, identifier=None):
        """Initialize a new block, if 'directive' is None, it is a text block.

        NOTE: This automatically converts '#ifdef MACRO' into
        '#if defined(MACRO)' and '#ifndef MACRO' into '#if !defined(MACRO)'.

        Args:
            tokens: list of Token objects making up the block's content.
            directive: the directive name without the leading '#'
                (e.g. 'if', 'define'), or None for a text block.
            lineno: source line number for the block; when <= 0, the line
                of the first token is used instead.
            identifier: for a '#define' directive, the macro identifier
                being defined; None otherwise.
        """

        if directive == "ifdef":
            # Rewrite '#ifdef X' into '#if defined X' by prepending a
            # synthetic 'defined' token to the token list.
            tok = Token()
            tok.id = tokDEFINED
            tokens = [tok] + tokens
            directive = "if"

        elif directive == "ifndef":
            # Rewrite '#ifndef X' into '#if !defined X' by prepending
            # synthetic '!' and 'defined' tokens.
            tok1 = Token()
            tok2 = Token()
            tok1.id = tokNOT
            tok2.id = tokDEFINED
            tokens = [tok1, tok2] + tokens
            directive = "if"

        self.tokens = tokens
        self.directive = directive
        self.define_id = identifier
        if lineno > 0:
            self.lineno = lineno
        else:
            # Fall back to the location of the first token.
            self.lineno = self.tokens[0].location.line

        if self.isIf():
            # Pre-parse the conditional expression of #if/#elif blocks so
            # it can later be optimized and re-emitted.
            self.expr = CppExpr(self.tokens)

    def isDirective(self):
        """Return True iff this is a directive block."""
        return self.directive is not None

    def isConditional(self):
        """Return True iff this is a conditional directive block."""
        return self.directive in ["if", "ifdef", "ifndef", "else", "elif",
                                  "endif"]

    def isDefine(self):
        """Return the macro name in a #define directive, or None otherwise."""
        if self.directive != "define":
            return None
        return self.define_id

    def isIf(self):
        """Return True iff this is an #if-like directive block."""
        # NOTE: 'ifdef'/'ifndef' appear here for completeness; __init__
        # rewrites both into 'if' before this can be called.
        return self.directive in ["if", "ifdef", "ifndef", "elif"]

    def isEndif(self):
        """Return True iff this is an #endif directive block."""
        return self.directive == "endif"

    def isInclude(self):
        """Check whether this is a #include directive.

        If true, returns the corresponding file name (with brackets or
        double-quotes). None otherwise.
        """

        if self.directive != "include":
            return None
        return ''.join([str(x) for x in self.tokens])

    @staticmethod
    def format_blocks(tokens, indent=0):
        """Return the formatted lines of strings with proper indentation.

        Args:
            tokens: the list of tokens to pretty-print.
            indent: current indentation level, in spaces.

        Returns:
            A (lines, indent) tuple where 'lines' is the list of formatted
            text lines and 'indent' is the indentation level after the last
            token (each '{'/'}' adjusts it by two spaces).
        """
        newline = True  # True when the next token must start a new line.
        result = []
        buf = ''  # Accumulates the current (unfinished) output line.
        i = 0
        while i < len(tokens):
            t = tokens[i]
            if t.id == '{':
                buf += ' {'
                result.append(strip_space(buf))
                indent += 2
                buf = ''
                newline = True
            elif t.id == '}':
                indent -= 2
                if not newline:
                    result.append(strip_space(buf))
                # Look ahead to determine if it's the end of line.
                if (i + 1 < len(tokens) and
                    (tokens[i+1].id == ';' or
                     tokens[i+1].id in ['else', '__attribute__',
                                        '__attribute', '__packed'] or
                     tokens[i+1].kind == TokenKind.IDENTIFIER)):
                    # More tokens follow on the same line (e.g. '} foo;').
                    buf = ' ' * indent + '}'
                    newline = False
                else:
                    result.append(' ' * indent + '}')
                    buf = ''
                    newline = True
            elif t.id == ';':
                result.append(strip_space(buf) + ';')
                buf = ''
                newline = True
            # We prefer a new line for each constant in enum.
            elif t.id == ',' and t.cursor.kind == CursorKind.ENUM_DECL:
                result.append(strip_space(buf) + ',')
                buf = ''
                newline = True
            else:
                if newline:
                    buf += ' ' * indent + str(t)
                else:
                    buf += ' ' + str(t)
                newline = False
            i += 1

        # Flush any trailing partial line.
        if buf:
            result.append(strip_space(buf))

        return result, indent

    def writeWithWarning(self, out, warning, left_count, repeat_count, indent):
        """Dump the current block with warnings.

        Writes the block's text to 'out', re-emitting the 'warning' text
        every 'repeat_count' output lines; 'left_count' is the number of
        lines remaining before the next warning is due.

        Returns:
            An updated (left_count, indent) tuple to pass to the next
            block's writeWithWarning() call.
        """
        # removeWhiteSpace() will sometimes create non-directive blocks
        # without any tokens. These come from blocks that only contained
        # empty lines and spaces. They should not be printed in the final
        # output, and then should not be counted for this operation.
        #
        if self.directive is None and not self.tokens:
            return left_count, indent

        if self.directive:
            # A directive is always a single output line.
            out.write(str(self) + '\n')
            left_count -= 1
            if left_count == 0:
                out.write(warning)
                left_count = repeat_count

        else:
            # Text blocks may span several formatted lines.
            lines, indent = self.format_blocks(self.tokens, indent)
            for line in lines:
                out.write(line + '\n')
                left_count -= 1
                if left_count == 0:
                    out.write(warning)
                    left_count = repeat_count

        return left_count, indent

    def __repr__(self):
        """Generate the representation of a given block."""
        if self.directive:
            result = "#%s " % self.directive
            if self.isIf():
                result += repr(self.expr)
            else:
                for tok in self.tokens:
                    result += repr(tok)
        else:
            result = ""
            for tok in self.tokens:
                result += repr(tok)

        return result

    def __str__(self):
        """Generate the string representation of a given block."""
        if self.directive:
            # "#if"
            if self.directive == "if":
                # small optimization to re-generate #ifdef and #ifndef
                e = self.expr.expr
                op = e[0]
                if op == "defined":
                    result = "#ifdef %s" % e[1]
                elif op == "!" and e[1][0] == "defined":
                    result = "#ifndef %s" % e[1][1]
                else:
                    result = "#if " + str(self.expr)

            # "#define"
            elif self.isDefine():
                result = "#%s %s" % (self.directive, self.define_id)
                if self.tokens:
                    result += " "
                expr = strip_space(' '.join([tok.id for tok in self.tokens]))
                # remove the space between name and '(' in function call
                result += re.sub(r'(\w+) \(', r'\1(', expr)

            # "#error"
            # Concatenating tokens with a space separator, because they may
            # not be quoted and broken into several tokens
            elif self.directive == "error":
                result = "#error %s" % ' '.join([tok.id for tok in self.tokens])

            else:
                # Any other directive: emit tokens verbatim.
                result = "#%s" % self.directive
                if self.tokens:
                    result += " "
                result += ''.join([tok.id for tok in self.tokens])
        else:
            # Text block: pretty-print the tokens.
            lines, _ = self.format_blocks(self.tokens)
            result = '\n'.join(lines)

        return result
1185
1186
class BlockList(object):
    """A convenience class used to hold and process a list of blocks.

    It calls the cpp parser to get the blocks.
    """

    def __init__(self, blocks):
        """Initialize with a list of Block objects."""
        self.blocks = blocks

    def __len__(self):
        return len(self.blocks)

    def __getitem__(self, n):
        return self.blocks[n]

    def __repr__(self):
        return repr(self.blocks)

    def __str__(self):
        # Each block renders as one or more source lines.
        result = '\n'.join([str(b) for b in self.blocks])
        return result

    def dump(self):
        """Dump all the blocks in current BlockList."""
        print '##### BEGIN #####'
        for i, b in enumerate(self.blocks):
            print '### BLOCK %d ###' % i
            print b
        print '##### END #####'

    def optimizeIf01(self):
        """Remove the code between #if 0 .. #endif in a BlockList."""
        self.blocks = optimize_if01(self.blocks)

    def optimizeMacros(self, macros):
        """Remove known defined and undefined macros from a BlockList."""
        for b in self.blocks:
            if b.isIf():
                b.expr.optimize(macros)

    def removeMacroDefines(self, macros):
        """Remove known macro definitions from a BlockList."""
        self.blocks = remove_macro_defines(self.blocks, macros)

    def optimizeAll(self, macros):
        """Optimize known macros, then remove '#if 0..#endif' sections."""
        self.optimizeMacros(macros)
        self.optimizeIf01()
        return

    def findIncludes(self):
        """Return the list of included files in a BlockList."""
        result = []
        for b in self.blocks:
            i = b.isInclude()
            if i:
                result.append(i)
        return result

    def write(self, out):
        """Write the string form of all blocks to the 'out' stream."""
        out.write(str(self))

    def writeWithWarning(self, out, warning, repeat_count):
        """Write all blocks to 'out', repeating 'warning' every
        'repeat_count' output lines."""
        left_count = repeat_count
        indent = 0
        for b in self.blocks:
            left_count, indent = b.writeWithWarning(out, warning, left_count,
                                                    repeat_count, indent)

    def removeVarsAndFuncs(self, knownStatics=None):
        """Remove variable and function declarations.

        All extern and static declarations corresponding to variable and
        function declarations are removed. We only accept typedefs and
        enum/structs/union declarations.

        However, we keep the definitions corresponding to the set of known
        static inline functions in the set 'knownStatics', which is useful
        for optimized byteorder swap functions and stuff like that.

        Args:
            knownStatics: optional set of identifier names to keep; when
                None, an empty set is used.
        """

        # NOTE: It's also removing function-like macros, such as __SYSCALL(...)
        # in uapi/asm-generic/unistd.h, or KEY_FIELD(...) in linux/bcache.h.
        # It could be problematic when we have function-like macros but without
        # '}' following them. It will skip all the tokens/blocks until seeing a
        # '}' as the function end. Fortunately we don't have such cases in the
        # current kernel headers.

        # state = 0 => normal (i.e. LN + spaces)
        # state = 1 => typedef/struct encountered, ends with ";"
        # state = 2 => var declaration encountered, ends with ";"
        # state = 3 => func declaration encountered, ends with "}"

        if knownStatics is None:
            knownStatics = set()
        state = 0
        depth = 0  # Brace nesting depth while scanning for a terminator.
        blocks2 = []  # Output: the blocks (and token slices) we keep.
        skipTokens = False  # True while dropping a declaration's tokens.
        for b in self.blocks:
            if b.isDirective():
                # Directives are always preserved verbatim.
                blocks2.append(b)
            else:
                n = len(b.tokens)
                i = 0
                # 'first' is the start of the token range to keep.
                if skipTokens:
                    first = n
                else:
                    first = 0
                while i < n:
                    tok = b.tokens[i]
                    tokid = tok.id
                    # If we are not looking for the start of a new
                    # type/var/func, then skip over tokens until
                    # we find our terminator, managing the depth of
                    # accolades as we go.
                    if state > 0:
                        terminator = False
                        if tokid == '{':
                            depth += 1
                        elif tokid == '}':
                            if depth > 0:
                                depth -= 1
                            if (depth == 0) and (state == 3):
                                terminator = True
                        elif tokid == ';' and depth == 0:
                            terminator = True

                        if terminator:
                            # we found the terminator
                            state = 0
                            if skipTokens:
                                skipTokens = False
                                first = i + 1

                        i += 1
                        continue

                    # Is it a new type definition, then start recording it
                    if tok.id in ['struct', 'typedef', 'enum', 'union',
                                  '__extension__']:
                        state = 1
                        i += 1
                        continue

                    # Is it a variable or function definition. If so, first
                    # try to determine which type it is, and also extract
                    # its name.
                    #
                    # We're going to parse the next tokens of the same block
                    # until we find a semi-column or a left parenthesis.
                    #
                    # The semi-column corresponds to a variable definition,
                    # the left-parenthesis to a function definition.
                    #
                    # We also assume that the var/func name is the last
                    # identifier before the terminator.
                    #
                    j = i + 1
                    ident = ""
                    while j < n:
                        tokid = b.tokens[j].id
                        if tokid == '(':  # a function declaration
                            state = 3
                            break
                        elif tokid == ';':  # a variable declaration
                            state = 2
                            break
                        if b.tokens[j].kind == TokenKind.IDENTIFIER:
                            ident = b.tokens[j].id
                        j += 1

                    if j >= n:
                        # This can only happen when the declaration
                        # does not end on the current block (e.g. with
                        # a directive mixed inside it.
                        #
                        # We will treat it as malformed because
                        # it's very hard to recover from this case
                        # without making our parser much more
                        # complex.
                        #
                        logging.debug("### skip unterminated static '%s'",
                                      ident)
                        break

                    if ident in knownStatics:
                        logging.debug("### keep var/func '%s': %s", ident,
                                      repr(b.tokens[i:j]))
                    else:
                        # We're going to skip the tokens for this declaration
                        logging.debug("### skip var/func '%s': %s", ident,
                                      repr(b.tokens[i:j]))
                        if i > first:
                            # Flush the tokens kept so far as their own block.
                            blocks2.append(Block(b.tokens[first:i]))
                        skipTokens = True
                        first = n

                    i += 1

                if i > first:
                    # print "### final '%s'" % repr(b.tokens[first:i])
                    blocks2.append(Block(b.tokens[first:i]))

        self.blocks = blocks2

    def replaceTokens(self, replacements):
        """Replace tokens according to the given dict.

        Args:
            replacements: dict mapping identifier spellings to their
                replacement spellings. #include blocks are left untouched.
        """
        for b in self.blocks:
            made_change = False
            if b.isInclude() is None:
                for tok in b.tokens:
                    if tok.kind == TokenKind.IDENTIFIER:
                        if tok.id in replacements:
                            tok.id = replacements[tok.id]
                            made_change = True

                # Also rename the macro identifier of a #define.
                if b.isDefine() and b.define_id in replacements:
                    b.define_id = replacements[b.define_id]
                    made_change = True

            if made_change and b.isIf():
                # Keep 'expr' in sync with 'tokens'.
                b.expr = CppExpr(b.tokens)
1410
1411
def strip_space(s):
    """Strip out redundant space in a given string.

    Collapses the spaces the tokenizer leaves around punctuation
    (brackets, parentheses, commas, semicolons, member access, ...) and
    removes the space between a function name and its opening '('.
    """

    # NOTE: It ought to be more clever to not destroy spaces in string tokens.
    fixes = [(' . ', '.'),
             (' [', '['),
             ('[ ', '['),
             (' ]', ']'),
             ('( ', '('),
             (' )', ')'),
             (' ,', ','),
             ('# ', '#'),
             (' ;', ';'),
             ('~ ', '~'),
             (' -> ', '->')]
    for old, new in fixes:
        s = s.replace(old, new)

    # Remove the space between function name and the parenthesis.
    return re.sub(r'(\w+) \(', r'\1(', s)
1434
1435
class BlockParser(object):
    """A class that converts an input source file into a BlockList object."""

    def __init__(self, tokzer=None):
        """Initialize a block parser.

        The input source is provided through a Tokenizer object.
        """
        self._tokzer = tokzer
        self._parsed = False

    @property
    def parsed(self):
        """True once getBlocks() has completed successfully."""
        return self._parsed

    @staticmethod
    def _short_extent(extent):
        """Format a clang SourceRange as 'line:col - line:col' for logging."""
        return '%d:%d - %d:%d' % (extent.start.line, extent.start.column,
                                  extent.end.line, extent.end.column)

    def getBlocks(self, tokzer=None):
        """Return all the blocks parsed.

        Walks the token stream (with its libclang cursor annotations) and
        groups tokens into directive blocks and text blocks.

        Returns:
            A BlockList holding every parsed Block.
        """

        def consume_extent(i, tokens, extent=None, detect_change=False):
            """Return tokens that belong to the given extent.

            It parses all the tokens that follow tokens[i], until getting out
            of the extent. When detect_change is True, it may terminate early
            when detecting preprocessing directives inside the extent.
            """

            result = []
            if extent is None:
                extent = tokens[i].cursor.extent

            while i < len(tokens) and tokens[i].location in extent:
                t = tokens[i]
                if debugBlockParser:
                    print ' ' * 2, t.id, t.kind, t.cursor.kind
                if (detect_change and t.cursor.extent != extent and
                    t.cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE):
                    break
                result.append(t)
                i += 1
            return (i, result)

        def consume_line(i, tokens):
            """Return tokens that follow tokens[i] in the same line."""
            result = []
            line = tokens[i].location.line
            while i < len(tokens) and tokens[i].location.line == line:
                if tokens[i].cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE:
                    break
                result.append(tokens[i])
                i += 1
            return (i, result)

        if tokzer is None:
            tokzer = self._tokzer
        tokens = tokzer.tokens

        blocks = []
        buf = []  # Accumulates text-block tokens until a boundary is hit.
        i = 0

        while i < len(tokens):
            t = tokens[i]
            cursor = t.cursor

            if debugBlockParser:
                print ("%d: Processing [%s], kind=[%s], cursor=[%s], "
                       "extent=[%s]" % (t.location.line, t.spelling, t.kind,
                                        cursor.kind,
                                        self._short_extent(cursor.extent)))

            if cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE:
                # Flush any pending text block before the directive.
                if buf:
                    blocks.append(Block(buf))
                    buf = []

                j = i
                if j + 1 >= len(tokens):
                    # A lone '#' with nothing after it.
                    raise BadExpectedToken("### BAD TOKEN at %s" % (t.location))
                directive = tokens[j+1].id

                if directive == 'define':
                    if i+2 >= len(tokens):
                        raise BadExpectedToken("### BAD TOKEN at %s" %
                                               (tokens[i].location))

                    # Skip '#' and 'define'.
                    extent = tokens[i].cursor.extent
                    i += 2
                    id = ''
                    # We need to separate the id from the remaining of
                    # the line, especially for the function-like macro.
                    if (i + 1 < len(tokens) and tokens[i+1].id == '(' and
                        (tokens[i].location.column + len(tokens[i].spelling) ==
                         tokens[i+1].location.column)):
                        # Function-like macro: the '(' directly follows the
                        # name, so consume tokens up to the closing ')' as
                        # part of the identifier.
                        while i < len(tokens):
                            id += tokens[i].id
                            if tokens[i].spelling == ')':
                                i += 1
                                break
                            i += 1
                    else:
                        id += tokens[i].id
                        # Advance to the next token that follows the macro id
                        i += 1

                    (i, ret) = consume_extent(i, tokens, extent=extent)
                    blocks.append(Block(ret, directive=directive,
                                        lineno=t.location.line, identifier=id))

                else:
                    # Other directive: drop the leading '#' and the
                    # directive keyword (ret[2:]).
                    (i, ret) = consume_extent(i, tokens)
                    blocks.append(Block(ret[2:], directive=directive,
                                        lineno=t.location.line))

            elif cursor.kind == CursorKind.INCLUSION_DIRECTIVE:
                if buf:
                    blocks.append(Block(buf))
                    buf = []
                directive = tokens[i+1].id
                (i, ret) = consume_extent(i, tokens)

                # Drop the leading '#' and 'include' tokens.
                blocks.append(Block(ret[2:], directive=directive,
                                    lineno=t.location.line))

            elif cursor.kind == CursorKind.VAR_DECL:
                if buf:
                    blocks.append(Block(buf))
                    buf = []

                (i, ret) = consume_extent(i, tokens, detect_change=True)
                buf += ret

            elif cursor.kind == CursorKind.FUNCTION_DECL:
                if buf:
                    blocks.append(Block(buf))
                    buf = []

                (i, ret) = consume_extent(i, tokens, detect_change=True)
                buf += ret

            else:
                # Plain text: take the rest of the current line.
                (i, ret) = consume_line(i, tokens)
                buf += ret

        if buf:
            blocks.append(Block(buf))

        # _parsed=True indicates a successful parsing, although may result an
        # empty BlockList.
        self._parsed = True

        return BlockList(blocks)

    def parse(self, tokzer):
        """Parse the given tokenizer's tokens and return a BlockList."""
        return self.getBlocks(tokzer)

    def parseFile(self, path):
        """Tokenize and parse the file at 'path', returning a BlockList."""
        return self.getBlocks(CppFileTokenizer(path))
1599
1600
def test_block_parsing(lines, expected):
    """Helper method to test the correctness of BlockParser.parse.

    Args:
        lines: list of input source lines (joined with newlines before
            being parsed).
        expected: list of the expected str() forms, one per parsed block.

    Raises:
        BadExpectedToken: if the parsed blocks do not match 'expected'.
    """
    blocks = BlockParser().parse(CppStringTokenizer('\n'.join(lines)))
    if len(blocks) != len(expected):
        raise BadExpectedToken("BlockParser.parse() returned '%s' expecting "
                               "'%s'" % (str(blocks), repr(expected)))
    # Compare each parsed block against its expected string form.
    # (enumerate instead of range(len(...)) for clarity.)
    for n, block in enumerate(blocks):
        if str(block) != expected[n]:
            raise BadExpectedToken("BlockParser.parse()[%d] is '%s', "
                                   "expecting '%s'" % (n, str(block),
                                                       expected[n]))
1612
1613
def test_BlockParser():
    """Self-test for BlockParser over a table of small inputs."""
    cases = [
        (["#error hello"], ["#error hello"]),
        (["foo", "", "bar"], ["foo bar"]),

        # We currently cannot handle the following case with libclang
        # properly. Fortunately it doesn't appear in current headers.
        # (["foo", "  #  ", "bar"], ["foo", "bar"]),

        (["foo",
          "  #  /* ahah */ if defined(__KERNEL__) /* more */",
          "bar", "#endif"],
         ["foo", "#ifdef __KERNEL__", "bar", "#endif"]),
    ]
    for lines, expected in cases:
        test_block_parsing(lines, expected)
1626
1627
1628################################################################################
1629################################################################################
1630#####                                                                      #####
1631#####        B L O C K   L I S T   O P T I M I Z A T I O N                 #####
1632#####                                                                      #####
1633################################################################################
1634################################################################################
1635
1636
def remove_macro_defines(blocks, excludedMacros=None):
    """Remove macro definitions like #define <macroName>  ....

    Args:
        blocks: iterable of Block objects.
        excludedMacros: set of macro names whose #define blocks should be
            dropped; defaults to the empty set (keep everything).

    Returns:
        A new list with the excluded #define blocks filtered out.
    """
    if excludedMacros is None:
        excludedMacros = set()
    kept = []
    for block in blocks:
        name = block.isDefine()
        # Drop only #define blocks whose macro name is excluded.
        if name is not None and name in excludedMacros:
            continue
        kept.append(block)
    return kept
1648
1649
def find_matching_endif(blocks, i):
    """Traverse the blocks to find out the matching #endif.

    Starting at index i (just after an opening conditional), scan forward
    for the directive that closes it: the matching #endif, or an #else /
    #elif at the same nesting level.

    Returns:
        The index of the matching directive block, or len(blocks) when
        the conditional is unterminated.
    """
    depth = 1
    for pos in range(i, len(blocks)):
        block = blocks[pos]
        if not block.isDirective():
            continue
        directive = block.directive
        if directive in ("if", "ifdef", "ifndef"):
            # A nested conditional opens.
            depth += 1
        elif directive in ("else", "elif") and depth == 1:
            # Same-level branch: this ends the current section.
            return pos
        elif directive == "endif":
            depth -= 1
            if depth == 0:
                return pos
    return len(blocks)
1667
1668
def optimize_if01(blocks):
    """Remove the code between #if 0 .. #endif in a list of CppBlocks.

    Scans |blocks| for #if-like directives whose expression statically
    evaluates (via CppExpr.toInt()) to 0 or 1 and eliminates the dead
    branch, recursing into live branches.  Directives whose value cannot
    be determined are kept as-is.  Returns a new list; note that some
    directive blocks from the input may be mutated in place (their
    directive/expr rewritten) so they are re-evaluated on a later pass.
    """
    i = 0
    n = len(blocks)
    result = []
    while i < n:
        # Copy through everything up to the next #if-like directive.
        j = i
        while j < n and not blocks[j].isIf():
            j += 1
        if j > i:
            logging.debug("appending lines %d to %d", blocks[i].lineno,
                          blocks[j-1].lineno)
            result += blocks[i:j]
        if j >= n:
            break
        expr = blocks[j].expr
        r = expr.toInt()
        if r is None:
            # Expression value unknown at preprocessing time: keep the
            # directive untouched and move past it.
            result.append(blocks[j])
            i = j + 1
            continue

        if r == 0:
            # if 0 => skip everything until the corresponding #endif
            j = find_matching_endif(blocks, j + 1)
            if j >= n:
                # unterminated #if 0, finish here
                break
            dir_ = blocks[j].directive
            if dir_ == "endif":
                logging.debug("remove 'if 0' .. 'endif' (lines %d to %d)",
                              blocks[i].lineno, blocks[j].lineno)
                i = j + 1
            elif dir_ == "else":
                # convert 'else' into 'if 1'
                logging.debug("convert 'if 0' .. 'else' into 'if 1' (lines %d "
                              "to %d)", blocks[i].lineno, blocks[j-1].lineno)
                blocks[j].directive = "if"
                blocks[j].expr = CppExpr(CppStringTokenizer("1").tokens)
                # Leave i at j so the rewritten 'if 1' is processed next.
                i = j
            elif dir_ == "elif":
                # convert 'elif' into 'if'
                logging.debug("convert 'if 0' .. 'elif' into 'if'")
                blocks[j].directive = "if"
                i = j
            continue

        # if 1 => find corresponding endif and remove/transform them
        k = find_matching_endif(blocks, j + 1)
        if k >= n:
            # unterminated #if 1, finish here
            logging.debug("unterminated 'if 1'")
            result += blocks[j+1:k]
            break

        dir_ = blocks[k].directive
        if dir_ == "endif":
            # Keep the (recursively optimized) body, drop both directives.
            logging.debug("convert 'if 1' .. 'endif' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            result += optimize_if01(blocks[j+1:k])
            i = k + 1
        elif dir_ == "else":
            # convert 'else' into 'if 0'
            logging.debug("convert 'if 1' .. 'else' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            result += optimize_if01(blocks[j+1:k])
            blocks[k].directive = "if"
            blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
            i = k
        elif dir_ == "elif":
            # convert 'elif' into 'if 0'
            logging.debug("convert 'if 1' .. 'elif' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            result += optimize_if01(blocks[j+1:k])
            # NOTE(review): unlike the 'else' case above the directive is
            # left as 'elif' here (only its expression is forced to 0);
            # presumably isIf() also matches 'elif' so the next iteration
            # re-evaluates it as dead -- confirm against CppBlock.isIf().
            blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
            i = k
    return result
1746
1747
def test_optimizeAll():
    """Unit test: end-to-end check of block-list macro optimization.

    Parses a header snippet, applies the kernel token replacements, then
    optimizes with __KERNEL__ forced undefined, and compares the emitted
    text against the expected result.  Bumps the module-level
    failure_count on mismatch instead of raising.
    """
    text = """\
#if 1
#define  GOOD_1
#endif
#if 0
#define  BAD_2
#define  BAD_3
#endif

#if 1
#define  GOOD_2
#else
#define  BAD_4
#endif

#if 0
#define  BAD_5
#else
#define  GOOD_3
#endif

#if defined(__KERNEL__)
#define BAD_KERNEL
#endif

#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
#define X
#endif

#ifndef SIGRTMAX
#define SIGRTMAX 123
#endif /* SIGRTMAX */

#if 0
#if 1
#define  BAD_6
#endif
#endif\
"""

    # Note the SIGRTMAX -> __SIGRTMAX rename in the expected output --
    # presumably produced by the replaceTokens() pass below using
    # kernel_token_replacements (imported from defaults).
    expected = """\
#define GOOD_1
#define GOOD_2
#define GOOD_3
#if !defined(__GLIBC__) || __GLIBC__ < 2
#define X
#endif
#ifndef __SIGRTMAX
#define __SIGRTMAX 123
#endif\
"""

    out = utils.StringOutput()
    blocks = BlockParser().parse(CppStringTokenizer(text))
    blocks.replaceTokens(kernel_token_replacements)
    # __KERNEL__ is treated as undefined, so the defined(__KERNEL__)
    # branches above must be optimized away.
    blocks.optimizeAll({"__KERNEL__": kCppUndefinedMacro})
    blocks.write(out)
    if out.get() != expected:
        # Python 2 print statements; the trailing commas suppress the
        # newline so expected/actual text flows into the quoted markers.
        print "[FAIL]: macro optimization failed\n"
        print "<<<< expecting '",
        print expected,
        print "'\n>>>> result '",
        print out.get(),
        print "'\n----"
        global failure_count
        failure_count += 1
1815
1816
def runUnitTests():
    """Always run all unit tests for this program."""
    # Each test records failures in the module-level failure_count
    # rather than raising, so all suites run unconditionally, in order.
    for unit_test in (test_CppTokenizer,
                      test_CppExpr,
                      test_optimizeAll,
                      test_BlockParser):
        unit_test()
1823
1824
# Self-test on every run: the unit tests above increment failure_count
# instead of raising, so check it afterwards and abort the whole program
# if anything failed.
failure_count = 0
runUnitTests()
if failure_count != 0:
    utils.panic("Unit tests failed in cpp.py.\n")