1#!/usr/bin/env python
2#
3# Copyright (c) 2009 Google Inc. All rights reserved.
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9#    * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11#    * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15#    * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31# Here are some issues that I've had people identify in my code during reviews,
32# that I think are possible to flag automatically in a lint tool.  If these were
33# caught by lint, it would save time both for myself and that of my reviewers.
34# Most likely, some of these are beyond the scope of the current lint framework,
35# but I think it is valuable to retain these wish-list items even if they cannot
36# be immediately implemented.
37#
38#  Suggestions
39#  -----------
40#  - Check for no 'explicit' for multi-arg ctor
41#  - Check for boolean assign RHS in parens
42#  - Check for ctor initializer-list colon position and spacing
43#  - Check that if there's a ctor, there should be a dtor
44#  - Check accessors that return non-pointer member variables are
45#    declared const
46#  - Check accessors that return non-const pointer member vars are
47#    *not* declared const
48#  - Check for using public includes for testing
49#  - Check for spaces between brackets in one-line inline method
50#  - Check for no assert()
51#  - Check for spaces surrounding operators
52#  - Check for 0 in pointer context (should be NULL)
53#  - Check for 0 in char context (should be '\0')
54#  - Check for camel-case method name conventions for methods
55#    that are not simple inline getters and setters
56#  - Do not indent namespace contents
57#  - Avoid inlining non-trivial constructors in header files
58#  - Check for old-school (void) cast for call-sites of functions
59#    ignored return value
60#  - Check gUnit usage of anonymous namespace
61#  - Check for class declaration order (typedefs, consts, enums,
62#    ctor(s?), dtor, friend declarations, methods, member vars)
63#
64
65"""Does google-lint on c++ files.
66
67The goal of this script is to identify places in the code that *may*
68be in non-compliance with google style.  It does not attempt to fix
69up these problems -- the point is to educate.  It does also not
70attempt to find all problems, or to ensure that everything it does
71find is legitimately a problem.
72
73In particular, we can get very confused by /* and // inside strings!
74We do a small hack, which is to ignore //'s with "'s after them on the
75same line, but it is far from perfect (in either direction).
76"""
77
78import codecs
79import copy
80import getopt
81import math  # for log
82import os
83import re
84import sre_compile
85import string
86import sys
87import unicodedata
88
89
# Command-line help text.  The filter description must stay in sync with
# _CppLintState.SetFilters(), which rejects any filter that does not start
# with '+' or '-'.
_USAGE = """
Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
                   [--counting=total|toplevel|detailed] [--root=subdir]
                   [--quiet]
        <file> [file] ...

  The style guidelines this tries to follow are those in
    http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml

  Every problem is given a confidence score from 1-5, with 5 meaning we are
  certain of the problem, and 1 meaning it could be a legitimate construct.
  This will miss some errors, and is not a substitute for a code review.

  To suppress false-positive errors of a certain category, add a
  'NOLINT(category)' comment to the line.  NOLINT or NOLINT(*)
  suppresses errors of all categories on that line.

  The files passed in will be linted; at least one file must be provided.
  Linted extensions are .cc, .cpp, and .h.  Other file types will be ignored.

  Flags:

    output=vs7
      By default, the output is formatted to ease emacs parsing.  Visual Studio
      compatible output (vs7) may also be used.  Other formats are unsupported.

    verbose=#
      Specify a number 0-5 to restrict errors to certain verbosity levels.

    quiet
      Don't print anything if no errors are found.

    filter=-x,+y,...
      Specify a comma-separated list of category-filters to apply: only
      error messages whose category names pass the filters will be printed.
      (Category names are printed with the message and look like
      "[whitespace/indent]".)  Filters are evaluated left to right.
      "-FOO" means "do not print categories that start with FOO".
      "+FOO" means "do print categories that start with FOO".
      Every filter must start with "+" or "-".

      Examples: --filter=-whitespace,+whitespace/braces
                --filter=-whitespace,-runtime/printf,+runtime/printf_format
                --filter=-,+build/include_what_you_use

      To see a list of all the categories used in cpplint, pass no arg:
         --filter=

    counting=total|toplevel|detailed
      The total number of errors found is always printed. If
      'toplevel' is provided, then the count of errors in each of
      the top-level categories like 'build' and 'whitespace' will
      also be printed. If 'detailed' is provided, then a count
      is provided for each category like 'build/class'.

    root=subdir
      The root directory used for deriving header guard CPP variable.
      By default, the header guard CPP variable is calculated as the relative
      path to the directory that contains .git, .hg, or .svn.  When this flag
      is specified, the relative path is calculated from the specified
      directory. If the specified directory does not exist, this flag is
      ignored.

      Examples:
        Assuming that src/.git exists, the header guard CPP variables for
        src/chrome/browser/ui/browser.h are:

        No flag => CHROME_BROWSER_UI_BROWSER_H_
        --root=chrome => BROWSER_UI_BROWSER_H_
        --root=chrome/browser => UI_BROWSER_H_
"""
160
# We categorize each error message we print.  Here are the categories.
# We want an explicit list so we can list them all in cpplint --filter=.
# If you add a new error message with a new category, add it to the list
# here!  cpplint_unittest.py should tell you if you forget to do this.
#
# Every entry has the form '<top-level>/<detail>'.  The part before the '/'
# is what _CppLintState.IncrementErrorCount() groups on when counting by
# top-level category, and ParseNolintSuppressions() accepts only names from
# this list inside NOLINT(...).
# \ used for clearer layout -- pylint: disable-msg=C6013
_ERROR_CATEGORIES = [
  'build/class',
  'build/deprecated',
  'build/endif_comment',
  'build/explicit_make_pair',
  'build/forward_decl',
  'build/header_guard',
  'build/include',
  'build/include_alpha',
  'build/include_order',
  'build/include_what_you_use',
  'build/namespaces',
  'build/printf_format',
  'build/storage_class',
  'legal/copyright',
  'readability/alt_tokens',
  'readability/braces',
  'readability/casting',
  'readability/check',
  'readability/constructors',
  'readability/fn_size',
  'readability/function',
  'readability/multiline_comment',
  'readability/multiline_string',
  'readability/namespace',
  'readability/nolint',
  'readability/streams',
  'readability/todo',
  'readability/utf8',
  'runtime/arrays',
  'runtime/casting',
  'runtime/explicit',
  'runtime/int',
  'runtime/init',
  'runtime/invalid_increment',
  'runtime/member_string_references',
  'runtime/memset',
  'runtime/operator',
  'runtime/printf',
  'runtime/printf_format',
  'runtime/references',
  'runtime/rtti',
  'runtime/sizeof',
  'runtime/string',
  'runtime/threadsafe_fn',
  'whitespace/blank_line',
  'whitespace/braces',
  'whitespace/comma',
  'whitespace/comments',
  'whitespace/empty_loop_body',
  'whitespace/end_of_line',
  'whitespace/ending_newline',
  'whitespace/forcolon',
  'whitespace/indent',
  'whitespace/labels',
  'whitespace/line_length',
  'whitespace/newline',
  'whitespace/operators',
  'whitespace/parens',
  'whitespace/semicolon',
  'whitespace/tab',
  'whitespace/todo'
  ]
229
# The default state of the category filter. This is overridden by the
# --filter= flag. By default all errors are on, so only add here categories
# that should be off by default (i.e., categories that must be enabled by the
# --filter= flag).
# All entries here should start with a '-' or '+', as in the --filter= flag.
# _CppLintState copies this list (it never mutates it) both when it is
# created and at the start of every SetFilters() call.
_DEFAULT_FILTERS = ['-build/include_alpha']
235
236# We used to check for high-bit characters, but after much discussion we
237# decided those were OK, as long as they were in UTF-8 and didn't represent
238# hard-coded international strings, which belong in a separate i18n file.
239
# Headers that we consider STL headers.
# Names are stored exactly as they appear between the <> of an #include.
# Note that 'exception' is listed both here and in _CPP_HEADERS.
_STL_HEADERS = frozenset([
    'algobase.h', 'algorithm', 'alloc.h', 'bitset', 'deque', 'exception',
    'function.h', 'functional', 'hash_map', 'hash_map.h', 'hash_set',
    'hash_set.h', 'iterator', 'list', 'list.h', 'map', 'memory', 'new',
    'pair.h', 'pthread_alloc', 'queue', 'set', 'set.h', 'sstream', 'stack',
    'stl_alloc.h', 'stl_relops.h', 'type_traits.h',
    'utility', 'vector', 'vector.h',
    ])
249
250
# Non-STL C++ system headers.
# Lookups are case-sensitive (note 'PlotFile.h' and 'SFile.h').  The entry
# 'exception' also appears in _STL_HEADERS.
_CPP_HEADERS = frozenset([
    'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype',
    'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath',
    'complex', 'complex.h', 'csetjmp', 'csignal', 'cstdarg', 'cstddef',
    'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype',
    'defalloc.h', 'deque.h', 'editbuf.h', 'exception', 'fstream',
    'fstream.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip',
    'iomanip.h', 'ios', 'iosfwd', 'iostream', 'iostream.h', 'istream',
    'istream.h', 'iterator.h', 'limits', 'map.h', 'multimap.h', 'multiset.h',
    'numeric', 'ostream', 'ostream.h', 'parsestream.h', 'pfstream.h',
    'PlotFile.h', 'procbuf.h', 'pthread_alloc.h', 'rope', 'rope.h',
    'ropeimpl.h', 'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept',
    'stdiostream.h', 'streambuf', 'streambuf.h', 'stream.h', 'strfile.h',
    'string', 'strstream', 'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo',
    'valarray',
    ])
268
269
# Assertion macros.  These are defined in base/logging.h and
# testing/base/gunit.h.  Note that the _M versions need to come first
# for substring matching to work.
_CHECK_MACROS = [
    'DCHECK', 'CHECK',
    'EXPECT_TRUE_M', 'EXPECT_TRUE',
    'ASSERT_TRUE_M', 'ASSERT_TRUE',
    'EXPECT_FALSE_M', 'EXPECT_FALSE',
    'ASSERT_FALSE_M', 'ASSERT_FALSE',
    ]

# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE.
# Maps macro name -> {comparison operator -> name of the macro to suggest}.
_CHECK_REPLACEMENT = dict((macro, {}) for macro in _CHECK_MACROS)

# Macros that assert a condition is true: suggest the comparison macro named
# after the operator itself.
for op, replacement in zip(('==', '!=', '>=', '>', '<=', '<'),
                           ('EQ', 'NE', 'GE', 'GT', 'LE', 'LT')):
  _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_' + replacement
  _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_' + replacement
  _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_' + replacement
  _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_' + replacement
  _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_' + replacement + '_M'
  _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_' + replacement + '_M'

# Macros that assert a condition is false: suggest the macro for the
# opposite comparison (e.g. EXPECT_FALSE(a == b) -> EXPECT_NE).
for op, inv_replacement in zip(('==', '!=', '>=', '>', '<=', '<'),
                               ('NE', 'EQ', 'LT', 'LE', 'GT', 'GE')):
  _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_' + inv_replacement
  _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_' + inv_replacement
  _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_' + inv_replacement + '_M'
  _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_' + inv_replacement + '_M'
301
# Alternative tokens and their replacements.  For full list, see section 2.5
# Alternative tokens [lex.digraph] in the C++ standard.
#
# Digraphs (such as '%:') are not included here since it's a mess to
# match those on a word boundary.
_ALT_TOKEN_REPLACEMENT = {
    'and': '&&',
    'bitor': '|',
    'or': '||',
    'xor': '^',
    'compl': '~',
    'bitand': '&',
    'and_eq': '&=',
    'or_eq': '|=',
    'xor_eq': '^=',
    'not': '!',
    'not_eq': '!=',
    }

# Regular expression matching any of the keywords above; group 1 is the
# keyword.  The leading "[ =()]" is meant to avoid matching these keywords
# outside of boolean expressions, and the trailing lookahead requires a
# space, an opening parenthesis or the end of the line after the keyword.
#
# False positives include C-style multi-line comments (http://go/nsiut )
# and multi-line strings (http://go/beujw ), but those have always been
# troublesome for cpplint.
_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
    r'[ =()](%s)(?=[ (]|$)' % '|'.join(_ALT_TOKEN_REPLACEMENT))
329
330
# These constants define types of headers for use with
# _IncludeState.CheckNextIncludeOrder().  They are also the keys of
# _IncludeState._TYPE_NAMES.
_C_SYS_HEADER = 1
_CPP_SYS_HEADER = 2
_LIKELY_MY_HEADER = 3
_POSSIBLE_MY_HEADER = 4
_OTHER_HEADER = 5

# These constants define the current inline assembly state
_NO_ASM = 0       # Outside of inline assembly block
_INSIDE_ASM = 1   # Inside inline assembly block
_END_ASM = 2      # Last line of inline assembly block
_BLOCK_ASM = 3    # The whole block is an inline assembly block

# Match start of assembly blocks: one of the asm keywords at the start of the
# line (after optional whitespace), an optional volatile qualifier, then an
# opening '{' or '('.
_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)'
                        r'(?:\s+(volatile|__volatile__))?'
                        r'\s*[{(]')


# Cache of compiled regular expressions, keyed by pattern string.  Filled
# lazily by Match() and Search().
_regexp_compile_cache = {}

# Finds occurrences of NOLINT or NOLINT(...).  Group 1, when present, is the
# parenthesized category including the parentheses, e.g. '(build/include)'.
# An unterminated 'NOLINT(' leaves group 1 unset.
_RE_SUPPRESSION = re.compile(r'\bNOLINT\b(\([^)]*\))?')

# {str, set(int)}: a map from error categories to sets of linenumbers
# on which those errors are expected and should be suppressed.
# The key None holds the lines on which every category is suppressed.
_error_suppressions = {}

# The root directory used for deriving header guard CPP variable.
# This is set by --root flag.
_root = None
363
def ParseNolintSuppressions(filename, raw_line, linenum, error):
  """Records the NOLINT suppression found on a line, if any.

  Looks for the first NOLINT comment in raw_line and registers linenum in
  the module-level _error_suppressions map, either for every category
  (plain NOLINT or NOLINT(*)) or for the single named category.  A named
  category that is not listed in _ERROR_CATEGORIES is reported through
  error() under 'readability/nolint' instead of being recorded.

  Args:
    filename: str, the name of the input file.
    raw_line: str, the line of input text, with comments.
    linenum: int, the number of the current line.
    error: function, an error handler.
  """
  # FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*).
  nolint = _RE_SUPPRESSION.search(raw_line)
  if not nolint:
    return

  spec = nolint.group(1)
  if spec is None or spec == '(*)':
    # No category, or the wildcard: suppress everything on this line.
    _error_suppressions.setdefault(None, set()).add(linenum)
    return

  if not (spec.startswith('(') and spec.endswith(')')):
    return

  name = spec[1:-1]  # Drop the surrounding parentheses.
  if name not in _ERROR_CATEGORIES:
    error(filename, linenum, 'readability/nolint', 5,
          'Unknown NOLINT error category: %s' % name)
    return
  _error_suppressions.setdefault(name, set()).add(linenum)
391
392
def ResetNolintSuppressions():
  """Resets the set of NOLINT suppressions to empty.

  Clears the module-level _error_suppressions map in place, so every
  suppression recorded by ParseNolintSuppressions() is forgotten.
  """
  _error_suppressions.clear()
396
397
def IsErrorSuppressedByNolint(category, linenum):
  """Tells whether a NOLINT comment suppresses this category on this line.

  Reads the module-level _error_suppressions map that
  ParseNolintSuppressions fills and ResetNolintSuppressions empties.

  Args:
    category: str, the category of the error.
    linenum: int, the current line number.
  Returns:
    bool, True iff the line is suppressed either for this category or for
    all categories (the None entry of the map).
  """
  for key in (category, None):
    if linenum in _error_suppressions.get(key, set()):
      return True
  return False
412
def Match(pattern, s):
  """Matches the string with the pattern, caching the compiled regexp.

  Args:
    pattern: str, the regular expression source.
    s: str, the text to match; matching is anchored at its start.

  Returns:
    The match object, or None if the start of s does not match.
  """
  # The regexp compilation caching is inlined in both Match and Search for
  # performance reasons; factoring it out into a separate function turns out
  # to be noticeably expensive.
  if pattern not in _regexp_compile_cache:
    # Use the public re.compile() rather than the internal sre_compile
    # module, which is undocumented and deprecated in newer Pythons.
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].match(s)
421
422
def Search(pattern, s):
  """Searches the string for the pattern, caching the compiled regexp.

  Args:
    pattern: str, the regular expression source.
    s: str, the text to scan.

  Returns:
    The match object for the first match found anywhere in s, or None.
  """
  # Caching is inlined here (not shared with Match through a helper) on
  # purpose; see the comment in Match.
  if pattern not in _regexp_compile_cache:
    # Use the public re.compile() rather than the internal sre_compile
    # module, which is undocumented and deprecated in newer Pythons.
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].search(s)
428
429
class _IncludeState(dict):
  """Tracks line numbers for includes, and the order in which includes appear.

  As a dict, an _IncludeState object serves as a mapping between include
  filename and line number on which that file was included.

  Call CheckNextIncludeOrder() once for each header in the file, passing
  in one of the module-level _XXX_HEADER type constants.  A header that
  appears in an illegal position is reported through that method's return
  value, which is then a non-empty error message.

  """
  # Sections, in the order a file is expected to go through them.
  # CheckNextIncludeOrder() refuses (with an error message) to move
  # self._section backwards for C and C++ system headers.
  _INITIAL_SECTION = 0
  _MY_H_SECTION = 1
  _C_SECTION = 2
  _CPP_SECTION = 3
  _OTHER_H_SECTION = 4

  # Human-readable names used to build the error message.
  _TYPE_NAMES = {
      _C_SYS_HEADER: 'C system header',
      _CPP_SYS_HEADER: 'C++ system header',
      _LIKELY_MY_HEADER: 'header this file implements',
      _POSSIBLE_MY_HEADER: 'header this file may implement',
      _OTHER_HEADER: 'other header',
      }
  _SECTION_NAMES = {
      _INITIAL_SECTION: "... nothing. (This can't be an error.)",
      _MY_H_SECTION: 'a header this file implements',
      _C_SECTION: 'C system header',
      _CPP_SECTION: 'C++ system header',
      _OTHER_H_SECTION: 'other header',
      }

  def __init__(self):
    dict.__init__(self)
    # Canonicalized path of the last header accepted by
    # IsInAlphabeticalOrder(); '' when there is nothing to compare against.
    self._last_header = ''
    # The section (one of the _*_SECTION values) reached so far.
    self._section = self._INITIAL_SECTION

  def CanonicalizeAlphabeticalOrder(self, header_path):
    """Returns a path canonicalized for alphabetical comparison.

    - turns a '-inl.h' suffix into '.h', since -inl headers are not
      required to come after the main header.
    - replaces "-" with "_" so they both compare the same.
    - lowercases everything, just in case.

    Args:
      header_path: Path to be canonicalized.

    Returns:
      Canonicalized path.
    """
    without_inl = header_path.replace('-inl.h', '.h')
    underscored = without_inl.replace('-', '_')
    return underscored.lower()

  def IsInAlphabeticalOrder(self, header_path):
    """Check if a header is in alphabetical order with the previous header.

    When the header is in order it becomes the new reference for the next
    call; otherwise the stored reference is left untouched.

    Args:
      header_path: Header to be checked.

    Returns:
      Returns true if the header is in alphabetical order.
    """
    candidate = self.CanonicalizeAlphabeticalOrder(header_path)
    in_order = self._last_header <= candidate
    if in_order:
      self._last_header = candidate
    return in_order

  def CheckNextIncludeOrder(self, header_type):
    """Returns a non-empty error message if the next header is out of order.

    This function also updates the internal state to be ready to check
    the next include.  Whenever the section changes, or an error is
    returned, the alphabetical-order reference is reset.

    Args:
      header_type: One of the _XXX_HEADER constants defined above.

    Returns:
      The empty string if the header is in the right order, or an
      error message describing what's wrong.

    """
    # Formatted up front, before any state changes, so it names the section
    # we were in when the header was seen.
    error_message = ('Found %s after %s' %
                     (self._TYPE_NAMES[header_type],
                      self._SECTION_NAMES[self._section]))

    section_before = self._section

    if header_type in (_C_SYS_HEADER, _CPP_SYS_HEADER):
      if header_type == _C_SYS_HEADER:
        wanted = self._C_SECTION
      else:
        wanted = self._CPP_SECTION
      if self._section > wanted:
        # A system header after a later section: the only reported error.
        self._last_header = ''
        return error_message
      self._section = wanted
    elif header_type in (_LIKELY_MY_HEADER, _POSSIBLE_MY_HEADER):
      if self._section > self._MY_H_SECTION:
        # Past the "my header" section, such a header is simply treated as
        # any other header; for the "possible" kind we are not sure enough
        # that it is associated with this file anyway.
        self._section = self._OTHER_H_SECTION
      else:
        self._section = self._MY_H_SECTION
    else:
      assert header_type == _OTHER_HEADER
      self._section = self._OTHER_H_SECTION

    if self._section != section_before:
      self._last_header = ''

    return ''
553
554
class _CppLintState(object):
  """Maintains module-wide state."""

  def __init__(self):
    self.verbose_level = 1  # global setting.
    self.error_count = 0    # global count of reported errors
    # filters to apply when emitting error messages
    self.filters = _DEFAULT_FILTERS[:]
    self.counting = 'total'  # In what way are we counting errors?
    self.errors_by_category = {}  # string to int dict storing error counts
    # BEGIN android-added
    self.quiet = False      # global setting.
    # END android-added

    # output format:
    # "emacs" - format that emacs can parse (default)
    # "vs7" - format that Microsoft Visual Studio 7 can parse
    self.output_format = 'emacs'

  def SetOutputFormat(self, output_format):
    """Sets the output format for errors."""
    self.output_format = output_format

  # BEGIN android-added
  def SetQuiet(self, level):
    """Sets the module's quiet setting, and returns the previous setting."""
    last_quiet = self.quiet
    self.quiet = level
    return last_quiet
  # END android-added

  def SetVerboseLevel(self, level):
    """Sets the module's verbosity, and returns the previous setting."""
    last_verbose_level = self.verbose_level
    self.verbose_level = level
    return last_verbose_level

  def SetCountingStyle(self, counting_style):
    """Sets the module's counting options."""
    self.counting = counting_style

  def SetFilters(self, filters):
    """Sets the error-message filters.

    These filters are applied when deciding whether to emit a given
    error message.  The new list is the default filters followed by the
    given ones; it is installed only after every entry has been validated,
    so a rejected call leaves the current filters unchanged.

    Args:
      filters: A string of comma-separated filters (eg "+whitespace/indent").
               Each filter should start with + or -; else we die.
               Surrounding whitespace and empty entries are ignored.

    Raises:
      ValueError: The comma-separated filters did not all start with '+' or '-'.
                  E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
    """
    # Default filters always have less priority than the flag ones.
    new_filters = _DEFAULT_FILTERS[:]
    for filt in filters.split(','):
      clean_filt = filt.strip()
      if clean_filt:
        new_filters.append(clean_filt)
    for filt in new_filters:
      if not filt.startswith(('+', '-')):
        raise ValueError('Every filter in --filters must start with + or -'
                         ' (%s does not)' % filt)
    self.filters = new_filters

  def ResetErrorCounts(self):
    """Sets the module's error statistic back to zero."""
    self.error_count = 0
    self.errors_by_category = {}

  def IncrementErrorCount(self, category):
    """Bumps the module's error statistic.

    The total is always incremented.  Per-category counts are kept only
    when the counting style is 'toplevel' (keyed by the part of the
    category before the first '/') or 'detailed' (keyed by the full name).

    Args:
      category: str, the category of the error, e.g. 'whitespace/indent'.
    """
    self.error_count += 1
    if self.counting in ('toplevel', 'detailed'):
      if self.counting != 'detailed':
        category = category.split('/')[0]
      self.errors_by_category[category] = (
          self.errors_by_category.get(category, 0) + 1)

  def PrintErrorCounts(self):
    """Print a summary of errors by category, and the total, to stderr."""
    # items() rather than iteritems(): works on both Python 2 and 3.
    for category, count in self.errors_by_category.items():
      sys.stderr.write('Category \'%s\' errors found: %d\n' %
                       (category, count))
    sys.stderr.write('Total errors found: %d\n' % self.error_count)
642
# Module-wide singleton holding the lint settings and error counters.  The
# module-level helper functions below (_OutputFormat, _SetFilters, ...) read
# and update this object.
_cpplint_state = _CppLintState()
644
645
def _OutputFormat():
  """Gets the module's output format.

  Returns:
    The output_format attribute of the shared _cpplint_state object.
  """
  return _cpplint_state.output_format
649
650
def _SetOutputFormat(output_format):
  """Sets the module's output format.

  Args:
    output_format: The format name to store on the shared _cpplint_state
                   object; the value is not validated here.
  """
  _cpplint_state.SetOutputFormat(output_format)
654
655
656# BEGIN android-added
def _Quiet():
  """Returns the module's quiet setting.

  Returns:
    The quiet attribute of the shared _cpplint_state object.
  """
  return _cpplint_state.quiet
660
661
def _SetQuiet(level):
  """Sets the module's quiet status, and returns the previous setting.

  Args:
    level: The new quiet setting to store on the shared _cpplint_state.

  Returns:
    The quiet setting that was in effect before this call.
  """
  return _cpplint_state.SetQuiet(level)
665# END android-added
666
def _VerboseLevel():
  """Returns the module's verbosity setting.

  Returns:
    The verbose_level attribute of the shared _cpplint_state object.
  """
  return _cpplint_state.verbose_level
670
671
def _SetVerboseLevel(level):
  """Sets the module's verbosity, and returns the previous setting.

  Args:
    level: The new verbosity level to store on the shared _cpplint_state.

  Returns:
    The verbosity level that was in effect before this call.
  """
  return _cpplint_state.SetVerboseLevel(level)
675
676
def _SetCountingStyle(level):
  """Sets the module's counting options.

  Args:
    level: The counting style to store on the shared _cpplint_state.
           _CppLintState.IncrementErrorCount() keeps per-category counts
           only for 'toplevel' and 'detailed'.
  """
  _cpplint_state.SetCountingStyle(level)
680
681
def _Filters():
  """Returns the module's list of output filters, as a list.

  The list object held by the shared _cpplint_state is returned directly,
  not a copy.
  """
  return _cpplint_state.filters
685
686
def _SetFilters(filters):
  """Sets the module's error-message filters.

  These filters are applied when deciding whether to emit a given
  error message.  The work is delegated to _CppLintState.SetFilters() on
  the shared _cpplint_state object, whose ValueError for a filter without
  a leading + or - propagates to the caller.

  Args:
    filters: A string of comma-separated filters (eg "+whitespace/indent").
             Each filter should start with + or -; else we die.
  """
  _cpplint_state.SetFilters(filters)
698
699
class _FunctionState(object):
  """Keeps the name of the function being scanned and its body line count."""

  # Base line-count limits; Check() doubles them for each verbosity level.
  _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
  _TEST_TRIGGER = 400    # larger allowance for functions named TEST.../Test...

  def __init__(self):
    self.current_function = ''
    self.lines_in_function = 0
    self.in_a_function = False

  def Begin(self, function_name):
    """Start analyzing function body.

    Resets the line counter and remembers the function name.

    Args:
      function_name: The name of the function being tracked.
    """
    self.current_function = function_name
    self.lines_in_function = 0
    self.in_a_function = True

  def Count(self):
    """Count line in current function body; a no-op outside a function."""
    if not self.in_a_function:
      return
    self.lines_in_function += 1

  def Check(self, error, filename, linenum):
    """Report if too many lines in function body.

    The limit is the base trigger (the test trigger when the function name
    starts with TEST or Test) doubled once per verbosity level.  The
    reported confidence grows with the base-2 logarithm of the ratio of
    the line count to the base trigger, and is capped at 5.

    Args:
      error: The function to call with any errors found.
      filename: The name of the current file.
      linenum: The number of the line to check.
    """
    # BEGIN android-added
    if not self.in_a_function:
      return
    # END android-added
    base_trigger = self._NORMAL_TRIGGER
    if Match(r'T(EST|est)', self.current_function):
      base_trigger = self._TEST_TRIGGER
    trigger = base_trigger * 2**_VerboseLevel()

    if self.lines_in_function <= trigger:
      return

    error_level = min(
        5, int(math.log(self.lines_in_function / base_trigger, 2)))
    message = ('Small and focused functions are preferred:'
               ' %s has %d non-comment lines'
               ' (error triggered by exceeding %d lines).' % (
                   self.current_function, self.lines_in_function, trigger))
    error(filename, linenum, 'readability/fn_size', error_level, message)

  def End(self):
    """Stop analyzing function body."""
    self.in_a_function = False
758
759
class _IncludeError(Exception):
  """Indicates a problem with the include order in a file.

  NOTE(review): no raise site is visible in this part of the file;
  _IncludeState.CheckNextIncludeOrder() reports ordering problems through
  its return value instead.  Confirm remaining users before removing.
  """
  pass
763
764
class FileInfo(object):
  """Provides utility functions for filenames.

  FileInfo provides easy access to the components of a file's path
  relative to the project root.
  """

  def __init__(self, filename):
    self._filename = filename

  def FullName(self):
    """Make Windows paths like Unix.

    Returns:
      The absolute path of the file with every backslash replaced by '/'.
    """
    return os.path.abspath(self._filename).replace('\\', '/')

  def RepositoryName(self):
    r"""FullName after removing the local path to the repository.

    If we have a real absolute path name here we can try to do something smart:
    detecting the root of the checkout and truncating /path/to/checkout from
    the name so that we get header guards that don't include things like
    "C:\Documents and Settings\..." or "/home/username/..." in them and thus
    people on different computers who have checked the source out to different
    locations won't see bogus errors.

    Returns:
      For an existing file inside an SVN working copy with per-directory
      .svn entries, the path relative to the top of that working copy.  For
      an existing file below a directory containing .git, .hg or .svn, the
      path relative to that directory, prefixed with "art/".  Otherwise the
      unchanged FullName().
    """
    fullname = self.FullName()

    if os.path.exists(fullname):
      project_dir = os.path.dirname(fullname)

      if os.path.exists(os.path.join(project_dir, ".svn")):
        # If there's a .svn file in the current directory, we recursively look
        # up the directory tree for the top of the SVN checkout
        root_dir = project_dir
        one_up_dir = os.path.dirname(root_dir)
        # Stop once dirname() no longer changes the path (filesystem root);
        # otherwise a .svn entry there would make this loop spin forever.
        while (one_up_dir != root_dir and
               os.path.exists(os.path.join(one_up_dir, ".svn"))):
          root_dir = os.path.dirname(root_dir)
          one_up_dir = os.path.dirname(one_up_dir)

        prefix = os.path.commonprefix([root_dir, project_dir])
        return fullname[len(prefix) + 1:]

      # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
      # searching up from the current path.
      root_dir = os.path.dirname(fullname)
      while (root_dir != os.path.dirname(root_dir) and
             not os.path.exists(os.path.join(root_dir, ".git")) and
             not os.path.exists(os.path.join(root_dir, ".hg")) and
             not os.path.exists(os.path.join(root_dir, ".svn"))):
        root_dir = os.path.dirname(root_dir)

      if (os.path.exists(os.path.join(root_dir, ".git")) or
          os.path.exists(os.path.join(root_dir, ".hg")) or
          os.path.exists(os.path.join(root_dir, ".svn"))):
        prefix = os.path.commonprefix([root_dir, project_dir])
        # BEGIN android-changed
        # return fullname[len(prefix) + 1:]
        return "art/" + fullname[len(prefix) + 1:]
        # END android-changed

    # Don't know what to do; header guard warnings may be wrong...
    return fullname

  def Split(self):
    """Splits the file into the directory, basename, and extension.

    For 'chrome/browser/browser.cc', Split() would
    return ('chrome/browser', 'browser', '.cc')

    Returns:
      A tuple of (directory, basename, extension).
    """

    googlename = self.RepositoryName()
    project, rest = os.path.split(googlename)
    return (project,) + os.path.splitext(rest)

  def BaseName(self):
    """File base name - text after the final slash, before the final period."""
    return self.Split()[1]

  def Extension(self):
    """File extension - the final period and the text following it."""
    return self.Split()[2]

  def NoExtension(self):
    """Repository name of the file with its extension removed."""
    return '/'.join(self.Split()[0:2])

  def IsSource(self):
    """File has a source file extension."""
    return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
856
857
def _ShouldPrintError(category, confidence, linenum):
  """Decides whether an error of this category/confidence gets reported.

  An error is dropped when a NOLINT comment suppresses it on that line,
  when its confidence is below the configured verbosity, or when the
  active filters exclude its category.

  Args:
    category: The error category, e.g. "whitespace/indent".
    confidence: The confidence score attached to the error.
    linenum: The number of the line the error was found on.

  Returns:
    True if the error should be printed, False otherwise.
  """
  if IsErrorSuppressedByNolint(category, linenum):
    return False
  if confidence < _cpplint_state.verbose_level:
    return False

  # Filters are applied in order, so the last one whose prefix matches the
  # category decides the outcome.
  suppressed = False
  for rule in _Filters():
    sign, prefix = rule[:1], rule[1:]
    if sign not in ('-', '+'):
      assert False  # should have been checked for in SetFilter.
    elif category.startswith(prefix):
      suppressed = (sign == '-')
  return not suppressed
883
884
def Error(filename, linenum, category, confidence, message):
  """Reports a lint error on stderr unless it is filtered out.

  Besides the location, the report carries our confidence in the error:
  how sure we are that it is a genuine style regression rather than a
  misidentification or a use that is sometimes justified.

  Whether the error is printed at all is decided by _ShouldPrintError,
  which takes "NOLINT(category)" comments in the source into account.

  Args:
    filename: The name of the file containing the error.
    linenum: The number of the line containing the error.
    category: A string used to describe the "category" this bug
      falls under: "whitespace", say, or "runtime".  Categories
      may have a hierarchy separated by slashes: "whitespace/indent".
    confidence: A number from 1-5 representing a confidence score for
      the error, with 5 meaning that we are certain of the problem,
      and 1 meaning that it could be a legitimate construct.
    message: The error message.
  """
  if not _ShouldPrintError(category, confidence, linenum):
    return

  _cpplint_state.IncrementErrorCount(category)

  # The fields are the same for every output format; only the layout of
  # the location prefix differs.
  output_format = _cpplint_state.output_format
  if output_format == 'vs7':
    template = '%s(%s):  %s  [%s] [%d]\n'
  elif output_format == 'eclipse':
    template = '%s:%s: warning: %s  [%s] [%d]\n'
  else:
    template = '%s:%s:  %s  [%s] [%d]\n'
  sys.stderr.write(
      template % (filename, linenum, message, category, confidence))
918
919
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard:
# a backslash followed by a simple escape character, a run of digits, or
# an 'x' and hex digits.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches double-quoted strings.  Escape sequences should already have been
# removed with _RE_PATTERN_CLEANSE_LINE_ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches single-character literals.  Escape sequences should already have
# been removed with _RE_PATTERN_CLEANSE_LINE_ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style /* ... */ comments that open and close on the same line.
# This RE is a little more complicated than one might expect, because it
# also decides which surrounding spaces to remove, so that comments inside
# statements are handled better.
# The current rule is: spaces are cleared on both sides only when the comment
# is at the end of the line.  Otherwise we try to remove spaces on the right
# side; if that doesn't work we remove them on the left side, but only if a
# non-word character follows the comment.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
940
941
def IsCppString(line):
  """Tells whether text appended to 'line' would land inside a string.

  Comments, whether single-line or multi-line, are not taken into account.

  Args:
    line: A partial line of code, from its start up to some position.

  Returns:
    True if the next character appended to 'line' would be inside a
    string constant.
  """
  # Neutralize escaped backslashes first so that \\" is not read as \".
  neutralized = line.replace(r'\\', 'XX')
  all_quotes = neutralized.count('"')
  escaped_quotes = neutralized.count(r'\"')
  quote_char_literals = neutralized.count("'\"'")
  # An odd number of remaining quotes means a string is still open.
  open_quotes = all_quotes - escaped_quotes - quote_char_literals
  return open_quotes % 2 == 1
957
958
def FindNextMultiLineCommentStart(lines, lineix):
  """Returns the index of the next line that opens a multiline comment.

  A line qualifies when, ignoring surrounding whitespace, it starts with
  '/*' and does not contain the closing '*/'.  The search starts at lineix;
  len(lines) is returned when no such line exists.
  """
  for index in range(lineix, len(lines)):
    stripped = lines[index].strip()
    # Skip comments that are closed again on the same line.
    if stripped.startswith('/*') and stripped.find('*/', 2) < 0:
      return index
  return len(lines)
968
969
def FindNextMultiLineCommentEnd(lines, lineix):
  """Returns the index of the next line ending in '*/', from lineix onward.

  Trailing whitespace is ignored.  len(lines) is returned when no line
  closes the comment.
  """
  for index in range(lineix, len(lines)):
    if lines[index].strip().endswith('*/'):
      return index
  return len(lines)
977
978
def RemoveMultiLineCommentsFromRange(lines, begin, end):
  """Overwrites lines[begin:end] in place with placeholder // comments."""
  # A '// dummy' placeholder keeps each line non-empty, so later checks do
  # not report unnecessary blank lines where the comment used to be.
  index = begin
  while index < end:
    lines[index] = '// dummy'
    index += 1
985
986
def RemoveMultiLineComments(filename, lines, error):
  """Blanks out every multiline (C-style) comment in lines, in place.

  Args:
    filename: The name of the current file.
    lines: A list of strings, modified in place.
    error: The function to call if a comment is never closed.
  """
  cursor = 0
  while cursor < len(lines):
    comment_start = FindNextMultiLineCommentStart(lines, cursor)
    if comment_start >= len(lines):
      # No further multiline comments.
      break
    comment_end = FindNextMultiLineCommentEnd(lines, comment_start)
    if comment_end >= len(lines):
      error(filename, comment_start + 1, 'readability/multiline_comment', 5,
            'Could not find end of multi-line comment')
      break
    # Resume the search on the line after the comment that was just removed.
    cursor = comment_end + 1
    RemoveMultiLineCommentsFromRange(lines, comment_start, cursor)
1001
1002
def CleanseComments(line):
  """Strips a trailing //-comment and any same-line /* */ comments.

  Args:
    line: A line of C++ source.

  Returns:
    The line with single-line comments removed.
  """
  slash_index = line.find('//')
  if slash_index != -1:
    code_part = line[:slash_index]
    # Only the first '//' is examined; it is left alone when it sits inside
    # a string constant.
    if not IsCppString(code_part):
      line = code_part.rstrip()
  # Then drop /* ... */ comments.
  return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
1017
1018
class CleansedLines(object):
  """Keeps three parallel views of a file's lines.

  1) raw_lines holds the lines exactly as given,
  2) lines holds them with comments removed, and
  3) elided holds them with strings collapsed and comments removed.
  All three are lists of the same length.
  """

  def __init__(self, lines):
    self.raw_lines = lines
    self.num_lines = len(lines)
    self.lines = []
    self.elided = []
    for raw_line in lines:
      self.lines.append(CleanseComments(raw_line))
      # Strings are collapsed before comments are stripped.
      collapsed = self._CollapseStrings(raw_line)
      self.elided.append(CleanseComments(collapsed))

  def NumLines(self):
    """Returns the number of lines represented."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Reduces string and char literals on a line to bare "" or ''.

    Strings are nixed first so we are not fooled by text like '"http://"'.
    Lines matching _RE_PATTERN_INCLUDE are returned untouched.

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings.
    """
    if _RE_PATTERN_INCLUDE.match(elided):
      return elided

    # Escaped characters go first so that collapsing quotes stays simple;
    # things that look like escapes should not occur outside of strings and
    # chars anyway.
    substitutions = (
        (_RE_PATTERN_CLEANSE_LINE_ESCAPES, ''),
        (_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES, "''"),
        (_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES, '""'),
    )
    for pattern, replacement in substitutions:
      elided = pattern.sub(replacement, elided)
    return elided
1062
1063
def FindEndOfExpressionInLine(line, startpos, depth, startchar, endchar):
  """Find the position just after the matching endchar.

  Args:
    line: a CleansedLines line.
    startpos: start searching at this position.
    depth: nesting level at startpos.
    startchar: expression opening character.
    endchar: expression closing character.

  Returns:
    Index just after endchar, or -1 if the nesting level never returns to
    zero on this line.
  """
  # range() rather than xrange(): it iterates the same indices, matches the
  # other loops in this file, and also exists on Python 3.
  for i in range(startpos, len(line)):
    if line[i] == startchar:
      depth += 1
    elif line[i] == endchar:
      depth -= 1
      if depth == 0:
        return i + 1
  return -1
1085
1086
def CloseExpression(clean_lines, linenum, pos):
  """Locates the end of the expression opened at the given position.

  If lines[linenum][pos] points to a '(' or '{' or '[', finds the
  linenum/pos that correspond to the closing of the expression.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *past* the closing brace, or
    (line, len(lines), -1) if we never find a close.  Note we ignore
    strings and comments when matching; and the line we return is the
    'cleansed' line at linenum.
  """
  closing_for = {'(': ')', '[': ']', '{': '}'}

  line = clean_lines.elided[linenum]
  startchar = line[pos]
  if startchar not in closing_for:
    return (line, clean_lines.NumLines(), -1)
  endchar = closing_for[startchar]

  # The expression may already be closed on the line it starts on.
  end_pos = FindEndOfExpressionInLine(line, pos, 0, startchar, endchar)
  if end_pos > -1:
    return (line, linenum, end_pos)

  # Otherwise carry the count of unclosed openers over to following lines.
  remainder = line[pos:]
  num_open = remainder.count(startchar) - remainder.count(endchar)
  for next_linenum in range(linenum + 1, clean_lines.NumLines()):
    line = clean_lines.elided[next_linenum]
    balance = num_open + line.count(startchar) - line.count(endchar)
    if balance <= 0:
      return (line, next_linenum,
              FindEndOfExpressionInLine(line, 0, num_open, startchar, endchar))
    num_open = balance

  # Did not find endchar before end of file, give up
  return (line, clean_lines.NumLines(), -1)
1130
def CheckForCopyright(filename, lines, error):
  """Logs an error if no Copyright message appears at the top of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  # We'll say it should occur by line 10. Don't forget there's a
  # dummy line at the front, so index 0 is never inspected.
  # range() rather than xrange() so this also runs on Python 3.
  last_index_to_check = min(len(lines), 11)
  has_copyright = any(re.search(r'Copyright', lines[index], re.I)
                      for index in range(1, last_index_to_check))
  if not has_copyright:
    error(filename, 0, 'legal/copyright', 5,
          'No copyright message found.  '
          'You should have a line: "Copyright [year] <Copyright Owner>"')
1142
1143
def GetHeaderGuardCPPVariable(filename):
  """Returns the CPP variable that should be used as a header guard.

  Args:
    filename: The name of a C++ header file.

  Returns:
    The CPP variable that should be used as a header guard in the
    named file.

  """

  # Restores original filename in case that cpplint is invoked from Emacs's
  # flymake.
  filename = re.sub(r'_flymake\.h$', '.h', filename)
  filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)

  file_path_from_root = FileInfo(filename).RepositoryName()
  if _root:
    # Strip the root as a literal prefix.  Splicing it into a regular
    # expression would treat characters such as '.' or '+' in the directory
    # name as metacharacters, and a backslash path separator at the end of
    # the pattern would make it invalid.
    root_prefix = _root + os.sep
    if file_path_from_root.startswith(root_prefix):
      file_path_from_root = file_path_from_root[len(root_prefix):]
  # Dashes, dots, slashes and whitespace all become underscores.
  return re.sub(r'[-./\s]', '_', file_path_from_root).upper() + '_'
1166
1167
def CheckForHeaderGuard(filename, lines, error):
  """Verifies the #ifndef/#define/#endif header guard of a header file.

  Logs an error if no #ifndef header guard is present.  Otherwise checks
  that the guard matches the name derived from the file's path, that
  #define agrees with #ifndef, and that the last #endif carries the guard
  name in a trailing comment.

  Args:
    filename: The name of the C++ header file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  cppvar = GetHeaderGuardCPPVariable(filename)

  ifndef = define = endif = None
  ifndef_linenum = endif_linenum = 0
  for linenum, line in enumerate(lines):
    words = line.split()
    if len(words) >= 2:
      directive, argument = words[:2]
      # Only the first #ifndef and the first #define are remembered.
      if directive == '#ifndef' and not ifndef:
        ifndef, ifndef_linenum = argument, linenum
      if directive == '#define' and not define:
        define = argument
    # For #endif the last occurrence wins, and the whole line is kept.
    if line.startswith('#endif'):
      endif, endif_linenum = line, linenum

  if not ifndef:
    error(filename, 0, 'build/header_guard', 5,
          'No #ifndef header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  if not define:
    error(filename, 0, 'build/header_guard', 5,
          'No #define header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
  # for backward compatibility; that variant is reported at level 0.
  legacy_cppvar = cppvar + '_'
  if ifndef != cppvar:
    error_level = 0 if ifndef == legacy_cppvar else 5
    ParseNolintSuppressions(filename, lines[ifndef_linenum], ifndef_linenum,
                            error)
    error(filename, ifndef_linenum, 'build/header_guard', error_level,
          '#ifndef header guard has wrong style, please use: %s' % cppvar)

  if define != ifndef:
    error(filename, 0, 'build/header_guard', 5,
          '#ifndef and #define don\'t match, suggested CPP variable is: %s' %
          cppvar)
    return

  if endif != ('#endif  // %s' % cppvar):
    error_level = 0 if endif == ('#endif  // %s' % legacy_cppvar) else 5
    ParseNolintSuppressions(filename, lines[endif_linenum], endif_linenum,
                            error)
    error(filename, endif_linenum, 'build/header_guard', error_level,
          '#endif line should be "#endif  // %s"' % cppvar)
1241
1242
def CheckForUnicodeReplacementCharacters(filename, lines, error):
  """Reports every line that contains a Unicode replacement character.

  Such a character means either that the file held invalid UTF-8 (likely)
  or that it really contains U+FFFD (which it shouldn't).  Note that
  line numbering can be thrown off if the invalid UTF-8 occurred adjacent
  to a newline.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  for linenum, text in enumerate(lines):
    if u'\ufffd' not in text:
      continue
    error(filename, linenum, 'readability/utf8', 5,
          'Line contains invalid UTF-8 (or Unicode replacement character).')
1260
1261
def CheckForNewlineAtEOF(filename, lines, error):
  """Reports a file whose last line is not terminated by a newline.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  # The lines array was created by adding two newlines to the original
  # file (go figure), then splitting on \n.  So the file ends in \n
  # exactly when the last-but-two element exists and is empty.
  ends_with_newline = len(lines) >= 3 and not lines[-2]
  if not ends_with_newline:
    error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
          'Could not find a newline character at the end of the file.')
1278
1279
def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
  """Warns about /* ... */ comments or "..." strings left open on a line.

  /* ... */ comments are legit inside macros, for one line.
  Otherwise, we prefer // comments, so it's ok to warn about the
  other.  Likewise, it's ok for strings to extend across multiple
  lines, as long as a line continuation character (backslash)
  terminates each line. Although not currently prohibited by the C++
  style guide, it's ugly and unnecessary. We don't do well with either
  in this lint program, so we warn about both.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Escaped backslashes are harmless, but their second slash could be
  # mistaken for the start of \" below, so they are dropped up front.
  line = clean_lines.elided[linenum].replace('\\\\', '')

  comment_opens = line.count('/*')
  comment_closes = line.count('*/')
  if comment_opens > comment_closes:
    error(filename, linenum, 'readability/multiline_comment', 5,
          'Complex multi-line /*...*/-style comment found. '
          'Lint may give bogus warnings.  '
          'Consider replacing these with //-style comments, '
          'with #if 0...#endif, '
          'or with more clearly structured multi-line comments.')

  # An odd number of unescaped quotes means a string runs past this line.
  unescaped_quotes = line.count('"') - line.count('\\"')
  if unescaped_quotes % 2:
    error(filename, linenum, 'readability/multiline_string', 5,
          'Multi-line string ("...") found.  This lint script doesn\'t '
          'do well with such strings, and may give bogus warnings.  They\'re '
          'ugly and unnecessary, and you should use concatenation instead".')
1316
1317
# Pairs of (thread-unsafe call, replacement call), each spelled up to and
# including the opening parenthesis.  Every replacement is the same function
# name with an "_r" suffix, so the table is derived from the bare names.
threading_list = tuple(
    (function_name + '(', function_name + '_r(')
    for function_name in (
        'asctime',
        'ctime',
        'getgrgid',
        'getgrnam',
        'getlogin',
        'getpwnam',
        'getpwuid',
        'gmtime',
        'localtime',
        'rand',
        'readdir',
        'strtok',
        'ttyname',
    ))
1333
1334
def CheckPosixThreading(filename, clean_lines, linenum, error):
  """Flags calls to the thread-unsafe functions listed in threading_list.

  Much code has been originally written without consideration of
  multi-threading. Also, engineers are relying on their old experience;
  they have learned posix before threading extensions were added. These
  tests guide the engineers to use thread-safe functions (when using
  posix directly).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  for unsafe_call, safe_call in threading_list:
    found_at = line.find(unsafe_call)
    if found_at < 0:
      continue
    if found_at > 0:
      # Skip hits that are the tail of a longer identifier or a member
      # access (preceded by a letter, digit, '_', '.' or '>').
      preceding = line[found_at - 1]
      if preceding.isalnum() or preceding in ('_', '.', '>'):
        continue
    error(filename, linenum, 'runtime/threadsafe_fn', 2,
          'Consider using ' + safe_call +
          '...) instead of ' + unsafe_call +
          '...) for improved thread safety.')
1360
1361
# Matches an invalid increment or decrement such as "*count++;", which moves
# the pointer instead of changing the value it points to.  The pattern only
# matches when the statement is the first thing on the line (after optional
# whitespace).
_RE_PATTERN_INVALID_INCREMENT = re.compile(
    r'^\s*\*\w+(\+\+|--);')
1366
1367
def CheckInvalidIncrement(filename, clean_lines, linenum, error):
  """Flags statements of the form *count++ or *count--.

  For example following function:
  void increment_counter(int* count) {
    *count++;
  }
  is invalid, because it effectively does count++, moving pointer, and should
  be replaced with ++*count, (*count)++ or *count += 1.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  statement = clean_lines.elided[linenum]
  if _RE_PATTERN_INVALID_INCREMENT.match(statement) is None:
    return
  error(filename, linenum, 'runtime/invalid_increment', 5,
        'Changing pointer instead of value (or unused value of operator*).')
1388
1389
class _BlockInfo(object):
  """Bookkeeping for one generic block of code on the nesting stack."""

  def __init__(self, seen_open_brace):
    # Parenthesis depth and inline-assembly state start out neutral.
    self.open_parentheses = 0
    self.seen_open_brace = seen_open_brace
    self.inline_asm = _NO_ASM

  def CheckBegin(self, filename, clean_lines, linenum, error):
    """Hook for checks on the text leading up to the opening brace.

    This is mostly for checking the text after the class identifier
    and the "{", usually where the base class is specified.  A generic
    block has nothing to check, so this implementation does nothing.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Hook for checks on the text following the closing brace.

    This is mostly used for checking end of namespace comments.  A generic
    block has nothing to check, so this implementation does nothing.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """
1425
1426
class _ClassInfo(_BlockInfo):
  """Bookkeeping for a class or struct on the nesting stack."""

  def __init__(self, name, class_or_struct, clean_lines, linenum):
    _BlockInfo.__init__(self, False)
    self.name = name
    self.starting_linenum = linenum
    self.is_derived = False
    # Initial access level: 'public' for a struct, 'private' otherwise.
    self.access = 'public' if class_or_struct == 'struct' else 'private'

    # Try to find the end of the class by balancing braces from the
    # starting line.  This will be confused by things like:
    #   class A {
    #   } *x = { ...
    #
    # But it's still good enough for CheckSectionSpacing.
    self.last_line = 0
    brace_depth = 0
    for index in range(linenum, clean_lines.NumLines()):
      text = clean_lines.elided[index]
      brace_depth += text.count('{') - text.count('}')
      if brace_depth == 0:
        self.last_line = index
        break

  def CheckBegin(self, filename, clean_lines, linenum, error):
    """Marks the class as derived if the line contains a lone ':'."""
    # A bare ':' (one that is not part of '::') is what we look for.
    if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]):
      self.is_derived = True
1458
1459
class _NamespaceInfo(_BlockInfo):
  """Bookkeeping for a namespace on the nesting stack."""

  def __init__(self, name, linenum):
    _BlockInfo.__init__(self, False)
    # Anonymous namespaces are stored with an empty name.
    self.name = name or ''
    self.starting_linenum = linenum

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Checks the comment that terminates the namespace.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line holding the closing brace.
      error: The function to call with any errors found.
    """
    line = clean_lines.raw_lines[linenum]

    # A namespace spanning fewer than 10 lines is not required to have a
    # terminating comment, but one that is already there and mentions
    # "namespace" still gets validated below.
    #
    # TODO(unknown): We always want to check end of namespace comments
    # if a namespace is large, but sometimes we also want to apply the
    # check if a short namespace contained nontrivial things (something
    # other than forward declarations).  There is currently no logic on
    # deciding what these nontrivial things are, so this check is
    # triggered by namespace size only, which works most of the time.
    is_short = linenum - self.starting_linenum < 10
    if is_short and not Match(r'};*\s*(//|/\*).*\bnamespace\b', line):
      return

    # Pick the expected terminator.  C style "/* */" comments are accepted
    # too, so that namespaces closed inside preprocessor macros can be
    # cpplint clean, and so is a trailing period as in
    # "// end of namespace <name>.".  Nothing else is accepted, otherwise
    # an existing comment that is merely a substring of the expected
    # namespace name would slip through.
    if self.name:
      # Named namespace
      expected = (r'};*\s*(//|/\*).*\bnamespace\s+' + re.escape(self.name) +
                  r'[\*/\.\\\s]*$')
      message = ('Namespace should be terminated with "// namespace %s"' %
                 self.name)
    else:
      # Anonymous namespace
      expected = r'};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$'
      message = 'Namespace should be terminated with "// namespace"'

    if not Match(expected, line):
      error(filename, linenum, 'readability/namespace', 5, message)
1512
1513
class _PreprocessorInfo(object):
  """Nesting-stack checkpoints recorded for one #if ... #endif group."""

  def __init__(self, stack_before_if):
    # No #else or #elif has been seen yet.
    self.seen_else = False

    # The entire nesting stack as it was before the #if.
    self.stack_before_if = stack_before_if

    # The entire nesting stack up to #else; empty until one is recorded.
    self.stack_before_else = []
1526
1527
1528class _NestingState(object):
1529  """Holds states related to parsing braces."""
1530
1531  def __init__(self):
1532    # Stack for tracking all braces.  An object is pushed whenever we
1533    # see a "{", and popped when we see a "}".  Only 3 types of
1534    # objects are possible:
1535    # - _ClassInfo: a class or struct.
1536    # - _NamespaceInfo: a namespace.
1537    # - _BlockInfo: some other type of block.
1538    self.stack = []
1539
1540    # Stack of _PreprocessorInfo objects.
1541    self.pp_stack = []
1542
1543  def SeenOpenBrace(self):
1544    """Check if we have seen the opening brace for the innermost block.
1545
1546    Returns:
1547      True if we have seen the opening brace, False if the innermost
1548      block is still expecting an opening brace.
1549    """
1550    return (not self.stack) or self.stack[-1].seen_open_brace
1551
1552  def InNamespaceBody(self):
1553    """Check if we are currently one level inside a namespace body.
1554
1555    Returns:
1556      True if top of the stack is a namespace block, False otherwise.
1557    """
1558    return self.stack and isinstance(self.stack[-1], _NamespaceInfo)
1559
1560  def UpdatePreprocessor(self, line):
1561    """Update preprocessor stack.
1562
1563    We need to handle preprocessors due to classes like this:
1564      #ifdef SWIG
1565      struct ResultDetailsPageElementExtensionPoint {
1566      #else
1567      struct ResultDetailsPageElementExtensionPoint : public Extension {
1568      #endif
1569    (see http://go/qwddn for original example)
1570
1571    We make the following assumptions (good enough for most files):
1572    - Preprocessor condition evaluates to true from #if up to first
1573      #else/#elif/#endif.
1574
1575    - Preprocessor condition evaluates to false from #else/#elif up
1576      to #endif.  We still perform lint checks on these lines, but
1577      these do not affect nesting stack.
1578
1579    Args:
1580      line: current line to check.
1581    """
1582    if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line):
1583      # Beginning of #if block, save the nesting stack here.  The saved
1584      # stack will allow us to restore the parsing state in the #else case.
1585      self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
1586    elif Match(r'^\s*#\s*(else|elif)\b', line):
1587      # Beginning of #else block
1588      if self.pp_stack:
1589        if not self.pp_stack[-1].seen_else:
1590          # This is the first #else or #elif block.  Remember the
1591          # whole nesting stack up to this point.  This is what we
1592          # keep after the #endif.
1593          self.pp_stack[-1].seen_else = True
1594          self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)
1595
1596        # Restore the stack to how it was before the #if
1597        self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
1598      else:
1599        # TODO(unknown): unexpected #else, issue warning?
1600        pass
1601    elif Match(r'^\s*#\s*endif\b', line):
1602      # End of #if or #else blocks.
1603      if self.pp_stack:
1604        # If we saw an #else, we will need to restore the nesting
1605        # stack to its former state before the #else, otherwise we
1606        # will just continue from where we left off.
1607        if self.pp_stack[-1].seen_else:
1608          # Here we can just use a shallow copy since we are the last
1609          # reference to it.
1610          self.stack = self.pp_stack[-1].stack_before_else
1611        # Drop the corresponding #if
1612        self.pp_stack.pop()
1613      else:
1614        # TODO(unknown): unexpected #endif, issue warning?
1615        pass
1616
1617  def Update(self, filename, clean_lines, linenum, error):
1618    """Update nesting state with current line.
1619
1620    Args:
1621      filename: The name of the current file.
1622      clean_lines: A CleansedLines instance containing the file.
1623      linenum: The number of the line to check.
1624      error: The function to call with any errors found.
1625    """
1626    line = clean_lines.elided[linenum]
1627
1628    # Update pp_stack first
1629    self.UpdatePreprocessor(line)
1630
1631    # Count parentheses.  This is to avoid adding struct arguments to
1632    # the nesting stack.
1633    if self.stack:
1634      inner_block = self.stack[-1]
1635      depth_change = line.count('(') - line.count(')')
1636      inner_block.open_parentheses += depth_change
1637
1638      # Also check if we are starting or ending an inline assembly block.
1639      if inner_block.inline_asm in (_NO_ASM, _END_ASM):
1640        if (depth_change != 0 and
1641            inner_block.open_parentheses == 1 and
1642            _MATCH_ASM.match(line)):
1643          # Enter assembly block
1644          inner_block.inline_asm = _INSIDE_ASM
1645        else:
1646          # Not entering assembly block.  If previous line was _END_ASM,
1647          # we will now shift to _NO_ASM state.
1648          inner_block.inline_asm = _NO_ASM
1649      elif (inner_block.inline_asm == _INSIDE_ASM and
1650            inner_block.open_parentheses == 0):
1651        # Exit assembly block
1652        inner_block.inline_asm = _END_ASM
1653
1654    # Consume namespace declaration at the beginning of the line.  Do
1655    # this in a loop so that we catch same line declarations like this:
1656    #   namespace proto2 { namespace bridge { class MessageSet; } }
1657    while True:
1658      # Match start of namespace.  The "\b\s*" below catches namespace
1659      # declarations even if it weren't followed by a whitespace, this
1660      # is so that we don't confuse our namespace checker.  The
1661      # missing spaces will be flagged by CheckSpacing.
1662      namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
1663      if not namespace_decl_match:
1664        break
1665
1666      new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
1667      self.stack.append(new_namespace)
1668
1669      line = namespace_decl_match.group(2)
1670      if line.find('{') != -1:
1671        new_namespace.seen_open_brace = True
1672        line = line[line.find('{') + 1:]
1673
1674    # Look for a class declaration in whatever is left of the line
1675    # after parsing namespaces.  The regexp accounts for decorated classes
1676    # such as in:
1677    #   class LOCKABLE API Object {
1678    #   };
1679    #
1680    # Templates with class arguments may confuse the parser, for example:
1681    #   template <class T
1682    #             class Comparator = less<T>,
1683    #             class Vector = vector<T> >
1684    #   class HeapQueue {
1685    #
1686    # Because this parser has no nesting state about templates, by the
1687    # time it saw "class Comparator", it may think that it's a new class.
1688    # Nested templates have a similar problem:
1689    #   template <
1690    #       typename ExportedType,
1691    #       typename TupleType,
1692    #       template <typename, typename> class ImplTemplate>
1693    #
1694    # To avoid these cases, we ignore classes that are followed by '=' or '>'
1695    class_decl_match = Match(
1696        r'\s*(template\s*<[\w\s<>,:]*>\s*)?'
1697        '(class|struct)\s+([A-Z_]+\s+)*(\w+(?:::\w+)*)'
1698        '(([^=>]|<[^<>]*>)*)$', line)
1699    if (class_decl_match and
1700        (not self.stack or self.stack[-1].open_parentheses == 0)):
1701      self.stack.append(_ClassInfo(
1702          class_decl_match.group(4), class_decl_match.group(2),
1703          clean_lines, linenum))
1704      line = class_decl_match.group(5)
1705
1706    # If we have not yet seen the opening brace for the innermost block,
1707    # run checks here.
1708    if not self.SeenOpenBrace():
1709      self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)
1710
1711    # Update access control if we are inside a class/struct
1712    if self.stack and isinstance(self.stack[-1], _ClassInfo):
1713      access_match = Match(r'\s*(public|private|protected)\s*:', line)
1714      if access_match:
1715        self.stack[-1].access = access_match.group(1)
1716
1717    # Consume braces or semicolons from what's left of the line
1718    while True:
1719      # Match first brace, semicolon, or closed parenthesis.
1720      matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line)
1721      if not matched:
1722        break
1723
1724      token = matched.group(1)
1725      if token == '{':
1726        # If namespace or class hasn't seen a opening brace yet, mark
1727        # namespace/class head as complete.  Push a new block onto the
1728        # stack otherwise.
1729        if not self.SeenOpenBrace():
1730          self.stack[-1].seen_open_brace = True
1731        else:
1732          self.stack.append(_BlockInfo(True))
1733          if _MATCH_ASM.match(line):
1734            self.stack[-1].inline_asm = _BLOCK_ASM
1735      elif token == ';' or token == ')':
1736        # If we haven't seen an opening brace yet, but we already saw
1737        # a semicolon, this is probably a forward declaration.  Pop
1738        # the stack for these.
1739        #
1740        # Similarly, if we haven't seen an opening brace yet, but we
1741        # already saw a closing parenthesis, then these are probably
1742        # function arguments with extra "class" or "struct" keywords.
1743        # Also pop these stack for these.
1744        if not self.SeenOpenBrace():
1745          self.stack.pop()
1746      else:  # token == '}'
1747        # Perform end of block checks and pop the stack.
1748        if self.stack:
1749          self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
1750          self.stack.pop()
1751      line = matched.group(2)
1752
1753  def InnermostClass(self):
1754    """Get class info on the top of the stack.
1755
1756    Returns:
1757      A _ClassInfo object if we are inside a class, or None otherwise.
1758    """
1759    for i in range(len(self.stack), 0, -1):
1760      classinfo = self.stack[i - 1]
1761      if isinstance(classinfo, _ClassInfo):
1762        return classinfo
1763    return None
1764
1765  def CheckClassFinished(self, filename, error):
1766    """Checks that all classes have been completely parsed.
1767
1768    Call this when all lines in a file have been processed.
1769    Args:
1770      filename: The name of the current file.
1771      error: The function to call with any errors found.
1772    """
1773    # Note: This test can result in false positives if #ifdef constructs
1774    # get in the way of brace matching. See the testBuildClass test in
1775    # cpplint_unittest.py for an example of this.
1776    for obj in self.stack:
1777      if isinstance(obj, _ClassInfo):
1778        error(filename, obj.starting_linenum, 'build/class', 5,
1779              'Failed to find complete declaration of class %s' %
1780              obj.name)
1781
1782
def CheckForNonStandardConstructs(filename, clean_lines, linenum,
                                  nesting_state, error):
  r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2.

  Complain about several constructs which gcc-2 accepts, but which are
  not standard C++.  Warning about these in lint is one way to ease the
  transition to new compilers.
  - put storage class first (e.g. "static const" instead of "const static").
  - "%lld" instead of "%qd" in printf-type functions.
  - "%1$d" is non-standard in printf-type functions.
  - "\%" is an undefined character escape sequence.
  - text after #endif is not allowed.
  - invalid inner-style forward declaration.
  - >? and <? operators, and their >?= and <?= cousins.

  Additionally, check for constructor/destructor style violations and reference
  members, as it is very convenient to do so while checking for
  gcc-2 compliance.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: A callable to which errors are reported.  It is invoked here with
           five arguments: filename, line number, category, error level,
           and message.
  """

  # Stage 1: comments removed, string literals still present.
  with_strings = clean_lines.lines[linenum]

  printf_checks = (
      (r'printf\s*\(.*".*%[-+ ]?\d*q', 3,
       '%q in format strings is deprecated.  Use %ll instead.'),
      (r'printf\s*\(.*".*%\d+\$', 2,
       '%N$ formats are unconventional.  Try rewriting to avoid them.'),
  )
  for pattern, level, message in printf_checks:
    if Search(pattern, with_strings):
      error(filename, linenum, 'runtime/printf_format', level, message)

  # Drop escaped backslashes so that they are not mistaken for the start
  # of an undefined escape sequence.
  without_double_backslash = with_strings.replace('\\\\', '')
  if Search(r'("|\').*\\(%|\[|\(|{)', without_double_backslash):
    error(filename, linenum, 'build/printf_format', 3,
          '%, [, (, and { are undefined character escapes.  Unescape them.')

  # Stage 2: both comments and strings removed.
  code = clean_lines.elided[linenum]

  # Each entry: (matcher, pattern, category, level, message).  Entries are
  # evaluated in order, so errors are reported in the order listed.
  #
  # TODO(unknown): Could the const string& check (last entry) be expanded
  # safely to arbitrary references, without triggering too many false
  # positives? The first attempt triggered 5 warnings for mostly benign code
  # in the regtest, hence the restriction.
  # Here's the original regexp, for the reference:
  # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
  # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
  code_checks = (
      (Search,
       r'\b(const|volatile|void|char|short|int|long'
       r'|float|double|signed|unsigned'
       r'|schar|u?int8|u?int16|u?int32|u?int64)'
       r'\s+(register|static|extern|typedef)\b',
       'build/storage_class', 5,
       'Storage class (static, extern, typedef, etc) should be first.'),
      (Match,
       r'\s*#\s*endif\s*[^/\s]+',
       'build/endif_comment', 5,
       'Uncommented text after #endif is non-standard.  Use a comment.'),
      (Match,
       r'\s*class\s+(\w+\s*::\s*)+\w+\s*;',
       'build/forward_decl', 5,
       'Inner-style forward declarations are invalid.  Remove this line.'),
      (Search,
       r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
       'build/deprecated', 3,
       '>? and <? (max and min) operators are non-standard and deprecated.'),
      (Search,
       r'^\s*const\s*string\s*&\s*\w+\s*;',
       'runtime/member_string_references', 2,
       'const string& members are dangerous. It is much better to use '
       'alternatives, such as pointers or simple constants.'),
  )
  for matcher, pattern, category, level, message in code_checks:
    if matcher(pattern, code):
      error(filename, linenum, category, level, message)

  # Stage 3: checks that only apply inside a class whose head is complete.
  enclosing_class = nesting_state.InnermostClass()
  if not enclosing_class or not enclosing_class.seen_open_brace:
    return

  # The class may have been declared with namespace or classname qualifiers.
  # The constructor and destructor will not have those qualifiers.
  escaped_name = re.escape(enclosing_class.name.split('::')[-1])

  # Look for single-argument constructors that aren't marked explicit.
  # Technically a valid construct, but against style.
  ctor = Match(r'\s+(?:inline\s+)?%s\s*\(([^,()]+)\)' % escaped_name, code)
  if not ctor:
    return

  sole_argument = ctor.group(1)
  if sole_argument == 'void':
    return

  # An argument that is a (const) reference to the class itself is exempt.
  if Match(r'(const\s+)?%s\s*(?:<\w+>\s*)?&' % escaped_name,
           sole_argument.strip()):
    return

  error(filename, linenum, 'runtime/explicit', 5,
        'Single-argument constructors should be marked explicit.')
1888
1889
def CheckSpacingForFunctionCall(filename, line, linenum, error):
  """Checks spacing around the parentheses of function calls.

  Args:
    filename: The name of the current file.
    line: The text of the line to check.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Function calls often occur inside if/for/while/switch expressions,
  # which have their own, more liberal conventions.  When the line holds
  # such a construct, only the text inside its parentheses is examined
  # (with the stricter function-call rules); otherwise the whole line is.
  control_patterns = (r'\bif\s*\((.*)\)\s*{',
                      r'\bfor\s*\((.*)\)\s*{',
                      r'\bwhile\s*\((.*)\)\s*[{;]',
                      r'\bswitch\s*\((.*)\)\s*{')
  fncall = line
  for control_pattern in control_patterns:
    control = Search(control_pattern, line)
    if control:
      fncall = control.group(1)
      break

  # Except in if/for/while/switch, there should never be space
  # immediately inside parens (eg "f( 3, 4 )").  We make an exception
  # for nested parens ( (a+b) + c ).  Likewise, there should never be
  # a space before a ( when it's a function argument.  I assume it's a
  # function argument when the char before the whitespace is legal in
  # a function name (alnum + _) and we're not starting a macro. Also ignore
  # pointers and references to arrays and functions coz they're too tricky:
  # we use a very simple way to recognize these:
  # " (something)(maybe-something)" or
  # " (something)(maybe-something," or
  # " (something)[something]"
  # Note that we assume the contents of [] to be short enough that
  # they'll never need to wrap.

  # Ignore control structures.
  # BEGIN android-changed
  # The unmodified list was (if|for|while|switch|return|delete); 'new' was
  # added here.
  if Search(r'\b(if|for|while|switch|return|delete|new)\b', fncall):
    return
  # END android-changed

  # Ignore pointers/references to functions.
  if Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall):
    return

  # Ignore pointers/references to arrays.
  if Search(r' \([^)]+\)\[[^\]]+\]', fncall):
    return

  # Space right after an opening paren.
  if Search(r'\w\s*\(\s(?!\s*\\$)', fncall):      # a ( used for a fn call
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space after ( in function call')
  elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space after (')

  # Space between a name and its opening paren; macros/typedefs and
  # function-pointer style declarations are excluded.
  space_before_paren = (
      Search(r'\w\s+\(', fncall) and
      not Search(r'#\s*define|typedef', fncall) and
      not Search(r'\w\s+\((\w+::)?\*\w+\)\(', fncall))
  if space_before_paren:
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space before ( in function call')

  # If the ) is followed only by a newline or a { + newline, assume it's
  # part of a control statement (if/while/etc), and don't complain
  if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
    # If the closing parenthesis is preceded by only whitespaces,
    # try to give a more descriptive error message.
    if Search(r'^\s+\)', fncall):
      message = 'Closing ) should be moved to the previous line'
    else:
      message = 'Extra space before )'
    error(filename, linenum, 'whitespace/parens', 2, message)
1958
1959
def IsBlankLine(line):
  """Tells whether a line has no visible content.

  A line counts as blank when it is empty (or otherwise falsy) or when it
  consists solely of whitespace characters.

  Args:
    line: A line of a string.

  Returns:
    True, if the given line is blank.
  """
  if not line:
    return True
  return line.isspace()
1973
1974
def CheckForFunctionLengths(filename, clean_lines, linenum,
                            function_state, error):
  """Reports for long function bodies.

  For an overview why this is done, see:
  http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions

  Uses a simplistic algorithm assuming other style guidelines
  (especially spacing) are followed.
  Only checks unindented functions, so class members are unchecked.
  Trivial bodies are unchecked, so constructors with huge initializer lists
  may be missed.
  Blank/comment lines are not counted so as to avoid encouraging the removal
  of vertical space and comments just to get through a lint check.
  NOLINT *on the last line of a function* disables this check.

  Each call handles exactly one line, which falls in one of three cases:
  the line starts a function (function_state.Begin is called once its
  opening brace is located), the line is a lone unindented '}' (the
  function is checked and ended), or the line is any other non-blank
  line (it is counted).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    function_state: Current function name and lines in body so far.
    error: The function to call with any errors found.
  """
  lines = clean_lines.lines
  line = lines[linenum]
  joined_line = ''

  starting_func = False
  regexp = r'(\w(\w|::|\*|\&|\s)*)\('  # decls * & space::name( ...
  match_result = Match(regexp, line)
  if match_result:
    # If the name is all caps and underscores, figure it's a macro and
    # ignore it, unless it's TEST or TEST_F.
    function_name = match_result.group(1).split()[-1]
    if function_name == 'TEST' or function_name == 'TEST_F' or (
        not Match(r'[A-Z_]+$', function_name)):
      starting_func = True

  if starting_func:
    body_found = False
    # Scan forward from the current line until we know whether this is a
    # declaration/trivial function or the start of a real body.
    for start_linenum in xrange(linenum, clean_lines.NumLines()):
      start_line = lines[start_linenum]
      joined_line += ' ' + start_line.lstrip()
      if Search(r'(;|})', start_line):  # Declarations and trivial functions
        body_found = True
        break                              # ... ignore
      elif Search(r'{', start_line):
        body_found = True
        function = Search(r'((\w|:)*)\(', line).group(1)
        if Match(r'TEST', function):    # Handle TEST... macros
          # Include the macro arguments so individual tests can be told
          # apart in the report.
          parameter_regexp = Search(r'(\(.*\))', joined_line)
          if parameter_regexp:             # Ignore bad syntax
            function += parameter_regexp.group(1)
        else:
          function += '()'
        function_state.Begin(function)
        break
    if not body_found:
      # No body for the function (or evidence of a non-function) was found.
      error(filename, linenum, 'readability/fn_size', 5,
            'Lint failed to find start of function body.')
  elif Match(r'^\}\s*$', line):  # function end
    function_state.Check(error, filename, linenum)
    function_state.End()
  elif not Match(r'^\s*$', line):
    function_state.Count()  # Count non-blank/non-comment lines.
2043
2044
# Matches a TODO at the very start of a '//' comment.  Groups:
#   1: whitespace between '//' and 'TODO'
#   2: the parenthesized username, if present
#   3: the single whitespace character (or end of string) after the TODO
_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')


def CheckComment(comment, filename, linenum, error):
  """Checks a comment for badly formatted TODO markers.

  Comments that do not start with a TODO are ignored.

  Args:
    comment: The text of the comment from the line in question.
    filename: The name of the current file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  todo = _RE_PATTERN_TODO.match(comment)
  if not todo:
    return

  spaces_before, owner, space_after = todo.groups()

  # One whitespace is correct; zero whitespace is handled elsewhere.
  if len(spaces_before) > 1:
    error(filename, linenum, 'whitespace/todo', 2,
          'Too many spaces before TODO')

  if not owner:
    error(filename, linenum, 'readability/todo', 2,
          'Missing username in TODO; it should look like '
          '"// TODO(my_username): Stuff."')

  # Only a single space, or nothing at all at the end of the comment, is
  # accepted; a missing group (None) or any other whitespace is reported.
  # Comparisons made explicit for correctness -- pylint: disable-msg=C6403
  if space_after not in (' ', ''):
    error(filename, linenum, 'whitespace/todo', 2,
          'TODO(my_username) should be followed by a space')
2076
def CheckAccess(filename, clean_lines, linenum, nesting_state, error):
  """Checks that DISALLOW* macros appear in the private: section.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # Work on the elided line to get rid of comments and strings.
  macro = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|'
                 r'DISALLOW_EVIL_CONSTRUCTORS|'
                 r'DISALLOW_IMPLICIT_CONSTRUCTORS)'),
                clean_lines.elided[linenum])
  if not macro:
    return

  blocks = nesting_state.stack
  if not blocks or not isinstance(blocks[-1], _ClassInfo):
    # Found DISALLOW* macro outside a class declaration, or perhaps it
    # was used inside a function when it should have been part of the
    # class declaration.  We could issue a warning here, but it
    # probably resulted in a compiler error already.
    return

  if blocks[-1].access != 'private':
    error(filename, linenum, 'readability/constructors', 3,
          '%s must be in the private: section' % macro.group(1))
2106
2107
def FindNextMatchingAngleBracket(clean_lines, linenum, init_suffix):
  """Find the corresponding > to close a template.

  Scans forward from init_suffix, continuing onto following elided lines
  when the current text holds no more operators.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: Current line number.
    init_suffix: Remainder of the current line after the initial <.

  Returns:
    True if the matching > is found, if a comma is reached while the
    closing > is still expected, or if all remaining lines are exhausted.
    False if ')', ']' or ';' is reached while the closing > is expected.
  """
  remainder = init_suffix
  pending = ['<']          # Unclosed openers; the initial < is at the bottom.
  openers = ('<', '(', '[')

  while True:
    # Find the next operator that can tell us whether < is used as an
    # opening bracket or as a less-than operator.  We only want to
    # warn on the latter case.
    #
    # We could also check all other operators and terminate the search
    # early, e.g. if we got something like this "a<b+c", the "<" is
    # most likely a less-than operator, but then we will get false
    # positives for default arguments (e.g. http://go/prccd) and
    # other template expressions (e.g. http://go/oxcjq).
    found = Search(r'^[^<>(),;\[\]]*([<>(),;\[\]])(.*)$', remainder)
    if not found:
      # Nothing left on this line, move on to the next one.
      linenum += 1
      if linenum >= len(clean_lines.elided):
        break
      remainder = clean_lines.elided[linenum]
      continue

    operator = found.group(1)
    remainder = found.group(2)

    if operator in openers:
      # Any opener nests one level deeper, whatever we were expecting.
      pending.append(operator)
    elif pending[-1] == '<':
      # Expecting closing angle bracket
      if operator == '>':
        pending.pop()
        if not pending:
          # Found matching angle bracket
          return True
      elif operator == ',':
        # Got a comma after a bracket, this is most likely a template
        # argument.  We have not seen a closing angle bracket yet, but
        # it's probably a few lines later if we look for it, so just
        # return early here.
        return True
      else:
        # Got some other operator.
        return False
    elif operator in (')', ']'):
      # Expecting closing parenthesis or closing bracket.  We don't
      # bother checking for matching () or [].  If we got something
      # like (] or [), it would have been a syntax error.
      pending.pop()

  # Exhausted all remaining lines and still no matching angle bracket.
  # Most likely the input was incomplete, otherwise we should have
  # seen a semicolon and returned early.
  return True
2176
2177
def FindPreviousMatchingAngleBracket(clean_lines, linenum, init_prefix):
  """Find the corresponding < that started a template.

  Scans backward from the end of init_prefix, continuing onto preceding
  elided lines when the current text holds no more operators.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: Current line number.
    init_prefix: Part of the current line before the initial >.

  Returns:
    True if the matching < is found, or if a comma is reached while the
    opening < is still expected.  False if '(', '[' or ';' is reached while
    the opening < is expected, or if all earlier lines are exhausted.
  """
  remainder = init_prefix
  pending = ['>']          # Unmatched closers; the initial > is at the bottom.
  closers = ('>', ')', ']')

  while True:
    # Find the previous operator
    found = Search(r'^(.*)([<>(),;\[\]])[^<>(),;\[\]]*$', remainder)
    if not found:
      # Nothing left on this line, move on to the previous one.
      linenum -= 1
      if linenum < 0:
        break
      remainder = clean_lines.elided[linenum]
      continue

    operator = found.group(2)
    remainder = found.group(1)

    if operator in closers:
      # Any closer nests one level deeper, whatever we were expecting.
      pending.append(operator)
    elif pending[-1] == '>':
      # Expecting opening angle bracket
      if operator == '<':
        pending.pop()
        if not pending:
          # Found matching angle bracket
          return True
      elif operator == ',':
        # Got a comma before a bracket, this is most likely a
        # template argument.  The opening angle bracket is probably
        # there if we look for it, so just return early here.
        return True
      else:
        # Got some other operator.
        return False
    elif operator in ('(', '['):
      # Expecting opening parenthesis or opening bracket
      pending.pop()

  # Exhausted all earlier lines and still no matching angle bracket.
  return False
2233
2234
2235def CheckSpacing(filename, clean_lines, linenum, nesting_state, error):
2236  """Checks for the correctness of various spacing issues in the code.
2237
2238  Things we check for: spaces around operators, spaces after
2239  if/for/while/switch, no spaces around parens in function calls, two
2240  spaces between code and comment, don't start a block with a blank
2241  line, don't end a function with a blank line, don't add a blank line
2242  after public/protected/private, don't have too many blank lines in a row.
2243
2244  Args:
2245    filename: The name of the current file.
2246    clean_lines: A CleansedLines instance containing the file.
2247    linenum: The number of the line to check.
2248    nesting_state: A _NestingState instance which maintains information about
2249                   the current stack of nested blocks being parsed.
2250    error: The function to call with any errors found.
2251  """
2252
2253  raw = clean_lines.raw_lines
2254  line = raw[linenum]
2255
2256  # Before nixing comments, check if the line is blank for no good
2257  # reason.  This includes the first line after a block is opened, and
2258  # blank lines at the end of a function (ie, right before a line like '}'
2259  #
2260  # Skip all the blank line checks if we are immediately inside a
2261  # namespace body.  In other words, don't issue blank line warnings
2262  # for this block:
2263  #   namespace {
2264  #
2265  #   }
2266  #
2267  # A warning about missing end of namespace comments will be issued instead.
2268  if IsBlankLine(line) and not nesting_state.InNamespaceBody():
2269    elided = clean_lines.elided
2270    prev_line = elided[linenum - 1]
2271    prevbrace = prev_line.rfind('{')
2272    # TODO(unknown): Don't complain if line before blank line, and line after,
2273    #                both start with alnums and are indented the same amount.
2274    #                This ignores whitespace at the start of a namespace block
2275    #                because those are not usually indented.
2276    if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1:
2277      # OK, we have a blank line at the start of a code block.  Before we
2278      # complain, we check if it is an exception to the rule: The previous
2279      # non-empty line has the parameters of a function header that are indented
2280      # 4 spaces (because they did not fit in a 80 column line when placed on
2281      # the same line as the function name).  We also check for the case where
2282      # the previous line is indented 6 spaces, which may happen when the
2283      # initializers of a constructor do not fit into a 80 column line.
2284      exception = False
2285      if Match(r' {6}\w', prev_line):  # Initializer list?
2286        # We are looking for the opening column of initializer list, which
2287        # should be indented 4 spaces to cause 6 space indentation afterwards.
2288        search_position = linenum-2
2289        while (search_position >= 0
2290               and Match(r' {6}\w', elided[search_position])):
2291          search_position -= 1
2292        exception = (search_position >= 0
2293                     and elided[search_position][:5] == '    :')
2294      else:
2295        # Search for the function arguments or an initializer list.  We use a
2296        # simple heuristic here: If the line is indented 4 spaces; and we have a
2297        # closing paren, without the opening paren, followed by an opening brace
2298        # or colon (for initializer lists) we assume that it is the last line of
2299        # a function header.  If we have a colon indented 4 spaces, it is an
2300        # initializer list.
2301        exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
2302                           prev_line)
2303                     or Match(r' {4}:', prev_line))
2304
2305      if not exception:
2306        error(filename, linenum, 'whitespace/blank_line', 2,
2307              'Blank line at the start of a code block.  Is this needed?')
2308    # Ignore blank lines at the end of a block in a long if-else
2309    # chain, like this:
2310    #   if (condition1) {
2311    #     // Something followed by a blank line
2312    #
2313    #   } else if (condition2) {
2314    #     // Something else
2315    #   }
2316    if linenum + 1 < clean_lines.NumLines():
2317      next_line = raw[linenum + 1]
2318      if (next_line
2319          and Match(r'\s*}', next_line)
2320          and next_line.find('} else ') == -1):
2321        error(filename, linenum, 'whitespace/blank_line', 3,
2322              'Blank line at the end of a code block.  Is this needed?')
2323
2324    matched = Match(r'\s*(public|protected|private):', prev_line)
2325    if matched:
2326      error(filename, linenum, 'whitespace/blank_line', 3,
2327            'Do not leave a blank line after "%s:"' % matched.group(1))
2328
2329  # Next, we complain if there's a comment too near the text
2330  commentpos = line.find('//')
2331  if commentpos != -1:
2332    # Check if the // may be in quotes.  If so, ignore it
2333    # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
2334    if (line.count('"', 0, commentpos) -
2335        line.count('\\"', 0, commentpos)) % 2 == 0:   # not in quotes
2336      # Allow one space for new scopes, two spaces otherwise:
2337      if (not Match(r'^\s*{ //', line) and
2338          ((commentpos >= 1 and
2339            line[commentpos-1] not in string.whitespace) or
2340           (commentpos >= 2 and
2341            line[commentpos-2] not in string.whitespace))):
2342        error(filename, linenum, 'whitespace/comments', 2,
2343              'At least two spaces is best between code and comments')
2344      # There should always be a space between the // and the comment
2345      commentend = commentpos + 2
2346      if commentend < len(line) and not line[commentend] == ' ':
2347        # but some lines are exceptions -- e.g. if they're big
2348        # comment delimiters like:
2349        # //----------------------------------------------------------
2350        # or are an empty C++ style Doxygen comment, like:
2351        # ///
2352        # or they begin with multiple slashes followed by a space:
2353        # //////// Header comment
2354        match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
2355                 Search(r'^/$', line[commentend:]) or
2356                 Search(r'^/+ ', line[commentend:]))
2357        if not match:
2358          error(filename, linenum, 'whitespace/comments', 4,
2359                'Should have a space between // and comment')
2360      CheckComment(line[commentpos:], filename, linenum, error)
2361
2362  line = clean_lines.elided[linenum]  # get rid of comments and strings
2363
2364  # Don't try to do spacing checks for operator methods
2365  line = re.sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', 'operator\(', line)
2366
2367  # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
2368  # Otherwise not.  Note we only check for non-spaces on *both* sides;
2369  # sometimes people put non-spaces on one side when aligning ='s among
2370  # many lines (not that this is behavior that I approve of...)
2371  if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if|while) ', line):
2372    error(filename, linenum, 'whitespace/operators', 4,
2373          'Missing spaces around =')
2374
2375  # It's ok not to have spaces around binary operators like + - * /, but if
2376  # there's too little whitespace, we get concerned.  It's hard to tell,
2377  # though, so we punt on this one for now.  TODO.
2378
2379  # You should always have whitespace around binary operators.
2380  #
2381  # Check <= and >= first to avoid false positives with < and >, then
2382  # check non-include lines for spacing around < and >.
2383  match = Search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line)
2384  if match:
2385    error(filename, linenum, 'whitespace/operators', 3,
2386          'Missing spaces around %s' % match.group(1))
2387  # We allow no-spaces around << when used like this: 10<<20, but
2388  # not otherwise (particularly, not when used as streams)
2389  match = Search(r'(\S)(?:L|UL|ULL|l|ul|ull)?<<(\S)', line)
2390  if match and not (match.group(1).isdigit() and match.group(2).isdigit()):
2391    error(filename, linenum, 'whitespace/operators', 3,
2392          'Missing spaces around <<')
2393  elif not Match(r'#.*include', line):
2394    # Avoid false positives on ->
2395    reduced_line = line.replace('->', '')
2396
2397    # Look for < that is not surrounded by spaces.  This is only
2398    # triggered if both sides are missing spaces, even though
    # technically we should flag if at least one side is missing a
2400    # space.  This is done to avoid some false positives with shifts.
2401    match = Search(r'[^\s<]<([^\s=<].*)', reduced_line)
2402    if (match and
2403        not FindNextMatchingAngleBracket(clean_lines, linenum, match.group(1))):
2404      error(filename, linenum, 'whitespace/operators', 3,
2405            'Missing spaces around <')
2406
2407    # Look for > that is not surrounded by spaces.  Similar to the
2408    # above, we only trigger if both sides are missing spaces to avoid
2409    # false positives with shifts.
2410    match = Search(r'^(.*[^\s>])>[^\s=>]', reduced_line)
2411    if (match and
2412        not FindPreviousMatchingAngleBracket(clean_lines, linenum,
2413                                             match.group(1))):
2414      error(filename, linenum, 'whitespace/operators', 3,
2415            'Missing spaces around >')
2416
2417  # We allow no-spaces around >> for almost anything.  This is because
2418  # C++11 allows ">>" to close nested templates, which accounts for
2419  # most cases when ">>" is not followed by a space.
2420  #
2421  # We still warn on ">>" followed by alpha character, because that is
2422  # likely due to ">>" being used for right shifts, e.g.:
2423  #   value >> alpha
2424  #
2425  # When ">>" is used to close templates, the alphanumeric letter that
2426  # follows would be part of an identifier, and there should still be
2427  # a space separating the template type and the identifier.
2428  #   type<type<type>> alpha
2429  match = Search(r'>>[a-zA-Z_]', line)
2430  if match:
2431    error(filename, linenum, 'whitespace/operators', 3,
2432          'Missing spaces around >>')
2433
2434  # There shouldn't be space around unary operators
2435  match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
2436  if match:
2437    error(filename, linenum, 'whitespace/operators', 4,
2438          'Extra space for operator %s' % match.group(1))
2439
2440  # A pet peeve of mine: no spaces after an if, while, switch, or for
2441  match = Search(r' (if\(|for\(|while\(|switch\()', line)
2442  if match:
2443    error(filename, linenum, 'whitespace/parens', 5,
2444          'Missing space before ( in %s' % match.group(1))
2445
2446  # For if/for/while/switch, the left and right parens should be
2447  # consistent about how many spaces are inside the parens, and
2448  # there should either be zero or one spaces inside the parens.
2449  # We don't want: "if ( foo)" or "if ( foo   )".
2450  # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
2451  match = Search(r'\b(if|for|while|switch)\s*'
2452                 r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
2453                 line)
2454  if match:
2455    if len(match.group(2)) != len(match.group(4)):
2456      if not (match.group(3) == ';' and
2457              len(match.group(2)) == 1 + len(match.group(4)) or
2458              not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)):
2459        error(filename, linenum, 'whitespace/parens', 5,
2460              'Mismatching spaces inside () in %s' % match.group(1))
2461    if not len(match.group(2)) in [0, 1]:
2462      error(filename, linenum, 'whitespace/parens', 5,
2463            'Should have zero or one spaces inside ( and ) in %s' %
2464            match.group(1))
2465
2466  # You should always have a space after a comma (either as fn arg or operator)
2467  if Search(r',[^\s]', line):
2468    error(filename, linenum, 'whitespace/comma', 3,
2469          'Missing space after ,')
2470
2471  # You should always have a space after a semicolon
2472  # except for few corner cases
  # TODO(unknown): clarify if 'if (1) { return 1;}' requires one more
2474  # space after ;
2475  if Search(r';[^\s};\\)/]', line):
2476    error(filename, linenum, 'whitespace/semicolon', 3,
2477          'Missing space after ;')
2478
2479  # Next we will look for issues with function calls.
2480  CheckSpacingForFunctionCall(filename, line, linenum, error)
2481
2482  # Except after an opening paren, or after another opening brace (in case of
2483  # an initializer list, for instance), you should have spaces before your
2484  # braces. And since you should never have braces at the beginning of a line,
2485  # this is an easy test.
2486  if Search(r'[^ ({]{', line):
2487    error(filename, linenum, 'whitespace/braces', 5,
2488          'Missing space before {')
2489
2490  # Make sure '} else {' has spaces.
2491  if Search(r'}else', line):
2492    error(filename, linenum, 'whitespace/braces', 5,
2493          'Missing space before else')
2494
2495  # You shouldn't have spaces before your brackets, except maybe after
2496  # 'delete []' or 'new char * []'.
2497  if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
2498    error(filename, linenum, 'whitespace/braces', 5,
2499          'Extra space before [')
2500
2501  # You shouldn't have a space before a semicolon at the end of the line.
2502  # There's a special case for "for" since the style guide allows space before
2503  # the semicolon there.
2504  if Search(r':\s*;\s*$', line):
2505    error(filename, linenum, 'whitespace/semicolon', 5,
2506          'Semicolon defining empty statement. Use {} instead.')
2507  elif Search(r'^\s*;\s*$', line):
2508    error(filename, linenum, 'whitespace/semicolon', 5,
2509          'Line contains only semicolon. If this should be an empty statement, '
2510          'use {} instead.')
2511  elif (Search(r'\s+;\s*$', line) and
2512        not Search(r'\bfor\b', line)):
2513    error(filename, linenum, 'whitespace/semicolon', 5,
2514          'Extra space before last semicolon. If this should be an empty '
2515          'statement, use {} instead.')
2516
2517  # In range-based for, we wanted spaces before and after the colon, but
2518  # not around "::" tokens that might appear.
2519  if (Search('for *\(.*[^:]:[^: ]', line) or
2520      Search('for *\(.*[^: ]:[^:]', line)):
2521    error(filename, linenum, 'whitespace/forcolon', 2,
2522          'Missing space around colon in range-based for loop')
2523
2524
def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
  """Checks that access specifiers in large classes follow a blank line.

  Reports a 'whitespace/blank_line' error when a "public:", "protected:" or
  "private:" line is not preceded by a blank line, unless the specifier comes
  right after the class head.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    class_info: A _ClassInfo object describing the enclosing class.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  class_start = class_info.starting_linenum

  # Small classes (25 lines or less, i.e. roughly one terminal screen) are
  # exempt.  If the end of the class was not found, last_line would be zero
  # and this test also bails out.
  if class_info.last_line - class_start <= 24:
    return

  # Nothing to check on the first line of the class; this covers one-liners
  # such as
  #   class Foo { public: ... };
  if linenum <= class_start:
    return

  access = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum])
  if not access:
    return

  # No complaint when the preceding line is blank, mentions "class" or
  # "struct" (start of the class, or a forward-declared inner class), or ends
  # with a backslash (classes defined inside C macros).
  preceding = clean_lines.lines[linenum - 1]
  if IsBlankLine(preceding):
    return
  if Search(r'\b(class|struct)\b', preceding) or Search(r'\\$', preceding):
    return

  # Locate the line that closes the class head, so that multi-line
  # base-specifier lists are handled, e.g.:
  #   class Derived
  #       : public Base {
  head_end = class_start
  for candidate in range(class_start, linenum):
    if Search(r'\{\s*$', clean_lines.lines[candidate]):
      head_end = candidate
      break

  if head_end < linenum - 1:
    error(filename, linenum, 'whitespace/blank_line', 3,
          '"%s:" should be preceded by a blank line' % access.group(1))
2578
2579
def GetPreviousNonBlankLine(clean_lines, linenum):
  """Finds the closest non-blank line above the given line.

  The search runs over the elided lines (comments and strings removed).

  Args:
    clean_lines: A CleansedLines instance containing the file contents.
    linenum: The number of the line to start searching above.

  Returns:
    A (contents, line number) tuple for the nearest preceding non-blank
    line, or ('', -1) when every preceding line is blank.
  """
  for candidate in range(linenum - 1, -1, -1):
    text = clean_lines.elided[candidate]
    if not IsBlankLine(text):
      return (text, candidate)
  return ('', -1)
2601
2602
def CheckBraces(filename, clean_lines, linenum, error):
  """Looks for misplaced braces (e.g. at the end of line).

  The following problems are reported through the error callback:
    - an opening brace on a line of its own ('whitespace/braces'),
    - an "else" that is not on the same line as the preceding "}"
      ('whitespace/newline'),
    - an "else" with a brace on only one side ('readability/braces'),
    - an else clause or do/while body on the same line as its keyword
      ('whitespace/newline'),
    - a redundant ";" after a closing brace ('readability/braces').

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  line = clean_lines.elided[linenum]        # get rid of comments and strings

  if Match(r'\s*{\s*$', line):
    # We allow an open brace to start a line in the case where someone
    # is using braces in a block to explicitly create a new scope,
    # which is commonly used to control the lifetime of
    # stack-allocated variables.  We don't detect this perfectly: we
    # just don't complain if the last non-whitespace character on the
    # previous non-blank line is ';', ':', '{', or '}', or if the previous
    # line starts a preprocessor block.
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if (not Search(r'[;:}{]\s*$', prevline) and
        not Match(r'\s*#', prevline)):
      error(filename, linenum, 'whitespace/braces', 4,
            '{ should almost always be at the end of the previous line')

  # An else clause should be on the same line as the preceding closing brace.
  # The word boundary keeps identifiers that merely start with "else" (such
  # as "elsewhere();") from being mistaken for the keyword.
  if Match(r'\s*else\b', line):
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if Match(r'\s*}\s*$', prevline):
      error(filename, linenum, 'whitespace/newline', 4,
            'An else should appear on the same line as the preceding }')

  # If braces come on one side of an else, they should be on both.
  # However, we have to worry about "else if" that spans multiple lines!
  if Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
    if Search(r'}\s*else if([^{]*)$', line):       # could be multi-line if
      # find the ( after the if
      pos = line.find('else if')
      pos = line.find('(', pos)
      if pos > 0:
        (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
        if endline[endpos:].find('{') == -1:    # must be brace after if
          error(filename, linenum, 'readability/braces', 5,
                'If an else has a brace on one side, it should have it on both')
    else:            # common case: else not followed by a multi-line if
      error(filename, linenum, 'readability/braces', 5,
            'If an else has a brace on one side, it should have it on both')

  # Likewise, an else should never have the else clause on the same line
  if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'Else clause should never be on same line as else (use 2 lines)')

  # In the same way, a do/while should never be on one line
  if Match(r'\s*do [^\s{]', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'do/while clauses should not be on a single line')

  # Braces shouldn't be followed by a ; unless they're defining a struct
  # or initializing an array.
  # We can't tell in general, but we can for some common cases.
  #
  # Preceding lines are prepended to the current one until a line containing
  # a ';' is found, so that a multi-line declaration is examined as a whole.
  prevlinenum = linenum
  while True:
    (prevline, prevlinenum) = GetPreviousNonBlankLine(clean_lines, prevlinenum)
    # A negative prevlinenum means the start of the file was reached.  Stop
    # there: prepending the empty string would leave 'line' unchanged and the
    # loop would never terminate.
    if (prevlinenum >= 0 and
        Match(r'\s+{.*}\s*;', line) and
        not prevline.count(';')):
      line = prevline + line
    else:
      break
  if (Search(r'{.*}\s*;', line) and
      line.count('{') == line.count('}') and
      not Search(r'struct|class|enum|\s*=\s*{', line)):
    error(filename, linenum, 'readability/braces', 4,
          "You don't need a ; after a }")
2677
2678
def CheckEmptyLoopBody(filename, clean_lines, linenum, error):
  """Looks for loops whose body is nothing but a single semicolon.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Only lines that begin with a loop keyword are of interest.  Since only
  # whitespace may precede the keyword, most do-while loops are ignored as
  # well: their "while" line should start with a closing brace.
  if not Match(r'\s*(for|while)\s*\(', line):
    return

  # Find where the loop's parenthesized expression ends.
  open_paren = line.find('(')
  (end_line, end_linenum, end_pos) = CloseExpression(
      clean_lines, linenum, open_paren)
  if end_pos < 0:
    return

  # Warn only when a semicolon immediately follows the closing paren.
  # Whitespace or a newline is left alone, since semicolons preceded by
  # whitespace are covered by a separate check.
  if Match(r';', end_line[end_pos:]):
    error(filename, end_linenum, 'whitespace/empty_loop_body', 5,
          'Empty loop bodies should use {} or continue')
2704
2705
def ReplaceableCheck(operator, macro, line):
  """Tells whether a plain CHECK could use a more specific comparison macro.

  For example CHECK(a == 42) can be written as CHECK_EQ(a, 42); the same
  goes for CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT and CHECK_NE.

  Args:
    operator: The C++ operator used in the CHECK.
    macro: The CHECK or EXPECT macro being called.
    line: The current source line.

  Returns:
    A true value if the CHECK can be replaced with a more specific one,
    a false value otherwise.
  """
  # Decimal and hex integers, strings, and chars (in that order).
  literal = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')'

  # One side of the operator has to look like a literal, since
  # CHECK(x == iterator) won't compile.  Some replaceable CHECKs are missed
  # this way, but that is less annoying than extraneous warnings.
  literal_on_left = r'\s*' + literal + r'\s*' + operator + r'[^<>].*'
  literal_on_right = r'.*[^<>]' + operator + r'\s*' + literal + r'\s*\)'
  pattern = (r'\s*' + macro + r'\((' +
             literal_on_left + '|' + literal_on_right + ')')

  # CHECK(x == NULL) and friends are left alone because CHECK_EQ(x, NULL)
  # won't compile (requires a cast).  Compound boolean expressions using
  # && or ||, such as CHECK(a == b || c == d), are left alone as well.
  found = Match(pattern, line)
  return found and not Search(r'NULL|&&|\|\|', line)
2739
2740
def CheckCheck(filename, clean_lines, linenum, error):
  """Suggests specific CHECK/EXPECT comparison macros over generic ones.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Find the first known macro mentioned on the raw line; it selects the set
  # of replacement macros to suggest.
  raw_line = clean_lines.raw_lines[linenum]
  current_macro = ''
  for candidate in _CHECK_MACROS:
    if candidate in raw_line:
      current_macro = candidate
      break

  # Don't waste time here if the line doesn't contain 'CHECK' or 'EXPECT'.
  if not current_macro:
    return

  line = clean_lines.elided[linenum]        # get rid of comments and strings

  # Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc.  Only the
  # first operator that applies is reported.
  for op in ('==', '!=', '>=', '>', '<=', '<'):
    if not ReplaceableCheck(op, current_macro, line):
      continue
    replacement = _CHECK_REPLACEMENT[current_macro][op]
    error(filename, linenum, 'readability/check', 2,
          'Consider using %s instead of %s(a %s b)' % (
              replacement, current_macro, op))
    break
2771      break
2772
2773
def CheckAltTokens(filename, clean_lines, linenum, error):
  """Flags alternative operator keywords used in boolean expressions.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Preprocessor lines are not examined.
  is_preprocessor = Match(r'^\s*#', line)
  if is_preprocessor:
    return

  # Last ditch effort to avoid multi-line comments: skip any line on which a
  # C-style comment starts or ends.  Comments that began on an earlier line
  # or finish on a later one are not detected, but this catches most of the
  # false positives, and it gives people who use multi-line comments in
  # preprocessor macros a way to work around the warning.
  #
  # TODO(unknown): remove this once cpplint has better support for
  # multi-line comments.
  if '/*' in line or '*/' in line:
    return

  for found in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
    token = found.group(1)
    error(filename, linenum, 'readability/alt_tokens', 2,
          'Use operator %s instead of %s' % (
              _ALT_TOKEN_REPLACEMENT[token], token))
2804
2805
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.  For anything that is not a
    Unicode string, len(line) is returned.
  """
  # The 'unicode' builtin only exists on Python 2.  On Python 3 every str is
  # Unicode text, so fall back to str there instead of raising NameError.
  try:
    text_type = unicode
  except NameError:
    text_type = str

  if not isinstance(line, text_type):
    return len(line)

  width = 0
  # Normalize to NFC first so that a base character followed by a combining
  # mark is counted once when a precomposed form exists.
  for uc in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(uc) in ('W', 'F'):
      # Wide and fullwidth characters take two columns.
      width += 2
    elif not unicodedata.combining(uc):
      # Combining characters take no column of their own.
      width += 1
  return width
2826
2827
def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
               error):
  """Checks rules from the 'C++ style rules' section of cppguide.html.

  Most of these rules are hard to test (naming, comment style), but we
  do what we can.  This function itself looks at tabs, trailing whitespace,
  odd indentation, unindented labels, line length and multiple commands on
  one line, then hands the line to the more specific checkers.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  line = clean_lines.raw_lines[linenum]

  if '\t' in line:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # One or three blank spaces at the beginning of the line is weird; it's
  # hard to reconcile that with 2-space indents.
  # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
  # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
  # if(RLENGTH > 20) complain = 0;
  # if(match($0, " +(error|private|public|protected):")) complain = 0;
  # if(match(prev, "&& *$")) complain = 0;
  # if(match(prev, "\\|\\| *$")) complain = 0;
  # if(match(prev, "[\",=><] *$")) complain = 0;
  # if(match($0, " <<")) complain = 0;
  # if(match(prev, " +for \\(")) complain = 0;
  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
  cleansed_line = clean_lines.elided[linenum]
  initial_spaces = len(line) - len(line.lstrip(' '))

  # At most one of the following three whitespace complaints is issued.
  if line and line[-1].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace.  Consider deleting these extra spaces.')
  elif (initial_spaces in (1, 3) and
        not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
    # A single leading space is tolerated for labels, hence the Match above.
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start.  '
          'Are you using a 2-space indent?')
  elif (not initial_spaces and not line.startswith('//') and
        Search(r'[^:]:\s*$', line)):
    # Labels should always be indented at least one space.
    error(filename, linenum, 'whitespace/labels', 4,
          'Labels should always be indented at least one space.  '
          'If this is a member-initializer list in a constructor or '
          'the base class list in a class definition, the colon should '
          'be on the following line.')

  # Check if the line is a header guard.
  is_header_guard = False
  if file_extension == 'h':
    cppvar = GetHeaderGuardCPPVariable(filename)
    is_header_guard = line.startswith(('#ifndef %s' % cppvar,
                                       '#define %s' % cppvar,
                                       '#endif  // %s' % cppvar))

  # Some lines are allowed to be long:
  #  - #include lines and header guards, since there's no clean way to
  #    split them;
  #  - comment lines ending in a URL, which could be split but would become
  #    harder to cut&paste;
  #  - the "$Id:...$" comment, which may get very long without it being the
  #    developer's fault.
  length_exempt = (line.startswith('#include') or is_header_guard or
                   Match(r'^\s*//.*http(s?)://\S*$', line) or
                   Match(r'^// \$Id:.*#[0-9]+ \$$', line))
  if not length_exempt:
    line_width = GetLineWidth(line)
    if line_width > 100:
      error(filename, linenum, 'whitespace/line_length', 4,
            'Lines should very rarely be longer than 100 characters')
    elif line_width > 80:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= 80 characters long')

  # More than one ';' hints at several commands on one line.  for loops are
  # allowed two ;'s (and may run over two lines).
  if cleansed_line.count(';') > 1 and cleansed_line.find('for') == -1:
    previous = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    continues_for = 'for' in previous and ';' not in previous
    # It's ok to have many commands in a switch case that fits in 1 line.
    one_line_case = (('case ' in cleansed_line or
                      'default:' in cleansed_line) and
                     'break;' in cleansed_line)
    if not continues_for and not one_line_case:
      error(filename, linenum, 'whitespace/newline', 0,
            'More than one command on the same line')

  # Some more style checks
  CheckBraces(filename, clean_lines, linenum, error)
  CheckEmptyLoopBody(filename, clean_lines, linenum, error)
  CheckAccess(filename, clean_lines, linenum, nesting_state, error)
  CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
  CheckCheck(filename, clean_lines, linenum, error)
  CheckAltTokens(filename, clean_lines, linenum, error)
  classinfo = nesting_state.InnermostClass()
  if classinfo:
    CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error)
2937
2938
# Matches a quoted #include of a .h file whose name contains no '/', i.e.
# an include that does not name a directory.
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Matches a whole #include line.  Group 1 is the opening delimiter ('<' or
# '"') and group 2 is the text between the delimiters.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s (a '.'
# ends the component as well). That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
2947
2948
def _DropCommonSuffixes(filename):
  """Strips well-known endings such as _test.cc or -inl.h from filename.

  A known ending is only removed when it is attached with '-' or '_';
  otherwise just the file extension is dropped.

  For example:
    >>> _DropCommonSuffixes('foo/foo-inl.h')
    'foo/foo'
    >>> _DropCommonSuffixes('foo/bar/foo.cc')
    'foo/bar/foo'
    >>> _DropCommonSuffixes('foo/foo_internal.h')
    'foo/foo'
    >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
    'foo/foo_unusualinternal'

  Args:
    filename: The input filename.

  Returns:
    The filename with the common suffix removed.
  """
  known_endings = ('test.cc', 'regtest.cc', 'unittest.cc',
                   'inl.h', 'impl.h', 'internal.h')
  for ending in known_endings:
    if filename.endswith(('-' + ending, '_' + ending)):
      # Cut the ending together with the separator in front of it.
      cut = len(ending) + 1
      return filename[:-cut]
  stem, _ = os.path.splitext(filename)
  return stem
2974
2975
def _IsTestFilename(filename):
  """Tells whether the filename carries one of the known test suffixes.

  Args:
    filename: The input filename.

  Returns:
    True if 'filename' ends in _test.cc, _unittest.cc or _regtest.cc,
    False otherwise.
  """
  return filename.endswith(('_test.cc', '_unittest.cc', '_regtest.cc'))
2991
2992
def _ClassifyInclude(fileinfo, include, is_system):
  """Figures out what kind of header 'include' is.

  Args:
    fileinfo: The current file cpplint is running over. A FileInfo instance.
    include: The path to a #included file.
    is_system: True if the #include used <> rather than "".

  Returns:
    One of the _XXX_HEADER constants.

  For example:
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
    _C_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
    _CPP_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
    _LIKELY_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
    ...                  'bar/foo_other_ext.h', False)
    _POSSIBLE_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
    _OTHER_HEADER
  """
  # Angle-bracket includes are system headers: C++ ones when listed in
  # _STL_HEADERS or _CPP_HEADERS, C ones otherwise.
  if is_system:
    if include in _STL_HEADERS or include in _CPP_HEADERS:
      return _CPP_SYS_HEADER
    return _C_SYS_HEADER

  # If the target file and the include share a basename once common
  # suffixes are dropped, and the include lives in the same directory (or in
  # the sibling "public" directory), the header is likely owned by the
  # target file.
  target_path = _DropCommonSuffixes(fileinfo.RepositoryName())
  target_dir, target_base = os.path.split(target_path)
  include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
  if target_base == include_base:
    public_dir = os.path.normpath(target_dir + '/../public')
    if include_dir == target_dir or include_dir == public_dir:
      return _LIKELY_MY_HEADER

  # If the target and include share some initial basename component, it's
  # possible the target is implementing the include, so it's allowed to be
  # first, but we'll never complain if it's not there.
  target_head = _RE_FIRST_COMPONENT.match(target_base)
  include_head = _RE_FIRST_COMPONENT.match(include_base)
  if (target_head and include_head and
      target_head.group(0) == include_head.group(0)):
    return _POSSIBLE_MY_HEADER

  return _OTHER_HEADER
3051
3052
3053
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Runs the checks that apply specifically to #include lines.

  Strings on #include lines are NOT removed from elided line, to make
  certain tasks easier. However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  The line is read from clean_lines.lines. Errors may be reported under
  'build/include', 'build/include_order', 'build/include_alpha' and
  'readability/streams'.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)
  text = clean_lines.lines[linenum]

  # Headers should be named with their directory ("foo/bar.h"), not as a
  # bare "bar.h".
  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(text):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  # A header should normally be included only once. There are a handful of
  # cases where repeating it is fine, but in general it is not.
  found = _RE_PATTERN_INCLUDE.search(text)
  if found:
    header = found.group(2)
    uses_angle_brackets = (found.group(1) == '<')
    if header in include_state:
      duplicate_message = '"%s" already included at %s:%s' % (
          header, filename, include_state[header])
      error(filename, linenum, 'build/include', 4, duplicate_message)
    else:
      include_state[header] = linenum

      # Headers are expected in this order:
      # 1) for foo.cc, foo.h  (preferred location)
      # 2) c system files
      # 3) cpp system files
      # 4) for foo.cc, foo.h  (deprecated location)
      # 5) other google headers
      #
      # Each include is classified as one of those 5 types. The
      # include_state object keeps track of the highest type seen, and
      # complains if a lower type shows up after that.
      header_kind = _ClassifyInclude(fileinfo, header, uses_angle_brackets)
      order_problem = include_state.CheckNextIncludeOrder(header_kind)
      if order_problem:
        error(filename, linenum, 'build/include_order', 4,
              '%s. Should be: %s.h, c system, c++ system, other.' %
              (order_problem, fileinfo.BaseName()))
      if not include_state.IsInAlphabeticalOrder(header):
        error(filename, linenum, 'build/include_alpha', 4,
              'Include "%s" not in alphabetical order' % header)

  # Look for any of the stream classes that are part of standard C++.
  # Note this uses match() (anchored at the start of the line), unlike the
  # search() above. Many unit tests use cout, so test files are exempt.
  anchored = _RE_PATTERN_INCLUDE.match(text)
  if (anchored and
      Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$',
            anchored.group(2)) and
      not _IsTestFilename(filename)):
    error(filename, linenum, 'readability/streams', 3,
          'Streams are highly discouraged.')
3121
3122
3123def _GetTextInside(text, start_pattern):
3124  """Retrieves all the text between matching open and close parentheses.
3125
3126  Given a string of lines and a regular expression string, retrieve all the text
3127  following the expression and between opening punctuation symbols like
3128  (, [, or {, and the matching close-punctuation symbol. This properly nested
3129  occurrences of the punctuations, so for the text like
3130    printf(a(), b(c()));
3131  a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
3132  start_pattern must match string having an open punctuation symbol at the end.
3133
3134  Args:
3135    text: The lines to extract text. Its comments and strings must be elided.
3136           It can be single line and can span multiple lines.
3137    start_pattern: The regexp string indicating where to start extracting
3138                   the text.
3139  Returns:
3140    The extracted text.
3141    None if either the opening string or ending punctuation could not be found.
3142  """
3143  # TODO(sugawarayu): Audit cpplint.py to see what places could be profitably
3144  # rewritten to use _GetTextInside (and use inferior regexp matching today).
3145
3146  # Give opening punctuations to get the matching close-punctuations.
3147  matching_punctuation = {'(': ')', '{': '}', '[': ']'}
3148  closing_punctuation = set(matching_punctuation.itervalues())
3149
3150  # Find the position to start extracting text.
3151  match = re.search(start_pattern, text, re.M)
3152  if not match:  # start_pattern not found in text.
3153    return None
3154  start_position = match.end(0)
3155
3156  assert start_position > 0, (
3157      'start_pattern must ends with an opening punctuation.')
3158  assert text[start_position - 1] in matching_punctuation, (
3159      'start_pattern must ends with an opening punctuation.')
3160  # Stack of closing punctuations we expect to have in text after position.
3161  punctuation_stack = [matching_punctuation[text[start_position - 1]]]
3162  position = start_position
3163  while punctuation_stack and position < len(text):
3164    if text[position] == punctuation_stack[-1]:
3165      punctuation_stack.pop()
3166    elif text[position] in closing_punctuation:
3167      # A closing punctuation without matching opening punctuations.
3168      return None
3169    elif text[position] in matching_punctuation:
3170      punctuation_stack.append(matching_punctuation[text[position]])
3171    position += 1
3172  if punctuation_stack:
3173    # Opening punctuations left without matching close-punctuations.
3174    return None
3175  # punctuations match.
3176  return text[start_position:position - 1]
3177
3178
def CheckLanguage(filename, clean_lines, linenum, file_extension, include_state,
                  error):
  """Checks rules from the 'C++ language rules' section of cppguide.html.

  Some of these rules are hard to test (function overloading, using
  uint32 inappropriately), but we do the best we can.

  Empty elided lines are skipped, and lines matching _RE_PATTERN_INCLUDE are
  handed to CheckIncludeLine and not checked any further. Every other line
  is run through each of the checks below in turn, so a single line may
  produce several errors.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  # If the line is empty or consists of entirely a comment, no need to
  # check it.
  line = clean_lines.elided[linenum]
  if not line:
    return

  match = _RE_PATTERN_INCLUDE.search(line)
  if match:
    CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
    return

  # TODO(unknown): figure out if they're using default arguments in fn proto.

  # Check for non-const references in functions.  This is tricky because &
  # is also used to take the address of something.  We allow <> for templates,
  # (ignoring whatever is between the braces) and : for classes.
  # These are complicated re's.  They try to capture the following:
  # paren (for fn-prototype start), typename, &, varname.  For the const
  # version, we're willing for const to be before typename or after
  # Don't check the implementation on same line.
  fnline = line.split('{', 1)[0]
  if (len(re.findall(r'\([^()]*\b(?:[\w:]|<[^()]*>)+(\s?&|&\s?)\w+', fnline)) >
      len(re.findall(r'\([^()]*\bconst\s+(?:typename\s+)?(?:struct\s+)?'
                     r'(?:[\w:]|<[^()]*>)+(\s?&|&\s?)\w+', fnline)) +
      len(re.findall(r'\([^()]*\b(?:[\w:]|<[^()]*>)+\s+const(\s?&|&\s?)[\w]+',
                     fnline))):

    # We allow non-const references in a few standard places, like functions
    # called "swap()" or iostream operators like "<<" or ">>". We also filter
    # out for loops, which lint otherwise mistakenly thinks are functions.
    if not Search(
        r'(for|swap|Swap|operator[<>][<>])\s*\(\s*'
        r'(?:(?:typename\s*)?[\w:]|<.*>)+\s*&',
        fnline):
      error(filename, linenum, 'runtime/references', 2,
            'Is this a non-const reference? '
            'If so, make const or use a pointer.')

  # Check to see if they're using a conversion function cast.
  # I just try to capture the most common basic types, though there are more.
  # Parameterless conversion functions, such as bool(), are allowed as they are
  # probably a member operator declaration or default constructor.
  match = Search(
      r'(\bnew\s+)?\b'  # Grab 'new' operator, if it's there
      r'(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line)
  if match:
    # gMock methods are defined using some variant of MOCK_METHODx(name, type)
    # where type may be float(), int(string), etc.  Without context they are
    # virtually indistinguishable from int(x) casts. Likewise, gMock's
    # MockCallback takes a template parameter of the form return_type(arg_type),
    # which looks much like the cast we're trying to detect.
    # BEGIN android-added
    # The C++ 2011 std::function class template exhibits a similar issue.
    # END android-added
    if (match.group(1) is None and  # If new operator, then this isn't a cast
        not (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
             # BEGIN android-changed
             # Match(r'^\s*MockCallback<.*>', line))):
             Match(r'^\s*MockCallback<.*>', line) or
             Match(r'^\s*std::function<.*>', line))):
             # END android-changed
      # Try a bit harder to catch gmock lines: the only place where
      # something looks like an old-style cast is where we declare the
      # return type of the mocked method, and the only time when we
      # are missing context is if MOCK_METHOD was split across
      # multiple lines (for example http://go/hrfhr ), so we only need
      # to check the previous line for MOCK_METHOD.
      if (linenum == 0 or
          not Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(\S+,\s*$',
                    clean_lines.elided[linenum - 1])):
        error(filename, linenum, 'readability/casting', 4,
              'Using deprecated casting style.  '
              'Use static_cast<%s>(...) instead' %
              match.group(2))

  CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                  'static_cast',
                  r'\((int|float|double|bool|char|u?int(16|32|64))\)', error)

  # This doesn't catch all cases. Consider (const char * const)"hello".
  #
  # (char *) "foo" should always be a const_cast (reinterpret_cast won't
  # compile).
  if CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                     'const_cast', r'\((char\s?\*+\s?)\)\s*"', error):
    pass
  else:
    # Check pointer casts for other than string constants
    CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                    'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)

  # In addition, we look for people taking the address of a cast.  This
  # is dangerous -- casts can assign to temporaries, so the pointer doesn't
  # point where you think.
  if Search(
      r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line):
    error(filename, linenum, 'runtime/casting', 4,
          ('Are you taking an address of a cast?  '
           'This is dangerous: could be a temp var.  '
           'Take the address before doing the cast, rather than after'))

  # Check for people declaring static/global STL strings at the top level.
  # This is dangerous because the C++ language does not guarantee that
  # globals with constructors are initialized before the first access.
  match = Match(
      r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
      line)
  # Make sure it's not a function.
  # Function template specialization looks like: "string foo<Type>(...".
  # Class template definitions look like: "string Foo<Type>::Method(...".
  if match and not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
                         match.group(3)):
    error(filename, linenum, 'runtime/string', 4,
          'For a static/global string constant, use a C style string instead: '
          '"%schar %s[]".' %
          (match.group(1), match.group(2)))

  # Check that we're not using RTTI outside of testing code.
  if Search(r'\bdynamic_cast<', line) and not _IsTestFilename(filename):
    error(filename, linenum, 'runtime/rtti', 5,
          'Do not use dynamic_cast<>.  If you need to cast within a class '
          "hierarchy, use static_cast<> to upcast.  Google doesn't support "
          'RTTI.')

  # A member initializer of the form "foo_(foo_)".
  if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
    error(filename, linenum, 'runtime/init', 4,
          'You seem to be initializing a member variable with itself.')

  # Header-only checks that are not implemented here yet:
  # TODO(unknown): check that 1-arg constructors are explicit.
  #                How to tell it's a constructor?
  #                (handled in CheckForNonStandardConstructs for now)
  # TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
  #                (level 1 error)

  # Check if people are using the verboten C basic types.  The only exception
  # we regularly allow is "unsigned short port" for port.
  if Search(r'\bshort port\b', line):
    if not Search(r'\bunsigned short port\b', line):
      error(filename, linenum, 'runtime/int', 4,
            'Use "unsigned short" for ports, not "short"')
  else:
    match = Search(r'\b(short|long(?! +double)|long long)\b', line)
    if match:
      error(filename, linenum, 'runtime/int', 4,
            'Use int16/int64/etc, rather than the C type %s' % match.group(1))

  # When snprintf is used, the second argument shouldn't be a literal.
  match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
  if match and match.group(2) != '0':
    # If 2nd arg is zero, snprintf is used to calculate size.
    error(filename, linenum, 'runtime/printf', 3,
          'If you can, use sizeof(%s) instead of %s as the 2nd arg '
          'to snprintf.' % (match.group(1), match.group(2)))

  # Check if some verboten C functions are being used.
  if Search(r'\bsprintf\b', line):
    error(filename, linenum, 'runtime/printf', 5,
          'Never use sprintf.  Use snprintf instead.')
  match = Search(r'\b(strcpy|strcat)\b', line)
  if match:
    error(filename, linenum, 'runtime/printf', 4,
          'Almost always, snprintf is better than %s' % match.group(1))

  if Search(r'\bsscanf\b', line):
    error(filename, linenum, 'runtime/printf', 1,
          'sscanf can be ok, but is slow and can overflow buffers.')

  # Check if some verboten operator overloading is going on
  # TODO(unknown): catch out-of-line unary operator&:
  #   class X {};
  #   int operator&(const X& x) { return 42; }  // unary operator&
  # The trick is it's hard to tell apart from binary operator&:
  #   class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
  if Search(r'\boperator\s*&\s*\(\s*\)', line):
    error(filename, linenum, 'runtime/operator', 4,
          'Unary operator& is dangerous.  Do not use it.')

  # Check for suspicious usage of "if" like
  # } if (a == b) {
  if Search(r'\}\s*if\s*\(', line):
    error(filename, linenum, 'readability/braces', 4,
          'Did you mean "else if"? If not, start a new line for "if".')

  # Check for potential format string bugs like printf(foo).
  # We constrain the pattern not to pick things like DocidForPrintf(foo).
  # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
  # TODO(sugawarayu): Catch the following case. Need to change the calling
  # convention of the whole function to process multiple line to handle it.
  #   printf(
  #       boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
  printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
  if printf_args:
    match = Match(r'([\w.\->()]+)$', printf_args)
    if match and match.group(1) != '__VA_ARGS__':
      function_name = re.search(r'\b((?:string)?printf)\s*\(',
                                line, re.I).group(1)
      error(filename, linenum, 'runtime/printf', 4,
            'Potential format string bug. Do %s("%%s", %s) instead.'
            % (function_name, match.group(1)))

  # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
  # The warning is suppressed only when the whole second argument is a
  # literal fill value: '' (an elided character literal), a decimal integer
  # or a hex integer.  The alternatives are grouped so that the anchors
  # apply to all of them; otherwise any argument that merely starts with a
  # digit (e.g. "10 * sizeof(x)") would be accepted.
  match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
  if match and not Match(r"^(?:''|-?[0-9]+|0x[0-9A-Fa-f]+)\s*$",
                         match.group(2)):
    error(filename, linenum, 'runtime/memset', 4,
          'Did you mean "memset(%s, 0, %s)"?'
          % (match.group(1), match.group(2)))

  if Search(r'\busing namespace\b', line):
    error(filename, linenum, 'build/namespaces', 5,
          'Do not use namespace using-directives.  '
          'Use using-declarations instead.')

  # Detect variable-length arrays.
  match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
  if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
      match.group(3).find(']') == -1):
    # Split the size using space and arithmetic operators as delimiters.
    # If any of the resulting tokens are not compile time constants then
    # report the error.  (">>" must be its own alternative: a stray "]"
    # after it would make the shift operator impossible to match here.)
    tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>', match.group(3))
    is_const = True
    skip_next = False
    for tok in tokens:
      if skip_next:
        skip_next = False
        continue

      if Search(r'sizeof\(.+\)', tok): continue
      if Search(r'arraysize\(\w+\)', tok): continue

      tok = tok.lstrip('(')
      tok = tok.rstrip(')')
      if not tok: continue
      if Match(r'\d+', tok): continue
      if Match(r'0[xX][0-9a-fA-F]+', tok): continue
      if Match(r'k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
      # A catch all for tricky sizeof cases, including 'sizeof expression',
      # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
      # requires skipping the next token because we split on ' ' and '*'.
      if tok.startswith('sizeof'):
        skip_next = True
        continue
      is_const = False
      break
    if not is_const:
      error(filename, linenum, 'runtime/arrays', 1,
            'Do not use variable-length arrays.  Use an appropriately named '
            "('k' followed by CamelCase) compile-time constant for the size.")

  # If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
  # DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
  # in the class declaration.
  match = Match(
      (r'\s*'
       r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
       r'\(.*\);$'),
      line)
  if match and linenum + 1 < clean_lines.NumLines():
    next_line = clean_lines.elided[linenum + 1]
    # We allow some, but not all, declarations of variables to be present
    # in the statement that defines the class.  The [\w\*,\s]* fragment of
    # the regular expression below allows users to declare instances of
    # the class or pointers to instances, but not less common types such
    # as function pointers or arrays.  It's a tradeoff between allowing
    # reasonable code and avoiding trying to parse more C++ using regexps.
    if not Search(r'^\s*}[\w\*,\s]*;', next_line):
      error(filename, linenum, 'readability/constructors', 3,
            match.group(1) + ' should be the last thing in the class')

  # Check for use of unnamed namespaces in header files.  Registration
  # macros are typically OK, so we allow use of "namespace {" on lines
  # that end with backslashes.
  if (file_extension == 'h'
      and Search(r'\bnamespace\s*{', line)
      and line[-1] != '\\'):
    error(filename, linenum, 'build/namespaces', 4,
          'Do not use unnamed namespaces in header files.  See '
          'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
          ' for more information.')
3488
3489
def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
                    error):
  """Looks for 'pattern' on the line and reports it as a C-style cast.

  Because the text looks alike, this also reports sizeof(type) and unnamed
  function parameters when the pattern turns out to be one of those instead.

  Args:
    filename: The name of the current file.
    linenum: The number of the line to check.
    line: The line of code to check.
    raw_line: The raw line of code to check, with comments.
    cast_type: The string for the C++ cast to recommend.  This is either
      reinterpret_cast, static_cast, or const_cast, depending.
    pattern: The regular expression used to find C-style casts.
    error: The function to call with any errors found.

  Returns:
    False if the pattern is not found, or if it follows ' operator++' or
    ' operator--'.
    True otherwise, i.e. when the match was treated as sizeof(type), as a
    function declaration, or as a cast.  An error is emitted in each of
    those cases, except for some function-like matches that are recognized
    but deliberately not reported.
  """
  found = Search(pattern, line)
  if not found:
    return False

  # Everything on the line that precedes the character just before group 1
  # (for the patterns passed by CheckLanguage, that character is the '(').
  leading_text = line[0:found.start(1) - 1]

  # e.g., sizeof(int)
  if Match(r'.*sizeof\s*$', leading_text):
    error(filename, linenum, 'runtime/sizeof', 1,
          'Using sizeof(type).  Use sizeof(varname) instead if possible')
    return True

  # operator++(int) and operator--(int)
  if leading_text.endswith((' operator++', ' operator--')):
    return False

  trailing_text = line[found.end(0):]

  # What follows the match tells a function declaration from a cast:
  #   ')'  function pointers as arguments to a function,
  #        eg, void foo(void (*bar)(int));
  #   ';'  a more basic function check; also possibly a function pointer
  #        typedef.  eg, void foo(int); or void foo(int) const;
  #   '='  function pointer assignment.  eg, void *(*foo)(int) = ...
  #   '>'  MockCallback<...> ...
  #
  # Right now, this will only catch cases where there's a single argument, and
  # it's unnamed.  It should probably be expanded to check for multiple
  # arguments with some unnamed.
  declaration = Match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)|>))',
                      trailing_text)
  if not declaration:
    # At this point, all that should be left is actual casts.
    error(filename, linenum, 'readability/casting', 4,
          'Using C-style cast.  Use %s<%s>(...) instead' %
          (cast_type, found.group(1)))
    return True

  terminator = declaration.group(3)
  if (not terminator or
      terminator == ';' or
      ('MockCallback<' not in raw_line and
       '/*' not in raw_line)):
    error(filename, linenum, 'readability/function', 3,
          'All parameters should be named in a function')
  return True
3556
3557
3558_HEADERS_CONTAINING_TEMPLATES = (
3559    ('<deque>', ('deque',)),
3560    ('<functional>', ('unary_function', 'binary_function',
3561                      'plus', 'minus', 'multiplies', 'divides', 'modulus',
3562                      'negate',
3563                      'equal_to', 'not_equal_to', 'greater', 'less',
3564                      'greater_equal', 'less_equal',
3565                      'logical_and', 'logical_or', 'logical_not',
3566                      'unary_negate', 'not1', 'binary_negate', 'not2',
3567                      'bind1st', 'bind2nd',
3568                      'pointer_to_unary_function',
3569                      'pointer_to_binary_function',
3570                      'ptr_fun',
3571                      'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
3572                      'mem_fun_ref_t',
3573                      'const_mem_fun_t', 'const_mem_fun1_t',
3574                      'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
3575                      'mem_fun_ref',
3576                     )),
3577    ('<limits>', ('numeric_limits',)),
3578    ('<list>', ('list',)),
3579    ('<map>', ('map', 'multimap',)),
3580    ('<memory>', ('allocator',)),
3581    ('<queue>', ('queue', 'priority_queue',)),
3582    ('<set>', ('set', 'multiset',)),
3583    ('<stack>', ('stack',)),
3584    ('<string>', ('char_traits', 'basic_string',)),
3585    ('<utility>', ('pair',)),
3586    ('<vector>', ('vector',)),
3587
3588    # gcc extensions.
3589    # Note: std::hash is their hash, ::hash is our hash
3590    ('<hash_map>', ('hash_map', 'hash_multimap',)),
3591    ('<hash_set>', ('hash_set', 'hash_multiset',)),
3592    ('<slist>', ('slist',)),
3593    )
3594
3595_RE_PATTERN_STRING = re.compile(r'\bstring\b')
3596
3597_re_pattern_algorithm_header = []
3598for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
3599                  'transform'):
3600  # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
3601  # type::max().
3602  _re_pattern_algorithm_header.append(
3603      (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
3604       _template,
3605       '<algorithm>'))
3606
3607_re_pattern_templates = []
3608for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
3609  for _template in _templates:
3610    _re_pattern_templates.append(
3611        (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
3612         _template + '<>',
3613         _header))
3614
3615
def FilesBelongToSameModule(filename_cc, filename_h):
  """Check if these two filenames belong to the same module.

  The concept of a 'module' here is as follows:
  foo.h, foo-inl.h, foo.cc, foo_test.cc, foo_unittest.cc and foo_regtest.cc
  belong to the same 'module' if they are in the same directory.
  some/path/public/xyzzy and some/path/internal/xyzzy are also considered
  to belong to the same module here.

  If the filename_cc contains a longer path than the filename_h, for example,
  '/absolute/path/to/base/sysinfo.cc', and this file would include
  'base/sysinfo.h', this function also produces the prefix needed to open the
  header. This is used by the caller of this function to more robustly open the
  header file. We don't have access to the real include paths in this context,
  so we need this guesswork here.

  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
  according to this implementation. Because of this, this function gives
  some false positives. This should be sufficiently rare in practice.

  Args:
    filename_cc: is the path for the .cc file
    filename_h: is the path for the header path

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file; '' when
      the files do not belong to the same module.
  """
  if not filename_cc.endswith('.cc'):
    return (False, '')
  if not filename_h.endswith('.h'):
    return (False, '')

  # Reduce the source file to its module path: drop the extension, at most
  # one test suffix (the same set that _IsTestFilename recognizes), and any
  # /public/ or /internal/ path component.
  module_cc = filename_cc[:-len('.cc')]
  for test_suffix in ('_unittest', '_regtest', '_test'):
    if module_cc.endswith(test_suffix):
      module_cc = module_cc[:-len(test_suffix)]
      break
  module_cc = module_cc.replace('/public/', '/').replace('/internal/', '/')

  # Same reduction for the header, where the optional suffix is '-inl'.
  module_h = filename_h[:-len('.h')]
  if module_h.endswith('-inl'):
    module_h = module_h[:-len('-inl')]
  module_h = module_h.replace('/public/', '/').replace('/internal/', '/')

  if not module_cc.endswith(module_h):
    return (False, '')

  # Slice by explicit length: a negative-index slice would turn into
  # module_cc[:-0] (the empty string) when module_h is empty.
  common_path = module_cc[:len(module_cc) - len(module_h)]
  return (True, common_path)
3669
3670
def UpdateIncludeState(filename, include_state, io=codecs):
  """Fill up the include_state with new includes found from the file.

  Every line of the file is stripped of comments with CleanseComments and
  searched with _RE_PATTERN_INCLUDE.  Includes that are already present in
  include_state keep their existing value.

  Args:
    filename: the name of the header to read.
    include_state: an _IncludeState instance in which the headers are inserted.
    io: The io factory to use to read the file. Provided for testability.

  Returns:
    True if the header could be opened and was scanned. False if opening it
    raised IOError.
  """
  try:
    headerfile = io.open(filename, 'r', 'utf8', 'replace')
  except IOError:
    return False
  try:
    for index, line in enumerate(headerfile):
      clean_line = CleanseComments(line)
      match = _RE_PATTERN_INCLUDE.search(clean_line)
      if match:
        include = match.group(2)
        # The value formatting is cute, but not really used right now.
        # What matters here is that the key is in include_state.
        # Line numbers in the value are 1-based.
        include_state.setdefault(include, '%s:%d' % (filename, index + 1))
  finally:
    # Release the file handle instead of leaving it to the garbage
    # collector.  An injected io factory may return a plain iterable of
    # lines that has no close(), so only call it when it exists.
    close = getattr(headerfile, 'close', None)
    if close is not None:
      close()
  return True
3698
3699
def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
                              io=codecs):
  """Warns about STL entities used without their header being included.

  Every elided line is scanned for 'string', for the algorithm patterns and
  for the template patterns.  Only one reason is kept per header: when
  several entities from the same header are used, the one found last in the
  file is reported.  Headers included by a header belonging to the same
  module as this file count as included.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    include_state: An _IncludeState instance.
    error: The function to call with any errors found.
    io: The IO factory to use to read the header file. Provided for unittest
        injection.
  """
  # Maps header name -> (line number, entity that needs it).
  # Example: { '<functional>': (1219, 'less<>') }
  needed = {}

  for linenum in xrange(clean_lines.NumLines()):
    text = clean_lines.elided[linenum]
    # Blank lines and preprocessor lines never count as a use.
    if not text or text[0] == '#':
      continue

    # String is special -- it is a non-templatized type in STL.
    string_match = _RE_PATTERN_STRING.search(text)
    if string_match:
      # Don't warn about strings in non-STL namespaces:
      # (We check only the first match per line; good enough.)
      before = text[:string_match.start()]
      if before.endswith('std::') or not before.endswith('::'):
        needed['<string>'] = (linenum, 'string')

    for pattern, template, header in _re_pattern_algorithm_header:
      if pattern.search(text):
        needed[header] = (linenum, template)

    # Pure speed-up: the template patterns are only tried on lines that
    # contain a '<'.
    if '<' in text:
      for pattern, template, header in _re_pattern_templates:
        if pattern.search(text):
          needed[header] = (linenum, template)

  # The policy is that if you #include something in foo.h you don't need to
  # include it again in foo.cc.  Work on a copy so the caller's include_state
  # is left untouched by the extra headers gathered below.
  include_state = include_state.copy()

  # Set once a header of this module has been found and loaded.
  header_found = False

  # Use the absolute path so that matching works properly.  Emacs's flymake
  # lints a temporary copy whose name may end in '_flymake.cc'; map it back
  # so that 'foo_flymake.cc' looks for 'foo.h' rather than 'foo_flymake.h'.
  abs_filename = re.sub(r'_flymake\.cc$', '.cc',
                        FileInfo(filename).FullName())

  # include_state grows while we loop, so walk a snapshot of its keys.
  known_headers = include_state.keys()
  for header in known_headers:
    same_module, common_path = FilesBelongToSameModule(abs_filename, header)
    if not same_module:
      continue
    if UpdateIncludeState(common_path + header, include_state, io):
      header_found = True

  # If we can't find the header file for a .cc, assume it's because we don't
  # know where to look.  In that case give up, as we can't be sure the
  # include is not in the .h file.
  # TODO(unknown): Do a better job of finding .h files so we are confident that
  # not having the .h file means there isn't one.
  if not header_found and filename.endswith('.cc'):
    return

  # Everything has been scanned; report each header that is still missing.
  for header_spec in needed:
    linenum, template = needed[header_spec]
    if header_spec.strip('<>"') in include_state:
      continue
    error(filename, linenum,
          'build/include_what_you_use', 4,
          'Add #include ' + header_spec + ' for ' + template)
3791
3792
# Matches make_pair followed by an explicit template argument list.
_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')


def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
  """Check that make_pair's template arguments are deduced.

  G++ 4.6 in C++0x mode fails badly if make_pair's template arguments are
  specified explicitly, and such use isn't intended in any case.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Look at the elided form of the line rather than the raw text, so that a
  # make_pair<...> that only appears in a comment is not reported.
  line = clean_lines.elided[linenum]
  if _RE_PATTERN_EXPLICIT_MAKEPAIR.search(line):
    error(filename, linenum, 'build/explicit_make_pair',
          4,  # 4 = high confidence
          'For C++11-compatibility, omit template arguments from make_pair'
          ' OR use pair directly OR if appropriate, construct a pair directly')
3816
3817
def ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, nesting_state, error,
                extra_check_functions=[]):
  """Runs the per-line lint checks against one line of the file.

  NOLINT suppressions are parsed and the nesting state is updated first.  If
  the innermost open block is then flagged as inline assembly, no further
  check is run for this line.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    clean_lines: The cleansed representation of the file; its raw_lines
                 attribute is read here and the object is handed to every
                 check.
    line: Number of line being processed.
    include_state: An _IncludeState instance in which the headers are inserted.
    function_state: A _FunctionState instance which counts function lines, etc.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: A callable to which errors are reported.
    extra_check_functions: An iterable of additional check functions that are
                           run, after the built-in ones, on each source line.
                           Each is called with 4 arguments: filename,
                           clean_lines, line, error
  """
  ParseNolintSuppressions(filename, clean_lines.raw_lines[line], line, error)
  nesting_state.Update(filename, clean_lines, line, error)

  # Stop here when the innermost block is marked as inline assembly.
  block_stack = nesting_state.stack
  if block_stack and block_stack[-1].inline_asm != _NO_ASM:
    return

  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
  CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
  CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error)
  CheckLanguage(filename, clean_lines, line, file_extension, include_state,
                error)
  CheckForNonStandardConstructs(filename, clean_lines, line,
                                nesting_state, error)

  # The remaining built-in checks share the signature of the extra checks.
  for builtin_check in (CheckPosixThreading,
                        CheckInvalidIncrement,
                        CheckMakePairUsesDeduction):
    builtin_check(filename, clean_lines, line, error)
  for extra_check in extra_check_functions:
    extra_check(filename, clean_lines, line, error)
3856
def ProcessFileData(filename, file_extension, lines, error,
                    extra_check_functions=[]):
  """Lints the given file contents, reporting problems through error.

  A marker line is added before and after the contents, the whole-file
  checks are run, and ProcessLine is called for every cleansed line.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    lines: An array of strings, each representing a line of the file, with the
           last element being empty if the file is terminated with a newline.
    error: A callable to which errors are reported.
    extra_check_functions: An array of additional check functions that will be
                           run on each source line. Each function takes 4
                           arguments: filename, clean_lines, line, error
  """
  first_marker = ['// marker so line numbers and indices both start at 1']
  last_marker = ['// marker so line numbers end in a known way']
  lines = first_marker + lines + last_marker

  include_state = _IncludeState()
  function_state = _FunctionState()
  nesting_state = _NestingState()

  ResetNolintSuppressions()

  CheckForCopyright(filename, lines, error)

  # The header guard check only applies to .h files.
  if file_extension == 'h':
    CheckForHeaderGuard(filename, lines, error)

  RemoveMultiLineComments(filename, lines, error)
  clean_lines = CleansedLines(lines)
  for linenum in xrange(clean_lines.NumLines()):
    ProcessLine(filename, file_extension, clean_lines, linenum,
                include_state, function_state, nesting_state, error,
                extra_check_functions)
  nesting_state.CheckClassFinished(filename, error)

  CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)

  # We check here rather than inside ProcessLine so that we see raw
  # lines rather than "cleaned" lines.
  CheckForUnicodeReplacementCharacters(filename, lines, error)

  CheckForNewlineAtEOF(filename, lines, error)
3901
def ProcessFile(filename, vlevel, extra_check_functions=[]):
  """Does google-lint on a single file.

  The file (or stdin, when filename is '-') is read as UTF-8 with undecodable
  bytes replaced.  If it cannot be read, a message is written to stderr and
  the file is skipped.  Files whose extension is not cc, h or cpp are
  ignored, except for stdin.

  Args:
    filename: The name of the file to parse.

    vlevel: The level of errors to report.  Every error of confidence
    >= verbose_level will be reported.  0 is a good default.

    extra_check_functions: An array of additional check functions that will be
                           run on each source line. Each function takes 4
                           arguments: filename, clean_lines, line, error
  """

  _SetVerboseLevel(vlevel)
# BEGIN android-added
  old_errors = _cpplint_state.error_count
# END android-added

  try:
    # Support the UNIX convention of using "-" for stdin.  Note that
    # we are not opening the file with universal newline support
    # (which codecs doesn't support anyway), so the resulting lines do
    # contain trailing '\r' characters if we are reading a file that
    # has CRLF endings.
    # If after the split a trailing '\r' is present, it is removed
    # below. If it is not expected to be present (i.e. os.linesep !=
    # '\r\n' as in Windows), a warning is issued below if this file
    # is processed.
    if filename == '-':
      contents = codecs.StreamReaderWriter(sys.stdin,
                                           codecs.getreader('utf8'),
                                           codecs.getwriter('utf8'),
                                           'replace').read()
    else:
      # The with-block closes the file even if reading fails, so no handle
      # is leaked per linted file.
      with codecs.open(filename, 'r', 'utf8', 'replace') as source_file:
        contents = source_file.read()
  except IOError:
    sys.stderr.write(
        "Skipping input '%s': Can't open for reading\n" % filename)
    return

  lines = contents.split('\n')

  # Remove trailing '\r', remembering whether any was seen.
  carriage_return_found = False
  for index, text in enumerate(lines):
    if text.endswith('\r'):
      lines[index] = text.rstrip('\r')
      carriage_return_found = True

  # Note, if no dot is found, this will give the entire filename as the ext.
  file_extension = filename[filename.rfind('.') + 1:]

  # When reading from stdin, the extension is unknown, so no cpplint tests
  # should rely on the extension.
  if (filename != '-' and file_extension != 'cc' and file_extension != 'h'
      and file_extension != 'cpp'):
    sys.stderr.write('Ignoring %s; not a .cc or .h file\n' % filename)
  else:
    ProcessFileData(filename, file_extension, lines, Error,
                    extra_check_functions)
    if carriage_return_found and os.linesep != '\r\n':
      # Use 0 for linenum since outputting only one error for potentially
      # several lines.
      Error(filename, 0, 'whitespace/newline', 1,
            'One or more unexpected \\r (^M) found;'
            'better to use only a \\n')

# BEGIN android-changed
  # sys.stderr.write('Done processing %s\n' % filename)
  # In quiet mode, only announce files that produced new errors.
  if not _cpplint_state.quiet or old_errors != _cpplint_state.error_count:
    sys.stderr.write('Done processing %s\n' % filename)
# END android-changed
3975
def PrintUsage(message):
  """Writes the usage text to stderr and terminates the program.

  Args:
    message: The optional error message. When given, the program exits with
             it as a fatal error; otherwise it exits with status 1.
  """
  sys.stderr.write(_USAGE)
  if not message:
    sys.exit(1)
  sys.exit('\nFATAL ERROR: ' + message)
3987
3988
def PrintCategories():
  """Lists every error category on stderr, one per line, then exits with 0.

  These are the categories used to filter messages via --filter.
  """
  listing = ['  %s\n' % category for category in _ERROR_CATEGORIES]
  sys.stderr.write(''.join(listing))
  sys.exit(0)
3996
3997
def ParseArguments(args):
  """Parses the command line arguments.

  As side effects this sets the output format, verbosity level, filters,
  counting style and quiet flag, may set the module-level _root, and
  rebinds sys.stderr to the selected output stream.

  Args:
    args: The command line arguments:

  Returns:
    The list of filenames to lint.
  """
  global _root

  long_options = ['help', 'output=', 'verbose=',
                  'stdout',  # TODO(enh): added --stdout
                  # BEGIN android-added
                  'quiet',
                  # END android-added
                  'counting=',
                  'filter=',
                  'root=']
  try:
    (opts, filenames) = getopt.getopt(args, '', long_options)
  except getopt.GetoptError:
    PrintUsage('Invalid arguments.')

  # Start from the current settings; the flags below override them.
  verbosity = _VerboseLevel()
  output_format = _OutputFormat()
  output_stream = sys.stderr  # TODO(enh): added --stdout
  filters = ''
  # BEGIN android-added
  quiet = _Quiet()
  # END android-added
  counting_style = ''

  for (opt, val) in opts:
    if opt == '--help':
      PrintUsage(None)
    elif opt == '--stdout':  # TODO(enh): added --stdout
      output_stream = sys.stdout  # TODO(enh): added --stdout
    # BEGIN android-added
    elif opt == '--quiet':
      quiet = True
    # END android-added
    elif opt == '--output':
      if val not in ('emacs', 'vs7', 'eclipse'):
        PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.')
      output_format = val
    elif opt == '--verbose':
      verbosity = int(val)
    elif opt == '--filter':
      filters = val
      # An empty --filter= asks for the list of categories.
      if not filters:
        PrintCategories()
    elif opt == '--counting':
      if val not in ('total', 'toplevel', 'detailed'):
        PrintUsage('Valid counting options are total, toplevel, and detailed')
      counting_style = val
    elif opt == '--root':
      _root = val

  if not filenames:
    PrintUsage('No files were specified.')

  _SetOutputFormat(output_format)
  _SetVerboseLevel(verbosity)
  _SetFilters(filters)
  _SetCountingStyle(counting_style)
  # BEGIN android-added
  _SetQuiet(quiet)
  # END android-added
  sys.stderr = output_stream  # TODO(enh): added --stdout

  return filenames
4070
4071
def main():
  """Lints every file named on the command line and exits.

  The exit status is true (1) when at least one error was counted and
  false (0) otherwise.
  """
  filenames = ParseArguments(sys.argv[1:])

  # Change stderr to write with replacement characters so we don't die
  # if we try to print something containing non-ASCII characters.
  utf8_reader = codecs.getreader('utf8')
  utf8_writer = codecs.getwriter('utf8')
  sys.stderr = codecs.StreamReaderWriter(sys.stderr, utf8_reader, utf8_writer,
                                         'replace')

  _cpplint_state.ResetErrorCounts()
  for path in filenames:
    ProcessFile(path, _cpplint_state.verbose_level)
  # BEGIN android-changed
  # _cpplint_state.PrintErrorCounts()
  # In quiet mode the summary is only printed when errors were counted.
  nothing_to_report = _cpplint_state.quiet and _cpplint_state.error_count <= 0
  if not nothing_to_report:
    _cpplint_state.PrintErrorCounts()
  # END android-changed

  sys.exit(_cpplint_state.error_count > 0)
4092
4093
# Run the linter only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  main()
4096