1# Copyright 2015 Google Inc. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14"""PyTreeUnwrapper - produces a list of unwrapped lines from a pytree. 15 16[for a description of what an unwrapped line is, see unwrapped_line.py] 17 18This is a pytree visitor that goes over a parse tree and produces a list of 19UnwrappedLine containers from it, each with its own depth and containing all 20the tokens that could fit on the line if there were no maximal line-length 21limitations. 22 23Note: a precondition to running this visitor and obtaining correct results is 24for the tree to have its comments spliced in as nodes. Prefixes are ignored. 25 26For most uses, the convenience function UnwrapPyTree should be sufficient. 27""" 28 29# The word "token" is overloaded within this module, so for clarity rename 30# the imported pgen2.token module. 31from lib2to3 import pytree 32from lib2to3.pgen2 import token as grammar_token 33 34from yapf.yapflib import pytree_utils 35from yapf.yapflib import pytree_visitor 36from yapf.yapflib import split_penalty 37from yapf.yapflib import style 38from yapf.yapflib import unwrapped_line 39 40 41def UnwrapPyTree(tree): 42 """Create and return a list of unwrapped lines from the given pytree. 43 44 Arguments: 45 tree: the top-level pytree node to unwrap. 46 47 Returns: 48 A list of UnwrappedLine objects. 49 """ 50 unwrapper = PyTreeUnwrapper() 51 unwrapper.Visit(tree) 52 uwlines = unwrapper.GetUnwrappedLines() 53 uwlines.sort(key=lambda x: x.lineno) 54 return uwlines 55 56 57# Grammar tokens considered as whitespace for the purpose of unwrapping. 58_WHITESPACE_TOKENS = frozenset([ 59 grammar_token.NEWLINE, grammar_token.DEDENT, grammar_token.INDENT, 60 grammar_token.ENDMARKER 61]) 62 63 64class PyTreeUnwrapper(pytree_visitor.PyTreeVisitor): 65 """PyTreeUnwrapper - see file-level docstring for detailed description. 66 67 Note: since this implements PyTreeVisitor and node names in lib2to3 are 68 underscore_separated, the visiting methods of this class are named as 69 Visit_node_name. invalid-name pragmas are added to each such method to silence 70 a style warning. This is forced on us by the usage of lib2to3, and re-munging 71 method names to make them different from actual node names sounded like a 72 confusing and brittle affair that wasn't worth it for this small & controlled 73 deviation from the style guide. 74 75 To understand the connection between visitor methods in this class, some 76 familiarity with the Python grammar is required. 77 """ 78 79 def __init__(self): 80 # A list of all unwrapped lines finished visiting so far. 81 self._unwrapped_lines = [] 82 83 # Builds up a "current" unwrapped line while visiting pytree nodes. Some 84 # nodes will finish a line and start a new one. 85 self._cur_unwrapped_line = unwrapped_line.UnwrappedLine(0) 86 87 # Current indentation depth. 88 self._cur_depth = 0 89 90 def GetUnwrappedLines(self): 91 """Fetch the result of the tree walk. 92 93 Note: only call this after visiting the whole tree. 94 95 Returns: 96 A list of UnwrappedLine objects. 97 """ 98 # Make sure the last line that was being populated is flushed. 99 self._StartNewLine() 100 return self._unwrapped_lines 101 102 def _StartNewLine(self): 103 """Finish current line and start a new one. 104 105 Place the currently accumulated line into the _unwrapped_lines list and 106 start a new one. 107 """ 108 if self._cur_unwrapped_line.tokens: 109 self._unwrapped_lines.append(self._cur_unwrapped_line) 110 _MatchBrackets(self._cur_unwrapped_line) 111 _AdjustSplitPenalty(self._cur_unwrapped_line) 112 self._cur_unwrapped_line = unwrapped_line.UnwrappedLine(self._cur_depth) 113 114 _STMT_TYPES = frozenset({ 115 'if_stmt', 116 'while_stmt', 117 'for_stmt', 118 'try_stmt', 119 'expect_clause', 120 'with_stmt', 121 'funcdef', 122 'classdef', 123 }) 124 125 # pylint: disable=invalid-name,missing-docstring 126 def Visit_simple_stmt(self, node): 127 # A 'simple_stmt' conveniently represents a non-compound Python statement, 128 # i.e. a statement that does not contain other statements. 129 130 # When compound nodes have a single statement as their suite, the parser 131 # can leave it in the tree directly without creating a suite. But we have 132 # to increase depth in these cases as well. However, don't increase the 133 # depth of we have a simple_stmt that's a comment node. This represents a 134 # standalone comment and in the case of it coming directly after the 135 # funcdef, it is a "top" comment for the whole function. 136 # TODO(eliben): add more relevant compound statements here. 137 single_stmt_suite = ( 138 node.parent and pytree_utils.NodeName(node.parent) in self._STMT_TYPES) 139 is_comment_stmt = pytree_utils.IsCommentStatement(node) 140 if single_stmt_suite and not is_comment_stmt: 141 self._cur_depth += 1 142 self._StartNewLine() 143 self.DefaultNodeVisit(node) 144 if single_stmt_suite and not is_comment_stmt: 145 self._cur_depth -= 1 146 147 def _VisitCompoundStatement(self, node, substatement_names): 148 """Helper for visiting compound statements. 149 150 Python compound statements serve as containers for other statements. Thus, 151 when we encounter a new compound statement we start a new unwrapped line. 152 153 Arguments: 154 node: the node to visit. 155 substatement_names: set of node names. A compound statement will be 156 recognized as a NAME node with a name in this set. 157 """ 158 for child in node.children: 159 # A pytree is structured in such a way that a single 'if_stmt' node will 160 # contain all the 'if', 'elif' and 'else' nodes as children (similar 161 # structure applies to 'while' statements, 'try' blocks, etc). Therefore, 162 # we visit all children here and create a new line before the requested 163 # set of nodes. 164 if (child.type == grammar_token.NAME and 165 child.value in substatement_names): 166 self._StartNewLine() 167 self.Visit(child) 168 169 _IF_STMT_ELEMS = frozenset({'if', 'else', 'elif'}) 170 171 def Visit_if_stmt(self, node): # pylint: disable=invalid-name 172 self._VisitCompoundStatement(node, self._IF_STMT_ELEMS) 173 174 _WHILE_STMT_ELEMS = frozenset({'while', 'else'}) 175 176 def Visit_while_stmt(self, node): # pylint: disable=invalid-name 177 self._VisitCompoundStatement(node, self._WHILE_STMT_ELEMS) 178 179 _FOR_STMT_ELEMS = frozenset({'for', 'else'}) 180 181 def Visit_for_stmt(self, node): # pylint: disable=invalid-name 182 self._VisitCompoundStatement(node, self._FOR_STMT_ELEMS) 183 184 _TRY_STMT_ELEMS = frozenset({'try', 'except', 'else', 'finally'}) 185 186 def Visit_try_stmt(self, node): # pylint: disable=invalid-name 187 self._VisitCompoundStatement(node, self._TRY_STMT_ELEMS) 188 189 _EXCEPT_STMT_ELEMS = frozenset({'except'}) 190 191 def Visit_except_clause(self, node): # pylint: disable=invalid-name 192 self._VisitCompoundStatement(node, self._EXCEPT_STMT_ELEMS) 193 194 _FUNC_DEF_ELEMS = frozenset({'def'}) 195 196 def Visit_funcdef(self, node): # pylint: disable=invalid-name 197 self._VisitCompoundStatement(node, self._FUNC_DEF_ELEMS) 198 199 def Visit_async_funcdef(self, node): # pylint: disable=invalid-name 200 self._StartNewLine() 201 index = 0 202 for child in node.children: 203 index += 1 204 self.Visit(child) 205 if pytree_utils.NodeName(child) == 'ASYNC': 206 break 207 for child in node.children[index].children: 208 self.Visit(child) 209 210 _CLASS_DEF_ELEMS = frozenset({'class'}) 211 212 def Visit_classdef(self, node): # pylint: disable=invalid-name 213 self._VisitCompoundStatement(node, self._CLASS_DEF_ELEMS) 214 215 def Visit_async_stmt(self, node): # pylint: disable=invalid-name 216 self._StartNewLine() 217 index = 0 218 for child in node.children: 219 index += 1 220 self.Visit(child) 221 if pytree_utils.NodeName(child) == 'ASYNC': 222 break 223 for child in node.children[index].children: 224 self.Visit(child) 225 226 def Visit_decorator(self, node): # pylint: disable=invalid-name 227 for child in node.children: 228 self.Visit(child) 229 if (pytree_utils.NodeName(child) == 'COMMENT' and 230 child == node.children[0]): 231 self._StartNewLine() 232 233 def Visit_decorators(self, node): # pylint: disable=invalid-name 234 for child in node.children: 235 self._StartNewLine() 236 self.Visit(child) 237 238 def Visit_decorated(self, node): # pylint: disable=invalid-name 239 for child in node.children: 240 self._StartNewLine() 241 self.Visit(child) 242 243 _WITH_STMT_ELEMS = frozenset({'with'}) 244 245 def Visit_with_stmt(self, node): # pylint: disable=invalid-name 246 self._VisitCompoundStatement(node, self._WITH_STMT_ELEMS) 247 248 def Visit_suite(self, node): # pylint: disable=invalid-name 249 # A 'suite' starts a new indentation level in Python. 250 self._cur_depth += 1 251 self._StartNewLine() 252 self.DefaultNodeVisit(node) 253 self._cur_depth -= 1 254 255 def Visit_listmaker(self, node): # pylint: disable=invalid-name 256 _DetermineMustSplitAnnotation(node) 257 self.DefaultNodeVisit(node) 258 259 def Visit_dictsetmaker(self, node): # pylint: disable=invalid-name 260 _DetermineMustSplitAnnotation(node) 261 self.DefaultNodeVisit(node) 262 263 def Visit_import_as_names(self, node): # pylint: disable=invalid-name 264 if node.prev_sibling.value == '(': 265 _DetermineMustSplitAnnotation(node) 266 self.DefaultNodeVisit(node) 267 268 def Visit_testlist_gexp(self, node): # pylint: disable=invalid-name 269 _DetermineMustSplitAnnotation(node) 270 self.DefaultNodeVisit(node) 271 272 def Visit_arglist(self, node): # pylint: disable=invalid-name 273 _DetermineMustSplitAnnotation(node) 274 self.DefaultNodeVisit(node) 275 276 def Visit_typedargslist(self, node): # pylint: disable=invalid-name 277 _DetermineMustSplitAnnotation(node) 278 self.DefaultNodeVisit(node) 279 280 def DefaultLeafVisit(self, leaf): 281 """Default visitor for tree leaves. 282 283 A tree leaf is always just gets appended to the current unwrapped line. 284 285 Arguments: 286 leaf: the leaf to visit. 287 """ 288 if leaf.type in _WHITESPACE_TOKENS: 289 self._StartNewLine() 290 elif leaf.type != grammar_token.COMMENT or leaf.value.strip(): 291 # Add non-whitespace tokens and comments that aren't empty. 292 self._cur_unwrapped_line.AppendNode(leaf) 293 294 295_BRACKET_MATCH = {')': '(', '}': '{', ']': '['} 296 297 298def _MatchBrackets(uwline): 299 """Visit the node and match the brackets. 300 301 For every open bracket ('[', '{', or '('), find the associated closing bracket 302 and "match" them up. I.e., save in the token a pointer to its associated open 303 or close bracket. 304 305 Arguments: 306 uwline: (UnwrappedLine) An unwrapped line. 307 """ 308 bracket_stack = [] 309 for token in uwline.tokens: 310 if token.value in pytree_utils.OPENING_BRACKETS: 311 bracket_stack.append(token) 312 elif token.value in pytree_utils.CLOSING_BRACKETS: 313 bracket_stack[-1].matching_bracket = token 314 token.matching_bracket = bracket_stack[-1] 315 bracket_stack.pop() 316 317 for bracket in bracket_stack: 318 if id(pytree_utils.GetOpeningBracket(token.node)) == id(bracket.node): 319 bracket.container_elements.append(token) 320 token.container_opening = bracket 321 322 323def _AdjustSplitPenalty(uwline): 324 """Visit the node and adjust the split penalties if needed. 325 326 A token shouldn't be split if it's not within a bracket pair. Mark any token 327 that's not within a bracket pair as "unbreakable". 328 329 Arguments: 330 uwline: (UnwrappedLine) An unwrapped line. 331 """ 332 bracket_level = 0 333 for index, token in enumerate(uwline.tokens): 334 if index and not bracket_level: 335 pytree_utils.SetNodeAnnotation(token.node, 336 pytree_utils.Annotation.SPLIT_PENALTY, 337 split_penalty.UNBREAKABLE) 338 if token.value in pytree_utils.OPENING_BRACKETS: 339 bracket_level += 1 340 elif token.value in pytree_utils.CLOSING_BRACKETS: 341 bracket_level -= 1 342 343 344def _DetermineMustSplitAnnotation(node): 345 """Enforce a split in the list if the list ends with a comma.""" 346 if style.Get('DISABLE_ENDING_COMMA_HEURISTIC'): 347 return 348 if not _ContainsComments(node): 349 token = next(node.parent.leaves()) 350 if token.value == '(': 351 if sum(1 for ch in node.children 352 if pytree_utils.NodeName(ch) == 'COMMA') < 2: 353 return 354 if (not isinstance(node.children[-1], pytree.Leaf) or 355 node.children[-1].value != ','): 356 return 357 num_children = len(node.children) 358 index = 0 359 _SetMustSplitOnFirstLeaf(node.children[0]) 360 while index < num_children - 1: 361 child = node.children[index] 362 if isinstance(child, pytree.Leaf) and child.value == ',': 363 next_child = node.children[index + 1] 364 if next_child.type == grammar_token.COMMENT: 365 index += 1 366 if index >= num_children - 1: 367 break 368 _SetMustSplitOnFirstLeaf(node.children[index + 1]) 369 index += 1 370 371 372def _ContainsComments(node): 373 """Return True if the list has a comment in it.""" 374 if isinstance(node, pytree.Leaf): 375 return node.type == grammar_token.COMMENT 376 for child in node.children: 377 if _ContainsComments(child): 378 return True 379 return False 380 381 382def _SetMustSplitOnFirstLeaf(node): 383 """Set the "must split" annotation on the first leaf node.""" 384 pytree_utils.SetNodeAnnotation( 385 pytree_utils.FirstLeafNode(node), pytree_utils.Annotation.MUST_SPLIT, 386 True) 387