1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 //===----------------------------------------------------------------------===//
15
16 #include "ContinuationIndenter.h"
17 #include "TokenAnnotator.h"
18 #include "UnwrappedLineFormatter.h"
19 #include "UnwrappedLineParser.h"
20 #include "WhitespaceManager.h"
21 #include "clang/Basic/Diagnostic.h"
22 #include "clang/Basic/DiagnosticOptions.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Format/Format.h"
25 #include "clang/Lex/Lexer.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/Support/Allocator.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/Path.h"
30 #include "llvm/Support/YAMLTraits.h"
31 #include <queue>
32 #include <string>
33
34 #define DEBUG_TYPE "format-formatter"
35
36 using clang::format::FormatStyle;
37
38 LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
39
40 namespace llvm {
41 namespace yaml {
42 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
enumerationllvm::yaml::ScalarEnumerationTraits43 static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) {
44 IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
45 IO.enumCase(Value, "Java", FormatStyle::LK_Java);
46 IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
47 IO.enumCase(Value, "Proto", FormatStyle::LK_Proto);
48 }
49 };
50
51 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> {
enumerationllvm::yaml::ScalarEnumerationTraits52 static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) {
53 IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03);
54 IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03);
55 IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11);
56 IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11);
57 IO.enumCase(Value, "Auto", FormatStyle::LS_Auto);
58 }
59 };
60
61 template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> {
enumerationllvm::yaml::ScalarEnumerationTraits62 static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) {
63 IO.enumCase(Value, "Never", FormatStyle::UT_Never);
64 IO.enumCase(Value, "false", FormatStyle::UT_Never);
65 IO.enumCase(Value, "Always", FormatStyle::UT_Always);
66 IO.enumCase(Value, "true", FormatStyle::UT_Always);
67 IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation);
68 }
69 };
70
71 template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> {
enumerationllvm::yaml::ScalarEnumerationTraits72 static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) {
73 IO.enumCase(Value, "None", FormatStyle::SFS_None);
74 IO.enumCase(Value, "false", FormatStyle::SFS_None);
75 IO.enumCase(Value, "All", FormatStyle::SFS_All);
76 IO.enumCase(Value, "true", FormatStyle::SFS_All);
77 IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline);
78 IO.enumCase(Value, "Empty", FormatStyle::SFS_Empty);
79 }
80 };
81
82 template <> struct ScalarEnumerationTraits<FormatStyle::BinaryOperatorStyle> {
enumerationllvm::yaml::ScalarEnumerationTraits83 static void enumeration(IO &IO, FormatStyle::BinaryOperatorStyle &Value) {
84 IO.enumCase(Value, "All", FormatStyle::BOS_All);
85 IO.enumCase(Value, "true", FormatStyle::BOS_All);
86 IO.enumCase(Value, "None", FormatStyle::BOS_None);
87 IO.enumCase(Value, "false", FormatStyle::BOS_None);
88 IO.enumCase(Value, "NonAssignment", FormatStyle::BOS_NonAssignment);
89 }
90 };
91
92 template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> {
enumerationllvm::yaml::ScalarEnumerationTraits93 static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) {
94 IO.enumCase(Value, "Attach", FormatStyle::BS_Attach);
95 IO.enumCase(Value, "Linux", FormatStyle::BS_Linux);
96 IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup);
97 IO.enumCase(Value, "Allman", FormatStyle::BS_Allman);
98 IO.enumCase(Value, "GNU", FormatStyle::BS_GNU);
99 }
100 };
101
102 template <>
103 struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> {
enumerationllvm::yaml::ScalarEnumerationTraits104 static void enumeration(IO &IO,
105 FormatStyle::NamespaceIndentationKind &Value) {
106 IO.enumCase(Value, "None", FormatStyle::NI_None);
107 IO.enumCase(Value, "Inner", FormatStyle::NI_Inner);
108 IO.enumCase(Value, "All", FormatStyle::NI_All);
109 }
110 };
111
112 template <> struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> {
enumerationllvm::yaml::ScalarEnumerationTraits113 static void enumeration(IO &IO, FormatStyle::PointerAlignmentStyle &Value) {
114 IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle);
115 IO.enumCase(Value, "Left", FormatStyle::PAS_Left);
116 IO.enumCase(Value, "Right", FormatStyle::PAS_Right);
117
118 // For backward compatibility.
119 IO.enumCase(Value, "true", FormatStyle::PAS_Left);
120 IO.enumCase(Value, "false", FormatStyle::PAS_Right);
121 }
122 };
123
124 template <>
125 struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> {
enumerationllvm::yaml::ScalarEnumerationTraits126 static void enumeration(IO &IO,
127 FormatStyle::SpaceBeforeParensOptions &Value) {
128 IO.enumCase(Value, "Never", FormatStyle::SBPO_Never);
129 IO.enumCase(Value, "ControlStatements",
130 FormatStyle::SBPO_ControlStatements);
131 IO.enumCase(Value, "Always", FormatStyle::SBPO_Always);
132
133 // For backward compatibility.
134 IO.enumCase(Value, "false", FormatStyle::SBPO_Never);
135 IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements);
136 }
137 };
138
139 template <> struct MappingTraits<FormatStyle> {
mappingllvm::yaml::MappingTraits140 static void mapping(IO &IO, FormatStyle &Style) {
141 // When reading, read the language first, we need it for getPredefinedStyle.
142 IO.mapOptional("Language", Style.Language);
143
144 if (IO.outputting()) {
145 StringRef StylesArray[] = {"LLVM", "Google", "Chromium",
146 "Mozilla", "WebKit", "GNU"};
147 ArrayRef<StringRef> Styles(StylesArray);
148 for (size_t i = 0, e = Styles.size(); i < e; ++i) {
149 StringRef StyleName(Styles[i]);
150 FormatStyle PredefinedStyle;
151 if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) &&
152 Style == PredefinedStyle) {
153 IO.mapOptional("# BasedOnStyle", StyleName);
154 break;
155 }
156 }
157 } else {
158 StringRef BasedOnStyle;
159 IO.mapOptional("BasedOnStyle", BasedOnStyle);
160 if (!BasedOnStyle.empty()) {
161 FormatStyle::LanguageKind OldLanguage = Style.Language;
162 FormatStyle::LanguageKind Language =
163 ((FormatStyle *)IO.getContext())->Language;
164 if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) {
165 IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
166 return;
167 }
168 Style.Language = OldLanguage;
169 }
170 }
171
172 IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
173 IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket);
174 IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
175 IO.mapOptional("AlignOperands", Style.AlignOperands);
176 IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
177 IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
178 Style.AllowAllParametersOfDeclarationOnNextLine);
179 IO.mapOptional("AllowShortBlocksOnASingleLine",
180 Style.AllowShortBlocksOnASingleLine);
181 IO.mapOptional("AllowShortCaseLabelsOnASingleLine",
182 Style.AllowShortCaseLabelsOnASingleLine);
183 IO.mapOptional("AllowShortIfStatementsOnASingleLine",
184 Style.AllowShortIfStatementsOnASingleLine);
185 IO.mapOptional("AllowShortLoopsOnASingleLine",
186 Style.AllowShortLoopsOnASingleLine);
187 IO.mapOptional("AllowShortFunctionsOnASingleLine",
188 Style.AllowShortFunctionsOnASingleLine);
189 IO.mapOptional("AlwaysBreakAfterDefinitionReturnType",
190 Style.AlwaysBreakAfterDefinitionReturnType);
191 IO.mapOptional("AlwaysBreakTemplateDeclarations",
192 Style.AlwaysBreakTemplateDeclarations);
193 IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
194 Style.AlwaysBreakBeforeMultilineStrings);
195 IO.mapOptional("BreakBeforeBinaryOperators",
196 Style.BreakBeforeBinaryOperators);
197 IO.mapOptional("BreakBeforeTernaryOperators",
198 Style.BreakBeforeTernaryOperators);
199 IO.mapOptional("BreakConstructorInitializersBeforeComma",
200 Style.BreakConstructorInitializersBeforeComma);
201 IO.mapOptional("BinPackParameters", Style.BinPackParameters);
202 IO.mapOptional("BinPackArguments", Style.BinPackArguments);
203 IO.mapOptional("ColumnLimit", Style.ColumnLimit);
204 IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
205 Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
206 IO.mapOptional("ConstructorInitializerIndentWidth",
207 Style.ConstructorInitializerIndentWidth);
208 IO.mapOptional("DerivePointerAlignment", Style.DerivePointerAlignment);
209 IO.mapOptional("ExperimentalAutoDetectBinPacking",
210 Style.ExperimentalAutoDetectBinPacking);
211 IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
212 IO.mapOptional("IndentWrappedFunctionNames",
213 Style.IndentWrappedFunctionNames);
214 IO.mapOptional("IndentFunctionDeclarationAfterType",
215 Style.IndentWrappedFunctionNames);
216 IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
217 IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks",
218 Style.KeepEmptyLinesAtTheStartOfBlocks);
219 IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
220 IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth);
221 IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty);
222 IO.mapOptional("ObjCSpaceBeforeProtocolList",
223 Style.ObjCSpaceBeforeProtocolList);
224 IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
225 Style.PenaltyBreakBeforeFirstCallParameter);
226 IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
227 IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
228 IO.mapOptional("PenaltyBreakFirstLessLess",
229 Style.PenaltyBreakFirstLessLess);
230 IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
231 IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
232 Style.PenaltyReturnTypeOnItsOwnLine);
233 IO.mapOptional("PointerAlignment", Style.PointerAlignment);
234 IO.mapOptional("SpacesBeforeTrailingComments",
235 Style.SpacesBeforeTrailingComments);
236 IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
237 IO.mapOptional("Standard", Style.Standard);
238 IO.mapOptional("IndentWidth", Style.IndentWidth);
239 IO.mapOptional("TabWidth", Style.TabWidth);
240 IO.mapOptional("UseTab", Style.UseTab);
241 IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
242 IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
243 IO.mapOptional("SpacesInSquareBrackets", Style.SpacesInSquareBrackets);
244 IO.mapOptional("SpacesInAngles", Style.SpacesInAngles);
245 IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
246 IO.mapOptional("SpacesInCStyleCastParentheses",
247 Style.SpacesInCStyleCastParentheses);
248 IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast);
249 IO.mapOptional("SpacesInContainerLiterals",
250 Style.SpacesInContainerLiterals);
251 IO.mapOptional("SpaceBeforeAssignmentOperators",
252 Style.SpaceBeforeAssignmentOperators);
253 IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
254 IO.mapOptional("CommentPragmas", Style.CommentPragmas);
255 IO.mapOptional("ForEachMacros", Style.ForEachMacros);
256
257 // For backward compatibility.
258 if (!IO.outputting()) {
259 IO.mapOptional("SpaceAfterControlStatementKeyword",
260 Style.SpaceBeforeParens);
261 IO.mapOptional("PointerBindsToType", Style.PointerAlignment);
262 IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment);
263 }
264 IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
265 IO.mapOptional("DisableFormat", Style.DisableFormat);
266 }
267 };
268
// Allows reading a vector<FormatStyle> while keeping default values.
// IO.getContext() should contain a pointer to the FormatStyle structure that
// will be used to get default values for missing keys.
// If the first element has no Language specified, it will be treated as the
// default one for the following elements.
274 template <> struct DocumentListTraits<std::vector<FormatStyle>> {
sizellvm::yaml::DocumentListTraits275 static size_t size(IO &IO, std::vector<FormatStyle> &Seq) {
276 return Seq.size();
277 }
elementllvm::yaml::DocumentListTraits278 static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq,
279 size_t Index) {
280 if (Index >= Seq.size()) {
281 assert(Index == Seq.size());
282 FormatStyle Template;
283 if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) {
284 Template = Seq[0];
285 } else {
286 Template = *((const FormatStyle *)IO.getContext());
287 Template.Language = FormatStyle::LK_None;
288 }
289 Seq.resize(Index + 1, Template);
290 }
291 return Seq[Index];
292 }
293 };
294 }
295 }
296
297 namespace clang {
298 namespace format {
299
getParseCategory()300 const std::error_category &getParseCategory() {
301 static ParseErrorCategory C;
302 return C;
303 }
make_error_code(ParseError e)304 std::error_code make_error_code(ParseError e) {
305 return std::error_code(static_cast<int>(e), getParseCategory());
306 }
307
name() const308 const char *ParseErrorCategory::name() const LLVM_NOEXCEPT {
309 return "clang-format.parse_error";
310 }
311
message(int EV) const312 std::string ParseErrorCategory::message(int EV) const {
313 switch (static_cast<ParseError>(EV)) {
314 case ParseError::Success:
315 return "Success";
316 case ParseError::Error:
317 return "Invalid argument";
318 case ParseError::Unsuitable:
319 return "Unsuitable";
320 }
321 llvm_unreachable("unexpected parse error");
322 }
323
getLLVMStyle()324 FormatStyle getLLVMStyle() {
325 FormatStyle LLVMStyle;
326 LLVMStyle.Language = FormatStyle::LK_Cpp;
327 LLVMStyle.AccessModifierOffset = -2;
328 LLVMStyle.AlignEscapedNewlinesLeft = false;
329 LLVMStyle.AlignAfterOpenBracket = true;
330 LLVMStyle.AlignOperands = true;
331 LLVMStyle.AlignTrailingComments = true;
332 LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
333 LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All;
334 LLVMStyle.AllowShortBlocksOnASingleLine = false;
335 LLVMStyle.AllowShortCaseLabelsOnASingleLine = false;
336 LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
337 LLVMStyle.AllowShortLoopsOnASingleLine = false;
338 LLVMStyle.AlwaysBreakAfterDefinitionReturnType = false;
339 LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
340 LLVMStyle.AlwaysBreakTemplateDeclarations = false;
341 LLVMStyle.BinPackParameters = true;
342 LLVMStyle.BinPackArguments = true;
343 LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None;
344 LLVMStyle.BreakBeforeTernaryOperators = true;
345 LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
346 LLVMStyle.BreakConstructorInitializersBeforeComma = false;
347 LLVMStyle.ColumnLimit = 80;
348 LLVMStyle.CommentPragmas = "^ IWYU pragma:";
349 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
350 LLVMStyle.ConstructorInitializerIndentWidth = 4;
351 LLVMStyle.ContinuationIndentWidth = 4;
352 LLVMStyle.Cpp11BracedListStyle = true;
353 LLVMStyle.DerivePointerAlignment = false;
354 LLVMStyle.ExperimentalAutoDetectBinPacking = false;
355 LLVMStyle.ForEachMacros.push_back("foreach");
356 LLVMStyle.ForEachMacros.push_back("Q_FOREACH");
357 LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH");
358 LLVMStyle.IndentCaseLabels = false;
359 LLVMStyle.IndentWrappedFunctionNames = false;
360 LLVMStyle.IndentWidth = 2;
361 LLVMStyle.TabWidth = 8;
362 LLVMStyle.MaxEmptyLinesToKeep = 1;
363 LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true;
364 LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
365 LLVMStyle.ObjCBlockIndentWidth = 2;
366 LLVMStyle.ObjCSpaceAfterProperty = false;
367 LLVMStyle.ObjCSpaceBeforeProtocolList = true;
368 LLVMStyle.PointerAlignment = FormatStyle::PAS_Right;
369 LLVMStyle.SpacesBeforeTrailingComments = 1;
370 LLVMStyle.Standard = FormatStyle::LS_Cpp11;
371 LLVMStyle.UseTab = FormatStyle::UT_Never;
372 LLVMStyle.SpacesInParentheses = false;
373 LLVMStyle.SpacesInSquareBrackets = false;
374 LLVMStyle.SpaceInEmptyParentheses = false;
375 LLVMStyle.SpacesInContainerLiterals = true;
376 LLVMStyle.SpacesInCStyleCastParentheses = false;
377 LLVMStyle.SpaceAfterCStyleCast = false;
378 LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements;
379 LLVMStyle.SpaceBeforeAssignmentOperators = true;
380 LLVMStyle.SpacesInAngles = false;
381
382 LLVMStyle.PenaltyBreakComment = 300;
383 LLVMStyle.PenaltyBreakFirstLessLess = 120;
384 LLVMStyle.PenaltyBreakString = 1000;
385 LLVMStyle.PenaltyExcessCharacter = 1000000;
386 LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
387 LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19;
388
389 LLVMStyle.DisableFormat = false;
390
391 return LLVMStyle;
392 }
393
getGoogleStyle(FormatStyle::LanguageKind Language)394 FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
395 FormatStyle GoogleStyle = getLLVMStyle();
396 GoogleStyle.Language = Language;
397
398 GoogleStyle.AccessModifierOffset = -1;
399 GoogleStyle.AlignEscapedNewlinesLeft = true;
400 GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
401 GoogleStyle.AllowShortLoopsOnASingleLine = true;
402 GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
403 GoogleStyle.AlwaysBreakTemplateDeclarations = true;
404 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
405 GoogleStyle.DerivePointerAlignment = true;
406 GoogleStyle.IndentCaseLabels = true;
407 GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false;
408 GoogleStyle.ObjCSpaceAfterProperty = false;
409 GoogleStyle.ObjCSpaceBeforeProtocolList = false;
410 GoogleStyle.PointerAlignment = FormatStyle::PAS_Left;
411 GoogleStyle.SpacesBeforeTrailingComments = 2;
412 GoogleStyle.Standard = FormatStyle::LS_Auto;
413
414 GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
415 GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1;
416
417 if (Language == FormatStyle::LK_Java) {
418 GoogleStyle.AlignAfterOpenBracket = false;
419 GoogleStyle.AlignOperands = false;
420 GoogleStyle.AlignTrailingComments = false;
421 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty;
422 GoogleStyle.AllowShortIfStatementsOnASingleLine = false;
423 GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
424 GoogleStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment;
425 GoogleStyle.ColumnLimit = 100;
426 GoogleStyle.SpaceAfterCStyleCast = true;
427 GoogleStyle.SpacesBeforeTrailingComments = 1;
428 } else if (Language == FormatStyle::LK_JavaScript) {
429 GoogleStyle.BreakBeforeTernaryOperators = false;
430 GoogleStyle.MaxEmptyLinesToKeep = 3;
431 GoogleStyle.SpacesInContainerLiterals = false;
432 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
433 GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
434 } else if (Language == FormatStyle::LK_Proto) {
435 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None;
436 GoogleStyle.SpacesInContainerLiterals = false;
437 }
438
439 return GoogleStyle;
440 }
441
getChromiumStyle(FormatStyle::LanguageKind Language)442 FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) {
443 FormatStyle ChromiumStyle = getGoogleStyle(Language);
444 if (Language == FormatStyle::LK_Java) {
445 ChromiumStyle.AllowShortIfStatementsOnASingleLine = true;
446 ChromiumStyle.IndentWidth = 4;
447 ChromiumStyle.ContinuationIndentWidth = 8;
448 } else {
449 ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
450 ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
451 ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
452 ChromiumStyle.AllowShortLoopsOnASingleLine = false;
453 ChromiumStyle.BinPackParameters = false;
454 ChromiumStyle.DerivePointerAlignment = false;
455 }
456 return ChromiumStyle;
457 }
458
getMozillaStyle()459 FormatStyle getMozillaStyle() {
460 FormatStyle MozillaStyle = getLLVMStyle();
461 MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
462 MozillaStyle.Cpp11BracedListStyle = false;
463 MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
464 MozillaStyle.DerivePointerAlignment = true;
465 MozillaStyle.IndentCaseLabels = true;
466 MozillaStyle.ObjCSpaceAfterProperty = true;
467 MozillaStyle.ObjCSpaceBeforeProtocolList = false;
468 MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
469 MozillaStyle.PointerAlignment = FormatStyle::PAS_Left;
470 MozillaStyle.Standard = FormatStyle::LS_Cpp03;
471 return MozillaStyle;
472 }
473
getWebKitStyle()474 FormatStyle getWebKitStyle() {
475 FormatStyle Style = getLLVMStyle();
476 Style.AccessModifierOffset = -4;
477 Style.AlignAfterOpenBracket = false;
478 Style.AlignOperands = false;
479 Style.AlignTrailingComments = false;
480 Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
481 Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup;
482 Style.BreakConstructorInitializersBeforeComma = true;
483 Style.Cpp11BracedListStyle = false;
484 Style.ColumnLimit = 0;
485 Style.IndentWidth = 4;
486 Style.NamespaceIndentation = FormatStyle::NI_Inner;
487 Style.ObjCBlockIndentWidth = 4;
488 Style.ObjCSpaceAfterProperty = true;
489 Style.PointerAlignment = FormatStyle::PAS_Left;
490 Style.Standard = FormatStyle::LS_Cpp03;
491 return Style;
492 }
493
getGNUStyle()494 FormatStyle getGNUStyle() {
495 FormatStyle Style = getLLVMStyle();
496 Style.AlwaysBreakAfterDefinitionReturnType = true;
497 Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
498 Style.BreakBeforeBraces = FormatStyle::BS_GNU;
499 Style.BreakBeforeTernaryOperators = true;
500 Style.Cpp11BracedListStyle = false;
501 Style.ColumnLimit = 79;
502 Style.SpaceBeforeParens = FormatStyle::SBPO_Always;
503 Style.Standard = FormatStyle::LS_Cpp03;
504 return Style;
505 }
506
getNoStyle()507 FormatStyle getNoStyle() {
508 FormatStyle NoStyle = getLLVMStyle();
509 NoStyle.DisableFormat = true;
510 return NoStyle;
511 }
512
getPredefinedStyle(StringRef Name,FormatStyle::LanguageKind Language,FormatStyle * Style)513 bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language,
514 FormatStyle *Style) {
515 if (Name.equals_lower("llvm")) {
516 *Style = getLLVMStyle();
517 } else if (Name.equals_lower("chromium")) {
518 *Style = getChromiumStyle(Language);
519 } else if (Name.equals_lower("mozilla")) {
520 *Style = getMozillaStyle();
521 } else if (Name.equals_lower("google")) {
522 *Style = getGoogleStyle(Language);
523 } else if (Name.equals_lower("webkit")) {
524 *Style = getWebKitStyle();
525 } else if (Name.equals_lower("gnu")) {
526 *Style = getGNUStyle();
527 } else if (Name.equals_lower("none")) {
528 *Style = getNoStyle();
529 } else {
530 return false;
531 }
532
533 Style->Language = Language;
534 return true;
535 }
536
parseConfiguration(StringRef Text,FormatStyle * Style)537 std::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
538 assert(Style);
539 FormatStyle::LanguageKind Language = Style->Language;
540 assert(Language != FormatStyle::LK_None);
541 if (Text.trim().empty())
542 return make_error_code(ParseError::Error);
543
544 std::vector<FormatStyle> Styles;
545 llvm::yaml::Input Input(Text);
546 // DocumentListTraits<vector<FormatStyle>> uses the context to get default
547 // values for the fields, keys for which are missing from the configuration.
548 // Mapping also uses the context to get the language to find the correct
549 // base style.
550 Input.setContext(Style);
551 Input >> Styles;
552 if (Input.error())
553 return Input.error();
554
555 for (unsigned i = 0; i < Styles.size(); ++i) {
556 // Ensures that only the first configuration can skip the Language option.
557 if (Styles[i].Language == FormatStyle::LK_None && i != 0)
558 return make_error_code(ParseError::Error);
559 // Ensure that each language is configured at most once.
560 for (unsigned j = 0; j < i; ++j) {
561 if (Styles[i].Language == Styles[j].Language) {
562 DEBUG(llvm::dbgs()
563 << "Duplicate languages in the config file on positions " << j
564 << " and " << i << "\n");
565 return make_error_code(ParseError::Error);
566 }
567 }
568 }
569 // Look for a suitable configuration starting from the end, so we can
570 // find the configuration for the specific language first, and the default
571 // configuration (which can only be at slot 0) after it.
572 for (int i = Styles.size() - 1; i >= 0; --i) {
573 if (Styles[i].Language == Language ||
574 Styles[i].Language == FormatStyle::LK_None) {
575 *Style = Styles[i];
576 Style->Language = Language;
577 return make_error_code(ParseError::Success);
578 }
579 }
580 return make_error_code(ParseError::Unsuitable);
581 }
582
configurationAsText(const FormatStyle & Style)583 std::string configurationAsText(const FormatStyle &Style) {
584 std::string Text;
585 llvm::raw_string_ostream Stream(Text);
586 llvm::yaml::Output Output(Stream);
587 // We use the same mapping method for input and output, so we need a non-const
588 // reference here.
589 FormatStyle NonConstStyle = Style;
590 Output << NonConstStyle;
591 return Stream.str();
592 }
593
594 namespace {
595
596 class FormatTokenLexer {
597 public:
// Creates a lexer for the buffer of file \p ID. The underlying Lexer is put
// into keep-whitespace mode, and the identifiers of Style.ForEachMacros are
// looked up in the identifier table and stored sorted.
FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style,
                 encoding::Encoding Encoding)
    : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
      LessStashed(false), Column(0), TrailingWhitespace(0),
      SourceMgr(SourceMgr), ID(ID), Style(Style),
      IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
      Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false) {
  Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
                      getFormattingLangOpts(Style)));
  Lex->SetKeepWhitespaceMode(true);

  for (const std::string &MacroName : Style.ForEachMacros)
    ForEachMacros.push_back(&IdentTable.get(MacroName));
  std::sort(ForEachMacros.begin(), ForEachMacros.end());
}
613
lex()614 ArrayRef<FormatToken *> lex() {
615 assert(Tokens.empty());
616 assert(FirstInLineIndex == 0);
617 do {
618 Tokens.push_back(getNextToken());
619 tryMergePreviousTokens();
620 if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
621 FirstInLineIndex = Tokens.size() - 1;
622 } while (Tokens.back()->Tok.isNot(tok::eof));
623 return Tokens;
624 }
625
getKeywords()626 const AdditionalKeywords &getKeywords() { return Keywords; }
627
628 private:
tryMergePreviousTokens()629 void tryMergePreviousTokens() {
630 if (tryMerge_TMacro())
631 return;
632 if (tryMergeConflictMarkers())
633 return;
634 if (tryMergeLessLess())
635 return;
636
637 if (Style.Language == FormatStyle::LK_JavaScript) {
638 if (tryMergeJSRegexLiteral())
639 return;
640 if (tryMergeEscapeSequence())
641 return;
642 if (tryMergeTemplateString())
643 return;
644
645 static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
646 static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal,
647 tok::equal};
648 static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
649 tok::greaterequal};
650 static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater};
651 // FIXME: We probably need to change token type to mimic operator with the
652 // correct priority.
653 if (tryMergeTokens(JSIdentity))
654 return;
655 if (tryMergeTokens(JSNotIdentity))
656 return;
657 if (tryMergeTokens(JSShiftEqual))
658 return;
659 if (tryMergeTokens(JSRightArrow))
660 return;
661 }
662 }
663
tryMergeLessLess()664 bool tryMergeLessLess() {
665 // Merge X,less,less,Y into X,lessless,Y unless X or Y is less.
666 if (Tokens.size() < 3)
667 return false;
668
669 bool FourthTokenIsLess = false;
670 if (Tokens.size() > 3)
671 FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less);
672
673 auto First = Tokens.end() - 3;
674 if (First[2]->is(tok::less) || First[1]->isNot(tok::less) ||
675 First[0]->isNot(tok::less) || FourthTokenIsLess)
676 return false;
677
678 // Only merge if there currently is no whitespace between the two "<".
679 if (First[1]->WhitespaceRange.getBegin() !=
680 First[1]->WhitespaceRange.getEnd())
681 return false;
682
683 First[0]->Tok.setKind(tok::lessless);
684 First[0]->TokenText = "<<";
685 First[0]->ColumnWidth += 1;
686 Tokens.erase(Tokens.end() - 2);
687 return true;
688 }
689
tryMergeTokens(ArrayRef<tok::TokenKind> Kinds)690 bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds) {
691 if (Tokens.size() < Kinds.size())
692 return false;
693
694 SmallVectorImpl<FormatToken *>::const_iterator First =
695 Tokens.end() - Kinds.size();
696 if (!First[0]->is(Kinds[0]))
697 return false;
698 unsigned AddLength = 0;
699 for (unsigned i = 1; i < Kinds.size(); ++i) {
700 if (!First[i]->is(Kinds[i]) ||
701 First[i]->WhitespaceRange.getBegin() !=
702 First[i]->WhitespaceRange.getEnd())
703 return false;
704 AddLength += First[i]->TokenText.size();
705 }
706 Tokens.resize(Tokens.size() - Kinds.size() + 1);
707 First[0]->TokenText = StringRef(First[0]->TokenText.data(),
708 First[0]->TokenText.size() + AddLength);
709 First[0]->ColumnWidth += AddLength;
710 return true;
711 }
712
713 // Tries to merge an escape sequence, i.e. a "\\" and the following
714 // character. Use e.g. inside JavaScript regex literals.
tryMergeEscapeSequence()715 bool tryMergeEscapeSequence() {
716 if (Tokens.size() < 2)
717 return false;
718 FormatToken *Previous = Tokens[Tokens.size() - 2];
719 if (Previous->isNot(tok::unknown) || Previous->TokenText != "\\")
720 return false;
721 ++Previous->ColumnWidth;
722 StringRef Text = Previous->TokenText;
723 Previous->TokenText = StringRef(Text.data(), Text.size() + 1);
724 resetLexer(SourceMgr.getFileOffset(Tokens.back()->Tok.getLocation()) + 1);
725 Tokens.resize(Tokens.size() - 1);
726 Column = Previous->OriginalColumn + Previous->ColumnWidth;
727 return true;
728 }
729
730 // Try to determine whether the current token ends a JavaScript regex literal.
731 // We heuristically assume that this is a regex literal if we find two
732 // unescaped slashes on a line and the token before the first slash is one of
733 // "(;,{}![:?", a binary operator or 'return', as those cannot be followed by
734 // a division.
tryMergeJSRegexLiteral()735 bool tryMergeJSRegexLiteral() {
736 if (Tokens.size() < 2)
737 return false;
738 // If a regex literal ends in "\//", this gets represented by an unknown
739 // token "\" and a comment.
740 bool MightEndWithEscapedSlash =
741 Tokens.back()->is(tok::comment) &&
742 Tokens.back()->TokenText.startswith("//") &&
743 Tokens[Tokens.size() - 2]->TokenText == "\\";
744 if (!MightEndWithEscapedSlash &&
745 (Tokens.back()->isNot(tok::slash) ||
746 (Tokens[Tokens.size() - 2]->is(tok::unknown) &&
747 Tokens[Tokens.size() - 2]->TokenText == "\\")))
748 return false;
749 unsigned TokenCount = 0;
750 unsigned LastColumn = Tokens.back()->OriginalColumn;
751 for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
752 ++TokenCount;
753 if (I[0]->is(tok::slash) && I + 1 != E &&
754 (I[1]->isOneOf(tok::l_paren, tok::semi, tok::l_brace, tok::r_brace,
755 tok::exclaim, tok::l_square, tok::colon, tok::comma,
756 tok::question, tok::kw_return) ||
757 I[1]->isBinaryOperator())) {
758 if (MightEndWithEscapedSlash) {
759 // This regex literal ends in '\//'. Skip past the '//' of the last
760 // token and re-start lexing from there.
761 SourceLocation Loc = Tokens.back()->Tok.getLocation();
762 resetLexer(SourceMgr.getFileOffset(Loc) + 2);
763 }
764 Tokens.resize(Tokens.size() - TokenCount);
765 Tokens.back()->Tok.setKind(tok::unknown);
766 Tokens.back()->Type = TT_RegexLiteral;
767 Tokens.back()->ColumnWidth += LastColumn - I[0]->OriginalColumn;
768 return true;
769 }
770
771 // There can't be a newline inside a regex literal.
772 if (I[0]->NewlinesBefore > 0)
773 return false;
774 }
775 return false;
776 }
777
tryMergeTemplateString()778 bool tryMergeTemplateString() {
779 if (Tokens.size() < 2)
780 return false;
781
782 FormatToken *EndBacktick = Tokens.back();
783 // Backticks get lexed as tok:unknown tokens. If a template string contains
784 // a comment start, it gets lexed as a tok::comment, or tok::unknown if
785 // unterminated.
786 if (!EndBacktick->isOneOf(tok::comment, tok::unknown))
787 return false;
788 size_t CommentBacktickPos = EndBacktick->TokenText.find('`');
789 // Unknown token that's not actually a backtick, or a comment that doesn't
790 // contain a backtick.
791 if (CommentBacktickPos == StringRef::npos)
792 return false;
793
794 unsigned TokenCount = 0;
795 bool IsMultiline = false;
796 unsigned EndColumnInFirstLine = 0;
797 for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; I++) {
798 ++TokenCount;
799 if (I[0]->NewlinesBefore > 0 || I[0]->IsMultiline)
800 IsMultiline = true;
801
802 // If there was a preceding template string, this must be the start of a
803 // template string, not the end.
804 if (I[0]->is(TT_TemplateString))
805 return false;
806
807 if (I[0]->isNot(tok::unknown) || I[0]->TokenText != "`") {
808 // Keep track of the rhs offset of the last token to wrap across lines -
809 // its the rhs offset of the first line of the template string, used to
810 // determine its width.
811 if (I[0]->IsMultiline)
812 EndColumnInFirstLine = I[0]->OriginalColumn + I[0]->ColumnWidth;
813 // If the token has newlines, the token before it (if it exists) is the
814 // rhs end of the previous line.
815 if (I[0]->NewlinesBefore > 0 && (I + 1 != E))
816 EndColumnInFirstLine = I[1]->OriginalColumn + I[1]->ColumnWidth;
817
818 continue;
819 }
820
821 Tokens.resize(Tokens.size() - TokenCount);
822 Tokens.back()->Type = TT_TemplateString;
823 const char *EndOffset =
824 EndBacktick->TokenText.data() + 1 + CommentBacktickPos;
825 if (CommentBacktickPos != 0) {
826 // If the backtick was not the first character (e.g. in a comment),
827 // re-lex after the backtick position.
828 SourceLocation Loc = EndBacktick->Tok.getLocation();
829 resetLexer(SourceMgr.getFileOffset(Loc) + CommentBacktickPos + 1);
830 }
831 Tokens.back()->TokenText =
832 StringRef(Tokens.back()->TokenText.data(),
833 EndOffset - Tokens.back()->TokenText.data());
834 if (IsMultiline) {
835 // ColumnWidth is from backtick to last token in line.
836 // LastLineColumnWidth is 0 to backtick.
837 // x = `some content
838 // until here`;
839 Tokens.back()->ColumnWidth =
840 EndColumnInFirstLine - Tokens.back()->OriginalColumn;
841 Tokens.back()->LastLineColumnWidth = EndBacktick->OriginalColumn;
842 Tokens.back()->IsMultiline = true;
843 } else {
844 // Token simply spans from start to end, +1 for the ` itself.
845 Tokens.back()->ColumnWidth =
846 EndBacktick->OriginalColumn - Tokens.back()->OriginalColumn + 1;
847 }
848 return true;
849 }
850 return false;
851 }
852
tryMerge_TMacro()853 bool tryMerge_TMacro() {
854 if (Tokens.size() < 4)
855 return false;
856 FormatToken *Last = Tokens.back();
857 if (!Last->is(tok::r_paren))
858 return false;
859
860 FormatToken *String = Tokens[Tokens.size() - 2];
861 if (!String->is(tok::string_literal) || String->IsMultiline)
862 return false;
863
864 if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
865 return false;
866
867 FormatToken *Macro = Tokens[Tokens.size() - 4];
868 if (Macro->TokenText != "_T")
869 return false;
870
871 const char *Start = Macro->TokenText.data();
872 const char *End = Last->TokenText.data() + Last->TokenText.size();
873 String->TokenText = StringRef(Start, End - Start);
874 String->IsFirst = Macro->IsFirst;
875 String->LastNewlineOffset = Macro->LastNewlineOffset;
876 String->WhitespaceRange = Macro->WhitespaceRange;
877 String->OriginalColumn = Macro->OriginalColumn;
878 String->ColumnWidth = encoding::columnWidthWithTabs(
879 String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
880 String->NewlinesBefore = Macro->NewlinesBefore;
881 String->HasUnescapedNewline = Macro->HasUnescapedNewline;
882
883 Tokens.pop_back();
884 Tokens.pop_back();
885 Tokens.pop_back();
886 Tokens.back() = String;
887 return true;
888 }
889
tryMergeConflictMarkers()890 bool tryMergeConflictMarkers() {
891 if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
892 return false;
893
894 // Conflict lines look like:
895 // <marker> <text from the vcs>
896 // For example:
897 // >>>>>>> /file/in/file/system at revision 1234
898 //
899 // We merge all tokens in a line that starts with a conflict marker
900 // into a single token with a special token type that the unwrapped line
901 // parser will use to correctly rebuild the underlying code.
902
903 FileID ID;
904 // Get the position of the first token in the line.
905 unsigned FirstInLineOffset;
906 std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
907 Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
908 StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer();
909 // Calculate the offset of the start of the current line.
910 auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
911 if (LineOffset == StringRef::npos) {
912 LineOffset = 0;
913 } else {
914 ++LineOffset;
915 }
916
917 auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);
918 StringRef LineStart;
919 if (FirstSpace == StringRef::npos) {
920 LineStart = Buffer.substr(LineOffset);
921 } else {
922 LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
923 }
924
925 TokenType Type = TT_Unknown;
926 if (LineStart == "<<<<<<<" || LineStart == ">>>>") {
927 Type = TT_ConflictStart;
928 } else if (LineStart == "|||||||" || LineStart == "=======" ||
929 LineStart == "====") {
930 Type = TT_ConflictAlternative;
931 } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
932 Type = TT_ConflictEnd;
933 }
934
935 if (Type != TT_Unknown) {
936 FormatToken *Next = Tokens.back();
937
938 Tokens.resize(FirstInLineIndex + 1);
939 // We do not need to build a complete token here, as we will skip it
940 // during parsing anyway (as we must not touch whitespace around conflict
941 // markers).
942 Tokens.back()->Type = Type;
943 Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
944
945 Tokens.push_back(Next);
946 return true;
947 }
948
949 return false;
950 }
951
getStashedToken()952 FormatToken *getStashedToken() {
953 // Create a synthesized second '>' or '<' token.
954 Token Tok = FormatTok->Tok;
955 StringRef TokenText = FormatTok->TokenText;
956
957 unsigned OriginalColumn = FormatTok->OriginalColumn;
958 FormatTok = new (Allocator.Allocate()) FormatToken;
959 FormatTok->Tok = Tok;
960 SourceLocation TokLocation =
961 FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1);
962 FormatTok->Tok.setLocation(TokLocation);
963 FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation);
964 FormatTok->TokenText = TokenText;
965 FormatTok->ColumnWidth = 1;
966 FormatTok->OriginalColumn = OriginalColumn + 1;
967
968 return FormatTok;
969 }
970
getNextToken()971 FormatToken *getNextToken() {
972 if (GreaterStashed) {
973 GreaterStashed = false;
974 return getStashedToken();
975 }
976 if (LessStashed) {
977 LessStashed = false;
978 return getStashedToken();
979 }
980
981 FormatTok = new (Allocator.Allocate()) FormatToken;
982 readRawToken(*FormatTok);
983 SourceLocation WhitespaceStart =
984 FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
985 FormatTok->IsFirst = IsFirstToken;
986 IsFirstToken = false;
987
988 // Consume and record whitespace until we find a significant token.
989 unsigned WhitespaceLength = TrailingWhitespace;
990 while (FormatTok->Tok.is(tok::unknown)) {
991 // FIXME: This miscounts tok:unknown tokens that are not just
992 // whitespace, e.g. a '`' character.
993 for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
994 switch (FormatTok->TokenText[i]) {
995 case '\n':
996 ++FormatTok->NewlinesBefore;
997 // FIXME: This is technically incorrect, as it could also
998 // be a literal backslash at the end of the line.
999 if (i == 0 || (FormatTok->TokenText[i - 1] != '\\' &&
1000 (FormatTok->TokenText[i - 1] != '\r' || i == 1 ||
1001 FormatTok->TokenText[i - 2] != '\\')))
1002 FormatTok->HasUnescapedNewline = true;
1003 FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
1004 Column = 0;
1005 break;
1006 case '\r':
1007 FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
1008 Column = 0;
1009 break;
1010 case '\f':
1011 case '\v':
1012 Column = 0;
1013 break;
1014 case ' ':
1015 ++Column;
1016 break;
1017 case '\t':
1018 Column += Style.TabWidth - Column % Style.TabWidth;
1019 break;
1020 case '\\':
1021 if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' &&
1022 FormatTok->TokenText[i + 1] != '\n'))
1023 FormatTok->Type = TT_ImplicitStringLiteral;
1024 break;
1025 default:
1026 FormatTok->Type = TT_ImplicitStringLiteral;
1027 ++Column;
1028 break;
1029 }
1030 }
1031
1032 if (FormatTok->is(TT_ImplicitStringLiteral))
1033 break;
1034 WhitespaceLength += FormatTok->Tok.getLength();
1035
1036 readRawToken(*FormatTok);
1037 }
1038
1039 // In case the token starts with escaped newlines, we want to
1040 // take them into account as whitespace - this pattern is quite frequent
1041 // in macro definitions.
1042 // FIXME: Add a more explicit test.
1043 while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
1044 FormatTok->TokenText[1] == '\n') {
1045 ++FormatTok->NewlinesBefore;
1046 WhitespaceLength += 2;
1047 Column = 0;
1048 FormatTok->TokenText = FormatTok->TokenText.substr(2);
1049 }
1050
1051 FormatTok->WhitespaceRange = SourceRange(
1052 WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
1053
1054 FormatTok->OriginalColumn = Column;
1055
1056 TrailingWhitespace = 0;
1057 if (FormatTok->Tok.is(tok::comment)) {
1058 // FIXME: Add the trimmed whitespace to Column.
1059 StringRef UntrimmedText = FormatTok->TokenText;
1060 FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
1061 TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
1062 } else if (FormatTok->Tok.is(tok::raw_identifier)) {
1063 IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
1064 FormatTok->Tok.setIdentifierInfo(&Info);
1065 FormatTok->Tok.setKind(Info.getTokenID());
1066 if (Style.Language == FormatStyle::LK_Java &&
1067 FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete)) {
1068 FormatTok->Tok.setKind(tok::identifier);
1069 FormatTok->Tok.setIdentifierInfo(nullptr);
1070 }
1071 } else if (FormatTok->Tok.is(tok::greatergreater)) {
1072 FormatTok->Tok.setKind(tok::greater);
1073 FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
1074 GreaterStashed = true;
1075 } else if (FormatTok->Tok.is(tok::lessless)) {
1076 FormatTok->Tok.setKind(tok::less);
1077 FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
1078 LessStashed = true;
1079 }
1080
1081 // Now FormatTok is the next non-whitespace token.
1082
1083 StringRef Text = FormatTok->TokenText;
1084 size_t FirstNewlinePos = Text.find('\n');
1085 if (FirstNewlinePos == StringRef::npos) {
1086 // FIXME: ColumnWidth actually depends on the start column, we need to
1087 // take this into account when the token is moved.
1088 FormatTok->ColumnWidth =
1089 encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
1090 Column += FormatTok->ColumnWidth;
1091 } else {
1092 FormatTok->IsMultiline = true;
1093 // FIXME: ColumnWidth actually depends on the start column, we need to
1094 // take this into account when the token is moved.
1095 FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
1096 Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
1097
1098 // The last line of the token always starts in column 0.
1099 // Thus, the length can be precomputed even in the presence of tabs.
1100 FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
1101 Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
1102 Encoding);
1103 Column = FormatTok->LastLineColumnWidth;
1104 }
1105
1106 FormatTok->IsForEachMacro =
1107 std::binary_search(ForEachMacros.begin(), ForEachMacros.end(),
1108 FormatTok->Tok.getIdentifierInfo());
1109
1110 return FormatTok;
1111 }
1112
1113 FormatToken *FormatTok;
1114 bool IsFirstToken;
1115 bool GreaterStashed, LessStashed;
1116 unsigned Column;
1117 unsigned TrailingWhitespace;
1118 std::unique_ptr<Lexer> Lex;
1119 SourceManager &SourceMgr;
1120 FileID ID;
1121 FormatStyle &Style;
1122 IdentifierTable IdentTable;
1123 AdditionalKeywords Keywords;
1124 encoding::Encoding Encoding;
1125 llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
1126 // Index (in 'Tokens') of the last token that starts a new line.
1127 unsigned FirstInLineIndex;
1128 SmallVector<FormatToken *, 16> Tokens;
1129 SmallVector<IdentifierInfo *, 8> ForEachMacros;
1130
1131 bool FormattingDisabled;
1132
readRawToken(FormatToken & Tok)1133 void readRawToken(FormatToken &Tok) {
1134 Lex->LexFromRawLexer(Tok.Tok);
1135 Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
1136 Tok.Tok.getLength());
1137 // For formatting, treat unterminated string literals like normal string
1138 // literals.
1139 if (Tok.is(tok::unknown)) {
1140 if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
1141 Tok.Tok.setKind(tok::string_literal);
1142 Tok.IsUnterminatedLiteral = true;
1143 } else if (Style.Language == FormatStyle::LK_JavaScript &&
1144 Tok.TokenText == "''") {
1145 Tok.Tok.setKind(tok::char_constant);
1146 }
1147 }
1148
1149 if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" ||
1150 Tok.TokenText == "/* clang-format on */")) {
1151 FormattingDisabled = false;
1152 }
1153
1154 Tok.Finalized = FormattingDisabled;
1155
1156 if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" ||
1157 Tok.TokenText == "/* clang-format off */")) {
1158 FormattingDisabled = true;
1159 }
1160 }
1161
resetLexer(unsigned Offset)1162 void resetLexer(unsigned Offset) {
1163 StringRef Buffer = SourceMgr.getBufferData(ID);
1164 Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID),
1165 getFormattingLangOpts(Style), Buffer.begin(),
1166 Buffer.begin() + Offset, Buffer.end()));
1167 Lex->SetKeepWhitespaceMode(true);
1168 }
1169 };
1170
getLanguageName(FormatStyle::LanguageKind Language)1171 static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
1172 switch (Language) {
1173 case FormatStyle::LK_Cpp:
1174 return "C++";
1175 case FormatStyle::LK_Java:
1176 return "Java";
1177 case FormatStyle::LK_JavaScript:
1178 return "JavaScript";
1179 case FormatStyle::LK_Proto:
1180 return "Proto";
1181 default:
1182 return "Unknown";
1183 }
1184 }
1185
1186 class Formatter : public UnwrappedLineConsumer {
1187 public:
Formatter(const FormatStyle & Style,SourceManager & SourceMgr,FileID ID,ArrayRef<CharSourceRange> Ranges)1188 Formatter(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID,
1189 ArrayRef<CharSourceRange> Ranges)
1190 : Style(Style), ID(ID), SourceMgr(SourceMgr),
1191 Whitespaces(SourceMgr, Style,
1192 inputUsesCRLF(SourceMgr.getBufferData(ID))),
1193 Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
1194 Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))) {
1195 DEBUG(llvm::dbgs() << "File encoding: "
1196 << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
1197 : "unknown")
1198 << "\n");
1199 DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
1200 << "\n");
1201 }
1202
format()1203 tooling::Replacements format() {
1204 tooling::Replacements Result;
1205 FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding);
1206
1207 UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(),
1208 *this);
1209 bool StructuralError = Parser.parse();
1210 assert(UnwrappedLines.rbegin()->empty());
1211 for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
1212 ++Run) {
1213 DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
1214 SmallVector<AnnotatedLine *, 16> AnnotatedLines;
1215 for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
1216 AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
1217 }
1218 tooling::Replacements RunResult =
1219 format(AnnotatedLines, StructuralError, Tokens);
1220 DEBUG({
1221 llvm::dbgs() << "Replacements for run " << Run << ":\n";
1222 for (tooling::Replacements::iterator I = RunResult.begin(),
1223 E = RunResult.end();
1224 I != E; ++I) {
1225 llvm::dbgs() << I->toString() << "\n";
1226 }
1227 });
1228 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1229 delete AnnotatedLines[i];
1230 }
1231 Result.insert(RunResult.begin(), RunResult.end());
1232 Whitespaces.reset();
1233 }
1234 return Result;
1235 }
1236
format(SmallVectorImpl<AnnotatedLine * > & AnnotatedLines,bool StructuralError,FormatTokenLexer & Tokens)1237 tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1238 bool StructuralError, FormatTokenLexer &Tokens) {
1239 TokenAnnotator Annotator(Style, Tokens.getKeywords());
1240 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1241 Annotator.annotate(*AnnotatedLines[i]);
1242 }
1243 deriveLocalStyle(AnnotatedLines);
1244 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1245 Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
1246 }
1247 computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end());
1248
1249 Annotator.setCommentLineLevels(AnnotatedLines);
1250 ContinuationIndenter Indenter(Style, Tokens.getKeywords(), SourceMgr,
1251 Whitespaces, Encoding,
1252 BinPackInconclusiveFunctions);
1253 UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, Style,
1254 Tokens.getKeywords());
1255 Formatter.format(AnnotatedLines, /*DryRun=*/false);
1256 return Whitespaces.generateReplacements();
1257 }
1258
1259 private:
1260 // Determines which lines are affected by the SourceRanges given as input.
1261 // Returns \c true if at least one line between I and E or one of their
1262 // children is affected.
computeAffectedLines(SmallVectorImpl<AnnotatedLine * >::iterator I,SmallVectorImpl<AnnotatedLine * >::iterator E)1263 bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I,
1264 SmallVectorImpl<AnnotatedLine *>::iterator E) {
1265 bool SomeLineAffected = false;
1266 const AnnotatedLine *PreviousLine = nullptr;
1267 while (I != E) {
1268 AnnotatedLine *Line = *I;
1269 Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First);
1270
1271 // If a line is part of a preprocessor directive, it needs to be formatted
1272 // if any token within the directive is affected.
1273 if (Line->InPPDirective) {
1274 FormatToken *Last = Line->Last;
1275 SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1;
1276 while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) {
1277 Last = (*PPEnd)->Last;
1278 ++PPEnd;
1279 }
1280
1281 if (affectsTokenRange(*Line->First, *Last,
1282 /*IncludeLeadingNewlines=*/false)) {
1283 SomeLineAffected = true;
1284 markAllAsAffected(I, PPEnd);
1285 }
1286 I = PPEnd;
1287 continue;
1288 }
1289
1290 if (nonPPLineAffected(Line, PreviousLine))
1291 SomeLineAffected = true;
1292
1293 PreviousLine = Line;
1294 ++I;
1295 }
1296 return SomeLineAffected;
1297 }
1298
1299 // Determines whether 'Line' is affected by the SourceRanges given as input.
1300 // Returns \c true if line or one if its children is affected.
nonPPLineAffected(AnnotatedLine * Line,const AnnotatedLine * PreviousLine)1301 bool nonPPLineAffected(AnnotatedLine *Line,
1302 const AnnotatedLine *PreviousLine) {
1303 bool SomeLineAffected = false;
1304 Line->ChildrenAffected =
1305 computeAffectedLines(Line->Children.begin(), Line->Children.end());
1306 if (Line->ChildrenAffected)
1307 SomeLineAffected = true;
1308
1309 // Stores whether one of the line's tokens is directly affected.
1310 bool SomeTokenAffected = false;
1311 // Stores whether we need to look at the leading newlines of the next token
1312 // in order to determine whether it was affected.
1313 bool IncludeLeadingNewlines = false;
1314
1315 // Stores whether the first child line of any of this line's tokens is
1316 // affected.
1317 bool SomeFirstChildAffected = false;
1318
1319 for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
1320 // Determine whether 'Tok' was affected.
1321 if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines))
1322 SomeTokenAffected = true;
1323
1324 // Determine whether the first child of 'Tok' was affected.
1325 if (!Tok->Children.empty() && Tok->Children.front()->Affected)
1326 SomeFirstChildAffected = true;
1327
1328 IncludeLeadingNewlines = Tok->Children.empty();
1329 }
1330
1331 // Was this line moved, i.e. has it previously been on the same line as an
1332 // affected line?
1333 bool LineMoved = PreviousLine && PreviousLine->Affected &&
1334 Line->First->NewlinesBefore == 0;
1335
1336 bool IsContinuedComment =
1337 Line->First->is(tok::comment) && Line->First->Next == nullptr &&
1338 Line->First->NewlinesBefore < 2 && PreviousLine &&
1339 PreviousLine->Affected && PreviousLine->Last->is(tok::comment);
1340
1341 if (SomeTokenAffected || SomeFirstChildAffected || LineMoved ||
1342 IsContinuedComment) {
1343 Line->Affected = true;
1344 SomeLineAffected = true;
1345 }
1346 return SomeLineAffected;
1347 }
1348
1349 // Marks all lines between I and E as well as all their children as affected.
markAllAsAffected(SmallVectorImpl<AnnotatedLine * >::iterator I,SmallVectorImpl<AnnotatedLine * >::iterator E)1350 void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
1351 SmallVectorImpl<AnnotatedLine *>::iterator E) {
1352 while (I != E) {
1353 (*I)->Affected = true;
1354 markAllAsAffected((*I)->Children.begin(), (*I)->Children.end());
1355 ++I;
1356 }
1357 }
1358
1359 // Returns true if the range from 'First' to 'Last' intersects with one of the
1360 // input ranges.
affectsTokenRange(const FormatToken & First,const FormatToken & Last,bool IncludeLeadingNewlines)1361 bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
1362 bool IncludeLeadingNewlines) {
1363 SourceLocation Start = First.WhitespaceRange.getBegin();
1364 if (!IncludeLeadingNewlines)
1365 Start = Start.getLocWithOffset(First.LastNewlineOffset);
1366 SourceLocation End = Last.getStartOfNonWhitespace();
1367 End = End.getLocWithOffset(Last.TokenText.size());
1368 CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
1369 return affectsCharSourceRange(Range);
1370 }
1371
1372 // Returns true if one of the input ranges intersect the leading empty lines
1373 // before 'Tok'.
affectsLeadingEmptyLines(const FormatToken & Tok)1374 bool affectsLeadingEmptyLines(const FormatToken &Tok) {
1375 CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
1376 Tok.WhitespaceRange.getBegin(),
1377 Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
1378 return affectsCharSourceRange(EmptyLineRange);
1379 }
1380
1381 // Returns true if 'Range' intersects with one of the input ranges.
affectsCharSourceRange(const CharSourceRange & Range)1382 bool affectsCharSourceRange(const CharSourceRange &Range) {
1383 for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
1384 E = Ranges.end();
1385 I != E; ++I) {
1386 if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
1387 !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
1388 return true;
1389 }
1390 return false;
1391 }
1392
inputUsesCRLF(StringRef Text)1393 static bool inputUsesCRLF(StringRef Text) {
1394 return Text.count('\r') * 2 > Text.count('\n');
1395 }
1396
1397 void
deriveLocalStyle(const SmallVectorImpl<AnnotatedLine * > & AnnotatedLines)1398 deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
1399 unsigned CountBoundToVariable = 0;
1400 unsigned CountBoundToType = 0;
1401 bool HasCpp03IncompatibleFormat = false;
1402 bool HasBinPackedFunction = false;
1403 bool HasOnePerLineFunction = false;
1404 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1405 if (!AnnotatedLines[i]->First->Next)
1406 continue;
1407 FormatToken *Tok = AnnotatedLines[i]->First->Next;
1408 while (Tok->Next) {
1409 if (Tok->is(TT_PointerOrReference)) {
1410 bool SpacesBefore =
1411 Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
1412 bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() !=
1413 Tok->Next->WhitespaceRange.getEnd();
1414 if (SpacesBefore && !SpacesAfter)
1415 ++CountBoundToVariable;
1416 else if (!SpacesBefore && SpacesAfter)
1417 ++CountBoundToType;
1418 }
1419
1420 if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
1421 if (Tok->is(tok::coloncolon) && Tok->Previous->is(TT_TemplateOpener))
1422 HasCpp03IncompatibleFormat = true;
1423 if (Tok->is(TT_TemplateCloser) &&
1424 Tok->Previous->is(TT_TemplateCloser))
1425 HasCpp03IncompatibleFormat = true;
1426 }
1427
1428 if (Tok->PackingKind == PPK_BinPacked)
1429 HasBinPackedFunction = true;
1430 if (Tok->PackingKind == PPK_OnePerLine)
1431 HasOnePerLineFunction = true;
1432
1433 Tok = Tok->Next;
1434 }
1435 }
1436 if (Style.DerivePointerAlignment) {
1437 if (CountBoundToType > CountBoundToVariable)
1438 Style.PointerAlignment = FormatStyle::PAS_Left;
1439 else if (CountBoundToType < CountBoundToVariable)
1440 Style.PointerAlignment = FormatStyle::PAS_Right;
1441 }
1442 if (Style.Standard == FormatStyle::LS_Auto) {
1443 Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
1444 : FormatStyle::LS_Cpp03;
1445 }
1446 BinPackInconclusiveFunctions =
1447 HasBinPackedFunction || !HasOnePerLineFunction;
1448 }
1449
consumeUnwrappedLine(const UnwrappedLine & TheLine)1450 void consumeUnwrappedLine(const UnwrappedLine &TheLine) override {
1451 assert(!UnwrappedLines.empty());
1452 UnwrappedLines.back().push_back(TheLine);
1453 }
1454
finishRun()1455 void finishRun() override {
1456 UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
1457 }
1458
1459 FormatStyle Style;
1460 FileID ID;
1461 SourceManager &SourceMgr;
1462 WhitespaceManager Whitespaces;
1463 SmallVector<CharSourceRange, 8> Ranges;
1464 SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
1465
1466 encoding::Encoding Encoding;
1467 bool BinPackInconclusiveFunctions;
1468 };
1469
1470 } // end anonymous namespace
1471
reformat(const FormatStyle & Style,Lexer & Lex,SourceManager & SourceMgr,ArrayRef<CharSourceRange> Ranges)1472 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
1473 SourceManager &SourceMgr,
1474 ArrayRef<CharSourceRange> Ranges) {
1475 if (Style.DisableFormat)
1476 return tooling::Replacements();
1477 return reformat(Style, SourceMgr,
1478 SourceMgr.getFileID(Lex.getSourceLocation()), Ranges);
1479 }
1480
reformat(const FormatStyle & Style,SourceManager & SourceMgr,FileID ID,ArrayRef<CharSourceRange> Ranges)1481 tooling::Replacements reformat(const FormatStyle &Style,
1482 SourceManager &SourceMgr, FileID ID,
1483 ArrayRef<CharSourceRange> Ranges) {
1484 if (Style.DisableFormat)
1485 return tooling::Replacements();
1486 Formatter formatter(Style, SourceMgr, ID, Ranges);
1487 return formatter.format();
1488 }
1489
reformat(const FormatStyle & Style,StringRef Code,ArrayRef<tooling::Range> Ranges,StringRef FileName)1490 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
1491 ArrayRef<tooling::Range> Ranges,
1492 StringRef FileName) {
1493 if (Style.DisableFormat)
1494 return tooling::Replacements();
1495
1496 FileManager Files((FileSystemOptions()));
1497 DiagnosticsEngine Diagnostics(
1498 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
1499 new DiagnosticOptions);
1500 SourceManager SourceMgr(Diagnostics, Files);
1501 std::unique_ptr<llvm::MemoryBuffer> Buf =
1502 llvm::MemoryBuffer::getMemBuffer(Code, FileName);
1503 const clang::FileEntry *Entry =
1504 Files.getVirtualFile(FileName, Buf->getBufferSize(), 0);
1505 SourceMgr.overrideFileContents(Entry, std::move(Buf));
1506 FileID ID =
1507 SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User);
1508 SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
1509 std::vector<CharSourceRange> CharRanges;
1510 for (const tooling::Range &Range : Ranges) {
1511 SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
1512 SourceLocation End = Start.getLocWithOffset(Range.getLength());
1513 CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
1514 }
1515 return reformat(Style, SourceMgr, ID, CharRanges);
1516 }
1517
getFormattingLangOpts(const FormatStyle & Style)1518 LangOptions getFormattingLangOpts(const FormatStyle &Style) {
1519 LangOptions LangOpts;
1520 LangOpts.CPlusPlus = 1;
1521 LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
1522 LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
1523 LangOpts.LineComment = 1;
1524 bool AlternativeOperators = Style.Language == FormatStyle::LK_Cpp;
1525 LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0;
1526 LangOpts.Bool = 1;
1527 LangOpts.ObjC1 = 1;
1528 LangOpts.ObjC2 = 1;
1529 LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally.
1530 return LangOpts;
1531 }
1532
// Help text describing the accepted values of the -style option: a
// predefined style name, "file", or an inline "{key: value, ...}"
// configuration. One literal per sentence.
const char *StyleOptionHelpDescription =
    "Coding style, currently supports:\n LLVM, Google, Chromium, Mozilla, "
    "WebKit.\n"
    "Use -style=file to load style configuration from\n.clang-format file "
    "located in one of the parent\ndirectories of the source file (or "
    "current\ndirectory for stdin).\n"
    "Use -style=\"{key: value, ...}\" to set specific\nparameters, e.g.:\n"
    " -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
1543
getLanguageByFileName(StringRef FileName)1544 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
1545 if (FileName.endswith(".java")) {
1546 return FormatStyle::LK_Java;
1547 } else if (FileName.endswith_lower(".js") || FileName.endswith_lower(".ts")) {
1548 // JavaScript or TypeScript.
1549 return FormatStyle::LK_JavaScript;
1550 } else if (FileName.endswith_lower(".proto") ||
1551 FileName.endswith_lower(".protodevel")) {
1552 return FormatStyle::LK_Proto;
1553 }
1554 return FormatStyle::LK_Cpp;
1555 }
1556
getStyle(StringRef StyleName,StringRef FileName,StringRef FallbackStyle)1557 FormatStyle getStyle(StringRef StyleName, StringRef FileName,
1558 StringRef FallbackStyle) {
1559 FormatStyle Style = getLLVMStyle();
1560 Style.Language = getLanguageByFileName(FileName);
1561 if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) {
1562 llvm::errs() << "Invalid fallback style \"" << FallbackStyle
1563 << "\" using LLVM style\n";
1564 return Style;
1565 }
1566
1567 if (StyleName.startswith("{")) {
1568 // Parse YAML/JSON style from the command line.
1569 if (std::error_code ec = parseConfiguration(StyleName, &Style)) {
1570 llvm::errs() << "Error parsing -style: " << ec.message() << ", using "
1571 << FallbackStyle << " style\n";
1572 }
1573 return Style;
1574 }
1575
1576 if (!StyleName.equals_lower("file")) {
1577 if (!getPredefinedStyle(StyleName, Style.Language, &Style))
1578 llvm::errs() << "Invalid value for -style, using " << FallbackStyle
1579 << " style\n";
1580 return Style;
1581 }
1582
1583 // Look for .clang-format/_clang-format file in the file's parent directories.
1584 SmallString<128> UnsuitableConfigFiles;
1585 SmallString<128> Path(FileName);
1586 llvm::sys::fs::make_absolute(Path);
1587 for (StringRef Directory = Path; !Directory.empty();
1588 Directory = llvm::sys::path::parent_path(Directory)) {
1589 if (!llvm::sys::fs::is_directory(Directory))
1590 continue;
1591 SmallString<128> ConfigFile(Directory);
1592
1593 llvm::sys::path::append(ConfigFile, ".clang-format");
1594 DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1595 bool IsFile = false;
1596 // Ignore errors from is_regular_file: we only need to know if we can read
1597 // the file or not.
1598 llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1599
1600 if (!IsFile) {
1601 // Try _clang-format too, since dotfiles are not commonly used on Windows.
1602 ConfigFile = Directory;
1603 llvm::sys::path::append(ConfigFile, "_clang-format");
1604 DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1605 llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1606 }
1607
1608 if (IsFile) {
1609 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
1610 llvm::MemoryBuffer::getFile(ConfigFile.c_str());
1611 if (std::error_code EC = Text.getError()) {
1612 llvm::errs() << EC.message() << "\n";
1613 break;
1614 }
1615 if (std::error_code ec =
1616 parseConfiguration(Text.get()->getBuffer(), &Style)) {
1617 if (ec == ParseError::Unsuitable) {
1618 if (!UnsuitableConfigFiles.empty())
1619 UnsuitableConfigFiles.append(", ");
1620 UnsuitableConfigFiles.append(ConfigFile);
1621 continue;
1622 }
1623 llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
1624 << "\n";
1625 break;
1626 }
1627 DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
1628 return Style;
1629 }
1630 }
1631 llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle
1632 << " style\n";
1633 if (!UnsuitableConfigFiles.empty()) {
1634 llvm::errs() << "Configuration file(s) do(es) not support "
1635 << getLanguageName(Style.Language) << ": "
1636 << UnsuitableConfigFiles << "\n";
1637 }
1638 return Style;
1639 }
1640
1641 } // namespace format
1642 } // namespace clang
1643