1 //===--- Format.cpp -----------------------------------------*- C++-*------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "Format.h"
9 #include "support/Logger.h"
10 #include "clang/Basic/FileManager.h"
11 #include "clang/Basic/SourceManager.h"
12 #include "clang/Format/Format.h"
13 #include "clang/Lex/Lexer.h"
14 #include "clang/Tooling/Core/Replacement.h"
15 #include "llvm/Support/Unicode.h"
16
17 namespace clang {
18 namespace clangd {
19 namespace {
20
21 /// Append closing brackets )]} to \p Code to make it well-formed.
22 /// Clang-format conservatively refuses to format files with unmatched brackets
23 /// as it isn't sure where the errors are and so can't correct.
24 /// When editing, it's reasonable to assume code before the cursor is complete.
closeBrackets(std::string & Code,const format::FormatStyle & Style)25 void closeBrackets(std::string &Code, const format::FormatStyle &Style) {
26 SourceManagerForFile FileSM("dummy.cpp", Code);
27 auto &SM = FileSM.get();
28 FileID FID = SM.getMainFileID();
29 Lexer Lex(FID, SM.getBufferOrFake(FID), SM,
30 format::getFormattingLangOpts(Style));
31 Token Tok;
32 std::vector<char> Brackets;
33 while (!Lex.LexFromRawLexer(Tok)) {
34 switch(Tok.getKind()) {
35 case tok::l_paren:
36 Brackets.push_back(')');
37 break;
38 case tok::l_brace:
39 Brackets.push_back('}');
40 break;
41 case tok::l_square:
42 Brackets.push_back(']');
43 break;
44 case tok::r_paren:
45 if (!Brackets.empty() && Brackets.back() == ')')
46 Brackets.pop_back();
47 break;
48 case tok::r_brace:
49 if (!Brackets.empty() && Brackets.back() == '}')
50 Brackets.pop_back();
51 break;
52 case tok::r_square:
53 if (!Brackets.empty() && Brackets.back() == ']')
54 Brackets.pop_back();
55 break;
56 default:
57 continue;
58 }
59 }
60 // Attempt to end any open comments first.
61 Code.append("\n// */\n");
62 Code.append(Brackets.rbegin(), Brackets.rend());
63 }
64
commentMarker(llvm::StringRef Line)65 static StringRef commentMarker(llvm::StringRef Line) {
66 for (StringRef Marker : {"///", "//"}){
67 auto I = Line.rfind(Marker);
68 if (I != StringRef::npos)
69 return Line.substr(I, Marker.size());
70 }
71 return "";
72 }
73
firstLine(llvm::StringRef Code)74 llvm::StringRef firstLine(llvm::StringRef Code) {
75 return Code.take_until([](char C) { return C == '\n'; });
76 }
77
lastLine(llvm::StringRef Code)78 llvm::StringRef lastLine(llvm::StringRef Code) {
79 llvm::StringRef Rest = Code;
80 while (!Rest.empty() && Rest.back() != '\n')
81 Rest = Rest.drop_back();
82 return Code.substr(Rest.size());
83 }
84
85 // Filename is needed for tooling::Replacement and some overloads of reformat().
86 // Its value should not affect the outcome. We use the default from reformat().
87 llvm::StringRef Filename = "<stdin>";
88
89 // tooling::Replacement from overlapping StringRefs: From must be part of Code.
replacement(llvm::StringRef Code,llvm::StringRef From,llvm::StringRef To)90 tooling::Replacement replacement(llvm::StringRef Code, llvm::StringRef From,
91 llvm::StringRef To) {
92 assert(From.begin() >= Code.begin() && From.end() <= Code.end());
93 // The filename is required but ignored.
94 return tooling::Replacement(Filename, From.data() - Code.data(),
95 From.size(), To);
96 }
97
98 // High-level representation of incremental formatting changes.
99 // The changes are made in two steps.
100 // 1) a (possibly-empty) set of changes synthesized by clangd (e.g. adding
101 // comment markers when splitting a line comment with a newline).
102 // 2) a selective clang-format run:
103 // - the "source code" passed to clang format is the code up to the cursor,
104 // a placeholder for the cursor, and some closing brackets
105 // - the formatting is restricted to the cursor and (possibly) other ranges
106 // (e.g. the old line when inserting a newline).
107 // - changes before the cursor are applied, those after are discarded.
108 struct IncrementalChanges {
109 // Changes that should be applied before running clang-format.
110 tooling::Replacements Changes;
111 // Ranges of the original source code that should be clang-formatted.
112 // The CursorProxyText will also be formatted.
113 std::vector<tooling::Range> FormatRanges;
114 // The source code that should stand in for the cursor when clang-formatting.
115 // e.g. after inserting a newline, a line-comment at the cursor is used to
116 // ensure that the newline is preserved.
117 std::string CursorPlaceholder;
118 };
119
120 // After a newline:
121 // - we continue any line-comment that was split
122 // - we format the old line in addition to the cursor
123 // - we represent the cursor with a line comment to preserve the newline
getIncrementalChangesAfterNewline(llvm::StringRef Code,unsigned Cursor)124 IncrementalChanges getIncrementalChangesAfterNewline(llvm::StringRef Code,
125 unsigned Cursor) {
126 IncrementalChanges Result;
127 // Before newline, code looked like:
128 // leading^trailing
129 // After newline, code looks like:
130 // leading
131 // indentation^trailing
132 // Where indentation was added by the editor.
133 StringRef Trailing = firstLine(Code.substr(Cursor));
134 StringRef Indentation = lastLine(Code.take_front(Cursor));
135 if (Indentation.data() == Code.data()) {
136 vlog("Typed a newline, but we're still on the first line!");
137 return Result;
138 }
139 StringRef Leading =
140 lastLine(Code.take_front(Indentation.data() - Code.data() - 1));
141 StringRef NextLine = firstLine(Code.substr(Cursor + Trailing.size() + 1));
142
143 // Strip leading whitespace on trailing line.
144 StringRef TrailingTrim = Trailing.ltrim();
145 if (unsigned TrailWS = Trailing.size() - TrailingTrim.size())
146 cantFail(Result.Changes.add(
147 replacement(Code, StringRef(Trailing.begin(), TrailWS), "")));
148
149 // If we split a comment, replace indentation with a comment marker.
150 // If the editor made the new line a comment, also respect that.
151 StringRef CommentMarker = commentMarker(Leading);
152 bool NewLineIsComment = !commentMarker(Indentation).empty();
153 if (!CommentMarker.empty() &&
154 (NewLineIsComment || !commentMarker(NextLine).empty() ||
155 (!TrailingTrim.empty() && !TrailingTrim.startswith("//")))) {
156 using llvm::sys::unicode::columnWidthUTF8;
157 // We indent the new comment to match the previous one.
158 StringRef PreComment =
159 Leading.take_front(CommentMarker.data() - Leading.data());
160 std::string IndentAndComment =
161 (std::string(columnWidthUTF8(PreComment), ' ') + CommentMarker + " ")
162 .str();
163 cantFail(
164 Result.Changes.add(replacement(Code, Indentation, IndentAndComment)));
165 } else {
166 // Remove any indentation and let clang-format re-add it.
167 // This prevents the cursor marker dragging e.g. an aligned comment with it.
168 cantFail(Result.Changes.add(replacement(Code, Indentation, "")));
169 }
170
171 // If we put a the newline inside a {} pair, put } on its own line...
172 if (CommentMarker.empty() && Leading.endswith("{") &&
173 Trailing.startswith("}")) {
174 cantFail(
175 Result.Changes.add(replacement(Code, Trailing.take_front(1), "\n}")));
176 // ...and format it.
177 Result.FormatRanges.push_back(
178 tooling::Range(Trailing.data() - Code.data() + 1, 1));
179 }
180
181 // Format the whole leading line.
182 Result.FormatRanges.push_back(
183 tooling::Range(Leading.data() - Code.data(), Leading.size()));
184
185 // We use a comment to represent the cursor, to preserve the newline.
186 // A trailing identifier improves parsing of e.g. for without braces.
187 // Exception: if the previous line has a trailing comment, we can't use one
188 // as the cursor (they will be aligned). But in this case we don't need to.
189 Result.CursorPlaceholder = !CommentMarker.empty() ? "ident" : "//==\nident";
190
191 return Result;
192 }
193
getIncrementalChanges(llvm::StringRef Code,unsigned Cursor,llvm::StringRef InsertedText)194 IncrementalChanges getIncrementalChanges(llvm::StringRef Code, unsigned Cursor,
195 llvm::StringRef InsertedText) {
196 IncrementalChanges Result;
197 if (InsertedText == "\n")
198 return getIncrementalChangesAfterNewline(Code, Cursor);
199
200 Result.CursorPlaceholder = " /**/";
201 return Result;
202 }
203
204 // Returns equivalent replacements that preserve the correspondence between
205 // OldCursor and NewCursor. If OldCursor lies in a replaced region, that
206 // replacement will be split.
207 std::vector<tooling::Replacement>
split(const tooling::Replacements & Replacements,unsigned OldCursor,unsigned NewCursor)208 split(const tooling::Replacements &Replacements, unsigned OldCursor,
209 unsigned NewCursor) {
210 std::vector<tooling::Replacement> Result;
211 int LengthChange = 0;
212 for (const tooling::Replacement &R : Replacements) {
213 if (R.getOffset() + R.getLength() <= OldCursor) { // before cursor
214 Result.push_back(R);
215 LengthChange += R.getReplacementText().size() - R.getLength();
216 } else if (R.getOffset() < OldCursor) { // overlaps cursor
217 int ReplacementSplit = NewCursor - LengthChange - R.getOffset();
218 assert(ReplacementSplit >= 0 &&
219 ReplacementSplit <= int(R.getReplacementText().size()) &&
220 "NewCursor incompatible with OldCursor!");
221 Result.push_back(tooling::Replacement(
222 R.getFilePath(), R.getOffset(), OldCursor - R.getOffset(),
223 R.getReplacementText().take_front(ReplacementSplit)));
224 Result.push_back(tooling::Replacement(
225 R.getFilePath(), OldCursor,
226 R.getLength() - (OldCursor - R.getOffset()),
227 R.getReplacementText().drop_front(ReplacementSplit)));
228 } else if (R.getOffset() >= OldCursor) { // after cursor
229 Result.push_back(R);
230 }
231 }
232 return Result;
233 }
234
235 } // namespace
236
237 // We're simulating the following sequence of changes:
238 // - apply the pre-formatting edits (see getIncrementalChanges)
239 // - insert a placeholder for the cursor
240 // - format some of the resulting code
241 // - remove the cursor placeholder again
242 // The replacements we return are produced by composing these.
243 //
244 // The text we actually pass to clang-format is slightly different from this,
245 // e.g. we have to close brackets. We ensure these differences are *after*
246 // all the regions we want to format, and discard changes in them.
247 std::vector<tooling::Replacement>
formatIncremental(llvm::StringRef OriginalCode,unsigned OriginalCursor,llvm::StringRef InsertedText,format::FormatStyle Style)248 formatIncremental(llvm::StringRef OriginalCode, unsigned OriginalCursor,
249 llvm::StringRef InsertedText, format::FormatStyle Style) {
250 IncrementalChanges Incremental =
251 getIncrementalChanges(OriginalCode, OriginalCursor, InsertedText);
252 // Never *remove* lines in response to pressing enter! This annoys users.
253 if (InsertedText == "\n") {
254 Style.MaxEmptyLinesToKeep = 1000;
255 Style.KeepEmptyLinesAtTheStartOfBlocks = true;
256 }
257
258 // Compute the code we want to format:
259 // 1) Start with code after the pre-formatting edits.
260 std::string CodeToFormat = cantFail(
261 tooling::applyAllReplacements(OriginalCode, Incremental.Changes));
262 unsigned Cursor = Incremental.Changes.getShiftedCodePosition(OriginalCursor);
263 // 2) Truncate code after the last interesting range.
264 unsigned FormatLimit = Cursor;
265 for (tooling::Range &R : Incremental.FormatRanges)
266 FormatLimit = std::max(FormatLimit, R.getOffset() + R.getLength());
267 CodeToFormat.resize(FormatLimit);
268 // 3) Insert a placeholder for the cursor.
269 CodeToFormat.insert(Cursor, Incremental.CursorPlaceholder);
270 // 4) Append brackets after FormatLimit so the code is well-formed.
271 closeBrackets(CodeToFormat, Style);
272
273 // Determine the ranges to format:
274 std::vector<tooling::Range> RangesToFormat = Incremental.FormatRanges;
275 // Ranges after the cursor need to be adjusted for the placeholder.
276 for (auto &R : RangesToFormat) {
277 if (R.getOffset() > Cursor)
278 R = tooling::Range(R.getOffset() + Incremental.CursorPlaceholder.size(),
279 R.getLength());
280 }
281 // We also format the cursor.
282 RangesToFormat.push_back(
283 tooling::Range(Cursor, Incremental.CursorPlaceholder.size()));
284 // Also update FormatLimit for the placeholder, we'll use this later.
285 FormatLimit += Incremental.CursorPlaceholder.size();
286
287 // Run clang-format, and truncate changes at FormatLimit.
288 tooling::Replacements FormattingChanges;
289 format::FormattingAttemptStatus Status;
290 for (const tooling::Replacement &R : format::reformat(
291 Style, CodeToFormat, RangesToFormat, Filename, &Status)) {
292 if (R.getOffset() + R.getLength() <= FormatLimit) // Before limit.
293 cantFail(FormattingChanges.add(R));
294 else if(R.getOffset() < FormatLimit) { // Overlaps limit.
295 if (R.getReplacementText().empty()) // Deletions are easy to handle.
296 cantFail(FormattingChanges.add(tooling::Replacement(Filename,
297 R.getOffset(), FormatLimit - R.getOffset(), "")));
298 else
299 // Hopefully won't happen in practice?
300 elog("Incremental clang-format edit overlapping cursor @ {0}!\n{1}",
301 Cursor, CodeToFormat);
302 }
303 }
304 if (!Status.FormatComplete)
305 vlog("Incremental format incomplete at line {0}", Status.Line);
306
307 // Now we are ready to compose the changes relative to OriginalCode.
308 // edits -> insert placeholder -> format -> remove placeholder.
309 // We must express insert/remove as Replacements.
310 tooling::Replacements InsertCursorPlaceholder(
311 tooling::Replacement(Filename, Cursor, 0, Incremental.CursorPlaceholder));
312 unsigned FormattedCursorStart =
313 FormattingChanges.getShiftedCodePosition(Cursor),
314 FormattedCursorEnd = FormattingChanges.getShiftedCodePosition(
315 Cursor + Incremental.CursorPlaceholder.size());
316 tooling::Replacements RemoveCursorPlaceholder(
317 tooling::Replacement(Filename, FormattedCursorStart,
318 FormattedCursorEnd - FormattedCursorStart, ""));
319
320 // We can't simply merge() and return: tooling::Replacements will combine
321 // adjacent edits left and right of the cursor. This gives the right source
322 // code, but loses information about where the cursor is!
323 // Fortunately, none of the individual passes lose information, so:
324 // - we use merge() to compute the final Replacements
325 // - we chain getShiftedCodePosition() to compute final cursor position
326 // - we split the final Replacements at the cursor position, so that
327 // each Replacement lies either before or after the cursor.
328 tooling::Replacements Final;
329 unsigned FinalCursor = OriginalCursor;
330 #ifndef NDEBUG
331 std::string FinalCode = std::string(OriginalCode);
332 dlog("Initial code: {0}", FinalCode);
333 #endif
334 for (auto Pass :
335 std::vector<std::pair<const char *, const tooling::Replacements *>>{
336 {"Pre-formatting changes", &Incremental.Changes},
337 {"Insert placeholder", &InsertCursorPlaceholder},
338 {"clang-format", &FormattingChanges},
339 {"Remove placeholder", &RemoveCursorPlaceholder}}) {
340 Final = Final.merge(*Pass.second);
341 FinalCursor = Pass.second->getShiftedCodePosition(FinalCursor);
342 #ifndef NDEBUG
343 FinalCode =
344 cantFail(tooling::applyAllReplacements(FinalCode, *Pass.second));
345 dlog("After {0}:\n{1}^{2}", Pass.first,
346 StringRef(FinalCode).take_front(FinalCursor),
347 StringRef(FinalCode).drop_front(FinalCursor));
348 #endif
349 }
350 return split(Final, OriginalCursor, FinalCursor);
351 }
352
353 unsigned
transformCursorPosition(unsigned Offset,const std::vector<tooling::Replacement> & Replacements)354 transformCursorPosition(unsigned Offset,
355 const std::vector<tooling::Replacement> &Replacements) {
356 unsigned OriginalOffset = Offset;
357 for (const auto &R : Replacements) {
358 if (R.getOffset() + R.getLength() <= OriginalOffset) {
359 // Replacement is before cursor.
360 Offset += R.getReplacementText().size();
361 Offset -= R.getLength();
362 } else if (R.getOffset() < OriginalOffset) {
363 // Replacement overlaps cursor.
364 // Preserve position within replacement text, as far as possible.
365 unsigned PositionWithinReplacement = Offset - R.getOffset();
366 if (PositionWithinReplacement > R.getReplacementText().size()) {
367 Offset += R.getReplacementText().size();
368 Offset -= PositionWithinReplacement;
369 }
370 } else {
371 // Replacement after cursor.
372 break; // Replacements are sorted, the rest are also after the cursor.
373 }
374 }
375 return Offset;
376 }
377
378 } // namespace clangd
379 } // namespace clang
380