1 //===--- RawStringLiteralCheck.cpp - clang-tidy----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "RawStringLiteralCheck.h"
10 #include "clang/AST/ASTContext.h"
11 #include "clang/ASTMatchers/ASTMatchFinder.h"
12 #include "clang/Lex/Lexer.h"
13 
14 using namespace clang::ast_matchers;
15 
16 namespace clang {
17 namespace tidy {
18 namespace modernize {
19 
20 namespace {
21 
containsEscapes(StringRef HayStack,StringRef Escapes)22 bool containsEscapes(StringRef HayStack, StringRef Escapes) {
23   size_t BackSlash = HayStack.find('\\');
24   if (BackSlash == StringRef::npos)
25     return false;
26 
27   while (BackSlash != StringRef::npos) {
28     if (Escapes.find(HayStack[BackSlash + 1]) == StringRef::npos)
29       return false;
30     BackSlash = HayStack.find('\\', BackSlash + 2);
31   }
32 
33   return true;
34 }
35 
isRawStringLiteral(StringRef Text)36 bool isRawStringLiteral(StringRef Text) {
37   // Already a raw string literal if R comes before ".
38   const size_t QuotePos = Text.find('"');
39   assert(QuotePos != StringRef::npos);
40   return (QuotePos > 0) && (Text[QuotePos - 1] == 'R');
41 }
42 
containsEscapedCharacters(const MatchFinder::MatchResult & Result,const StringLiteral * Literal,const CharsBitSet & DisallowedChars)43 bool containsEscapedCharacters(const MatchFinder::MatchResult &Result,
44                                const StringLiteral *Literal,
45                                const CharsBitSet &DisallowedChars) {
46   // FIXME: Handle L"", u8"", u"" and U"" literals.
47   if (!Literal->isAscii())
48     return false;
49 
50   for (const unsigned char C : Literal->getBytes())
51     if (DisallowedChars.test(C))
52       return false;
53 
54   CharSourceRange CharRange = Lexer::makeFileCharRange(
55       CharSourceRange::getTokenRange(Literal->getSourceRange()),
56       *Result.SourceManager, Result.Context->getLangOpts());
57   StringRef Text = Lexer::getSourceText(CharRange, *Result.SourceManager,
58                                         Result.Context->getLangOpts());
59   if (isRawStringLiteral(Text))
60     return false;
61 
62   return containsEscapes(Text, R"('\"?x01)");
63 }
64 
containsDelimiter(StringRef Bytes,const std::string & Delimiter)65 bool containsDelimiter(StringRef Bytes, const std::string &Delimiter) {
66   return Bytes.find(Delimiter.empty()
67                         ? std::string(R"lit()")lit")
68                         : (")" + Delimiter + R"(")")) != StringRef::npos;
69 }
70 
asRawStringLiteral(const StringLiteral * Literal,const std::string & DelimiterStem)71 std::string asRawStringLiteral(const StringLiteral *Literal,
72                                const std::string &DelimiterStem) {
73   const StringRef Bytes = Literal->getBytes();
74   std::string Delimiter;
75   for (int I = 0; containsDelimiter(Bytes, Delimiter); ++I) {
76     Delimiter = (I == 0) ? DelimiterStem : DelimiterStem + std::to_string(I);
77   }
78 
79   if (Delimiter.empty())
80     return (R"(R"()" + Bytes + R"lit()")lit").str();
81 
82   return (R"(R")" + Delimiter + "(" + Bytes + ")" + Delimiter + R"(")").str();
83 }
84 
85 } // namespace
86 
RawStringLiteralCheck(StringRef Name,ClangTidyContext * Context)87 RawStringLiteralCheck::RawStringLiteralCheck(StringRef Name,
88                                              ClangTidyContext *Context)
89     : ClangTidyCheck(Name, Context),
90       DelimiterStem(Options.get("DelimiterStem", "lit")),
91       ReplaceShorterLiterals(Options.get("ReplaceShorterLiterals", false)) {
92   // Non-printing characters are disallowed:
93   // \007 = \a bell
94   // \010 = \b backspace
95   // \011 = \t horizontal tab
96   // \012 = \n new line
97   // \013 = \v vertical tab
98   // \014 = \f form feed
99   // \015 = \r carriage return
100   // \177 = delete
101   for (const unsigned char C : StringRef("\000\001\002\003\004\005\006\a"
102                                          "\b\t\n\v\f\r\016\017"
103                                          "\020\021\022\023\024\025\026\027"
104                                          "\030\031\032\033\034\035\036\037"
105                                          "\177",
106                                          33))
107     DisallowedChars.set(C);
108 
109   // Non-ASCII are disallowed too.
110   for (unsigned int C = 0x80u; C <= 0xFFu; ++C)
111     DisallowedChars.set(static_cast<unsigned char>(C));
112 }
113 
storeOptions(ClangTidyOptions::OptionMap & Opts)114 void RawStringLiteralCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
115   Options.store(Opts, "DelimiterStem", DelimiterStem);
116   Options.store(Opts, "ReplaceShorterLiterals", ReplaceShorterLiterals);
117 }
118 
registerMatchers(MatchFinder * Finder)119 void RawStringLiteralCheck::registerMatchers(MatchFinder *Finder) {
120   Finder->addMatcher(
121       stringLiteral(unless(hasParent(predefinedExpr()))).bind("lit"), this);
122 }
123 
check(const MatchFinder::MatchResult & Result)124 void RawStringLiteralCheck::check(const MatchFinder::MatchResult &Result) {
125   const auto *Literal = Result.Nodes.getNodeAs<StringLiteral>("lit");
126   if (Literal->getBeginLoc().isMacroID())
127     return;
128 
129   if (containsEscapedCharacters(Result, Literal, DisallowedChars)) {
130     std::string Replacement = asRawStringLiteral(Literal, DelimiterStem);
131     if (ReplaceShorterLiterals ||
132         Replacement.length() <=
133             Lexer::MeasureTokenLength(Literal->getBeginLoc(),
134                                       *Result.SourceManager, getLangOpts()))
135       replaceWithRawStringLiteral(Result, Literal, Replacement);
136   }
137 }
138 
replaceWithRawStringLiteral(const MatchFinder::MatchResult & Result,const StringLiteral * Literal,StringRef Replacement)139 void RawStringLiteralCheck::replaceWithRawStringLiteral(
140     const MatchFinder::MatchResult &Result, const StringLiteral *Literal,
141     StringRef Replacement) {
142   CharSourceRange CharRange = Lexer::makeFileCharRange(
143       CharSourceRange::getTokenRange(Literal->getSourceRange()),
144       *Result.SourceManager, getLangOpts());
145   diag(Literal->getBeginLoc(),
146        "escaped string literal can be written as a raw string literal")
147       << FixItHint::CreateReplacement(CharRange, Replacement);
148 }
149 
150 } // namespace modernize
151 } // namespace tidy
152 } // namespace clang
153