1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // This clang tool does the following three tasks:
6 // 1) Finds all instances of the following functions and extracts the location
7 //    info and content of annotation tags:
8 //      - net::DefineNetworkTrafficAnnotation
9 //      - net::DefinePartialNetworkTrafficAnnotation
10 //      - net::CompleteNetworkTrafficAnnotation
11 //      - net::BranchedCompleteNetworkTrafficAnnotation
12 // 2) Extracts all calls of the following network request creation functions
13 //    and returns their source location and availability of a
14 //    net::[Partial]NetworkTrafficAnnotation parameter in them:
15 //     - URLFetcher::Create
16 //     - URLRequestContext::CreateRequest
17 // 3) Finds all instances of initializing any of the following classes with list
18 //    expressions or assignment of a value to |unique_id_hash_code| of the
19 //    mutable ones, outside traffic annotation API functions:
20 //     - net::NetworkTrafficAnnotationTag
21 //     - net::PartialNetworkTrafficAnnotationTag
22 //     - net::MutableNetworkTrafficAnnotationTag
23 //     - net::MutablePartialNetworkTrafficAnnotationTag
// All outputs are written to llvm::outs.
25 // Please refer to README.md for build and usage instructions.
26 
27 #include <memory>
28 #include <vector>
29 
30 #include "clang/ASTMatchers/ASTMatchFinder.h"
31 #include "clang/ASTMatchers/ASTMatchers.h"
32 #include "clang/Basic/SourceManager.h"
33 #include "clang/Frontend/FrontendActions.h"
34 #include "clang/Lex/Lexer.h"
35 #include "clang/Tooling/CommonOptionsParser.h"
36 #include "clang/Tooling/Refactoring.h"
37 #include "clang/Tooling/Tooling.h"
38 #include "llvm/Support/CommandLine.h"
39 #include "llvm/Support/TargetSelect.h"
40 
41 using namespace clang::ast_matchers;
42 
43 namespace {
44 
// Position of a piece of code: the file, the line, and the function that
// contains it.
struct Location {
  // Path of the file that contains the code.
  std::string file_path;

  // Line inside |file_path|. Stays at -1 until a real line number is stored.
  int line_number = -1;

  // Name of the function that encloses the line. For the snippet below,
  // |function_name| is 'foo' for each of the lines 101-103:
  //
  // 100 void foo() {
  // 101   NetworkTrafficAnnotationTag baz =
  // 102       net::DefineNetworkTrafficAnnotation(...);
  // 103   bar(baz);
  // 104 }
  //
  // When the code is not inside any function, 'Global Namespace' is stored
  // instead.
  std::string function_name;
};
61 
62 // An instance of a call to either of the 4 network traffic annotation
63 // definition functions.
64 struct NetworkAnnotationInstance {
65   // Annotation content. These are the arguments of the call to either of the 4
66   // network traffic annotation definition functions.
67   struct Annotation {
68     std::string unique_id;
69     std::string text;
70 
71     // |extra_id| will have |completing_id| for
72     // net::DefinePartialNetworkTrafficAnnotation and |group_id| for
73     // net::BranchedCompleteNetworkTrafficAnnotation. It will be empty in other
74     // cases.
75     std::string extra_id;
76   };
77 
78   Location location;
79   Annotation annotation;
80 
81   // Specifying the function type.
82   enum FunctionType {
83     kDefinition,         // net::DefineNetworkTrafficAnnotation
84     kPartial,            // net::DefinePartialNetworkTrafficAnnotation
85     kCompleting,         // net::CompleteNetworkTrafficAnnotation
86     kBranchedCompleting  // net::BranchedCompleteNetworkTrafficAnnotation
87   };
88 
89   FunctionType function_type;
90 
GetTypeName__anon3e8690ca0111::NetworkAnnotationInstance91   const char* GetTypeName() const {
92     switch (function_type) {
93       case kDefinition:
94         return "Definition";
95       case kPartial:
96         return "Partial";
97       case kCompleting:
98         return "Completing";
99       case kBranchedCompleting:
100         return "BranchedCompleting";
101     }
102     assert(false);
103     return "";
104   }
105 };
106 
107 // An instance of a call to one of the monitored function.
108 struct CallInstance {
109   // Location of the call.
110   Location location;
111 
112   // Whether the function is annotated.
113   bool has_annotation = false;
114 
115   // Name of the called function.
116   std::string called_function_name;
117 };
118 
119 // A structure to keep detected annotation and call instances, and all code
120 // locations that include a direct value assignment to annotations using list
121 // expression constructors or mutable annotations' |unique_id_hash_code|.
122 struct Collector {
123   std::vector<NetworkAnnotationInstance> annotations;
124   std::vector<CallInstance> calls;
125   std::vector<Location> assignments;
126 };
127 
128 // This class implements the call back functions for AST Matchers. The matchers
129 // are defined in RunMatchers function. When a pattern is found there,
130 // the run function in this class is called back with information on the matched
131 // location and description of the matched pattern.
132 class NetworkAnnotationTagCallback : public MatchFinder::MatchCallback {
133  public:
NetworkAnnotationTagCallback(Collector * collector)134   explicit NetworkAnnotationTagCallback(Collector* collector)
135       : collector_(collector) {}
136   ~NetworkAnnotationTagCallback() override = default;
137 
138   // Is called on any pattern found by ASTMathers that are defined in RunMathers
139   // function.
run(const MatchFinder::MatchResult & result)140   virtual void run(const MatchFinder::MatchResult& result) override {
141     if (const clang::CallExpr* call_expr =
142             result.Nodes.getNodeAs<clang::CallExpr>("monitored_function")) {
143       AddFunction(call_expr, result);
144     } else if (const clang::CXXConstructExpr* constructor_expr =
145                    result.Nodes.getNodeAs<clang::CXXConstructExpr>(
146                        "annotation_constructor")) {
147       AddConstructor(constructor_expr, result);
148     } else if (const clang::MemberExpr* member_expr =
149                    result.Nodes.getNodeAs<clang::MemberExpr>(
150                        "direct_assignment")) {
151       AddAssignment(member_expr, result);
152     } else {
153       AddAnnotation(result);
154     }
155   }
156 
GetInstanceLocation(const MatchFinder::MatchResult & result,const clang::Expr * expr,Location * location)157   void GetInstanceLocation(const MatchFinder::MatchResult& result,
158                            const clang::Expr* expr,
159                            Location* location) {
160     clang::SourceLocation source_location = expr->getLocStart();
161     if (source_location.isMacroID())
162       source_location = result.SourceManager->getExpansionLoc(source_location);
163     location->file_path = result.SourceManager->getFilename(source_location);
164     location->line_number =
165         result.SourceManager->getSpellingLineNumber(source_location);
166 
167     const clang::FunctionDecl* ancestor =
168         result.Nodes.getNodeAs<clang::FunctionDecl>("function_context");
169     if (ancestor)
170       location->function_name = ancestor->getQualifiedNameAsString();
171     else
172       location->function_name = "Global Namespace";
173 
174     std::replace(location->file_path.begin(), location->file_path.end(), '\\',
175                  '/');
176 
177     // Trim leading "../"s from file path.
178     while (location->file_path.length() > 3 &&
179            location->file_path.substr(0, 3) == "../") {
180       location->file_path =
181           location->file_path.substr(3, location->file_path.length() - 3);
182     }
183   }
184 
185   // Stores a function call that should be monitored.
AddFunction(const clang::CallExpr * call_expr,const MatchFinder::MatchResult & result)186   void AddFunction(const clang::CallExpr* call_expr,
187                    const MatchFinder::MatchResult& result) {
188     CallInstance instance;
189 
190     GetInstanceLocation(result, call_expr, &instance.location);
191     instance.called_function_name =
192         call_expr->getDirectCallee()->getQualifiedNameAsString();
193     instance.has_annotation =
194         (result.Nodes.getNodeAs<clang::RecordDecl>("annotation") != nullptr);
195     collector_->calls.push_back(instance);
196   }
197 
198   // Tests if the given function name belongs to the network traffic annotation
199   // API. These functions are all defined in
200   // 'net/traffic_annotation/network_traffic_annotation.h'.
IsAPIFunction(const std::string & function_name)201   bool IsAPIFunction(const std::string& function_name) {
202     return function_name == "net::NetworkTrafficAnnotationTag::NotReached" ||
203            function_name == "net::DefineNetworkTrafficAnnotation" ||
204            function_name == "net::DefinePartialNetworkTrafficAnnotation" ||
205            function_name == "net::CompleteNetworkTrafficAnnotation" ||
206            function_name == "net::BranchedCompleteNetworkTrafficAnnotation" ||
207            function_name ==
208                "net::MutableNetworkTrafficAnnotationTag::operator "
209                "NetworkTrafficAnnotationTag" ||
210            function_name ==
211                "net::MutablePartialNetworkTrafficAnnotationTag::operator "
212                "PartialNetworkTrafficAnnotationTag";
213   }
214 
215   // Stores an annotation constructor called with list expression.
AddConstructor(const clang::CXXConstructExpr * constructor_expr,const MatchFinder::MatchResult & result)216   void AddConstructor(const clang::CXXConstructExpr* constructor_expr,
217                       const MatchFinder::MatchResult& result) {
218     Location instance;
219 
220     GetInstanceLocation(result, constructor_expr, &instance);
221     // Only report if the constructor is not in one of the API functions for
222     // network traffic annotations.
223     if (!IsAPIFunction(instance.function_name))
224       collector_->assignments.push_back(instance);
225   }
226 
227   // Stores a value assignment to |unique_id_hash_code| of a mutable annotaton.
AddAssignment(const clang::MemberExpr * member_expr,const MatchFinder::MatchResult & result)228   void AddAssignment(const clang::MemberExpr* member_expr,
229                      const MatchFinder::MatchResult& result) {
230     Location instance;
231 
232     GetInstanceLocation(result, member_expr, &instance);
233     // Only report if the assignment is not in one of the API functions for
234     // network traffic annotations.
235     if (!IsAPIFunction(instance.function_name))
236       collector_->assignments.push_back(instance);
237   }
238 
239   // Stores an annotation.
AddAnnotation(const MatchFinder::MatchResult & result)240   void AddAnnotation(const MatchFinder::MatchResult& result) {
241     NetworkAnnotationInstance instance;
242 
243     const clang::StringLiteral* unique_id =
244         result.Nodes.getNodeAs<clang::StringLiteral>("unique_id");
245     const clang::StringLiteral* annotation_text =
246         result.Nodes.getNodeAs<clang::StringLiteral>("annotation_text");
247     const clang::StringLiteral* group_id =
248         result.Nodes.getNodeAs<clang::StringLiteral>("group_id");
249     const clang::StringLiteral* completing_id =
250         result.Nodes.getNodeAs<clang::StringLiteral>("completing_id");
251 
252     const clang::CallExpr* call_expr = nullptr;
253     if ((call_expr =
254              result.Nodes.getNodeAs<clang::CallExpr>("definition_function"))) {
255       instance.function_type = NetworkAnnotationInstance::kDefinition;
256     } else if ((call_expr = result.Nodes.getNodeAs<clang::CallExpr>(
257                     "partial_function"))) {
258       instance.function_type = NetworkAnnotationInstance::kPartial;
259       assert(completing_id);
260       instance.annotation.extra_id = completing_id->getString();
261     } else if ((call_expr = result.Nodes.getNodeAs<clang::CallExpr>(
262                     "completing_function"))) {
263       instance.function_type = NetworkAnnotationInstance::kCompleting;
264     } else if ((call_expr = result.Nodes.getNodeAs<clang::CallExpr>(
265                     "branched_completing_function"))) {
266       instance.function_type = NetworkAnnotationInstance::kBranchedCompleting;
267       assert(group_id);
268       instance.annotation.extra_id = group_id->getString();
269     } else {
270       assert(false);
271     }
272 
273     assert(unique_id && annotation_text);
274     instance.annotation.unique_id = unique_id->getString();
275     instance.annotation.text = annotation_text->getString();
276 
277     GetInstanceLocation(result, call_expr, &instance.location);
278 
279     collector_->annotations.push_back(instance);
280   }
281 
282  private:
283   Collector* collector_;
284 };
285 
286 // Sets up an ASTMatcher and runs clang tool to populate collector. Returns the
287 // result of running the clang tool.
RunMatchers(clang::tooling::ClangTool * clang_tool,Collector * collector)288 int RunMatchers(clang::tooling::ClangTool* clang_tool, Collector* collector) {
289   NetworkAnnotationTagCallback callback(collector);
290   MatchFinder match_finder;
291 
292   // Set up patterns to find network traffic annotation definition functions,
293   // their arguments, and their ancestor function (when possible).
294   auto bind_function_context_if_present =
295       anyOf(hasAncestor(functionDecl().bind("function_context")),
296             unless(hasAncestor(functionDecl())));
297   auto has_annotation_parameter = anyOf(
298       hasAnyParameter(hasType(
299           recordDecl(anyOf(hasName("net::NetworkTrafficAnnotationTag"),
300                            hasName("net::PartialNetworkTrafficAnnotationTag")))
301               .bind("annotation"))),
302       unless(hasAnyParameter(hasType(recordDecl(
303           anyOf(hasName("net::NetworkTrafficAnnotationTag"),
304                 hasName("net::PartialNetworkTrafficAnnotationTag")))))));
305   match_finder.addMatcher(
306       callExpr(hasDeclaration(functionDecl(
307                    anyOf(hasName("DefineNetworkTrafficAnnotation"),
308                          hasName("net::DefineNetworkTrafficAnnotation")))),
309                hasArgument(0, stringLiteral().bind("unique_id")),
310                hasArgument(1, stringLiteral().bind("annotation_text")),
311                bind_function_context_if_present)
312           .bind("definition_function"),
313       &callback);
314   match_finder.addMatcher(
315       callExpr(hasDeclaration(functionDecl(anyOf(
316                    hasName("DefinePartialNetworkTrafficAnnotation"),
317                    hasName("net::DefinePartialNetworkTrafficAnnotation")))),
318                hasArgument(0, stringLiteral().bind("unique_id")),
319                hasArgument(1, stringLiteral().bind("completing_id")),
320                hasArgument(2, stringLiteral().bind("annotation_text")),
321                bind_function_context_if_present)
322           .bind("partial_function"),
323       &callback);
324   match_finder.addMatcher(
325       callExpr(hasDeclaration(functionDecl(
326                    anyOf(hasName("CompleteNetworkTrafficAnnotation"),
327                          hasName("net::CompleteNetworkTrafficAnnotation")))),
328                hasArgument(0, stringLiteral().bind("unique_id")),
329                hasArgument(2, stringLiteral().bind("annotation_text")),
330                bind_function_context_if_present)
331           .bind("completing_function"),
332       &callback);
333   match_finder.addMatcher(
334       callExpr(hasDeclaration(functionDecl(anyOf(
335                    hasName("BranchedCompleteNetworkTrafficAnnotation"),
336                    hasName("net::BranchedCompleteNetworkTrafficAnnotation")))),
337                hasArgument(0, stringLiteral().bind("unique_id")),
338                hasArgument(1, stringLiteral().bind("group_id")),
339                hasArgument(3, stringLiteral().bind("annotation_text")),
340                bind_function_context_if_present)
341           .bind("branched_completing_function"),
342       &callback);
343 
344   // Setup patterns to find functions that should be monitored.
345   match_finder.addMatcher(
346       callExpr(hasDeclaration(functionDecl(
347                    anyOf(hasName("URLFetcher::Create"),
348                          hasName("URLRequestContext::CreateRequest")),
349                    has_annotation_parameter)),
350                bind_function_context_if_present)
351           .bind("monitored_function"),
352       &callback);
353 
354   // Setup patterns to find constructors of different network traffic annotation
355   // tags that are initialized by list expressions.
356   match_finder.addMatcher(
357       cxxConstructExpr(
358           hasDeclaration(functionDecl(
359               anyOf(hasName("net::NetworkTrafficAnnotationTag::"
360                             "NetworkTrafficAnnotationTag"),
361                     hasName("net::PartialNetworkTrafficAnnotationTag::"
362                             "PartialNetworkTrafficAnnotationTag"),
363                     hasName("net::MutableNetworkTrafficAnnotationTag::"
364                             "MutableNetworkTrafficAnnotationTag"),
365                     hasName("net::MutablePartialNetworkTrafficAnnotationTag::"
366                             "MutablePartialNetworkTrafficAnnotationTag")))),
367           hasDescendant(initListExpr()), bind_function_context_if_present)
368           .bind("annotation_constructor"),
369       &callback);
370 
371   // Setup pattern to find direct assignment of value to |unique_id_hash_code|
372   // of net::MutableNetworkTrafficAnnotationTag or
373   // net::MutablePartialNetworkTrafficAnnotationTag.
374   match_finder.addMatcher(
375       memberExpr(
376           member(hasName("unique_id_hash_code")),
377           hasObjectExpression(hasType(cxxRecordDecl(anyOf(
378               hasName("net::MutableNetworkTrafficAnnotationTag"),
379               hasName("net::MutablePartialNetworkTrafficAnnotationTag"))))),
380           hasParent(binaryOperator(hasOperatorName("="))),
381           bind_function_context_if_present)
382           .bind("direct_assignment"),
383       &callback);
384 
385   std::unique_ptr<clang::tooling::FrontendActionFactory> frontend_factory =
386       clang::tooling::newFrontendActionFactory(&match_finder);
387   return clang_tool->run(frontend_factory.get());
388 }
389 
390 }  // namespace
391 
392 static llvm::cl::OptionCategory ToolCategory(
393     "traffic_annotation_extractor: Extract traffic annotation texts");
394 static llvm::cl::extrahelp CommonHelp(
395     clang::tooling::CommonOptionsParser::HelpMessage);
396 
main(int argc,const char * argv[])397 int main(int argc, const char* argv[]) {
398   clang::tooling::CommonOptionsParser options(argc, argv, ToolCategory);
399   clang::tooling::ClangTool tool(options.getCompilations(),
400                                  options.getSourcePathList());
401   Collector collector;
402 
403   llvm::InitializeNativeTarget();
404   llvm::InitializeNativeTargetAsmParser();
405   int result = RunMatchers(&tool, &collector);
406 
407   if (result != 0)
408     return result;
409 
410   // For each call to any of the functions that define a network traffic
411   // annotation, write annotation text and relevant meta data into llvm::outs().
412   for (const NetworkAnnotationInstance& instance : collector.annotations) {
413     llvm::outs() << "==== NEW ANNOTATION ====\n";
414     llvm::outs() << instance.location.file_path << "\n";
415     llvm::outs() << instance.location.function_name << "\n";
416     llvm::outs() << instance.location.line_number << "\n";
417     llvm::outs() << instance.GetTypeName() << "\n";
418     llvm::outs() << instance.annotation.unique_id << "\n";
419     llvm::outs() << instance.annotation.extra_id << "\n";
420     llvm::outs() << instance.annotation.text << "\n";
421     llvm::outs() << "==== ANNOTATION ENDS ====\n";
422   }
423 
424   // For each call, write annotation text and relevant meta data.
425   for (const CallInstance& instance : collector.calls) {
426     llvm::outs() << "==== NEW CALL ====\n";
427     llvm::outs() << instance.location.file_path << "\n";
428     llvm::outs() << instance.location.function_name << "\n";
429     llvm::outs() << instance.location.line_number << "\n";
430     llvm::outs() << instance.called_function_name << "\n";
431     llvm::outs() << instance.has_annotation << "\n";
432     llvm::outs() << "==== CALL ENDS ====\n";
433   }
434 
435   // For each assignment, write relevant meta data.
436   for (const Location& instance : collector.assignments) {
437     llvm::outs() << "==== NEW ASSIGNMENT ====\n";
438     llvm::outs() << instance.file_path << "\n";
439     llvm::outs() << instance.function_name << "\n";
440     llvm::outs() << instance.line_number << "\n";
441     llvm::outs() << "==== ASSIGNMENT ENDS ====\n";
442   }
443 
444   return 0;
445 }