1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "Preprocessor.h"
18 
19 #include <err.h>
20 #include <fcntl.h>
21 #include <fts.h>
22 #include <libgen.h>
23 #include <string.h>
24 #include <sys/stat.h>
25 #include <sys/types.h>
26 #include <unistd.h>
27 
28 #include <deque>
29 #include <fstream>
30 #include <string>
31 #include <unordered_map>
32 
33 #include <llvm/ADT/StringRef.h>
34 #include <llvm/ADT/Twine.h>
35 #include <llvm/Support/FileSystem.h>
36 #include <llvm/Support/Path.h>
37 
38 #include "Arch.h"
39 #include "DeclarationDatabase.h"
40 #include "versioner.h"
41 
42 using namespace std::string_literals;
43 
calculateRequiredGuard(const Declaration & declaration)44 static DeclarationAvailability calculateRequiredGuard(const Declaration& declaration) {
45   // To avoid redundant macro guards, the availability calculated by this function is the set
46   // difference of 'targets marked-available' from 'targets the declaration is visible in'.
47   // For example, a declaration that is visible always and introduced in 9 would return introduced
48   // in 9, but the same declaration, except only visible in 9+ would return an empty
49   // DeclarationAvailability.
50 
51   // This currently only handles __INTRODUCED_IN.
52   // TODO: Do the same for __REMOVED_IN.
53   int global_min_api_visible = 0;
54   ArchMap<int> arch_visibility;
55 
56   for (const auto& it : declaration.availability) {
57     const CompilationType& type = it.first;
58 
59     if (global_min_api_visible == 0 || global_min_api_visible > type.api_level) {
60       global_min_api_visible = type.api_level;
61     }
62 
63     if (arch_visibility[type.arch] == 0 || arch_visibility[type.arch] > type.api_level) {
64       arch_visibility[type.arch] = type.api_level;
65     }
66   }
67 
68   DeclarationAvailability decl_av;
69   if (!declaration.calculateAvailability(&decl_av)) {
70     fprintf(stderr, "versioner: failed to calculate availability while preprocessing:\n");
71     declaration.dump("", stderr, 2);
72     exit(1);
73   }
74 
75   D("Calculating required guard for %s:\n", declaration.name.c_str());
76   D("  Declaration availability: %s\n", to_string(decl_av).c_str());
77 
78   if (verbose) {
79     std::string arch_visibility_str;
80     for (Arch arch : supported_archs) {
81       if (arch_visibility[arch] != 0) {
82         arch_visibility_str += to_string(arch);
83         arch_visibility_str += ": ";
84         arch_visibility_str += std::to_string(arch_visibility[arch]);
85         arch_visibility_str += ", ";
86       }
87     }
88     if (!arch_visibility_str.empty()) {
89       arch_visibility_str.resize(arch_visibility_str.size() - 2);
90     }
91     D("  Declaration visibility: global = %d, arch = %s\n", global_min_api_visible,
92       arch_visibility_str.c_str());
93   }
94 
95   DeclarationAvailability result = decl_av;
96   if (result.global_availability.introduced <= global_min_api_visible) {
97     result.global_availability.introduced = 0;
98   }
99 
100   for (Arch arch : supported_archs) {
101     if (result.arch_availability[arch].introduced <= arch_visibility[arch] ||
102         result.arch_availability[arch].introduced <= arch_min_api[arch]) {
103       result.arch_availability[arch].introduced = 0;
104     }
105   }
106 
107   D("  Calculated result: %s\n", to_string(result).c_str());
108   D("\n");
109 
110   return result;
111 }
112 
// Reads the file at `path` and returns its contents split into lines (without the line
// terminators). Exits with an error if the file cannot be opened or a read error occurs, rather
// than silently returning an empty or truncated result.
static std::deque<std::string> readFileLines(const std::string& path) {
  std::ifstream is(path.c_str());
  if (!is) {
    err(1, "failed to open '%s' for reading", path.c_str());
  }

  std::deque<std::string> result;
  std::string line;

  while (std::getline(is, line)) {
    // getline() overwrites `line` on the next iteration, so moving out of it here is safe.
    result.push_back(std::move(line));
  }

  // Reaching end-of-file sets eofbit/failbit only; badbit indicates a genuine I/O error.
  if (is.bad()) {
    err(1, "failed to read '%s'", path.c_str());
  }

  return result;
}
124 
writeFileLines(const std::string & path,const std::deque<std::string> & lines)125 static void writeFileLines(const std::string& path, const std::deque<std::string>& lines) {
126   if (!mkdirs(dirname(path))) {
127     err(1, "failed to create directory '%s'", dirname(path).c_str());
128   }
129 
130   std::ofstream os(path.c_str(), std::ios_base::out | std::ios_base::trunc);
131 
132   for (const std::string& line : lines) {
133     os << line << "\n";
134   }
135 }
136 
// Maps the source range of a declaration to the availability guard that must wrap it.
using GuardMap = std::map<Location, DeclarationAvailability>;
138 
generateGuardCondition(const DeclarationAvailability & avail)139 static std::string generateGuardCondition(const DeclarationAvailability& avail) {
140   // Logically orred expressions that constitute the macro guard.
141   std::vector<std::string> expressions;
142   static const std::vector<std::pair<std::string, std::set<Arch>>> arch_sets = {
143       {"!defined(__LP64__)", {Arch::arm, Arch::x86}},
144       {"defined(__LP64__)", {Arch::arm64, Arch::riscv64, Arch::x86_64}},
145   };
146   std::map<Arch, std::string> individual_archs = {
147     { Arch::arm, "defined(__arm__)" },
148     { Arch::arm64, "defined(__aarch64__)" },
149     { Arch::riscv64, "defined(__riscv)" },
150     { Arch::x86, "defined(__i386__)" },
151     { Arch::x86_64, "defined(__x86_64__)" },
152   };
153 
154   auto generate_guard = [](const std::string& arch_expr, int min_version) {
155     if (min_version == 0) {
156       return arch_expr;
157     }
158     return arch_expr + " && __ANDROID_API__ >= " + std::to_string(min_version);
159   };
160 
161   D("Generating guard for availability: %s\n", to_string(avail).c_str());
162   if (!avail.global_availability.empty()) {
163     for (Arch arch : supported_archs) {
164       if (!avail.arch_availability[arch].empty()) {
165         errx(1, "attempted to generate guard with global and per-arch values: %s",
166              to_string(avail).c_str());
167       }
168     }
169 
170     if (avail.global_availability.introduced == 0) {
171       // We currently get here for the "__sF" symbol because it's marked __REMOVED_IN(23). This
172       // symbol is the only use of __REMOVED_IN, and it's already guarded manually, so there's no
173       // need to do anything.
174       fprintf(stderr, "warning: attempted to generate guard with empty availability: %s\n",
175               to_string(avail).c_str());
176       return "";
177     }
178 
179     if (avail.global_availability.introduced <= 9) {
180       return "";
181     }
182 
183     return "__ANDROID_API__ >= "s + std::to_string(avail.global_availability.introduced);
184   }
185 
186   for (const auto& it : arch_sets) {
187     const std::string& arch_expr = it.first;
188     const std::set<Arch>& archs = it.second;
189 
190     D("  Checking arch set '%s'\n", arch_expr.c_str());
191 
192     int version = 0;
193 
194     // Find the architectures that need to check __ANDROID_API__ and verify that they check against
195     // the same API level.
196     for (Arch arch : archs) {
197       const int arch_version = avail.arch_availability[arch].introduced;
198       if (arch_version == 0) {
199         continue;
200       } else if (version == 0) {
201         version = arch_version;
202       } else if (version != arch_version) {
203         D("    Skipping arch set, availability for %s doesn't match %s\n",
204           to_string(*it.second.begin()).c_str(), to_string(arch).c_str());
205         goto skip;
206       }
207     }
208 
209     // Verify that a non-zero version is acceptable to reuse for other archs with a higher minimum
210     // API, like riscv64. (e.g. It's OK to reuse an (__ANDROID_API__ >= 24) check if the arch's
211     // minimum API is 35.)
212     if (version != 0) {
213       for (Arch arch : archs) {
214         const int arch_version = avail.arch_availability[arch].introduced;
215         if (arch_version == 0 && version > arch_min_api[arch]) {
216           D("    Skipping arch set, availability for %s doesn't match %s\n",
217             to_string(*it.second.begin()).c_str(), to_string(arch).c_str());
218           goto skip;
219         }
220       }
221     }
222 
223     expressions.emplace_back(generate_guard(arch_expr, version));
224 
225     D("    Generated expression '%s'\n", expressions.rbegin()->c_str());
226 
227     for (Arch arch : archs) {
228       individual_archs.erase(arch);
229     }
230 
231   skip:
232     continue;
233   }
234 
235   for (const auto& it : individual_archs) {
236     const std::string& arch_expr = it.second;
237     int introduced = avail.arch_availability[it.first].introduced;
238     expressions.emplace_back(generate_guard(arch_expr, introduced));
239   }
240 
241   if (expressions.size() == 0) {
242     errx(1, "generated empty guard for availability %s", to_string(avail).c_str());
243   } else if (expressions.size() == 1) {
244     return expressions[0];
245   }
246 
247   return "("s + Join(expressions, ") || (") + ")";
248 }
249 
250 // Assumes that nothing weird is happening (e.g. having the semicolon be in a macro).
findNextSemicolon(const std::deque<std::string> & lines,FileLocation start)251 static FileLocation findNextSemicolon(const std::deque<std::string>& lines, FileLocation start) {
252   unsigned current_line = start.line;
253   unsigned current_column = start.column;
254   while (current_line <= lines.size()) {
255     size_t result = lines[current_line - 1].find_first_of(';', current_column - 1);
256 
257     if (result != std::string::npos) {
258       FileLocation loc = {
259         .line = current_line,
260         .column = unsigned(result) + 1,
261       };
262 
263       return loc;
264     }
265 
266     ++current_line;
267     current_column = 0;
268   }
269 
270   errx(1, "failed to find semicolon starting from %u:%u", start.line, start.column);
271 }
272 
273 // Merge adjacent blocks with identical guards.
mergeGuards(std::deque<std::string> & file_lines,GuardMap & guard_map)274 static void mergeGuards(std::deque<std::string>& file_lines, GuardMap& guard_map) {
275   if (guard_map.size() < 2) {
276     return;
277   }
278 
279   auto current = guard_map.begin();
280   auto next = current;
281   ++next;
282 
283   while (next != guard_map.end()) {
284     if (current->second != next->second) {
285       ++current;
286       ++next;
287       continue;
288     }
289 
290     // Scan from the end of current to the beginning of next.
291     bool in_block_comment = false;
292     bool valid = true;
293 
294     FileLocation current_location = current->first.end;
295     FileLocation end_location = next->first.start;
296 
297     auto nextLine = [&current_location]() {
298       ++current_location.line;
299       current_location.column = 1;
300     };
301 
302     auto nextCol = [&file_lines, &current_location, &nextLine]() {
303       if (current_location.column == file_lines[current_location.line - 1].length()) {
304         nextLine();
305       } else {
306         ++current_location.column;
307       }
308     };
309 
310     // The end location will point to the semicolon, which we don't want to read, so skip it.
311     nextCol();
312 
313     while (current_location < end_location) {
314       const std::string& line = file_lines[current_location.line - 1];
315       size_t line_index = current_location.column - 1;
316 
317       if (in_block_comment) {
318         size_t pos = line.find("*/", line_index);
319         if (pos == std::string::npos) {
320           D("Didn't find block comment terminator, skipping line\n");
321           nextLine();
322           continue;
323         } else {
324           D("Found block comment terminator\n");
325           in_block_comment = false;
326           current_location.column = pos + 2;
327           nextCol();
328           continue;
329         }
330       } else {
331         size_t pos = line.find_first_not_of(" \t", line_index);
332         if (pos == std::string::npos) {
333           nextLine();
334           continue;
335         }
336 
337         current_location.column = pos + 1;
338         if (line[pos] != '/') {
339           valid = false;
340           break;
341         }
342 
343         nextCol();
344         if (line.length() <= pos + 1) {
345           // Trailing slash at the end of a line?
346           D("Trailing slash at end of line\n");
347           valid = false;
348           break;
349         }
350 
351         if (line[pos + 1] == '/') {
352           // C++ style comment
353           nextLine();
354         } else if (line[pos + 1] == '*') {
355           // Block comment
356           nextCol();
357           in_block_comment = true;
358           D("In a block comment\n");
359         } else {
360           // Garbage?
361           D("Unexpected output after /: %s\n", line.substr(pos).c_str());
362           valid = false;
363           break;
364         }
365       }
366     }
367 
368     if (!valid) {
369       D("Not merging blocks %s and %s\n", to_string(current->first).c_str(),
370         to_string(next->first).c_str());
371       ++current;
372       ++next;
373       continue;
374     }
375 
376     D("Merging blocks %s and %s\n", to_string(current->first).c_str(),
377       to_string(next->first).c_str());
378 
379     Location merged = current->first;
380     merged.end = next->first.end;
381 
382     DeclarationAvailability avail = current->second;
383 
384     guard_map.erase(current);
385     guard_map.erase(next);
386     bool unused;
387     std::tie(current, unused) = guard_map.insert(std::make_pair(merged, avail));
388     next = current;
389     ++next;
390   }
391 }
392 
rewriteFile(const std::string & output_path,std::deque<std::string> & file_lines,const GuardMap & guard_map)393 static void rewriteFile(const std::string& output_path, std::deque<std::string>& file_lines,
394                         const GuardMap& guard_map) {
395   for (auto it = guard_map.rbegin(); it != guard_map.rend(); ++it) {
396     const Location& loc = it->first;
397     const DeclarationAvailability& avail = it->second;
398 
399     std::string condition = generateGuardCondition(avail);
400     if (condition.empty()) {
401       continue;
402     }
403 
404     std::string prologue = "\n#if "s + condition + "\n";
405     std::string epilogue = "\n#endif /* " + condition + " */\n";
406 
407     file_lines[loc.end.line - 1].insert(loc.end.column, epilogue);
408     file_lines[loc.start.line - 1].insert(loc.start.column - 1, prologue);
409   }
410 
411   if (verbose) {
412     printf("Preprocessing %s...\n", output_path.c_str());
413   }
414   writeFileLines(output_path, file_lines);
415 }
416 
preprocessHeaders(const std::string & dst_dir,const std::string & src_dir,HeaderDatabase * database)417 bool preprocessHeaders(const std::string& dst_dir, const std::string& src_dir,
418                        HeaderDatabase* database) {
419   std::unordered_map<std::string, GuardMap> guards;
420   std::unordered_map<std::string, std::deque<std::string>> file_lines;
421 
422   for (const auto& symbol_it : database->symbols) {
423     const Symbol& symbol = symbol_it.second;
424 
425     for (const auto& decl_it : symbol.declarations) {
426       const Location& location = decl_it.first;
427       const Declaration& decl = decl_it.second;
428 
429       if (decl.no_guard) {
430         // No guard required.
431         continue;
432       }
433 
434       DeclarationAvailability macro_guard = calculateRequiredGuard(decl);
435       if (!macro_guard.empty()) {
436         guards[location.filename][location] = macro_guard;
437       }
438     }
439   }
440 
441   // Copy over the original headers before preprocessing.
442   char* fts_paths[2] = { const_cast<char*>(src_dir.c_str()), nullptr };
443   std::unique_ptr<FTS, decltype(&fts_close)> fts(fts_open(fts_paths, FTS_LOGICAL, nullptr),
444                                                  fts_close);
445   if (!fts) {
446     err(1, "failed to open directory %s", src_dir.c_str());
447   }
448 
449   while (FTSENT* ent = fts_read(fts.get())) {
450     llvm::StringRef path = ent->fts_path;
451     if (!path.startswith(src_dir)) {
452       err(1, "path '%s' doesn't start with source dir '%s'", ent->fts_path, src_dir.c_str());
453     }
454 
455     if (ent->fts_info != FTS_F) {
456       continue;
457     }
458 
459     std::string rel_path = path.substr(src_dir.length() + 1).str();
460     std::string dst_path = dst_dir + "/" + rel_path;
461     llvm::StringRef parent_path = llvm::sys::path::parent_path(dst_path);
462     if (llvm::sys::fs::create_directories(parent_path)) {
463       errx(1, "failed to ensure existence of directory '%s'", parent_path.str().c_str());
464     }
465     if (llvm::sys::fs::copy_file(path, dst_path)) {
466       errx(1, "failed to copy '%s/%s' to '%s'", src_dir.c_str(), path.str().c_str(),
467            dst_path.c_str());
468     }
469   }
470 
471   for (const auto& file_it : guards) {
472     file_lines[file_it.first] = readFileLines(file_it.first);
473   }
474 
475   for (auto& file_it : guards) {
476     llvm::StringRef file_path = file_it.first;
477     GuardMap& orig_guard_map = file_it.second;
478 
479     // The end positions given to us are the end of the declaration, which is some point before the
480     // semicolon. Fix up the end positions by scanning for the next semicolon.
481     GuardMap guard_map;
482     for (const auto& it : orig_guard_map) {
483       Location loc = it.first;
484       loc.end = findNextSemicolon(file_lines[file_path.str()], loc.end);
485       guard_map[loc] = it.second;
486     }
487 
488     // TODO: Make sure that the Locations don't overlap.
489     // TODO: Merge adjacent non-identical guards.
490     mergeGuards(file_lines[file_path.str()], guard_map);
491 
492     if (!file_path.startswith(src_dir)) {
493       errx(1, "input file %s is not in %s\n", file_path.str().c_str(), src_dir.c_str());
494     }
495 
496     // rel_path has a leading slash.
497     llvm::StringRef rel_path = file_path.substr(src_dir.size(), file_path.size() - src_dir.size());
498     std::string output_path = (llvm::Twine(dst_dir) + rel_path).str();
499 
500     rewriteFile(output_path, file_lines[file_path.str()], guard_map);
501   }
502 
503   return true;
504 }
505