1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "read_elf.h"
18 #include "read_apk.h"
19 
20 #include <stdio.h>
21 #include <string.h>
22 #include <sys/stat.h>
23 #include <sys/types.h>
24 
25 #include <algorithm>
26 #include <limits>
27 
28 #include <android-base/file.h>
29 #include <android-base/logging.h>
30 
31 #pragma clang diagnostic push
32 #pragma clang diagnostic ignored "-Wunused-parameter"
33 
34 #include <llvm/ADT/StringRef.h>
35 #include <llvm/Object/Binary.h>
36 #include <llvm/Object/ELFObjectFile.h>
37 #include <llvm/Object/ObjectFile.h>
38 
39 #pragma clang diagnostic pop
40 
41 #include "utils.h"
42 
43 #define ELF_NOTE_GNU "GNU"
44 #define NT_GNU_BUILD_ID 3
45 
46 
IsValidElfFile(int fd)47 bool IsValidElfFile(int fd) {
48   static const char elf_magic[] = {0x7f, 'E', 'L', 'F'};
49   char buf[4];
50   return android::base::ReadFully(fd, buf, 4) && memcmp(buf, elf_magic, 4) == 0;
51 }
52 
IsValidElfPath(const std::string & filename)53 bool IsValidElfPath(const std::string& filename) {
54   if (!IsRegularFile(filename)) {
55     return false;
56   }
57   std::string mode = std::string("rb") + CLOSE_ON_EXEC_MODE;
58   FILE* fp = fopen(filename.c_str(), mode.c_str());
59   if (fp == nullptr) {
60     return false;
61   }
62   bool result = IsValidElfFile(fileno(fp));
63   fclose(fp);
64   return result;
65 }
66 
GetBuildIdFromNoteSection(const char * section,size_t section_size,BuildId * build_id)67 static bool GetBuildIdFromNoteSection(const char* section, size_t section_size, BuildId* build_id) {
68   const char* p = section;
69   const char* end = p + section_size;
70   while (p < end) {
71     CHECK_LE(p + 12, end);
72     size_t namesz = *reinterpret_cast<const uint32_t*>(p);
73     p += 4;
74     size_t descsz = *reinterpret_cast<const uint32_t*>(p);
75     p += 4;
76     uint32_t type = *reinterpret_cast<const uint32_t*>(p);
77     p += 4;
78     namesz = ALIGN(namesz, 4);
79     descsz = ALIGN(descsz, 4);
80     CHECK_LE(p + namesz + descsz, end);
81     if ((type == NT_GNU_BUILD_ID) && (strcmp(p, ELF_NOTE_GNU) == 0)) {
82       *build_id = BuildId(p + namesz, descsz);
83       return true;
84     }
85     p += namesz + descsz;
86   }
87   return false;
88 }
89 
GetBuildIdFromNoteFile(const std::string & filename,BuildId * build_id)90 bool GetBuildIdFromNoteFile(const std::string& filename, BuildId* build_id) {
91   std::string content;
92   if (!android::base::ReadFileToString(filename, &content)) {
93     LOG(DEBUG) << "can't read note file " << filename;
94     return false;
95   }
96   if (GetBuildIdFromNoteSection(content.c_str(), content.size(), build_id) == false) {
97     LOG(DEBUG) << "can't read build_id from note file " << filename;
98     return false;
99   }
100   return true;
101 }
102 
103 template <class ELFT>
GetBuildIdFromELFFile(const llvm::object::ELFFile<ELFT> * elf,BuildId * build_id)104 bool GetBuildIdFromELFFile(const llvm::object::ELFFile<ELFT>* elf, BuildId* build_id) {
105   for (auto section_iterator = elf->section_begin(); section_iterator != elf->section_end();
106        ++section_iterator) {
107     if (section_iterator->sh_type == llvm::ELF::SHT_NOTE) {
108       auto contents = elf->getSectionContents(&*section_iterator);
109       if (contents.getError()) {
110         LOG(DEBUG) << "read note section error";
111         continue;
112       }
113       if (GetBuildIdFromNoteSection(reinterpret_cast<const char*>(contents->data()),
114                                     contents->size(), build_id)) {
115         return true;
116       }
117     }
118   }
119   return false;
120 }
121 
GetBuildIdFromObjectFile(llvm::object::ObjectFile * obj,BuildId * build_id)122 static bool GetBuildIdFromObjectFile(llvm::object::ObjectFile* obj, BuildId* build_id) {
123   bool result = false;
124   if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(obj)) {
125     result = GetBuildIdFromELFFile(elf->getELFFile(), build_id);
126   } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(obj)) {
127     result = GetBuildIdFromELFFile(elf->getELFFile(), build_id);
128   } else {
129     LOG(ERROR) << "unknown elf format in file " << obj->getFileName().data();
130     return false;
131   }
132   if (!result) {
133     LOG(DEBUG) << "no build id present in file " << obj->getFileName().data();
134   }
135   return result;
136 }
137 
138 struct BinaryRet {
139   llvm::object::OwningBinary<llvm::object::Binary> binary;
140   llvm::object::ObjectFile* obj;
141 
BinaryRetBinaryRet142   BinaryRet() : obj(nullptr) {
143   }
144 };
145 
OpenObjectFile(const std::string & filename,uint64_t file_offset=0,uint64_t file_size=0)146 static BinaryRet OpenObjectFile(const std::string& filename, uint64_t file_offset = 0,
147                                 uint64_t file_size = 0) {
148   BinaryRet ret;
149   FileHelper fhelper = FileHelper::OpenReadOnly(filename);
150   if (!fhelper) {
151     PLOG(DEBUG) << "failed to open " << filename;
152     return ret;
153   }
154   if (file_size == 0) {
155     file_size = GetFileSize(filename);
156     if (file_size == 0) {
157       PLOG(ERROR) << "failed to get size of file " << filename;
158       return ret;
159     }
160   }
161   auto buffer_or_err = llvm::MemoryBuffer::getOpenFileSlice(fhelper.fd(), filename, file_size, file_offset);
162   if (!buffer_or_err) {
163     LOG(ERROR) << "failed to read " << filename << " [" << file_offset << "-" << (file_offset + file_size)
164         << "]: " << buffer_or_err.getError().message();
165     return ret;
166   }
167   auto binary_or_err = llvm::object::createBinary(buffer_or_err.get()->getMemBufferRef());
168   if (!binary_or_err) {
169     LOG(ERROR) << filename << " [" << file_offset << "-" << (file_offset + file_size)
170         << "] is not a binary file: " << binary_or_err.getError().message();
171     return ret;
172   }
173   ret.binary = llvm::object::OwningBinary<llvm::object::Binary>(std::move(binary_or_err.get()),
174                                                                 std::move(buffer_or_err.get()));
175   ret.obj = llvm::dyn_cast<llvm::object::ObjectFile>(ret.binary.getBinary());
176   if (ret.obj == nullptr) {
177     LOG(ERROR) << filename << " [" << file_offset << "-" << (file_offset + file_size)
178         << "] is not an object file";
179   }
180   return ret;
181 }
182 
GetBuildIdFromElfFile(const std::string & filename,BuildId * build_id)183 bool GetBuildIdFromElfFile(const std::string& filename, BuildId* build_id) {
184   if (!IsValidElfPath(filename)) {
185     return false;
186   }
187   bool result = GetBuildIdFromEmbeddedElfFile(filename, 0, 0, build_id);
188   LOG(VERBOSE) << "GetBuildIdFromElfFile(" << filename << ") => " << build_id->ToString();
189   return result;
190 }
191 
GetBuildIdFromEmbeddedElfFile(const std::string & filename,uint64_t file_offset,uint32_t file_size,BuildId * build_id)192 bool GetBuildIdFromEmbeddedElfFile(const std::string& filename, uint64_t file_offset,
193                                    uint32_t file_size, BuildId* build_id) {
194   BinaryRet ret = OpenObjectFile(filename, file_offset, file_size);
195   if (ret.obj == nullptr) {
196     return false;
197   }
198   return GetBuildIdFromObjectFile(ret.obj, build_id);
199 }
200 
// Returns true when |name| is an ARM/AArch64 mapping symbol as described in
// "ELF for ARM Architecture" and "ELF for ARM 64-bit Architecture", i.e. when it
// matches the regular expression ^\$(a|d|t|x)(\..*)?$
//
// |name| must be a NUL-terminated string. No byte past its terminator is read.
bool IsArmMappingSymbol(const char* name) {
  if (name[0] != '$') {
    return false;
  }
  const char kind = name[1];
  // strchr() treats the terminating NUL as part of the searched string, so a
  // bare "$" has to be rejected before the lookup; otherwise it would pass and
  // name[2] would be read beyond the end of the string.
  if (kind == '\0' || strchr("adtx", kind) == nullptr) {
    return false;
  }
  return name[2] == '\0' || name[2] == '.';
}
207 
208 template <class ELFT>
ParseSymbolsFromELFFile(const llvm::object::ELFObjectFile<ELFT> * elf_obj,std::function<void (const ElfFileSymbol &)> callback)209 void ParseSymbolsFromELFFile(const llvm::object::ELFObjectFile<ELFT>* elf_obj,
210                              std::function<void(const ElfFileSymbol&)> callback) {
211   auto elf = elf_obj->getELFFile();
212   bool is_arm = (elf->getHeader()->e_machine == llvm::ELF::EM_ARM ||
213                  elf->getHeader()->e_machine == llvm::ELF::EM_AARCH64);
214   auto begin = elf_obj->symbol_begin();
215   auto end = elf_obj->symbol_end();
216   if (begin == end) {
217     begin = elf_obj->dynamic_symbol_begin();
218     end = elf_obj->dynamic_symbol_end();
219   }
220   for (; begin != end; ++begin) {
221     ElfFileSymbol symbol;
222     auto elf_symbol = static_cast<const llvm::object::ELFSymbolRef*>(&*begin);
223     auto section_it = elf_symbol->getSection();
224     if (!section_it) {
225       continue;
226     }
227     llvm::StringRef section_name;
228     if (section_it.get()->getName(section_name) || section_name.empty()) {
229       continue;
230     }
231     if (section_name.str() == ".text") {
232       symbol.is_in_text_section = true;
233     }
234 
235     auto symbol_name = elf_symbol->getName();
236     if (!symbol_name || symbol_name.get().empty()) {
237       continue;
238     }
239     symbol.name = symbol_name.get();
240     symbol.vaddr = elf_symbol->getValue();
241     if ((symbol.vaddr & 1) != 0 && is_arm) {
242       // Arm sets bit 0 to mark it as thumb code, remove the flag.
243       symbol.vaddr &= ~1;
244     }
245     symbol.len = elf_symbol->getSize();
246     int type = elf_symbol->getELFType();
247     if (type == llvm::ELF::STT_FUNC) {
248       symbol.is_func = true;
249     } else if (type == llvm::ELF::STT_NOTYPE) {
250       if (symbol.is_in_text_section) {
251         symbol.is_label = true;
252         if (is_arm) {
253           // Remove mapping symbols in arm.
254           const char* p = (symbol.name.compare(0, linker_prefix.size(), linker_prefix) == 0)
255                               ? symbol.name.c_str() + linker_prefix.size()
256                               : symbol.name.c_str();
257           if (IsArmMappingSymbol(p)) {
258             symbol.is_label = false;
259           }
260         }
261       }
262     }
263 
264     callback(symbol);
265   }
266 }
267 
MatchBuildId(llvm::object::ObjectFile * obj,const BuildId & expected_build_id,const std::string & debug_filename)268 bool MatchBuildId(llvm::object::ObjectFile* obj, const BuildId& expected_build_id,
269                   const std::string& debug_filename) {
270   if (expected_build_id.IsEmpty()) {
271     return true;
272   }
273   BuildId real_build_id;
274   if (!GetBuildIdFromObjectFile(obj, &real_build_id)) {
275     return false;
276   }
277   if (expected_build_id != real_build_id) {
278     LOG(DEBUG) << "build id for " << debug_filename << " mismatch: "
279                << "expected " << expected_build_id.ToString()
280                << ", real " << real_build_id.ToString();
281     return false;
282   }
283   return true;
284 }
285 
ParseSymbolsFromElfFile(const std::string & filename,const BuildId & expected_build_id,std::function<void (const ElfFileSymbol &)> callback)286 bool ParseSymbolsFromElfFile(const std::string& filename, const BuildId& expected_build_id,
287                              std::function<void(const ElfFileSymbol&)> callback) {
288   if (!IsValidElfPath(filename)) {
289     return false;
290   }
291   return ParseSymbolsFromEmbeddedElfFile(filename, 0, 0, expected_build_id, callback);
292 }
293 
ParseSymbolsFromEmbeddedElfFile(const std::string & filename,uint64_t file_offset,uint32_t file_size,const BuildId & expected_build_id,std::function<void (const ElfFileSymbol &)> callback)294 bool ParseSymbolsFromEmbeddedElfFile(const std::string& filename, uint64_t file_offset,
295                                      uint32_t file_size, const BuildId& expected_build_id,
296                                      std::function<void(const ElfFileSymbol&)> callback) {
297   BinaryRet ret = OpenObjectFile(filename, file_offset, file_size);
298   if (ret.obj == nullptr || !MatchBuildId(ret.obj, expected_build_id, filename)) {
299     return false;
300   }
301   if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(ret.obj)) {
302     ParseSymbolsFromELFFile(elf, callback);
303   } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(ret.obj)) {
304     ParseSymbolsFromELFFile(elf, callback);
305   } else {
306     LOG(ERROR) << "unknown elf format in file " << filename;
307     return false;
308   }
309   return true;
310 }
311 
312 template <class ELFT>
ReadMinExecutableVirtualAddress(const llvm::object::ELFFile<ELFT> * elf,uint64_t * p_vaddr)313 bool ReadMinExecutableVirtualAddress(const llvm::object::ELFFile<ELFT>* elf, uint64_t* p_vaddr) {
314   bool has_vaddr = false;
315   uint64_t min_addr = std::numeric_limits<uint64_t>::max();
316   for (auto it = elf->program_header_begin(); it != elf->program_header_end(); ++it) {
317     if ((it->p_type == llvm::ELF::PT_LOAD) && (it->p_flags & llvm::ELF::PF_X)) {
318       if (it->p_vaddr < min_addr) {
319         min_addr = it->p_vaddr;
320         has_vaddr = true;
321       }
322     }
323   }
324   if (has_vaddr) {
325     *p_vaddr = min_addr;
326   }
327   return has_vaddr;
328 }
329 
ReadMinExecutableVirtualAddressFromElfFile(const std::string & filename,const BuildId & expected_build_id,uint64_t * min_vaddr)330 bool ReadMinExecutableVirtualAddressFromElfFile(const std::string& filename,
331                                                 const BuildId& expected_build_id,
332                                                 uint64_t* min_vaddr) {
333   if (!IsValidElfPath(filename)) {
334     return false;
335   }
336   BinaryRet ret = OpenObjectFile(filename);
337   if (ret.obj == nullptr || !MatchBuildId(ret.obj, expected_build_id, filename)) {
338     return false;
339   }
340 
341   bool result = false;
342   if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(ret.obj)) {
343     result = ReadMinExecutableVirtualAddress(elf->getELFFile(), min_vaddr);
344   } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(ret.obj)) {
345     result = ReadMinExecutableVirtualAddress(elf->getELFFile(), min_vaddr);
346   } else {
347     LOG(ERROR) << "unknown elf format in file" << filename;
348     return false;
349   }
350 
351   if (!result) {
352     LOG(ERROR) << "no program header in file " << filename;
353   }
354   return result;
355 }
356 
357 template <class ELFT>
ReadSectionFromELFFile(const llvm::object::ELFFile<ELFT> * elf,const std::string & section_name,std::string * content)358 bool ReadSectionFromELFFile(const llvm::object::ELFFile<ELFT>* elf, const std::string& section_name,
359                             std::string* content) {
360   for (auto it = elf->section_begin(); it != elf->section_end(); ++it) {
361     auto name_or_err = elf->getSectionName(&*it);
362     if (name_or_err && *name_or_err == section_name) {
363       auto data_or_err = elf->getSectionContents(&*it);
364       if (!data_or_err) {
365         LOG(ERROR) << "failed to read section " << section_name;
366         return false;
367       }
368       content->append(data_or_err->begin(), data_or_err->end());
369       return true;
370     }
371   }
372   LOG(ERROR) << "can't find section " << section_name;
373   return false;
374 }
375 
ReadSectionFromElfFile(const std::string & filename,const std::string & section_name,std::string * content)376 bool ReadSectionFromElfFile(const std::string& filename, const std::string& section_name,
377                             std::string* content) {
378   if (!IsValidElfPath(filename)) {
379     return false;
380   }
381   BinaryRet ret = OpenObjectFile(filename);
382   if (ret.obj == nullptr) {
383     return false;
384   }
385   bool result = false;
386   if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(ret.obj)) {
387     result = ReadSectionFromELFFile(elf->getELFFile(), section_name, content);
388   } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(ret.obj)) {
389     result = ReadSectionFromELFFile(elf->getELFFile(), section_name, content);
390   } else {
391     LOG(ERROR) << "unknown elf format in file" << filename;
392     return false;
393   }
394   return result;
395 }
396