1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <elf.h>
18 #include <stdint.h>
19 #include <string.h>
20 
21 #include <algorithm>
22 #include <string>
23 #include <vector>
24 
25 #include <unwindstack/Memory.h>
26 
27 #include "Check.h"
28 #include "Symbols.h"
29 
30 namespace unwindstack {
31 
Symbols(uint64_t offset,uint64_t size,uint64_t entry_size,uint64_t str_offset,uint64_t str_size)32 Symbols::Symbols(uint64_t offset, uint64_t size, uint64_t entry_size, uint64_t str_offset,
33                  uint64_t str_size)
34     : offset_(offset),
35       count_(entry_size != 0 ? size / entry_size : 0),
36       entry_size_(entry_size),
37       str_offset_(str_offset),
38       str_end_(str_offset_ + str_size) {}
39 
40 template <typename SymType>
IsFunc(const SymType * entry)41 static bool IsFunc(const SymType* entry) {
42   return entry->st_shndx != SHN_UNDEF && ELF32_ST_TYPE(entry->st_info) == STT_FUNC;
43 }
44 
45 // Binary search the symbol table to find function containing the given address.
46 // Without remap, the symbol table is assumed to be sorted and accessed directly.
47 // If the symbol table is not sorted this method might fail but should not crash.
48 // When the indices are remapped, they are guaranteed to be sorted by address.
49 template <typename SymType, bool RemapIndices>
BinarySearch(uint64_t addr,Memory * elf_memory,uint64_t * func_offset)50 Symbols::Info* Symbols::BinarySearch(uint64_t addr, Memory* elf_memory, uint64_t* func_offset) {
51   // Fast-path: Check if the symbol has been already read from memory.
52   // Otherwise use the cache iterator to constrain the binary search range.
53   // (the symbol must be in the gap between this and the previous iterator)
54   auto it = symbols_.upper_bound(addr);
55   if (it != symbols_.end()) {
56     uint64_t sym_value = (it->first - it->second.size);  // Function address.
57     if (sym_value <= addr) {
58       *func_offset = addr - sym_value;
59       return &it->second;
60     }
61   }
62   uint32_t count = RemapIndices ? remap_->size() : count_;
63   uint32_t last = (it != symbols_.end()) ? it->second.index : count;
64   uint32_t first = (it != symbols_.begin()) ? std::prev(it)->second.index + 1 : 0;
65 
66   while (first < last) {
67     uint32_t current = first + (last - first) / 2;
68     uint32_t symbol_index = RemapIndices ? remap_.value()[current] : current;
69     SymType sym;
70     if (!elf_memory->ReadFully(offset_ + symbol_index * entry_size_, &sym, sizeof(sym))) {
71       return nullptr;
72     }
73     // There shouldn't be multiple symbols with same end address, but in case there are,
74     // overwrite the cache with the last entry, so that 'sym' and 'info' are consistent.
75     Info& info = symbols_[sym.st_value + sym.st_size];
76     info = {.size = static_cast<uint32_t>(sym.st_size), .index = current};
77     if (addr < sym.st_value) {
78       last = current;
79     } else if (addr < sym.st_value + sym.st_size) {
80       *func_offset = addr - sym.st_value;
81       return &info;
82     } else {
83       first = current + 1;
84     }
85   }
86   return nullptr;
87 }
88 
89 // Create remapping table which allows us to access symbols as if they were sorted by address.
90 template <typename SymType>
BuildRemapTable(Memory * elf_memory)91 void Symbols::BuildRemapTable(Memory* elf_memory) {
92   std::vector<uint64_t> addrs;  // Addresses of all symbols (addrs[i] == symbols[i].st_value).
93   addrs.reserve(count_);
94   remap_.emplace();  // Construct the optional remap table.
95   remap_->reserve(count_);
96   for (size_t symbol_idx = 0; symbol_idx < count_;) {
97     // Read symbols from memory.  We intentionally bypass the cache to save memory.
98     // Do the reads in batches so that we minimize the number of memory read calls.
99     uint8_t buffer[1024];
100     size_t read = std::min<size_t>(sizeof(buffer), (count_ - symbol_idx) * entry_size_);
101     size_t size = elf_memory->Read(offset_ + symbol_idx * entry_size_, buffer, read);
102     if (size < sizeof(SymType)) {
103       break;  // Stop processing, something looks like it is corrupted.
104     }
105     for (size_t offset = 0; offset + sizeof(SymType) <= size; offset += entry_size_, symbol_idx++) {
106       SymType sym;
107       memcpy(&sym, &buffer[offset], sizeof(SymType));  // Copy to ensure alignment.
108       addrs.push_back(sym.st_value);  // Always insert so it is indexable by symbol index.
109       // NB: It is important to filter our zero-sized symbols since otherwise we can get
110       // duplicate end addresses in the table (e.g. if there is custom "end" symbol marker).
111       if (IsFunc(&sym) && sym.st_size != 0) {
112         remap_->push_back(symbol_idx);  // Indices of function symbols only.
113       }
114     }
115   }
116   // Sort by address to make the remap list binary searchable (stable due to the a<b tie break).
117   auto comp = [&addrs](auto a, auto b) { return std::tie(addrs[a], a) < std::tie(addrs[b], b); };
118   std::sort(remap_->begin(), remap_->end(), comp);
119   // Remove duplicate entries (methods de-duplicated by the linker).
120   auto pred = [&addrs](auto a, auto b) { return addrs[a] == addrs[b]; };
121   remap_->erase(std::unique(remap_->begin(), remap_->end(), pred), remap_->end());
122   remap_->shrink_to_fit();
123 }
124 
125 template <typename SymType>
GetName(uint64_t addr,Memory * elf_memory,SharedString * name,uint64_t * func_offset)126 bool Symbols::GetName(uint64_t addr, Memory* elf_memory, SharedString* name,
127                       uint64_t* func_offset) {
128   Info* info;
129   if (!remap_.has_value()) {
130     // Assume the symbol table is sorted. If it is not, this will gracefully fail.
131     info = BinarySearch<SymType, false>(addr, elf_memory, func_offset);
132     if (info == nullptr) {
133       // Create the remapping table and retry the search.
134       BuildRemapTable<SymType>(elf_memory);
135       symbols_.clear();  // Remove cached symbols since the access pattern will be different.
136       info = BinarySearch<SymType, true>(addr, elf_memory, func_offset);
137     }
138   } else {
139     // Fast search using the previously created remap table.
140     info = BinarySearch<SymType, true>(addr, elf_memory, func_offset);
141   }
142   if (info == nullptr) {
143     return false;
144   }
145   // Read and cache the symbol name.
146   if (info->name.is_null()) {
147     SymType sym;
148     uint32_t symbol_index = remap_.has_value() ? remap_.value()[info->index] : info->index;
149     if (!elf_memory->ReadFully(offset_ + symbol_index * entry_size_, &sym, sizeof(sym))) {
150       return false;
151     }
152     std::string symbol_name;
153     uint64_t str;
154     if (__builtin_add_overflow(str_offset_, sym.st_name, &str) || str >= str_end_) {
155       return false;
156     }
157     if (!IsFunc(&sym) || !elf_memory->ReadString(str, &symbol_name, str_end_ - str)) {
158       return false;
159     }
160     info->name = SharedString(std::move(symbol_name));
161   }
162   *name = info->name;
163   return true;
164 }
165 
166 template <typename SymType>
GetGlobal(Memory * elf_memory,const std::string & name,uint64_t * memory_address)167 bool Symbols::GetGlobal(Memory* elf_memory, const std::string& name, uint64_t* memory_address) {
168   // Lookup from cache.
169   auto it = global_variables_.find(name);
170   if (it != global_variables_.end()) {
171     if (it->second.has_value()) {
172       *memory_address = it->second.value();
173       return true;
174     }
175     return false;
176   }
177 
178   // Linear scan of all symbols.
179   for (uint32_t i = 0; i < count_; i++) {
180     SymType entry;
181     if (!elf_memory->ReadFully(offset_ + i * entry_size_, &entry, sizeof(entry))) {
182       return false;
183     }
184 
185     if (entry.st_shndx != SHN_UNDEF && ELF32_ST_TYPE(entry.st_info) == STT_OBJECT &&
186         ELF32_ST_BIND(entry.st_info) == STB_GLOBAL) {
187       uint64_t str_offset = str_offset_ + entry.st_name;
188       if (str_offset < str_end_) {
189         std::string symbol;
190         if (elf_memory->ReadString(str_offset, &symbol, str_end_ - str_offset) && symbol == name) {
191           global_variables_.emplace(name, entry.st_value);
192           *memory_address = entry.st_value;
193           return true;
194         }
195       }
196     }
197   }
198   global_variables_.emplace(name, std::optional<uint64_t>());  // Remember "not found" outcome.
199   return false;
200 }
201 
202 // Instantiate all of the needed template functions.
203 template bool Symbols::GetName<Elf32_Sym>(uint64_t, Memory*, SharedString*, uint64_t*);
204 template bool Symbols::GetName<Elf64_Sym>(uint64_t, Memory*, SharedString*, uint64_t*);
205 
206 template bool Symbols::GetGlobal<Elf32_Sym>(Memory*, const std::string&, uint64_t*);
207 template bool Symbols::GetGlobal<Elf64_Sym>(Memory*, const std::string&, uint64_t*);
208 }  // namespace unwindstack
209