/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef _LIBUNWINDSTACK_GLOBAL_DEBUG_IMPL_H
#define _LIBUNWINDSTACK_GLOBAL_DEBUG_IMPL_H

#include <stdint.h>
#include <string.h>
#include <sys/mman.h>

#include <atomic>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <tuple>
#include <vector>

#include <unwindstack/Global.h>
#include <unwindstack/Maps.h>

#include "Check.h"
#include "GlobalDebugInterface.h"
#include "MemoryCache.h"
#include "MemoryRange.h"

// This implements the JIT Compilation Interface.
// See https://sourceware.org/gdb/onlinedocs/gdb/JIT-Interface.html
//
// We use it to get in-memory ELF files created by the ART compiler,
// but we also use it to get the list of DEX files used by the runtime.

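// A minimal usage sketch, for orientation only (not authoritative): it assumes a concrete
// Symfile type, existing `arch`, `maps`, `process_memory` and `search_libs` values, that
// Find() is exposed through GlobalDebugInterface, and uses "__dex_debug_descriptor"
// (mentioned further below) purely as an example global variable name.
//
//   std::unique_ptr<GlobalDebugInterface<Symfile>> debug = CreateGlobalDebugImpl<Symfile>(
//       arch, process_memory, search_libs, "__dex_debug_descriptor");
//   Symfile* symfile = debug->Find(maps, pc);  // Symfile covering the given PC, if any.
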
namespace unwindstack {

// Implementation templated for ELF/DEX and for different architectures.
template <typename Symfile, typename Uintptr_T, typename Uint64_T>
class GlobalDebugImpl : public GlobalDebugInterface<Symfile>, public Global {
 public:
  static constexpr int kMaxRaceRetries = 16;
  static constexpr int kMaxHeadRetries = 16;
  static constexpr uint8_t kMagic[8] = {'A', 'n', 'd', 'r', 'o', 'i', 'd', '2'};

  struct JITCodeEntry {
    Uintptr_T next;
    Uintptr_T prev;
    Uintptr_T symfile_addr;
    Uint64_T symfile_size;
    // Android-specific fields:
    Uint64_T timestamp;
    uint32_t seqlock;
  };
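  // Note on the seqlock field above: it exists only in the Android-specific (v2) entry
  // layout selected by the descriptor magic check below. As used by ReadNextField() and
  // CheckSeqlock(), an odd value marks an entry that is not live, and a value that has
  // changed since the entry was read marks an entry that was deleted or reused.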

  static constexpr size_t kSizeOfCodeEntryV1 = offsetof(JITCodeEntry, timestamp);
  static constexpr size_t kSizeOfCodeEntryV2 = sizeof(JITCodeEntry);

  struct JITDescriptor {
    uint32_t version;
    uint32_t action_flag;
    Uintptr_T relevant_entry;
    Uintptr_T first_entry;
    // Android-specific fields:
    uint8_t magic[8];
    uint32_t flags;
    uint32_t sizeof_descriptor;
    uint32_t sizeof_entry;
    uint32_t seqlock;
    Uint64_T timestamp;
  };

  static constexpr size_t kSizeOfDescriptorV1 = offsetof(JITDescriptor, magic);
  static constexpr size_t kSizeOfDescriptorV2 = sizeof(JITDescriptor);

  // This uniquely identifies an entry in the presence of concurrent modifications.
  // Each (address, seqlock) pair is unique for each newly created JIT entry.
  struct UID {
    uint64_t address;  // Address of the JITCodeEntry in memory.
    uint32_t seqlock;  // This serves as a "version" for the given address.

    bool operator<(const UID& other) const {
      return std::tie(address, seqlock) < std::tie(other.address, other.seqlock);
    }
  };

  GlobalDebugImpl(ArchEnum arch, std::shared_ptr<Memory>& memory,
                  std::vector<std::string>& search_libs, const char* global_variable_name)
      : Global(memory, search_libs), global_variable_name_(global_variable_name) {
    SetArch(arch);
  }

  bool ReadDescriptor(uint64_t addr) {
    JITDescriptor desc{};
    // Try to read the full descriptor including Android-specific fields.
    if (!this->memory_->ReadFully(addr, &desc, kSizeOfDescriptorV2)) {
      // Fallback to just the minimal descriptor.
      // This will make the magic check below fail.
      if (!this->memory_->ReadFully(addr, &desc, kSizeOfDescriptorV1)) {
        return false;
      }
    }

    if (desc.version != 1 || desc.first_entry == 0) {
      // Either unknown version, or no JIT entries.
      return false;
    }

    // Check if there are extra Android-specific fields.
    if (memcmp(desc.magic, kMagic, sizeof(kMagic)) == 0) {
      jit_entry_size_ = kSizeOfCodeEntryV2;
      seqlock_offset_ = offsetof(JITCodeEntry, seqlock);
    } else {
      jit_entry_size_ = kSizeOfCodeEntryV1;
      seqlock_offset_ = 0;
    }
    descriptor_addr_ = addr;
    return true;
  }

  void ProcessArch() {}

  bool ReadVariableData(uint64_t ptr) { return ReadDescriptor(ptr); }

  // Invoke the callback for all symfiles that contain the given PC.
  // Returns true if any callback returns true (which also aborts the iteration).
  template <typename Callback /* (Symfile*) -> bool */>
  bool ForEachSymfile(Maps* maps, uint64_t pc, Callback callback) {
    // Use a single lock; this object should be used so infrequently that
    // a fine-grained lock is unnecessary.
    std::lock_guard<std::mutex> guard(lock_);
    if (descriptor_addr_ == 0) {
      FindAndReadVariable(maps, global_variable_name_);
      if (descriptor_addr_ == 0) {
        return false;
      }
    }

    // Try to find the entry in the already loaded symbol files.
    for (auto& it : entries_) {
      Symfile* symfile = it.second.get();
      // Check the seqlock to make sure that the entry is still valid (it may be very old).
      if (symfile->IsValidPc(pc) && CheckSeqlock(it.first) && callback(symfile)) {
        return true;
      }
    }

    // Update all entries and retry.
    ReadAllEntries(maps);
    for (auto& it : entries_) {
      Symfile* symfile = it.second.get();
      // Note that the entry could have become invalid since the ReadAllEntries call above,
      // but that is ok. We don't want to fail or refresh the entries yet again.
      // This is as if we had found the entry in time and it became invalid after we returned.
      // This is relevant when ART moves/packs JIT entries. That is, the entry is
      // technically deleted, but only because it was copied into a merged uber-entry.
      // So the JIT method is still alive and the deleted data is still correct.
      if (symfile->IsValidPc(pc) && callback(symfile)) {
        return true;
      }
    }

    return false;
  }

  bool GetFunctionName(Maps* maps, uint64_t pc, SharedString* name, uint64_t* offset) {
    // NB: If symfiles overlap in PC ranges, this will check all of them.
    return ForEachSymfile(maps, pc, [pc, name, offset](Symfile* file) {
      return file->GetFunctionName(pc, name, offset);
    });
  }

  Symfile* Find(Maps* maps, uint64_t pc) {
    // NB: If symfiles overlap in PC ranges (which can happen for both ELF and DEX),
    // this will check all of them and return one that also has a matching function.
    Symfile* result = nullptr;
    bool found = ForEachSymfile(maps, pc, [pc, &result](Symfile* file) {
      result = file;
      SharedString name;
      uint64_t offset;
      return file->GetFunctionName(pc, &name, &offset);
    });
    if (found) {
      return result;  // Found symfile with symbol that also matches the PC.
    }
    // There is no matching symbol, so return any symfile for which the PC is valid.
    // This is a useful fallback for tests, which often have symfiles with no functions.
    return result;
  }

  // Read all entries from the process and cache them locally.
  // The linked list might be concurrently modified. We detect races and retry.
  bool ReadAllEntries(Maps* maps) {
    for (int i = 0; i < kMaxRaceRetries; i++) {
      bool race = false;
      if (!ReadAllEntries(maps, &race)) {
        if (race) {
          continue;  // Retry due to concurrent modification of the linked list.
        }
        return false;  // Failed to read entries.
      }
      return true;  // Success.
    }
    return false;  // Too many retries.
  }

  // Read all JIT entries while assuming there might be concurrent modifications.
  // If there is a race, the method will fail and the caller should retry the call.
  bool ReadAllEntries(Maps* maps, bool* race) {
    // New entries might be added while we iterate over the linked list.
    // In particular, an entry could be effectively moved from end to start due to
    // the ART repacking algorithm, which groups smaller entries into a big one.
    // Therefore keep reading the most recent entries until we reach a fixed point.
    std::map<UID, std::shared_ptr<Symfile>> entries;
    for (size_t i = 0; i < kMaxHeadRetries; i++) {
      size_t old_size = entries.size();
      if (!ReadNewEntries(maps, &entries, race)) {
        return false;
      }
      if (entries.size() == old_size) {
        entries_.swap(entries);
        return true;
      }
    }
    return false;  // Too many retries.
  }

  // Read new JIT entries (head of the linked list) until we find one that we have seen before.
  // This method uses seqlocks extensively to ensure safety in case of concurrent modifications.
  bool ReadNewEntries(Maps* maps, std::map<UID, std::shared_ptr<Symfile>>* entries, bool* race) {
    // Read the address of the head entry in the linked list.
    UID uid;
    if (!ReadNextField(descriptor_addr_ + offsetof(JITDescriptor, first_entry), &uid, race)) {
      return false;
    }

    // Follow the linked list.
    while (uid.address != 0) {
      // Check if we have reached an already cached entry (we restart from the head repeatedly).
      if (entries->count(uid) != 0) {
        return true;
      }

      // Read the entry.
      JITCodeEntry data{};
      if (!memory_->ReadFully(uid.address, &data, jit_entry_size_)) {
        return false;
      }
      data.symfile_addr = StripAddressTag(data.symfile_addr);

      // Check the seqlock to verify the symfile_addr and symfile_size.
      if (!CheckSeqlock(uid, race)) {
        return false;
      }

      // Copy and load the symfile.
      auto it = entries_.find(uid);
      if (it != entries_.end()) {
        // The symfile was already loaded - just copy the reference.
        entries->emplace(uid, it->second);
      } else if (data.symfile_addr != 0) {
        std::shared_ptr<Symfile> symfile;
        bool ok = this->Load(maps, memory_, data.symfile_addr, data.symfile_size.value, symfile);
        // Check the seqlock first because the load can fail due to a race (so we want to
        // trigger a retry).
        // TODO: Extract the memory copy code before the load, so that it is immune to races.
        if (!CheckSeqlock(uid, race)) {
          return false;  // The ELF/DEX data was removed before we loaded it.
        }
        // Exclude symbol files that fail to load (but continue loading other files).
        if (ok) {
          entries->emplace(uid, symfile);
        }
      }

      // Go to the next entry.
      UID next_uid;
      if (!ReadNextField(uid.address + offsetof(JITCodeEntry, next), &next_uid, race)) {
        return false;  // The next pointer was modified while we were reading it.
      }
      if (!CheckSeqlock(uid, race)) {
        return false;  // This entry was deleted before we moved to the next one.
      }
      uid = next_uid;
    }

    return true;
  }

  // Read the address and seqlock of an entry from the next field of the linked list.
  // This is non-trivial since they need to be consistent (as if we read both atomically).
  //
  // We're reading pointers, which can point at heap-allocated structures (the
  // case for the __dex_debug_descriptor pointers at the time of writing).
  // On 64-bit systems, the target process might have top-byte heap pointer
  // tagging enabled, so we need to mask out the tag. We also know that the
  // address must point to userspace, so the top byte of the address must be
  // zero on both x64 and aarch64 without tagging. Therefore the masking can be
  // done unconditionally.
  bool ReadNextField(uint64_t next_field_addr, UID* uid, bool* race) {
    Uintptr_T address[2]{0, 0};
    uint32_t seqlock[2]{0, 0};
    // Read all data twice: address[0], seqlock[0], address[1], seqlock[1].
    for (int i = 0; i < 2; i++) {
      std::atomic_thread_fence(std::memory_order_acquire);
      if (!(memory_->ReadFully(next_field_addr, &address[i], sizeof(address[i])))) {
        return false;
      }
      address[i] = StripAddressTag(address[i]);
      if (seqlock_offset_ == 0) {
        // There is no seqlock field.
        *uid = UID{.address = address[0], .seqlock = 0};
        return true;
      }
      if (address[i] != 0) {
        std::atomic_thread_fence(std::memory_order_acquire);
        if (!memory_->ReadFully(address[i] + seqlock_offset_, &seqlock[i], sizeof(seqlock[i]))) {
          return false;
        }
      }
    }
    // Check that both reads returned identical values, and that the entry is live.
    if (address[0] != address[1] || seqlock[0] != seqlock[1] || (seqlock[0] & 1) == 1) {
      *race = true;
      return false;
    }
    // Since address[1] is sandwiched between two seqlock reads, we know that
    // at the time of the address[1] read, the entry had the given seqlock value.
    *uid = UID{.address = address[1], .seqlock = seqlock[1]};
    return true;
  }
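  // Note on the double read above: the two seqlock reads bracket the second address read,
  // so if both address values and both seqlock values match and the seqlock is even (the
  // "live" check), then at the moment address[1] was read the entry carried exactly that
  // seqlock value. Any later modification is caught by re-checking the value via
  // CheckSeqlock() below.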

  // Check that the given entry has not been deleted (or replaced by a new entry at the
  // same address).
  bool CheckSeqlock(UID uid, bool* race = nullptr) {
    if (seqlock_offset_ == 0) {
      // There is no seqlock field.
      return true;
    }
    // This is required for memory synchronization if we are working with local memory.
    // For other types of memory (e.g. remote) this is a no-op and has no significant effect.
    std::atomic_thread_fence(std::memory_order_acquire);
    uint32_t seen_seqlock;
    if (!memory_->Read32(uid.address + seqlock_offset_, &seen_seqlock)) {
      return false;
    }
    if (seen_seqlock != uid.seqlock) {
      if (race != nullptr) {
        *race = true;
      }
      return false;
    }
    return true;
  }

  // AArch64 has an address tagging (aka Top Byte Ignore) feature, which is used by
  // HWASAN and MTE to store metadata in the address. We need to remove the tag.
  Uintptr_T StripAddressTag(Uintptr_T addr) {
    if (arch() == ARCH_ARM64) {
      // Make the value signed so it will be sign extended if necessary.
      return static_cast<Uintptr_T>((static_cast<int64_t>(addr) << 8) >> 8);
    }
    return addr;
  }
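  // For illustration (a hypothetical example value, not taken from this codebase): on
  // ARCH_ARM64 a tagged pointer such as 0xb400000012345678 becomes 0x0000000012345678.
  // The left shift drops the tag byte and the arithmetic right shift sign-extends from
  // bit 55, which is zero for userspace addresses.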

 private:
  const char* global_variable_name_ = nullptr;
  uint64_t descriptor_addr_ = 0;  // Non-zero if we have found a (non-empty) descriptor.
  uint32_t jit_entry_size_ = 0;
  uint32_t seqlock_offset_ = 0;
  std::map<UID, std::shared_ptr<Symfile>> entries_;  // Cached loaded entries.

  std::mutex lock_;
};

// uint64_t values on x86 are not naturally aligned,
// but uint64_t values on ARM are naturally aligned.
struct Uint64_P {
  uint64_t value;
} __attribute__((packed));
struct Uint64_A {
  uint64_t value;
} __attribute__((aligned(8)));
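
// For illustration (compile-time properties of the wrappers above, stated as an aside):
// sizeof(Uint64_P) == sizeof(Uint64_A) == 8, but alignof(Uint64_P) == 1 while
// alignof(Uint64_A) == 8. This alignment difference is what produces the differing field
// offsets verified by the static_asserts below (e.g. symfile_size at offset 12 on x86
// versus 16 on ARM).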

template <typename Symfile>
std::unique_ptr<GlobalDebugInterface<Symfile>> CreateGlobalDebugImpl(
    ArchEnum arch, std::shared_ptr<Memory>& memory, std::vector<std::string> search_libs,
    const char* global_variable_name) {
  CHECK(arch != ARCH_UNKNOWN);

  // The interface needs to see real-time changes in memory for synchronization with the
  // concurrently running ART JIT compiler. Skip caching and read the memory directly.
  std::shared_ptr<Memory> jit_memory;
  MemoryCacheBase* cached_memory = memory->AsMemoryCacheBase();
  if (cached_memory != nullptr) {
    jit_memory = cached_memory->UnderlyingMemory();
  } else {
    jit_memory = memory;
  }

  switch (arch) {
    case ARCH_X86: {
      using Impl = GlobalDebugImpl<Symfile, uint32_t, Uint64_P>;
      static_assert(offsetof(typename Impl::JITCodeEntry, symfile_size) == 12, "layout");
      static_assert(offsetof(typename Impl::JITCodeEntry, seqlock) == 28, "layout");
      static_assert(sizeof(typename Impl::JITCodeEntry) == 32, "layout");
      static_assert(sizeof(typename Impl::JITDescriptor) == 48, "layout");
      return std::make_unique<Impl>(arch, jit_memory, search_libs, global_variable_name);
    }
    case ARCH_ARM:
    case ARCH_MIPS: {
      using Impl = GlobalDebugImpl<Symfile, uint32_t, Uint64_A>;
      static_assert(offsetof(typename Impl::JITCodeEntry, symfile_size) == 16, "layout");
      static_assert(offsetof(typename Impl::JITCodeEntry, seqlock) == 32, "layout");
      static_assert(sizeof(typename Impl::JITCodeEntry) == 40, "layout");
      static_assert(sizeof(typename Impl::JITDescriptor) == 48, "layout");
      return std::make_unique<Impl>(arch, jit_memory, search_libs, global_variable_name);
    }
    case ARCH_ARM64:
    case ARCH_X86_64:
    case ARCH_MIPS64: {
      using Impl = GlobalDebugImpl<Symfile, uint64_t, Uint64_A>;
      static_assert(offsetof(typename Impl::JITCodeEntry, symfile_size) == 24, "layout");
      static_assert(offsetof(typename Impl::JITCodeEntry, seqlock) == 40, "layout");
      static_assert(sizeof(typename Impl::JITCodeEntry) == 48, "layout");
      static_assert(sizeof(typename Impl::JITDescriptor) == 56, "layout");
      return std::make_unique<Impl>(arch, jit_memory, search_libs, global_variable_name);
    }
    default:
      abort();
  }
}

}  // namespace unwindstack

#endif  // _LIBUNWINDSTACK_GLOBAL_DEBUG_IMPL_H