/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef _LIBUNWINDSTACK_GLOBAL_DEBUG_IMPL_H
#define _LIBUNWINDSTACK_GLOBAL_DEBUG_IMPL_H

#include <stdint.h>
#include <string.h>
#include <sys/mman.h>

#include <atomic>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <tuple>
#include <vector>

#include <unwindstack/Global.h>
#include <unwindstack/Maps.h>

#include "Check.h"
#include "GlobalDebugInterface.h"
#include "MemoryCache.h"
#include "MemoryRange.h"

// This implements the JIT Compilation Interface.
// See https://sourceware.org/gdb/onlinedocs/gdb/JIT-Interface.html
//
// We use it to get the in-memory ELF files created by the ART compiler,
// but we also use it to get the list of DEX files used by the runtime.
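//
// For reference, the base interface (per the GDB docs above) is declared in the
// target process roughly as follows; the Android runtime appends extra fields,
// which the templated structs below mirror:
//
//   struct jit_code_entry {
//     struct jit_code_entry* next_entry;
//     struct jit_code_entry* prev_entry;
//     const char* symfile_addr;
//     uint64_t symfile_size;
//   };
//
//   struct jit_descriptor {
//     uint32_t version;
//     uint32_t action_flag;  // jit_actions_t
//     struct jit_code_entry* relevant_entry;
//     struct jit_code_entry* first_entry;
//   };
//
//   extern "C" struct jit_descriptor __jit_debug_descriptor;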

namespace unwindstack {

// Implementation templated for ELF/DEX and for different architectures.
template <typename Symfile, typename Uintptr_T, typename Uint64_T>
class GlobalDebugImpl : public GlobalDebugInterface<Symfile>, public Global {
 public:
  static constexpr int kMaxRaceRetries = 16;
  static constexpr int kMaxHeadRetries = 16;
  static constexpr uint8_t kMagic[8] = {'A', 'n', 'd', 'r', 'o', 'i', 'd', '2'};

  struct JITCodeEntry {
    Uintptr_T next;
    Uintptr_T prev;
    Uintptr_T symfile_addr;
    Uint64_T symfile_size;
    // Android-specific fields:
    Uint64_T timestamp;
    uint32_t seqlock;
  };

  static constexpr size_t kSizeOfCodeEntryV1 = offsetof(JITCodeEntry, timestamp);
  static constexpr size_t kSizeOfCodeEntryV2 = sizeof(JITCodeEntry);

  struct JITDescriptor {
    uint32_t version;
    uint32_t action_flag;
    Uintptr_T relevant_entry;
    Uintptr_T first_entry;
    // Android-specific fields:
    uint8_t magic[8];
    uint32_t flags;
    uint32_t sizeof_descriptor;
    uint32_t sizeof_entry;
    uint32_t seqlock;
    Uint64_T timestamp;
  };

  static constexpr size_t kSizeOfDescriptorV1 = offsetof(JITDescriptor, magic);
  static constexpr size_t kSizeOfDescriptorV2 = sizeof(JITDescriptor);

  // This uniquely identifies an entry in the presence of concurrent modifications.
  // Each (address, seqlock) pair is unique for each newly created JIT entry.
  struct UID {
    uint64_t address;  // Address of the JITCodeEntry in memory.
    uint32_t seqlock;  // This serves as a "version" for the given address.

    bool operator<(const UID& other) const {
      return std::tie(address, seqlock) < std::tie(other.address, other.seqlock);
    }
  };

  GlobalDebugImpl(ArchEnum arch, std::shared_ptr<Memory>& memory,
                  std::vector<std::string>& search_libs, const char* global_variable_name)
      : Global(memory, search_libs), global_variable_name_(global_variable_name) {
    SetArch(arch);
  }

  bool ReadDescriptor(uint64_t addr) {
    JITDescriptor desc{};
    // Try to read the full descriptor including the Android-specific fields.
    if (!this->memory_->ReadFully(addr, &desc, kSizeOfDescriptorV2)) {
      // Fall back to just the minimal descriptor.
      // This will make the magic check below fail.
      if (!this->memory_->ReadFully(addr, &desc, kSizeOfDescriptorV1)) {
        return false;
      }
    }

    if (desc.version != 1 || desc.first_entry == 0) {
      // Either unknown version, or no jit entries.
      return false;
    }

    // Check if there are extra Android-specific fields.
    if (memcmp(desc.magic, kMagic, sizeof(kMagic)) == 0) {
      jit_entry_size_ = kSizeOfCodeEntryV2;
      seqlock_offset_ = offsetof(JITCodeEntry, seqlock);
    } else {
      jit_entry_size_ = kSizeOfCodeEntryV1;
      seqlock_offset_ = 0;
    }
    descriptor_addr_ = addr;
    return true;
  }

  void ProcessArch() {}

  bool ReadVariableData(uint64_t ptr) { return ReadDescriptor(ptr); }

  // Invoke the callback for all symfiles that contain the given PC.
  // Returns true if any callback returns true (which also aborts the iteration).
  template <typename Callback /* (Symfile*) -> bool */>
  bool ForEachSymfile(Maps* maps, uint64_t pc, Callback callback) {
    // Use a single lock; this object should be used so infrequently that
    // a fine-grained lock is unnecessary.
    std::lock_guard<std::mutex> guard(lock_);
    if (descriptor_addr_ == 0) {
      FindAndReadVariable(maps, global_variable_name_);
      if (descriptor_addr_ == 0) {
        return false;
      }
    }

    // Try to find the entry in the already loaded symbol files.
    for (auto& it : entries_) {
      Symfile* symfile = it.second.get();
      // Check the seqlock to make sure that the entry is still valid (it may be very old).
      if (symfile->IsValidPc(pc) && CheckSeqlock(it.first) && callback(symfile)) {
        return true;
      }
    }

    // Update all entries and retry.
    ReadAllEntries(maps);
    for (auto& it : entries_) {
      Symfile* symfile = it.second.get();
      // Note that the entry could become invalid after the ReadAllEntries above,
      // but that is ok.  We don't want to fail or refresh the entries yet again.
      // This is as if we had found the entry in time and it became invalid after return.
      // This is relevant when ART moves/packs JIT entries. That is, the entry is
      // technically deleted, but only because it was copied into a merged uber-entry.
      // So the JIT method is still alive and the deleted data is still correct.
      if (symfile->IsValidPc(pc) && callback(symfile)) {
        return true;
      }
    }

    return false;
  }

  bool GetFunctionName(Maps* maps, uint64_t pc, SharedString* name, uint64_t* offset) {
    // NB: If symfiles overlap in PC ranges, this will check all of them.
    return ForEachSymfile(maps, pc, [pc, name, offset](Symfile* file) {
      return file->GetFunctionName(pc, name, offset);
    });
  }

  Symfile* Find(Maps* maps, uint64_t pc) {
    // NB: If symfiles overlap in PC ranges (which can happen for both ELF and DEX),
    // this will check all of them and return one that also has a matching function.
    Symfile* result = nullptr;
    bool found = ForEachSymfile(maps, pc, [pc, &result](Symfile* file) {
      result = file;
      SharedString name;
      uint64_t offset;
      return file->GetFunctionName(pc, &name, &offset);
    });
    if (found) {
      return result;  // Found a symfile with a symbol that also matches the PC.
    }
    // There is no matching symbol, so return any symfile for which the PC is valid.
    // This is a useful fallback for tests, which often have symfiles with no functions.
    return result;
  }

  // Read all entries from the process and cache them locally.
  // The linked list might be concurrently modified. We detect races and retry.
  bool ReadAllEntries(Maps* maps) {
    for (int i = 0; i < kMaxRaceRetries; i++) {
      bool race = false;
      if (!ReadAllEntries(maps, &race)) {
        if (race) {
          continue;  // Retry due to concurrent modification of the linked list.
        }
        return false;  // Failed to read entries.
      }
      return true;  // Success.
    }
    return false;  // Too many retries.
  }

  // Read all JIT entries while assuming there might be concurrent modifications.
  // If there is a race, the method will fail and the caller should retry the call.
  bool ReadAllEntries(Maps* maps, bool* race) {
    // New entries might be added while we iterate over the linked list.
    // In particular, an entry could be effectively moved from the end to the start due to
    // the ART repacking algorithm, which groups smaller entries into a big one.
    // Therefore keep reading the most recent entries until we reach a fixed point.
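    // (An illustrative note on the fixed point: ReadNewEntries stops as soon as it
    // reaches an entry already present in `entries`, so a pass that adds nothing
    // means every entry at the front of the list had already been captured.)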
    std::map<UID, std::shared_ptr<Symfile>> entries;
    for (size_t i = 0; i < kMaxHeadRetries; i++) {
      size_t old_size = entries.size();
      if (!ReadNewEntries(maps, &entries, race)) {
        return false;
      }
      if (entries.size() == old_size) {
        entries_.swap(entries);
        return true;
      }
    }
    return false;  // Too many retries.
  }

  // Read new JIT entries (from the head of the linked list) until we find one that we
  // have seen before.
  // This method uses seqlocks extensively to ensure safety in case of concurrent modifications.
  bool ReadNewEntries(Maps* maps, std::map<UID, std::shared_ptr<Symfile>>* entries, bool* race) {
    // Read the address of the head entry in the linked list.
    UID uid;
    if (!ReadNextField(descriptor_addr_ + offsetof(JITDescriptor, first_entry), &uid, race)) {
      return false;
    }

    // Follow the linked list.
    while (uid.address != 0) {
      // Check if we have reached an already cached entry (we restart from the head repeatedly).
      if (entries->count(uid) != 0) {
        return true;
      }

      // Read the entry.
      JITCodeEntry data{};
      if (!memory_->ReadFully(uid.address, &data, jit_entry_size_)) {
        return false;
      }
      data.symfile_addr = StripAddressTag(data.symfile_addr);

      // Check the seqlock to verify the symfile_addr and symfile_size.
      if (!CheckSeqlock(uid, race)) {
        return false;
      }

      // Copy and load the symfile.
      auto it = entries_.find(uid);
      if (it != entries_.end()) {
        // The symfile was already loaded - just copy the reference.
        entries->emplace(uid, it->second);
      } else if (data.symfile_addr != 0) {
        std::shared_ptr<Symfile> symfile;
        bool ok = this->Load(maps, memory_, data.symfile_addr, data.symfile_size.value, symfile);
        // Check the seqlock first because the load can fail due to a race (so we want to
        // trigger a retry).
        // TODO: Extract the memory copy code before the load, so that it is immune to races.
        if (!CheckSeqlock(uid, race)) {
          return false;  // The ELF/DEX data was removed before we loaded it.
        }
        // Exclude symbol files that fail to load (but continue loading the other files).
        if (ok) {
          entries->emplace(uid, symfile);
        }
      }

      // Go to the next entry.
      UID next_uid;
      if (!ReadNextField(uid.address + offsetof(JITCodeEntry, next), &next_uid, race)) {
        return false;  // The next pointer was modified while we were reading it.
      }
      if (!CheckSeqlock(uid, race)) {
        return false;  // This entry was deleted before we moved to the next one.
      }
      uid = next_uid;
    }

    return true;
  }

  // Read the address and seqlock of an entry from the given next-pointer field of the
  // linked list. This is non-trivial since the two values need to be consistent (as if
  // we had read both atomically).
  //
  // We're reading pointers, which can point at heap-allocated structures (the
  // case for the __dex_debug_descriptor pointers at the time of writing).
  // On 64-bit systems, the target process might have top-byte heap pointer
  // tagging enabled, so we need to mask out the tag. We also know that the
  // address must point to userspace, so the top byte of the address must be
  // zero on both x64 and aarch64 without tagging. Therefore the masking can be
  // done unconditionally.
  bool ReadNextField(uint64_t next_field_addr, UID* uid, bool* race) {
    Uintptr_T address[2]{0, 0};
    uint32_t seqlock[2]{0, 0};
    // Read all of the data twice: address[0], seqlock[0], address[1], seqlock[1].
    for (int i = 0; i < 2; i++) {
      std::atomic_thread_fence(std::memory_order_acquire);
      if (!(memory_->ReadFully(next_field_addr, &address[i], sizeof(address[i])))) {
        return false;
      }
      address[i] = StripAddressTag(address[i]);
      if (seqlock_offset_ == 0) {
        // There is no seqlock field.
        *uid = UID{.address = address[0], .seqlock = 0};
        return true;
      }
      if (address[i] != 0) {
        std::atomic_thread_fence(std::memory_order_acquire);
        if (!memory_->ReadFully(address[i] + seqlock_offset_, &seqlock[i], sizeof(seqlock[i]))) {
          return false;
        }
      }
    }
    // Check that both reads returned identical values, and that the entry is live
    // (an even seqlock; an odd value means the entry is being modified or deleted).
    if (address[0] != address[1] || seqlock[0] != seqlock[1] || (seqlock[0] & 1) == 1) {
      *race = true;
      return false;
    }
    // Since the read of address[1] is sandwiched between the two seqlock reads, we know
    // that at the time of the address[1] read, the entry had the given seqlock value.
    *uid = UID{.address = address[1], .seqlock = seqlock[1]};
    return true;
  }
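  // An example of the race the double read catches (illustrative): if the entry at
  // address[0] is deleted and a new one is created at the same address between the
  // two passes, the two seqlock values differ (or are odd), so we set *race and the
  // caller retries from the head of the list.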

  // Check that the given entry has not been deleted (or replaced by a new entry at the
  // same address).
  bool CheckSeqlock(UID uid, bool* race = nullptr) {
    if (seqlock_offset_ == 0) {
      // There is no seqlock field.
      return true;
    }
    // This fence is required for memory synchronization if we are working with local memory.
    // For other types of memory (e.g. remote), it is a no-op and has no significant effect.
    std::atomic_thread_fence(std::memory_order_acquire);
    uint32_t seen_seqlock;
    if (!memory_->Read32(uid.address + seqlock_offset_, &seen_seqlock)) {
      return false;
    }
    if (seen_seqlock != uid.seqlock) {
      if (race != nullptr) {
        *race = true;
      }
      return false;
    }
    return true;
  }

  // AArch64 has an address tagging (aka Top Byte Ignore) feature, which is used by
  // HWASAN and MTE to store metadata in the address. We need to remove that tag.
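  // For illustration (hypothetical values): an MTE-tagged heap pointer such as
  // 0xb400007712345678 becomes 0x0000007712345678 after stripping, while an untagged
  // userspace pointer (top byte zero) is returned unchanged.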
  Uintptr_T StripAddressTag(Uintptr_T addr) {
    if (arch() == ARCH_ARM64) {
      // Make the value signed so it will be sign-extended if necessary.
      return static_cast<Uintptr_T>((static_cast<int64_t>(addr) << 8) >> 8);
    }
    return addr;
  }

 private:
  const char* global_variable_name_ = nullptr;
  uint64_t descriptor_addr_ = 0;  // Non-zero if we have found a (non-empty) descriptor.
  uint32_t jit_entry_size_ = 0;
  uint32_t seqlock_offset_ = 0;
  std::map<UID, std::shared_ptr<Symfile>> entries_;  // Cached loaded entries.

  std::mutex lock_;
};

// On 32-bit x86, uint64_t struct members are not naturally aligned (the ABI gives
// them only 4-byte alignment), whereas on 32-bit ARM they are naturally 8-byte aligned.
struct Uint64_P {
  uint64_t value;
} __attribute__((packed));
struct Uint64_A {
  uint64_t value;
} __attribute__((aligned(8)));
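
// Illustrative sanity checks (added for exposition; the per-arch static_asserts in
// CreateGlobalDebugImpl below are what actually pin down the layouts): the packed
// wrapper has no alignment requirement, while the aligned wrapper keeps the natural
// 8-byte alignment.
static_assert(sizeof(Uint64_P) == 8 && alignof(Uint64_P) == 1, "Uint64_P layout");
static_assert(sizeof(Uint64_A) == 8 && alignof(Uint64_A) == 8, "Uint64_A layout");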

template <typename Symfile>
std::unique_ptr<GlobalDebugInterface<Symfile>> CreateGlobalDebugImpl(
    ArchEnum arch, std::shared_ptr<Memory>& memory, std::vector<std::string> search_libs,
    const char* global_variable_name) {
  CHECK(arch != ARCH_UNKNOWN);

  // The interface needs to see real-time changes in memory for synchronization with the
  // concurrently running ART JIT compiler. Skip caching and read the memory directly.
  std::shared_ptr<Memory> jit_memory;
  MemoryCacheBase* cached_memory = memory->AsMemoryCacheBase();
  if (cached_memory != nullptr) {
    jit_memory = cached_memory->UnderlyingMemory();
  } else {
    jit_memory = memory;
  }

  switch (arch) {
    case ARCH_X86: {
      using Impl = GlobalDebugImpl<Symfile, uint32_t, Uint64_P>;
      static_assert(offsetof(typename Impl::JITCodeEntry, symfile_size) == 12, "layout");
      static_assert(offsetof(typename Impl::JITCodeEntry, seqlock) == 28, "layout");
      static_assert(sizeof(typename Impl::JITCodeEntry) == 32, "layout");
      static_assert(sizeof(typename Impl::JITDescriptor) == 48, "layout");
      return std::make_unique<Impl>(arch, jit_memory, search_libs, global_variable_name);
    }
    case ARCH_ARM:
    case ARCH_MIPS: {
      using Impl = GlobalDebugImpl<Symfile, uint32_t, Uint64_A>;
      static_assert(offsetof(typename Impl::JITCodeEntry, symfile_size) == 16, "layout");
      static_assert(offsetof(typename Impl::JITCodeEntry, seqlock) == 32, "layout");
      static_assert(sizeof(typename Impl::JITCodeEntry) == 40, "layout");
      static_assert(sizeof(typename Impl::JITDescriptor) == 48, "layout");
      return std::make_unique<Impl>(arch, jit_memory, search_libs, global_variable_name);
    }
    case ARCH_ARM64:
    case ARCH_X86_64:
    case ARCH_MIPS64: {
      using Impl = GlobalDebugImpl<Symfile, uint64_t, Uint64_A>;
      static_assert(offsetof(typename Impl::JITCodeEntry, symfile_size) == 24, "layout");
      static_assert(offsetof(typename Impl::JITCodeEntry, seqlock) == 40, "layout");
      static_assert(sizeof(typename Impl::JITCodeEntry) == 48, "layout");
      static_assert(sizeof(typename Impl::JITDescriptor) == 56, "layout");
      return std::make_unique<Impl>(arch, jit_memory, search_libs, global_variable_name);
    }
    default:
      abort();
  }
}
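
// Typical usage (a sketch based on this library's callers): the JIT interface is
// instantiated with the ELF symfile type and the __jit_debug_descriptor global,
// while the DEX interface uses the __dex_debug_descriptor global, e.g.:
//
//   auto impl = CreateGlobalDebugImpl<Elf>(arch, memory, search_libs,
//                                          "__jit_debug_descriptor");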

}  // namespace unwindstack

#endif  // _LIBUNWINDSTACK_GLOBAL_DEBUG_IMPL_H