1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "thread_list.h"
18 
19 #define ATRACE_TAG ATRACE_TAG_DALVIK
20 
#include <algorithm>
#include <cutils/trace.h>
#include <dirent.h>
#include <ScopedLocalRef.h>
#include <ScopedUtfChars.h>
#include <sys/types.h>
#include <unistd.h>
27 
28 #include "base/mutex.h"
29 #include "base/mutex-inl.h"
30 #include "base/timing_logger.h"
31 #include "debugger.h"
32 #include "jni_internal.h"
33 #include "lock_word.h"
34 #include "monitor.h"
35 #include "scoped_thread_state_change.h"
36 #include "thread.h"
37 #include "trace.h"
38 #include "utils.h"
39 #include "well_known_classes.h"
40 
41 namespace art {
42 
// Threshold (5ms) above which SuspendAll() logs a warning about how long suspension took.
static constexpr uint64_t kLongThreadSuspendThreshold = MsToNs(5);
44 
// Constructs an empty thread list with no outstanding suspend-all requests.
// The CHECK verifies that the largest possible thread id still produces a
// valid thin-lock word, since thread ids are embedded in lock words.
ThreadList::ThreadList()
    : suspend_all_count_(0), debug_suspend_all_count_(0),
      thread_exit_cond_("thread exit condition variable", *Locks::thread_list_lock_) {
  CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1)));
}
50 
// Tears down the thread list at runtime shutdown: detaches the calling thread
// if it is still registered (so a joiner can proceed), waits for the remaining
// non-daemon threads to exit, then suspends whatever daemon threads are left.
ThreadList::~ThreadList() {
  // Detach the current thread if necessary. If we failed to start, there might not be any threads.
  // We need to detach the current thread here in case there's another thread waiting to join with
  // us.
  bool contains = false;
  {
    Thread* self = Thread::Current();
    MutexLock mu(self, *Locks::thread_list_lock_);
    contains = Contains(self);
  }
  // Detach outside the lock scope: DetachCurrentThread itself manipulates the thread list.
  if (contains) {
    Runtime::Current()->DetachCurrentThread();
  }

  WaitForOtherNonDaemonThreadsToExit();
  // TODO: there's an unaddressed race here where a thread may attach during shutdown, see
  //       Thread::Init.
  SuspendAllDaemonThreads();
}
70 
Contains(Thread * thread)71 bool ThreadList::Contains(Thread* thread) {
72   return find(list_.begin(), list_.end(), thread) != list_.end();
73 }
74 
Contains(pid_t tid)75 bool ThreadList::Contains(pid_t tid) {
76   for (const auto& thread : list_) {
77     if (thread->GetTid() == tid) {
78       return true;
79     }
80   }
81   return false;
82 }
83 
// Returns the tid of the exclusive owner of thread_list_lock_ (see
// Mutex::GetExclusiveOwnerTid for the value reported when unheld).
pid_t ThreadList::GetLockOwner() {
  return Locks::thread_list_lock_->GetExclusiveOwnerTid();
}
87 
DumpNativeStacks(std::ostream & os)88 void ThreadList::DumpNativeStacks(std::ostream& os) {
89   MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
90   for (const auto& thread : list_) {
91     os << "DUMPING THREAD " << thread->GetTid() << "\n";
92     DumpNativeStack(os, thread->GetTid(), "\t");
93     os << "\n";
94   }
95 }
96 
// SIGQUIT handler entry point: dumps all attached threads (under the thread
// list lock), then any /proc tasks in this process that never attached.
void ThreadList::DumpForSigQuit(std::ostream& os) {
  {
    MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
    DumpLocked(os);
  }
  // Done outside the lock: DumpUnattachedThreads takes the lock itself per tid.
  DumpUnattachedThreads(os);
}
104 
// Dumps state and kernel stack for a tid that has no attached Thread*.
static void DumpUnattachedThread(std::ostream& os, pid_t tid) NO_THREAD_SAFETY_ANALYSIS {
  // TODO: No thread safety analysis as DumpState with a NULL thread won't access fields, should
  // refactor DumpState to avoid skipping analysis.
  Thread::DumpState(os, NULL, tid);
  DumpKernelStack(os, tid, "  kernel: ", false);
  // TODO: Reenable this when the native code in system_server can handle it.
  // Currently "adb shell kill -3 `pid system_server`" will cause it to exit.
  // Deliberately disabled (see TODO above), not dead code left by accident.
  if (false) {
    DumpNativeStack(os, tid, "  native: ");
  }
  os << "\n";
}
117 
// Walks /proc/self/task and dumps every tid that is not in the thread list,
// i.e. native threads in this process that never attached to the runtime.
void ThreadList::DumpUnattachedThreads(std::ostream& os) {
  DIR* d = opendir("/proc/self/task");
  if (!d) {
    // Best effort: silently skip if /proc isn't readable.
    return;
  }

  Thread* self = Thread::Current();
  dirent* e;
  while ((e = readdir(d)) != NULL) {
    char* end;
    pid_t tid = strtol(e->d_name, &end, 10);
    // Only fully-numeric entry names are tids; this also skips "." and "..".
    if (!*end) {
      bool contains;
      {
        // Lock per entry so the list lock isn't held across the dump I/O.
        MutexLock mu(self, *Locks::thread_list_lock_);
        contains = Contains(tid);
      }
      if (!contains) {
        DumpUnattachedThread(os, tid);
      }
    }
  }
  closedir(d);
}
142 
DumpLocked(std::ostream & os)143 void ThreadList::DumpLocked(std::ostream& os) {
144   os << "DALVIK THREADS (" << list_.size() << "):\n";
145   for (const auto& thread : list_) {
146     thread->Dump(os);
147     os << "\n";
148   }
149 }
150 
AssertThreadsAreSuspended(Thread * self,Thread * ignore1,Thread * ignore2)151 void ThreadList::AssertThreadsAreSuspended(Thread* self, Thread* ignore1, Thread* ignore2) {
152   MutexLock mu(self, *Locks::thread_list_lock_);
153   MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
154   for (const auto& thread : list_) {
155     if (thread != ignore1 && thread != ignore2) {
156       CHECK(thread->IsSuspended())
157             << "\nUnsuspended thread: <<" << *thread << "\n"
158             << "self: <<" << *Thread::Current();
159     }
160   }
161 }
162 
#if HAVE_TIMED_RWLOCK
// Attempt to rectify locks so that we dump thread list with required locks before exiting.
static void UnsafeLogFatalForThreadSuspendAllTimeout() NO_THREAD_SAFETY_ANALYSIS __attribute__((noreturn));
static void UnsafeLogFatalForThreadSuspendAllTimeout() {
  Runtime* runtime = Runtime::Current();
  std::ostringstream ss;
  ss << "Thread suspend timeout\n";
  Locks::mutator_lock_->Dump(ss);
  ss << "\n";
  runtime->GetThreadList()->DumpLocked(ss);
  LOG(FATAL) << ss.str();
  // LOG(FATAL) aborts, so this call is unreachable; it only exists to satisfy
  // the noreturn attribute for compilers that cannot see that.
  exit(0);
}
#endif
177 
178 // Unlike suspending all threads where we can wait to acquire the mutator_lock_, suspending an
179 // individual thread requires polling. delay_us is the requested sleep and total_delay_us
180 // accumulates the total time spent sleeping for timeouts. The first sleep is just a yield,
181 // subsequently sleeps increase delay_us from 1ms to 500ms by doubling.
ThreadSuspendSleep(Thread * self,useconds_t * delay_us,useconds_t * total_delay_us)182 static void ThreadSuspendSleep(Thread* self, useconds_t* delay_us, useconds_t* total_delay_us) {
183   useconds_t new_delay_us = (*delay_us) * 2;
184   CHECK_GE(new_delay_us, *delay_us);
185   if (new_delay_us < 500000) {  // Don't allow sleeping to be more than 0.5s.
186     *delay_us = new_delay_us;
187   }
188   if (*delay_us == 0) {
189     sched_yield();
190     // Default to 1 milliseconds (note that this gets multiplied by 2 before the first sleep).
191     *delay_us = 500;
192   } else {
193     usleep(*delay_us);
194     *total_delay_us += *delay_us;
195   }
196 }
197 
// Runs |checkpoint_function| once on behalf of every thread: runnable threads
// are asked (via RequestCheckpoint) to run it themselves at their next
// checkpoint, while suspended threads first have their suspend count raised so
// they stay suspended and the checkpoint is then run here on their behalf.
// Returns the number of threads the checkpoint ran (or will run) on,
// including the caller. Caller must not hold the mutator, thread list or
// suspend count locks and must not be runnable.
size_t ThreadList::RunCheckpoint(Closure* checkpoint_function) {
  Thread* self = Thread::Current();
  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
  Locks::thread_list_lock_->AssertNotHeld(self);
  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
  if (kDebugLocking) {
    CHECK_NE(self->GetState(), kRunnable);
  }

  std::vector<Thread*> suspended_count_modified_threads;
  size_t count = 0;
  {
    // Call a checkpoint function for each thread, threads which are suspend get their checkpoint
    // manually called.
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (const auto& thread : list_) {
      if (thread != self) {
        while (true) {
          if (thread->RequestCheckpoint(checkpoint_function)) {
            // This thread will run its checkpoint some time in the near future.
            count++;
            break;
          } else {
            // We are probably suspended, try to make sure that we stay suspended.
            // The thread switched back to runnable.
            if (thread->GetState() == kRunnable) {
              // Spurious fail, try again.
              continue;
            }
            // Raise the suspend count so the thread cannot become runnable
            // before we run the checkpoint on its behalf below.
            thread->ModifySuspendCount(self, +1, false);
            suspended_count_modified_threads.push_back(thread);
            break;
          }
        }
      }
    }
  }

  // Run the checkpoint on ourself while we wait for threads to suspend.
  checkpoint_function->Run(self);

  // Run the checkpoint on the suspended threads.
  for (const auto& thread : suspended_count_modified_threads) {
    if (!thread->IsSuspended()) {
      // Wait until the thread is suspended.
      useconds_t total_delay_us = 0;
      do {
        useconds_t delay_us = 100;
        ThreadSuspendSleep(self, &delay_us, &total_delay_us);
      } while (!thread->IsSuspended());
      // Shouldn't need to wait for longer than 1000 microseconds.
      constexpr useconds_t kLongWaitThresholdUS = 1000;
      if (UNLIKELY(total_delay_us > kLongWaitThresholdUS)) {
        LOG(WARNING) << "Waited " << total_delay_us << " us for thread suspend!";
      }
    }
    // We know for sure that the thread is suspended at this point.
    checkpoint_function->Run(thread);
    {
      // Undo the suspend-count bump we applied above.
      MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
      thread->ModifySuspendCount(self, -1, false);
    }
  }

  {
    // Imitate ResumeAll, threads may be waiting on Thread::resume_cond_ since we raised their
    // suspend count. Now the suspend_count_ is lowered so we must do the broadcast.
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    Thread::resume_cond_->Broadcast(self);
  }

  // Add one for self.
  return count + suspended_count_modified_threads.size() + 1;
}
273 
274 // Request that a checkpoint function be run on all active (non-suspended)
275 // threads.  Returns the number of successful requests.
RunCheckpointOnRunnableThreads(Closure * checkpoint_function)276 size_t ThreadList::RunCheckpointOnRunnableThreads(Closure* checkpoint_function) {
277   Thread* self = Thread::Current();
278   if (kIsDebugBuild) {
279     Locks::mutator_lock_->AssertNotExclusiveHeld(self);
280     Locks::thread_list_lock_->AssertNotHeld(self);
281     Locks::thread_suspend_count_lock_->AssertNotHeld(self);
282     CHECK_NE(self->GetState(), kRunnable);
283   }
284 
285   size_t count = 0;
286   {
287     // Call a checkpoint function for each non-suspended thread.
288     MutexLock mu(self, *Locks::thread_list_lock_);
289     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
290     for (const auto& thread : list_) {
291       if (thread != self) {
292         if (thread->RequestCheckpoint(checkpoint_function)) {
293           // This thread will run its checkpoint some time in the near future.
294           count++;
295         }
296       }
297     }
298   }
299 
300   // Return the number of threads that will run the checkpoint function.
301   return count;
302 }
303 
// Suspends every thread except the caller: raises each thread's suspend count,
// then takes the mutator lock exclusively, which blocks until all runnable
// threads release their shared hold. On return the caller exclusively holds
// mutator_lock_; pair with ResumeAll(). |self| may be null for an unattached
// caller.
void ThreadList::SuspendAll() {
  Thread* self = Thread::Current();

  if (self != nullptr) {
    VLOG(threads) << *self << " SuspendAll starting...";
  } else {
    VLOG(threads) << "Thread[null] SuspendAll starting...";
  }
  ATRACE_BEGIN("Suspending mutator threads");
  uint64_t start_time = NanoTime();

  Locks::mutator_lock_->AssertNotHeld(self);
  Locks::thread_list_lock_->AssertNotHeld(self);
  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
  if (kDebugLocking && self != nullptr) {
    CHECK_NE(self->GetState(), kRunnable);
  }
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    // Update global suspend all state for attaching threads.
    ++suspend_all_count_;
    // Increment everybody's suspend count (except our own).
    for (const auto& thread : list_) {
      if (thread == self) {
        continue;
      }
      VLOG(threads) << "requesting thread suspend: " << *thread;
      thread->ModifySuspendCount(self, +1, false);
    }
  }

  // Block on the mutator lock until all Runnable threads release their share of access.
#if HAVE_TIMED_RWLOCK
  // Timeout if we wait more than 30 seconds.
  if (!Locks::mutator_lock_->ExclusiveLockWithTimeout(self, 30 * 1000, 0)) {
    UnsafeLogFatalForThreadSuspendAllTimeout();
  }
#else
  Locks::mutator_lock_->ExclusiveLock(self);
#endif

  // Warn when stopping the world took longer than kLongThreadSuspendThreshold.
  uint64_t end_time = NanoTime();
  if (end_time - start_time > kLongThreadSuspendThreshold) {
    LOG(WARNING) << "Suspending all threads took: " << PrettyDuration(end_time - start_time);
  }

  if (kDebugLocking) {
    // Debug check that all threads are suspended.
    AssertThreadsAreSuspended(self, self);
  }

  ATRACE_END();
  ATRACE_BEGIN("Mutator threads suspended");

  if (self != nullptr) {
    VLOG(threads) << *self << " SuspendAll complete";
  } else {
    VLOG(threads) << "Thread[null] SuspendAll complete";
  }
}
365 
// Undoes SuspendAll(): releases the exclusively-held mutator lock, lowers
// every other thread's suspend count, and broadcasts on resume_cond_ so
// threads blocked in their suspend check wake up.
void ThreadList::ResumeAll() {
  Thread* self = Thread::Current();

  if (self != nullptr) {
    VLOG(threads) << *self << " ResumeAll starting";
  } else {
    VLOG(threads) << "Thread[null] ResumeAll starting";
  }

  ATRACE_END();
  ATRACE_BEGIN("Resuming mutator threads");

  if (kDebugLocking) {
    // Debug check that all threads are suspended.
    AssertThreadsAreSuspended(self, self);
  }

  Locks::mutator_lock_->ExclusiveUnlock(self);
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    // Update global suspend all state for attaching threads.
    --suspend_all_count_;
    // Decrement the suspend counts for all threads.
    for (const auto& thread : list_) {
      if (thread == self) {
        continue;
      }
      thread->ModifySuspendCount(self, -1, false);
    }

    // Broadcast a notification to all suspended threads, some or all of
    // which may choose to wake up.  No need to wait for them.
    if (self != nullptr) {
      VLOG(threads) << *self << " ResumeAll waking others";
    } else {
      VLOG(threads) << "Thread[null] ResumeAll waking others";
    }
    Thread::resume_cond_->Broadcast(self);
  }
  ATRACE_END();

  if (self != nullptr) {
    VLOG(threads) << *self << " ResumeAll complete";
  } else {
    VLOG(threads) << "Thread[null] ResumeAll complete";
  }
}
414 
// Resumes a single suspended thread by lowering its suspend count and
// broadcasting on resume_cond_. |for_debugger| must match how the thread was
// suspended so the debug suspend count stays balanced. |thread| must not be
// the caller and is expected to be suspended and in the thread list.
void ThreadList::Resume(Thread* thread, bool for_debugger) {
  Thread* self = Thread::Current();
  DCHECK_NE(thread, self);
  VLOG(threads) << "Resume(" << reinterpret_cast<void*>(thread) << ") starting..."
      << (for_debugger ? " (debugger)" : "");

  {
    // To check Contains.
    MutexLock mu(self, *Locks::thread_list_lock_);
    // To check IsSuspended.
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    DCHECK(thread->IsSuspended());
    if (!Contains(thread)) {
      // We only expect threads within the thread-list to have been suspended otherwise we can't
      // stop such threads from delete-ing themselves.
      LOG(ERROR) << "Resume(" << reinterpret_cast<void*>(thread)
          << ") thread not within thread list";
      return;
    }
    thread->ModifySuspendCount(self, -1, for_debugger);
  }

  {
    // Wake the thread if it is blocked waiting in its suspend check.
    VLOG(threads) << "Resume(" << reinterpret_cast<void*>(thread) << ") waking others";
    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
    Thread::resume_cond_->Broadcast(self);
  }

  VLOG(threads) << "Resume(" << reinterpret_cast<void*>(thread) << ") complete";
}
445 
ThreadSuspendByPeerWarning(Thread * self,int level,const char * message,jobject peer)446 static void ThreadSuspendByPeerWarning(Thread* self, int level, const char* message, jobject peer) {
447   JNIEnvExt* env = self->GetJniEnv();
448   ScopedLocalRef<jstring>
449       scoped_name_string(env, (jstring)env->GetObjectField(peer,
450                                                           WellKnownClasses::java_lang_Thread_name));
451   ScopedUtfChars scoped_name_chars(env, scoped_name_string.get());
452   if (scoped_name_chars.c_str() == NULL) {
453       LOG(level) << message << ": " << peer;
454       env->ExceptionClear();
455   } else {
456       LOG(level) << message << ": " << peer << ":" << scoped_name_chars.c_str();
457   }
458 }
459 
// Suspends the thread whose java.lang.Thread peer is |peer|, polling until it
// is actually suspended or a 30s timeout elapses. If |request_suspension| is
// true this call raises the suspend count itself (once); otherwise a suspension
// must already have been requested by the caller. Returns the suspended
// Thread*, or nullptr on failure (unknown/unattached peer, or timeout with
// *timed_out set to true).
Thread* ThreadList::SuspendThreadByPeer(jobject peer, bool request_suspension,
                                        bool debug_suspension, bool* timed_out) {
  static const useconds_t kTimeoutUs = 30 * 1000000;  // 30s.
  useconds_t total_delay_us = 0;
  useconds_t delay_us = 0;
  bool did_suspend_request = false;
  *timed_out = false;
  Thread* self = Thread::Current();
  VLOG(threads) << "SuspendThreadByPeer starting";
  while (true) {
    Thread* thread;
    {
      // Note: this will transition to runnable and potentially suspend. We ensure only one thread
      // is requesting another suspend, to avoid deadlock, by requiring this function be called
      // holding Locks::thread_list_suspend_thread_lock_. Its important this thread suspend rather
      // than request thread suspension, to avoid potential cycles in threads requesting each other
      // suspend.
      ScopedObjectAccess soa(self);
      MutexLock mu(self, *Locks::thread_list_lock_);
      thread = Thread::FromManagedThread(soa, peer);
      if (thread == nullptr) {
        ThreadSuspendByPeerWarning(self, WARNING, "No such thread for suspend", peer);
        return nullptr;
      }
      if (!Contains(thread)) {
        VLOG(threads) << "SuspendThreadByPeer failed for unattached thread: "
            << reinterpret_cast<void*>(thread);
        return nullptr;
      }
      VLOG(threads) << "SuspendThreadByPeer found thread: " << *thread;
      {
        MutexLock mu(self, *Locks::thread_suspend_count_lock_);
        if (request_suspension) {
          thread->ModifySuspendCount(self, +1, debug_suspension);
          request_suspension = false;
          did_suspend_request = true;
        } else {
          // If the caller isn't requesting suspension, a suspension should have already occurred.
          CHECK_GT(thread->GetSuspendCount(), 0);
        }
        // IsSuspended on the current thread will fail as the current thread is changed into
        // Runnable above. As the suspend count is now raised if this is the current thread
        // it will self suspend on transition to Runnable, making it hard to work with. It's simpler
        // to just explicitly handle the current thread in the callers to this code.
        CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
        // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
        // count, or else we've waited and it has self suspended) or is the current thread, we're
        // done.
        if (thread->IsSuspended()) {
          VLOG(threads) << "SuspendThreadByPeer thread suspended: " << *thread;
          return thread;
        }
        if (total_delay_us >= kTimeoutUs) {
          ThreadSuspendByPeerWarning(self, FATAL, "Thread suspension timed out", peer);
          // NOTE(review): the FATAL log above should abort, which would make the
          // cleanup below unreachable — confirm the intended severity.
          if (did_suspend_request) {
            thread->ModifySuspendCount(soa.Self(), -1, debug_suspension);
          }
          *timed_out = true;
          return nullptr;
        }
      }
      // Release locks and come out of runnable state.
    }
    VLOG(threads) << "SuspendThreadByPeer sleeping to allow thread chance to suspend";
    ThreadSuspendSleep(self, &delay_us, &total_delay_us);
  }
}
527 
// Logs |message| together with the offending thread id at the given severity.
static void ThreadSuspendByThreadIdWarning(int level, const char* message, uint32_t thread_id) {
  LOG(level) << StringPrintf("%s: %d", message, thread_id);
}
531 
// Suspends the thread with the given thin-lock id, polling until it is
// actually suspended or a 30s timeout elapses. The suspend count is raised on
// the first iteration only; on timeout it is rolled back. Returns the
// suspended Thread*, or nullptr on failure (no such id, or timeout with
// *timed_out set to true).
Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id, bool debug_suspension,
                                            bool* timed_out) {
  static const useconds_t kTimeoutUs = 30 * 1000000;  // 30s.
  useconds_t total_delay_us = 0;
  useconds_t delay_us = 0;
  *timed_out = false;
  Thread* suspended_thread = nullptr;
  Thread* self = Thread::Current();
  CHECK_NE(thread_id, kInvalidThreadId);
  VLOG(threads) << "SuspendThreadByThreadId starting";
  while (true) {
    {
      // Note: this will transition to runnable and potentially suspend. We ensure only one thread
      // is requesting another suspend, to avoid deadlock, by requiring this function be called
      // holding Locks::thread_list_suspend_thread_lock_. Its important this thread suspend rather
      // than request thread suspension, to avoid potential cycles in threads requesting each other
      // suspend.
      ScopedObjectAccess soa(self);
      MutexLock mu(self, *Locks::thread_list_lock_);
      Thread* thread = nullptr;
      // Linear scan of the list for the matching thin-lock id.
      for (const auto& it : list_) {
        if (it->GetThreadId() == thread_id) {
          thread = it;
          break;
        }
      }
      if (thread == nullptr) {
        // If we had already suspended it, it should not have been able to exit.
        CHECK(suspended_thread == nullptr) << "Suspended thread " << suspended_thread
            << " no longer in thread list";
        // There's a race in inflating a lock and the owner giving up ownership and then dying.
        ThreadSuspendByThreadIdWarning(WARNING, "No such thread id for suspend", thread_id);
        return nullptr;
      }
      VLOG(threads) << "SuspendThreadByThreadId found thread: " << *thread;
      DCHECK(Contains(thread));
      {
        MutexLock mu(self, *Locks::thread_suspend_count_lock_);
        if (suspended_thread == nullptr) {
          // First iteration: request the suspension.
          thread->ModifySuspendCount(self, +1, debug_suspension);
          suspended_thread = thread;
        } else {
          CHECK_EQ(suspended_thread, thread);
          // If the caller isn't requesting suspension, a suspension should have already occurred.
          CHECK_GT(thread->GetSuspendCount(), 0);
        }
        // IsSuspended on the current thread will fail as the current thread is changed into
        // Runnable above. As the suspend count is now raised if this is the current thread
        // it will self suspend on transition to Runnable, making it hard to work with. It's simpler
        // to just explicitly handle the current thread in the callers to this code.
        CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
        // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
        // count, or else we've waited and it has self suspended) or is the current thread, we're
        // done.
        if (thread->IsSuspended()) {
          VLOG(threads) << "SuspendThreadByThreadId thread suspended: " << *thread;
          return thread;
        }
        if (total_delay_us >= kTimeoutUs) {
          ThreadSuspendByThreadIdWarning(WARNING, "Thread suspension timed out", thread_id);
          // Roll back the suspend count we raised above before reporting failure.
          if (suspended_thread != nullptr) {
            thread->ModifySuspendCount(soa.Self(), -1, debug_suspension);
          }
          *timed_out = true;
          return nullptr;
        }
      }
      // Release locks and come out of runnable state.
    }
    VLOG(threads) << "SuspendThreadByThreadId sleeping to allow thread chance to suspend";
    ThreadSuspendSleep(self, &delay_us, &total_delay_us);
  }
}
604 
FindThreadByThreadId(uint32_t thin_lock_id)605 Thread* ThreadList::FindThreadByThreadId(uint32_t thin_lock_id) {
606   Thread* self = Thread::Current();
607   MutexLock mu(self, *Locks::thread_list_lock_);
608   for (const auto& thread : list_) {
609     if (thread->GetThreadId() == thin_lock_id) {
610       CHECK(thread == self || thread->IsSuspended());
611       return thread;
612     }
613   }
614   return NULL;
615 }
616 
// Debugger-initiated suspend-all: raises the suspend count of every thread
// except the caller and the JDWP debugger thread. Unlike SuspendAll(), the
// mutator lock is released again before returning, so the world is suspended
// but the caller does not keep exclusive mutator access.
void ThreadList::SuspendAllForDebugger() {
  Thread* self = Thread::Current();
  Thread* debug_thread = Dbg::GetDebugThread();

  VLOG(threads) << *self << " SuspendAllForDebugger starting...";

  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    {
      MutexLock mu(self, *Locks::thread_suspend_count_lock_);
      // Update global suspend all state for attaching threads.
      DCHECK_GE(suspend_all_count_, debug_suspend_all_count_);
      ++suspend_all_count_;
      ++debug_suspend_all_count_;
      // Increment everybody's suspend count (except our own).
      for (const auto& thread : list_) {
        if (thread == self || thread == debug_thread) {
          continue;
        }
        VLOG(threads) << "requesting thread suspend: " << *thread;
        thread->ModifySuspendCount(self, +1, true);
      }
    }
  }

  // Block on the mutator lock until all Runnable threads release their share of access then
  // immediately unlock again.
#if HAVE_TIMED_RWLOCK
  // Timeout if we wait more than 30 seconds.
  if (!Locks::mutator_lock_->ExclusiveLockWithTimeout(self, 30 * 1000, 0)) {
    UnsafeLogFatalForThreadSuspendAllTimeout();
  } else {
    Locks::mutator_lock_->ExclusiveUnlock(self);
  }
#else
  Locks::mutator_lock_->ExclusiveLock(self);
  Locks::mutator_lock_->ExclusiveUnlock(self);
#endif
  AssertThreadsAreSuspended(self, self, debug_thread);

  VLOG(threads) << *self << " SuspendAllForDebugger complete";
}
659 
// Called on an event thread to self-suspend for the debugger: raises this
// thread's own (debug) suspend count, signals completion of any pending JDWP
// invoke, tells JDWP the suspension is done, then waits on resume_cond_ until
// the suspend count drops back to zero.
void ThreadList::SuspendSelfForDebugger() {
  Thread* self = Thread::Current();

  // The debugger thread must not suspend itself due to debugger activity!
  Thread* debug_thread = Dbg::GetDebugThread();
  CHECK(debug_thread != NULL);
  CHECK(self != debug_thread);
  CHECK_NE(self->GetState(), kRunnable);
  Locks::mutator_lock_->AssertNotHeld(self);

  {
    // Collisions with other suspends aren't really interesting. We want
    // to ensure that we're the only one fiddling with the suspend count
    // though.
    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
    self->ModifySuspendCount(self, +1, true);
    CHECK_GT(self->GetSuspendCount(), 0);
  }

  VLOG(threads) << *self << " self-suspending (debugger)";

  // Tell JDWP we've completed invocation and are ready to suspend.
  DebugInvokeReq* pReq = self->GetInvokeReq();
  DCHECK(pReq != NULL);
  if (pReq->invoke_needed) {
    // Clear this before signaling.
    pReq->Clear();

    VLOG(jdwp) << "invoke complete, signaling";
    MutexLock mu(self, pReq->lock);
    pReq->cond.Signal(self);
  }

  // Tell JDWP that we've completed suspension. The JDWP thread can't
  // tell us to resume before we're fully asleep because we hold the
  // suspend count lock.
  Dbg::ClearWaitForEventThread();

  {
    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
    while (self->GetSuspendCount() != 0) {
      Thread::resume_cond_->Wait(self);
      if (self->GetSuspendCount() != 0) {
        // The condition was signaled but we're still suspended. This
        // can happen when we suspend then resume all threads to
        // update instrumentation or compute monitor info. This can
        // also happen if the debugger lets go while a SIGQUIT thread
        // dump event is pending (assuming SignalCatcher was resumed for
        // just long enough to try to grab the thread-suspend lock).
        VLOG(jdwp) << *self << " still suspended after undo "
                   << "(suspend count=" << self->GetSuspendCount() << ", "
                   << "debug suspend count=" << self->GetDebugSuspendCount() << ")";
      }
    }
    CHECK_EQ(self->GetSuspendCount(), 0);
  }

  VLOG(threads) << *self << " self-reviving (debugger)";
}
719 
// Undoes a debugger suspend-all: lowers the global debug suspend-all state and
// decrements the suspend count of every debug-suspended thread (except the
// caller and the debugger thread), then wakes suspended threads via
// resume_cond_. Tolerates (with a warning) a resume that was never preceded by
// a debugger suspend-all.
void ThreadList::ResumeAllForDebugger() {
  Thread* self = Thread::Current();
  Thread* debug_thread = Dbg::GetDebugThread();

  VLOG(threads) << *self << " ResumeAllForDebugger starting...";

  // Threads can't resume if we exclusively hold the mutator lock.
  Locks::mutator_lock_->AssertNotExclusiveHeld(self);

  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    {
      MutexLock mu(self, *Locks::thread_suspend_count_lock_);
      // Update global suspend all state for attaching threads.
      DCHECK_GE(suspend_all_count_, debug_suspend_all_count_);
      if (debug_suspend_all_count_ > 0) {
        --suspend_all_count_;
        --debug_suspend_all_count_;
      } else {
        // We've been asked to resume all threads without being asked to
        // suspend them all before. That may happen if a debugger tries
        // to resume some suspended threads (with suspend count == 1)
        // at once with a VirtualMachine.Resume command. Let's print a
        // warning.
        LOG(WARNING) << "Debugger attempted to resume all threads without "
                     << "having suspended them all before.";
      }
      // Decrement everybody's suspend count (except our own).
      for (const auto& thread : list_) {
        if (thread == self || thread == debug_thread) {
          continue;
        }
        if (thread->GetDebugSuspendCount() == 0) {
          // This thread may have been individually resumed with ThreadReference.Resume.
          continue;
        }
        VLOG(threads) << "requesting thread resume: " << *thread;
        thread->ModifySuspendCount(self, -1, true);
      }
    }
  }

  {
    // Wake any threads blocked in their suspend check.
    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
    Thread::resume_cond_->Broadcast(self);
  }

  VLOG(threads) << *self << " ResumeAllForDebugger complete";
}
769 
// Clears all debugger-induced suspensions at once (e.g. when the debugger
// disconnects): removes the debug portion of the global suspend-all state,
// subtracts each thread's full debug suspend count, then wakes the suspended
// threads.
void ThreadList::UndoDebuggerSuspensions() {
  Thread* self = Thread::Current();

  VLOG(threads) << *self << " UndoDebuggerSuspensions starting";

  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    // Update global suspend all state for attaching threads.
    suspend_all_count_ -= debug_suspend_all_count_;
    debug_suspend_all_count_ = 0;
    // Update running threads.
    for (const auto& thread : list_) {
      if (thread == self || thread->GetDebugSuspendCount() == 0) {
        continue;
      }
      // Remove exactly the debug-attributed portion of the suspend count.
      thread->ModifySuspendCount(self, -thread->GetDebugSuspendCount(), true);
    }
  }

  {
    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
    Thread::resume_cond_->Broadcast(self);
  }

  VLOG(threads) << "UndoDebuggerSuspensions(" << *self << ") complete";
}
797 
WaitForOtherNonDaemonThreadsToExit()798 void ThreadList::WaitForOtherNonDaemonThreadsToExit() {
799   Thread* self = Thread::Current();
800   Locks::mutator_lock_->AssertNotHeld(self);
801   bool all_threads_are_daemons;
802   do {
803     {
804       // No more threads can be born after we start to shutdown.
805       MutexLock mu(self, *Locks::runtime_shutdown_lock_);
806       CHECK(Runtime::Current()->IsShuttingDownLocked());
807       CHECK_EQ(Runtime::Current()->NumberOfThreadsBeingBorn(), 0U);
808     }
809     all_threads_are_daemons = true;
810     MutexLock mu(self, *Locks::thread_list_lock_);
811     for (const auto& thread : list_) {
812       if (thread != self && !thread->IsDaemon()) {
813         all_threads_are_daemons = false;
814         break;
815       }
816     }
817     if (!all_threads_are_daemons) {
818       // Wait for another thread to exit before re-checking.
819       thread_exit_cond_.Wait(self);
820     }
821   } while (!all_threads_are_daemons);
822 }
823 
// Best-effort suspension of the remaining daemon threads during runtime
// shutdown. Raises every daemon's suspend count, then polls for up to ~2s
// waiting for them to leave the runnable state. On timeout it logs an error
// and returns anyway; shutdown proceeds regardless.
void ThreadList::SuspendAllDaemonThreads() {
  Thread* self = Thread::Current();
  MutexLock mu(self, *Locks::thread_list_lock_);
  {  // Tell all the daemons it's time to suspend.
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (const auto& thread : list_) {
      // This is only run after all non-daemon threads have exited, so the remainder should all be
      // daemons.
      CHECK(thread->IsDaemon()) << *thread;
      if (thread != self) {
        thread->ModifySuspendCount(self, +1, false);
      }
    }
  }
  // Give the threads a chance to suspend, complaining if they're slow.
  // NOTE(review): thread_list_lock_ remains held across the sleeps below,
  // which blocks thread registration/unregistration for the polling period.
  bool have_complained = false;
  for (int i = 0; i < 10; ++i) {
    usleep(200 * 1000);  // 200ms per iteration, 10 iterations => ~2s budget.
    bool all_suspended = true;
    for (const auto& thread : list_) {
      if (thread != self && thread->GetState() == kRunnable) {
        if (!have_complained) {
          // Warn only once, for the first slow daemon we notice.
          LOG(WARNING) << "daemon thread not yet suspended: " << *thread;
          have_complained = true;
        }
        all_suspended = false;
      }
    }
    if (all_suspended) {
      return;
    }
  }
  // Timed out: some daemon never left the runnable state.
  LOG(ERROR) << "suspend all daemons failed";
}
Register(Thread * self)858 void ThreadList::Register(Thread* self) {
859   DCHECK_EQ(self, Thread::Current());
860 
861   if (VLOG_IS_ON(threads)) {
862     std::ostringstream oss;
863     self->ShortDump(oss);  // We don't hold the mutator_lock_ yet and so cannot call Dump.
864     LOG(INFO) << "ThreadList::Register() " << *self  << "\n" << oss.str();
865   }
866 
867   // Atomically add self to the thread list and make its thread_suspend_count_ reflect ongoing
868   // SuspendAll requests.
869   MutexLock mu(self, *Locks::thread_list_lock_);
870   MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
871   CHECK_GE(suspend_all_count_, debug_suspend_all_count_);
872   // Modify suspend count in increments of 1 to maintain invariants in ModifySuspendCount. While
873   // this isn't particularly efficient the suspend counts are most commonly 0 or 1.
874   for (int delta = debug_suspend_all_count_; delta > 0; delta--) {
875     self->ModifySuspendCount(self, +1, true);
876   }
877   for (int delta = suspend_all_count_ - debug_suspend_all_count_; delta > 0; delta--) {
878     self->ModifySuspendCount(self, +1, false);
879   }
880   CHECK(!Contains(self));
881   list_.push_back(self);
882 }
883 
// Detaches the calling thread from the runtime: runs managed-visible
// teardown, then loops until the Thread* can be removed from list_ and
// deleted without racing a concurrent suspension, and finally releases the
// thin-lock id and TLS slot. On return, 'self' has been deleted (or was
// never attached).
void ThreadList::Unregister(Thread* self) {
  DCHECK_EQ(self, Thread::Current());

  VLOG(threads) << "ThreadList::Unregister() " << *self;

  // Any time-consuming destruction, plus anything that can call back into managed code or
  // suspend and so on, must happen at this point, and not in ~Thread.
  self->Destroy();

  // If tracing, remember thread id and name before thread exits.
  Trace::StoreExitingThreadInfo(self);

  // Cache the id now; 'self' may be deleted inside the loop below.
  uint32_t thin_lock_id = self->GetThreadId();
  // Loop until we have removed and deleted ourselves; 'self' becomes nullptr
  // once that happens (or immediately, if we turn out not to be registered).
  while (self != nullptr) {
    // Remove and delete the Thread* while holding the thread_list_lock_ and
    // thread_suspend_count_lock_ so that the unregistering thread cannot be suspended.
    // Note: deliberately not using MutexLock that could hold a stale self pointer.
    Locks::thread_list_lock_->ExclusiveLock(self);
    if (!Contains(self)) {
      std::ostringstream os;
      DumpNativeStack(os, GetTid(), "  native: ", nullptr);
      LOG(ERROR) << "Request to unregister unattached thread\n" << os.str();
      self = nullptr;
    } else {
      // Note: we don't take the thread_suspend_count_lock_ here as to be suspending a thread other
      // than yourself you need to hold the thread_list_lock_ (see Thread::ModifySuspendCount).
      if (!self->IsSuspended()) {
        list_.remove(self);
        delete self;
        self = nullptr;
      }
      // else: a suspension is pending on us; drop the lock and retry so the
      // suspender can finish with this thread first.
    }
    Locks::thread_list_lock_->ExclusiveUnlock(self);
  }
  // Release the thread ID after the thread is finished and deleted to avoid cases where we can
  // temporarily have multiple threads with the same thread id. When this occurs, it causes
  // problems in FindThreadByThreadId / SuspendThreadByThreadId.
  ReleaseThreadId(nullptr, thin_lock_id);

  // Clear the TLS data, so that the underlying native thread is recognizably detached.
  // (It may wish to reattach later.)
  CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, NULL), "detach self");

  // Signal that a thread just detached.
  MutexLock mu(NULL, *Locks::thread_list_lock_);
  thread_exit_cond_.Signal(NULL);
}
931 
ForEach(void (* callback)(Thread *,void *),void * context)932 void ThreadList::ForEach(void (*callback)(Thread*, void*), void* context) {
933   for (const auto& thread : list_) {
934     callback(thread, context);
935   }
936 }
937 
VisitRoots(RootCallback * callback,void * arg) const938 void ThreadList::VisitRoots(RootCallback* callback, void* arg) const {
939   MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
940   for (const auto& thread : list_) {
941     thread->VisitRoots(callback, arg);
942   }
943 }
944 
AllocThreadId(Thread * self)945 uint32_t ThreadList::AllocThreadId(Thread* self) {
946   MutexLock mu(self, *Locks::allocated_thread_ids_lock_);
947   for (size_t i = 0; i < allocated_ids_.size(); ++i) {
948     if (!allocated_ids_[i]) {
949       allocated_ids_.set(i);
950       return i + 1;  // Zero is reserved to mean "invalid".
951     }
952   }
953   LOG(FATAL) << "Out of internal thread ids";
954   return 0;
955 }
956 
ReleaseThreadId(Thread * self,uint32_t id)957 void ThreadList::ReleaseThreadId(Thread* self, uint32_t id) {
958   MutexLock mu(self, *Locks::allocated_thread_ids_lock_);
959   --id;  // Zero is reserved to mean "invalid".
960   DCHECK(allocated_ids_[id]) << id;
961   allocated_ids_.reset(id);
962 }
963 
964 }  // namespace art
965