1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/tracked_objects.h"
6 
7 #include <limits.h>
8 #include <stdlib.h>
9 
10 #include "base/atomicops.h"
11 #include "base/base_switches.h"
12 #include "base/command_line.h"
13 #include "base/compiler_specific.h"
14 #include "base/debug/leak_annotations.h"
15 #include "base/logging.h"
16 #include "base/process/process_handle.h"
17 #include "base/strings/stringprintf.h"
18 #include "base/third_party/valgrind/memcheck.h"
19 #include "base/threading/worker_pool.h"
20 #include "base/tracking_info.h"
21 #include "build/build_config.h"
22 
23 using base::TimeDelta;
24 
25 namespace base {
26 class TimeDelta;
27 }
28 
29 namespace tracked_objects {
30 
31 namespace {
32 // When ThreadData is first initialized, should we start in an ACTIVE state to
33 // record all of the startup-time tasks, or should we start up DEACTIVATED, so
34 // that we only record after parsing the command line flag --enable-tracking.
35 // Note that the flag may force either state, so this really controls only the
36 // period of time up until that flag is parsed.  If there is no flag seen, then
37 // this state may prevail for much or all of the process lifetime.
38 const ThreadData::Status kInitialStartupState = ThreadData::PROFILING_ACTIVE;
39 
40 // Possible states of the profiler timing enabledness.
41 enum {
42   UNDEFINED_TIMING,
43   ENABLED_TIMING,
44   DISABLED_TIMING,
45 };
46 
47 // State of the profiler timing enabledness.
48 base::subtle::Atomic32 g_profiler_timing_enabled = UNDEFINED_TIMING;
49 
50 // Returns whether profiler timing is enabled.  The default is true, but this
51 // may be overridden by a command-line flag.  Some platforms may
52 // programmatically set this command-line flag to the "off" value if it's not
53 // specified.
54 // This in turn can be overridden by explicitly calling
55 // ThreadData::EnableProfilerTiming, say, based on a field trial.
IsProfilerTimingEnabled()56 inline bool IsProfilerTimingEnabled() {
57   // Reading |g_profiler_timing_enabled| is done without barrier because
58   // multiple initialization is not an issue while the barrier can be relatively
59   // costly given that this method is sometimes called in a tight loop.
60   base::subtle::Atomic32 current_timing_enabled =
61       base::subtle::NoBarrier_Load(&g_profiler_timing_enabled);
62   if (current_timing_enabled == UNDEFINED_TIMING) {
63     if (!base::CommandLine::InitializedForCurrentProcess())
64       return true;
65     current_timing_enabled =
66         (base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
67              switches::kProfilerTiming) ==
68          switches::kProfilerTimingDisabledValue)
69             ? DISABLED_TIMING
70             : ENABLED_TIMING;
71     base::subtle::NoBarrier_Store(&g_profiler_timing_enabled,
72                                   current_timing_enabled);
73   }
74   return current_timing_enabled == ENABLED_TIMING;
75 }
76 
77 }  // namespace
78 
79 //------------------------------------------------------------------------------
80 // DeathData tallies durations when a death takes place.
81 
DeathData()82 DeathData::DeathData()
83     : count_(0),
84       sample_probability_count_(0),
85       run_duration_sum_(0),
86       queue_duration_sum_(0),
87       run_duration_max_(0),
88       queue_duration_max_(0),
89       run_duration_sample_(0),
90       queue_duration_sample_(0),
91       last_phase_snapshot_(nullptr) {
92 }
93 
DeathData(const DeathData & other)94 DeathData::DeathData(const DeathData& other)
95     : count_(other.count_),
96       sample_probability_count_(other.sample_probability_count_),
97       run_duration_sum_(other.run_duration_sum_),
98       queue_duration_sum_(other.queue_duration_sum_),
99       run_duration_max_(other.run_duration_max_),
100       queue_duration_max_(other.queue_duration_max_),
101       run_duration_sample_(other.run_duration_sample_),
102       queue_duration_sample_(other.queue_duration_sample_),
103       last_phase_snapshot_(nullptr) {
104   // This constructor will be used by std::map when adding new DeathData values
105   // to the map.  At that point, last_phase_snapshot_ is still NULL, so we don't
106   // need to worry about ownership transfer.
107   DCHECK(other.last_phase_snapshot_ == nullptr);
108 }
109 
~DeathData()110 DeathData::~DeathData() {
111   while (last_phase_snapshot_) {
112     const DeathDataPhaseSnapshot* snapshot = last_phase_snapshot_;
113     last_phase_snapshot_ = snapshot->prev;
114     delete snapshot;
115   }
116 }
117 
118 // TODO(jar): I need to see if this macro to optimize branching is worth using.
119 //
120 // This macro has no branching, so it is surely fast, and is equivalent to:
121 //             if (assign_it)
122 //               target = source;
123 // We use a macro rather than a template to force this to inline.
124 // Related code for calculating max is discussed on the web.
125 #define CONDITIONAL_ASSIGN(assign_it, target, source) \
126   ((target) ^= ((target) ^ (source)) & -static_cast<int32_t>(assign_it))
127 
RecordDeath(const int32_t queue_duration,const int32_t run_duration,const uint32_t random_number)128 void DeathData::RecordDeath(const int32_t queue_duration,
129                             const int32_t run_duration,
130                             const uint32_t random_number) {
131   // We'll just clamp at INT_MAX, but we should note this in the UI as such.
132   if (count_ < INT_MAX)
133     base::subtle::NoBarrier_Store(&count_, count_ + 1);
134 
135   int sample_probability_count =
136       base::subtle::NoBarrier_Load(&sample_probability_count_);
137   if (sample_probability_count < INT_MAX)
138     ++sample_probability_count;
139   base::subtle::NoBarrier_Store(&sample_probability_count_,
140                                 sample_probability_count);
141 
142   base::subtle::NoBarrier_Store(&queue_duration_sum_,
143                                 queue_duration_sum_ + queue_duration);
144   base::subtle::NoBarrier_Store(&run_duration_sum_,
145                                 run_duration_sum_ + run_duration);
146 
147   if (queue_duration_max() < queue_duration)
148     base::subtle::NoBarrier_Store(&queue_duration_max_, queue_duration);
149   if (run_duration_max() < run_duration)
150     base::subtle::NoBarrier_Store(&run_duration_max_, run_duration);
151 
152   // Take a uniformly distributed sample over all durations ever supplied during
153   // the current profiling phase.
154   // The probability that we (instead) use this new sample is
155   // 1/sample_probability_count_. This results in a completely uniform selection
156   // of the sample (at least when we don't clamp sample_probability_count_...
157   // but that should be inconsequentially likely).  We ignore the fact that we
158   // correlated our selection of a sample to the run and queue times (i.e., we
159   // used them to generate random_number).
160   CHECK_GT(sample_probability_count, 0);
161   if (0 == (random_number % sample_probability_count)) {
162     base::subtle::NoBarrier_Store(&queue_duration_sample_, queue_duration);
163     base::subtle::NoBarrier_Store(&run_duration_sample_, run_duration);
164   }
165 }
166 
OnProfilingPhaseCompleted(int profiling_phase)167 void DeathData::OnProfilingPhaseCompleted(int profiling_phase) {
168   // Snapshotting and storing current state.
169   last_phase_snapshot_ = new DeathDataPhaseSnapshot(
170       profiling_phase, count(), run_duration_sum(), run_duration_max(),
171       run_duration_sample(), queue_duration_sum(), queue_duration_max(),
172       queue_duration_sample(), last_phase_snapshot_);
173 
174   // Not touching fields for which a delta can be computed by comparing with a
175   // snapshot from the previous phase. Resetting other fields.  Sample values
176   // will be reset upon next death recording because sample_probability_count_
177   // is set to 0.
178   // We avoid resetting to 0 in favor of deltas whenever possible.  The reason
179   // is that for incrementable fields, resetting to 0 from the snapshot thread
180   // potentially in parallel with incrementing in the death thread may result in
181   // significant data corruption that has a potential to grow with time.  Not
182   // resetting incrementable fields and using deltas will cause any
183   // off-by-little corruptions to be likely fixed at the next snapshot.
184   // The max values are not incrementable, and cannot be deduced using deltas
185   // for a given phase. Hence, we have to reset them to 0.  But the potential
186   // damage is limited to getting the previous phase's max to apply for the next
187   // phase, and the error doesn't have a potential to keep growing with new
188   // resets.
189   // sample_probability_count_ is incrementable, but must be reset to 0 at the
190   // phase end, so that we start a new uniformly randomized sample selection
191   // after the reset. These fields are updated using atomics. However, race
192   // conditions are possible since these are updated individually and not
193   // together atomically, resulting in the values being mutually inconsistent.
194   // The damage is limited to selecting a wrong sample, which is not something
195   // that can cause accumulating or cascading effects.
196   // If there were no inconsistencies caused by race conditions, we never send a
197   // sample for the previous phase in the next phase's snapshot because
198   // ThreadData::SnapshotExecutedTasks doesn't send deltas with 0 count.
199   base::subtle::NoBarrier_Store(&sample_probability_count_, 0);
200   base::subtle::NoBarrier_Store(&run_duration_max_, 0);
201   base::subtle::NoBarrier_Store(&queue_duration_max_, 0);
202 }
203 
204 //------------------------------------------------------------------------------
DeathDataSnapshot()205 DeathDataSnapshot::DeathDataSnapshot()
206     : count(-1),
207       run_duration_sum(-1),
208       run_duration_max(-1),
209       run_duration_sample(-1),
210       queue_duration_sum(-1),
211       queue_duration_max(-1),
212       queue_duration_sample(-1) {
213 }
214 
DeathDataSnapshot(int count,int32_t run_duration_sum,int32_t run_duration_max,int32_t run_duration_sample,int32_t queue_duration_sum,int32_t queue_duration_max,int32_t queue_duration_sample)215 DeathDataSnapshot::DeathDataSnapshot(int count,
216                                      int32_t run_duration_sum,
217                                      int32_t run_duration_max,
218                                      int32_t run_duration_sample,
219                                      int32_t queue_duration_sum,
220                                      int32_t queue_duration_max,
221                                      int32_t queue_duration_sample)
222     : count(count),
223       run_duration_sum(run_duration_sum),
224       run_duration_max(run_duration_max),
225       run_duration_sample(run_duration_sample),
226       queue_duration_sum(queue_duration_sum),
227       queue_duration_max(queue_duration_max),
228       queue_duration_sample(queue_duration_sample) {}
229 
~DeathDataSnapshot()230 DeathDataSnapshot::~DeathDataSnapshot() {
231 }
232 
Delta(const DeathDataSnapshot & older) const233 DeathDataSnapshot DeathDataSnapshot::Delta(
234     const DeathDataSnapshot& older) const {
235   return DeathDataSnapshot(count - older.count,
236                            run_duration_sum - older.run_duration_sum,
237                            run_duration_max, run_duration_sample,
238                            queue_duration_sum - older.queue_duration_sum,
239                            queue_duration_max, queue_duration_sample);
240 }
241 
242 //------------------------------------------------------------------------------
BirthOnThread(const Location & location,const ThreadData & current)243 BirthOnThread::BirthOnThread(const Location& location,
244                              const ThreadData& current)
245     : location_(location),
246       birth_thread_(&current) {
247 }
248 
249 //------------------------------------------------------------------------------
BirthOnThreadSnapshot()250 BirthOnThreadSnapshot::BirthOnThreadSnapshot() {
251 }
252 
BirthOnThreadSnapshot(const BirthOnThread & birth)253 BirthOnThreadSnapshot::BirthOnThreadSnapshot(const BirthOnThread& birth)
254     : location(birth.location()),
255       thread_name(birth.birth_thread()->thread_name()) {
256 }
257 
~BirthOnThreadSnapshot()258 BirthOnThreadSnapshot::~BirthOnThreadSnapshot() {
259 }
260 
261 //------------------------------------------------------------------------------
Births(const Location & location,const ThreadData & current)262 Births::Births(const Location& location, const ThreadData& current)
263     : BirthOnThread(location, current),
264       birth_count_(1) { }
265 
birth_count() const266 int Births::birth_count() const { return birth_count_; }
267 
RecordBirth()268 void Births::RecordBirth() { ++birth_count_; }
269 
270 //------------------------------------------------------------------------------
271 // ThreadData maintains the central data for all births and deaths on a single
272 // thread.
273 
274 // TODO(jar): We should pull all these static vars together, into a struct, and
275 // optimize layout so that we benefit from locality of reference during accesses
276 // to them.
277 
278 // static
279 ThreadData::NowFunction* ThreadData::now_function_for_testing_ = NULL;
280 
281 // A TLS slot which points to the ThreadData instance for the current thread.
282 // We do a fake initialization here (zeroing out data), and then the real
283 // in-place construction happens when we call tls_index_.Initialize().
284 // static
285 base::ThreadLocalStorage::StaticSlot ThreadData::tls_index_ = TLS_INITIALIZER;
286 
287 // static
288 int ThreadData::worker_thread_data_creation_count_ = 0;
289 
290 // static
291 int ThreadData::cleanup_count_ = 0;
292 
293 // static
294 int ThreadData::incarnation_counter_ = 0;
295 
296 // static
297 ThreadData* ThreadData::all_thread_data_list_head_ = NULL;
298 
299 // static
300 ThreadData* ThreadData::first_retired_worker_ = NULL;
301 
302 // static
303 base::LazyInstance<base::Lock>::Leaky
304     ThreadData::list_lock_ = LAZY_INSTANCE_INITIALIZER;
305 
306 // static
307 base::subtle::Atomic32 ThreadData::status_ = ThreadData::UNINITIALIZED;
308 
ThreadData(const std::string & suggested_name)309 ThreadData::ThreadData(const std::string& suggested_name)
310     : next_(NULL),
311       next_retired_worker_(NULL),
312       worker_thread_number_(0),
313       incarnation_count_for_pool_(-1),
314       current_stopwatch_(NULL) {
315   DCHECK_GE(suggested_name.size(), 0u);
316   thread_name_ = suggested_name;
317   PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
318 }
319 
ThreadData(int thread_number)320 ThreadData::ThreadData(int thread_number)
321     : next_(NULL),
322       next_retired_worker_(NULL),
323       worker_thread_number_(thread_number),
324       incarnation_count_for_pool_(-1),
325       current_stopwatch_(NULL) {
326   CHECK_GT(thread_number, 0);
327   base::StringAppendF(&thread_name_, "WorkerThread-%d", thread_number);
328   PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
329 }
330 
~ThreadData()331 ThreadData::~ThreadData() {
332 }
333 
PushToHeadOfList()334 void ThreadData::PushToHeadOfList() {
335   // Toss in a hint of randomness (atop the uniniitalized value).
336   (void)VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(&random_number_,
337                                                  sizeof(random_number_));
338   MSAN_UNPOISON(&random_number_, sizeof(random_number_));
339   random_number_ += static_cast<uint32_t>(this - static_cast<ThreadData*>(0));
340   random_number_ ^= (Now() - TrackedTime()).InMilliseconds();
341 
342   DCHECK(!next_);
343   base::AutoLock lock(*list_lock_.Pointer());
344   incarnation_count_for_pool_ = incarnation_counter_;
345   next_ = all_thread_data_list_head_;
346   all_thread_data_list_head_ = this;
347 }
348 
349 // static
first()350 ThreadData* ThreadData::first() {
351   base::AutoLock lock(*list_lock_.Pointer());
352   return all_thread_data_list_head_;
353 }
354 
next() const355 ThreadData* ThreadData::next() const { return next_; }
356 
357 // static
InitializeThreadContext(const std::string & suggested_name)358 void ThreadData::InitializeThreadContext(const std::string& suggested_name) {
359   if (base::WorkerPool::RunsTasksOnCurrentThread())
360     return;
361   EnsureTlsInitialization();
362   ThreadData* current_thread_data =
363       reinterpret_cast<ThreadData*>(tls_index_.Get());
364   if (current_thread_data)
365     return;  // Browser tests instigate this.
366   current_thread_data = new ThreadData(suggested_name);
367   tls_index_.Set(current_thread_data);
368 }
369 
370 // static
Get()371 ThreadData* ThreadData::Get() {
372   if (!tls_index_.initialized())
373     return NULL;  // For unittests only.
374   ThreadData* registered = reinterpret_cast<ThreadData*>(tls_index_.Get());
375   if (registered)
376     return registered;
377 
378   // We must be a worker thread, since we didn't pre-register.
379   ThreadData* worker_thread_data = NULL;
380   int worker_thread_number = 0;
381   {
382     base::AutoLock lock(*list_lock_.Pointer());
383     if (first_retired_worker_) {
384       worker_thread_data = first_retired_worker_;
385       first_retired_worker_ = first_retired_worker_->next_retired_worker_;
386       worker_thread_data->next_retired_worker_ = NULL;
387     } else {
388       worker_thread_number = ++worker_thread_data_creation_count_;
389     }
390   }
391 
392   // If we can't find a previously used instance, then we have to create one.
393   if (!worker_thread_data) {
394     DCHECK_GT(worker_thread_number, 0);
395     worker_thread_data = new ThreadData(worker_thread_number);
396   }
397   DCHECK_GT(worker_thread_data->worker_thread_number_, 0);
398 
399   tls_index_.Set(worker_thread_data);
400   return worker_thread_data;
401 }
402 
403 // static
OnThreadTermination(void * thread_data)404 void ThreadData::OnThreadTermination(void* thread_data) {
405   DCHECK(thread_data);  // TLS should *never* call us with a NULL.
406   // We must NOT do any allocations during this callback.  There is a chance
407   // that the allocator is no longer active on this thread.
408   reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup();
409 }
410 
OnThreadTerminationCleanup()411 void ThreadData::OnThreadTerminationCleanup() {
412   // The list_lock_ was created when we registered the callback, so it won't be
413   // allocated here despite the lazy reference.
414   base::AutoLock lock(*list_lock_.Pointer());
415   if (incarnation_counter_ != incarnation_count_for_pool_)
416     return;  // ThreadData was constructed in an earlier unit test.
417   ++cleanup_count_;
418   // Only worker threads need to be retired and reused.
419   if (!worker_thread_number_) {
420     return;
421   }
422   // We must NOT do any allocations during this callback.
423   // Using the simple linked lists avoids all allocations.
424   DCHECK_EQ(this->next_retired_worker_, reinterpret_cast<ThreadData*>(NULL));
425   this->next_retired_worker_ = first_retired_worker_;
426   first_retired_worker_ = this;
427 }
428 
429 // static
Snapshot(int current_profiling_phase,ProcessDataSnapshot * process_data_snapshot)430 void ThreadData::Snapshot(int current_profiling_phase,
431                           ProcessDataSnapshot* process_data_snapshot) {
432   // Get an unchanging copy of a ThreadData list.
433   ThreadData* my_list = ThreadData::first();
434 
435   // Gather data serially.
436   // This hackish approach *can* get some slightly corrupt tallies, as we are
437   // grabbing values without the protection of a lock, but it has the advantage
438   // of working even with threads that don't have message loops.  If a user
439   // sees any strangeness, they can always just run their stats gathering a
440   // second time.
441   BirthCountMap birth_counts;
442   for (ThreadData* thread_data = my_list; thread_data;
443        thread_data = thread_data->next()) {
444     thread_data->SnapshotExecutedTasks(current_profiling_phase,
445                                        &process_data_snapshot->phased_snapshots,
446                                        &birth_counts);
447   }
448 
449   // Add births that are still active -- i.e. objects that have tallied a birth,
450   // but have not yet tallied a matching death, and hence must be either
451   // running, queued up, or being held in limbo for future posting.
452   auto* current_phase_tasks =
453       &process_data_snapshot->phased_snapshots[current_profiling_phase].tasks;
454   for (const auto& birth_count : birth_counts) {
455     if (birth_count.second > 0) {
456       current_phase_tasks->push_back(
457           TaskSnapshot(BirthOnThreadSnapshot(*birth_count.first),
458                        DeathDataSnapshot(birth_count.second, 0, 0, 0, 0, 0, 0),
459                        "Still_Alive"));
460     }
461   }
462 }
463 
464 // static
OnProfilingPhaseCompleted(int profiling_phase)465 void ThreadData::OnProfilingPhaseCompleted(int profiling_phase) {
466   // Get an unchanging copy of a ThreadData list.
467   ThreadData* my_list = ThreadData::first();
468 
469   // Add snapshots for all instances of death data in all threads serially.
470   // This hackish approach *can* get some slightly corrupt tallies, as we are
471   // grabbing values without the protection of a lock, but it has the advantage
472   // of working even with threads that don't have message loops.  Any corruption
473   // shouldn't cause "cascading damage" to anything else (in later phases).
474   for (ThreadData* thread_data = my_list; thread_data;
475        thread_data = thread_data->next()) {
476     thread_data->OnProfilingPhaseCompletedOnThread(profiling_phase);
477   }
478 }
479 
TallyABirth(const Location & location)480 Births* ThreadData::TallyABirth(const Location& location) {
481   BirthMap::iterator it = birth_map_.find(location);
482   Births* child;
483   if (it != birth_map_.end()) {
484     child =  it->second;
485     child->RecordBirth();
486   } else {
487     child = new Births(location, *this);  // Leak this.
488     // Lock since the map may get relocated now, and other threads sometimes
489     // snapshot it (but they lock before copying it).
490     base::AutoLock lock(map_lock_);
491     birth_map_[location] = child;
492   }
493 
494   return child;
495 }
496 
TallyADeath(const Births & births,int32_t queue_duration,const TaskStopwatch & stopwatch)497 void ThreadData::TallyADeath(const Births& births,
498                              int32_t queue_duration,
499                              const TaskStopwatch& stopwatch) {
500   int32_t run_duration = stopwatch.RunDurationMs();
501 
502   // Stir in some randomness, plus add constant in case durations are zero.
503   const uint32_t kSomePrimeNumber = 2147483647;
504   random_number_ += queue_duration + run_duration + kSomePrimeNumber;
505   // An address is going to have some randomness to it as well ;-).
506   random_number_ ^=
507       static_cast<uint32_t>(&births - reinterpret_cast<Births*>(0));
508 
509   DeathMap::iterator it = death_map_.find(&births);
510   DeathData* death_data;
511   if (it != death_map_.end()) {
512     death_data = &it->second;
513   } else {
514     base::AutoLock lock(map_lock_);  // Lock as the map may get relocated now.
515     death_data = &death_map_[&births];
516   }  // Release lock ASAP.
517   death_data->RecordDeath(queue_duration, run_duration, random_number_);
518 }
519 
520 // static
TallyABirthIfActive(const Location & location)521 Births* ThreadData::TallyABirthIfActive(const Location& location) {
522   if (!TrackingStatus())
523     return NULL;
524   ThreadData* current_thread_data = Get();
525   if (!current_thread_data)
526     return NULL;
527   return current_thread_data->TallyABirth(location);
528 }
529 
530 // static
TallyRunOnNamedThreadIfTracking(const base::TrackingInfo & completed_task,const TaskStopwatch & stopwatch)531 void ThreadData::TallyRunOnNamedThreadIfTracking(
532     const base::TrackingInfo& completed_task,
533     const TaskStopwatch& stopwatch) {
534   // Even if we have been DEACTIVATED, we will process any pending births so
535   // that our data structures (which counted the outstanding births) remain
536   // consistent.
537   const Births* births = completed_task.birth_tally;
538   if (!births)
539     return;
540   ThreadData* current_thread_data = stopwatch.GetThreadData();
541   if (!current_thread_data)
542     return;
543 
544   // Watch out for a race where status_ is changing, and hence one or both
545   // of start_of_run or end_of_run is zero.  In that case, we didn't bother to
546   // get a time value since we "weren't tracking" and we were trying to be
547   // efficient by not calling for a genuine time value.  For simplicity, we'll
548   // use a default zero duration when we can't calculate a true value.
549   TrackedTime start_of_run = stopwatch.StartTime();
550   int32_t queue_duration = 0;
551   if (!start_of_run.is_null()) {
552     queue_duration = (start_of_run - completed_task.EffectiveTimePosted())
553         .InMilliseconds();
554   }
555   current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
556 }
557 
558 // static
TallyRunOnWorkerThreadIfTracking(const Births * births,const TrackedTime & time_posted,const TaskStopwatch & stopwatch)559 void ThreadData::TallyRunOnWorkerThreadIfTracking(
560     const Births* births,
561     const TrackedTime& time_posted,
562     const TaskStopwatch& stopwatch) {
563   // Even if we have been DEACTIVATED, we will process any pending births so
564   // that our data structures (which counted the outstanding births) remain
565   // consistent.
566   if (!births)
567     return;
568 
569   // TODO(jar): Support the option to coalesce all worker-thread activity under
570   // one ThreadData instance that uses locks to protect *all* access.  This will
571   // reduce memory (making it provably bounded), but run incrementally slower
572   // (since we'll use locks on TallyABirth and TallyADeath).  The good news is
573   // that the locks on TallyADeath will be *after* the worker thread has run,
574   // and hence nothing will be waiting for the completion (...  besides some
575   // other thread that might like to run).  Also, the worker threads tasks are
576   // generally longer, and hence the cost of the lock may perchance be amortized
577   // over the long task's lifetime.
578   ThreadData* current_thread_data = stopwatch.GetThreadData();
579   if (!current_thread_data)
580     return;
581 
582   TrackedTime start_of_run = stopwatch.StartTime();
583   int32_t queue_duration = 0;
584   if (!start_of_run.is_null()) {
585     queue_duration = (start_of_run - time_posted).InMilliseconds();
586   }
587   current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
588 }
589 
590 // static
TallyRunInAScopedRegionIfTracking(const Births * births,const TaskStopwatch & stopwatch)591 void ThreadData::TallyRunInAScopedRegionIfTracking(
592     const Births* births,
593     const TaskStopwatch& stopwatch) {
594   // Even if we have been DEACTIVATED, we will process any pending births so
595   // that our data structures (which counted the outstanding births) remain
596   // consistent.
597   if (!births)
598     return;
599 
600   ThreadData* current_thread_data = stopwatch.GetThreadData();
601   if (!current_thread_data)
602     return;
603 
604   int32_t queue_duration = 0;
605   current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
606 }
607 
SnapshotExecutedTasks(int current_profiling_phase,PhasedProcessDataSnapshotMap * phased_snapshots,BirthCountMap * birth_counts)608 void ThreadData::SnapshotExecutedTasks(
609     int current_profiling_phase,
610     PhasedProcessDataSnapshotMap* phased_snapshots,
611     BirthCountMap* birth_counts) {
612   // Get copy of data, so that the data will not change during the iterations
613   // and processing.
614   BirthMap birth_map;
615   DeathsSnapshot deaths;
616   SnapshotMaps(current_profiling_phase, &birth_map, &deaths);
617 
618   for (const auto& birth : birth_map) {
619     (*birth_counts)[birth.second] += birth.second->birth_count();
620   }
621 
622   for (const auto& death : deaths) {
623     (*birth_counts)[death.first] -= death.first->birth_count();
624 
625     // For the current death data, walk through all its snapshots, starting from
626     // the current one, then from the previous profiling phase etc., and for
627     // each snapshot calculate the delta between the snapshot and the previous
628     // phase, if any.  Store the deltas in the result.
629     for (const DeathDataPhaseSnapshot* phase = &death.second; phase;
630          phase = phase->prev) {
631       const DeathDataSnapshot& death_data =
632           phase->prev ? phase->death_data.Delta(phase->prev->death_data)
633                       : phase->death_data;
634 
635       if (death_data.count > 0) {
636         (*phased_snapshots)[phase->profiling_phase].tasks.push_back(
637             TaskSnapshot(BirthOnThreadSnapshot(*death.first), death_data,
638                          thread_name()));
639       }
640     }
641   }
642 }
643 
644 // This may be called from another thread.
SnapshotMaps(int profiling_phase,BirthMap * birth_map,DeathsSnapshot * deaths)645 void ThreadData::SnapshotMaps(int profiling_phase,
646                               BirthMap* birth_map,
647                               DeathsSnapshot* deaths) {
648   base::AutoLock lock(map_lock_);
649 
650   for (const auto& birth : birth_map_)
651     (*birth_map)[birth.first] = birth.second;
652 
653   for (const auto& death : death_map_) {
654     deaths->push_back(std::make_pair(
655         death.first,
656         DeathDataPhaseSnapshot(profiling_phase, death.second.count(),
657                                death.second.run_duration_sum(),
658                                death.second.run_duration_max(),
659                                death.second.run_duration_sample(),
660                                death.second.queue_duration_sum(),
661                                death.second.queue_duration_max(),
662                                death.second.queue_duration_sample(),
663                                death.second.last_phase_snapshot())));
664   }
665 }
666 
OnProfilingPhaseCompletedOnThread(int profiling_phase)667 void ThreadData::OnProfilingPhaseCompletedOnThread(int profiling_phase) {
668   base::AutoLock lock(map_lock_);
669 
670   for (auto& death : death_map_) {
671     death.second.OnProfilingPhaseCompleted(profiling_phase);
672   }
673 }
674 
EnsureTlsInitialization()675 void ThreadData::EnsureTlsInitialization() {
676   if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
677     return;  // Someone else did the initialization.
678   // Due to racy lazy initialization in tests, we'll need to recheck status_
679   // after we acquire the lock.
680 
681   // Ensure that we don't double initialize tls.  We are called when single
682   // threaded in the product, but some tests may be racy and lazy about our
683   // initialization.
684   base::AutoLock lock(*list_lock_.Pointer());
685   if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
686     return;  // Someone raced in here and beat us.
687 
688   // Perform the "real" TLS initialization now, and leave it intact through
689   // process termination.
690   if (!tls_index_.initialized()) {  // Testing may have initialized this.
691     DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), UNINITIALIZED);
692     tls_index_.Initialize(&ThreadData::OnThreadTermination);
693     DCHECK(tls_index_.initialized());
694   } else {
695     // TLS was initialzed for us earlier.
696     DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), DORMANT_DURING_TESTS);
697   }
698 
699   // Incarnation counter is only significant to testing, as it otherwise will
700   // never again change in this process.
701   ++incarnation_counter_;
702 
703   // The lock is not critical for setting status_, but it doesn't hurt.  It also
704   // ensures that if we have a racy initialization, that we'll bail as soon as
705   // we get the lock earlier in this method.
706   base::subtle::Release_Store(&status_, kInitialStartupState);
707   DCHECK(base::subtle::NoBarrier_Load(&status_) != UNINITIALIZED);
708 }
709 
710 // static
InitializeAndSetTrackingStatus(Status status)711 void ThreadData::InitializeAndSetTrackingStatus(Status status) {
712   DCHECK_GE(status, DEACTIVATED);
713   DCHECK_LE(status, PROFILING_ACTIVE);
714 
715   EnsureTlsInitialization();  // No-op if already initialized.
716 
717   if (status > DEACTIVATED)
718     status = PROFILING_ACTIVE;
719   base::subtle::Release_Store(&status_, status);
720 }
721 
722 // static
status()723 ThreadData::Status ThreadData::status() {
724   return static_cast<ThreadData::Status>(base::subtle::Acquire_Load(&status_));
725 }
726 
727 // static
TrackingStatus()728 bool ThreadData::TrackingStatus() {
729   return base::subtle::Acquire_Load(&status_) > DEACTIVATED;
730 }
731 
732 // static
EnableProfilerTiming()733 void ThreadData::EnableProfilerTiming() {
734   base::subtle::NoBarrier_Store(&g_profiler_timing_enabled, ENABLED_TIMING);
735 }
736 
737 // static
Now()738 TrackedTime ThreadData::Now() {
739   if (now_function_for_testing_)
740     return TrackedTime::FromMilliseconds((*now_function_for_testing_)());
741   if (IsProfilerTimingEnabled() && TrackingStatus())
742     return TrackedTime::Now();
743   return TrackedTime();  // Super fast when disabled, or not compiled.
744 }
745 
746 // static
EnsureCleanupWasCalled(int)747 void ThreadData::EnsureCleanupWasCalled(int /*major_threads_shutdown_count*/) {
748   base::AutoLock lock(*list_lock_.Pointer());
749   if (worker_thread_data_creation_count_ == 0)
750     return;  // We haven't really run much, and couldn't have leaked.
751 
752   // TODO(jar): until this is working on XP, don't run the real test.
753 #if 0
754   // Verify that we've at least shutdown/cleanup the major namesd threads.  The
755   // caller should tell us how many thread shutdowns should have taken place by
756   // now.
757   CHECK_GT(cleanup_count_, major_threads_shutdown_count);
758 #endif
759 }
760 
761 // static
ShutdownSingleThreadedCleanup(bool leak)762 void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
763   // This is only called from test code, where we need to cleanup so that
764   // additional tests can be run.
765   // We must be single threaded... but be careful anyway.
766   InitializeAndSetTrackingStatus(DEACTIVATED);
767 
768   ThreadData* thread_data_list;
769   {
770     base::AutoLock lock(*list_lock_.Pointer());
771     thread_data_list = all_thread_data_list_head_;
772     all_thread_data_list_head_ = NULL;
773     ++incarnation_counter_;
774     // To be clean, break apart the retired worker list (though we leak them).
775     while (first_retired_worker_) {
776       ThreadData* worker = first_retired_worker_;
777       CHECK_GT(worker->worker_thread_number_, 0);
778       first_retired_worker_ = worker->next_retired_worker_;
779       worker->next_retired_worker_ = NULL;
780     }
781   }
782 
783   // Put most global static back in pristine shape.
784   worker_thread_data_creation_count_ = 0;
785   cleanup_count_ = 0;
786   tls_index_.Set(NULL);
787   // Almost UNINITIALIZED.
788   base::subtle::Release_Store(&status_, DORMANT_DURING_TESTS);
789 
790   // To avoid any chance of racing in unit tests, which is the only place we
791   // call this function, we may sometimes leak all the data structures we
792   // recovered, as they may still be in use on threads from prior tests!
793   if (leak) {
794     ThreadData* thread_data = thread_data_list;
795     while (thread_data) {
796       ANNOTATE_LEAKING_OBJECT_PTR(thread_data);
797       thread_data = thread_data->next();
798     }
799     return;
800   }
801 
802   // When we want to cleanup (on a single thread), here is what we do.
803 
804   // Do actual recursive delete in all ThreadData instances.
805   while (thread_data_list) {
806     ThreadData* next_thread_data = thread_data_list;
807     thread_data_list = thread_data_list->next();
808 
809     for (BirthMap::iterator it = next_thread_data->birth_map_.begin();
810          next_thread_data->birth_map_.end() != it; ++it)
811       delete it->second;  // Delete the Birth Records.
812     delete next_thread_data;  // Includes all Death Records.
813   }
814 }
815 
816 //------------------------------------------------------------------------------
TaskStopwatch()817 TaskStopwatch::TaskStopwatch()
818     : wallclock_duration_ms_(0),
819       current_thread_data_(NULL),
820       excluded_duration_ms_(0),
821       parent_(NULL) {
822 #if DCHECK_IS_ON()
823   state_ = CREATED;
824   child_ = NULL;
825 #endif
826 }
827 
~TaskStopwatch()828 TaskStopwatch::~TaskStopwatch() {
829 #if DCHECK_IS_ON()
830   DCHECK(state_ != RUNNING);
831   DCHECK(child_ == NULL);
832 #endif
833 }
834 
Start()835 void TaskStopwatch::Start() {
836 #if DCHECK_IS_ON()
837   DCHECK(state_ == CREATED);
838   state_ = RUNNING;
839 #endif
840 
841   start_time_ = ThreadData::Now();
842 
843   current_thread_data_ = ThreadData::Get();
844   if (!current_thread_data_)
845     return;
846 
847   parent_ = current_thread_data_->current_stopwatch_;
848 #if DCHECK_IS_ON()
849   if (parent_) {
850     DCHECK(parent_->state_ == RUNNING);
851     DCHECK(parent_->child_ == NULL);
852     parent_->child_ = this;
853   }
854 #endif
855   current_thread_data_->current_stopwatch_ = this;
856 }
857 
Stop()858 void TaskStopwatch::Stop() {
859   const TrackedTime end_time = ThreadData::Now();
860 #if DCHECK_IS_ON()
861   DCHECK(state_ == RUNNING);
862   state_ = STOPPED;
863   DCHECK(child_ == NULL);
864 #endif
865 
866   if (!start_time_.is_null() && !end_time.is_null()) {
867     wallclock_duration_ms_ = (end_time - start_time_).InMilliseconds();
868   }
869 
870   if (!current_thread_data_)
871     return;
872 
873   DCHECK(current_thread_data_->current_stopwatch_ == this);
874   current_thread_data_->current_stopwatch_ = parent_;
875   if (!parent_)
876     return;
877 
878 #if DCHECK_IS_ON()
879   DCHECK(parent_->state_ == RUNNING);
880   DCHECK(parent_->child_ == this);
881   parent_->child_ = NULL;
882 #endif
883   parent_->excluded_duration_ms_ += wallclock_duration_ms_;
884   parent_ = NULL;
885 }
886 
StartTime() const887 TrackedTime TaskStopwatch::StartTime() const {
888 #if DCHECK_IS_ON()
889   DCHECK(state_ != CREATED);
890 #endif
891 
892   return start_time_;
893 }
894 
RunDurationMs() const895 int32_t TaskStopwatch::RunDurationMs() const {
896 #if DCHECK_IS_ON()
897   DCHECK(state_ == STOPPED);
898 #endif
899 
900   return wallclock_duration_ms_ - excluded_duration_ms_;
901 }
902 
GetThreadData() const903 ThreadData* TaskStopwatch::GetThreadData() const {
904 #if DCHECK_IS_ON()
905   DCHECK(state_ != CREATED);
906 #endif
907 
908   return current_thread_data_;
909 }
910 
911 //------------------------------------------------------------------------------
912 // DeathDataPhaseSnapshot
913 
DeathDataPhaseSnapshot(int profiling_phase,int count,int32_t run_duration_sum,int32_t run_duration_max,int32_t run_duration_sample,int32_t queue_duration_sum,int32_t queue_duration_max,int32_t queue_duration_sample,const DeathDataPhaseSnapshot * prev)914 DeathDataPhaseSnapshot::DeathDataPhaseSnapshot(
915     int profiling_phase,
916     int count,
917     int32_t run_duration_sum,
918     int32_t run_duration_max,
919     int32_t run_duration_sample,
920     int32_t queue_duration_sum,
921     int32_t queue_duration_max,
922     int32_t queue_duration_sample,
923     const DeathDataPhaseSnapshot* prev)
924     : profiling_phase(profiling_phase),
925       death_data(count,
926                  run_duration_sum,
927                  run_duration_max,
928                  run_duration_sample,
929                  queue_duration_sum,
930                  queue_duration_max,
931                  queue_duration_sample),
932       prev(prev) {}
933 
934 //------------------------------------------------------------------------------
935 // TaskSnapshot
936 
TaskSnapshot()937 TaskSnapshot::TaskSnapshot() {
938 }
939 
TaskSnapshot(const BirthOnThreadSnapshot & birth,const DeathDataSnapshot & death_data,const std::string & death_thread_name)940 TaskSnapshot::TaskSnapshot(const BirthOnThreadSnapshot& birth,
941                            const DeathDataSnapshot& death_data,
942                            const std::string& death_thread_name)
943     : birth(birth),
944       death_data(death_data),
945       death_thread_name(death_thread_name) {
946 }
947 
~TaskSnapshot()948 TaskSnapshot::~TaskSnapshot() {
949 }
950 
951 //------------------------------------------------------------------------------
952 // ProcessDataPhaseSnapshot
953 
ProcessDataPhaseSnapshot()954 ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot() {
955 }
956 
957 ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot(
958     const ProcessDataPhaseSnapshot& other) = default;
959 
~ProcessDataPhaseSnapshot()960 ProcessDataPhaseSnapshot::~ProcessDataPhaseSnapshot() {
961 }
962 
963 //------------------------------------------------------------------------------
964 // ProcessDataPhaseSnapshot
965 
ProcessDataSnapshot()966 ProcessDataSnapshot::ProcessDataSnapshot()
967 #if !defined(OS_NACL)
968     : process_id(base::GetCurrentProcId()) {
969 #else
970     : process_id(base::kNullProcessId) {
971 #endif
972 }
973 
974 ProcessDataSnapshot::ProcessDataSnapshot(const ProcessDataSnapshot& other) =
975     default;
976 
977 ProcessDataSnapshot::~ProcessDataSnapshot() {
978 }
979 
980 }  // namespace tracked_objects
981