1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/tracked_objects.h"
6 
7 #include <limits.h>
8 #include <stdlib.h>
9 
10 #include "base/atomicops.h"
11 #include "base/base_switches.h"
12 #include "base/command_line.h"
13 #include "base/compiler_specific.h"
14 #include "base/logging.h"
15 #include "base/process/process_handle.h"
16 #include "base/profiler/alternate_timer.h"
17 #include "base/strings/stringprintf.h"
18 #include "base/tracking_info.h"
19 #include "build/build_config.h"
20 #include "third_party/valgrind/memcheck.h"
21 
22 using base::TimeDelta;
23 
24 namespace base {
25 class TimeDelta;
26 }
27 
28 namespace tracked_objects {
29 
30 namespace {
31 // When ThreadData is first initialized, should we start in an ACTIVE state to
32 // record all of the startup-time tasks, or should we start up DEACTIVATED, so
33 // that we only record after parsing the command line flag --enable-tracking.
34 // Note that the flag may force either state, so this really controls only the
35 // period of time up until that flag is parsed.  If there is no flag seen, then
36 // this state may prevail for much or all of the process lifetime.
37 const ThreadData::Status kInitialStartupState = ThreadData::PROFILING_ACTIVE;
38 
39 // Control whether an alternate time source (Now() function) is supported by
40 // the ThreadData class.  This compile time flag should be set to true if we
41 // want other modules (such as a memory allocator, or a thread-specific CPU time
42 // clock) to be able to provide a thread-specific Now() function.  Without this
43 // compile-time flag, the code will only support the wall-clock time.  This flag
44 // can be flipped to efficiently disable this path (if there is a performance
45 // problem with its presence).
46 static const bool kAllowAlternateTimeSourceHandling = true;
47 
48 // Possible states of the profiler timing enabledness.
49 enum {
50   UNDEFINED_TIMING,
51   ENABLED_TIMING,
52   DISABLED_TIMING,
53 };
54 
55 // State of the profiler timing enabledness.
56 base::subtle::Atomic32 g_profiler_timing_enabled = UNDEFINED_TIMING;
57 
58 // Returns whether profiler timing is enabled.  The default is true, but this
59 // may be overridden by a command-line flag.  Some platforms may
60 // programmatically set this command-line flag to the "off" value if it's not
61 // specified.
62 // This in turn can be overridden by explicitly calling
63 // ThreadData::EnableProfilerTiming, say, based on a field trial.
IsProfilerTimingEnabled()64 inline bool IsProfilerTimingEnabled() {
65   // Reading |g_profiler_timing_enabled| is done without barrier because
66   // multiple initialization is not an issue while the barrier can be relatively
67   // costly given that this method is sometimes called in a tight loop.
68   base::subtle::Atomic32 current_timing_enabled =
69       base::subtle::NoBarrier_Load(&g_profiler_timing_enabled);
70   if (current_timing_enabled == UNDEFINED_TIMING) {
71     if (!base::CommandLine::InitializedForCurrentProcess())
72       return true;
73     current_timing_enabled =
74         (base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
75              switches::kProfilerTiming) ==
76          switches::kProfilerTimingDisabledValue)
77             ? DISABLED_TIMING
78             : ENABLED_TIMING;
79     base::subtle::NoBarrier_Store(&g_profiler_timing_enabled,
80                                   current_timing_enabled);
81   }
82   return current_timing_enabled == ENABLED_TIMING;
83 }
84 
85 }  // namespace
86 
87 //------------------------------------------------------------------------------
88 // DeathData tallies durations when a death takes place.
89 
DeathData()90 DeathData::DeathData()
91     : count_(0),
92       sample_probability_count_(0),
93       run_duration_sum_(0),
94       queue_duration_sum_(0),
95       run_duration_max_(0),
96       queue_duration_max_(0),
97       run_duration_sample_(0),
98       queue_duration_sample_(0),
99       last_phase_snapshot_(nullptr) {
100 }
101 
DeathData(const DeathData & other)102 DeathData::DeathData(const DeathData& other)
103     : count_(other.count_),
104       sample_probability_count_(other.sample_probability_count_),
105       run_duration_sum_(other.run_duration_sum_),
106       queue_duration_sum_(other.queue_duration_sum_),
107       run_duration_max_(other.run_duration_max_),
108       queue_duration_max_(other.queue_duration_max_),
109       run_duration_sample_(other.run_duration_sample_),
110       queue_duration_sample_(other.queue_duration_sample_),
111       last_phase_snapshot_(nullptr) {
112   // This constructor will be used by std::map when adding new DeathData values
113   // to the map.  At that point, last_phase_snapshot_ is still NULL, so we don't
114   // need to worry about ownership transfer.
115   DCHECK(other.last_phase_snapshot_ == nullptr);
116 }
117 
~DeathData()118 DeathData::~DeathData() {
119   while (last_phase_snapshot_) {
120     const DeathDataPhaseSnapshot* snapshot = last_phase_snapshot_;
121     last_phase_snapshot_ = snapshot->prev;
122     delete snapshot;
123   }
124 }
125 
126 // TODO(jar): I need to see if this macro to optimize branching is worth using.
127 //
128 // This macro has no branching, so it is surely fast, and is equivalent to:
129 //             if (assign_it)
130 //               target = source;
131 // We use a macro rather than a template to force this to inline.
132 // Related code for calculating max is discussed on the web.
133 #define CONDITIONAL_ASSIGN(assign_it, target, source) \
134   ((target) ^= ((target) ^ (source)) & -static_cast<int32_t>(assign_it))
135 
RecordDeath(const int32_t queue_duration,const int32_t run_duration,const uint32_t random_number)136 void DeathData::RecordDeath(const int32_t queue_duration,
137                             const int32_t run_duration,
138                             const uint32_t random_number) {
139   // We'll just clamp at INT_MAX, but we should note this in the UI as such.
140   if (count_ < INT_MAX)
141     base::subtle::NoBarrier_Store(&count_, count_ + 1);
142 
143   int sample_probability_count =
144       base::subtle::NoBarrier_Load(&sample_probability_count_);
145   if (sample_probability_count < INT_MAX)
146     ++sample_probability_count;
147   base::subtle::NoBarrier_Store(&sample_probability_count_,
148                                 sample_probability_count);
149 
150   base::subtle::NoBarrier_Store(&queue_duration_sum_,
151                                 queue_duration_sum_ + queue_duration);
152   base::subtle::NoBarrier_Store(&run_duration_sum_,
153                                 run_duration_sum_ + run_duration);
154 
155   if (queue_duration_max() < queue_duration)
156     base::subtle::NoBarrier_Store(&queue_duration_max_, queue_duration);
157   if (run_duration_max() < run_duration)
158     base::subtle::NoBarrier_Store(&run_duration_max_, run_duration);
159 
160   // Take a uniformly distributed sample over all durations ever supplied during
161   // the current profiling phase.
162   // The probability that we (instead) use this new sample is
163   // 1/sample_probability_count_. This results in a completely uniform selection
164   // of the sample (at least when we don't clamp sample_probability_count_...
165   // but that should be inconsequentially likely).  We ignore the fact that we
166   // correlated our selection of a sample to the run and queue times (i.e., we
167   // used them to generate random_number).
168   CHECK_GT(sample_probability_count, 0);
169   if (0 == (random_number % sample_probability_count)) {
170     base::subtle::NoBarrier_Store(&queue_duration_sample_, queue_duration);
171     base::subtle::NoBarrier_Store(&run_duration_sample_, run_duration);
172   }
173 }
174 
OnProfilingPhaseCompleted(int profiling_phase)175 void DeathData::OnProfilingPhaseCompleted(int profiling_phase) {
176   // Snapshotting and storing current state.
177   last_phase_snapshot_ = new DeathDataPhaseSnapshot(
178       profiling_phase, count(), run_duration_sum(), run_duration_max(),
179       run_duration_sample(), queue_duration_sum(), queue_duration_max(),
180       queue_duration_sample(), last_phase_snapshot_);
181 
182   // Not touching fields for which a delta can be computed by comparing with a
183   // snapshot from the previous phase. Resetting other fields.  Sample values
184   // will be reset upon next death recording because sample_probability_count_
185   // is set to 0.
186   // We avoid resetting to 0 in favor of deltas whenever possible.  The reason
187   // is that for incrementable fields, resetting to 0 from the snapshot thread
188   // potentially in parallel with incrementing in the death thread may result in
189   // significant data corruption that has a potential to grow with time.  Not
190   // resetting incrementable fields and using deltas will cause any
191   // off-by-little corruptions to be likely fixed at the next snapshot.
192   // The max values are not incrementable, and cannot be deduced using deltas
193   // for a given phase. Hence, we have to reset them to 0.  But the potential
194   // damage is limited to getting the previous phase's max to apply for the next
195   // phase, and the error doesn't have a potential to keep growing with new
196   // resets.
197   // sample_probability_count_ is incrementable, but must be reset to 0 at the
198   // phase end, so that we start a new uniformly randomized sample selection
199   // after the reset. These fields are updated using atomics. However, race
200   // conditions are possible since these are updated individually and not
201   // together atomically, resulting in the values being mutually inconsistent.
202   // The damage is limited to selecting a wrong sample, which is not something
203   // that can cause accumulating or cascading effects.
204   // If there were no inconsistencies caused by race conditions, we never send a
205   // sample for the previous phase in the next phase's snapshot because
206   // ThreadData::SnapshotExecutedTasks doesn't send deltas with 0 count.
207   base::subtle::NoBarrier_Store(&sample_probability_count_, 0);
208   base::subtle::NoBarrier_Store(&run_duration_max_, 0);
209   base::subtle::NoBarrier_Store(&queue_duration_max_, 0);
210 }
211 
212 //------------------------------------------------------------------------------
DeathDataSnapshot()213 DeathDataSnapshot::DeathDataSnapshot()
214     : count(-1),
215       run_duration_sum(-1),
216       run_duration_max(-1),
217       run_duration_sample(-1),
218       queue_duration_sum(-1),
219       queue_duration_max(-1),
220       queue_duration_sample(-1) {
221 }
222 
DeathDataSnapshot(int count,int32_t run_duration_sum,int32_t run_duration_max,int32_t run_duration_sample,int32_t queue_duration_sum,int32_t queue_duration_max,int32_t queue_duration_sample)223 DeathDataSnapshot::DeathDataSnapshot(int count,
224                                      int32_t run_duration_sum,
225                                      int32_t run_duration_max,
226                                      int32_t run_duration_sample,
227                                      int32_t queue_duration_sum,
228                                      int32_t queue_duration_max,
229                                      int32_t queue_duration_sample)
230     : count(count),
231       run_duration_sum(run_duration_sum),
232       run_duration_max(run_duration_max),
233       run_duration_sample(run_duration_sample),
234       queue_duration_sum(queue_duration_sum),
235       queue_duration_max(queue_duration_max),
236       queue_duration_sample(queue_duration_sample) {}
237 
~DeathDataSnapshot()238 DeathDataSnapshot::~DeathDataSnapshot() {
239 }
240 
Delta(const DeathDataSnapshot & older) const241 DeathDataSnapshot DeathDataSnapshot::Delta(
242     const DeathDataSnapshot& older) const {
243   return DeathDataSnapshot(count - older.count,
244                            run_duration_sum - older.run_duration_sum,
245                            run_duration_max, run_duration_sample,
246                            queue_duration_sum - older.queue_duration_sum,
247                            queue_duration_max, queue_duration_sample);
248 }
249 
250 //------------------------------------------------------------------------------
BirthOnThread(const Location & location,const ThreadData & current)251 BirthOnThread::BirthOnThread(const Location& location,
252                              const ThreadData& current)
253     : location_(location),
254       birth_thread_(&current) {
255 }
256 
257 //------------------------------------------------------------------------------
BirthOnThreadSnapshot()258 BirthOnThreadSnapshot::BirthOnThreadSnapshot() {
259 }
260 
BirthOnThreadSnapshot(const BirthOnThread & birth)261 BirthOnThreadSnapshot::BirthOnThreadSnapshot(const BirthOnThread& birth)
262     : location(birth.location()),
263       thread_name(birth.birth_thread()->thread_name()) {
264 }
265 
~BirthOnThreadSnapshot()266 BirthOnThreadSnapshot::~BirthOnThreadSnapshot() {
267 }
268 
269 //------------------------------------------------------------------------------
Births(const Location & location,const ThreadData & current)270 Births::Births(const Location& location, const ThreadData& current)
271     : BirthOnThread(location, current),
272       birth_count_(1) { }
273 
birth_count() const274 int Births::birth_count() const { return birth_count_; }
275 
RecordBirth()276 void Births::RecordBirth() { ++birth_count_; }
277 
278 //------------------------------------------------------------------------------
279 // ThreadData maintains the central data for all births and deaths on a single
280 // thread.
281 
282 // TODO(jar): We should pull all these static vars together, into a struct, and
283 // optimize layout so that we benefit from locality of reference during accesses
284 // to them.
285 
286 // static
287 NowFunction* ThreadData::now_function_ = NULL;
288 
289 // static
290 bool ThreadData::now_function_is_time_ = false;
291 
292 // A TLS slot which points to the ThreadData instance for the current thread.
293 // We do a fake initialization here (zeroing out data), and then the real
294 // in-place construction happens when we call tls_index_.Initialize().
295 // static
296 base::ThreadLocalStorage::StaticSlot ThreadData::tls_index_ = TLS_INITIALIZER;
297 
298 // static
299 int ThreadData::worker_thread_data_creation_count_ = 0;
300 
301 // static
302 int ThreadData::cleanup_count_ = 0;
303 
304 // static
305 int ThreadData::incarnation_counter_ = 0;
306 
307 // static
308 ThreadData* ThreadData::all_thread_data_list_head_ = NULL;
309 
310 // static
311 ThreadData* ThreadData::first_retired_worker_ = NULL;
312 
313 // static
314 base::LazyInstance<base::Lock>::Leaky
315     ThreadData::list_lock_ = LAZY_INSTANCE_INITIALIZER;
316 
317 // static
318 base::subtle::Atomic32 ThreadData::status_ = ThreadData::UNINITIALIZED;
319 
ThreadData(const std::string & suggested_name)320 ThreadData::ThreadData(const std::string& suggested_name)
321     : next_(NULL),
322       next_retired_worker_(NULL),
323       worker_thread_number_(0),
324       incarnation_count_for_pool_(-1),
325       current_stopwatch_(NULL) {
326   DCHECK_GE(suggested_name.size(), 0u);
327   thread_name_ = suggested_name;
328   PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
329 }
330 
ThreadData(int thread_number)331 ThreadData::ThreadData(int thread_number)
332     : next_(NULL),
333       next_retired_worker_(NULL),
334       worker_thread_number_(thread_number),
335       incarnation_count_for_pool_(-1),
336       current_stopwatch_(NULL) {
337   CHECK_GT(thread_number, 0);
338   base::StringAppendF(&thread_name_, "WorkerThread-%d", thread_number);
339   PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
340 }
341 
~ThreadData()342 ThreadData::~ThreadData() {
343 }
344 
PushToHeadOfList()345 void ThreadData::PushToHeadOfList() {
346   // Toss in a hint of randomness (atop the uniniitalized value).
347   (void)VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(&random_number_,
348                                                  sizeof(random_number_));
349   MSAN_UNPOISON(&random_number_, sizeof(random_number_));
350   random_number_ += static_cast<uint32_t>(this - static_cast<ThreadData*>(0));
351   random_number_ ^= (Now() - TrackedTime()).InMilliseconds();
352 
353   DCHECK(!next_);
354   base::AutoLock lock(*list_lock_.Pointer());
355   incarnation_count_for_pool_ = incarnation_counter_;
356   next_ = all_thread_data_list_head_;
357   all_thread_data_list_head_ = this;
358 }
359 
360 // static
first()361 ThreadData* ThreadData::first() {
362   base::AutoLock lock(*list_lock_.Pointer());
363   return all_thread_data_list_head_;
364 }
365 
next() const366 ThreadData* ThreadData::next() const { return next_; }
367 
368 // static
InitializeThreadContext(const std::string & suggested_name)369 void ThreadData::InitializeThreadContext(const std::string& suggested_name) {
370   Initialize();
371   ThreadData* current_thread_data =
372       reinterpret_cast<ThreadData*>(tls_index_.Get());
373   if (current_thread_data)
374     return;  // Browser tests instigate this.
375   current_thread_data = new ThreadData(suggested_name);
376   tls_index_.Set(current_thread_data);
377 }
378 
379 // static
Get()380 ThreadData* ThreadData::Get() {
381   if (!tls_index_.initialized())
382     return NULL;  // For unittests only.
383   ThreadData* registered = reinterpret_cast<ThreadData*>(tls_index_.Get());
384   if (registered)
385     return registered;
386 
387   // We must be a worker thread, since we didn't pre-register.
388   ThreadData* worker_thread_data = NULL;
389   int worker_thread_number = 0;
390   {
391     base::AutoLock lock(*list_lock_.Pointer());
392     if (first_retired_worker_) {
393       worker_thread_data = first_retired_worker_;
394       first_retired_worker_ = first_retired_worker_->next_retired_worker_;
395       worker_thread_data->next_retired_worker_ = NULL;
396     } else {
397       worker_thread_number = ++worker_thread_data_creation_count_;
398     }
399   }
400 
401   // If we can't find a previously used instance, then we have to create one.
402   if (!worker_thread_data) {
403     DCHECK_GT(worker_thread_number, 0);
404     worker_thread_data = new ThreadData(worker_thread_number);
405   }
406   DCHECK_GT(worker_thread_data->worker_thread_number_, 0);
407 
408   tls_index_.Set(worker_thread_data);
409   return worker_thread_data;
410 }
411 
412 // static
OnThreadTermination(void * thread_data)413 void ThreadData::OnThreadTermination(void* thread_data) {
414   DCHECK(thread_data);  // TLS should *never* call us with a NULL.
415   // We must NOT do any allocations during this callback.  There is a chance
416   // that the allocator is no longer active on this thread.
417   reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup();
418 }
419 
OnThreadTerminationCleanup()420 void ThreadData::OnThreadTerminationCleanup() {
421   // The list_lock_ was created when we registered the callback, so it won't be
422   // allocated here despite the lazy reference.
423   base::AutoLock lock(*list_lock_.Pointer());
424   if (incarnation_counter_ != incarnation_count_for_pool_)
425     return;  // ThreadData was constructed in an earlier unit test.
426   ++cleanup_count_;
427   // Only worker threads need to be retired and reused.
428   if (!worker_thread_number_) {
429     return;
430   }
431   // We must NOT do any allocations during this callback.
432   // Using the simple linked lists avoids all allocations.
433   DCHECK_EQ(this->next_retired_worker_, reinterpret_cast<ThreadData*>(NULL));
434   this->next_retired_worker_ = first_retired_worker_;
435   first_retired_worker_ = this;
436 }
437 
438 // static
Snapshot(int current_profiling_phase,ProcessDataSnapshot * process_data_snapshot)439 void ThreadData::Snapshot(int current_profiling_phase,
440                           ProcessDataSnapshot* process_data_snapshot) {
441   // Get an unchanging copy of a ThreadData list.
442   ThreadData* my_list = ThreadData::first();
443 
444   // Gather data serially.
445   // This hackish approach *can* get some slightly corrupt tallies, as we are
446   // grabbing values without the protection of a lock, but it has the advantage
447   // of working even with threads that don't have message loops.  If a user
448   // sees any strangeness, they can always just run their stats gathering a
449   // second time.
450   BirthCountMap birth_counts;
451   for (ThreadData* thread_data = my_list; thread_data;
452        thread_data = thread_data->next()) {
453     thread_data->SnapshotExecutedTasks(current_profiling_phase,
454                                        &process_data_snapshot->phased_snapshots,
455                                        &birth_counts);
456   }
457 
458   // Add births that are still active -- i.e. objects that have tallied a birth,
459   // but have not yet tallied a matching death, and hence must be either
460   // running, queued up, or being held in limbo for future posting.
461   auto* current_phase_tasks =
462       &process_data_snapshot->phased_snapshots[current_profiling_phase].tasks;
463   for (const auto& birth_count : birth_counts) {
464     if (birth_count.second > 0) {
465       current_phase_tasks->push_back(
466           TaskSnapshot(BirthOnThreadSnapshot(*birth_count.first),
467                        DeathDataSnapshot(birth_count.second, 0, 0, 0, 0, 0, 0),
468                        "Still_Alive"));
469     }
470   }
471 }
472 
473 // static
OnProfilingPhaseCompleted(int profiling_phase)474 void ThreadData::OnProfilingPhaseCompleted(int profiling_phase) {
475   // Get an unchanging copy of a ThreadData list.
476   ThreadData* my_list = ThreadData::first();
477 
478   // Add snapshots for all instances of death data in all threads serially.
479   // This hackish approach *can* get some slightly corrupt tallies, as we are
480   // grabbing values without the protection of a lock, but it has the advantage
481   // of working even with threads that don't have message loops.  Any corruption
482   // shouldn't cause "cascading damage" to anything else (in later phases).
483   for (ThreadData* thread_data = my_list; thread_data;
484        thread_data = thread_data->next()) {
485     thread_data->OnProfilingPhaseCompletedOnThread(profiling_phase);
486   }
487 }
488 
TallyABirth(const Location & location)489 Births* ThreadData::TallyABirth(const Location& location) {
490   BirthMap::iterator it = birth_map_.find(location);
491   Births* child;
492   if (it != birth_map_.end()) {
493     child =  it->second;
494     child->RecordBirth();
495   } else {
496     child = new Births(location, *this);  // Leak this.
497     // Lock since the map may get relocated now, and other threads sometimes
498     // snapshot it (but they lock before copying it).
499     base::AutoLock lock(map_lock_);
500     birth_map_[location] = child;
501   }
502 
503   return child;
504 }
505 
TallyADeath(const Births & births,int32_t queue_duration,const TaskStopwatch & stopwatch)506 void ThreadData::TallyADeath(const Births& births,
507                              int32_t queue_duration,
508                              const TaskStopwatch& stopwatch) {
509   int32_t run_duration = stopwatch.RunDurationMs();
510 
511   // Stir in some randomness, plus add constant in case durations are zero.
512   const uint32_t kSomePrimeNumber = 2147483647;
513   random_number_ += queue_duration + run_duration + kSomePrimeNumber;
514   // An address is going to have some randomness to it as well ;-).
515   random_number_ ^=
516       static_cast<uint32_t>(&births - reinterpret_cast<Births*>(0));
517 
518   // We don't have queue durations without OS timer.  OS timer is automatically
519   // used for task-post-timing, so the use of an alternate timer implies all
520   // queue times are invalid, unless it was explicitly said that we can trust
521   // the alternate timer.
522   if (kAllowAlternateTimeSourceHandling &&
523       now_function_ &&
524       !now_function_is_time_) {
525     queue_duration = 0;
526   }
527 
528   DeathMap::iterator it = death_map_.find(&births);
529   DeathData* death_data;
530   if (it != death_map_.end()) {
531     death_data = &it->second;
532   } else {
533     base::AutoLock lock(map_lock_);  // Lock as the map may get relocated now.
534     death_data = &death_map_[&births];
535   }  // Release lock ASAP.
536   death_data->RecordDeath(queue_duration, run_duration, random_number_);
537 }
538 
539 // static
TallyABirthIfActive(const Location & location)540 Births* ThreadData::TallyABirthIfActive(const Location& location) {
541   if (!TrackingStatus())
542     return NULL;
543   ThreadData* current_thread_data = Get();
544   if (!current_thread_data)
545     return NULL;
546   return current_thread_data->TallyABirth(location);
547 }
548 
549 // static
TallyRunOnNamedThreadIfTracking(const base::TrackingInfo & completed_task,const TaskStopwatch & stopwatch)550 void ThreadData::TallyRunOnNamedThreadIfTracking(
551     const base::TrackingInfo& completed_task,
552     const TaskStopwatch& stopwatch) {
553   // Even if we have been DEACTIVATED, we will process any pending births so
554   // that our data structures (which counted the outstanding births) remain
555   // consistent.
556   const Births* births = completed_task.birth_tally;
557   if (!births)
558     return;
559   ThreadData* current_thread_data = stopwatch.GetThreadData();
560   if (!current_thread_data)
561     return;
562 
563   // Watch out for a race where status_ is changing, and hence one or both
564   // of start_of_run or end_of_run is zero.  In that case, we didn't bother to
565   // get a time value since we "weren't tracking" and we were trying to be
566   // efficient by not calling for a genuine time value.  For simplicity, we'll
567   // use a default zero duration when we can't calculate a true value.
568   TrackedTime start_of_run = stopwatch.StartTime();
569   int32_t queue_duration = 0;
570   if (!start_of_run.is_null()) {
571     queue_duration = (start_of_run - completed_task.EffectiveTimePosted())
572         .InMilliseconds();
573   }
574   current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
575 }
576 
577 // static
TallyRunOnWorkerThreadIfTracking(const Births * births,const TrackedTime & time_posted,const TaskStopwatch & stopwatch)578 void ThreadData::TallyRunOnWorkerThreadIfTracking(
579     const Births* births,
580     const TrackedTime& time_posted,
581     const TaskStopwatch& stopwatch) {
582   // Even if we have been DEACTIVATED, we will process any pending births so
583   // that our data structures (which counted the outstanding births) remain
584   // consistent.
585   if (!births)
586     return;
587 
588   // TODO(jar): Support the option to coalesce all worker-thread activity under
589   // one ThreadData instance that uses locks to protect *all* access.  This will
590   // reduce memory (making it provably bounded), but run incrementally slower
591   // (since we'll use locks on TallyABirth and TallyADeath).  The good news is
592   // that the locks on TallyADeath will be *after* the worker thread has run,
593   // and hence nothing will be waiting for the completion (...  besides some
594   // other thread that might like to run).  Also, the worker threads tasks are
595   // generally longer, and hence the cost of the lock may perchance be amortized
596   // over the long task's lifetime.
597   ThreadData* current_thread_data = stopwatch.GetThreadData();
598   if (!current_thread_data)
599     return;
600 
601   TrackedTime start_of_run = stopwatch.StartTime();
602   int32_t queue_duration = 0;
603   if (!start_of_run.is_null()) {
604     queue_duration = (start_of_run - time_posted).InMilliseconds();
605   }
606   current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
607 }
608 
609 // static
TallyRunInAScopedRegionIfTracking(const Births * births,const TaskStopwatch & stopwatch)610 void ThreadData::TallyRunInAScopedRegionIfTracking(
611     const Births* births,
612     const TaskStopwatch& stopwatch) {
613   // Even if we have been DEACTIVATED, we will process any pending births so
614   // that our data structures (which counted the outstanding births) remain
615   // consistent.
616   if (!births)
617     return;
618 
619   ThreadData* current_thread_data = stopwatch.GetThreadData();
620   if (!current_thread_data)
621     return;
622 
623   int32_t queue_duration = 0;
624   current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
625 }
626 
SnapshotExecutedTasks(int current_profiling_phase,PhasedProcessDataSnapshotMap * phased_snapshots,BirthCountMap * birth_counts)627 void ThreadData::SnapshotExecutedTasks(
628     int current_profiling_phase,
629     PhasedProcessDataSnapshotMap* phased_snapshots,
630     BirthCountMap* birth_counts) {
631   // Get copy of data, so that the data will not change during the iterations
632   // and processing.
633   BirthMap birth_map;
634   DeathsSnapshot deaths;
635   SnapshotMaps(current_profiling_phase, &birth_map, &deaths);
636 
637   for (const auto& birth : birth_map) {
638     (*birth_counts)[birth.second] += birth.second->birth_count();
639   }
640 
641   for (const auto& death : deaths) {
642     (*birth_counts)[death.first] -= death.first->birth_count();
643 
644     // For the current death data, walk through all its snapshots, starting from
645     // the current one, then from the previous profiling phase etc., and for
646     // each snapshot calculate the delta between the snapshot and the previous
647     // phase, if any.  Store the deltas in the result.
648     for (const DeathDataPhaseSnapshot* phase = &death.second; phase;
649          phase = phase->prev) {
650       const DeathDataSnapshot& death_data =
651           phase->prev ? phase->death_data.Delta(phase->prev->death_data)
652                       : phase->death_data;
653 
654       if (death_data.count > 0) {
655         (*phased_snapshots)[phase->profiling_phase].tasks.push_back(
656             TaskSnapshot(BirthOnThreadSnapshot(*death.first), death_data,
657                          thread_name()));
658       }
659     }
660   }
661 }
662 
663 // This may be called from another thread.
SnapshotMaps(int profiling_phase,BirthMap * birth_map,DeathsSnapshot * deaths)664 void ThreadData::SnapshotMaps(int profiling_phase,
665                               BirthMap* birth_map,
666                               DeathsSnapshot* deaths) {
667   base::AutoLock lock(map_lock_);
668 
669   for (const auto& birth : birth_map_)
670     (*birth_map)[birth.first] = birth.second;
671 
672   for (const auto& death : death_map_) {
673     deaths->push_back(std::make_pair(
674         death.first,
675         DeathDataPhaseSnapshot(profiling_phase, death.second.count(),
676                                death.second.run_duration_sum(),
677                                death.second.run_duration_max(),
678                                death.second.run_duration_sample(),
679                                death.second.queue_duration_sum(),
680                                death.second.queue_duration_max(),
681                                death.second.queue_duration_sample(),
682                                death.second.last_phase_snapshot())));
683   }
684 }
685 
OnProfilingPhaseCompletedOnThread(int profiling_phase)686 void ThreadData::OnProfilingPhaseCompletedOnThread(int profiling_phase) {
687   base::AutoLock lock(map_lock_);
688 
689   for (auto& death : death_map_) {
690     death.second.OnProfilingPhaseCompleted(profiling_phase);
691   }
692 }
693 
OptionallyInitializeAlternateTimer()694 static void OptionallyInitializeAlternateTimer() {
695   NowFunction* alternate_time_source = GetAlternateTimeSource();
696   if (alternate_time_source)
697     ThreadData::SetAlternateTimeSource(alternate_time_source);
698 }
699 
Initialize()700 void ThreadData::Initialize() {
701   if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
702     return;  // Someone else did the initialization.
703   // Due to racy lazy initialization in tests, we'll need to recheck status_
704   // after we acquire the lock.
705 
706   // Ensure that we don't double initialize tls.  We are called when single
707   // threaded in the product, but some tests may be racy and lazy about our
708   // initialization.
709   base::AutoLock lock(*list_lock_.Pointer());
710   if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
711     return;  // Someone raced in here and beat us.
712 
713   // Put an alternate timer in place if the environment calls for it, such as
714   // for tracking TCMalloc allocations.  This insertion is idempotent, so we
715   // don't mind if there is a race, and we'd prefer not to be in a lock while
716   // doing this work.
717   if (kAllowAlternateTimeSourceHandling)
718     OptionallyInitializeAlternateTimer();
719 
720   // Perform the "real" TLS initialization now, and leave it intact through
721   // process termination.
722   if (!tls_index_.initialized()) {  // Testing may have initialized this.
723     DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), UNINITIALIZED);
724     tls_index_.Initialize(&ThreadData::OnThreadTermination);
725     DCHECK(tls_index_.initialized());
726   } else {
727     // TLS was initialzed for us earlier.
728     DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), DORMANT_DURING_TESTS);
729   }
730 
731   // Incarnation counter is only significant to testing, as it otherwise will
732   // never again change in this process.
733   ++incarnation_counter_;
734 
735   // The lock is not critical for setting status_, but it doesn't hurt.  It also
736   // ensures that if we have a racy initialization, that we'll bail as soon as
737   // we get the lock earlier in this method.
738   base::subtle::Release_Store(&status_, kInitialStartupState);
739   DCHECK(base::subtle::NoBarrier_Load(&status_) != UNINITIALIZED);
740 }
741 
742 // static
InitializeAndSetTrackingStatus(Status status)743 void ThreadData::InitializeAndSetTrackingStatus(Status status) {
744   DCHECK_GE(status, DEACTIVATED);
745   DCHECK_LE(status, PROFILING_ACTIVE);
746 
747   Initialize();  // No-op if already initialized.
748 
749   if (status > DEACTIVATED)
750     status = PROFILING_ACTIVE;
751   base::subtle::Release_Store(&status_, status);
752 }
753 
754 // static
status()755 ThreadData::Status ThreadData::status() {
756   return static_cast<ThreadData::Status>(base::subtle::Acquire_Load(&status_));
757 }
758 
759 // static
TrackingStatus()760 bool ThreadData::TrackingStatus() {
761   return base::subtle::Acquire_Load(&status_) > DEACTIVATED;
762 }
763 
764 // static
SetAlternateTimeSource(NowFunction * now_function)765 void ThreadData::SetAlternateTimeSource(NowFunction* now_function) {
766   DCHECK(now_function);
767   if (kAllowAlternateTimeSourceHandling)
768     now_function_ = now_function;
769 }
770 
771 // static
EnableProfilerTiming()772 void ThreadData::EnableProfilerTiming() {
773   base::subtle::NoBarrier_Store(&g_profiler_timing_enabled, ENABLED_TIMING);
774 }
775 
776 // static
Now()777 TrackedTime ThreadData::Now() {
778   if (kAllowAlternateTimeSourceHandling && now_function_)
779     return TrackedTime::FromMilliseconds((*now_function_)());
780   if (IsProfilerTimingEnabled() && TrackingStatus())
781     return TrackedTime::Now();
782   return TrackedTime();  // Super fast when disabled, or not compiled.
783 }
784 
785 // static
EnsureCleanupWasCalled(int)786 void ThreadData::EnsureCleanupWasCalled(
787     int /* major_threads_shutdown_count */) {
788   base::AutoLock lock(*list_lock_.Pointer());
789   if (worker_thread_data_creation_count_ == 0)
790     return;  // We haven't really run much, and couldn't have leaked.
791 
792   // TODO(jar): until this is working on XP, don't run the real test.
793 #if 0
794   // Verify that we've at least shutdown/cleanup the major namesd threads.  The
795   // caller should tell us how many thread shutdowns should have taken place by
796   // now.
797   CHECK_GT(cleanup_count_, major_threads_shutdown_count);
798 #endif
799 }
800 
801 // static
ShutdownSingleThreadedCleanup(bool leak)802 void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
803   // This is only called from test code, where we need to cleanup so that
804   // additional tests can be run.
805   // We must be single threaded... but be careful anyway.
806   InitializeAndSetTrackingStatus(DEACTIVATED);
807 
808   ThreadData* thread_data_list;
809   {
810     base::AutoLock lock(*list_lock_.Pointer());
811     thread_data_list = all_thread_data_list_head_;
812     all_thread_data_list_head_ = NULL;
813     ++incarnation_counter_;
814     // To be clean, break apart the retired worker list (though we leak them).
815     while (first_retired_worker_) {
816       ThreadData* worker = first_retired_worker_;
817       CHECK_GT(worker->worker_thread_number_, 0);
818       first_retired_worker_ = worker->next_retired_worker_;
819       worker->next_retired_worker_ = NULL;
820     }
821   }
822 
823   // Put most global static back in pristine shape.
824   worker_thread_data_creation_count_ = 0;
825   cleanup_count_ = 0;
826   tls_index_.Set(NULL);
827   // Almost UNINITIALIZED.
828   base::subtle::Release_Store(&status_, DORMANT_DURING_TESTS);
829 
830   // To avoid any chance of racing in unit tests, which is the only place we
831   // call this function, we may sometimes leak all the data structures we
832   // recovered, as they may still be in use on threads from prior tests!
833   if (leak) {
834     ThreadData* thread_data = thread_data_list;
835     while (thread_data) {
836       thread_data = thread_data->next();
837     }
838     return;
839   }
840 
841   // When we want to cleanup (on a single thread), here is what we do.
842 
843   // Do actual recursive delete in all ThreadData instances.
844   while (thread_data_list) {
845     ThreadData* next_thread_data = thread_data_list;
846     thread_data_list = thread_data_list->next();
847 
848     for (BirthMap::iterator it = next_thread_data->birth_map_.begin();
849          next_thread_data->birth_map_.end() != it; ++it)
850       delete it->second;  // Delete the Birth Records.
851     delete next_thread_data;  // Includes all Death Records.
852   }
853 }
854 
855 //------------------------------------------------------------------------------
TaskStopwatch()856 TaskStopwatch::TaskStopwatch()
857     : wallclock_duration_ms_(0),
858       current_thread_data_(NULL),
859       excluded_duration_ms_(0),
860       parent_(NULL) {
861 #if DCHECK_IS_ON()
862   state_ = CREATED;
863   child_ = NULL;
864 #endif
865 }
866 
~TaskStopwatch()867 TaskStopwatch::~TaskStopwatch() {
868 #if DCHECK_IS_ON()
869   DCHECK(state_ != RUNNING);
870   DCHECK(child_ == NULL);
871 #endif
872 }
873 
Start()874 void TaskStopwatch::Start() {
875 #if DCHECK_IS_ON()
876   DCHECK(state_ == CREATED);
877   state_ = RUNNING;
878 #endif
879 
880   start_time_ = ThreadData::Now();
881 
882   current_thread_data_ = ThreadData::Get();
883   if (!current_thread_data_)
884     return;
885 
886   parent_ = current_thread_data_->current_stopwatch_;
887 #if DCHECK_IS_ON()
888   if (parent_) {
889     DCHECK(parent_->state_ == RUNNING);
890     DCHECK(parent_->child_ == NULL);
891     parent_->child_ = this;
892   }
893 #endif
894   current_thread_data_->current_stopwatch_ = this;
895 }
896 
Stop()897 void TaskStopwatch::Stop() {
898   const TrackedTime end_time = ThreadData::Now();
899 #if DCHECK_IS_ON()
900   DCHECK(state_ == RUNNING);
901   state_ = STOPPED;
902   DCHECK(child_ == NULL);
903 #endif
904 
905   if (!start_time_.is_null() && !end_time.is_null()) {
906     wallclock_duration_ms_ = (end_time - start_time_).InMilliseconds();
907   }
908 
909   if (!current_thread_data_)
910     return;
911 
912   DCHECK(current_thread_data_->current_stopwatch_ == this);
913   current_thread_data_->current_stopwatch_ = parent_;
914   if (!parent_)
915     return;
916 
917 #if DCHECK_IS_ON()
918   DCHECK(parent_->state_ == RUNNING);
919   DCHECK(parent_->child_ == this);
920   parent_->child_ = NULL;
921 #endif
922   parent_->excluded_duration_ms_ += wallclock_duration_ms_;
923   parent_ = NULL;
924 }
925 
StartTime() const926 TrackedTime TaskStopwatch::StartTime() const {
927 #if DCHECK_IS_ON()
928   DCHECK(state_ != CREATED);
929 #endif
930 
931   return start_time_;
932 }
933 
RunDurationMs() const934 int32_t TaskStopwatch::RunDurationMs() const {
935 #if DCHECK_IS_ON()
936   DCHECK(state_ == STOPPED);
937 #endif
938 
939   return wallclock_duration_ms_ - excluded_duration_ms_;
940 }
941 
GetThreadData() const942 ThreadData* TaskStopwatch::GetThreadData() const {
943 #if DCHECK_IS_ON()
944   DCHECK(state_ != CREATED);
945 #endif
946 
947   return current_thread_data_;
948 }
949 
950 //------------------------------------------------------------------------------
951 // DeathDataPhaseSnapshot
952 
DeathDataPhaseSnapshot(int profiling_phase,int count,int32_t run_duration_sum,int32_t run_duration_max,int32_t run_duration_sample,int32_t queue_duration_sum,int32_t queue_duration_max,int32_t queue_duration_sample,const DeathDataPhaseSnapshot * prev)953 DeathDataPhaseSnapshot::DeathDataPhaseSnapshot(
954     int profiling_phase,
955     int count,
956     int32_t run_duration_sum,
957     int32_t run_duration_max,
958     int32_t run_duration_sample,
959     int32_t queue_duration_sum,
960     int32_t queue_duration_max,
961     int32_t queue_duration_sample,
962     const DeathDataPhaseSnapshot* prev)
963     : profiling_phase(profiling_phase),
964       death_data(count,
965                  run_duration_sum,
966                  run_duration_max,
967                  run_duration_sample,
968                  queue_duration_sum,
969                  queue_duration_max,
970                  queue_duration_sample),
971       prev(prev) {}
972 
973 //------------------------------------------------------------------------------
974 // TaskSnapshot
975 
TaskSnapshot()976 TaskSnapshot::TaskSnapshot() {
977 }
978 
TaskSnapshot(const BirthOnThreadSnapshot & birth,const DeathDataSnapshot & death_data,const std::string & death_thread_name)979 TaskSnapshot::TaskSnapshot(const BirthOnThreadSnapshot& birth,
980                            const DeathDataSnapshot& death_data,
981                            const std::string& death_thread_name)
982     : birth(birth),
983       death_data(death_data),
984       death_thread_name(death_thread_name) {
985 }
986 
~TaskSnapshot()987 TaskSnapshot::~TaskSnapshot() {
988 }
989 
990 //------------------------------------------------------------------------------
991 // ProcessDataPhaseSnapshot
992 
ProcessDataPhaseSnapshot()993 ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot() {
994 }
995 
~ProcessDataPhaseSnapshot()996 ProcessDataPhaseSnapshot::~ProcessDataPhaseSnapshot() {
997 }
998 
999 //------------------------------------------------------------------------------
1000 // ProcessDataPhaseSnapshot
1001 
ProcessDataSnapshot()1002 ProcessDataSnapshot::ProcessDataSnapshot()
1003 #if !defined(OS_NACL)
1004     : process_id(base::GetCurrentProcId()) {
1005 #else
1006     : process_id(base::kNullProcessId) {
1007 #endif
1008 }
1009 
1010 ProcessDataSnapshot::~ProcessDataSnapshot() {
1011 }
1012 
1013 }  // namespace tracked_objects
1014