1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package dalvik.system.profiler;
18 
19 import java.util.Arrays;
20 import java.util.HashMap;
21 import java.util.HashSet;
22 import java.util.Map;
23 import java.util.Set;
24 import java.util.Timer;
25 import java.util.TimerTask;
26 
27 /**
28  * A sampling profiler. It currently is implemented without any
29  * virtual machine support, relying solely on {@code
30  * Thread.getStackTrace} to collect samples. As such, the overhead is
31  * higher than a native approach and it does not provide insight into
32  * where time is spent within native code, but it can still provide
33  * useful insight into where a program is spending time.
34  *
35  * <h3>Usage Example</h3>
36  *
37  * The following example shows how to use the {@code
38  * SamplingProfiler}. It samples the current thread's stack to a depth
39  * of 12 stack frame elements over two different measurement periods
40  * with samples taken every 100 milliseconds. In then prints the
41  * results in hprof format to the standard output.
42  *
43  * <pre> {@code
44  * ThreadSet threadSet = SamplingProfiler.newArrayThreadSet(Thread.currentThread());
45  * SamplingProfiler profiler = new SamplingProfiler(12, threadSet);
46  * profiler.start(100);
47  * // period of measurement
48  * profiler.stop();
49  * // period of non-measurement
50  * profiler.start(100);
51  * // another period of measurement
52  * profiler.stop();
53  * profiler.shutdown();
54  * AsciiHprofWriter.write(profiler.getHprofData(), System.out);
55  * }</pre>
56  */
57 public final class SamplingProfiler {
58 
59     /**
60      * Map of stack traces to a mutable sample count.
61      */
62     private final Map<HprofData.StackTrace, int[]> stackTraces
63             = new HashMap<HprofData.StackTrace, int[]>();
64 
65     /**
66      * Data collected by the sampling profiler
67      */
68     private final HprofData hprofData = new HprofData(stackTraces);
69 
70     /**
71      * Timer that is used for the lifetime of the profiler
72      */
73     private final Timer timer = new Timer("SamplingProfiler", true);
74 
75     /**
76      * A sampler is created every time profiling starts and cleared
77      * every time profiling stops because once a {@code TimerTask} is
78      * canceled it cannot be reused.
79      */
80     private Sampler sampler;
81 
82     /**
83      * The maximum number of {@code StackTraceElements} to retain in
84      * each stack.
85      */
86     private final int depth;
87 
88     /**
89      * The {@code ThreadSet} that identifies which threads to sample.
90      */
91     private final ThreadSet threadSet;
92 
93     /*
94      *  Real hprof output examples don't start the thread and trace
95      *  identifiers at one but seem to start at these arbitrary
96      *  constants. It certainly seems useful to have relatively unique
97      *  identifiers when manual searching hprof output.
98      */
99     private int nextThreadId = 200001;
100     private int nextStackTraceId = 300001;
101     private int nextObjectId = 1;
102 
103     /**
104      * The threads currently known to the profiler for detecting
105      * thread start and end events.
106      */
107     private Thread[] currentThreads = new Thread[0];
108 
109     /**
110      * Map of currently active threads to their identifiers. When
111      * threads disappear they are removed and only referenced by their
112      * identifiers to prevent retaining garbage threads.
113      */
114     private final Map<Thread, Integer> threadIds = new HashMap<Thread, Integer>();
115 
116     /**
117      * Mutable {@code StackTrace} that is used for probing the {@link
118      * #stackTraces stackTraces} map without allocating a {@code
119      * StackTrace}. If {@link #addStackTrace addStackTrace} needs to
120      * be thread safe, have a single mutable instance would need to be
121      * reconsidered.
122      */
123     private final HprofData.StackTrace mutableStackTrace = new HprofData.StackTrace();
124 
125     /**
126      * The {@code ThreadSampler} is used to produce a {@code
127      * StackTraceElement} array for a given thread. The array is
128      * expected to be {@link #depth depth} or less in length.
129      */
130     private final ThreadSampler threadSampler;
131 
132     /**
133      * Create a sampling profiler that collects stacks with the
134      * specified depth from the threads specified by the specified
135      * thread collector.
136      *
137      * @param depth The maximum stack depth to retain for each sample
138      * similar to the hprof option of the same name. Any stack deeper
139      * than this will be truncated to this depth. A good starting
140      * value is 4 although it is not uncommon to need to raise this to
141      * get enough context to understand program behavior. While
142      * programs with extensive recursion may require a high value for
143      * depth, simply passing in a value for Integer.MAX_VALUE is not
144      * advised because of the significant memory need to retain such
145      * stacks and runtime overhead to compare stacks.
146      *
147      * @param threadSet The thread set specifies which threads to
148      * sample. In a general purpose program, all threads typically
149      * should be sample with a ThreadSet such as provided by {@link
150      * #newThreadGroupThreadSet newThreadGroupThreadSet}. For a
151      * benchmark a fixed set such as provided by {@link
152      * #newArrayThreadSet newArrayThreadSet} can reduce the overhead
153      * of profiling.
154      */
SamplingProfiler(int depth, ThreadSet threadSet)155     public SamplingProfiler(int depth, ThreadSet threadSet) {
156         this.depth = depth;
157         this.threadSet = threadSet;
158         this.threadSampler = findDefaultThreadSampler();
159         threadSampler.setDepth(depth);
160         hprofData.setFlags(BinaryHprof.ControlSettings.CPU_SAMPLING.bitmask);
161         hprofData.setDepth(depth);
162     }
163 
findDefaultThreadSampler()164     private static ThreadSampler findDefaultThreadSampler() {
165         if ("Dalvik Core Library".equals(System.getProperty("java.specification.name"))) {
166             String className = "dalvik.system.profiler.DalvikThreadSampler";
167             try {
168                 return (ThreadSampler) Class.forName(className).newInstance();
169             } catch (Exception e) {
170                 System.out.println("Problem creating " + className + ": " + e);
171             }
172         }
173         return new PortableThreadSampler();
174     }
175 
176     /**
177      * A ThreadSet specifies the set of threads to sample.
178      */
179     public static interface ThreadSet {
180         /**
181          * Returns an array containing the threads to be sampled. The
182          * array may be longer than the number of threads to be
183          * sampled, in which case the extra elements must be null.
184          */
threads()185         public Thread[] threads();
186     }
187 
188     /**
189      * Returns a ThreadSet for a fixed set of threads that will not
190      * vary at runtime. This has less overhead than a dynamically
191      * calculated set, such as {@link #newThreadGroupThreadSet}, which has
192      * to enumerate the threads each time profiler wants to collect
193      * samples.
194      */
newArrayThreadSet(Thread... threads)195     public static ThreadSet newArrayThreadSet(Thread... threads) {
196         return new ArrayThreadSet(threads);
197     }
198 
199     /**
200      * An ArrayThreadSet samples a fixed set of threads that does not
201      * vary over the life of the profiler.
202      */
203     private static class ArrayThreadSet implements ThreadSet {
204         private final Thread[] threads;
ArrayThreadSet(Thread... threads)205         public ArrayThreadSet(Thread... threads) {
206             if (threads == null) {
207                 throw new NullPointerException("threads == null");
208             }
209             this.threads = threads;
210         }
threads()211         public Thread[] threads() {
212             return threads;
213         }
214     }
215 
216     /**
217      * Returns a ThreadSet that is dynamically computed based on the
218      * threads found in the specified ThreadGroup and that
219      * ThreadGroup's children.
220      */
newThreadGroupThreadSet(ThreadGroup threadGroup)221     public static ThreadSet newThreadGroupThreadSet(ThreadGroup threadGroup) {
222         return new ThreadGroupThreadSet(threadGroup);
223     }
224 
225     /**
226      * An ThreadGroupThreadSet sample the threads from the specified
227      * ThreadGroup and the ThreadGroup's children
228      */
229     private static class ThreadGroupThreadSet implements ThreadSet {
230         private final ThreadGroup threadGroup;
231         private Thread[] threads;
232         private int lastThread;
233 
ThreadGroupThreadSet(ThreadGroup threadGroup)234         public ThreadGroupThreadSet(ThreadGroup threadGroup) {
235             if (threadGroup == null) {
236                 throw new NullPointerException("threadGroup == null");
237             }
238             this.threadGroup = threadGroup;
239             resize();
240         }
241 
resize()242         private void resize() {
243             int count = threadGroup.activeCount();
244             // we can only tell if we had enough room for all active
245             // threads if we actually are larger than the the number of
246             // active threads. making it larger also leaves us room to
247             // tolerate additional threads without resizing.
248             threads = new Thread[count*2];
249             lastThread = 0;
250         }
251 
threads()252         public Thread[] threads() {
253             int threadCount;
254             while (true) {
255                 threadCount = threadGroup.enumerate(threads);
256                 if (threadCount == threads.length) {
257                     resize();
258                 } else {
259                     break;
260                 }
261             }
262             if (threadCount < lastThread) {
263                 // avoid retaining pointers to threads that have ended
264                 Arrays.fill(threads, threadCount, lastThread, null);
265             }
266             lastThread = threadCount;
267             return threads;
268         }
269     }
270 
271     /**
272      * Starts profiler sampling at the specified rate.
273      *
274      * @param interval The number of milliseconds between samples
275      */
start(int interval)276     public void start(int interval) {
277         if (interval < 1) {
278             throw new IllegalArgumentException("interval < 1");
279         }
280         if (sampler != null) {
281             throw new IllegalStateException("profiling already started");
282         }
283         sampler = new Sampler();
284         hprofData.setStartMillis(System.currentTimeMillis());
285         timer.scheduleAtFixedRate(sampler, 0, interval);
286     }
287 
288     /**
289      * Stops profiler sampling. It can be restarted with {@link
290      * #start(int)} to continue sampling.
291      */
stop()292     public void stop() {
293         if (sampler == null) {
294             return;
295         }
296         synchronized(sampler) {
297             sampler.stop = true;
298             while (!sampler.stopped) {
299                 try {
300                     sampler.wait();
301                 } catch (InterruptedException ignored) {
302                 }
303             }
304         }
305         sampler = null;
306     }
307 
308     /**
309      * Shuts down profiling after which it can not be restarted. It is
310      * important to shut down profiling when done to free resources
311      * used by the profiler. Shutting down the profiler also stops the
312      * profiling if that has not already been done.
313      */
shutdown()314     public void shutdown() {
315         stop();
316         timer.cancel();
317     }
318 
319     /**
320      * Returns the hprof data accumulated by the profiler since it was
321      * created. The profiler needs to be stopped, but not necessarily
322      * shut down, in order to access the data. If the profiler is
323      * restarted, there is no thread safe way to access the data.
324      */
getHprofData()325     public HprofData getHprofData() {
326         if (sampler != null) {
327             throw new IllegalStateException("cannot access hprof data while sampling");
328         }
329         return hprofData;
330     }
331 
332     /**
333      * The Sampler does the real work of the profiler.
334      *
335      * At every sample time, it asks the thread set for the set
336      * of threads to sample. It maintains a history of thread creation
337      * and death events based on changes observed to the threads
338      * returned by the {@code ThreadSet}.
339      *
340      * For each thread to be sampled, a stack is collected and used to
341      * update the set of collected samples. Stacks are truncated to a
342      * maximum depth. There is no way to tell if a stack has been truncated.
343      */
344     private class Sampler extends TimerTask {
345 
346         private boolean stop;
347         private boolean stopped;
348 
349         private Thread timerThread;
350 
run()351         public void run() {
352             synchronized(this) {
353                 if (stop) {
354                     cancel();
355                     stopped = true;
356                     notifyAll();
357                     return;
358                 }
359             }
360 
361             if (timerThread == null) {
362                 timerThread = Thread.currentThread();
363             }
364 
365             // process thread creation and death first so that we
366             // assign thread ids to any new threads before allocating
367             // new stacks for them
368             Thread[] newThreads = threadSet.threads();
369             if (!Arrays.equals(currentThreads, newThreads)) {
370                 updateThreadHistory(currentThreads, newThreads);
371                 currentThreads = newThreads.clone();
372             }
373 
374             for (Thread thread : currentThreads) {
375                 if (thread == null) {
376                     break;
377                 }
378                 if (thread == timerThread) {
379                     continue;
380                 }
381 
382                 StackTraceElement[] stackFrames = threadSampler.getStackTrace(thread);
383                 if (stackFrames == null) {
384                     continue;
385                 }
386                 recordStackTrace(thread, stackFrames);
387             }
388         }
389 
390         /**
391          * Record a new stack trace. The thread should have been
392          * previously registered with addStartThread.
393          */
recordStackTrace(Thread thread, StackTraceElement[] stackFrames)394         private void recordStackTrace(Thread thread, StackTraceElement[] stackFrames) {
395             Integer threadId = threadIds.get(thread);
396             if (threadId == null) {
397                 throw new IllegalArgumentException("Unknown thread " + thread);
398             }
399             mutableStackTrace.threadId = threadId;
400             mutableStackTrace.stackFrames = stackFrames;
401 
402             int[] countCell = stackTraces.get(mutableStackTrace);
403             if (countCell == null) {
404                 countCell = new int[1];
405                 // cloned because the ThreadSampler may reuse the array
406                 StackTraceElement[] stackFramesCopy = stackFrames.clone();
407                 HprofData.StackTrace stackTrace
408                         = new HprofData.StackTrace(nextStackTraceId++, threadId, stackFramesCopy);
409                 hprofData.addStackTrace(stackTrace, countCell);
410             }
411             countCell[0]++;
412         }
413 
updateThreadHistory(Thread[] oldThreads, Thread[] newThreads)414         private void updateThreadHistory(Thread[] oldThreads, Thread[] newThreads) {
415             // thread start/stop shouldn't happen too often and
416             // these aren't too big, so hopefully this approach
417             // won't be too slow...
418             Set<Thread> n = new HashSet<Thread>(Arrays.asList(newThreads));
419             Set<Thread> o = new HashSet<Thread>(Arrays.asList(oldThreads));
420 
421             // added = new-old
422             Set<Thread> added = new HashSet<Thread>(n);
423             added.removeAll(o);
424 
425             // removed = old-new
426             Set<Thread> removed = new HashSet<Thread>(o);
427             removed.removeAll(n);
428 
429             for (Thread thread : added) {
430                 if (thread == null) {
431                     continue;
432                 }
433                 if (thread == timerThread) {
434                     continue;
435                 }
436                 addStartThread(thread);
437             }
438             for (Thread thread : removed) {
439                 if (thread == null) {
440                     continue;
441                 }
442                 if (thread == timerThread) {
443                     continue;
444                 }
445                 addEndThread(thread);
446             }
447         }
448 
449         /**
450          * Record that a newly noticed thread.
451          */
addStartThread(Thread thread)452         private void addStartThread(Thread thread) {
453             if (thread == null) {
454                 throw new NullPointerException("thread == null");
455             }
456             int threadId = nextThreadId++;
457             Integer old = threadIds.put(thread, threadId);
458             if (old != null) {
459                 throw new IllegalArgumentException("Thread already registered as " + old);
460             }
461 
462             String threadName = thread.getName();
463             // group will become null when thread is terminated
464             ThreadGroup group = thread.getThreadGroup();
465             String groupName = group == null ? null : group.getName();
466             ThreadGroup parentGroup = group == null ? null : group.getParent();
467             String parentGroupName = parentGroup == null ? null : parentGroup.getName();
468 
469             HprofData.ThreadEvent event
470                     = HprofData.ThreadEvent.start(nextObjectId++, threadId,
471                                                   threadName, groupName, parentGroupName);
472             hprofData.addThreadEvent(event);
473         }
474 
475         /**
476          * Record that a thread has disappeared.
477          */
addEndThread(Thread thread)478         private void addEndThread(Thread thread) {
479             if (thread == null) {
480                 throw new NullPointerException("thread == null");
481             }
482             Integer threadId = threadIds.remove(thread);
483             if (threadId == null) {
484                 throw new IllegalArgumentException("Unknown thread " + thread);
485             }
486             HprofData.ThreadEvent event = HprofData.ThreadEvent.end(threadId);
487             hprofData.addThreadEvent(event);
488         }
489     }
490 }
491