1 /* cg_print.c - Print routines for displaying call graphs.
2
3 Copyright (C) 2000-2014 Free Software Foundation, Inc.
4
5 This file is part of GNU Binutils.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
20 02110-1301, USA. */
21
22 #include "gprof.h"
23 #include "libiberty.h"
24 #include "filenames.h"
25 #include "search_list.h"
26 #include "source.h"
27 #include "symtab.h"
28 #include "cg_arcs.h"
29 #include "cg_print.h"
30 #include "hist.h"
31 #include "utils.h"
32 #include "corefile.h"
33
34 /* Return value of comparison functions used to sort tables. */
35 #define LESSTHAN -1
36 #define EQUALTO 0
37 #define GREATERTHAN 1
38
39 static void print_header (void);
40 static void print_cycle (Sym *);
41 static int cmp_member (Sym *, Sym *);
42 static void sort_members (Sym *);
43 static void print_members (Sym *);
44 static int cmp_arc (Arc *, Arc *);
45 static void sort_parents (Sym *);
46 static void print_parents (Sym *);
47 static void sort_children (Sym *);
48 static void print_children (Sym *);
49 static void print_line (Sym *);
50 static int cmp_name (const PTR, const PTR);
51 static int cmp_arc_count (const PTR, const PTR);
52 static int cmp_fun_nuses (const PTR, const PTR);
53 static void order_and_dump_functions_by_arcs
54 (Arc **, unsigned long, int, Arc **, unsigned long *);
55
56 /* Declarations of automatically generated functions to output blurbs. */
57 extern void bsd_callg_blurb (FILE * fp);
58 extern void fsf_callg_blurb (FILE * fp);
59
60 double print_time = 0.0;
61
62
63 static void
print_header()64 print_header ()
65 {
66 if (first_output)
67 first_output = FALSE;
68 else
69 printf ("\f\n");
70
71 if (!bsd_style_output)
72 {
73 if (print_descriptions)
74 printf (_("\t\t Call graph (explanation follows)\n\n"));
75 else
76 printf (_("\t\t\tCall graph\n\n"));
77 }
78
79 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
80 (long) hist_scale * (long) sizeof (UNIT));
81
82 if (print_time > 0.0)
83 printf (_(" for %.2f%% of %.2f seconds\n\n"),
84 100.0 / print_time, print_time / hz);
85 else
86 {
87 printf (_(" no time propagated\n\n"));
88
89 /* This doesn't hurt, since all the numerators will be 0.0. */
90 print_time = 1.0;
91 }
92
93 if (bsd_style_output)
94 {
95 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
96 "", "", "", "", _("called"), _("total"), _("parents"));
97 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
98 _("index"),
99 /* xgettext:no-c-format */
100 _("%time"),
101 _("self"), _("descendants"), _("called"), _("self"),
102 _("name"), _("index"));
103 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
104 "", "", "", "", _("called"), _("total"), _("children"));
105 printf ("\n");
106 }
107 else
108 {
109 printf (_("index %% time self children called name\n"));
110 }
111 }
112
113 /* Print a cycle header. */
114
115 static void
print_cycle(Sym * cyc)116 print_cycle (Sym *cyc)
117 {
118 char buf[BUFSIZ];
119
120 sprintf (buf, "[%d]", cyc->cg.index);
121 printf (bsd_style_output
122 ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
123 : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
124 100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
125 cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
126
127 if (cyc->cg.self_calls != 0)
128 printf ("+%-7lu", cyc->cg.self_calls);
129 else
130 printf (" %7.7s", "");
131
132 printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
133 }
134
135 /* Compare LEFT and RIGHT membmer. Major comparison key is
136 CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS. */
137
138 static int
cmp_member(Sym * left,Sym * right)139 cmp_member (Sym *left, Sym *right)
140 {
141 double left_time = left->cg.prop.self + left->cg.prop.child;
142 double right_time = right->cg.prop.self + right->cg.prop.child;
143 unsigned long left_calls = left->ncalls + left->cg.self_calls;
144 unsigned long right_calls = right->ncalls + right->cg.self_calls;
145
146 if (left_time > right_time)
147 return GREATERTHAN;
148
149 if (left_time < right_time)
150 return LESSTHAN;
151
152 if (left_calls > right_calls)
153 return GREATERTHAN;
154
155 if (left_calls < right_calls)
156 return LESSTHAN;
157
158 return EQUALTO;
159 }
160
161 /* Sort members of a cycle. */
162
163 static void
sort_members(Sym * cyc)164 sort_members (Sym *cyc)
165 {
166 Sym *todo, *doing, *prev;
167
168 /* Detach cycle members from cyclehead,
169 and insertion sort them back on. */
170 todo = cyc->cg.cyc.next;
171 cyc->cg.cyc.next = 0;
172
173 for (doing = todo; doing != NULL; doing = todo)
174 {
175 todo = doing->cg.cyc.next;
176
177 for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
178 {
179 if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
180 break;
181 }
182
183 doing->cg.cyc.next = prev->cg.cyc.next;
184 prev->cg.cyc.next = doing;
185 }
186 }
187
188 /* Print the members of a cycle. */
189
190 static void
print_members(Sym * cyc)191 print_members (Sym *cyc)
192 {
193 Sym *member;
194
195 sort_members (cyc);
196
197 for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
198 {
199 printf (bsd_style_output
200 ? "%6.6s %5.5s %7.2f %11.2f %7lu"
201 : "%6.6s %5.5s %7.2f %7.2f %7lu",
202 "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
203 member->ncalls);
204
205 if (member->cg.self_calls != 0)
206 printf ("+%-7lu", member->cg.self_calls);
207 else
208 printf (" %7.7s", "");
209
210 printf (" ");
211 print_name (member);
212 printf ("\n");
213 }
214 }
215
216 /* Compare two arcs to/from the same child/parent.
217 - if one arc is a self arc, it's least.
218 - if one arc is within a cycle, it's less than.
219 - if both arcs are within a cycle, compare arc counts.
220 - if neither arc is within a cycle, compare with
221 time + child_time as major key
222 arc count as minor key. */
223
224 static int
cmp_arc(Arc * left,Arc * right)225 cmp_arc (Arc *left, Arc *right)
226 {
227 Sym *left_parent = left->parent;
228 Sym *left_child = left->child;
229 Sym *right_parent = right->parent;
230 Sym *right_child = right->child;
231 double left_time, right_time;
232
233 DBG (TIMEDEBUG,
234 printf ("[cmp_arc] ");
235 print_name (left_parent);
236 printf (" calls ");
237 print_name (left_child);
238 printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
239 left->count, left_child->ncalls);
240 printf ("[cmp_arc] ");
241 print_name (right_parent);
242 printf (" calls ");
243 print_name (right_child);
244 printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
245 right->count, right_child->ncalls);
246 printf ("\n");
247 );
248
249 if (left_parent == left_child)
250 return LESSTHAN; /* Left is a self call. */
251
252 if (right_parent == right_child)
253 return GREATERTHAN; /* Right is a self call. */
254
255 if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
256 && left_parent->cg.cyc.num == left_child->cg.cyc.num)
257 {
258 /* Left is a call within a cycle. */
259 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
260 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
261 {
262 /* Right is a call within the cycle, too. */
263 if (left->count < right->count)
264 return LESSTHAN;
265
266 if (left->count > right->count)
267 return GREATERTHAN;
268
269 return EQUALTO;
270 }
271 else
272 {
273 /* Right isn't a call within the cycle. */
274 return LESSTHAN;
275 }
276 }
277 else
278 {
279 /* Left isn't a call within a cycle. */
280 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
281 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
282 {
283 /* Right is a call within a cycle. */
284 return GREATERTHAN;
285 }
286 else
287 {
288 /* Neither is a call within a cycle. */
289 left_time = left->time + left->child_time;
290 right_time = right->time + right->child_time;
291
292 if (left_time < right_time)
293 return LESSTHAN;
294
295 if (left_time > right_time)
296 return GREATERTHAN;
297
298 if (left->count < right->count)
299 return LESSTHAN;
300
301 if (left->count > right->count)
302 return GREATERTHAN;
303
304 return EQUALTO;
305 }
306 }
307 }
308
309
310 static void
sort_parents(Sym * child)311 sort_parents (Sym * child)
312 {
313 Arc *arc, *detached, sorted, *prev;
314
315 /* Unlink parents from child, then insertion sort back on to
316 sorted's parents.
317 *arc the arc you have detached and are inserting.
318 *detached the rest of the arcs to be sorted.
319 sorted arc list onto which you insertion sort.
320 *prev arc before the arc you are comparing. */
321 sorted.next_parent = 0;
322
323 for (arc = child->cg.parents; arc; arc = detached)
324 {
325 detached = arc->next_parent;
326
327 /* Consider *arc as disconnected; insert it into sorted. */
328 for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
329 {
330 if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
331 break;
332 }
333
334 arc->next_parent = prev->next_parent;
335 prev->next_parent = arc;
336 }
337
338 /* Reattach sorted arcs to child. */
339 child->cg.parents = sorted.next_parent;
340 }
341
342
343 static void
print_parents(Sym * child)344 print_parents (Sym *child)
345 {
346 Sym *parent;
347 Arc *arc;
348 Sym *cycle_head;
349
350 if (child->cg.cyc.head != 0)
351 cycle_head = child->cg.cyc.head;
352 else
353 cycle_head = child;
354
355 if (!child->cg.parents)
356 {
357 printf (bsd_style_output
358 ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n")
359 : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n"),
360 "", "", "", "", "", "");
361 return;
362 }
363
364 sort_parents (child);
365
366 for (arc = child->cg.parents; arc; arc = arc->next_parent)
367 {
368 parent = arc->parent;
369 if (child == parent || (child->cg.cyc.num != 0
370 && parent->cg.cyc.num == child->cg.cyc.num))
371 {
372 /* Selfcall or call among siblings. */
373 printf (bsd_style_output
374 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
375 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
376 "", "", "", "",
377 arc->count, "");
378 print_name (parent);
379 printf ("\n");
380 }
381 else
382 {
383 /* Regular parent of child. */
384 printf (bsd_style_output
385 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
386 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
387 "", "",
388 arc->time / hz, arc->child_time / hz,
389 arc->count, cycle_head->ncalls);
390 print_name (parent);
391 printf ("\n");
392 }
393 }
394 }
395
396
397 static void
sort_children(Sym * parent)398 sort_children (Sym *parent)
399 {
400 Arc *arc, *detached, sorted, *prev;
401
402 /* Unlink children from parent, then insertion sort back on to
403 sorted's children.
404 *arc the arc you have detached and are inserting.
405 *detached the rest of the arcs to be sorted.
406 sorted arc list onto which you insertion sort.
407 *prev arc before the arc you are comparing. */
408 sorted.next_child = 0;
409
410 for (arc = parent->cg.children; arc; arc = detached)
411 {
412 detached = arc->next_child;
413
414 /* Consider *arc as disconnected; insert it into sorted. */
415 for (prev = &sorted; prev->next_child; prev = prev->next_child)
416 {
417 if (cmp_arc (arc, prev->next_child) != LESSTHAN)
418 break;
419 }
420
421 arc->next_child = prev->next_child;
422 prev->next_child = arc;
423 }
424
425 /* Reattach sorted children to parent. */
426 parent->cg.children = sorted.next_child;
427 }
428
429
430 static void
print_children(Sym * parent)431 print_children (Sym *parent)
432 {
433 Sym *child;
434 Arc *arc;
435
436 sort_children (parent);
437 arc = parent->cg.children;
438
439 for (arc = parent->cg.children; arc; arc = arc->next_child)
440 {
441 child = arc->child;
442 if (child == parent || (child->cg.cyc.num != 0
443 && child->cg.cyc.num == parent->cg.cyc.num))
444 {
445 /* Self call or call to sibling. */
446 printf (bsd_style_output
447 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
448 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
449 "", "", "", "", arc->count, "");
450 print_name (child);
451 printf ("\n");
452 }
453 else
454 {
455 /* Regular child of parent. */
456 printf (bsd_style_output
457 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
458 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
459 "", "",
460 arc->time / hz, arc->child_time / hz,
461 arc->count, child->cg.cyc.head->ncalls);
462 print_name (child);
463 printf ("\n");
464 }
465 }
466 }
467
468
469 static void
print_line(Sym * np)470 print_line (Sym *np)
471 {
472 char buf[BUFSIZ];
473
474 sprintf (buf, "[%d]", np->cg.index);
475 printf (bsd_style_output
476 ? "%-6.6s %5.1f %7.2f %11.2f"
477 : "%-6.6s %5.1f %7.2f %7.2f", buf,
478 100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
479 np->cg.prop.self / hz, np->cg.prop.child / hz);
480
481 if ((np->ncalls + np->cg.self_calls) != 0)
482 {
483 printf (" %7lu", np->ncalls);
484
485 if (np->cg.self_calls != 0)
486 printf ("+%-7lu ", np->cg.self_calls);
487 else
488 printf (" %7.7s ", "");
489 }
490 else
491 {
492 printf (" %7.7s %7.7s ", "", "");
493 }
494
495 print_name (np);
496 printf ("\n");
497 }
498
499
500 /* Print dynamic call graph. */
501
502 void
cg_print(Sym ** timesortsym)503 cg_print (Sym ** timesortsym)
504 {
505 unsigned int sym_index;
506 Sym *parent;
507
508 if (print_descriptions && bsd_style_output)
509 bsd_callg_blurb (stdout);
510
511 print_header ();
512
513 for (sym_index = 0; sym_index < symtab.len + num_cycles; ++sym_index)
514 {
515 parent = timesortsym[sym_index];
516
517 if ((ignore_zeros && parent->ncalls == 0
518 && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
519 && parent->cg.prop.child == 0)
520 || !parent->cg.print_flag
521 || (line_granularity && ! parent->is_func))
522 continue;
523
524 if (!parent->name && parent->cg.cyc.num != 0)
525 {
526 /* Cycle header. */
527 print_cycle (parent);
528 print_members (parent);
529 }
530 else
531 {
532 print_parents (parent);
533 print_line (parent);
534 print_children (parent);
535 }
536
537 if (bsd_style_output)
538 printf ("\n");
539
540 printf ("-----------------------------------------------\n");
541
542 if (bsd_style_output)
543 printf ("\n");
544 }
545
546 free (timesortsym);
547
548 if (print_descriptions && !bsd_style_output)
549 fsf_callg_blurb (stdout);
550 }
551
552
553 static int
cmp_name(const PTR left,const PTR right)554 cmp_name (const PTR left, const PTR right)
555 {
556 const Sym **npp1 = (const Sym **) left;
557 const Sym **npp2 = (const Sym **) right;
558
559 return strcmp ((*npp1)->name, (*npp2)->name);
560 }
561
562
563 void
cg_print_index()564 cg_print_index ()
565 {
566 unsigned int sym_index;
567 unsigned int nnames, todo, i, j;
568 int col, starting_col;
569 Sym **name_sorted_syms, *sym;
570 const char *filename;
571 char buf[20];
572 int column_width = (output_width - 1) / 3; /* Don't write in last col! */
573
574 /* Now, sort regular function name
575 alphabetically to create an index. */
576 name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
577
578 for (sym_index = 0, nnames = 0; sym_index < symtab.len; sym_index++)
579 {
580 if (ignore_zeros && symtab.base[sym_index].ncalls == 0
581 && symtab.base[sym_index].hist.time == 0)
582 continue;
583
584 name_sorted_syms[nnames++] = &symtab.base[sym_index];
585 }
586
587 qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
588
589 for (sym_index = 1, todo = nnames; sym_index <= num_cycles; sym_index++)
590 name_sorted_syms[todo++] = &cycle_header[sym_index];
591
592 printf ("\f\n");
593 printf (_("Index by function name\n\n"));
594 sym_index = (todo + 2) / 3;
595
596 for (i = 0; i < sym_index; i++)
597 {
598 col = 0;
599 starting_col = 0;
600
601 for (j = i; j < todo; j += sym_index)
602 {
603 sym = name_sorted_syms[j];
604
605 if (sym->cg.print_flag)
606 sprintf (buf, "[%d]", sym->cg.index);
607 else
608 sprintf (buf, "(%d)", sym->cg.index);
609
610 if (j < nnames)
611 {
612 if (bsd_style_output)
613 {
614 printf ("%6.6s %-19.19s", buf, sym->name);
615 }
616 else
617 {
618 col += strlen (buf);
619
620 for (; col < starting_col + 5; ++col)
621 putchar (' ');
622
623 printf (" %s ", buf);
624 col += print_name_only (sym);
625
626 if (!line_granularity && sym->is_static && sym->file)
627 {
628 filename = sym->file->name;
629
630 if (!print_path)
631 {
632 filename = strrchr (filename, '/');
633
634 if (filename)
635 ++filename;
636 else
637 filename = sym->file->name;
638 }
639
640 printf (" (%s)", filename);
641 col += strlen (filename) + 3;
642 }
643 }
644 }
645 else
646 {
647 if (bsd_style_output)
648 {
649 printf ("%6.6s ", buf);
650 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
651 printf ("%-19.19s", buf);
652 }
653 else
654 {
655 col += strlen (buf);
656 for (; col < starting_col + 5; ++col)
657 putchar (' ');
658 printf (" %s ", buf);
659 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
660 printf ("%s", buf);
661 col += strlen (buf);
662 }
663 }
664
665 starting_col += column_width;
666 }
667
668 printf ("\n");
669 }
670
671 free (name_sorted_syms);
672 }
673
674 /* Compare two arcs based on their usage counts.
675 We want to sort in descending order. */
676
677 static int
cmp_arc_count(const PTR left,const PTR right)678 cmp_arc_count (const PTR left, const PTR right)
679 {
680 const Arc **npp1 = (const Arc **) left;
681 const Arc **npp2 = (const Arc **) right;
682
683 if ((*npp1)->count > (*npp2)->count)
684 return -1;
685 else if ((*npp1)->count < (*npp2)->count)
686 return 1;
687 else
688 return 0;
689 }
690
691 /* Compare two funtions based on their usage counts.
692 We want to sort in descending order. */
693
694 static int
cmp_fun_nuses(const PTR left,const PTR right)695 cmp_fun_nuses (const PTR left, const PTR right)
696 {
697 const Sym **npp1 = (const Sym **) left;
698 const Sym **npp2 = (const Sym **) right;
699
700 if ((*npp1)->nuses > (*npp2)->nuses)
701 return -1;
702 else if ((*npp1)->nuses < (*npp2)->nuses)
703 return 1;
704 else
705 return 0;
706 }
707
708 /* Print a suggested function ordering based on the profiling data.
709
710 We perform 4 major steps when ordering functions:
711
712 * Group unused functions together and place them at the
713 end of the function order.
714
715 * Search the highest use arcs (those which account for 90% of
716 the total arc count) for functions which have several parents.
717
718 Group those with the most call sites together (currently the
719 top 1.25% which have at least five different call sites).
720
721 These are emitted at the start of the function order.
722
723 * Use a greedy placement algorithm to place functions which
724 occur in the top 99% of the arcs in the profile. Some provisions
725 are made to handle high usage arcs where the parent and/or
726 child has already been placed.
727
728 * Run the same greedy placement algorithm on the remaining
729 arcs to place the leftover functions.
730
731
732 The various "magic numbers" should (one day) be tuneable by command
733 line options. They were arrived at by benchmarking a few applications
734 with various values to see which values produced better overall function
735 orderings.
736
737 Of course, profiling errors, machine limitations (PA long calls), and
738 poor cutoff values for the placement algorithm may limit the usefullness
739 of the resulting function order. Improvements would be greatly appreciated.
740
741 Suggestions:
742
743 * Place the functions with many callers near the middle of the
744 list to reduce long calls.
745
746 * Propagate arc usage changes as functions are placed. Ie if
747 func1 and func2 are placed together, arcs to/from those arcs
748 to the same parent/child should be combined, then resort the
749 arcs to choose the next one.
750
751 * Implement some global positioning algorithm to place the
752 chains made by the greedy local positioning algorithm. Probably
753 by examining arcs which haven't been placed yet to tie two
754 chains together.
755
756 * Take a function's size and time into account in the algorithm;
757 size in particular is important on the PA (long calls). Placing
758 many small functions onto their own page may be wise.
759
760 * Use better profiling information; many published algorithms
761 are based on call sequences through time, rather than just
762 arc counts.
763
764 * Prodecure cloning could improve performance when a small number
765 of arcs account for most of the calls to a particular function.
766
767 * Use relocation information to avoid moving unused functions
768 completely out of the code stream; this would avoid severe lossage
769 when the profile data bears little resemblance to actual runs.
770
771 * Propagation of arc usages should also improve .o link line
772 ordering which shares the same arc placement algorithm with
773 the function ordering code (in fact it is a degenerate case
774 of function ordering). */
775
776 void
cg_print_function_ordering(void)777 cg_print_function_ordering (void)
778 {
779 unsigned long sym_index;
780 unsigned long arc_index;
781 unsigned long used, unused, scratch_index;
782 unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count;
783 #ifdef __GNUC__
784 unsigned long long total_arcs, tmp_arcs_count;
785 #else
786 unsigned long total_arcs, tmp_arcs_count;
787 #endif
788 Sym **unused_syms, **used_syms, **scratch_syms;
789 Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
790
791 sym_index = 0;
792 used = 0;
793 unused = 0;
794 scratch_index = 0;
795 unplaced_arc_count = 0;
796 high_arc_count = 0;
797 scratch_arc_count = 0;
798
799 /* First group all the unused functions together. */
800 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
801 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
802 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
803 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
804 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
805 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
806
807 /* Walk through all the functions; mark those which are never
808 called as placed (we'll emit them as a group later). */
809 for (sym_index = 0, used = 0, unused = 0; sym_index < symtab.len; sym_index++)
810 {
811 if (symtab.base[sym_index].ncalls == 0)
812 {
813 unused_syms[unused++] = &symtab.base[sym_index];
814 symtab.base[sym_index].has_been_placed = 1;
815 }
816 else
817 {
818 used_syms[used++] = &symtab.base[sym_index];
819 symtab.base[sym_index].has_been_placed = 0;
820 symtab.base[sym_index].next = 0;
821 symtab.base[sym_index].prev = 0;
822 symtab.base[sym_index].nuses = 0;
823 }
824 }
825
826 /* Sort the arcs from most used to least used. */
827 qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
828
829 /* Compute the total arc count. Also mark arcs as unplaced.
830
831 Note we don't compensate for overflow if that happens!
832 Overflow is much less likely when this file is compiled
833 with GCC as it can double-wide integers via long long. */
834 total_arcs = 0;
835 for (arc_index = 0; arc_index < numarcs; arc_index++)
836 {
837 total_arcs += arcs[arc_index]->count;
838 arcs[arc_index]->has_been_placed = 0;
839 }
840
841 /* We want to pull out those functions which are referenced
842 by many highly used arcs and emit them as a group. This
843 could probably use some tuning. */
844 tmp_arcs_count = 0;
845 for (arc_index = 0; arc_index < numarcs; arc_index++)
846 {
847 tmp_arcs_count += arcs[arc_index]->count;
848
849 /* Count how many times each parent and child are used up
850 to our threshhold of arcs (90%). */
851 if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
852 break;
853
854 arcs[arc_index]->child->nuses++;
855 }
856
857 /* Now sort a temporary symbol table based on the number of
858 times each function was used in the highest used arcs. */
859 memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
860 qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
861
862 /* Now pick out those symbols we're going to emit as
863 a group. We take up to 1.25% of the used symbols. */
864 for (sym_index = 0; sym_index < used / 80; sym_index++)
865 {
866 Sym *sym = scratch_syms[sym_index];
867 Arc *arc;
868
869 /* If we hit symbols that aren't used from many call sites,
870 then we can quit. We choose five as the low limit for
871 no particular reason. */
872 if (sym->nuses == 5)
873 break;
874
875 /* We're going to need the arcs between these functions.
876 Unfortunately, we don't know all these functions
877 until we're done. So we keep track of all the arcs
878 to the functions we care about, then prune out those
879 which are uninteresting.
880
881 An interesting variation would be to quit when we found
882 multi-call site functions which account for some percentage
883 of the arcs. */
884 arc = sym->cg.children;
885
886 while (arc)
887 {
888 if (arc->parent != arc->child)
889 scratch_arcs[scratch_arc_count++] = arc;
890 arc->has_been_placed = 1;
891 arc = arc->next_child;
892 }
893
894 arc = sym->cg.parents;
895
896 while (arc)
897 {
898 if (arc->parent != arc->child)
899 scratch_arcs[scratch_arc_count++] = arc;
900 arc->has_been_placed = 1;
901 arc = arc->next_parent;
902 }
903
904 /* Keep track of how many symbols we're going to place. */
905 scratch_index = sym_index;
906
907 /* A lie, but it makes identifying
908 these functions easier later. */
909 sym->has_been_placed = 1;
910 }
911
912 /* Now walk through the temporary arcs and copy
913 those we care about into the high arcs array. */
914 for (arc_index = 0; arc_index < scratch_arc_count; arc_index++)
915 {
916 Arc *arc = scratch_arcs[arc_index];
917
918 /* If this arc refers to highly used functions, then
919 then we want to keep it. */
920 if (arc->child->has_been_placed
921 && arc->parent->has_been_placed)
922 {
923 high_arcs[high_arc_count++] = scratch_arcs[arc_index];
924
925 /* We need to turn of has_been_placed since we're going to
926 use the main arc placement algorithm on these arcs. */
927 arc->child->has_been_placed = 0;
928 arc->parent->has_been_placed = 0;
929 }
930 }
931
932 /* Dump the multi-site high usage functions which are not
933 going to be ordered by the main ordering algorithm. */
934 for (sym_index = 0; sym_index < scratch_index; sym_index++)
935 {
936 if (scratch_syms[sym_index]->has_been_placed)
937 printf ("%s\n", scratch_syms[sym_index]->name);
938 }
939
940 /* Now we can order the multi-site high use
941 functions based on the arcs between them. */
942 qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
943 order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
944 unplaced_arcs, &unplaced_arc_count);
945
946 /* Order and dump the high use functions left,
947 these typically have only a few call sites. */
948 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
949 unplaced_arcs, &unplaced_arc_count);
950
951 /* Now place the rarely used functions. */
952 order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
953 scratch_arcs, &scratch_arc_count);
954
955 /* Output any functions not emitted by the order_and_dump calls. */
956 for (sym_index = 0; sym_index < used; sym_index++)
957 if (used_syms[sym_index]->has_been_placed == 0)
958 printf("%s\n", used_syms[sym_index]->name);
959
960 /* Output the unused functions. */
961 for (sym_index = 0; sym_index < unused; sym_index++)
962 printf("%s\n", unused_syms[sym_index]->name);
963
964 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
965 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
966 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
967 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
968 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
969 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
970
971 free (unused_syms);
972 free (used_syms);
973 free (scratch_syms);
974 free (high_arcs);
975 free (scratch_arcs);
976 free (unplaced_arcs);
977 }
978
979 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
980 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
981
982 If ALL is nonzero, then place all functions referenced by THE_ARCS,
983 else only place those referenced in the top 99% of the arcs in THE_ARCS. */
984
985 #define MOST 0.99
986 static void
order_and_dump_functions_by_arcs(the_arcs,arc_count,all,unplaced_arcs,unplaced_arc_count)987 order_and_dump_functions_by_arcs (the_arcs, arc_count, all,
988 unplaced_arcs, unplaced_arc_count)
989 Arc **the_arcs;
990 unsigned long arc_count;
991 int all;
992 Arc **unplaced_arcs;
993 unsigned long *unplaced_arc_count;
994 {
995 #ifdef __GNUC__
996 unsigned long long tmp_arcs, total_arcs;
997 #else
998 unsigned long tmp_arcs, total_arcs;
999 #endif
1000 unsigned int arc_index;
1001
1002 /* If needed, compute the total arc count.
1003
1004 Note we don't compensate for overflow if that happens! */
1005 if (! all)
1006 {
1007 total_arcs = 0;
1008 for (arc_index = 0; arc_index < arc_count; arc_index++)
1009 total_arcs += the_arcs[arc_index]->count;
1010 }
1011 else
1012 total_arcs = 0;
1013
1014 tmp_arcs = 0;
1015
1016 for (arc_index = 0; arc_index < arc_count; arc_index++)
1017 {
1018 Sym *sym1, *sym2;
1019 Sym *child, *parent;
1020
1021 tmp_arcs += the_arcs[arc_index]->count;
1022
1023 /* Ignore this arc if it's already been placed. */
1024 if (the_arcs[arc_index]->has_been_placed)
1025 continue;
1026
1027 child = the_arcs[arc_index]->child;
1028 parent = the_arcs[arc_index]->parent;
1029
1030 /* If we're not using all arcs, and this is a rarely used
1031 arc, then put it on the unplaced_arc list. Similarly
1032 if both the parent and child of this arc have been placed. */
1033 if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1034 || child->has_been_placed || parent->has_been_placed)
1035 {
1036 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1037 continue;
1038 }
1039
1040 /* If all slots in the parent and child are full, then there isn't
1041 anything we can do right now. We'll place this arc on the
1042 unplaced arc list in the hope that a global positioning
1043 algorithm can use it to place function chains. */
1044 if (parent->next && parent->prev && child->next && child->prev)
1045 {
1046 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1047 continue;
1048 }
1049
1050 /* If the parent is unattached, then find the closest
1051 place to attach it onto child's chain. Similarly
1052 for the opposite case. */
1053 if (!parent->next && !parent->prev)
1054 {
1055 int next_count = 0;
1056 int prev_count = 0;
1057 Sym *prev = child;
1058 Sym *next = child;
1059
1060 /* Walk to the beginning and end of the child's chain. */
1061 while (next->next)
1062 {
1063 next = next->next;
1064 next_count++;
1065 }
1066
1067 while (prev->prev)
1068 {
1069 prev = prev->prev;
1070 prev_count++;
1071 }
1072
1073 /* Choose the closest. */
1074 child = next_count < prev_count ? next : prev;
1075 }
1076 else if (! child->next && !child->prev)
1077 {
1078 int next_count = 0;
1079 int prev_count = 0;
1080 Sym *prev = parent;
1081 Sym *next = parent;
1082
1083 while (next->next)
1084 {
1085 next = next->next;
1086 next_count++;
1087 }
1088
1089 while (prev->prev)
1090 {
1091 prev = prev->prev;
1092 prev_count++;
1093 }
1094
1095 parent = prev_count < next_count ? prev : next;
1096 }
1097 else
1098 {
1099 /* Couldn't find anywhere to attach the functions,
1100 put the arc on the unplaced arc list. */
1101 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1102 continue;
1103 }
1104
1105 /* Make sure we don't tie two ends together. */
1106 sym1 = parent;
1107 if (sym1->next)
1108 while (sym1->next)
1109 sym1 = sym1->next;
1110 else
1111 while (sym1->prev)
1112 sym1 = sym1->prev;
1113
1114 sym2 = child;
1115 if (sym2->next)
1116 while (sym2->next)
1117 sym2 = sym2->next;
1118 else
1119 while (sym2->prev)
1120 sym2 = sym2->prev;
1121
1122 if (sym1 == child
1123 && sym2 == parent)
1124 {
1125 /* This would tie two ends together. */
1126 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1127 continue;
1128 }
1129
1130 if (parent->next)
1131 {
1132 /* Must attach to the parent's prev field. */
1133 if (! child->next)
1134 {
1135 /* parent-prev and child-next */
1136 parent->prev = child;
1137 child->next = parent;
1138 the_arcs[arc_index]->has_been_placed = 1;
1139 }
1140 }
1141 else if (parent->prev)
1142 {
1143 /* Must attach to the parent's next field. */
1144 if (! child->prev)
1145 {
1146 /* parent-next and child-prev */
1147 parent->next = child;
1148 child->prev = parent;
1149 the_arcs[arc_index]->has_been_placed = 1;
1150 }
1151 }
1152 else
1153 {
1154 /* Can attach to either field in the parent, depends
1155 on where we've got space in the child. */
1156 if (child->prev)
1157 {
1158 /* parent-prev and child-next. */
1159 parent->prev = child;
1160 child->next = parent;
1161 the_arcs[arc_index]->has_been_placed = 1;
1162 }
1163 else
1164 {
1165 /* parent-next and child-prev. */
1166 parent->next = child;
1167 child->prev = parent;
1168 the_arcs[arc_index]->has_been_placed = 1;
1169 }
1170 }
1171 }
1172
1173 /* Dump the chains of functions we've made. */
1174 for (arc_index = 0; arc_index < arc_count; arc_index++)
1175 {
1176 Sym *sym;
1177 if (the_arcs[arc_index]->parent->has_been_placed
1178 || the_arcs[arc_index]->child->has_been_placed)
1179 continue;
1180
1181 sym = the_arcs[arc_index]->parent;
1182
1183 /* If this symbol isn't attached to any other
1184 symbols, then we've got a rarely used arc.
1185
1186 Skip it for now, we'll deal with them later. */
1187 if (sym->next == NULL
1188 && sym->prev == NULL)
1189 continue;
1190
1191 /* Get to the start of this chain. */
1192 while (sym->prev)
1193 sym = sym->prev;
1194
1195 while (sym)
1196 {
1197 /* Mark it as placed. */
1198 sym->has_been_placed = 1;
1199 printf ("%s\n", sym->name);
1200 sym = sym->next;
1201 }
1202 }
1203
1204 /* If we want to place all the arcs, then output
1205 those which weren't placed by the main algorithm. */
1206 if (all)
1207 for (arc_index = 0; arc_index < arc_count; arc_index++)
1208 {
1209 Sym *sym;
1210 if (the_arcs[arc_index]->parent->has_been_placed
1211 || the_arcs[arc_index]->child->has_been_placed)
1212 continue;
1213
1214 sym = the_arcs[arc_index]->parent;
1215
1216 sym->has_been_placed = 1;
1217 printf ("%s\n", sym->name);
1218 }
1219 }
1220
1221 /* Compare two function_map structs based on file name.
1222 We want to sort in ascending order. */
1223
1224 static int
cmp_symbol_map(const void * l,const void * r)1225 cmp_symbol_map (const void * l, const void * r)
1226 {
1227 return filename_cmp (((struct function_map *) l)->file_name,
1228 ((struct function_map *) r)->file_name);
1229 }
1230
1231 /* Print a suggested .o ordering for files on a link line based
1232 on profiling information. This uses the function placement
1233 code for the bulk of its work. */
1234
1235 void
cg_print_file_ordering(void)1236 cg_print_file_ordering (void)
1237 {
1238 unsigned long scratch_arc_count;
1239 unsigned long arc_index;
1240 unsigned long sym_index;
1241 Arc **scratch_arcs;
1242 char *last;
1243
1244 scratch_arc_count = 0;
1245
1246 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1247 for (arc_index = 0; arc_index < numarcs; arc_index++)
1248 {
1249 if (! arcs[arc_index]->parent->mapped
1250 || ! arcs[arc_index]->child->mapped)
1251 arcs[arc_index]->has_been_placed = 1;
1252 }
1253
1254 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1255 scratch_arcs, &scratch_arc_count);
1256
1257 /* Output .o's not handled by the main placement algorithm. */
1258 for (sym_index = 0; sym_index < symtab.len; sym_index++)
1259 {
1260 if (symtab.base[sym_index].mapped
1261 && ! symtab.base[sym_index].has_been_placed)
1262 printf ("%s\n", symtab.base[sym_index].name);
1263 }
1264
1265 qsort (symbol_map, symbol_map_count, sizeof (struct function_map), cmp_symbol_map);
1266
1267 /* Now output any .o's that didn't have any text symbols. */
1268 last = NULL;
1269 for (sym_index = 0; sym_index < symbol_map_count; sym_index++)
1270 {
1271 unsigned int index2;
1272
1273 /* Don't bother searching if this symbol
1274 is the same as the previous one. */
1275 if (last && !filename_cmp (last, symbol_map[sym_index].file_name))
1276 continue;
1277
1278 for (index2 = 0; index2 < symtab.len; index2++)
1279 {
1280 if (! symtab.base[index2].mapped)
1281 continue;
1282
1283 if (!filename_cmp (symtab.base[index2].name,
1284 symbol_map[sym_index].file_name))
1285 break;
1286 }
1287
1288 /* If we didn't find it in the symbol table, then it must
1289 be a .o with no text symbols. Output it last. */
1290 if (index2 == symtab.len)
1291 printf ("%s\n", symbol_map[sym_index].file_name);
1292 last = symbol_map[sym_index].file_name;
1293 }
1294 }
1295