1 /* cg_print.c -  Print routines for displaying call graphs.
2 
3    Copyright (C) 2000-2014 Free Software Foundation, Inc.
4 
5    This file is part of GNU Binutils.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
20    02110-1301, USA.  */
21 
22 #include "gprof.h"
23 #include "libiberty.h"
24 #include "filenames.h"
25 #include "search_list.h"
26 #include "source.h"
27 #include "symtab.h"
28 #include "cg_arcs.h"
29 #include "cg_print.h"
30 #include "hist.h"
31 #include "utils.h"
32 #include "corefile.h"
33 
34 /* Return value of comparison functions used to sort tables.  */
35 #define	LESSTHAN	-1
36 #define	EQUALTO		0
37 #define	GREATERTHAN	1
38 
39 static void print_header (void);
40 static void print_cycle (Sym *);
41 static int cmp_member (Sym *, Sym *);
42 static void sort_members (Sym *);
43 static void print_members (Sym *);
44 static int cmp_arc (Arc *, Arc *);
45 static void sort_parents (Sym *);
46 static void print_parents (Sym *);
47 static void sort_children (Sym *);
48 static void print_children (Sym *);
49 static void print_line (Sym *);
50 static int cmp_name (const PTR, const PTR);
51 static int cmp_arc_count (const PTR, const PTR);
52 static int cmp_fun_nuses (const PTR, const PTR);
53 static void order_and_dump_functions_by_arcs
54   (Arc **, unsigned long, int, Arc **, unsigned long *);
55 
56 /* Declarations of automatically generated functions to output blurbs.  */
57 extern void bsd_callg_blurb (FILE * fp);
58 extern void fsf_callg_blurb (FILE * fp);
59 
60 double print_time = 0.0;
61 
62 
63 static void
print_header()64 print_header ()
65 {
66   if (first_output)
67     first_output = FALSE;
68   else
69     printf ("\f\n");
70 
71   if (!bsd_style_output)
72     {
73       if (print_descriptions)
74 	printf (_("\t\t     Call graph (explanation follows)\n\n"));
75       else
76 	printf (_("\t\t\tCall graph\n\n"));
77     }
78 
79   printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
80 	  (long) hist_scale * (long) sizeof (UNIT));
81 
82   if (print_time > 0.0)
83     printf (_(" for %.2f%% of %.2f seconds\n\n"),
84 	    100.0 / print_time, print_time / hz);
85   else
86     {
87       printf (_(" no time propagated\n\n"));
88 
89       /* This doesn't hurt, since all the numerators will be 0.0.  */
90       print_time = 1.0;
91     }
92 
93   if (bsd_style_output)
94     {
95       printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s     %-8.8s\n",
96 	      "", "", "", "", _("called"), _("total"), _("parents"));
97       printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
98 	      _("index"),
99 	      /* xgettext:no-c-format */
100 	      _("%time"),
101 	      _("self"), _("descendants"), _("called"), _("self"),
102 	      _("name"), _("index"));
103       printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s     %-8.8s\n",
104 	      "", "", "", "", _("called"), _("total"), _("children"));
105       printf ("\n");
106     }
107   else
108     {
109       printf (_("index %% time    self  children    called     name\n"));
110     }
111 }
112 
113 /* Print a cycle header.  */
114 
115 static void
print_cycle(Sym * cyc)116 print_cycle (Sym *cyc)
117 {
118   char buf[BUFSIZ];
119 
120   sprintf (buf, "[%d]", cyc->cg.index);
121   printf (bsd_style_output
122 	  ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
123 	  : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
124 	  100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
125 	  cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
126 
127   if (cyc->cg.self_calls != 0)
128     printf ("+%-7lu", cyc->cg.self_calls);
129   else
130     printf (" %7.7s", "");
131 
132   printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
133 }
134 
135 /* Compare LEFT and RIGHT membmer.  Major comparison key is
136    CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS.  */
137 
138 static int
cmp_member(Sym * left,Sym * right)139 cmp_member (Sym *left, Sym *right)
140 {
141   double left_time = left->cg.prop.self + left->cg.prop.child;
142   double right_time = right->cg.prop.self + right->cg.prop.child;
143   unsigned long left_calls = left->ncalls + left->cg.self_calls;
144   unsigned long right_calls = right->ncalls + right->cg.self_calls;
145 
146   if (left_time > right_time)
147     return GREATERTHAN;
148 
149   if (left_time < right_time)
150     return LESSTHAN;
151 
152   if (left_calls > right_calls)
153     return GREATERTHAN;
154 
155   if (left_calls < right_calls)
156     return LESSTHAN;
157 
158   return EQUALTO;
159 }
160 
161 /* Sort members of a cycle.  */
162 
163 static void
sort_members(Sym * cyc)164 sort_members (Sym *cyc)
165 {
166   Sym *todo, *doing, *prev;
167 
168   /* Detach cycle members from cyclehead,
169      and insertion sort them back on.  */
170   todo = cyc->cg.cyc.next;
171   cyc->cg.cyc.next = 0;
172 
173   for (doing = todo; doing != NULL; doing = todo)
174     {
175       todo = doing->cg.cyc.next;
176 
177       for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
178 	{
179 	  if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
180 	    break;
181 	}
182 
183       doing->cg.cyc.next = prev->cg.cyc.next;
184       prev->cg.cyc.next = doing;
185     }
186 }
187 
188 /* Print the members of a cycle.  */
189 
190 static void
print_members(Sym * cyc)191 print_members (Sym *cyc)
192 {
193   Sym *member;
194 
195   sort_members (cyc);
196 
197   for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
198     {
199       printf (bsd_style_output
200 	      ? "%6.6s %5.5s %7.2f %11.2f %7lu"
201 	      : "%6.6s %5.5s %7.2f %7.2f %7lu",
202 	      "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
203 	      member->ncalls);
204 
205       if (member->cg.self_calls != 0)
206 	printf ("+%-7lu", member->cg.self_calls);
207       else
208 	printf (" %7.7s", "");
209 
210       printf ("     ");
211       print_name (member);
212       printf ("\n");
213     }
214 }
215 
216 /* Compare two arcs to/from the same child/parent.
217 	- if one arc is a self arc, it's least.
218 	- if one arc is within a cycle, it's less than.
219 	- if both arcs are within a cycle, compare arc counts.
220 	- if neither arc is within a cycle, compare with
221 		time + child_time as major key
222 		arc count as minor key.  */
223 
224 static int
cmp_arc(Arc * left,Arc * right)225 cmp_arc (Arc *left, Arc *right)
226 {
227   Sym *left_parent = left->parent;
228   Sym *left_child = left->child;
229   Sym *right_parent = right->parent;
230   Sym *right_child = right->child;
231   double left_time, right_time;
232 
233   DBG (TIMEDEBUG,
234        printf ("[cmp_arc] ");
235        print_name (left_parent);
236        printf (" calls ");
237        print_name (left_child);
238        printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
239 	       left->count, left_child->ncalls);
240        printf ("[cmp_arc] ");
241        print_name (right_parent);
242        printf (" calls ");
243        print_name (right_child);
244        printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
245 	       right->count, right_child->ncalls);
246        printf ("\n");
247     );
248 
249   if (left_parent == left_child)
250     return LESSTHAN;		/* Left is a self call.  */
251 
252   if (right_parent == right_child)
253     return GREATERTHAN;		/* Right is a self call.  */
254 
255   if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
256       && left_parent->cg.cyc.num == left_child->cg.cyc.num)
257     {
258       /* Left is a call within a cycle.  */
259       if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
260 	  && right_parent->cg.cyc.num == right_child->cg.cyc.num)
261 	{
262 	  /* Right is a call within the cycle, too.  */
263 	  if (left->count < right->count)
264 	    return LESSTHAN;
265 
266 	  if (left->count > right->count)
267 	    return GREATERTHAN;
268 
269 	  return EQUALTO;
270 	}
271       else
272 	{
273 	  /* Right isn't a call within the cycle.  */
274 	  return LESSTHAN;
275 	}
276     }
277   else
278     {
279       /* Left isn't a call within a cycle.  */
280       if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
281 	  && right_parent->cg.cyc.num == right_child->cg.cyc.num)
282 	{
283 	  /* Right is a call within a cycle.  */
284 	  return GREATERTHAN;
285 	}
286       else
287 	{
288 	  /* Neither is a call within a cycle.  */
289 	  left_time = left->time + left->child_time;
290 	  right_time = right->time + right->child_time;
291 
292 	  if (left_time < right_time)
293 	    return LESSTHAN;
294 
295 	  if (left_time > right_time)
296 	    return GREATERTHAN;
297 
298 	  if (left->count < right->count)
299 	    return LESSTHAN;
300 
301 	  if (left->count > right->count)
302 	    return GREATERTHAN;
303 
304 	  return EQUALTO;
305 	}
306     }
307 }
308 
309 
310 static void
sort_parents(Sym * child)311 sort_parents (Sym * child)
312 {
313   Arc *arc, *detached, sorted, *prev;
314 
315   /* Unlink parents from child, then insertion sort back on to
316      sorted's parents.
317 	  *arc        the arc you have detached and are inserting.
318 	  *detached   the rest of the arcs to be sorted.
319 	  sorted      arc list onto which you insertion sort.
320 	  *prev       arc before the arc you are comparing.  */
321   sorted.next_parent = 0;
322 
323   for (arc = child->cg.parents; arc; arc = detached)
324     {
325       detached = arc->next_parent;
326 
327       /* Consider *arc as disconnected; insert it into sorted.  */
328       for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
329 	{
330 	  if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
331 	    break;
332 	}
333 
334       arc->next_parent = prev->next_parent;
335       prev->next_parent = arc;
336     }
337 
338   /* Reattach sorted arcs to child.  */
339   child->cg.parents = sorted.next_parent;
340 }
341 
342 
343 static void
print_parents(Sym * child)344 print_parents (Sym *child)
345 {
346   Sym *parent;
347   Arc *arc;
348   Sym *cycle_head;
349 
350   if (child->cg.cyc.head != 0)
351     cycle_head = child->cg.cyc.head;
352   else
353     cycle_head = child;
354 
355   if (!child->cg.parents)
356     {
357       printf (bsd_style_output
358 	      ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s     <spontaneous>\n")
359 	      : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s     <spontaneous>\n"),
360 	      "", "", "", "", "", "");
361       return;
362     }
363 
364   sort_parents (child);
365 
366   for (arc = child->cg.parents; arc; arc = arc->next_parent)
367     {
368       parent = arc->parent;
369       if (child == parent || (child->cg.cyc.num != 0
370 			      && parent->cg.cyc.num == child->cg.cyc.num))
371 	{
372 	  /* Selfcall or call among siblings.  */
373 	  printf (bsd_style_output
374 		  ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s     "
375 		  : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s     ",
376 		  "", "", "", "",
377 		  arc->count, "");
378 	  print_name (parent);
379 	  printf ("\n");
380 	}
381       else
382 	{
383 	  /* Regular parent of child.  */
384 	  printf (bsd_style_output
385 		  ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu     "
386 		  : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu     ",
387 		  "", "",
388 		  arc->time / hz, arc->child_time / hz,
389 		  arc->count, cycle_head->ncalls);
390 	  print_name (parent);
391 	  printf ("\n");
392 	}
393     }
394 }
395 
396 
397 static void
sort_children(Sym * parent)398 sort_children (Sym *parent)
399 {
400   Arc *arc, *detached, sorted, *prev;
401 
402   /* Unlink children from parent, then insertion sort back on to
403      sorted's children.
404 	  *arc        the arc you have detached and are inserting.
405 	  *detached   the rest of the arcs to be sorted.
406 	  sorted      arc list onto which you insertion sort.
407 	  *prev       arc before the arc you are comparing.  */
408   sorted.next_child = 0;
409 
410   for (arc = parent->cg.children; arc; arc = detached)
411     {
412       detached = arc->next_child;
413 
414       /* Consider *arc as disconnected; insert it into sorted.  */
415       for (prev = &sorted; prev->next_child; prev = prev->next_child)
416 	{
417 	  if (cmp_arc (arc, prev->next_child) != LESSTHAN)
418 	    break;
419 	}
420 
421       arc->next_child = prev->next_child;
422       prev->next_child = arc;
423     }
424 
425   /* Reattach sorted children to parent.  */
426   parent->cg.children = sorted.next_child;
427 }
428 
429 
430 static void
print_children(Sym * parent)431 print_children (Sym *parent)
432 {
433   Sym *child;
434   Arc *arc;
435 
436   sort_children (parent);
437   arc = parent->cg.children;
438 
439   for (arc = parent->cg.children; arc; arc = arc->next_child)
440     {
441       child = arc->child;
442       if (child == parent || (child->cg.cyc.num != 0
443 			      && child->cg.cyc.num == parent->cg.cyc.num))
444 	{
445 	  /* Self call or call to sibling.  */
446 	  printf (bsd_style_output
447 		  ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s     "
448 		  : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s     ",
449 		  "", "", "", "", arc->count, "");
450 	  print_name (child);
451 	  printf ("\n");
452 	}
453       else
454 	{
455 	  /* Regular child of parent.  */
456 	  printf (bsd_style_output
457 		  ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu     "
458 		  : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu     ",
459 		  "", "",
460 		  arc->time / hz, arc->child_time / hz,
461 		  arc->count, child->cg.cyc.head->ncalls);
462 	  print_name (child);
463 	  printf ("\n");
464 	}
465     }
466 }
467 
468 
469 static void
print_line(Sym * np)470 print_line (Sym *np)
471 {
472   char buf[BUFSIZ];
473 
474   sprintf (buf, "[%d]", np->cg.index);
475   printf (bsd_style_output
476 	  ? "%-6.6s %5.1f %7.2f %11.2f"
477 	  : "%-6.6s %5.1f %7.2f %7.2f", buf,
478 	  100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
479 	  np->cg.prop.self / hz, np->cg.prop.child / hz);
480 
481   if ((np->ncalls + np->cg.self_calls) != 0)
482     {
483       printf (" %7lu", np->ncalls);
484 
485       if (np->cg.self_calls != 0)
486 	  printf ("+%-7lu ", np->cg.self_calls);
487       else
488 	  printf (" %7.7s ", "");
489     }
490   else
491     {
492       printf (" %7.7s %7.7s ", "", "");
493     }
494 
495   print_name (np);
496   printf ("\n");
497 }
498 
499 
500 /* Print dynamic call graph.  */
501 
502 void
cg_print(Sym ** timesortsym)503 cg_print (Sym ** timesortsym)
504 {
505   unsigned int sym_index;
506   Sym *parent;
507 
508   if (print_descriptions && bsd_style_output)
509     bsd_callg_blurb (stdout);
510 
511   print_header ();
512 
513   for (sym_index = 0; sym_index < symtab.len + num_cycles; ++sym_index)
514     {
515       parent = timesortsym[sym_index];
516 
517       if ((ignore_zeros && parent->ncalls == 0
518 	   && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
519 	   && parent->cg.prop.child == 0)
520 	  || !parent->cg.print_flag
521 	  || (line_granularity && ! parent->is_func))
522 	continue;
523 
524       if (!parent->name && parent->cg.cyc.num != 0)
525 	{
526 	  /* Cycle header.  */
527 	  print_cycle (parent);
528 	  print_members (parent);
529 	}
530       else
531 	{
532 	  print_parents (parent);
533 	  print_line (parent);
534 	  print_children (parent);
535 	}
536 
537       if (bsd_style_output)
538 	printf ("\n");
539 
540       printf ("-----------------------------------------------\n");
541 
542       if (bsd_style_output)
543 	printf ("\n");
544     }
545 
546   free (timesortsym);
547 
548   if (print_descriptions && !bsd_style_output)
549     fsf_callg_blurb (stdout);
550 }
551 
552 
553 static int
cmp_name(const PTR left,const PTR right)554 cmp_name (const PTR left, const PTR right)
555 {
556   const Sym **npp1 = (const Sym **) left;
557   const Sym **npp2 = (const Sym **) right;
558 
559   return strcmp ((*npp1)->name, (*npp2)->name);
560 }
561 
562 
563 void
cg_print_index()564 cg_print_index ()
565 {
566   unsigned int sym_index;
567   unsigned int nnames, todo, i, j;
568   int col, starting_col;
569   Sym **name_sorted_syms, *sym;
570   const char *filename;
571   char buf[20];
572   int column_width = (output_width - 1) / 3;	/* Don't write in last col!  */
573 
574   /* Now, sort regular function name
575      alphabetically to create an index.  */
576   name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
577 
578   for (sym_index = 0, nnames = 0; sym_index < symtab.len; sym_index++)
579     {
580       if (ignore_zeros && symtab.base[sym_index].ncalls == 0
581 	  && symtab.base[sym_index].hist.time == 0)
582 	continue;
583 
584       name_sorted_syms[nnames++] = &symtab.base[sym_index];
585     }
586 
587   qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
588 
589   for (sym_index = 1, todo = nnames; sym_index <= num_cycles; sym_index++)
590     name_sorted_syms[todo++] = &cycle_header[sym_index];
591 
592   printf ("\f\n");
593   printf (_("Index by function name\n\n"));
594   sym_index = (todo + 2) / 3;
595 
596   for (i = 0; i < sym_index; i++)
597     {
598       col = 0;
599       starting_col = 0;
600 
601       for (j = i; j < todo; j += sym_index)
602 	{
603 	  sym = name_sorted_syms[j];
604 
605 	  if (sym->cg.print_flag)
606 	    sprintf (buf, "[%d]", sym->cg.index);
607 	  else
608 	    sprintf (buf, "(%d)", sym->cg.index);
609 
610 	  if (j < nnames)
611 	    {
612 	      if (bsd_style_output)
613 		{
614 		  printf ("%6.6s %-19.19s", buf, sym->name);
615 		}
616 	      else
617 		{
618 		  col += strlen (buf);
619 
620 		  for (; col < starting_col + 5; ++col)
621 		    putchar (' ');
622 
623 		  printf (" %s ", buf);
624 		  col += print_name_only (sym);
625 
626 		  if (!line_granularity && sym->is_static && sym->file)
627 		    {
628 		      filename = sym->file->name;
629 
630 		      if (!print_path)
631 			{
632 			  filename = strrchr (filename, '/');
633 
634 			  if (filename)
635 			    ++filename;
636 			  else
637 			    filename = sym->file->name;
638 			}
639 
640 		      printf (" (%s)", filename);
641 		      col += strlen (filename) + 3;
642 		    }
643 		}
644 	    }
645 	  else
646 	    {
647 	      if (bsd_style_output)
648 		{
649 		  printf ("%6.6s ", buf);
650 		  sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
651 		  printf ("%-19.19s", buf);
652 		}
653 	      else
654 		{
655 		  col += strlen (buf);
656 		  for (; col < starting_col + 5; ++col)
657 		    putchar (' ');
658 		  printf (" %s ", buf);
659 		  sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
660 		  printf ("%s", buf);
661 		  col += strlen (buf);
662 		}
663 	    }
664 
665 	  starting_col += column_width;
666 	}
667 
668       printf ("\n");
669     }
670 
671   free (name_sorted_syms);
672 }
673 
674 /* Compare two arcs based on their usage counts.
675    We want to sort in descending order.  */
676 
677 static int
cmp_arc_count(const PTR left,const PTR right)678 cmp_arc_count (const PTR left, const PTR right)
679 {
680   const Arc **npp1 = (const Arc **) left;
681   const Arc **npp2 = (const Arc **) right;
682 
683   if ((*npp1)->count > (*npp2)->count)
684     return -1;
685   else if ((*npp1)->count < (*npp2)->count)
686     return 1;
687   else
688     return 0;
689 }
690 
691 /* Compare two funtions based on their usage counts.
692    We want to sort in descending order.  */
693 
694 static int
cmp_fun_nuses(const PTR left,const PTR right)695 cmp_fun_nuses (const PTR left, const PTR right)
696 {
697   const Sym **npp1 = (const Sym **) left;
698   const Sym **npp2 = (const Sym **) right;
699 
700   if ((*npp1)->nuses > (*npp2)->nuses)
701     return -1;
702   else if ((*npp1)->nuses < (*npp2)->nuses)
703     return 1;
704   else
705     return 0;
706 }
707 
708 /* Print a suggested function ordering based on the profiling data.
709 
710    We perform 4 major steps when ordering functions:
711 
712 	* Group unused functions together and place them at the
713 	end of the function order.
714 
715 	* Search the highest use arcs (those which account for 90% of
716 	the total arc count) for functions which have several parents.
717 
718 	Group those with the most call sites together (currently the
719 	top 1.25% which have at least five different call sites).
720 
721 	These are emitted at the start of the function order.
722 
723 	* Use a greedy placement algorithm to place functions which
724 	occur in the top 99% of the arcs in the profile.  Some provisions
725 	are made to handle high usage arcs where the parent and/or
726 	child has already been placed.
727 
728 	* Run the same greedy placement algorithm on the remaining
729 	arcs to place the leftover functions.
730 
731 
732    The various "magic numbers" should (one day) be tuneable by command
733    line options.  They were arrived at by benchmarking a few applications
734    with various values to see which values produced better overall function
735    orderings.
736 
737    Of course, profiling errors, machine limitations (PA long calls), and
738    poor cutoff values for the placement algorithm may limit the usefullness
739    of the resulting function order.  Improvements would be greatly appreciated.
740 
741    Suggestions:
742 
743 	* Place the functions with many callers near the middle of the
744 	list to reduce long calls.
745 
746 	* Propagate arc usage changes as functions are placed.  Ie if
747 	func1 and func2 are placed together, arcs to/from those arcs
748 	to the same parent/child should be combined, then resort the
749 	arcs to choose the next one.
750 
751 	* Implement some global positioning algorithm to place the
752 	chains made by the greedy local positioning algorithm.  Probably
753 	by examining arcs which haven't been placed yet to tie two
754 	chains together.
755 
756 	* Take a function's size and time into account in the algorithm;
757 	size in particular is important on the PA (long calls).  Placing
758 	many small functions onto their own page may be wise.
759 
760 	* Use better profiling information; many published algorithms
761 	are based on call sequences through time, rather than just
762 	arc counts.
763 
764 	* Prodecure cloning could improve performance when a small number
765 	of arcs account for most of the calls to a particular function.
766 
767 	* Use relocation information to avoid moving unused functions
768 	completely out of the code stream; this would avoid severe lossage
769 	when the profile data bears little resemblance to actual runs.
770 
771 	* Propagation of arc usages should also improve .o link line
772 	ordering which shares the same arc placement algorithm with
773 	the function ordering code (in fact it is a degenerate case
774 	of function ordering).  */
775 
776 void
cg_print_function_ordering(void)777 cg_print_function_ordering (void)
778 {
779   unsigned long sym_index;
780   unsigned long arc_index;
781   unsigned long used, unused, scratch_index;
782   unsigned long  unplaced_arc_count, high_arc_count, scratch_arc_count;
783 #ifdef __GNUC__
784   unsigned long long total_arcs, tmp_arcs_count;
785 #else
786   unsigned long total_arcs, tmp_arcs_count;
787 #endif
788   Sym **unused_syms, **used_syms, **scratch_syms;
789   Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
790 
791   sym_index = 0;
792   used = 0;
793   unused = 0;
794   scratch_index = 0;
795   unplaced_arc_count = 0;
796   high_arc_count = 0;
797   scratch_arc_count = 0;
798 
799   /* First group all the unused functions together.  */
800   unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
801   used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
802   scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
803   high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
804   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
805   unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
806 
807   /* Walk through all the functions; mark those which are never
808      called as placed (we'll emit them as a group later).  */
809   for (sym_index = 0, used = 0, unused = 0; sym_index < symtab.len; sym_index++)
810     {
811       if (symtab.base[sym_index].ncalls == 0)
812 	{
813 	  unused_syms[unused++] = &symtab.base[sym_index];
814 	  symtab.base[sym_index].has_been_placed = 1;
815 	}
816       else
817 	{
818 	  used_syms[used++] = &symtab.base[sym_index];
819 	  symtab.base[sym_index].has_been_placed = 0;
820 	  symtab.base[sym_index].next = 0;
821 	  symtab.base[sym_index].prev = 0;
822 	  symtab.base[sym_index].nuses = 0;
823 	}
824     }
825 
826   /* Sort the arcs from most used to least used.  */
827   qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
828 
829   /* Compute the total arc count.  Also mark arcs as unplaced.
830 
831      Note we don't compensate for overflow if that happens!
832      Overflow is much less likely when this file is compiled
833      with GCC as it can double-wide integers via long long.  */
834   total_arcs = 0;
835   for (arc_index = 0; arc_index < numarcs; arc_index++)
836     {
837       total_arcs += arcs[arc_index]->count;
838       arcs[arc_index]->has_been_placed = 0;
839     }
840 
841   /* We want to pull out those functions which are referenced
842      by many highly used arcs and emit them as a group.  This
843      could probably use some tuning.  */
844   tmp_arcs_count = 0;
845   for (arc_index = 0; arc_index < numarcs; arc_index++)
846     {
847       tmp_arcs_count += arcs[arc_index]->count;
848 
849       /* Count how many times each parent and child are used up
850 	 to our threshhold of arcs (90%).  */
851       if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
852 	break;
853 
854       arcs[arc_index]->child->nuses++;
855     }
856 
857   /* Now sort a temporary symbol table based on the number of
858      times each function was used in the highest used arcs.  */
859   memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
860   qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
861 
862   /* Now pick out those symbols we're going to emit as
863      a group.  We take up to 1.25% of the used symbols.  */
864   for (sym_index = 0; sym_index < used / 80; sym_index++)
865     {
866       Sym *sym = scratch_syms[sym_index];
867       Arc *arc;
868 
869       /* If we hit symbols that aren't used from many call sites,
870 	 then we can quit.  We choose five as the low limit for
871 	 no particular reason.  */
872       if (sym->nuses == 5)
873 	break;
874 
875       /* We're going to need the arcs between these functions.
876 	 Unfortunately, we don't know all these functions
877 	 until we're done.  So we keep track of all the arcs
878 	 to the functions we care about, then prune out those
879 	 which are uninteresting.
880 
881 	 An interesting variation would be to quit when we found
882 	 multi-call site functions which account for some percentage
883 	 of the arcs.  */
884       arc = sym->cg.children;
885 
886       while (arc)
887 	{
888 	  if (arc->parent != arc->child)
889 	    scratch_arcs[scratch_arc_count++] = arc;
890 	  arc->has_been_placed = 1;
891 	  arc = arc->next_child;
892 	}
893 
894       arc = sym->cg.parents;
895 
896       while (arc)
897 	{
898 	  if (arc->parent != arc->child)
899 	    scratch_arcs[scratch_arc_count++] = arc;
900 	  arc->has_been_placed = 1;
901 	  arc = arc->next_parent;
902 	}
903 
904       /* Keep track of how many symbols we're going to place.  */
905       scratch_index = sym_index;
906 
907       /* A lie, but it makes identifying
908 	 these functions easier later.  */
909       sym->has_been_placed = 1;
910     }
911 
912   /* Now walk through the temporary arcs and copy
913      those we care about into the high arcs array.  */
914   for (arc_index = 0; arc_index < scratch_arc_count; arc_index++)
915     {
916       Arc *arc = scratch_arcs[arc_index];
917 
918       /* If this arc refers to highly used functions, then
919 	 then we want to keep it.  */
920       if (arc->child->has_been_placed
921 	  && arc->parent->has_been_placed)
922 	{
923 	  high_arcs[high_arc_count++] = scratch_arcs[arc_index];
924 
925 	  /* We need to turn of has_been_placed since we're going to
926 	     use the main arc placement algorithm on these arcs.  */
927 	  arc->child->has_been_placed = 0;
928 	  arc->parent->has_been_placed = 0;
929 	}
930     }
931 
932   /* Dump the multi-site high usage functions which are not
933      going to be ordered by the main ordering algorithm.  */
934   for (sym_index = 0; sym_index < scratch_index; sym_index++)
935     {
936       if (scratch_syms[sym_index]->has_been_placed)
937 	printf ("%s\n", scratch_syms[sym_index]->name);
938     }
939 
940   /* Now we can order the multi-site high use
941      functions based on the arcs between them.  */
942   qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
943   order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
944 				    unplaced_arcs, &unplaced_arc_count);
945 
946   /* Order and dump the high use functions left,
947      these typically have only a few call sites.  */
948   order_and_dump_functions_by_arcs (arcs, numarcs, 0,
949 				    unplaced_arcs, &unplaced_arc_count);
950 
951   /* Now place the rarely used functions.  */
952   order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
953 				    scratch_arcs, &scratch_arc_count);
954 
955   /* Output any functions not emitted by the order_and_dump calls.  */
956   for (sym_index = 0; sym_index < used; sym_index++)
957     if (used_syms[sym_index]->has_been_placed == 0)
958       printf("%s\n", used_syms[sym_index]->name);
959 
960   /* Output the unused functions.  */
961   for (sym_index = 0; sym_index < unused; sym_index++)
962     printf("%s\n", unused_syms[sym_index]->name);
963 
964   unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
965   used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
966   scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
967   high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
968   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
969   unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
970 
971   free (unused_syms);
972   free (used_syms);
973   free (scratch_syms);
974   free (high_arcs);
975   free (scratch_arcs);
976   free (unplaced_arcs);
977 }
978 
979 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
980    place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
981 
982    If ALL is nonzero, then place all functions referenced by THE_ARCS,
983    else only place those referenced in the top 99% of the arcs in THE_ARCS.  */
984 
985 #define MOST 0.99
986 static void
order_and_dump_functions_by_arcs(the_arcs,arc_count,all,unplaced_arcs,unplaced_arc_count)987 order_and_dump_functions_by_arcs (the_arcs, arc_count, all,
988 				  unplaced_arcs, unplaced_arc_count)
989      Arc **the_arcs;
990      unsigned long arc_count;
991      int all;
992      Arc **unplaced_arcs;
993      unsigned long *unplaced_arc_count;
994 {
995 #ifdef __GNUC__
996   unsigned long long tmp_arcs, total_arcs;
997 #else
998   unsigned long tmp_arcs, total_arcs;
999 #endif
1000   unsigned int arc_index;
1001 
1002   /* If needed, compute the total arc count.
1003 
1004      Note we don't compensate for overflow if that happens!  */
1005   if (! all)
1006     {
1007       total_arcs = 0;
1008       for (arc_index = 0; arc_index < arc_count; arc_index++)
1009 	total_arcs += the_arcs[arc_index]->count;
1010     }
1011   else
1012     total_arcs = 0;
1013 
1014   tmp_arcs = 0;
1015 
1016   for (arc_index = 0; arc_index < arc_count; arc_index++)
1017     {
1018       Sym *sym1, *sym2;
1019       Sym *child, *parent;
1020 
1021       tmp_arcs += the_arcs[arc_index]->count;
1022 
1023       /* Ignore this arc if it's already been placed.  */
1024       if (the_arcs[arc_index]->has_been_placed)
1025 	continue;
1026 
1027       child = the_arcs[arc_index]->child;
1028       parent = the_arcs[arc_index]->parent;
1029 
1030       /* If we're not using all arcs, and this is a rarely used
1031 	 arc, then put it on the unplaced_arc list.  Similarly
1032 	 if both the parent and child of this arc have been placed.  */
1033       if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1034 	  || child->has_been_placed || parent->has_been_placed)
1035 	{
1036 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1037 	  continue;
1038 	}
1039 
1040       /* If all slots in the parent and child are full, then there isn't
1041 	 anything we can do right now.  We'll place this arc on the
1042 	 unplaced arc list in the hope that a global positioning
1043 	 algorithm can use it to place function chains.  */
1044       if (parent->next && parent->prev && child->next && child->prev)
1045 	{
1046 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1047 	  continue;
1048 	}
1049 
1050       /* If the parent is unattached, then find the closest
1051 	 place to attach it onto child's chain.   Similarly
1052 	 for the opposite case.  */
1053       if (!parent->next && !parent->prev)
1054 	{
1055 	  int next_count = 0;
1056 	  int prev_count = 0;
1057 	  Sym *prev = child;
1058 	  Sym *next = child;
1059 
1060 	  /* Walk to the beginning and end of the child's chain.  */
1061 	  while (next->next)
1062 	    {
1063 	      next = next->next;
1064 	      next_count++;
1065 	    }
1066 
1067 	  while (prev->prev)
1068 	    {
1069 	      prev = prev->prev;
1070 	      prev_count++;
1071 	    }
1072 
1073 	  /* Choose the closest.  */
1074 	  child = next_count < prev_count ? next : prev;
1075 	}
1076       else if (! child->next && !child->prev)
1077 	{
1078 	  int next_count = 0;
1079 	  int prev_count = 0;
1080 	  Sym *prev = parent;
1081 	  Sym *next = parent;
1082 
1083 	  while (next->next)
1084 	    {
1085 	      next = next->next;
1086 	      next_count++;
1087 	    }
1088 
1089 	  while (prev->prev)
1090 	    {
1091 	      prev = prev->prev;
1092 	      prev_count++;
1093 	    }
1094 
1095 	  parent = prev_count < next_count ? prev : next;
1096 	}
1097       else
1098 	{
1099 	  /* Couldn't find anywhere to attach the functions,
1100 	     put the arc on the unplaced arc list.  */
1101 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1102 	  continue;
1103 	}
1104 
1105       /* Make sure we don't tie two ends together.  */
1106       sym1 = parent;
1107       if (sym1->next)
1108 	while (sym1->next)
1109 	  sym1 = sym1->next;
1110       else
1111 	while (sym1->prev)
1112 	  sym1 = sym1->prev;
1113 
1114       sym2 = child;
1115       if (sym2->next)
1116 	while (sym2->next)
1117 	  sym2 = sym2->next;
1118       else
1119 	while (sym2->prev)
1120 	  sym2 = sym2->prev;
1121 
1122       if (sym1 == child
1123 	  && sym2 == parent)
1124 	{
1125 	  /* This would tie two ends together.  */
1126 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1127 	  continue;
1128 	}
1129 
1130       if (parent->next)
1131 	{
1132 	  /* Must attach to the parent's prev field.  */
1133 	  if (! child->next)
1134 	    {
1135 	      /* parent-prev and child-next */
1136 	      parent->prev = child;
1137 	      child->next = parent;
1138 	      the_arcs[arc_index]->has_been_placed = 1;
1139 	    }
1140 	}
1141       else if (parent->prev)
1142 	{
1143 	  /* Must attach to the parent's next field.  */
1144 	  if (! child->prev)
1145 	    {
1146 	      /* parent-next and child-prev */
1147 	      parent->next = child;
1148 	      child->prev = parent;
1149 	      the_arcs[arc_index]->has_been_placed = 1;
1150 	    }
1151 	}
1152       else
1153 	{
1154 	  /* Can attach to either field in the parent, depends
1155 	     on where we've got space in the child.  */
1156 	  if (child->prev)
1157 	    {
1158 	      /* parent-prev and child-next.  */
1159 	      parent->prev = child;
1160 	      child->next = parent;
1161 	      the_arcs[arc_index]->has_been_placed = 1;
1162 	    }
1163 	  else
1164 	    {
1165 	      /* parent-next and child-prev.  */
1166 	      parent->next = child;
1167 	      child->prev = parent;
1168 	      the_arcs[arc_index]->has_been_placed = 1;
1169 	    }
1170 	}
1171     }
1172 
1173   /* Dump the chains of functions we've made.  */
1174   for (arc_index = 0; arc_index < arc_count; arc_index++)
1175     {
1176       Sym *sym;
1177       if (the_arcs[arc_index]->parent->has_been_placed
1178 	  || the_arcs[arc_index]->child->has_been_placed)
1179 	continue;
1180 
1181       sym = the_arcs[arc_index]->parent;
1182 
1183       /* If this symbol isn't attached to any other
1184 	 symbols, then we've got a rarely used arc.
1185 
1186 	 Skip it for now, we'll deal with them later.  */
1187       if (sym->next == NULL
1188 	  && sym->prev == NULL)
1189 	continue;
1190 
1191       /* Get to the start of this chain.  */
1192       while (sym->prev)
1193 	sym = sym->prev;
1194 
1195       while (sym)
1196 	{
1197 	  /* Mark it as placed.  */
1198 	  sym->has_been_placed = 1;
1199 	  printf ("%s\n", sym->name);
1200 	  sym = sym->next;
1201 	}
1202     }
1203 
1204   /* If we want to place all the arcs, then output
1205      those which weren't placed by the main algorithm.  */
1206   if (all)
1207     for (arc_index = 0; arc_index < arc_count; arc_index++)
1208       {
1209 	Sym *sym;
1210 	if (the_arcs[arc_index]->parent->has_been_placed
1211 	    || the_arcs[arc_index]->child->has_been_placed)
1212 	  continue;
1213 
1214 	sym = the_arcs[arc_index]->parent;
1215 
1216 	sym->has_been_placed = 1;
1217 	printf ("%s\n", sym->name);
1218       }
1219 }
1220 
1221 /* Compare two function_map structs based on file name.
1222    We want to sort in ascending order.  */
1223 
1224 static int
cmp_symbol_map(const void * l,const void * r)1225 cmp_symbol_map (const void * l, const void * r)
1226 {
1227   return filename_cmp (((struct function_map *) l)->file_name,
1228 		       ((struct function_map *) r)->file_name);
1229 }
1230 
1231 /* Print a suggested .o ordering for files on a link line based
1232    on profiling information.  This uses the function placement
1233    code for the bulk of its work.  */
1234 
1235 void
cg_print_file_ordering(void)1236 cg_print_file_ordering (void)
1237 {
1238   unsigned long scratch_arc_count;
1239   unsigned long arc_index;
1240   unsigned long sym_index;
1241   Arc **scratch_arcs;
1242   char *last;
1243 
1244   scratch_arc_count = 0;
1245 
1246   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1247   for (arc_index = 0; arc_index < numarcs; arc_index++)
1248     {
1249       if (! arcs[arc_index]->parent->mapped
1250 	  || ! arcs[arc_index]->child->mapped)
1251 	arcs[arc_index]->has_been_placed = 1;
1252     }
1253 
1254   order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1255 				    scratch_arcs, &scratch_arc_count);
1256 
1257   /* Output .o's not handled by the main placement algorithm.  */
1258   for (sym_index = 0; sym_index < symtab.len; sym_index++)
1259     {
1260       if (symtab.base[sym_index].mapped
1261 	  && ! symtab.base[sym_index].has_been_placed)
1262 	printf ("%s\n", symtab.base[sym_index].name);
1263     }
1264 
1265   qsort (symbol_map, symbol_map_count, sizeof (struct function_map), cmp_symbol_map);
1266 
1267   /* Now output any .o's that didn't have any text symbols.  */
1268   last = NULL;
1269   for (sym_index = 0; sym_index < symbol_map_count; sym_index++)
1270     {
1271       unsigned int index2;
1272 
1273       /* Don't bother searching if this symbol
1274 	 is the same as the previous one.  */
1275       if (last && !filename_cmp (last, symbol_map[sym_index].file_name))
1276 	continue;
1277 
1278       for (index2 = 0; index2 < symtab.len; index2++)
1279 	{
1280 	  if (! symtab.base[index2].mapped)
1281 	    continue;
1282 
1283 	  if (!filename_cmp (symtab.base[index2].name,
1284 			     symbol_map[sym_index].file_name))
1285 	    break;
1286 	}
1287 
1288       /* If we didn't find it in the symbol table, then it must
1289 	 be a .o with no text symbols.  Output it last.  */
1290       if (index2 == symtab.len)
1291 	printf ("%s\n", symbol_map[sym_index].file_name);
1292       last = symbol_map[sym_index].file_name;
1293     }
1294 }
1295