1 /* Standard libdwfl callbacks for debugging the running Linux kernel.
2    Copyright (C) 2005-2011, 2013, 2014, 2015 Red Hat, Inc.
3    This file is part of elfutils.
4 
5    This file is free software; you can redistribute it and/or modify
6    it under the terms of either
7 
8      * the GNU Lesser General Public License as published by the Free
9        Software Foundation; either version 3 of the License, or (at
10        your option) any later version
11 
12    or
13 
14      * the GNU General Public License as published by the Free
15        Software Foundation; either version 2 of the License, or (at
16        your option) any later version
17 
18    or both in parallel, as here.
19 
20    elfutils is distributed in the hope that it will be useful, but
21    WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    General Public License for more details.
24 
25    You should have received copies of the GNU General Public License and
26    the GNU Lesser General Public License along with this program.  If
27    not, see <http://www.gnu.org/licenses/>.  */
28 
29 /* In case we have a bad fts we include this before config.h because it
30    can't handle _FILE_OFFSET_BITS.
31    Everything we need here is fine if its declarations just come first.
32    Also, include sys/types.h before fts. On some systems fts.h is not self
33    contained. */
34 #ifdef BAD_FTS
35   #include <sys/types.h>
36   #include <fts.h>
37 #endif
38 
39 #include <config.h>
40 #include <system.h>
41 
42 #include "libelfP.h"
43 #include "libdwflP.h"
44 #include <inttypes.h>
45 #include <errno.h>
46 #include <stdio.h>
47 #include <stdio_ext.h>
48 #include <string.h>
49 #include <stdlib.h>
50 #include <sys/utsname.h>
51 #include <fcntl.h>
52 #include <unistd.h>
53 
54 /* If fts.h is included before config.h, its indirect inclusions may not
55    give us the right LFS aliases of these functions, so map them manually.  */
56 #ifdef BAD_FTS
57   #ifdef _FILE_OFFSET_BITS
58     #define open open64
59     #define fopen fopen64
60   #endif
61 #else
62   #include <sys/types.h>
63   #include <fts.h>
64 #endif
65 
66 
/* Module name under which the kernel image itself is reported.  */
#define KERNEL_MODNAME	"kernel"

/* printf format for the per-release modules directory; the %s is the
   kernel release string.  */
#define MODULEDIRFMT	"/lib/modules/%s"

/* Files and printf formats (the %s is a module name) that are read to
   learn about the running kernel and its loaded modules.  */
#define KNOTESFILE	"/sys/kernel/notes"
#define	MODNOTESFMT	"/sys/module/%s/notes"
#define KSYMSFILE	"/proc/kallsyms"
#define MODULELIST	"/proc/modules"
#define	SECADDRDIRFMT	"/sys/module/%s/sections/"
#define MODULE_SECT_NAME_LEN 32	/* Minimum any linux/module.h has had.  */
77 
78 
/* File name suffixes that try_kernel_name appends, in this order, when
   the plain name cannot be opened.  Which entries are present depends
   on the USE_BZLIB and USE_LZMA configuration macros.  */
static const char *vmlinux_suffixes[] =
  {
    ".gz",
#ifdef USE_BZLIB
    ".bz2",
#endif
#ifdef USE_LZMA
    ".xz",
#endif
  };
89 
90 /* Try to open the given file as it is or under the debuginfo directory.  */
91 static int
try_kernel_name(Dwfl * dwfl,char ** fname,bool try_debug)92 try_kernel_name (Dwfl *dwfl, char **fname, bool try_debug)
93 {
94   if (*fname == NULL)
95     return -1;
96 
97   /* Don't bother trying *FNAME itself here if the path will cause it to be
98      tried because we give its own basename as DEBUGLINK_FILE.  */
99   int fd = ((((dwfl->callbacks->debuginfo_path
100 	       ? *dwfl->callbacks->debuginfo_path : NULL)
101 	      ?: DEFAULT_DEBUGINFO_PATH)[0] == ':') ? -1
102 	    : TEMP_FAILURE_RETRY (open (*fname, O_RDONLY)));
103 
104   if (fd < 0)
105     {
106       Dwfl_Module fakemod = { .dwfl = dwfl };
107 
108       if (try_debug)
109 	/* Passing NULL for DEBUGLINK_FILE searches for both the basenamer
110 	   "vmlinux" and the default of basename + ".debug", to look for
111 	   "vmlinux.debug" files.  */
112 	fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
113 						   *fname, NULL, 0,
114 						   &fakemod.debug.name);
115       else
116 	/* Try the file's unadorned basename as DEBUGLINK_FILE,
117 	   to look only for "vmlinux" files.  */
118 	fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
119 						   *fname, basename (*fname),
120 						   0, &fakemod.debug.name);
121 
122       if (fakemod.debug.name != NULL)
123 	{
124 	  free (*fname);
125 	  *fname = fakemod.debug.name;
126 	}
127     }
128 
129   if (fd < 0)
130     for (size_t i = 0;
131 	 i < sizeof vmlinux_suffixes / sizeof vmlinux_suffixes[0];
132 	 ++i)
133       {
134 	char *zname;
135 	if (asprintf (&zname, "%s%s", *fname, vmlinux_suffixes[i]) > 0)
136 	  {
137 	    fd = TEMP_FAILURE_RETRY (open (zname, O_RDONLY));
138 	    if (fd < 0)
139 	      free (zname);
140 	    else
141 	      {
142 		free (*fname);
143 		*fname = zname;
144 	      }
145 	  }
146       }
147 
148   if (fd < 0)
149     {
150       free (*fname);
151       *fname = NULL;
152     }
153 
154   return fd;
155 }
156 
/* Return the release string of the running kernel (as `uname -r` would
   print it), or NULL on failure.  On non-Linux systems this always
   fails with errno set to ENOTSUP.  The returned string lives in static
   storage and must not be freed.  */
static inline const char *
kernel_release (void)
{
#ifdef __linux__
  /* uname is only called while the cached release is still empty.  */
  static struct utsname cached;

  if (cached.release[0] != '\0')
    return cached.release;

  return uname (&cached) == 0 ? cached.release : NULL;
#else
  /* Finding the running Linux kernel is not supported on systems that
     are not running a Linux kernel.  */
  errno = ENOTSUP;
  return NULL;
#endif
}
173 
174 static int
find_kernel_elf(Dwfl * dwfl,const char * release,char ** fname)175 find_kernel_elf (Dwfl *dwfl, const char *release, char **fname)
176 {
177   if ((release[0] == '/'
178        ? asprintf (fname, "%s/vmlinux", release)
179        : asprintf (fname, "/boot/vmlinux-%s", release)) < 0)
180     return -1;
181 
182   int fd = try_kernel_name (dwfl, fname, true);
183   if (fd < 0 && release[0] != '/')
184     {
185       free (*fname);
186       if (asprintf (fname, MODULEDIRFMT "/vmlinux", release) < 0)
187 	return -1;
188       fd = try_kernel_name (dwfl, fname, true);
189     }
190 
191   return fd;
192 }
193 
194 static int
get_release(Dwfl * dwfl,const char ** release)195 get_release (Dwfl *dwfl, const char **release)
196 {
197   if (dwfl == NULL)
198     return -1;
199 
200   const char *release_string = release == NULL ? NULL : *release;
201   if (release_string == NULL)
202     {
203       release_string = kernel_release ();
204       if (release_string == NULL)
205 	return errno;
206       if (release != NULL)
207 	*release = release_string;
208     }
209 
210   return 0;
211 }
212 
213 static int
report_kernel(Dwfl * dwfl,const char ** release,int (* predicate)(const char * module,const char * file))214 report_kernel (Dwfl *dwfl, const char **release,
215 	       int (*predicate) (const char *module, const char *file))
216 {
217   int result = get_release (dwfl, release);
218   if (unlikely (result != 0))
219     return result;
220 
221   char *fname;
222   int fd = find_kernel_elf (dwfl, *release, &fname);
223 
224   if (fd < 0)
225     result = ((predicate != NULL && !(*predicate) (KERNEL_MODNAME, NULL))
226 	      ? 0 : errno ?: ENOENT);
227   else
228     {
229       bool report = true;
230 
231       if (predicate != NULL)
232 	{
233 	  /* Let the predicate decide whether to use this one.  */
234 	  int want = (*predicate) (KERNEL_MODNAME, fname);
235 	  if (want < 0)
236 	    result = errno;
237 	  report = want > 0;
238 	}
239 
240       if (report)
241 	{
242 	  /* Note that on some architectures (e.g. x86_64) the vmlinux
243 	     is ET_EXEC, while on others (e.g. ppc64) it is ET_DYN.
244 	     In both cases the phdr p_vaddr load address will be non-zero.
245 	     We want the image to be placed as if it was ET_DYN, so
246 	     pass true for add_p_vaddr which will do the right thing
247 	     (in combination with a zero base) in either case.  */
248 	  Dwfl_Module *mod = INTUSE(dwfl_report_elf) (dwfl, KERNEL_MODNAME,
249 						      fname, fd, 0, true);
250 	  if (mod == NULL)
251 	    result = -1;
252 	  else
253 	    /* The kernel is ET_EXEC, but always treat it as relocatable.  */
254 	    mod->e_type = ET_DYN;
255 	}
256 
257       free (fname);
258 
259       if (!report || result < 0)
260 	close (fd);
261     }
262 
263   return result;
264 }
265 
266 /* Look for a kernel debug archive.  If we find one, report all its modules.
267    If not, return ENOENT.  */
268 static int
report_kernel_archive(Dwfl * dwfl,const char ** release,int (* predicate)(const char * module,const char * file))269 report_kernel_archive (Dwfl *dwfl, const char **release,
270 		       int (*predicate) (const char *module, const char *file))
271 {
272   int result = get_release (dwfl, release);
273   if (unlikely (result != 0))
274     return result;
275 
276   char *archive;
277   int res = (((*release)[0] == '/')
278 	     ? asprintf (&archive, "%s/debug.a", *release)
279 	     : asprintf (&archive, MODULEDIRFMT "/debug.a", *release));
280   if (unlikely (res < 0))
281     return ENOMEM;
282 
283   int fd = try_kernel_name (dwfl, &archive, false);
284   if (fd < 0)
285     result = errno ?: ENOENT;
286   else
287     {
288       /* We have the archive file open!  */
289       Dwfl_Module *last = __libdwfl_report_offline (dwfl, NULL, archive, fd,
290 						    true, predicate);
291       if (unlikely (last == NULL))
292 	result = -1;
293       else
294 	{
295 	  /* Find the kernel and move it to the head of the list.  */
296 	  Dwfl_Module **tailp = &dwfl->modulelist, **prevp = tailp;
297 	  for (Dwfl_Module *m = *prevp; m != NULL; m = *(prevp = &m->next))
298 	    if (!m->gc && m->e_type != ET_REL && !strcmp (m->name, "kernel"))
299 	      {
300 		*prevp = m->next;
301 		m->next = *tailp;
302 		*tailp = m;
303 		break;
304 	      }
305 	}
306     }
307 
308   free (archive);
309   return result;
310 }
311 
/* Check whether the name of F ends in one of the recognized kernel
   module suffixes.

   With NAMELEN nonzero, the file name must be exactly NAMELEN
   characters followed by the suffix.  With NAMELEN zero, any name with
   at least one character before the suffix matches.

   Returns the length of the matching suffix, or 0 if none matches.  */
static size_t
check_suffix (const FTSENT *f, size_t namelen)
{
  static const char *const suffixes[] =
    {
      ".ko",
      ".ko.gz",
#if USE_BZLIB
      ".ko.bz2",
#endif
#if USE_LZMA
      ".ko.xz",
#endif
    };

  for (size_t i = 0; i < sizeof suffixes / sizeof suffixes[0]; ++i)
    {
      const char *sfx = suffixes[i];
      const size_t sfxlen = strlen (sfx);

      /* Settle the length first so the pointer arithmetic below never
         reaches in front of the name.  */
      if (namelen != 0
          ? f->fts_namelen != namelen + sfxlen
          : f->fts_namelen <= sfxlen)
        continue;

      /* The comparison includes the terminating NUL.  */
      if (memcmp (f->fts_name + f->fts_namelen - sfxlen, sfx, sfxlen + 1) == 0)
        return sfxlen;
    }

  return 0;
}
335 
336 /* Report a kernel and all its modules found on disk, for offline use.
337    If RELEASE starts with '/', it names a directory to look in;
338    if not, it names a directory to find under /lib/modules/;
339    if null, /lib/modules/`uname -r` is used.
340    Returns zero on success, -1 if dwfl_report_module failed,
341    or an errno code if finding the files on disk failed.  */
342 int
dwfl_linux_kernel_report_offline(Dwfl * dwfl,const char * release,int (* predicate)(const char * module,const char * file))343 dwfl_linux_kernel_report_offline (Dwfl *dwfl, const char *release,
344 				  int (*predicate) (const char *module,
345 						    const char *file))
346 {
347   int result = report_kernel_archive (dwfl, &release, predicate);
348   if (result != ENOENT)
349     return result;
350 
351   /* First report the kernel.  */
352   result = report_kernel (dwfl, &release, predicate);
353   if (result == 0)
354     {
355       /* Do "find /lib/modules/RELEASE -name *.ko".  */
356 
357       char *modulesdir[] = { NULL, NULL };
358       if (release[0] == '/')
359 	modulesdir[0] = (char *) release;
360       else
361 	{
362 	  if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
363 	    return errno;
364 	}
365 
366       FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
367       if (modulesdir[0] == (char *) release)
368 	modulesdir[0] = NULL;
369       if (fts == NULL)
370 	{
371 	  free (modulesdir[0]);
372 	  return errno;
373 	}
374 
375       FTSENT *f;
376       while ((f = fts_read (fts)) != NULL)
377 	{
378 	  /* Skip a "source" subtree, which tends to be large.
379 	     This insane hard-coding of names is what depmod does too.  */
380 	  if (f->fts_namelen == sizeof "source" - 1
381 	      && !strcmp (f->fts_name, "source"))
382 	    {
383 	      fts_set (fts, f, FTS_SKIP);
384 	      continue;
385 	    }
386 
387 	  switch (f->fts_info)
388 	    {
389 	    case FTS_F:
390 	    case FTS_SL:
391 	    case FTS_NSOK:;
392 	      /* See if this file name matches "*.ko".  */
393 	      const size_t suffix = check_suffix (f, 0);
394 	      if (suffix)
395 		{
396 		  /* We have a .ko file to report.  Following the algorithm
397 		     by which the kernel makefiles set KBUILD_MODNAME, we
398 		     replace all ',' or '-' with '_' in the file name and
399 		     call that the module name.  Modules could well be
400 		     built using different embedded names than their file
401 		     names.  To handle that, we would have to look at the
402 		     __this_module.name contents in the module's text.  */
403 
404 		  char *name = strndup (f->fts_name, f->fts_namelen - suffix);
405 		  if (unlikely (name == NULL))
406 		    {
407 		      __libdwfl_seterrno (DWFL_E_NOMEM);
408 		      result = -1;
409 		      break;
410 		    }
411 		  for (size_t i = 0; i < f->fts_namelen - suffix; ++i)
412 		    if (name[i] == '-' || name[i] == ',')
413 		      name[i] = '_';
414 
415 		  if (predicate != NULL)
416 		    {
417 		      /* Let the predicate decide whether to use this one.  */
418 		      int want = (*predicate) (name, f->fts_path);
419 		      if (want < 0)
420 			{
421 			  result = -1;
422 			  free (name);
423 			  break;
424 			}
425 		      if (!want)
426 			{
427 			  free (name);
428 			  continue;
429 			}
430 		    }
431 
432 		  if (dwfl_report_offline (dwfl, name, f->fts_path, -1) == NULL)
433 		    {
434 		      free (name);
435 		      result = -1;
436 		      break;
437 		    }
438 		  free (name);
439 		}
440 	      continue;
441 
442 	    case FTS_ERR:
443 	    case FTS_DNR:
444 	    case FTS_NS:
445 	      result = f->fts_errno;
446 	      break;
447 
448 	    case FTS_SLNONE:
449 	    default:
450 	      continue;
451 	    }
452 
453 	  /* We only get here in error cases.  */
454 	  break;
455 	}
456       fts_close (fts);
457       free (modulesdir[0]);
458     }
459 
460   return result;
461 }
462 INTDEF (dwfl_linux_kernel_report_offline)
463 
464 
/* State of read_address used by intuit_kernel_bounds. */
struct read_address_state {
  FILE *f;		/* Stream the lines are read from.  */
  char *line;		/* getline buffer holding the most recent line.  */
  size_t linesz;	/* Allocated size of LINE, maintained by getline.  */
  size_t n;		/* Result of the most recent getline call.  */
  char *p;		/* Parse position: text after the type field.  */
  const char *type;	/* Field that follows the address on the line.  */
};
474 
475 static inline bool
read_address(struct read_address_state * state,Dwarf_Addr * addr)476 read_address (struct read_address_state *state, Dwarf_Addr *addr)
477 {
478   if ((state->n = getline (&state->line, &state->linesz, state->f)) < 1 ||
479       state->line[state->n - 2] == ']')
480     return false;
481   *addr = strtoull (state->line, &state->p, 16);
482   state->p += strspn (state->p, " \t");
483   state->type = strsep (&state->p, " \t\n");
484   if (state->type == NULL)
485     return false;
486   return state->p != NULL && state->p != state->line;
487 }
488 
489 
490 /* Grovel around to guess the bounds of the runtime kernel image.  */
491 static int
intuit_kernel_bounds(Dwarf_Addr * start,Dwarf_Addr * end,Dwarf_Addr * notes)492 intuit_kernel_bounds (Dwarf_Addr *start, Dwarf_Addr *end, Dwarf_Addr *notes)
493 {
494   struct read_address_state state = { NULL, NULL, 0, 0, NULL, NULL };
495 
496   *notes = 0;
497 
498   state.f = fopen (KSYMSFILE, "r");
499   if (state.f == NULL)
500     return errno;
501 
502   (void) __fsetlocking (state.f, FSETLOCKING_BYCALLER);
503 
504   int result;
505   do
506     result = read_address (&state, start) ? 0 : -1;
507   while (result == 0 && strchr ("TtRr", *state.type) == NULL);
508 
509   if (result == 0)
510     {
511       *end = *start;
512       while (read_address (&state, end))
513 	if (*notes == 0 && !strcmp (state.p, "__start_notes\n"))
514 	  *notes = *end;
515 
516       Dwarf_Addr round_kernel = sysconf (_SC_PAGESIZE);
517       *start &= -(Dwarf_Addr) round_kernel;
518       *end += round_kernel - 1;
519       *end &= -(Dwarf_Addr) round_kernel;
520       if (*start >= *end || *end - *start < round_kernel)
521 	result = -1;
522     }
523   free (state.line);
524 
525   if (result == -1)
526     result = ferror_unlocked (state.f) ? errno : ENOEXEC;
527 
528   fclose (state.f);
529 
530   return result;
531 }
532 
533 
534 /* Look for a build ID note in NOTESFILE and associate the ID with MOD.  */
535 static int
check_notes(Dwfl_Module * mod,const char * notesfile,Dwarf_Addr vaddr,const char * secname)536 check_notes (Dwfl_Module *mod, const char *notesfile,
537 	     Dwarf_Addr vaddr, const char *secname)
538 {
539   int fd = open (notesfile, O_RDONLY);
540   if (fd < 0)
541     return 1;
542 
543   assert (sizeof (Elf32_Nhdr) == sizeof (GElf_Nhdr));
544   assert (sizeof (Elf64_Nhdr) == sizeof (GElf_Nhdr));
545   union
546   {
547     GElf_Nhdr nhdr;
548     unsigned char data[8192];
549   } buf;
550 
551   ssize_t n = read (fd, buf.data, sizeof buf);
552   close (fd);
553 
554   if (n <= 0)
555     return 1;
556 
557   unsigned char *p = buf.data;
558   size_t len = 0;
559   while (p < &buf.data[n])
560     {
561       /* No translation required since we are reading the native kernel.  */
562       GElf_Nhdr *nhdr = (void *) p;
563       len += sizeof *nhdr;
564       p += len;
565       unsigned char *name = p;
566       unsigned char *bits;
567       /* This is somewhat ugly, GNU Property notes use different padding,
568 	 but all we have is the file content, so we have to actually check
569 	 the name and type.  */
570       if (nhdr->n_type == NT_GNU_PROPERTY_TYPE_0
571           && nhdr->n_namesz == sizeof "GNU"
572           && name + nhdr->n_namesz < &buf.data[n]
573           && !memcmp (name, "GNU", sizeof "GNU"))
574 	{
575 	  len += nhdr->n_namesz;
576 	  len = NOTE_ALIGN8 (len);
577 	  p = buf.data + len;
578 	  bits = p;
579 	  len += nhdr->n_descsz;
580 	  len = NOTE_ALIGN8 (len);
581 	  p = buf.data + len;
582 	}
583       else
584 	{
585 	  len += nhdr->n_namesz;
586 	  len = NOTE_ALIGN4 (len);
587 	  p = buf.data + len;
588 	  bits = p;
589 	  len += nhdr->n_descsz;
590 	  len = NOTE_ALIGN4 (len);
591 	  p = buf.data + len;
592 	}
593 
594       if (p <= &buf.data[n]
595 	  && nhdr->n_type == NT_GNU_BUILD_ID
596 	  && nhdr->n_namesz == sizeof "GNU"
597 	  && !memcmp (name, "GNU", sizeof "GNU"))
598 	{
599 	  /* Found it.  For a module we must figure out its VADDR now.  */
600 
601 	  if (secname != NULL
602 	      && (INTUSE(dwfl_linux_kernel_module_section_address)
603 		  (mod, NULL, mod->name, 0, secname, 0, NULL, &vaddr) != 0
604 		  || vaddr == (GElf_Addr) -1l))
605 	    vaddr = 0;
606 
607 	  if (vaddr != 0)
608 	    vaddr += bits - buf.data;
609 	  return INTUSE(dwfl_module_report_build_id) (mod, bits,
610 						      nhdr->n_descsz, vaddr);
611 	}
612     }
613 
614   return 0;
615 }
616 
617 /* Look for a build ID for the kernel.  */
618 static int
check_kernel_notes(Dwfl_Module * kernelmod,GElf_Addr vaddr)619 check_kernel_notes (Dwfl_Module *kernelmod, GElf_Addr vaddr)
620 {
621   return check_notes (kernelmod, KNOTESFILE, vaddr, NULL) < 0 ? -1 : 0;
622 }
623 
624 /* Look for a build ID for a loaded kernel module.  */
625 static int
check_module_notes(Dwfl_Module * mod)626 check_module_notes (Dwfl_Module *mod)
627 {
628   char *dirs[2] = { NULL, NULL };
629   if (asprintf (&dirs[0], MODNOTESFMT, mod->name) < 0)
630     return ENOMEM;
631 
632   FTS *fts = fts_open (dirs, FTS_NOSTAT | FTS_LOGICAL, NULL);
633   if (fts == NULL)
634     {
635       free (dirs[0]);
636       return 0;
637     }
638 
639   int result = 0;
640   FTSENT *f;
641   while ((f = fts_read (fts)) != NULL)
642     {
643       switch (f->fts_info)
644 	{
645 	case FTS_F:
646 	case FTS_SL:
647 	case FTS_NSOK:
648 	  result = check_notes (mod, f->fts_accpath, 0, f->fts_name);
649 	  if (result > 0)	/* Nothing found.  */
650 	    {
651 	      result = 0;
652 	      continue;
653 	    }
654 	  break;
655 
656 	case FTS_ERR:
657 	case FTS_DNR:
658 	  result = f->fts_errno;
659 	  break;
660 
661 	case FTS_NS:
662 	case FTS_SLNONE:
663 	default:
664 	  continue;
665 	}
666 
667       /* We only get here when finished or in error cases.  */
668       break;
669     }
670   fts_close (fts);
671   free (dirs[0]);
672 
673   return result;
674 }
675 
676 int
dwfl_linux_kernel_report_kernel(Dwfl * dwfl)677 dwfl_linux_kernel_report_kernel (Dwfl *dwfl)
678 {
679   Dwarf_Addr start = 0;
680   Dwarf_Addr end = 0;
681 
682   #define report() \
683     (INTUSE(dwfl_report_module) (dwfl, KERNEL_MODNAME, start, end))
684 
685   /* This is a bit of a kludge.  If we already reported the kernel,
686      don't bother figuring it out again--it never changes.  */
687   for (Dwfl_Module *m = dwfl->modulelist; m != NULL; m = m->next)
688     if (!strcmp (m->name, KERNEL_MODNAME))
689       {
690 	start = m->low_addr;
691 	end = m->high_addr;
692 	return report () == NULL ? -1 : 0;
693       }
694 
695   /* Try to figure out the bounds of the kernel image without
696      looking for any vmlinux file.  */
697   Dwarf_Addr notes;
698   int result = intuit_kernel_bounds (&start, &end, &notes);
699   if (result == 0)
700     {
701       Dwfl_Module *mod = report ();
702       return unlikely (mod == NULL) ? -1 : check_kernel_notes (mod, notes);
703     }
704   if (result != ENOENT)
705     return result;
706 
707   /* Find the ELF file for the running kernel and dwfl_report_elf it.  */
708   return report_kernel (dwfl, NULL, NULL);
709 }
INTDEF(dwfl_linux_kernel_report_kernel)710 INTDEF (dwfl_linux_kernel_report_kernel)
711 
712 
/* Build in ALTERNATE_NAME a copy of MODULE_NAME in which every FROM
   character is replaced by TO.

   NAMELEN is the length of MODULE_NAME; ALTERNATE_NAME must have room
   for NAMELEN + 1 bytes, since the byte following the name (its
   terminator) is copied as well.  Returns false, leaving
   ALTERNATE_NAME untouched, if MODULE_NAME contains no FROM character;
   returns true otherwise.  */
static inline bool
subst_name (char from, char to,
            const char * const module_name,
            char * const alternate_name,
            const size_t namelen)
{
  /* Nothing to substitute: leave the output buffer alone.  */
  if (memchr (module_name, from, namelen) == NULL)
    return false;

  for (size_t i = 0; i < namelen; ++i)
    {
      const char c = module_name[i];
      alternate_name[i] = c == from ? to : c;
    }

  /* Carry over the byte that terminates the name.  */
  alternate_name[namelen] = module_name[namelen];
  return true;
}
735 
736 /* Dwfl_Callbacks.find_elf for the running Linux kernel and its modules.  */
737 
738 int
dwfl_linux_kernel_find_elf(Dwfl_Module * mod,void ** userdata,const char * module_name,Dwarf_Addr base,char ** file_name,Elf ** elfp)739 dwfl_linux_kernel_find_elf (Dwfl_Module *mod,
740 			    void **userdata __attribute__ ((unused)),
741 			    const char *module_name,
742 			    Dwarf_Addr base __attribute__ ((unused)),
743 			    char **file_name, Elf **elfp)
744 {
745   if (mod->build_id_len > 0)
746     {
747       int fd = INTUSE(dwfl_build_id_find_elf) (mod, NULL, NULL, 0,
748 					       file_name, elfp);
749       if (fd >= 0 || mod->main.elf != NULL || errno != 0)
750 	return fd;
751     }
752 
753   const char *release = kernel_release ();
754   if (release == NULL)
755     return errno;
756 
757   if (!strcmp (module_name, KERNEL_MODNAME))
758     return find_kernel_elf (mod->dwfl, release, file_name);
759 
760   /* Do "find /lib/modules/`uname -r` -name MODULE_NAME.ko".  */
761 
762   char *modulesdir[] = { NULL, NULL };
763   if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
764     return -1;
765 
766   FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
767   if (fts == NULL)
768     {
769       free (modulesdir[0]);
770       return -1;
771     }
772 
773   size_t namelen = strlen (module_name);
774 
775   /* This is a kludge.  There is no actual necessary relationship between
776      the name of the .ko file installed and the module name the kernel
777      knows it by when it's loaded.  The kernel's only idea of the module
778      name comes from the name embedded in the object's magic
779      .gnu.linkonce.this_module section.
780 
781      In practice, these module names match the .ko file names except for
782      some using '_' and some using '-'.  So our cheap kludge is to look for
783      two files when either a '_' or '-' appears in a module name, one using
784      only '_' and one only using '-'.  */
785 
786   char *alternate_name = malloc (namelen + 1);
787   if (unlikely (alternate_name == NULL))
788     {
789       free (modulesdir[0]);
790       return ENOMEM;
791     }
792   if (!subst_name ('-', '_', module_name, alternate_name, namelen) &&
793       !subst_name ('_', '-', module_name, alternate_name, namelen))
794     alternate_name[0] = '\0';
795 
796   FTSENT *f;
797   int error = ENOENT;
798   while ((f = fts_read (fts)) != NULL)
799     {
800       /* Skip a "source" subtree, which tends to be large.
801 	 This insane hard-coding of names is what depmod does too.  */
802       if (f->fts_namelen == sizeof "source" - 1
803 	  && !strcmp (f->fts_name, "source"))
804 	{
805 	  fts_set (fts, f, FTS_SKIP);
806 	  continue;
807 	}
808 
809       error = ENOENT;
810       switch (f->fts_info)
811 	{
812 	case FTS_F:
813 	case FTS_SL:
814 	case FTS_NSOK:
815 	  /* See if this file name is "MODULE_NAME.ko".  */
816 	  if (check_suffix (f, namelen)
817 	      && (!memcmp (f->fts_name, module_name, namelen)
818 		  || !memcmp (f->fts_name, alternate_name, namelen)))
819 	    {
820 	      int fd = open (f->fts_accpath, O_RDONLY);
821 	      *file_name = strdup (f->fts_path);
822 	      fts_close (fts);
823 	      free (modulesdir[0]);
824 	      free (alternate_name);
825 	      if (fd < 0)
826 		free (*file_name);
827 	      else if (*file_name == NULL)
828 		{
829 		  close (fd);
830 		  fd = -1;
831 		}
832 	      return fd;
833 	    }
834 	  break;
835 
836 	case FTS_ERR:
837 	case FTS_DNR:
838 	case FTS_NS:
839 	  error = f->fts_errno;
840 	  break;
841 
842 	case FTS_SLNONE:
843 	default:
844 	  break;
845 	}
846     }
847 
848   fts_close (fts);
849   free (modulesdir[0]);
850   free (alternate_name);
851   errno = error;
852   return -1;
853 }
INTDEF(dwfl_linux_kernel_find_elf)854 INTDEF (dwfl_linux_kernel_find_elf)
855 
856 
857 /* Dwfl_Callbacks.section_address for kernel modules in the running Linux.
858    We read the information from /sys/module directly.  */
859 
860 int
861 dwfl_linux_kernel_module_section_address
862 (Dwfl_Module *mod __attribute__ ((unused)),
863  void **userdata __attribute__ ((unused)),
864  const char *modname, Dwarf_Addr base __attribute__ ((unused)),
865  const char *secname, Elf32_Word shndx __attribute__ ((unused)),
866  const GElf_Shdr *shdr __attribute__ ((unused)),
867  Dwarf_Addr *addr)
868 {
869   char *sysfile;
870   if (asprintf (&sysfile, SECADDRDIRFMT "%s", modname, secname) < 0)
871     return DWARF_CB_ABORT;
872 
873   FILE *f = fopen (sysfile, "r");
874   free (sysfile);
875 
876   if (f == NULL)
877     {
878       if (errno == ENOENT)
879 	{
880 	  /* The .modinfo and .data.percpu sections are never kept
881 	     loaded in the kernel.  If the kernel was compiled without
882 	     CONFIG_MODULE_UNLOAD, the .exit.* sections are not
883 	     actually loaded at all.
884 
885 	     Setting *ADDR to -1 tells the caller this section is
886 	     actually absent from memory.  */
887 
888 	  if (!strcmp (secname, ".modinfo")
889 	      || !strcmp (secname, ".data.percpu")
890 	      || !strncmp (secname, ".exit", 5))
891 	    {
892 	      *addr = (Dwarf_Addr) -1l;
893 	      return DWARF_CB_OK;
894 	    }
895 
896 	  /* The goofy PPC64 module_frob_arch_sections function tweaks
897 	     the section names as a way to control other kernel code's
898 	     behavior, and this cruft leaks out into the /sys information.
899 	     The file name for ".init*" may actually look like "_init*".  */
900 
901 	  const bool is_init = !strncmp (secname, ".init", 5);
902 	  if (is_init)
903 	    {
904 	      if (asprintf (&sysfile, SECADDRDIRFMT "_%s",
905 			    modname, &secname[1]) < 0)
906 		return ENOMEM;
907 	      f = fopen (sysfile, "r");
908 	      free (sysfile);
909 	      if (f != NULL)
910 		goto ok;
911 	    }
912 
913 	  /* The kernel truncates section names to MODULE_SECT_NAME_LEN - 1.
914 	     In case that size increases in the future, look for longer
915 	     truncated names first.  */
916 	  size_t namelen = strlen (secname);
917 	  if (namelen >= MODULE_SECT_NAME_LEN)
918 	    {
919 	      int len = asprintf (&sysfile, SECADDRDIRFMT "%s",
920 				  modname, secname);
921 	      if (len < 0)
922 		return DWARF_CB_ABORT;
923 	      char *end = sysfile + len;
924 	      do
925 		{
926 		  *--end = '\0';
927 		  f = fopen (sysfile, "r");
928 		  if (is_init && f == NULL && errno == ENOENT)
929 		    {
930 		      sysfile[len - namelen] = '_';
931 		      f = fopen (sysfile, "r");
932 		      sysfile[len - namelen] = '.';
933 		    }
934 		}
935 	      while (f == NULL && errno == ENOENT
936 		     && end - &sysfile[len - namelen] >= MODULE_SECT_NAME_LEN);
937 	      free (sysfile);
938 
939 	      if (f != NULL)
940 		goto ok;
941 	    }
942 	}
943 
944       return DWARF_CB_ABORT;
945     }
946 
947  ok:
948   (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
949 
950   int result = (fscanf (f, "%" PRIx64 "\n", addr) == 1 ? 0
951 		: ferror_unlocked (f) ? errno : ENOEXEC);
952   fclose (f);
953 
954   if (result == 0)
955     return DWARF_CB_OK;
956 
957   errno = result;
958   return DWARF_CB_ABORT;
959 }
INTDEF(dwfl_linux_kernel_module_section_address)960 INTDEF (dwfl_linux_kernel_module_section_address)
961 
962 int
963 dwfl_linux_kernel_report_modules (Dwfl *dwfl)
964 {
965   FILE *f = fopen (MODULELIST, "r");
966   if (f == NULL)
967     return errno;
968 
969   (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
970 
971   int result = 0;
972   Dwarf_Addr modaddr;
973   unsigned long int modsz;
974   char modname[128];
975   char *line = NULL;
976   size_t linesz = 0;
977   /* We can't just use fscanf here because it's not easy to distinguish \n
978      from other whitespace so as to take the optional word following the
979      address but always stop at the end of the line.  */
980   while (getline (&line, &linesz, f) > 0
981 	 && sscanf (line, "%128s %lu %*s %*s %*s %" PRIx64 " %*s\n",
982 		    modname, &modsz, &modaddr) == 3)
983     {
984       Dwfl_Module *mod = INTUSE(dwfl_report_module) (dwfl, modname,
985 						     modaddr, modaddr + modsz);
986       if (mod == NULL)
987 	{
988 	  result = -1;
989 	  break;
990 	}
991 
992       result = check_module_notes (mod);
993     }
994   free (line);
995 
996   if (result == 0)
997     result = ferror_unlocked (f) ? errno : feof_unlocked (f) ? 0 : ENOEXEC;
998 
999   fclose (f);
1000 
1001   return result;
1002 }
1003 INTDEF (dwfl_linux_kernel_report_modules)
1004