1 /* strings -- print the strings of printable characters in files
2    Copyright (C) 1993-2014 Free Software Foundation, Inc.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 3, or (at your option)
7    any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17    02110-1301, USA.  */
18 
19 /* Usage: strings [options] file...
20 
21    Options:
22    --all
23    -a
24    -		Scan each file in its entirety.
25 
26    --data
27    -d		Scan only the initialized data section(s) of object files.
28 
29    --print-file-name
30    -f		Print the name of the file before each string.
31 
32    --bytes=min-len
33    -n min-len
34    -min-len	Print graphic char sequences, MIN-LEN or more bytes long,
35 		that are followed by a NUL or a newline.  Default is 4.
36 
37    --radix={o,x,d}
38    -t {o,x,d}	Print the offset within the file before each string,
39 		in octal/hex/decimal.
40 
41   --include-all-whitespace
42   -w		By default tab and space are the only whitespace included in graphic
43 		char sequences.  This option considers all of isspace() valid.
44 
45    -o		Like -to.  (Some other implementations have -o like -to,
46 		others like -td.  We chose one arbitrarily.)
47 
48    --encoding={s,S,b,l,B,L}
49    -e {s,S,b,l,B,L}
50 		Select character encoding: 7-bit-character, 8-bit-character,
51 		bigendian 16-bit, littleendian 16-bit, bigendian 32-bit,
52 		littleendian 32-bit.
53 
54    --target=BFDNAME
55    -T {bfdname}
56 		Specify a non-default object file format.
57 
58    --help
59    -h		Print the usage message on the standard output.
60 
61    --version
62    -V
63    -v		Print the program version number.
64 
65    Written by Richard Stallman <rms@gnu.ai.mit.edu>
66    and David MacKenzie <djm@gnu.ai.mit.edu>.  */
67 
68 #include "sysdep.h"
69 #include "bfd.h"
70 #include "getopt.h"
71 #include "libiberty.h"
72 #include "safe-ctype.h"
73 #include "bucomm.h"
74 
/* Nonzero if C is a character that may appear in a printed string.
   C must lie in the range 0..255 and be one of: a printable character,
   a tab, a byte above 127 when the 8-bit encoding ('S') is selected, or
   any whitespace character when include_all_whitespace is set.
   Note that C is evaluated more than once.  */
#define STRING_ISGRAPHIC(c) \
      (   (c) >= 0 \
       && (c) <= 255 \
       && (ISPRINT (c) \
           || (c) == '\t' \
           || ((c) > 127 && encoding == 'S') \
           || (ISSPACE (c) && include_all_whitespace == TRUE)) \
      )
81 
82 #ifndef errno
83 extern int errno;
84 #endif
85 
86 /* The BFD section flags that identify an initialized data section.  */
87 #define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)
88 
89 /* Radix for printing addresses (must be 8, 10 or 16).  */
90 static int address_radix;
91 
92 /* Minimum length of sequence of graphic chars to trigger output.  */
93 static int string_min;
94 
95 /* Whether or not we include all whitespace as a graphic char.   */
96 static bfd_boolean include_all_whitespace;
97 
98 /* TRUE means print address within file for each string.  */
99 static bfd_boolean print_addresses;
100 
101 /* TRUE means print filename for each string.  */
102 static bfd_boolean print_filenames;
103 
104 /* TRUE means for object files scan only the data section.  */
105 static bfd_boolean datasection_only;
106 
107 /* TRUE if we found an initialized data section in the current file.  */
108 static bfd_boolean got_a_section;
109 
110 /* The BFD object file format.  */
111 static char *target;
112 
113 /* The character encoding format.  */
114 static char encoding;
115 static int encoding_bytes;
116 
/* Long option table handed to getopt_long.  Each entry maps a long
   option onto the short option character handled in main, so the
   argument requirement given here must agree with the short option
   string used there.  */
static struct option long_options[] =
{
  {"all", no_argument, NULL, 'a'},
  {"data", no_argument, NULL, 'd'},
  {"print-file-name", no_argument, NULL, 'f'},
  {"bytes", required_argument, NULL, 'n'},
  {"radix", required_argument, NULL, 't'},
  /* -w is a plain flag: it must not consume the following word.  */
  {"include-all-whitespace", no_argument, NULL, 'w'},
  {"encoding", required_argument, NULL, 'e'},
  {"target", required_argument, NULL, 'T'},
  {"help", no_argument, NULL, 'h'},
  {"version", no_argument, NULL, 'v'},
  {NULL, 0, NULL, 0}
};
131 
132 /* Records the size of a named file so that we
133    do not repeatedly run bfd_stat() on it.  */
134 
135 typedef struct
136 {
137   const char *  filename;
138   bfd_size_type filesize;
139 } filename_and_size_t;
140 
141 static void strings_a_section (bfd *, asection *, void *);
142 static bfd_boolean strings_object_file (const char *);
143 static bfd_boolean strings_file (char *);
144 static void print_strings (const char *, FILE *, file_ptr, int, int, char *);
145 static void usage (FILE *, int);
146 static long get_char (FILE *, file_ptr *, int *, char **);
147 
148 int main (int, char **);
149 
150 int
main(int argc,char ** argv)151 main (int argc, char **argv)
152 {
153   int optc;
154   int exit_status = 0;
155   bfd_boolean files_given = FALSE;
156   char *s;
157   int numeric_opt = 0;
158 
159 #if defined (HAVE_SETLOCALE)
160   setlocale (LC_ALL, "");
161 #endif
162   bindtextdomain (PACKAGE, LOCALEDIR);
163   textdomain (PACKAGE);
164 
165   program_name = argv[0];
166   xmalloc_set_program_name (program_name);
167 
168   expandargv (&argc, &argv);
169 
170   string_min = 4;
171   include_all_whitespace = FALSE;
172   print_addresses = FALSE;
173   print_filenames = FALSE;
174   if (DEFAULT_STRINGS_ALL)
175     datasection_only = FALSE;
176   else
177     datasection_only = TRUE;
178   target = NULL;
179   encoding = 's';
180 
181   while ((optc = getopt_long (argc, argv, "adfhHn:wot:e:T:Vv0123456789",
182 			      long_options, (int *) 0)) != EOF)
183     {
184       switch (optc)
185 	{
186 	case 'a':
187 	  datasection_only = FALSE;
188 	  break;
189 
190 	case 'd':
191 	  datasection_only = TRUE;
192 	  break;
193 
194 	case 'f':
195 	  print_filenames = TRUE;
196 	  break;
197 
198 	case 'H':
199 	case 'h':
200 	  usage (stdout, 0);
201 
202 	case 'n':
203 	  string_min = (int) strtoul (optarg, &s, 0);
204 	  if (s != NULL && *s != 0)
205 	    fatal (_("invalid integer argument %s"), optarg);
206 	  break;
207 
208 	case 'w':
209 	  include_all_whitespace = TRUE;
210 	  break;
211 
212 	case 'o':
213 	  print_addresses = TRUE;
214 	  address_radix = 8;
215 	  break;
216 
217 	case 't':
218 	  print_addresses = TRUE;
219 	  if (optarg[1] != '\0')
220 	    usage (stderr, 1);
221 	  switch (optarg[0])
222 	    {
223 	    case 'o':
224 	      address_radix = 8;
225 	      break;
226 
227 	    case 'd':
228 	      address_radix = 10;
229 	      break;
230 
231 	    case 'x':
232 	      address_radix = 16;
233 	      break;
234 
235 	    default:
236 	      usage (stderr, 1);
237 	    }
238 	  break;
239 
240 	case 'T':
241 	  target = optarg;
242 	  break;
243 
244 	case 'e':
245 	  if (optarg[1] != '\0')
246 	    usage (stderr, 1);
247 	  encoding = optarg[0];
248 	  break;
249 
250 	case 'V':
251 	case 'v':
252 	  print_version ("strings");
253 	  break;
254 
255 	case '?':
256 	  usage (stderr, 1);
257 
258 	default:
259 	  numeric_opt = optind;
260 	  break;
261 	}
262     }
263 
264   if (numeric_opt != 0)
265     {
266       string_min = (int) strtoul (argv[numeric_opt - 1] + 1, &s, 0);
267       if (s != NULL && *s != 0)
268 	fatal (_("invalid integer argument %s"), argv[numeric_opt - 1] + 1);
269     }
270   if (string_min < 1)
271     fatal (_("invalid minimum string length %d"), string_min);
272 
273   switch (encoding)
274     {
275     case 'S':
276     case 's':
277       encoding_bytes = 1;
278       break;
279     case 'b':
280     case 'l':
281       encoding_bytes = 2;
282       break;
283     case 'B':
284     case 'L':
285       encoding_bytes = 4;
286       break;
287     default:
288       usage (stderr, 1);
289     }
290 
291   bfd_init ();
292   set_default_bfd_target ();
293 
294   if (optind >= argc)
295     {
296       datasection_only = FALSE;
297       SET_BINARY (fileno (stdin));
298       print_strings ("{standard input}", stdin, 0, 0, 0, (char *) NULL);
299       files_given = TRUE;
300     }
301   else
302     {
303       for (; optind < argc; ++optind)
304 	{
305 	  if (strcmp (argv[optind], "-") == 0)
306 	    datasection_only = FALSE;
307 	  else
308 	    {
309 	      files_given = TRUE;
310 	      exit_status |= strings_file (argv[optind]) == FALSE;
311 	    }
312 	}
313     }
314 
315   if (!files_given)
316     usage (stderr, 1);
317 
318   return (exit_status);
319 }
320 
321 /* Scan section SECT of the file ABFD, whose printable name is in
322    ARG->filename and whose size might be in ARG->filesize.  If it
323    contains initialized data set `got_a_section' and print the
324    strings in it.
325 
326    FIXME: We ought to be able to return error codes/messages for
327    certain conditions.  */
328 
329 static void
strings_a_section(bfd * abfd,asection * sect,void * arg)330 strings_a_section (bfd *abfd, asection *sect, void *arg)
331 {
332   filename_and_size_t * filename_and_sizep;
333   bfd_size_type *filesizep;
334   bfd_size_type sectsize;
335   void *mem;
336 
337   if ((sect->flags & DATA_FLAGS) != DATA_FLAGS)
338     return;
339 
340   sectsize = bfd_get_section_size (sect);
341 
342   if (sectsize <= 0)
343     return;
344 
345   /* Get the size of the file.  This might have been cached for us.  */
346   filename_and_sizep = (filename_and_size_t *) arg;
347   filesizep = & filename_and_sizep->filesize;
348 
349   if (*filesizep == 0)
350     {
351       struct stat st;
352 
353       if (bfd_stat (abfd, &st))
354 	return;
355 
356       /* Cache the result so that we do not repeatedly stat this file.  */
357       *filesizep = st.st_size;
358     }
359 
360   /* Compare the size of the section against the size of the file.
361      If the section is bigger then the file must be corrupt and
362      we should not try dumping it.  */
363   if (sectsize >= *filesizep)
364     return;
365 
366   mem = xmalloc (sectsize);
367 
368   if (bfd_get_section_contents (abfd, sect, mem, (file_ptr) 0, sectsize))
369     {
370       got_a_section = TRUE;
371 
372       print_strings (filename_and_sizep->filename, NULL, sect->filepos,
373 		     0, sectsize, (char *) mem);
374     }
375 
376   free (mem);
377 }
378 
379 /* Scan all of the sections in FILE, and print the strings
380    in the initialized data section(s).
381 
382    Return TRUE if successful,
383    FALSE if not (such as if FILE is not an object file).  */
384 
385 static bfd_boolean
strings_object_file(const char * file)386 strings_object_file (const char *file)
387 {
388   filename_and_size_t filename_and_size;
389   bfd *abfd;
390 
391   abfd = bfd_openr (file, target);
392 
393   if (abfd == NULL)
394     /* Treat the file as a non-object file.  */
395     return FALSE;
396 
397   /* This call is mainly for its side effect of reading in the sections.
398      We follow the traditional behavior of `strings' in that we don't
399      complain if we don't recognize a file to be an object file.  */
400   if (!bfd_check_format (abfd, bfd_object))
401     {
402       bfd_close (abfd);
403       return FALSE;
404     }
405 
406   got_a_section = FALSE;
407   filename_and_size.filename = file;
408   filename_and_size.filesize = 0;
409   bfd_map_over_sections (abfd, strings_a_section, & filename_and_size);
410 
411   if (!bfd_close (abfd))
412     {
413       bfd_nonfatal (file);
414       return FALSE;
415     }
416 
417   return got_a_section;
418 }
419 
420 /* Print the strings in FILE.  Return TRUE if ok, FALSE if an error occurs.  */
421 
422 static bfd_boolean
strings_file(char * file)423 strings_file (char *file)
424 {
425   struct stat st;
426 
427   /* get_file_size does not support non-S_ISREG files.  */
428 
429   if (stat (file, &st) < 0)
430     {
431       if (errno == ENOENT)
432 	non_fatal (_("'%s': No such file"), file);
433       else
434 	non_fatal (_("Warning: could not locate '%s'.  reason: %s"),
435 		   file, strerror (errno));
436       return FALSE;
437     }
438 
439   /* If we weren't told to scan the whole file,
440      try to open it as an object file and only look at
441      initialized data sections.  If that fails, fall back to the
442      whole file.  */
443   if (!datasection_only || !strings_object_file (file))
444     {
445       FILE *stream;
446 
447       stream = fopen (file, FOPEN_RB);
448       if (stream == NULL)
449 	{
450 	  fprintf (stderr, "%s: ", program_name);
451 	  perror (file);
452 	  return FALSE;
453 	}
454 
455       print_strings (file, stream, (file_ptr) 0, 0, 0, (char *) 0);
456 
457       if (fclose (stream) == EOF)
458 	{
459 	  fprintf (stderr, "%s: ", program_name);
460 	  perror (file);
461 	  return FALSE;
462 	}
463     }
464 
465   return TRUE;
466 }
467 
468 /* Read the next character, return EOF if none available.
469    Assume that STREAM is positioned so that the next byte read
470    is at address ADDRESS in the file.
471 
472    If STREAM is NULL, do not read from it.
473    The caller can supply a buffer of characters
474    to be processed before the data in STREAM.
475    MAGIC is the address of the buffer and
476    MAGICCOUNT is how many characters are in it.  */
477 
478 static long
get_char(FILE * stream,file_ptr * address,int * magiccount,char ** magic)479 get_char (FILE *stream, file_ptr *address, int *magiccount, char **magic)
480 {
481   int c, i;
482   long r = 0;
483 
484   for (i = 0; i < encoding_bytes; i++)
485     {
486       if (*magiccount)
487 	{
488 	  (*magiccount)--;
489 	  c = *(*magic)++;
490 	}
491       else
492 	{
493 	  if (stream == NULL)
494 	    return EOF;
495 
496 	  /* Only use getc_unlocked if we found a declaration for it.
497 	     Otherwise, libc is not thread safe by default, and we
498 	     should not use it.  */
499 
500 #if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
501 	  c = getc_unlocked (stream);
502 #else
503 	  c = getc (stream);
504 #endif
505 	  if (c == EOF)
506 	    return EOF;
507 	}
508 
509       (*address)++;
510       r = (r << 8) | (c & 0xff);
511     }
512 
513   switch (encoding)
514     {
515     default:
516       break;
517     case 'l':
518       r = ((r & 0xff) << 8) | ((r & 0xff00) >> 8);
519       break;
520     case 'L':
521       r = (((r & 0xff) << 24) | ((r & 0xff00) << 8)
522 	   | ((r & 0xff0000) >> 8) | ((r & 0xff000000) >> 24));
523       break;
524     }
525 
526   return r;
527 }
528 
529 /* Find the strings in file FILENAME, read from STREAM.
530    Assume that STREAM is positioned so that the next byte read
531    is at address ADDRESS in the file.
532    Stop reading at address STOP_POINT in the file, if nonzero.
533 
534    If STREAM is NULL, do not read from it.
535    The caller can supply a buffer of characters
536    to be processed before the data in STREAM.
537    MAGIC is the address of the buffer and
538    MAGICCOUNT is how many characters are in it.
539    Those characters come at address ADDRESS and the data in STREAM follow.  */
540 
541 static void
print_strings(const char * filename,FILE * stream,file_ptr address,int stop_point,int magiccount,char * magic)542 print_strings (const char *filename, FILE *stream, file_ptr address,
543 	       int stop_point, int magiccount, char *magic)
544 {
545   char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));
546 
547   while (1)
548     {
549       file_ptr start;
550       int i;
551       long c;
552 
553       /* See if the next `string_min' chars are all graphic chars.  */
554     tryline:
555       if (stop_point && address >= stop_point)
556 	break;
557       start = address;
558       for (i = 0; i < string_min; i++)
559 	{
560 	  c = get_char (stream, &address, &magiccount, &magic);
561 	  if (c == EOF)
562 	    {
563 	      free (buf);
564 	      return;
565 	    }
566 	  if (! STRING_ISGRAPHIC (c))
567 	    /* Found a non-graphic.  Try again starting with next char.  */
568 	    goto tryline;
569 	  buf[i] = c;
570 	}
571 
572       /* We found a run of `string_min' graphic characters.  Print up
573 	 to the next non-graphic character.  */
574 
575       if (print_filenames)
576 	printf ("%s: ", filename);
577       if (print_addresses)
578 	switch (address_radix)
579 	  {
580 	  case 8:
581 #if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
582 	    if (sizeof (start) > sizeof (long))
583 	      {
584 #ifndef __MSVCRT__
585 	        printf ("%7llo ", (unsigned long long) start);
586 #else
587 	        printf ("%7I64o ", (unsigned long long) start);
588 #endif
589 	      }
590 	    else
591 #elif !BFD_HOST_64BIT_LONG
592 	    if (start != (unsigned long) start)
593 	      printf ("++%7lo ", (unsigned long) start);
594 	    else
595 #endif
596 	      printf ("%7lo ", (unsigned long) start);
597 	    break;
598 
599 	  case 10:
600 #if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
601 	    if (sizeof (start) > sizeof (long))
602 	      {
603 #ifndef __MSVCRT__
604 	        printf ("%7lld ", (unsigned long long) start);
605 #else
606 	        printf ("%7I64d ", (unsigned long long) start);
607 #endif
608 	      }
609 	    else
610 #elif !BFD_HOST_64BIT_LONG
611 	    if (start != (unsigned long) start)
612 	      printf ("++%7ld ", (unsigned long) start);
613 	    else
614 #endif
615 	      printf ("%7ld ", (long) start);
616 	    break;
617 
618 	  case 16:
619 #if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
620 	    if (sizeof (start) > sizeof (long))
621 	      {
622 #ifndef __MSVCRT__
623 	        printf ("%7llx ", (unsigned long long) start);
624 #else
625 	        printf ("%7I64x ", (unsigned long long) start);
626 #endif
627 	      }
628 	    else
629 #elif !BFD_HOST_64BIT_LONG
630 	    if (start != (unsigned long) start)
631 	      printf ("%lx%8.8lx ", (unsigned long) (start >> 32),
632 		      (unsigned long) (start & 0xffffffff));
633 	    else
634 #endif
635 	      printf ("%7lx ", (unsigned long) start);
636 	    break;
637 	  }
638 
639       buf[i] = '\0';
640       fputs (buf, stdout);
641 
642       while (1)
643 	{
644 	  c = get_char (stream, &address, &magiccount, &magic);
645 	  if (c == EOF)
646 	    break;
647 	  if (! STRING_ISGRAPHIC (c))
648 	    break;
649 	  putchar (c);
650 	}
651 
652       putchar ('\n');
653     }
654   free (buf);
655 }
656 
657 static void
usage(FILE * stream,int status)658 usage (FILE *stream, int status)
659 {
660   fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name);
661   fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n"));
662   fprintf (stream, _(" The options are:\n"));
663 
664   if (DEFAULT_STRINGS_ALL)
665     fprintf (stream, _("\
666   -a - --all                Scan the entire file, not just the data section [default]\n\
667   -d --data                 Only scan the data sections in the file\n"));
668   else
669     fprintf (stream, _("\
670   -a - --all                Scan the entire file, not just the data section\n\
671   -d --data                 Only scan the data sections in the file [default]\n"));
672 
673   fprintf (stream, _("\
674   -f --print-file-name      Print the name of the file before each string\n\
675   -n --bytes=[number]       Locate & print any NUL-terminated sequence of at\n\
676   -<number>                   least [number] characters (default 4).\n\
677   -t --radix={o,d,x}        Print the location of the string in base 8, 10 or 16\n\
678   -w --include-all-whitespace Include all whitespace as valid string characters\n\
679   -o                        An alias for --radix=o\n\
680   -T --target=<BFDNAME>     Specify the binary file format\n\
681   -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\
682                             s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
683   @<file>                   Read options from <file>\n\
684   -h --help                 Display this information\n\
685   -v -V --version           Print the program's version number\n"));
686   list_supported_targets (program_name, stream);
687   if (REPORT_BUGS_TO[0] && status == 0)
688     fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO);
689   exit (status);
690 }
691