1 /* strings -- print the strings of printable characters in files
2    Copyright (C) 1993-2016 Free Software Foundation, Inc.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 3, or (at your option)
7    any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17    02110-1301, USA.  */
18 
19 /* Usage: strings [options] file...
20 
21    Options:
22    --all
23    -a
24    -		Scan each file in its entirety.
25 
26    --data
27    -d		Scan only the initialized data section(s) of object files.
28 
29    --print-file-name
30    -f		Print the name of the file before each string.
31 
32    --bytes=min-len
33    -n min-len
34    -min-len	Print graphic char sequences, MIN-LEN or more bytes long,
35 		that are followed by a NUL or a newline.  Default is 4.
36 
37    --radix={o,x,d}
38    -t {o,x,d}	Print the offset within the file before each string,
39 		in octal/hex/decimal.
40 
41   --include-all-whitespace
  -w		By default tab and space are the only whitespace included in graphic
43 		char sequences.  This option considers all of isspace() valid.
44 
45    -o		Like -to.  (Some other implementations have -o like -to,
46 		others like -td.  We chose one arbitrarily.)
47 
48    --encoding={s,S,b,l,B,L}
49    -e {s,S,b,l,B,L}
50 		Select character encoding: 7-bit-character, 8-bit-character,
51 		bigendian 16-bit, littleendian 16-bit, bigendian 32-bit,
52 		littleendian 32-bit.
53 
54    --target=BFDNAME
55    -T {bfdname}
56 		Specify a non-default object file format.
57 
58   --output-separator=sep_string
59   -s sep_string	String used to separate parsed strings in output.
60 		Default is newline.
61 
62    --help
63    -h		Print the usage message on the standard output.
64 
65    --version
66    -V
67    -v		Print the program version number.
68 
69    Written by Richard Stallman <rms@gnu.ai.mit.edu>
70    and David MacKenzie <djm@gnu.ai.mit.edu>.  */
71 
72 #include "sysdep.h"
73 #include "bfd.h"
74 #include "getopt.h"
75 #include "libiberty.h"
76 #include "safe-ctype.h"
77 #include "bucomm.h"
78 
/* Nonzero if C may be part of a string that we print.  C has to lie
   in the range 0..255 and be one of: something ISPRINT accepts, a tab,
   a value above 127 when the 'S' encoding has been selected, or
   anything ISSPACE accepts when include_all_whitespace is set.
   C is expanded several times, so it must be free of side effects.  */
#define STRING_ISGRAPHIC(c) \
  ((c) >= 0 && (c) <= 255 \
   && (ISPRINT (c) \
       || (c) == '\t' \
       || ((c) > 127 && encoding == 'S') \
       || (ISSPACE (c) && include_all_whitespace == TRUE)))
85 
86 #ifndef errno
87 extern int errno;
88 #endif
89 
90 /* The BFD section flags that identify an initialized data section.  */
91 #define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)
92 
93 /* Radix for printing addresses (must be 8, 10 or 16).  */
94 static int address_radix;
95 
96 /* Minimum length of sequence of graphic chars to trigger output.  */
97 static int string_min;
98 
99 /* Whether or not we include all whitespace as a graphic char.   */
100 static bfd_boolean include_all_whitespace;
101 
102 /* TRUE means print address within file for each string.  */
103 static bfd_boolean print_addresses;
104 
105 /* TRUE means print filename for each string.  */
106 static bfd_boolean print_filenames;
107 
108 /* TRUE means for object files scan only the data section.  */
109 static bfd_boolean datasection_only;
110 
111 /* TRUE if we found an initialized data section in the current file.  */
112 static bfd_boolean got_a_section;
113 
114 /* The BFD object file format.  */
115 static char *target;
116 
117 /* The character encoding format.  */
118 static char encoding;
119 static int encoding_bytes;
120 
121 /* Output string used to separate parsed strings  */
122 static char *output_separator;
123 
/* Long options accepted by getopt_long.  Every entry returns the
   letter of the equivalent short option, so main only has to handle
   the short forms.  */
static struct option long_options[] =
{
  {"all", no_argument, NULL, 'a'},
  {"data", no_argument, NULL, 'd'},
  {"print-file-name", no_argument, NULL, 'f'},
  {"bytes", required_argument, NULL, 'n'},
  {"radix", required_argument, NULL, 't'},
  /* Like -w this is a plain flag; it must not consume an argument.  */
  {"include-all-whitespace", no_argument, NULL, 'w'},
  {"encoding", required_argument, NULL, 'e'},
  {"target", required_argument, NULL, 'T'},
  {"output-separator", required_argument, NULL, 's'},
  {"help", no_argument, NULL, 'h'},
  {"version", no_argument, NULL, 'v'},
  {NULL, 0, NULL, 0}
};
139 
140 /* Records the size of a named file so that we
141    do not repeatedly run bfd_stat() on it.  */
142 
143 typedef struct
144 {
145   const char *  filename;
146   bfd_size_type filesize;
147 } filename_and_size_t;
148 
149 static void strings_a_section (bfd *, asection *, void *);
150 static bfd_boolean strings_object_file (const char *);
151 static bfd_boolean strings_file (char *);
152 static void print_strings (const char *, FILE *, file_ptr, int, int, char *);
153 static void usage (FILE *, int);
154 static long get_char (FILE *, file_ptr *, int *, char **);
155 
156 int main (int, char **);
157 
158 int
main(int argc,char ** argv)159 main (int argc, char **argv)
160 {
161   int optc;
162   int exit_status = 0;
163   bfd_boolean files_given = FALSE;
164   char *s;
165   int numeric_opt = 0;
166 
167 #if defined (HAVE_SETLOCALE)
168   setlocale (LC_ALL, "");
169 #endif
170   bindtextdomain (PACKAGE, LOCALEDIR);
171   textdomain (PACKAGE);
172 
173   program_name = argv[0];
174   xmalloc_set_program_name (program_name);
175   bfd_set_error_program_name (program_name);
176 
177   expandargv (&argc, &argv);
178 
179   string_min = 4;
180   include_all_whitespace = FALSE;
181   print_addresses = FALSE;
182   print_filenames = FALSE;
183   if (DEFAULT_STRINGS_ALL)
184     datasection_only = FALSE;
185   else
186     datasection_only = TRUE;
187   target = NULL;
188   encoding = 's';
189   output_separator = NULL;
190 
191   while ((optc = getopt_long (argc, argv, "adfhHn:wot:e:T:s:Vv0123456789",
192 			      long_options, (int *) 0)) != EOF)
193     {
194       switch (optc)
195 	{
196 	case 'a':
197 	  datasection_only = FALSE;
198 	  break;
199 
200 	case 'd':
201 	  datasection_only = TRUE;
202 	  break;
203 
204 	case 'f':
205 	  print_filenames = TRUE;
206 	  break;
207 
208 	case 'H':
209 	case 'h':
210 	  usage (stdout, 0);
211 
212 	case 'n':
213 	  string_min = (int) strtoul (optarg, &s, 0);
214 	  if (s != NULL && *s != 0)
215 	    fatal (_("invalid integer argument %s"), optarg);
216 	  break;
217 
218 	case 'w':
219 	  include_all_whitespace = TRUE;
220 	  break;
221 
222 	case 'o':
223 	  print_addresses = TRUE;
224 	  address_radix = 8;
225 	  break;
226 
227 	case 't':
228 	  print_addresses = TRUE;
229 	  if (optarg[1] != '\0')
230 	    usage (stderr, 1);
231 	  switch (optarg[0])
232 	    {
233 	    case 'o':
234 	      address_radix = 8;
235 	      break;
236 
237 	    case 'd':
238 	      address_radix = 10;
239 	      break;
240 
241 	    case 'x':
242 	      address_radix = 16;
243 	      break;
244 
245 	    default:
246 	      usage (stderr, 1);
247 	    }
248 	  break;
249 
250 	case 'T':
251 	  target = optarg;
252 	  break;
253 
254 	case 'e':
255 	  if (optarg[1] != '\0')
256 	    usage (stderr, 1);
257 	  encoding = optarg[0];
258 	  break;
259 
260 	case 's':
261 	  output_separator = optarg;
262           break;
263 
264 	case 'V':
265 	case 'v':
266 	  print_version ("strings");
267 	  break;
268 
269 	case '?':
270 	  usage (stderr, 1);
271 
272 	default:
273 	  numeric_opt = optind;
274 	  break;
275 	}
276     }
277 
278   if (numeric_opt != 0)
279     {
280       string_min = (int) strtoul (argv[numeric_opt - 1] + 1, &s, 0);
281       if (s != NULL && *s != 0)
282 	fatal (_("invalid integer argument %s"), argv[numeric_opt - 1] + 1);
283     }
284   if (string_min < 1)
285     fatal (_("invalid minimum string length %d"), string_min);
286 
287   switch (encoding)
288     {
289     case 'S':
290     case 's':
291       encoding_bytes = 1;
292       break;
293     case 'b':
294     case 'l':
295       encoding_bytes = 2;
296       break;
297     case 'B':
298     case 'L':
299       encoding_bytes = 4;
300       break;
301     default:
302       usage (stderr, 1);
303     }
304 
305   bfd_init ();
306   set_default_bfd_target ();
307 
308   if (optind >= argc)
309     {
310       datasection_only = FALSE;
311       SET_BINARY (fileno (stdin));
312       print_strings ("{standard input}", stdin, 0, 0, 0, (char *) NULL);
313       files_given = TRUE;
314     }
315   else
316     {
317       for (; optind < argc; ++optind)
318 	{
319 	  if (strcmp (argv[optind], "-") == 0)
320 	    datasection_only = FALSE;
321 	  else
322 	    {
323 	      files_given = TRUE;
324 	      exit_status |= strings_file (argv[optind]) == FALSE;
325 	    }
326 	}
327     }
328 
329   if (!files_given)
330     usage (stderr, 1);
331 
332   return (exit_status);
333 }
334 
335 /* Scan section SECT of the file ABFD, whose printable name is in
336    ARG->filename and whose size might be in ARG->filesize.  If it
337    contains initialized data set `got_a_section' and print the
338    strings in it.
339 
340    FIXME: We ought to be able to return error codes/messages for
341    certain conditions.  */
342 
343 static void
strings_a_section(bfd * abfd,asection * sect,void * arg)344 strings_a_section (bfd *abfd, asection *sect, void *arg)
345 {
346   filename_and_size_t * filename_and_sizep;
347   bfd_size_type *filesizep;
348   bfd_size_type sectsize;
349   void *mem;
350 
351   if ((sect->flags & DATA_FLAGS) != DATA_FLAGS)
352     return;
353 
354   sectsize = bfd_get_section_size (sect);
355 
356   if (sectsize <= 0)
357     return;
358 
359   /* Get the size of the file.  This might have been cached for us.  */
360   filename_and_sizep = (filename_and_size_t *) arg;
361   filesizep = & filename_and_sizep->filesize;
362 
363   if (*filesizep == 0)
364     {
365       struct stat st;
366 
367       if (bfd_stat (abfd, &st))
368 	return;
369 
370       /* Cache the result so that we do not repeatedly stat this file.  */
371       *filesizep = st.st_size;
372     }
373 
374   /* Compare the size of the section against the size of the file.
375      If the section is bigger then the file must be corrupt and
376      we should not try dumping it.  */
377   if (sectsize >= *filesizep)
378     return;
379 
380   mem = xmalloc (sectsize);
381 
382   if (bfd_get_section_contents (abfd, sect, mem, (file_ptr) 0, sectsize))
383     {
384       got_a_section = TRUE;
385 
386       print_strings (filename_and_sizep->filename, NULL, sect->filepos,
387 		     0, sectsize, (char *) mem);
388     }
389 
390   free (mem);
391 }
392 
393 /* Scan all of the sections in FILE, and print the strings
394    in the initialized data section(s).
395 
396    Return TRUE if successful,
397    FALSE if not (such as if FILE is not an object file).  */
398 
399 static bfd_boolean
strings_object_file(const char * file)400 strings_object_file (const char *file)
401 {
402   filename_and_size_t filename_and_size;
403   bfd *abfd;
404 
405   abfd = bfd_openr (file, target);
406 
407   if (abfd == NULL)
408     /* Treat the file as a non-object file.  */
409     return FALSE;
410 
411   /* This call is mainly for its side effect of reading in the sections.
412      We follow the traditional behavior of `strings' in that we don't
413      complain if we don't recognize a file to be an object file.  */
414   if (!bfd_check_format (abfd, bfd_object))
415     {
416       bfd_close (abfd);
417       return FALSE;
418     }
419 
420   got_a_section = FALSE;
421   filename_and_size.filename = file;
422   filename_and_size.filesize = 0;
423   bfd_map_over_sections (abfd, strings_a_section, & filename_and_size);
424 
425   if (!bfd_close (abfd))
426     {
427       bfd_nonfatal (file);
428       return FALSE;
429     }
430 
431   return got_a_section;
432 }
433 
434 /* Print the strings in FILE.  Return TRUE if ok, FALSE if an error occurs.  */
435 
436 static bfd_boolean
strings_file(char * file)437 strings_file (char *file)
438 {
439   struct stat st;
440 
441   /* get_file_size does not support non-S_ISREG files.  */
442 
443   if (stat (file, &st) < 0)
444     {
445       if (errno == ENOENT)
446 	non_fatal (_("'%s': No such file"), file);
447       else
448 	non_fatal (_("Warning: could not locate '%s'.  reason: %s"),
449 		   file, strerror (errno));
450       return FALSE;
451     }
452 
453   /* If we weren't told to scan the whole file,
454      try to open it as an object file and only look at
455      initialized data sections.  If that fails, fall back to the
456      whole file.  */
457   if (!datasection_only || !strings_object_file (file))
458     {
459       FILE *stream;
460 
461       stream = fopen (file, FOPEN_RB);
462       if (stream == NULL)
463 	{
464 	  fprintf (stderr, "%s: ", program_name);
465 	  perror (file);
466 	  return FALSE;
467 	}
468 
469       print_strings (file, stream, (file_ptr) 0, 0, 0, (char *) 0);
470 
471       if (fclose (stream) == EOF)
472 	{
473 	  fprintf (stderr, "%s: ", program_name);
474 	  perror (file);
475 	  return FALSE;
476 	}
477     }
478 
479   return TRUE;
480 }
481 
482 /* Read the next character, return EOF if none available.
483    Assume that STREAM is positioned so that the next byte read
484    is at address ADDRESS in the file.
485 
486    If STREAM is NULL, do not read from it.
487    The caller can supply a buffer of characters
488    to be processed before the data in STREAM.
489    MAGIC is the address of the buffer and
490    MAGICCOUNT is how many characters are in it.  */
491 
492 static long
get_char(FILE * stream,file_ptr * address,int * magiccount,char ** magic)493 get_char (FILE *stream, file_ptr *address, int *magiccount, char **magic)
494 {
495   int c, i;
496   long r = 0;
497 
498   for (i = 0; i < encoding_bytes; i++)
499     {
500       if (*magiccount)
501 	{
502 	  (*magiccount)--;
503 	  c = *(*magic)++;
504 	}
505       else
506 	{
507 	  if (stream == NULL)
508 	    return EOF;
509 
510 	  /* Only use getc_unlocked if we found a declaration for it.
511 	     Otherwise, libc is not thread safe by default, and we
512 	     should not use it.  */
513 
514 #if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
515 	  c = getc_unlocked (stream);
516 #else
517 	  c = getc (stream);
518 #endif
519 	  if (c == EOF)
520 	    return EOF;
521 	}
522 
523       (*address)++;
524       r = (r << 8) | (c & 0xff);
525     }
526 
527   switch (encoding)
528     {
529     default:
530       break;
531     case 'l':
532       r = ((r & 0xff) << 8) | ((r & 0xff00) >> 8);
533       break;
534     case 'L':
535       r = (((r & 0xff) << 24) | ((r & 0xff00) << 8)
536 	   | ((r & 0xff0000) >> 8) | ((r & 0xff000000) >> 24));
537       break;
538     }
539 
540   return r;
541 }
542 
543 /* Find the strings in file FILENAME, read from STREAM.
544    Assume that STREAM is positioned so that the next byte read
545    is at address ADDRESS in the file.
546    Stop reading at address STOP_POINT in the file, if nonzero.
547 
548    If STREAM is NULL, do not read from it.
549    The caller can supply a buffer of characters
550    to be processed before the data in STREAM.
551    MAGIC is the address of the buffer and
552    MAGICCOUNT is how many characters are in it.
553    Those characters come at address ADDRESS and the data in STREAM follow.  */
554 
555 static void
print_strings(const char * filename,FILE * stream,file_ptr address,int stop_point,int magiccount,char * magic)556 print_strings (const char *filename, FILE *stream, file_ptr address,
557 	       int stop_point, int magiccount, char *magic)
558 {
559   char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));
560 
561   while (1)
562     {
563       file_ptr start;
564       int i;
565       long c;
566 
567       /* See if the next `string_min' chars are all graphic chars.  */
568     tryline:
569       if (stop_point && address >= stop_point)
570 	break;
571       start = address;
572       for (i = 0; i < string_min; i++)
573 	{
574 	  c = get_char (stream, &address, &magiccount, &magic);
575 	  if (c == EOF)
576 	    {
577 	      free (buf);
578 	      return;
579 	    }
580 	  if (! STRING_ISGRAPHIC (c))
581 	    /* Found a non-graphic.  Try again starting with next char.  */
582 	    goto tryline;
583 	  buf[i] = c;
584 	}
585 
586       /* We found a run of `string_min' graphic characters.  Print up
587 	 to the next non-graphic character.  */
588 
589       if (print_filenames)
590 	printf ("%s: ", filename);
591       if (print_addresses)
592 	switch (address_radix)
593 	  {
594 	  case 8:
595 #ifdef HAVE_LONG_LONG
596 	    if (sizeof (start) > sizeof (long))
597 	      {
598 # ifndef __MSVCRT__
599 	        printf ("%7llo ", (unsigned long long) start);
600 # else
601 	        printf ("%7I64o ", (unsigned long long) start);
602 # endif
603 	      }
604 	    else
605 #elif !BFD_HOST_64BIT_LONG
606 	    if (start != (unsigned long) start)
607 	      printf ("++%7lo ", (unsigned long) start);
608 	    else
609 #endif
610 	      printf ("%7lo ", (unsigned long) start);
611 	    break;
612 
613 	  case 10:
614 #ifdef HAVE_LONG_LONG
615 	    if (sizeof (start) > sizeof (long))
616 	      {
617 # ifndef __MSVCRT__
618 	        printf ("%7lld ", (unsigned long long) start);
619 # else
620 	        printf ("%7I64d ", (unsigned long long) start);
621 # endif
622 	      }
623 	    else
624 #elif !BFD_HOST_64BIT_LONG
625 	    if (start != (unsigned long) start)
626 	      printf ("++%7ld ", (unsigned long) start);
627 	    else
628 #endif
629 	      printf ("%7ld ", (long) start);
630 	    break;
631 
632 	  case 16:
633 #ifdef HAVE_LONG_LONG
634 	    if (sizeof (start) > sizeof (long))
635 	      {
636 # ifndef __MSVCRT__
637 	        printf ("%7llx ", (unsigned long long) start);
638 # else
639 	        printf ("%7I64x ", (unsigned long long) start);
640 # endif
641 	      }
642 	    else
643 #elif !BFD_HOST_64BIT_LONG
644 	    if (start != (unsigned long) start)
645 	      printf ("%lx%8.8lx ", (unsigned long) (start >> 32),
646 		      (unsigned long) (start & 0xffffffff));
647 	    else
648 #endif
649 	      printf ("%7lx ", (unsigned long) start);
650 	    break;
651 	  }
652 
653       buf[i] = '\0';
654       fputs (buf, stdout);
655 
656       while (1)
657 	{
658 	  c = get_char (stream, &address, &magiccount, &magic);
659 	  if (c == EOF)
660 	    break;
661 	  if (! STRING_ISGRAPHIC (c))
662 	    break;
663 	  putchar (c);
664 	}
665 
666       if (output_separator)
667         fputs (output_separator, stdout);
668       else
669         putchar ('\n');
670     }
671   free (buf);
672 }
673 
674 static void
usage(FILE * stream,int status)675 usage (FILE *stream, int status)
676 {
677   fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name);
678   fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n"));
679   fprintf (stream, _(" The options are:\n"));
680 
681   if (DEFAULT_STRINGS_ALL)
682     fprintf (stream, _("\
683   -a - --all                Scan the entire file, not just the data section [default]\n\
684   -d --data                 Only scan the data sections in the file\n"));
685   else
686     fprintf (stream, _("\
687   -a - --all                Scan the entire file, not just the data section\n\
688   -d --data                 Only scan the data sections in the file [default]\n"));
689 
690   fprintf (stream, _("\
691   -f --print-file-name      Print the name of the file before each string\n\
692   -n --bytes=[number]       Locate & print any NUL-terminated sequence of at\n\
693   -<number>                   least [number] characters (default 4).\n\
694   -t --radix={o,d,x}        Print the location of the string in base 8, 10 or 16\n\
695   -w --include-all-whitespace Include all whitespace as valid string characters\n\
696   -o                        An alias for --radix=o\n\
697   -T --target=<BFDNAME>     Specify the binary file format\n\
698   -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\
699                             s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
700   -s --output-separator=<string> String used to separate strings in output.\n\
701   @<file>                   Read options from <file>\n\
702   -h --help                 Display this information\n\
703   -v -V --version           Print the program's version number\n"));
704   list_supported_targets (program_name, stream);
705   if (REPORT_BUGS_TO[0] && status == 0)
706     fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO);
707   exit (status);
708 }
709