1 /* mclex.c -- lexer for Windows mc files parser.
2    Copyright (C) 2007-2014 Free Software Foundation, Inc.
3 
4    Written by Kai Tietz, Onevision.
5 
6    This file is part of GNU Binutils.
7 
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12 
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, write to the Free Software
20    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
21    02110-1301, USA.  */
22 
/* This is a lexer used by the Windows mc file parser.
   It basically just recognizes a bunch of keywords.  */
25 
26 #include "sysdep.h"
27 #include "bfd.h"
28 #include "bucomm.h"
29 #include "libiberty.h"
30 #include "safe-ctype.h"
31 #include "windmc.h"
32 #include "mcparse.h"
33 
34 #include <assert.h>
35 
/* Exported globals.  */

/* When set, yylex returns NL for the next newline it skips and then
   clears the flag.  */
bfd_boolean mclex_want_nl = FALSE;
/* When set, yylex returns whole input lines as MCLINE tokens until it
   meets a line consisting of a single '.', for which it returns
   MCENDLINE and clears the flag.  */
bfd_boolean mclex_want_line = FALSE;
/* When set, yylex scans the next token as a (possibly double-quoted)
   file name, returns MCFILENAME and clears the flag.  */
bfd_boolean mclex_want_filename = FALSE;

/* Local globals.  */

/* Start of the text installed by mc_set_content.  */
static unichar *input_stream = NULL;
/* Current read position inside input_stream; NULL until content has
   been installed.  */
static unichar *input_stream_pos = NULL;
/* Line counter printed in diagnostics by show_msg.  */
static int input_line = 1;
/* File name printed in diagnostics; set by mc_set_inputfile.  */
static const char *input_filename = NULL;
46 
47 void
mc_set_content(const unichar * src)48 mc_set_content (const unichar *src)
49 {
50   if (!src)
51     return;
52   input_stream = input_stream_pos = unichar_dup (src);
53 }
54 
55 void
mc_set_inputfile(const char * name)56 mc_set_inputfile (const char *name)
57 {
58   if (! name || *name == 0)
59     input_filename = "-";
60   else
61     {
62       const char *s1 = strrchr (name, '/');
63       const char *s2 = strrchr (name, '\\');
64 
65       if (! s1)
66 	s1 = s2;
67       if (s1 && s2 && s1 < s2)
68 	s1 = s2;
69       if (! s1)
70 	s1 = name;
71       else
72 	s1++;
73       s1 = xstrdup (s1);
74       input_filename = s1;
75     }
76 }
77 
78 static void
show_msg(const char * kind,const char * msg,va_list argp)79 show_msg (const char *kind, const char *msg, va_list argp)
80 {
81   fprintf (stderr, "In %s at line %d: %s: ", input_filename, input_line, kind);
82   vfprintf (stderr, msg, argp);
83   fprintf (stderr, ".\n");
84 }
85 
/* Emit a "warning" diagnostic.  S is a printf-style format string that
   is expanded with the arguments following it.  */

void
mc_warn (const char *s, ...)
{
  va_list ap;

  va_start (ap, s);
  show_msg ("warning", s, ap);
  va_end (ap);
}
94 
/* Emit a "fatal" diagnostic built from the printf-style format S and
   the arguments following it, then leave the program through
   xexit (1).  */

void
mc_fatal (const char *s, ...)
{
  va_list ap;

  va_start (ap, s);
  show_msg ("fatal", s, ap);
  va_end (ap);

  xexit (1);
}
104 
105 
/* Emit a "parser" diagnostic built from the printf-style format S and
   the arguments following it.  Always returns 1.  */

int
yyerror (const char *s, ...)
{
  va_list ap;

  va_start (ap, s);
  show_msg ("parser", s, ap);
  va_end (ap);

  return 1;
}
115 
116 static unichar *
get_diff(unichar * end,unichar * start)117 get_diff (unichar *end, unichar *start)
118 {
119   unichar *ret;
120   unichar save = *end;
121 
122   *end = 0;
123   ret = unichar_dup (start);
124   *end = save;
125   return ret;
126 }
127 
128 static rc_uint_type
parse_digit(unichar ch)129 parse_digit (unichar ch)
130 {
131   rc_uint_type base = 10, v = 0, c;
132 
133   if (ch == '0')
134     {
135       base = 8;
136       switch (input_stream_pos[0])
137 	{
138 	case 'x': case 'X': base = 16; input_stream_pos++; break;
139 	case 'o': case 'O': base = 8; input_stream_pos++; break;
140 	case 'b': case 'B': base = 2; input_stream_pos++; break;
141 	}
142     }
143   else
144     v = (rc_uint_type) (ch - '0');
145 
146   while ((ch = input_stream_pos[0]) != 0)
147     {
148       if (ch >= 'A' && ch <= 'F')
149 	c = (rc_uint_type) (ch - 'A') + 10;
150       else if (ch >= 'a' && ch <= 'f')
151 	c = (rc_uint_type) (ch - 'a') + 10;
152       else if (ch >= '0' && ch <= '9')
153 	c = (rc_uint_type) (ch - '0');
154       else
155 	break;
156       v *= base;
157       v += c;
158       ++input_stream_pos;
159     }
160   if (input_stream_pos[0] == 'U' || input_stream_pos[0] == 'u')
161     input_stream_pos++;
162   if (input_stream_pos[0] == 'L' || input_stream_pos[0] == 'l')
163     input_stream_pos++;
164   if (input_stream_pos[0] == 'L' || input_stream_pos[0] == 'l')
165     input_stream_pos++;
166   return v;
167 }
168 
/* Head of the singly linked list of known keywords.  mc_add_keyword
   keeps the list ordered by name length and then by the memcmp order
   of the names.  */
static mc_keyword *keyword_top = NULL;
170 
171 const mc_keyword *
enum_facility(int e)172 enum_facility (int e)
173 {
174   mc_keyword *h = keyword_top;
175 
176   while (h != NULL)
177     {
178       while (h && strcmp (h->group_name, "facility") != 0)
179 	h = h->next;
180       if (e == 0)
181 	return h;
182       --e;
183       if (h)
184 	h = h->next;
185     }
186   return h;
187 }
188 
189 const mc_keyword *
enum_severity(int e)190 enum_severity (int e)
191 {
192   mc_keyword *h = keyword_top;
193 
194   while (h != NULL)
195     {
196       while (h && strcmp (h->group_name, "severity") != 0)
197 	h = h->next;
198       if (e == 0)
199 	return h;
200       --e;
201       if (h)
202 	h = h->next;
203     }
204   return h;
205 }
206 
207 static void
mc_add_keyword_ascii(const char * sz,int rid,const char * grp,rc_uint_type nv,const char * sv)208 mc_add_keyword_ascii (const char *sz, int rid, const char *grp, rc_uint_type nv, const char *sv)
209 {
210   unichar *usz, *usv = NULL;
211   rc_uint_type usz_len;
212 
213   unicode_from_codepage (&usz_len, &usz, sz, CP_ACP);
214   if (sv)
215     unicode_from_codepage (&usz_len, &usv, sv, CP_ACP);
216   mc_add_keyword (usz, rid, grp, nv, usv);
217 }
218 
219 void
mc_add_keyword(unichar * usz,int rid,const char * grp,rc_uint_type nv,unichar * sv)220 mc_add_keyword (unichar *usz, int rid, const char *grp, rc_uint_type nv, unichar *sv)
221 {
222   mc_keyword *p, *c, *n;
223   size_t len = unichar_len (usz);
224 
225   c = keyword_top;
226   p = NULL;
227   while (c != NULL)
228     {
229       if (c->len > len)
230 	break;
231       if (c->len == len)
232 	{
233 	  int e = memcmp (usz, c->usz, len * sizeof (unichar));
234 
235 	  if (e < 0)
236 	    break;
237 	  if (! e)
238 	    {
239 	      if (! strcmp (grp, "keyword") || strcmp (c->group_name, grp) != 0)
240 		fatal (_("Duplicate symbol entered into keyword list."));
241 	      c->rid = rid;
242 	      c->nval = nv;
243 	      c->sval = (!sv ? NULL : unichar_dup (sv));
244 	      if (! strcmp (grp, "language"))
245 		{
246 		  const wind_language_t *lag = wind_find_language_by_id ((unsigned) nv);
247 
248 		  if (lag == NULL)
249 		    fatal ("Language ident 0x%lx is not resolvable.\n", (long) nv);
250 		  memcpy (&c->lang_info, lag, sizeof (*lag));
251 		}
252 	      return;
253 	    }
254 	}
255       c = (p = c)->next;
256     }
257   n = xmalloc (sizeof (mc_keyword));
258   n->next = c;
259   n->len = len;
260   n->group_name = grp;
261   n->usz = usz;
262   n->rid = rid;
263   n->nval = nv;
264   n->sval = (!sv ? NULL : unichar_dup (sv));
265   if (! strcmp (grp, "language"))
266     {
267       const wind_language_t *lag = wind_find_language_by_id ((unsigned) nv);
268       if (lag == NULL)
269 	fatal ("Language ident 0x%lx is not resolvable.\n", (long) nv);
270       memcpy (&n->lang_info, lag, sizeof (*lag));
271     }
272   if (! p)
273     keyword_top = n;
274   else
275     p->next = n;
276 }
277 
278 static int
mc_token(const unichar * t,size_t len)279 mc_token (const unichar *t, size_t len)
280 {
281   static int was_init = 0;
282   mc_keyword *k;
283 
284   if (! was_init)
285     {
286       was_init = 1;
287       mc_add_keyword_ascii ("OutputBase", MCOUTPUTBASE, "keyword", 0, NULL);
288       mc_add_keyword_ascii ("MessageIdTypedef", MCMESSAGEIDTYPEDEF, "keyword", 0, NULL);
289       mc_add_keyword_ascii ("SeverityNames", MCSEVERITYNAMES, "keyword", 0, NULL);
290       mc_add_keyword_ascii ("FacilityNames", MCFACILITYNAMES, "keyword", 0, NULL);
291       mc_add_keyword_ascii ("LanguageNames", MCLANGUAGENAMES, "keyword", 0, NULL);
292       mc_add_keyword_ascii ("MessageId", MCMESSAGEID, "keyword", 0, NULL);
293       mc_add_keyword_ascii ("Severity", MCSEVERITY, "keyword", 0, NULL);
294       mc_add_keyword_ascii ("Facility", MCFACILITY, "keyword", 0, NULL);
295       mc_add_keyword_ascii ("SymbolicName", MCSYMBOLICNAME, "keyword", 0, NULL);
296       mc_add_keyword_ascii ("Language", MCLANGUAGE, "keyword", 0, NULL);
297       mc_add_keyword_ascii ("Success", MCTOKEN, "severity", 0, NULL);
298       mc_add_keyword_ascii ("Informational", MCTOKEN, "severity", 1, NULL);
299       mc_add_keyword_ascii ("Warning", MCTOKEN, "severity", 2, NULL);
300       mc_add_keyword_ascii ("Error", MCTOKEN, "severity", 3, NULL);
301       mc_add_keyword_ascii ("System", MCTOKEN, "facility", 0xff, NULL);
302       mc_add_keyword_ascii ("Application", MCTOKEN, "facility", 0xfff, NULL);
303       mc_add_keyword_ascii ("English", MCTOKEN, "language", 0x409, "MSG00001");
304   }
305   k = keyword_top;
306   if (!len || !t || *t == 0)
307     return -1;
308   while (k != NULL)
309     {
310       if (k->len > len)
311 	break;
312       if (k->len == len)
313 	{
314 	  if (! memcmp (k->usz, t, len * sizeof (unichar)))
315 	    {
316 	      if (k->rid == MCTOKEN)
317 		yylval.tok = k;
318 	      return k->rid;
319 	    }
320 	}
321       k = k->next;
322     }
323   return -1;
324 }
325 
326 int
yylex(void)327 yylex (void)
328 {
329   unichar *start_token;
330   unichar ch;
331 
332   if (! input_stream_pos)
333     {
334       fatal ("Input stream not setuped.\n");
335       return -1;
336     }
337   if (mclex_want_line)
338     {
339       start_token = input_stream_pos;
340       if (input_stream_pos[0] == '.'
341 	  && (input_stream_pos[1] == '\n'
342 	      || (input_stream_pos[1] == '\r' && input_stream_pos[2] == '\n')))
343       {
344 	mclex_want_line = FALSE;
345 	while (input_stream_pos[0] != 0 && input_stream_pos[0] != '\n')
346 	  ++input_stream_pos;
347 	if (input_stream_pos[0] == '\n')
348 	  ++input_stream_pos;
349 	return MCENDLINE;
350       }
351       while (input_stream_pos[0] != 0 && input_stream_pos[0] != '\n')
352 	++input_stream_pos;
353       if (input_stream_pos[0] == '\n')
354 	++input_stream_pos;
355       yylval.ustr = get_diff (input_stream_pos, start_token);
356       return MCLINE;
357     }
358   while ((ch = input_stream_pos[0]) <= 0x20)
359     {
360       if (ch == 0)
361 	return -1;
362       ++input_stream_pos;
363       if (ch == '\n')
364 	input_line += 1;
365       if (mclex_want_nl && ch == '\n')
366 	{
367 	  mclex_want_nl = FALSE;
368 	  return NL;
369 	}
370     }
371   start_token = input_stream_pos;
372   ++input_stream_pos;
373   if (mclex_want_filename)
374     {
375       mclex_want_filename = FALSE;
376       if (ch == '"')
377 	{
378 	  start_token++;
379 	  while ((ch = input_stream_pos[0]) != 0)
380 	    {
381 	      if (ch == '"')
382 		break;
383 	      ++input_stream_pos;
384 	    }
385 	  yylval.ustr = get_diff (input_stream_pos, start_token);
386 	  if (ch == '"')
387 	    ++input_stream_pos;
388 	}
389       else
390 	{
391 	  while ((ch = input_stream_pos[0]) != 0)
392 	    {
393 	      if (ch <= 0x20 || ch == ')')
394 		break;
395 	      ++input_stream_pos;
396 	    }
397 	  yylval.ustr = get_diff (input_stream_pos, start_token);
398 	}
399       return MCFILENAME;
400     }
401   switch (ch)
402   {
403   case ';':
404     ++start_token;
405     while (input_stream_pos[0] != '\n' && input_stream_pos[0] != 0)
406       ++input_stream_pos;
407     if (input_stream_pos[0] == '\n')
408       input_stream_pos++;
409     yylval.ustr = get_diff (input_stream_pos, start_token);
410     return MCCOMMENT;
411   case '=':
412     return '=';
413   case '(':
414     return '(';
415   case ')':
416     return ')';
417   case '+':
418     return '+';
419   case ':':
420     return ':';
421   case '0': case '1': case '2': case '3': case '4':
422   case '5': case '6': case '7': case '8': case '9':
423     yylval.ival = parse_digit (ch);
424     return MCNUMBER;
425   default:
426     if (ch >= 0x40)
427       {
428 	int ret;
429 	while (input_stream_pos[0] >= 0x40 || (input_stream_pos[0] >= '0' && input_stream_pos[0] <= '9'))
430 	  ++input_stream_pos;
431 	ret = mc_token (start_token, (size_t) (input_stream_pos - start_token));
432 	if (ret != -1)
433 	  return ret;
434 	yylval.ustr = get_diff (input_stream_pos, start_token);
435 	return MCIDENT;
436       }
437     yyerror ("illegal character 0x%x.", ch);
438   }
439   return -1;
440 }
441