1 /** @file
2     Implementation of scanf internals for <stdio.h>.
3 
4     Copyright (c) 2010 - 2014, Intel Corporation. All rights reserved.<BR>
5     This program and the accompanying materials are licensed and made available
6     under the terms and conditions of the BSD License that accompanies this
7     distribution.  The full text of the license may be found at
8     http://opensource.org/licenses/bsd-license.
9 
10     THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
11     WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
12 
13     Copyright (c) 1990, 1993
14     The Regents of the University of California.  All rights reserved.
15 
16     This code is derived from software contributed to Berkeley by
17     Chris Torek.
18 
19     Redistribution and use in source and binary forms, with or without
20     modification, are permitted provided that the following conditions
21     are met:
22       - Redistributions of source code must retain the above copyright
23         notice, this list of conditions and the following disclaimer.
24       - Redistributions in binary form must reproduce the above copyright
25         notice, this list of conditions and the following disclaimer in the
26         documentation and/or other materials provided with the distribution.
27       - Neither the name of the University nor the names of its contributors
28         may be used to endorse or promote products derived from this software
29         without specific prior written permission.
30 
31     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34     ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
35     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41     POSSIBILITY OF SUCH DAMAGE.
42 
43     NetBSD: vfscanf.c,v 1.37.4.1 2007/05/07 19:49:08 pavel Exp
44     FreeBSD: src/lib/libc/stdio/vfscanf.c,v 1.41 2007/01/09 00:28:07 imp Exp
45     vfscanf.c 8.1 (Berkeley) 6/4/93
46 **/
47 #include  <LibConfig.h>
48 
49 #include "namespace.h"
50 #include <assert.h>
51 #include <ctype.h>
52 #include  <errno.h>
53 #include <inttypes.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <stddef.h>
57 #include <stdarg.h>
58 #include <string.h>
59 #include  <sys/types.h>
60 #include <wchar.h>
61 #include <wctype.h>
62 
63 #include "reentrant.h"
64 #include "local.h"
65 
66 #ifndef NO_FLOATING_POINT
67 #include <locale.h>
68 #endif
69 
70 /*
71  * Provide an external name for vfscanf.  Note, EFI uses the normal
72  * namespace.h method; stdio routines explicitly use the internal name
73  * __svfscanf.
74  */
75 #ifdef __weak_alias
76 __weak_alias(vfscanf,__svfscanf)
77 #endif
78 
#define BUF   513 /* Maximum length of numeric string. */

/*
 * Flags used during conversion.
 *
 * All flag macros in this file (this group and the integral-only group
 * below) are OR-ed into the same `flags' word, so every value must be a
 * distinct single bit.
 */
#define LONG        0x0001  /* l: long or double */
#define LONGDBL     0x0002  /* L: long double */
#define SHORT       0x0004  /* h: short */
#define SUPPRESS    0x0008  /* *: suppress assignment */
#define POINTER     0x0010  /* p: void * (as hex) */
#define NOSKIP      0x0020  /* [ or c: do not skip blanks */
#define LONGLONG    0x0400  /* ll: long long (+ deprecated q: quad) */
#define INTMAXT     0x0800  /* j: intmax_t */
#define PTRDIFFT    0x1000  /* t: ptrdiff_t */
#define SIZET       0x2000  /* z: size_t */
#define SHORTSHORT  0x4000  /* hh: char */
#define UNSIGNED    0x8000  /* %[oupxX] conversions */

/*
 * The following are used in integral conversions only:
 * SIGNOK, NDIGITS, PFXOK, NZDIGITS, and HAVESIGN
 */
#define SIGNOK      0x00040 /* +/- is (still) legal */
#define NDIGITS     0x00080 /* no digits detected */
#define PFXOK       0x00100 /* 0x prefix is (still) legal */
#define NZDIGITS    0x00200 /* no zero digits detected */
#define HAVESIGN    0x10000 /* sign detected */

/*
 * Conversion types.  After the format directive has been decoded, the
 * scanner replaces the directive character with one of these codes and
 * dispatches on it.
 */
#define CT_CHAR     0 /* %c conversion */
#define CT_CCL      1 /* %[...] conversion */
#define CT_STRING   2 /* %s conversion */
#define CT_INT      3 /* %[dioupxX] conversion */
#define CT_FLOAT    4 /* %[efgEFG] conversion */

116 static const u_char  *__sccl(char *, const u_char *);
117 #ifndef NO_FLOATING_POINT
118   static int            parsefloat(FILE *, char *, char *);
119 #endif
120 
121 int __scanfdebug = 0;
122 
123 #define __collate_load_error /*CONSTCOND*/0
/*
 * Compare two single-byte characters according to strcoll().
 *
 * Each character is converted to char and placed in its own one-character,
 * NUL-terminated string; the result is whatever strcoll() returns for
 * those two strings (negative, zero or positive).
 *
 * The scratch strings have automatic storage.  They used to be `static',
 * which made every caller share (and overwrite) the same two buffers.
 */
static int
__collate_range_cmp(int c1, int c2)
{
  char s1[2];
  char s2[2];

  s1[0] = (char)c1;
  s1[1] = '\0';
  s2[0] = (char)c2;
  s2[1] = '\0';
  return strcoll(s1, s2);
}
134 
135 
/*
 * __svfscanf - MT-safe version
 *
 * Brackets a call to __svfscanf_unlocked() with FLOCKFILE()/FUNLOCKFILE()
 * on fp and returns that call's result unchanged.
 *
 * A NULL fp is rejected before any locking is attempted: errno is set to
 * EINVAL and EOF is returned.
 */
int
__svfscanf(FILE *fp, char const *fmt0, va_list ap)
{
  int nmatched;

  if (fp != NULL) {
    FLOCKFILE(fp);
    nmatched = __svfscanf_unlocked(fp, fmt0, ap);
    FUNLOCKFILE(fp);
  } else {
    errno = EINVAL;
    nmatched = EOF;
  }
  return (nmatched);
}
153 
154 /*
155  * __svfscanf_unlocked - non-MT-safe version of __svfscanf
156  */
157 int
__svfscanf_unlocked(FILE * fp,const char * fmt0,va_list ap)158 __svfscanf_unlocked(FILE *fp, const char *fmt0, va_list ap)
159 {
160           const u_char     *fmt     = (const u_char *)fmt0;
161                 int         c;              /* character from format, or conversion */
162                 size_t      width;          /* field width, or 0 */
163                 char       *p;              /* points into all kinds of strings */
164                 size_t      n;              /* handy size_t */
165                 int         flags;          /* flags as defined above */
166                 char       *p0;             /* saves original value of p when necessary */
167                 int         nassigned;      /* number of fields assigned */
168                 int         nconversions;   /* number of conversions */
169                 int         nread;          /* number of characters consumed from fp */
170                 int         base;           /* base argument to conversion function */
171                 char        ccltab[256];    /* character class table for %[...] */
172                 char        buf[BUF];       /* buffer for numeric and mb conversions */
173                 wchar_t    *wcp;            /* handy wide character pointer */
174                 size_t      nconv;          /* length of multibyte sequence converted */
175   static const  mbstate_t   initial = { 0 };
176                 mbstate_t   mbs;
177 
178   /* `basefix' is used to avoid `if' tests in the integer scanner */
179   static const short basefix[17] =
180     { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
181 
182   _DIAGASSERT(fp != NULL);
183   _DIAGASSERT(fmt0 != NULL);
184   if(fp == NULL) {
185     errno = EINVAL;
186     return (EOF);
187   }
188 
189   _SET_ORIENTATION(fp, -1);
190 
191 //Print(L"%a( %d, \"%a\", ...)\n", __func__, fp->_file, fmt0);
192   nassigned = 0;
193   nconversions = 0;
194   nread = 0;
195   base = 0;
196   for (;;) {
197     c = (unsigned char)*fmt++;
198     if (c == 0)
199       return (nassigned);
200     if (isspace(c)) {
201       while ((fp->_r > 0 || __srefill(fp) == 0) &&
202           isspace(*fp->_p))
203         nread++, fp->_r--, fp->_p++;
204       continue;
205     }
206 //Print(L"%a: %d\n", __func__, __LINE__);
207     if (c != '%')
208       goto literal;
209     width = 0;
210     flags = 0;
211     /*
212      * switch on the format.  continue if done;
213      * break once format type is derived.
214      */
215 again:    c = *fmt++;
216 //Print(L"%a: %d\n", __func__, __LINE__);
217     switch (c) {
218     case '%':
219 literal:
220 //Print(L"%a: %d\n", __func__, __LINE__);
221       if (fp->_r <= 0 && __srefill(fp))
222         goto input_failure;
223       if (*fp->_p != c)
224         goto match_failure;
225       fp->_r--, fp->_p++;
226       nread++;
227       continue;
228 
229     case '*':
230       flags |= SUPPRESS;
231       goto again;
232     case 'j':
233       flags |= INTMAXT;
234       goto again;
235     case 'l':
236       if (flags & LONG) {
237         flags &= ~LONG;
238         flags |= LONGLONG;
239       } else
240         flags |= LONG;
241       goto again;
242     case 'q':
243       flags |= LONGLONG;  /* not quite */
244       goto again;
245     case 't':
246       flags |= PTRDIFFT;
247       goto again;
248     case 'z':
249       flags |= SIZET;
250       goto again;
251     case 'L':
252       flags |= LONGDBL;
253       goto again;
254     case 'h':
255       if (flags & SHORT) {
256         flags &= ~SHORT;
257         flags |= SHORTSHORT;
258       } else
259         flags |= SHORT;
260       goto again;
261 
262     case '0': case '1': case '2': case '3': case '4':
263     case '5': case '6': case '7': case '8': case '9':
264       width = width * 10 + c - '0';
265       goto again;
266 
267     /*
268      * Conversions.
269      */
270     case 'd':
271       c = CT_INT;
272       base = 10;
273       break;
274 
275     case 'i':
276       c = CT_INT;
277       base = 0;
278       break;
279 
280     case 'o':
281       c = CT_INT;
282       flags |= UNSIGNED;
283       base = 8;
284       break;
285 
286     case 'u':
287       c = CT_INT;
288       flags |= UNSIGNED;
289       base = 10;
290       break;
291 
292     case 'X':
293     case 'x':
294       flags |= PFXOK; /* enable 0x prefixing */
295       c = CT_INT;
296       flags |= UNSIGNED;
297       base = 16;
298       break;
299 
300 #ifndef NO_FLOATING_POINT
301     case 'A': case 'E': case 'F': case 'G':
302     case 'a': case 'e': case 'f': case 'g':
303       c = CT_FLOAT;
304       break;
305 #endif
306 
307     case 'S':
308       flags |= LONG;
309       /* FALLTHROUGH */
310     case 's':
311       c = CT_STRING;
312       break;
313 
314     case '[':
315       fmt = __sccl(ccltab, fmt);
316       flags |= NOSKIP;
317       c = CT_CCL;
318       break;
319 
320     case 'C':
321       flags |= LONG;
322       /* FALLTHROUGH */
323     case 'c':
324       flags |= NOSKIP;
325       c = CT_CHAR;
326       break;
327 
328     case 'p': /* pointer format is like hex */
329       flags |= POINTER | PFXOK;
330       c = CT_INT;   /* assumes sizeof(uintmax_t) */
331       flags |= UNSIGNED;  /*      >= sizeof(uintptr_t) */
332       base = 16;
333       break;
334 
335     case 'n':
336       nconversions++;
337       if (flags & SUPPRESS) /* ??? */
338         continue;
339       if (flags & SHORTSHORT)
340         *va_arg(ap, char *) = (char)nread;
341       else if (flags & SHORT)
342         *va_arg(ap, short *) = (short)nread;
343       else if (flags & LONG)
344         *va_arg(ap, long *) = nread;
345       else if (flags & LONGLONG)
346         *va_arg(ap, long long *) = nread;
347       else if (flags & INTMAXT)
348         *va_arg(ap, intmax_t *) = nread;
349       else if (flags & SIZET)
350         *va_arg(ap, size_t *) = nread;
351       else if (flags & PTRDIFFT)
352         *va_arg(ap, ptrdiff_t *) = nread;
353       else
354         *va_arg(ap, int *) = nread;
355       continue;
356 
357     default:
358       goto match_failure;
359 
360     /*
361      * Disgusting backwards compatibility hack. XXX
362      */
363     case '\0':  /* compat */
364       return (EOF);
365     }
366 //Print(L"%a: %d\n", __func__, __LINE__);
367 
368     /*
369      * We have a conversion that requires input.
370      */
371     if (fp->_r <= 0 && __srefill(fp))
372     {
373 //Print(L"%a: %d\n", __func__, __LINE__);
374       goto input_failure;
375     }
376 
377     /*
378      * Consume leading white space, except for formats
379      * that suppress this.
380      */
381     if ((flags & NOSKIP) == 0) {
382       while (isspace(*fp->_p)) {
383         nread++;
384         if (--fp->_r > 0)
385           fp->_p++;
386         else if (__srefill(fp))
387         {
388 //Print(L"%a: %d\n", __func__, __LINE__);
389           goto input_failure;
390         }
391       }
392       /*
393        * Note that there is at least one character in
394        * the buffer, so conversions that do not set NOSKIP
395        * ca no longer result in an input failure.
396        */
397     }
398 
399     /*
400      * Do the conversion.
401      */
402 //Print(L"%a: %d\n", __func__, __LINE__);
403     switch (c) {
404 
405     case CT_CHAR:
406       /* scan arbitrary characters (sets NOSKIP) */
407       if (width == 0)
408         width = 1;
409       if (flags & LONG) {
410         if ((flags & SUPPRESS) == 0)
411           wcp = va_arg(ap, wchar_t *);
412         else
413           wcp = NULL;
414         n = 0;
415         while (width != 0) {
416           if (n == MB_CUR_MAX) {
417             fp->_flags |= __SERR;
418             goto input_failure;
419           }
420           buf[n++] = *fp->_p;
421           fp->_p++;
422           fp->_r--;
423           mbs = initial;
424           nconv = mbrtowc(wcp, buf, n, &mbs);
425           if (nconv == (size_t)-1) {
426             fp->_flags |= __SERR;
427             goto input_failure;
428           }
429           if (nconv == 0 && !(flags & SUPPRESS))
430             *wcp = L'\0';
431           if (nconv != (size_t)-2) {
432             nread += (int)n;
433             width--;
434             if (!(flags & SUPPRESS))
435               wcp++;
436             n = 0;
437           }
438           if (fp->_r <= 0 && __srefill(fp)) {
439             if (n != 0) {
440               fp->_flags |= __SERR;
441               goto input_failure;
442             }
443             break;
444           }
445         }
446         if (!(flags & SUPPRESS))
447           nassigned++;
448       } else if (flags & SUPPRESS) {
449         size_t sum = 0;
450         for (;;) {
451           if ((n = fp->_r) < width) {
452             sum += n;
453             width -= n;
454             fp->_p += n;
455             if (__srefill(fp)) {
456               if (sum == 0)
457                   goto input_failure;
458               break;
459             }
460           } else {
461             sum += width;
462             fp->_r -= (int)width;
463             fp->_p += width;
464             break;
465           }
466         }
467         nread += (int)sum;
468       } else {
469         size_t r = fread(va_arg(ap, char *), 1,
470             width, fp);
471 
472         if (r == 0)
473           goto input_failure;
474         nread += (int)r;
475         nassigned++;
476       }
477       nconversions++;
478       break;
479 
480     case CT_CCL:
481       /* scan a (nonempty) character class (sets NOSKIP) */
482       if (width == 0)
483         width = (size_t)~0; /* `infinity' */
484       /* take only those things in the class */
485       if (flags & LONG) {
486         wchar_t twc;
487         int nchars;
488 
489         if ((flags & SUPPRESS) == 0)
490           wcp = va_arg(ap, wchar_t *);
491         else
492           wcp = &twc;
493         n = 0;
494         nchars = 0;
495         while (width != 0) {
496           if (n == MB_CUR_MAX) {
497             fp->_flags |= __SERR;
498             goto input_failure;
499           }
500           buf[n++] = *fp->_p;
501           fp->_p++;
502           fp->_r--;
503           mbs = initial;
504           nconv = mbrtowc(wcp, buf, n, &mbs);
505           if (nconv == (size_t)-1) {
506             fp->_flags |= __SERR;
507             goto input_failure;
508           }
509           if (nconv == 0)
510             *wcp = L'\0';
511           if (nconv != (size_t)-2) {
512             if (wctob(*wcp) != EOF &&
513                 !ccltab[wctob(*wcp)]) {
514               while (n != 0) {
515                 n--;
516                 (void)ungetc(buf[n],
517                     fp);
518               }
519               break;
520             }
521             nread += (int)n;
522             width--;
523             if (!(flags & SUPPRESS))
524               wcp++;
525             nchars++;
526             n = 0;
527           }
528           if (fp->_r <= 0 && __srefill(fp)) {
529             if (n != 0) {
530               fp->_flags |= __SERR;
531               goto input_failure;
532             }
533             break;
534           }
535         }
536         if (n != 0) {
537           fp->_flags |= __SERR;
538           goto input_failure;
539         }
540         n = nchars;
541         if (n == 0)
542           goto match_failure;
543         if (!(flags & SUPPRESS)) {
544           *wcp = L'\0';
545           nassigned++;
546         }
547       } else if (flags & SUPPRESS) {
548         n = 0;
549         while (ccltab[*fp->_p]) {
550           n++, fp->_r--, fp->_p++;
551           if (--width == 0)
552             break;
553           if (fp->_r <= 0 && __srefill(fp)) {
554             if (n == 0)
555               goto input_failure;
556             break;
557           }
558         }
559         if (n == 0)
560           goto match_failure;
561       } else {
562         p0 = p = va_arg(ap, char *);
563         while (ccltab[*fp->_p]) {
564           fp->_r--;
565           *p++ = *fp->_p++;
566           if (--width == 0)
567             break;
568           if (fp->_r <= 0 && __srefill(fp)) {
569             if (p == p0)
570               goto input_failure;
571             break;
572           }
573         }
574         n = p - p0;
575         if (n == 0)
576           goto match_failure;
577         *p = 0;
578         nassigned++;
579       }
580       nread += (int)n;
581       nconversions++;
582       break;
583 
584     case CT_STRING:
585       /* like CCL, but zero-length string OK, & no NOSKIP */
586       if (width == 0)
587         width = (size_t)~0;
588       if (flags & LONG) {
589         wchar_t twc;
590 
591         if ((flags & SUPPRESS) == 0)
592           wcp = va_arg(ap, wchar_t *);
593         else
594           wcp = &twc;
595         n = 0;
596         while (!isspace(*fp->_p) && width != 0) {
597           if (n == MB_CUR_MAX) {
598             fp->_flags |= __SERR;
599             goto input_failure;
600           }
601           buf[n++] = *fp->_p;
602           fp->_p++;
603           fp->_r--;
604           mbs = initial;
605           nconv = mbrtowc(wcp, buf, n, &mbs);
606           if (nconv == (size_t)-1) {
607             fp->_flags |= __SERR;
608             goto input_failure;
609           }
610           if (nconv == 0)
611             *wcp = L'\0';
612           if (nconv != (size_t)-2) {
613             if (iswspace(*wcp)) {
614               while (n != 0) {
615                 n--;
616                 (void)ungetc(buf[n],
617                     fp);
618               }
619               break;
620             }
621             nread += (int)n;
622             width--;
623             if (!(flags & SUPPRESS))
624               wcp++;
625             n = 0;
626           }
627           if (fp->_r <= 0 && __srefill(fp)) {
628             if (n != 0) {
629               fp->_flags |= __SERR;
630               goto input_failure;
631             }
632             break;
633           }
634         }
635         if (!(flags & SUPPRESS)) {
636           *wcp = L'\0';
637           nassigned++;
638         }
639       } else if (flags & SUPPRESS) {
640         n = 0;
641         while (!isspace(*fp->_p)) {
642           n++, fp->_r--, fp->_p++;
643           if (--width == 0)
644             break;
645           if (fp->_r <= 0 && __srefill(fp))
646             break;
647         }
648         nread += (int)n;
649       } else {
650         p0 = p = va_arg(ap, char *);
651         while (!isspace(*fp->_p)) {
652           fp->_r--;
653           *p++ = *fp->_p++;
654           if (--width == 0)
655             break;
656           if (fp->_r <= 0 && __srefill(fp))
657             break;
658         }
659         *p = 0;
660         nread += (int)(p - p0);
661         nassigned++;
662       }
663       nconversions++;
664       continue;
665 
666     case CT_INT:
667 //Print(L"%a: %d\n", __func__, __LINE__);
668       /* scan an integer as if by the conversion function */
669 #ifdef hardway
670       if (width == 0 || width > sizeof(buf) - 1)
671         width = sizeof(buf) - 1;
672 #else
673       /* size_t is unsigned, hence this optimisation */
674       if (--width > sizeof(buf) - 2)
675         width = sizeof(buf) - 2;
676       width++;
677 #endif
678       flags |= SIGNOK | NDIGITS | NZDIGITS;
679       for (p = buf; width; width--) {
680         c = *fp->_p;
681         /*
682          * Switch on the character; `goto ok'
683          * if we accept it as a part of number.
684          */
685         switch (c) {
686 
687         /*
688          * The digit 0 is always legal, but is
689          * special.  For %i conversions, if no
690          * digits (zero or nonzero) have been
691          * scanned (only signs), we will have
692          * base==0.  In that case, we should set
693          * it to 8 and enable 0x prefixing.
694          * Also, if we have not scanned zero digits
695          * before this, do not turn off prefixing
696          * (someone else will turn it off if we
697          * have scanned any nonzero digits).
698          */
699         case '0':
700           if (base == 0) {
701             base = 8;
702             flags |= PFXOK;
703           }
704           if (flags & NZDIGITS)
705               flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
706           else
707               flags &= ~(SIGNOK|PFXOK|NDIGITS);
708           goto ok;
709 
710         /* 1 through 7 always legal */
711         case '1': case '2': case '3':
712         case '4': case '5': case '6': case '7':
713           base = basefix[base];
714           flags &= ~(SIGNOK | PFXOK | NDIGITS);
715           goto ok;
716 
717         /* digits 8 and 9 ok iff decimal or hex */
718         case '8': case '9':
719           base = basefix[base];
720           if (base <= 8)
721             break;  /* not legal here */
722           flags &= ~(SIGNOK | PFXOK | NDIGITS);
723           goto ok;
724 
725         /* letters ok iff hex */
726         case 'A': case 'B': case 'C':
727         case 'D': case 'E': case 'F':
728         case 'a': case 'b': case 'c':
729         case 'd': case 'e': case 'f':
730           /* no need to fix base here */
731           if (base <= 10)
732             break;  /* not legal here */
733           flags &= ~(SIGNOK | PFXOK | NDIGITS);
734           goto ok;
735 
736         /* sign ok only as first character */
737         case '+': case '-':
738           if (flags & SIGNOK) {
739             flags &= ~SIGNOK;
740             flags |= HAVESIGN;
741             goto ok;
742           }
743           break;
744 
745         /*
746          * x ok iff flag still set & 2nd char (or
747          * 3rd char if we have a sign).
748          */
749         case 'x': case 'X':
750           if (flags & PFXOK && p ==
751               buf + 1 + !!(flags & HAVESIGN)) {
752             base = 16;  /* if %i */
753             flags &= ~PFXOK;
754             goto ok;
755           }
756           break;
757         }
758 
759         /*
760          * If we got here, c is not a legal character
761          * for a number.  Stop accumulating digits.
762          */
763         break;
764     ok:
765         /*
766          * c is legal: store it and look at the next.
767          */
768         *p++ = (char)c;
769         if (--fp->_r > 0)
770           fp->_p++;
771         else if (__srefill(fp))
772           break;    /* EOF */
773       }
774       /*
775        * If we had only a sign, it is no good; push
776        * back the sign.  If the number ends in `x',
777        * it was [sign] '0' 'x', so push back the x
778        * and treat it as [sign] '0'.
779        */
780       if (flags & NDIGITS) {
781         if (p > buf)
782           (void)ungetc(*(u_char *)--p, fp);
783         goto match_failure;
784       }
785       c = ((u_char *)p)[-1];
786       if (c == 'x' || c == 'X') {
787         --p;
788         (void)ungetc(c, fp);
789       }
790       if ((flags & SUPPRESS) == 0) {
791         //uintmax_t res;
792         // Use a union to get around the truncation warnings.
793         union {
794           uintmax_t   umax;
795           intmax_t    imax;
796           void       *vp;
797           ptrdiff_t   pdt;
798           size_t      sz;
799           long long   ll;
800           long        lo;
801           int         in;
802           short       hw;
803           char        ch;
804         } res;
805 
806         *p = 0;
807         if ((flags & UNSIGNED) == 0)
808             res.imax = strtoimax(buf, (char **)NULL, base);
809         else
810             res.umax = strtoumax(buf, (char **)NULL, base);
811         if (flags & POINTER)
812           *va_arg(ap, void **) = res.vp;
813               //(void *)((uintptr_t)res);
814         else if (flags & SHORTSHORT)
815           *va_arg(ap, char *) = res.ch;
816         else if (flags & SHORT)
817           *va_arg(ap, short *) = res.hw;
818         else if (flags & LONG)
819           *va_arg(ap, long *) = res.lo;
820         else if (flags & LONGLONG)
821           *va_arg(ap, long long *) = res.ll;
822         else if (flags & INTMAXT)
823           *va_arg(ap, intmax_t *) = res.imax;
824         else if (flags & PTRDIFFT)
825           *va_arg(ap, ptrdiff_t *) = res.pdt;
826               //(ptrdiff_t)res;
827         else if (flags & SIZET)
828           *va_arg(ap, size_t *) = res.sz;
829         else
830           *va_arg(ap, int *) = res.in;
831         nassigned++;
832       }
833       nread += (int)(p - buf);
834       nconversions++;
835 //Print(L"%a: %d\n", __func__, __LINE__);
836       break;
837 
838 #ifndef NO_FLOATING_POINT
839     case CT_FLOAT:
840       /* scan a floating point number as if by strtod */
841       if (width == 0 || width > sizeof(buf) - 1)
842         width = sizeof(buf) - 1;
843       if ((width = parsefloat(fp, buf, buf + width)) == 0)
844         goto match_failure;
845       if ((flags & SUPPRESS) == 0) {
846         if (flags & LONGDBL) {
847           long double res = strtold(buf, &p);
848           *va_arg(ap, long double *) = res;
849         } else if (flags & LONG) {
850           double res = strtod(buf, &p);
851           *va_arg(ap, double *) = res;
852         } else {
853           float res = strtof(buf, &p);
854           *va_arg(ap, float *) = res;
855         }
856         if (__scanfdebug && p - buf != (ptrdiff_t)width)
857           abort();
858         nassigned++;
859       }
860       nread += (int)width;
861       nconversions++;
862       break;
863 #endif /* !NO_FLOATING_POINT */
864     }
865   }
866 input_failure:
867 //Print(L"%a: %d\n", __func__, __LINE__);
868   return (nconversions != 0 ? nassigned : EOF);
869 match_failure:
870   return (nassigned);
871 }
872 
873 /*
874  * Fill in the given table from the scanset at the given format
875  * (just after `[').  Return a pointer to the character past the
876  * closing `]'.  The table has a 1 wherever characters should be
877  * considered part of the scanset.
878  */
879 static const u_char *
__sccl(char * tab,const u_char * fmt)880 __sccl(char *tab, const u_char *fmt)
881 {
882   int c, n, v, i;
883 
884   _DIAGASSERT(tab != NULL);
885   _DIAGASSERT(fmt != NULL);
886   /* first `clear' the whole table */
887   c = *fmt++;   /* first char hat => negated scanset */
888   if (c == '^') {
889     v = 1;    /* default => accept */
890     c = *fmt++; /* get new first char */
891   } else
892     v = 0;    /* default => reject */
893 
894   /* XXX: Will not work if sizeof(tab*) > sizeof(char) */
895   (void)memset(tab, v, 256);
896 
897   if (c == 0)
898     return (fmt - 1);/* format ended before closing ] */
899 
900   /*
901    * Now set the entries corresponding to the actual scanset
902    * to the opposite of the above.
903    *
904    * The first character may be ']' (or '-') without being special;
905    * the last character may be '-'.
906    */
907   v = 1 - v;
908   for (;;) {
909     tab[c] = (char)v;   /* take character c */
910 doswitch:
911     n = *fmt++;   /* and examine the next */
912     switch (n) {
913 
914     case 0:     /* format ended too soon */
915       return (fmt - 1);
916 
917     case '-':
918       /*
919        * A scanset of the form
920        *  [01+-]
921        * is defined as `the digit 0, the digit 1,
922        * the character +, the character -', but
923        * the effect of a scanset such as
924        *  [a-zA-Z0-9]
925        * is implementation defined.  The V7 Unix
926        * scanf treats `a-z' as `the letters a through
927        * z', but treats `a-a' as `the letter a, the
928        * character -, and the letter a'.
929        *
930        * For compatibility, the `-' is not considerd
931        * to define a range if the character following
932        * it is either a close bracket (required by ANSI)
933        * or is not numerically greater than the character
934        * we just stored in the table (c).
935        */
936       n = *fmt;
937       if (n == ']' || (__collate_load_error ? n < c :
938           __collate_range_cmp(n, c) < 0)) {
939         c = '-';
940         break;  /* resume the for(;;) */
941       }
942       fmt++;
943       /* fill in the range */
944       if (__collate_load_error) {
945         do
946           tab[++c] = (char)v;
947         while (c < n);
948       } else {
949         for (i = 0; i < 256; i ++)
950           if (__collate_range_cmp(c, i) < 0 &&
951               __collate_range_cmp(i, n) <= 0)
952             tab[i] = (char)v;
953       }
954 #if 1 /* XXX another disgusting compatibility hack */
955       c = n;
956       /*
957        * Alas, the V7 Unix scanf also treats formats
958        * such as [a-c-e] as `the letters a through e'.
959        * This too is permitted by the standard....
960        */
961       goto doswitch;
962 #else
963       c = *fmt++;
964       if (c == 0)
965         return (fmt - 1);
966       if (c == ']')
967         return (fmt);
968 #endif
969 
970     case ']':   /* end of scanset */
971       return (fmt);
972 
973     default:    /* just another character */
974       c = n;
975       break;
976     }
977   }
978   /* NOTREACHED */
979 }
980 
981 #ifndef NO_FLOATING_POINT
982 static int
parsefloat(FILE * fp,char * buf,char * end)983 parsefloat(FILE *fp, char *buf, char *end)
984 {
985   char *commit, *p;
986   int infnanpos = 0;
987   enum {
988     S_START, S_GOTSIGN, S_INF, S_NAN, S_MAYBEHEX,
989     S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
990   } state = S_START;
991   unsigned char c;
992   char decpt = *localeconv()->decimal_point;
993   _Bool gotmantdig = 0, ishex = 0;
994 
995   if(fp == NULL) {
996     errno = EINVAL;
997     return (EOF);
998   }
999 
1000   /*
1001    * We set commit = p whenever the string we have read so far
1002    * constitutes a valid representation of a floating point
1003    * number by itself.  At some point, the parse will complete
1004    * or fail, and we will ungetc() back to the last commit point.
1005    * To ensure that the file offset gets updated properly, it is
1006    * always necessary to read at least one character that doesn't
1007    * match; thus, we can't short-circuit "infinity" or "nan(...)".
1008    */
1009   commit = buf - 1;
1010   for (p = buf; p < end; ) {
1011     c = *fp->_p;
1012 reswitch:
1013     switch (state) {
1014     case S_START:
1015       state = S_GOTSIGN;
1016       if (c == '-' || c == '+')
1017         break;
1018       else
1019         goto reswitch;
1020     case S_GOTSIGN:
1021       switch (c) {
1022       case '0':
1023         state = S_MAYBEHEX;
1024         commit = p;
1025         break;
1026       case 'I':
1027       case 'i':
1028         state = S_INF;
1029         break;
1030       case 'N':
1031       case 'n':
1032         state = S_NAN;
1033         break;
1034       default:
1035         state = S_DIGITS;
1036         goto reswitch;
1037       }
1038       break;
1039     case S_INF:
1040       if (infnanpos > 6 ||
1041           (c != "nfinity"[infnanpos] &&
1042            c != "NFINITY"[infnanpos]))
1043         goto parsedone;
1044       if (infnanpos == 1 || infnanpos == 6)
1045         commit = p; /* inf or infinity */
1046       infnanpos++;
1047       break;
1048     case S_NAN:
1049       switch (infnanpos) {
1050       case -1:  /* XXX kludge to deal with nan(...) */
1051         goto parsedone;
1052       case 0:
1053         if (c != 'A' && c != 'a')
1054           goto parsedone;
1055         break;
1056       case 1:
1057         if (c != 'N' && c != 'n')
1058           goto parsedone;
1059         else
1060           commit = p;
1061         break;
1062       case 2:
1063         if (c != '(')
1064           goto parsedone;
1065         break;
1066       default:
1067         if (c == ')') {
1068           commit = p;
1069           infnanpos = -2;
1070         } else if (!isalnum(c) && c != '_')
1071           goto parsedone;
1072         break;
1073       }
1074       infnanpos++;
1075       break;
1076     case S_MAYBEHEX:
1077       state = S_DIGITS;
1078       if (c == 'X' || c == 'x') {
1079         ishex = 1;
1080         break;
1081       } else {  /* we saw a '0', but no 'x' */
1082         gotmantdig = 1;
1083         goto reswitch;
1084       }
1085     case S_DIGITS:
1086       if ((ishex && isxdigit(c)) || isdigit(c))
1087         gotmantdig = 1;
1088       else {
1089         state = S_FRAC;
1090         if (c != decpt)
1091           goto reswitch;
1092       }
1093       if (gotmantdig)
1094         commit = p;
1095       break;
1096     case S_FRAC:
1097       if (((c == 'E' || c == 'e') && !ishex) ||
1098           ((c == 'P' || c == 'p') && ishex)) {
1099         if (!gotmantdig)
1100           goto parsedone;
1101         else
1102           state = S_EXP;
1103       } else if ((ishex && isxdigit(c)) || isdigit(c)) {
1104         commit = p;
1105         gotmantdig = 1;
1106       } else
1107         goto parsedone;
1108       break;
1109     case S_EXP:
1110       state = S_EXPDIGITS;
1111       if (c == '-' || c == '+')
1112         break;
1113       else
1114         goto reswitch;
1115     case S_EXPDIGITS:
1116       if (isdigit(c))
1117         commit = p;
1118       else
1119         goto parsedone;
1120       break;
1121     default:
1122       abort();
1123     }
1124     *p++ = c;
1125     if (--fp->_r > 0)
1126       fp->_p++;
1127     else if (__srefill(fp))
1128       break;  /* EOF */
1129   }
1130 
1131 parsedone:
1132   while (commit < --p)
1133     (void)ungetc(*(u_char *)p, fp);
1134   *++commit = '\0';
1135   return (int)(commit - buf);
1136 }
1137 #endif
1138