/** @file
    Implementation of scanf internals for <stdio.h>.

    Copyright (c) 2010 - 2014, Intel Corporation. All rights reserved.<BR>
    This program and the accompanying materials are licensed and made available
    under the terms and conditions of the BSD License that accompanies this
    distribution.  The full text of the license may be found at
    http://opensource.org/licenses/bsd-license.

    THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
    WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.

    Copyright (c) 1990, 1993
    The Regents of the University of California.  All rights reserved.

    This code is derived from software contributed to Berkeley by
    Chris Torek.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:
      - Redistributions of source code must retain the above copyright
        notice, this list of conditions and the following disclaimer.
      - Redistributions in binary form must reproduce the above copyright
        notice, this list of conditions and the following disclaimer in the
        documentation and/or other materials provided with the distribution.
      - Neither the name of the University nor the names of its contributors
        may be used to endorse or promote products derived from this software
        without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    POSSIBILITY OF SUCH DAMAGE.

    NetBSD: vfscanf.c,v 1.37.4.1 2007/05/07 19:49:08 pavel Exp
    FreeBSD: src/lib/libc/stdio/vfscanf.c,v 1.41 2007/01/09 00:28:07 imp Exp
    vfscanf.c 8.1 (Berkeley) 6/4/93
**/
47 #include <LibConfig.h>
48
49 #include "namespace.h"
50 #include <assert.h>
51 #include <ctype.h>
52 #include <errno.h>
53 #include <inttypes.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <stddef.h>
57 #include <stdarg.h>
58 #include <string.h>
59 #include <sys/types.h>
60 #include <wchar.h>
61 #include <wctype.h>
62
63 #include "reentrant.h"
64 #include "local.h"
65
66 #ifndef NO_FLOATING_POINT
67 #include <locale.h>
68 #endif
69
/*
 * Provide an external name for vfscanf.  Note, EFI uses the normal
 * namespace.h method; stdio routines explicitly use the internal name
 * __svfscanf.
 */
#ifdef __weak_alias
__weak_alias(vfscanf,__svfscanf)
#endif

#define BUF         513     /* Maximum length of numeric string. */

/*
 * Flags used during conversion.
 *
 * All of the flag macros below (including the integral-conversion-only
 * group) are OR-ed into the same `flags' word, so every value must be a
 * distinct single bit.
 */
#define LONG        0x0001  /* l: long or double */
#define LONGDBL     0x0002  /* L: long double */
#define SHORT       0x0004  /* h: short */
#define SUPPRESS    0x0008  /* *: suppress assignment */
#define POINTER     0x0010  /* p: void * (as hex) */
#define NOSKIP      0x0020  /* [ or c: do not skip blanks */
#define LONGLONG    0x0400  /* ll: long long (+ deprecated q: quad) */
#define INTMAXT     0x0800  /* j: intmax_t */
#define PTRDIFFT    0x1000  /* t: ptrdiff_t */
#define SIZET       0x2000  /* z: size_t */
#define SHORTSHORT  0x4000  /* hh: char */
#define UNSIGNED    0x8000  /* %[oupxX] conversions */

/*
 * The following are used in integral conversions only:
 * SIGNOK, NDIGITS, PFXOK, NZDIGITS, and HAVESIGN
 */
#define SIGNOK      0x00040 /* +/- is (still) legal */
#define NDIGITS     0x00080 /* no digits detected */
#define PFXOK       0x00100 /* 0x prefix is (still) legal */
#define NZDIGITS    0x00200 /* no zero digits detected */
#define HAVESIGN    0x10000 /* sign detected */

/*
 * Conversion types.
 */
#define CT_CHAR     0       /* %c conversion */
#define CT_CCL      1       /* %[...] conversion */
#define CT_STRING   2       /* %s conversion */
#define CT_INT      3       /* %[dioupxX] conversion */
#define CT_FLOAT    4       /* %[efgEFG] conversion */
116 static const u_char *__sccl(char *, const u_char *);
117 #ifndef NO_FLOATING_POINT
118 static int parsefloat(FILE *, char *, char *);
119 #endif
120
121 int __scanfdebug = 0;
122
123 #define __collate_load_error /*CONSTCOND*/0
/*
 * Compare two single characters according to the current locale's
 * collation order.
 *
 * Each character is turned into a one-character, NUL-terminated string and
 * the pair is handed to strcoll().
 *
 * The scratch strings are automatic (not function-static) so that callers
 * running concurrently on different streams cannot overwrite each other's
 * operands.
 *
 * @param c1  First character (converted to char).
 * @param c2  Second character (converted to char).
 *
 * @return  The strcoll() result: negative, zero, or positive when c1
 *          collates before, equal to, or after c2.
 */
static int
__collate_range_cmp(int c1, int c2)
{
  char s1[2];
  char s2[2];

  s1[0] = (char)c1;
  s1[1] = '\0';
  s2[0] = (char)c2;
  s2[1] = '\0';
  return strcoll(s1, s2);
}
134
135
/*
 * __svfscanf - MT-safe version
 *
 * Brackets a call to __svfscanf_unlocked() with FLOCKFILE()/FUNLOCKFILE()
 * on the stream.
 *
 * @param fp    Stream to read from.
 * @param fmt0  scanf-style format string.
 * @param ap    Destination pointers for the conversions in fmt0.
 *
 * @return  Whatever __svfscanf_unlocked() returns, or EOF with errno set
 *          to EINVAL when fp is NULL (the stream is not locked in that case).
 */
int
__svfscanf(FILE *fp, char const *fmt0, va_list ap)
{
  int ret;

  if (fp == NULL) {
    errno = EINVAL;
    return (EOF);
  }
  FLOCKFILE(fp);
  ret = __svfscanf_unlocked(fp, fmt0, ap);
  FUNLOCKFILE(fp);
  return (ret);
}
153
154 /*
155 * __svfscanf_unlocked - non-MT-safe version of __svfscanf
156 */
157 int
__svfscanf_unlocked(FILE * fp,const char * fmt0,va_list ap)158 __svfscanf_unlocked(FILE *fp, const char *fmt0, va_list ap)
159 {
160 const u_char *fmt = (const u_char *)fmt0;
161 int c; /* character from format, or conversion */
162 size_t width; /* field width, or 0 */
163 char *p; /* points into all kinds of strings */
164 size_t n; /* handy size_t */
165 int flags; /* flags as defined above */
166 char *p0; /* saves original value of p when necessary */
167 int nassigned; /* number of fields assigned */
168 int nconversions; /* number of conversions */
169 int nread; /* number of characters consumed from fp */
170 int base; /* base argument to conversion function */
171 char ccltab[256]; /* character class table for %[...] */
172 char buf[BUF]; /* buffer for numeric and mb conversions */
173 wchar_t *wcp; /* handy wide character pointer */
174 size_t nconv; /* length of multibyte sequence converted */
175 static const mbstate_t initial = { 0 };
176 mbstate_t mbs;
177
178 /* `basefix' is used to avoid `if' tests in the integer scanner */
179 static const short basefix[17] =
180 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
181
182 _DIAGASSERT(fp != NULL);
183 _DIAGASSERT(fmt0 != NULL);
184 if(fp == NULL) {
185 errno = EINVAL;
186 return (EOF);
187 }
188
189 _SET_ORIENTATION(fp, -1);
190
191 //Print(L"%a( %d, \"%a\", ...)\n", __func__, fp->_file, fmt0);
192 nassigned = 0;
193 nconversions = 0;
194 nread = 0;
195 base = 0;
196 for (;;) {
197 c = (unsigned char)*fmt++;
198 if (c == 0)
199 return (nassigned);
200 if (isspace(c)) {
201 while ((fp->_r > 0 || __srefill(fp) == 0) &&
202 isspace(*fp->_p))
203 nread++, fp->_r--, fp->_p++;
204 continue;
205 }
206 //Print(L"%a: %d\n", __func__, __LINE__);
207 if (c != '%')
208 goto literal;
209 width = 0;
210 flags = 0;
211 /*
212 * switch on the format. continue if done;
213 * break once format type is derived.
214 */
215 again: c = *fmt++;
216 //Print(L"%a: %d\n", __func__, __LINE__);
217 switch (c) {
218 case '%':
219 literal:
220 //Print(L"%a: %d\n", __func__, __LINE__);
221 if (fp->_r <= 0 && __srefill(fp))
222 goto input_failure;
223 if (*fp->_p != c)
224 goto match_failure;
225 fp->_r--, fp->_p++;
226 nread++;
227 continue;
228
229 case '*':
230 flags |= SUPPRESS;
231 goto again;
232 case 'j':
233 flags |= INTMAXT;
234 goto again;
235 case 'l':
236 if (flags & LONG) {
237 flags &= ~LONG;
238 flags |= LONGLONG;
239 } else
240 flags |= LONG;
241 goto again;
242 case 'q':
243 flags |= LONGLONG; /* not quite */
244 goto again;
245 case 't':
246 flags |= PTRDIFFT;
247 goto again;
248 case 'z':
249 flags |= SIZET;
250 goto again;
251 case 'L':
252 flags |= LONGDBL;
253 goto again;
254 case 'h':
255 if (flags & SHORT) {
256 flags &= ~SHORT;
257 flags |= SHORTSHORT;
258 } else
259 flags |= SHORT;
260 goto again;
261
262 case '0': case '1': case '2': case '3': case '4':
263 case '5': case '6': case '7': case '8': case '9':
264 width = width * 10 + c - '0';
265 goto again;
266
267 /*
268 * Conversions.
269 */
270 case 'd':
271 c = CT_INT;
272 base = 10;
273 break;
274
275 case 'i':
276 c = CT_INT;
277 base = 0;
278 break;
279
280 case 'o':
281 c = CT_INT;
282 flags |= UNSIGNED;
283 base = 8;
284 break;
285
286 case 'u':
287 c = CT_INT;
288 flags |= UNSIGNED;
289 base = 10;
290 break;
291
292 case 'X':
293 case 'x':
294 flags |= PFXOK; /* enable 0x prefixing */
295 c = CT_INT;
296 flags |= UNSIGNED;
297 base = 16;
298 break;
299
300 #ifndef NO_FLOATING_POINT
301 case 'A': case 'E': case 'F': case 'G':
302 case 'a': case 'e': case 'f': case 'g':
303 c = CT_FLOAT;
304 break;
305 #endif
306
307 case 'S':
308 flags |= LONG;
309 /* FALLTHROUGH */
310 case 's':
311 c = CT_STRING;
312 break;
313
314 case '[':
315 fmt = __sccl(ccltab, fmt);
316 flags |= NOSKIP;
317 c = CT_CCL;
318 break;
319
320 case 'C':
321 flags |= LONG;
322 /* FALLTHROUGH */
323 case 'c':
324 flags |= NOSKIP;
325 c = CT_CHAR;
326 break;
327
328 case 'p': /* pointer format is like hex */
329 flags |= POINTER | PFXOK;
330 c = CT_INT; /* assumes sizeof(uintmax_t) */
331 flags |= UNSIGNED; /* >= sizeof(uintptr_t) */
332 base = 16;
333 break;
334
335 case 'n':
336 nconversions++;
337 if (flags & SUPPRESS) /* ??? */
338 continue;
339 if (flags & SHORTSHORT)
340 *va_arg(ap, char *) = (char)nread;
341 else if (flags & SHORT)
342 *va_arg(ap, short *) = (short)nread;
343 else if (flags & LONG)
344 *va_arg(ap, long *) = nread;
345 else if (flags & LONGLONG)
346 *va_arg(ap, long long *) = nread;
347 else if (flags & INTMAXT)
348 *va_arg(ap, intmax_t *) = nread;
349 else if (flags & SIZET)
350 *va_arg(ap, size_t *) = nread;
351 else if (flags & PTRDIFFT)
352 *va_arg(ap, ptrdiff_t *) = nread;
353 else
354 *va_arg(ap, int *) = nread;
355 continue;
356
357 default:
358 goto match_failure;
359
360 /*
361 * Disgusting backwards compatibility hack. XXX
362 */
363 case '\0': /* compat */
364 return (EOF);
365 }
366 //Print(L"%a: %d\n", __func__, __LINE__);
367
368 /*
369 * We have a conversion that requires input.
370 */
371 if (fp->_r <= 0 && __srefill(fp))
372 {
373 //Print(L"%a: %d\n", __func__, __LINE__);
374 goto input_failure;
375 }
376
377 /*
378 * Consume leading white space, except for formats
379 * that suppress this.
380 */
381 if ((flags & NOSKIP) == 0) {
382 while (isspace(*fp->_p)) {
383 nread++;
384 if (--fp->_r > 0)
385 fp->_p++;
386 else if (__srefill(fp))
387 {
388 //Print(L"%a: %d\n", __func__, __LINE__);
389 goto input_failure;
390 }
391 }
392 /*
393 * Note that there is at least one character in
394 * the buffer, so conversions that do not set NOSKIP
395 * ca no longer result in an input failure.
396 */
397 }
398
399 /*
400 * Do the conversion.
401 */
402 //Print(L"%a: %d\n", __func__, __LINE__);
403 switch (c) {
404
405 case CT_CHAR:
406 /* scan arbitrary characters (sets NOSKIP) */
407 if (width == 0)
408 width = 1;
409 if (flags & LONG) {
410 if ((flags & SUPPRESS) == 0)
411 wcp = va_arg(ap, wchar_t *);
412 else
413 wcp = NULL;
414 n = 0;
415 while (width != 0) {
416 if (n == MB_CUR_MAX) {
417 fp->_flags |= __SERR;
418 goto input_failure;
419 }
420 buf[n++] = *fp->_p;
421 fp->_p++;
422 fp->_r--;
423 mbs = initial;
424 nconv = mbrtowc(wcp, buf, n, &mbs);
425 if (nconv == (size_t)-1) {
426 fp->_flags |= __SERR;
427 goto input_failure;
428 }
429 if (nconv == 0 && !(flags & SUPPRESS))
430 *wcp = L'\0';
431 if (nconv != (size_t)-2) {
432 nread += (int)n;
433 width--;
434 if (!(flags & SUPPRESS))
435 wcp++;
436 n = 0;
437 }
438 if (fp->_r <= 0 && __srefill(fp)) {
439 if (n != 0) {
440 fp->_flags |= __SERR;
441 goto input_failure;
442 }
443 break;
444 }
445 }
446 if (!(flags & SUPPRESS))
447 nassigned++;
448 } else if (flags & SUPPRESS) {
449 size_t sum = 0;
450 for (;;) {
451 if ((n = fp->_r) < width) {
452 sum += n;
453 width -= n;
454 fp->_p += n;
455 if (__srefill(fp)) {
456 if (sum == 0)
457 goto input_failure;
458 break;
459 }
460 } else {
461 sum += width;
462 fp->_r -= (int)width;
463 fp->_p += width;
464 break;
465 }
466 }
467 nread += (int)sum;
468 } else {
469 size_t r = fread(va_arg(ap, char *), 1,
470 width, fp);
471
472 if (r == 0)
473 goto input_failure;
474 nread += (int)r;
475 nassigned++;
476 }
477 nconversions++;
478 break;
479
480 case CT_CCL:
481 /* scan a (nonempty) character class (sets NOSKIP) */
482 if (width == 0)
483 width = (size_t)~0; /* `infinity' */
484 /* take only those things in the class */
485 if (flags & LONG) {
486 wchar_t twc;
487 int nchars;
488
489 if ((flags & SUPPRESS) == 0)
490 wcp = va_arg(ap, wchar_t *);
491 else
492 wcp = &twc;
493 n = 0;
494 nchars = 0;
495 while (width != 0) {
496 if (n == MB_CUR_MAX) {
497 fp->_flags |= __SERR;
498 goto input_failure;
499 }
500 buf[n++] = *fp->_p;
501 fp->_p++;
502 fp->_r--;
503 mbs = initial;
504 nconv = mbrtowc(wcp, buf, n, &mbs);
505 if (nconv == (size_t)-1) {
506 fp->_flags |= __SERR;
507 goto input_failure;
508 }
509 if (nconv == 0)
510 *wcp = L'\0';
511 if (nconv != (size_t)-2) {
512 if (wctob(*wcp) != EOF &&
513 !ccltab[wctob(*wcp)]) {
514 while (n != 0) {
515 n--;
516 (void)ungetc(buf[n],
517 fp);
518 }
519 break;
520 }
521 nread += (int)n;
522 width--;
523 if (!(flags & SUPPRESS))
524 wcp++;
525 nchars++;
526 n = 0;
527 }
528 if (fp->_r <= 0 && __srefill(fp)) {
529 if (n != 0) {
530 fp->_flags |= __SERR;
531 goto input_failure;
532 }
533 break;
534 }
535 }
536 if (n != 0) {
537 fp->_flags |= __SERR;
538 goto input_failure;
539 }
540 n = nchars;
541 if (n == 0)
542 goto match_failure;
543 if (!(flags & SUPPRESS)) {
544 *wcp = L'\0';
545 nassigned++;
546 }
547 } else if (flags & SUPPRESS) {
548 n = 0;
549 while (ccltab[*fp->_p]) {
550 n++, fp->_r--, fp->_p++;
551 if (--width == 0)
552 break;
553 if (fp->_r <= 0 && __srefill(fp)) {
554 if (n == 0)
555 goto input_failure;
556 break;
557 }
558 }
559 if (n == 0)
560 goto match_failure;
561 } else {
562 p0 = p = va_arg(ap, char *);
563 while (ccltab[*fp->_p]) {
564 fp->_r--;
565 *p++ = *fp->_p++;
566 if (--width == 0)
567 break;
568 if (fp->_r <= 0 && __srefill(fp)) {
569 if (p == p0)
570 goto input_failure;
571 break;
572 }
573 }
574 n = p - p0;
575 if (n == 0)
576 goto match_failure;
577 *p = 0;
578 nassigned++;
579 }
580 nread += (int)n;
581 nconversions++;
582 break;
583
584 case CT_STRING:
585 /* like CCL, but zero-length string OK, & no NOSKIP */
586 if (width == 0)
587 width = (size_t)~0;
588 if (flags & LONG) {
589 wchar_t twc;
590
591 if ((flags & SUPPRESS) == 0)
592 wcp = va_arg(ap, wchar_t *);
593 else
594 wcp = &twc;
595 n = 0;
596 while (!isspace(*fp->_p) && width != 0) {
597 if (n == MB_CUR_MAX) {
598 fp->_flags |= __SERR;
599 goto input_failure;
600 }
601 buf[n++] = *fp->_p;
602 fp->_p++;
603 fp->_r--;
604 mbs = initial;
605 nconv = mbrtowc(wcp, buf, n, &mbs);
606 if (nconv == (size_t)-1) {
607 fp->_flags |= __SERR;
608 goto input_failure;
609 }
610 if (nconv == 0)
611 *wcp = L'\0';
612 if (nconv != (size_t)-2) {
613 if (iswspace(*wcp)) {
614 while (n != 0) {
615 n--;
616 (void)ungetc(buf[n],
617 fp);
618 }
619 break;
620 }
621 nread += (int)n;
622 width--;
623 if (!(flags & SUPPRESS))
624 wcp++;
625 n = 0;
626 }
627 if (fp->_r <= 0 && __srefill(fp)) {
628 if (n != 0) {
629 fp->_flags |= __SERR;
630 goto input_failure;
631 }
632 break;
633 }
634 }
635 if (!(flags & SUPPRESS)) {
636 *wcp = L'\0';
637 nassigned++;
638 }
639 } else if (flags & SUPPRESS) {
640 n = 0;
641 while (!isspace(*fp->_p)) {
642 n++, fp->_r--, fp->_p++;
643 if (--width == 0)
644 break;
645 if (fp->_r <= 0 && __srefill(fp))
646 break;
647 }
648 nread += (int)n;
649 } else {
650 p0 = p = va_arg(ap, char *);
651 while (!isspace(*fp->_p)) {
652 fp->_r--;
653 *p++ = *fp->_p++;
654 if (--width == 0)
655 break;
656 if (fp->_r <= 0 && __srefill(fp))
657 break;
658 }
659 *p = 0;
660 nread += (int)(p - p0);
661 nassigned++;
662 }
663 nconversions++;
664 continue;
665
666 case CT_INT:
667 //Print(L"%a: %d\n", __func__, __LINE__);
668 /* scan an integer as if by the conversion function */
669 #ifdef hardway
670 if (width == 0 || width > sizeof(buf) - 1)
671 width = sizeof(buf) - 1;
672 #else
673 /* size_t is unsigned, hence this optimisation */
674 if (--width > sizeof(buf) - 2)
675 width = sizeof(buf) - 2;
676 width++;
677 #endif
678 flags |= SIGNOK | NDIGITS | NZDIGITS;
679 for (p = buf; width; width--) {
680 c = *fp->_p;
681 /*
682 * Switch on the character; `goto ok'
683 * if we accept it as a part of number.
684 */
685 switch (c) {
686
687 /*
688 * The digit 0 is always legal, but is
689 * special. For %i conversions, if no
690 * digits (zero or nonzero) have been
691 * scanned (only signs), we will have
692 * base==0. In that case, we should set
693 * it to 8 and enable 0x prefixing.
694 * Also, if we have not scanned zero digits
695 * before this, do not turn off prefixing
696 * (someone else will turn it off if we
697 * have scanned any nonzero digits).
698 */
699 case '0':
700 if (base == 0) {
701 base = 8;
702 flags |= PFXOK;
703 }
704 if (flags & NZDIGITS)
705 flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
706 else
707 flags &= ~(SIGNOK|PFXOK|NDIGITS);
708 goto ok;
709
710 /* 1 through 7 always legal */
711 case '1': case '2': case '3':
712 case '4': case '5': case '6': case '7':
713 base = basefix[base];
714 flags &= ~(SIGNOK | PFXOK | NDIGITS);
715 goto ok;
716
717 /* digits 8 and 9 ok iff decimal or hex */
718 case '8': case '9':
719 base = basefix[base];
720 if (base <= 8)
721 break; /* not legal here */
722 flags &= ~(SIGNOK | PFXOK | NDIGITS);
723 goto ok;
724
725 /* letters ok iff hex */
726 case 'A': case 'B': case 'C':
727 case 'D': case 'E': case 'F':
728 case 'a': case 'b': case 'c':
729 case 'd': case 'e': case 'f':
730 /* no need to fix base here */
731 if (base <= 10)
732 break; /* not legal here */
733 flags &= ~(SIGNOK | PFXOK | NDIGITS);
734 goto ok;
735
736 /* sign ok only as first character */
737 case '+': case '-':
738 if (flags & SIGNOK) {
739 flags &= ~SIGNOK;
740 flags |= HAVESIGN;
741 goto ok;
742 }
743 break;
744
745 /*
746 * x ok iff flag still set & 2nd char (or
747 * 3rd char if we have a sign).
748 */
749 case 'x': case 'X':
750 if (flags & PFXOK && p ==
751 buf + 1 + !!(flags & HAVESIGN)) {
752 base = 16; /* if %i */
753 flags &= ~PFXOK;
754 goto ok;
755 }
756 break;
757 }
758
759 /*
760 * If we got here, c is not a legal character
761 * for a number. Stop accumulating digits.
762 */
763 break;
764 ok:
765 /*
766 * c is legal: store it and look at the next.
767 */
768 *p++ = (char)c;
769 if (--fp->_r > 0)
770 fp->_p++;
771 else if (__srefill(fp))
772 break; /* EOF */
773 }
774 /*
775 * If we had only a sign, it is no good; push
776 * back the sign. If the number ends in `x',
777 * it was [sign] '0' 'x', so push back the x
778 * and treat it as [sign] '0'.
779 */
780 if (flags & NDIGITS) {
781 if (p > buf)
782 (void)ungetc(*(u_char *)--p, fp);
783 goto match_failure;
784 }
785 c = ((u_char *)p)[-1];
786 if (c == 'x' || c == 'X') {
787 --p;
788 (void)ungetc(c, fp);
789 }
790 if ((flags & SUPPRESS) == 0) {
791 //uintmax_t res;
792 // Use a union to get around the truncation warnings.
793 union {
794 uintmax_t umax;
795 intmax_t imax;
796 void *vp;
797 ptrdiff_t pdt;
798 size_t sz;
799 long long ll;
800 long lo;
801 int in;
802 short hw;
803 char ch;
804 } res;
805
806 *p = 0;
807 if ((flags & UNSIGNED) == 0)
808 res.imax = strtoimax(buf, (char **)NULL, base);
809 else
810 res.umax = strtoumax(buf, (char **)NULL, base);
811 if (flags & POINTER)
812 *va_arg(ap, void **) = res.vp;
813 //(void *)((uintptr_t)res);
814 else if (flags & SHORTSHORT)
815 *va_arg(ap, char *) = res.ch;
816 else if (flags & SHORT)
817 *va_arg(ap, short *) = res.hw;
818 else if (flags & LONG)
819 *va_arg(ap, long *) = res.lo;
820 else if (flags & LONGLONG)
821 *va_arg(ap, long long *) = res.ll;
822 else if (flags & INTMAXT)
823 *va_arg(ap, intmax_t *) = res.imax;
824 else if (flags & PTRDIFFT)
825 *va_arg(ap, ptrdiff_t *) = res.pdt;
826 //(ptrdiff_t)res;
827 else if (flags & SIZET)
828 *va_arg(ap, size_t *) = res.sz;
829 else
830 *va_arg(ap, int *) = res.in;
831 nassigned++;
832 }
833 nread += (int)(p - buf);
834 nconversions++;
835 //Print(L"%a: %d\n", __func__, __LINE__);
836 break;
837
838 #ifndef NO_FLOATING_POINT
839 case CT_FLOAT:
840 /* scan a floating point number as if by strtod */
841 if (width == 0 || width > sizeof(buf) - 1)
842 width = sizeof(buf) - 1;
843 if ((width = parsefloat(fp, buf, buf + width)) == 0)
844 goto match_failure;
845 if ((flags & SUPPRESS) == 0) {
846 if (flags & LONGDBL) {
847 long double res = strtold(buf, &p);
848 *va_arg(ap, long double *) = res;
849 } else if (flags & LONG) {
850 double res = strtod(buf, &p);
851 *va_arg(ap, double *) = res;
852 } else {
853 float res = strtof(buf, &p);
854 *va_arg(ap, float *) = res;
855 }
856 if (__scanfdebug && p - buf != (ptrdiff_t)width)
857 abort();
858 nassigned++;
859 }
860 nread += (int)width;
861 nconversions++;
862 break;
863 #endif /* !NO_FLOATING_POINT */
864 }
865 }
866 input_failure:
867 //Print(L"%a: %d\n", __func__, __LINE__);
868 return (nconversions != 0 ? nassigned : EOF);
869 match_failure:
870 return (nassigned);
871 }
872
873 /*
874 * Fill in the given table from the scanset at the given format
875 * (just after `['). Return a pointer to the character past the
876 * closing `]'. The table has a 1 wherever characters should be
877 * considered part of the scanset.
878 */
879 static const u_char *
__sccl(char * tab,const u_char * fmt)880 __sccl(char *tab, const u_char *fmt)
881 {
882 int c, n, v, i;
883
884 _DIAGASSERT(tab != NULL);
885 _DIAGASSERT(fmt != NULL);
886 /* first `clear' the whole table */
887 c = *fmt++; /* first char hat => negated scanset */
888 if (c == '^') {
889 v = 1; /* default => accept */
890 c = *fmt++; /* get new first char */
891 } else
892 v = 0; /* default => reject */
893
894 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */
895 (void)memset(tab, v, 256);
896
897 if (c == 0)
898 return (fmt - 1);/* format ended before closing ] */
899
900 /*
901 * Now set the entries corresponding to the actual scanset
902 * to the opposite of the above.
903 *
904 * The first character may be ']' (or '-') without being special;
905 * the last character may be '-'.
906 */
907 v = 1 - v;
908 for (;;) {
909 tab[c] = (char)v; /* take character c */
910 doswitch:
911 n = *fmt++; /* and examine the next */
912 switch (n) {
913
914 case 0: /* format ended too soon */
915 return (fmt - 1);
916
917 case '-':
918 /*
919 * A scanset of the form
920 * [01+-]
921 * is defined as `the digit 0, the digit 1,
922 * the character +, the character -', but
923 * the effect of a scanset such as
924 * [a-zA-Z0-9]
925 * is implementation defined. The V7 Unix
926 * scanf treats `a-z' as `the letters a through
927 * z', but treats `a-a' as `the letter a, the
928 * character -, and the letter a'.
929 *
930 * For compatibility, the `-' is not considerd
931 * to define a range if the character following
932 * it is either a close bracket (required by ANSI)
933 * or is not numerically greater than the character
934 * we just stored in the table (c).
935 */
936 n = *fmt;
937 if (n == ']' || (__collate_load_error ? n < c :
938 __collate_range_cmp(n, c) < 0)) {
939 c = '-';
940 break; /* resume the for(;;) */
941 }
942 fmt++;
943 /* fill in the range */
944 if (__collate_load_error) {
945 do
946 tab[++c] = (char)v;
947 while (c < n);
948 } else {
949 for (i = 0; i < 256; i ++)
950 if (__collate_range_cmp(c, i) < 0 &&
951 __collate_range_cmp(i, n) <= 0)
952 tab[i] = (char)v;
953 }
954 #if 1 /* XXX another disgusting compatibility hack */
955 c = n;
956 /*
957 * Alas, the V7 Unix scanf also treats formats
958 * such as [a-c-e] as `the letters a through e'.
959 * This too is permitted by the standard....
960 */
961 goto doswitch;
962 #else
963 c = *fmt++;
964 if (c == 0)
965 return (fmt - 1);
966 if (c == ']')
967 return (fmt);
968 #endif
969
970 case ']': /* end of scanset */
971 return (fmt);
972
973 default: /* just another character */
974 c = n;
975 break;
976 }
977 }
978 /* NOTREACHED */
979 }
980
981 #ifndef NO_FLOATING_POINT
982 static int
parsefloat(FILE * fp,char * buf,char * end)983 parsefloat(FILE *fp, char *buf, char *end)
984 {
985 char *commit, *p;
986 int infnanpos = 0;
987 enum {
988 S_START, S_GOTSIGN, S_INF, S_NAN, S_MAYBEHEX,
989 S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
990 } state = S_START;
991 unsigned char c;
992 char decpt = *localeconv()->decimal_point;
993 _Bool gotmantdig = 0, ishex = 0;
994
995 if(fp == NULL) {
996 errno = EINVAL;
997 return (EOF);
998 }
999
1000 /*
1001 * We set commit = p whenever the string we have read so far
1002 * constitutes a valid representation of a floating point
1003 * number by itself. At some point, the parse will complete
1004 * or fail, and we will ungetc() back to the last commit point.
1005 * To ensure that the file offset gets updated properly, it is
1006 * always necessary to read at least one character that doesn't
1007 * match; thus, we can't short-circuit "infinity" or "nan(...)".
1008 */
1009 commit = buf - 1;
1010 for (p = buf; p < end; ) {
1011 c = *fp->_p;
1012 reswitch:
1013 switch (state) {
1014 case S_START:
1015 state = S_GOTSIGN;
1016 if (c == '-' || c == '+')
1017 break;
1018 else
1019 goto reswitch;
1020 case S_GOTSIGN:
1021 switch (c) {
1022 case '0':
1023 state = S_MAYBEHEX;
1024 commit = p;
1025 break;
1026 case 'I':
1027 case 'i':
1028 state = S_INF;
1029 break;
1030 case 'N':
1031 case 'n':
1032 state = S_NAN;
1033 break;
1034 default:
1035 state = S_DIGITS;
1036 goto reswitch;
1037 }
1038 break;
1039 case S_INF:
1040 if (infnanpos > 6 ||
1041 (c != "nfinity"[infnanpos] &&
1042 c != "NFINITY"[infnanpos]))
1043 goto parsedone;
1044 if (infnanpos == 1 || infnanpos == 6)
1045 commit = p; /* inf or infinity */
1046 infnanpos++;
1047 break;
1048 case S_NAN:
1049 switch (infnanpos) {
1050 case -1: /* XXX kludge to deal with nan(...) */
1051 goto parsedone;
1052 case 0:
1053 if (c != 'A' && c != 'a')
1054 goto parsedone;
1055 break;
1056 case 1:
1057 if (c != 'N' && c != 'n')
1058 goto parsedone;
1059 else
1060 commit = p;
1061 break;
1062 case 2:
1063 if (c != '(')
1064 goto parsedone;
1065 break;
1066 default:
1067 if (c == ')') {
1068 commit = p;
1069 infnanpos = -2;
1070 } else if (!isalnum(c) && c != '_')
1071 goto parsedone;
1072 break;
1073 }
1074 infnanpos++;
1075 break;
1076 case S_MAYBEHEX:
1077 state = S_DIGITS;
1078 if (c == 'X' || c == 'x') {
1079 ishex = 1;
1080 break;
1081 } else { /* we saw a '0', but no 'x' */
1082 gotmantdig = 1;
1083 goto reswitch;
1084 }
1085 case S_DIGITS:
1086 if ((ishex && isxdigit(c)) || isdigit(c))
1087 gotmantdig = 1;
1088 else {
1089 state = S_FRAC;
1090 if (c != decpt)
1091 goto reswitch;
1092 }
1093 if (gotmantdig)
1094 commit = p;
1095 break;
1096 case S_FRAC:
1097 if (((c == 'E' || c == 'e') && !ishex) ||
1098 ((c == 'P' || c == 'p') && ishex)) {
1099 if (!gotmantdig)
1100 goto parsedone;
1101 else
1102 state = S_EXP;
1103 } else if ((ishex && isxdigit(c)) || isdigit(c)) {
1104 commit = p;
1105 gotmantdig = 1;
1106 } else
1107 goto parsedone;
1108 break;
1109 case S_EXP:
1110 state = S_EXPDIGITS;
1111 if (c == '-' || c == '+')
1112 break;
1113 else
1114 goto reswitch;
1115 case S_EXPDIGITS:
1116 if (isdigit(c))
1117 commit = p;
1118 else
1119 goto parsedone;
1120 break;
1121 default:
1122 abort();
1123 }
1124 *p++ = c;
1125 if (--fp->_r > 0)
1126 fp->_p++;
1127 else if (__srefill(fp))
1128 break; /* EOF */
1129 }
1130
1131 parsedone:
1132 while (commit < --p)
1133 (void)ungetc(*(u_char *)p, fp);
1134 *++commit = '\0';
1135 return (int)(commit - buf);
1136 }
1137 #endif
1138