1 /* $OpenBSD: vfwscanf.c,v 1.4 2014/03/19 05:17:01 guenther Exp $ */
2 /*-
3 * Copyright (c) 1990, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Chris Torek.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34 #include <inttypes.h>
35 #include <limits.h>
36 #include <locale.h>
37 #include <stdarg.h>
38 #include <stddef.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <wctype.h>
43 #include "local.h"
44
45 #ifdef FLOATING_POINT
46 #include "floatio.h"
47 #endif
48
/*
 * Capacity, in wide characters, of the buffer used to collect the text of
 * a numeric field; one slot is kept for the terminating NUL.
 */
#define	BUF		513	/* Maximum length of numeric string. */

/*
 * Flags used during conversion.
 */
#define	LONG		0x00001	/* l: long or double */
#define	LONGDBL		0x00002	/* L: long double */
#define	SHORT		0x00004	/* h: short */
#define	SHORTSHORT	0x00008	/* hh: 8 bit integer */
#define	LLONG		0x00010	/* ll: long long (+ deprecated q: quad) */
#define	POINTER		0x00020	/* p: void * (as hex) */
#define	SIZEINT		0x00040	/* z: (signed) size_t */
#define	MAXINT		0x00080	/* j: intmax_t */
#define	PTRINT		0x00100	/* t: ptrdiff_t */
#define	NOSKIP		0x00200	/* [ or c: do not skip blanks */
#define	SUPPRESS	0x00400	/* *: suppress assignment */
#define	UNSIGNED	0x00800	/* %[oupxX] conversions */

/*
 * The following are used in numeric conversions only:
 * SIGNOK, HAVESIGN, NDIGITS, DPTOK, and EXPOK are for floating point;
 * SIGNOK, HAVESIGN, NDIGITS, PFXOK, and NZDIGITS are for integral.
 *
 * DPTOK/EXPOK deliberately share their bit values with PFXOK/NZDIGITS:
 * a single conversion is either floating or integral, never both.
 */
#define	SIGNOK		0x01000	/* +/- is (still) legal */
#define	HAVESIGN	0x02000	/* sign detected */
#define	NDIGITS		0x04000	/* no digits detected */

#define	DPTOK		0x08000	/* (float) decimal point is still legal */
#define	EXPOK		0x10000	/* (float) exponent (e+3, etc) still legal */

#define	PFXOK		0x08000	/* 0x prefix is (still) legal */
#define	NZDIGITS	0x10000	/* no zero digits detected */

/*
 * Conversion types.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* integer, i.e., wcstoimax or wcstoumax */
#define	CT_FLOAT	4	/* floating, i.e., wcstod */

#define	u_char	unsigned char
#define	u_long	unsigned long

/*
 * INCCL(_c): nonzero when wide character _c is accepted by the current
 * %[...] scanset.  Expands in terms of three names that must be in scope
 * at the point of use: `ccls' and `ccle' (start and one-past-the-end of
 * the set's characters) and `cclcompl' (nonzero for a negated set, in
 * which case membership is inverted).
 */
#define	INCCL(_c)							\
	(cclcompl ? (wmemchr(ccls, (_c), ccle - ccls) == NULL) :	\
	    (wmemchr(ccls, (_c), ccle - ccls) != NULL))
97
98 #pragma GCC diagnostic push
99 #pragma GCC diagnostic ignored "-Wframe-larger-than="
100
101 /*
102 * vfwscanf
103 */
104 int
__vfwscanf(FILE * __restrict fp,const wchar_t * __restrict fmt,__va_list ap)105 __vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, __va_list ap)
106 {
107 wint_t c; /* character from format, or conversion */
108 size_t width; /* field width, or 0 */
109 wchar_t *p; /* points into all kinds of strings */
110 int n; /* handy integer */
111 int flags; /* flags as defined above */
112 wchar_t *p0; /* saves original value of p when necessary */
113 int nassigned; /* number of fields assigned */
114 int nconversions; /* number of conversions */
115 int nread; /* number of characters consumed from fp */
116 int base; /* base argument to strtoimax/strtouimax */
117 wchar_t buf[BUF]; /* buffer for numeric conversions */
118 const wchar_t *ccls; /* character class start */
119 const wchar_t *ccle; /* character class end */
120 int cclcompl; /* ccl is complemented? */
121 wint_t wi; /* handy wint_t */
122 char *mbp; /* multibyte string pointer for %c %s %[ */
123 size_t nconv; /* number of bytes in mb. conversion */
124 char mbbuf[MB_LEN_MAX]; /* temporary mb. character buffer */
125 mbstate_t mbs;
126
127 /* `basefix' is used to avoid `if' tests in the integer scanner */
128 static short basefix[17] =
129 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
130
131 _SET_ORIENTATION(fp, 1);
132
133 nassigned = 0;
134 nconversions = 0;
135 nread = 0;
136 base = 0; /* XXX just to keep gcc happy */
137 ccls = ccle = NULL;
138 for (;;) {
139 c = *fmt++;
140 if (c == 0) {
141 return (nassigned);
142 }
143 if (iswspace(c)) {
144 while ((c = __fgetwc_unlock(fp)) != WEOF &&
145 iswspace(c))
146 ;
147 if (c != WEOF)
148 __ungetwc(c, fp);
149 continue;
150 }
151 if (c != '%')
152 goto literal;
153 width = 0;
154 flags = 0;
155 /*
156 * switch on the format. continue if done;
157 * break once format type is derived.
158 */
159 again: c = *fmt++;
160 switch (c) {
161 case '%':
162 literal:
163 if ((wi = __fgetwc_unlock(fp)) == WEOF)
164 goto input_failure;
165 if (wi != c) {
166 __ungetwc(wi, fp);
167 goto input_failure;
168 }
169 nread++;
170 continue;
171
172 case '*':
173 flags |= SUPPRESS;
174 goto again;
175 case 'j':
176 flags |= MAXINT;
177 goto again;
178 case 'L':
179 flags |= LONGDBL;
180 goto again;
181 case 'h':
182 if (*fmt == 'h') {
183 fmt++;
184 flags |= SHORTSHORT;
185 } else {
186 flags |= SHORT;
187 }
188 goto again;
189 case 'l':
190 if (*fmt == 'l') {
191 fmt++;
192 flags |= LLONG;
193 } else {
194 flags |= LONG;
195 }
196 goto again;
197 case 'q':
198 flags |= LLONG; /* deprecated */
199 goto again;
200 case 't':
201 flags |= PTRINT;
202 goto again;
203 case 'z':
204 flags |= SIZEINT;
205 goto again;
206
207 case '0': case '1': case '2': case '3': case '4':
208 case '5': case '6': case '7': case '8': case '9':
209 width = width * 10 + c - '0';
210 goto again;
211
212 /*
213 * Conversions.
214 * Those marked `compat' are for 4.[123]BSD compatibility.
215 *
216 * (According to ANSI, E and X formats are supposed
217 * to the same as e and x. Sorry about that.)
218 */
219 case 'D': /* compat */
220 flags |= LONG;
221 /* FALLTHROUGH */
222 case 'd':
223 c = CT_INT;
224 base = 10;
225 break;
226
227 case 'i':
228 c = CT_INT;
229 base = 0;
230 break;
231
232 case 'O': /* compat */
233 flags |= LONG;
234 /* FALLTHROUGH */
235 case 'o':
236 c = CT_INT;
237 flags |= UNSIGNED;
238 base = 8;
239 break;
240
241 case 'u':
242 c = CT_INT;
243 flags |= UNSIGNED;
244 base = 10;
245 break;
246
247 case 'X':
248 case 'x':
249 flags |= PFXOK; /* enable 0x prefixing */
250 c = CT_INT;
251 flags |= UNSIGNED;
252 base = 16;
253 break;
254
255 #ifdef FLOATING_POINT
256 case 'e': case 'E':
257 case 'f': case 'F':
258 case 'g': case 'G':
259 case 'a': case 'A':
260 c = CT_FLOAT;
261 break;
262 #endif
263
264 case 's':
265 c = CT_STRING;
266 break;
267
268 case '[':
269 ccls = fmt;
270 if (*fmt == '^') {
271 cclcompl = 1;
272 fmt++;
273 } else
274 cclcompl = 0;
275 if (*fmt == ']')
276 fmt++;
277 while (*fmt != '\0' && *fmt != ']')
278 fmt++;
279 ccle = fmt;
280 fmt++;
281 flags |= NOSKIP;
282 c = CT_CCL;
283 break;
284
285 case 'c':
286 flags |= NOSKIP;
287 c = CT_CHAR;
288 break;
289
290 case 'p': /* pointer format is like hex */
291 flags |= POINTER | PFXOK;
292 c = CT_INT;
293 flags |= UNSIGNED;
294 base = 16;
295 break;
296
297 case 'n':
298 nconversions++;
299 if (flags & SUPPRESS)
300 continue;
301 if (flags & SHORTSHORT)
302 *va_arg(ap, signed char *) = nread;
303 else if (flags & SHORT)
304 *va_arg(ap, short *) = nread;
305 else if (flags & LONG)
306 *va_arg(ap, long *) = nread;
307 else if (flags & SIZEINT)
308 *va_arg(ap, ssize_t *) = nread;
309 else if (flags & PTRINT)
310 *va_arg(ap, ptrdiff_t *) = nread;
311 else if (flags & LLONG)
312 *va_arg(ap, long long *) = nread;
313 else if (flags & MAXINT)
314 *va_arg(ap, intmax_t *) = nread;
315 else
316 *va_arg(ap, int *) = nread;
317 continue;
318
319 /*
320 * Disgusting backwards compatibility hacks. XXX
321 */
322 case '\0': /* compat */
323 return (EOF);
324
325 default: /* compat */
326 if (iswupper(c))
327 flags |= LONG;
328 c = CT_INT;
329 base = 10;
330 break;
331 }
332
333 /*
334 * Consume leading white space, except for formats
335 * that suppress this.
336 */
337 if ((flags & NOSKIP) == 0) {
338 while ((wi = __fgetwc_unlock(fp)) != WEOF &&
339 iswspace(wi))
340 nread++;
341 if (wi == WEOF)
342 goto input_failure;
343 __ungetwc(wi, fp);
344 }
345
346 /*
347 * Do the conversion.
348 */
349 switch (c) {
350
351 case CT_CHAR:
352 /* scan arbitrary characters (sets NOSKIP) */
353 if (width == 0)
354 width = 1;
355 if (flags & LONG) {
356 if (!(flags & SUPPRESS))
357 p = va_arg(ap, wchar_t *);
358 n = 0;
359 while (width-- != 0 &&
360 (wi = __fgetwc_unlock(fp)) != WEOF) {
361 if (!(flags & SUPPRESS))
362 *p++ = (wchar_t)wi;
363 n++;
364 }
365 if (n == 0)
366 goto input_failure;
367 nread += n;
368 if (!(flags & SUPPRESS))
369 nassigned++;
370 } else {
371 if (!(flags & SUPPRESS))
372 mbp = va_arg(ap, char *);
373 n = 0;
374 memset(&mbs, 0, sizeof(mbs));
375 while (width != 0 &&
376 (wi = __fgetwc_unlock(fp)) != WEOF) {
377 if (width >= MB_CUR_MAX &&
378 !(flags & SUPPRESS)) {
379 nconv = wcrtomb(mbp, wi, &mbs);
380 if (nconv == (size_t)-1)
381 goto input_failure;
382 } else {
383 nconv = wcrtomb(mbbuf, wi,
384 &mbs);
385 if (nconv == (size_t)-1)
386 goto input_failure;
387 if (nconv > width) {
388 __ungetwc(wi, fp);
389 break;
390 }
391 if (!(flags & SUPPRESS))
392 memcpy(mbp, mbbuf,
393 nconv);
394 }
395 if (!(flags & SUPPRESS))
396 mbp += nconv;
397 width -= nconv;
398 n++;
399 }
400 if (n == 0)
401 goto input_failure;
402 nread += n;
403 if (!(flags & SUPPRESS))
404 nassigned++;
405 }
406 nconversions++;
407 break;
408
409 case CT_CCL:
410 /* scan a (nonempty) character class (sets NOSKIP) */
411 if (width == 0)
412 width = (size_t)~0; /* `infinity' */
413 /* take only those things in the class */
414 if ((flags & SUPPRESS) && (flags & LONG)) {
415 n = 0;
416 while ((wi = __fgetwc_unlock(fp)) != WEOF &&
417 width-- != 0 && INCCL(wi))
418 n++;
419 if (wi != WEOF)
420 __ungetwc(wi, fp);
421 if (n == 0)
422 goto match_failure;
423 } else if (flags & LONG) {
424 p0 = p = va_arg(ap, wchar_t *);
425 while ((wi = __fgetwc_unlock(fp)) != WEOF &&
426 width-- != 0 && INCCL(wi))
427 *p++ = (wchar_t)wi;
428 if (wi != WEOF)
429 __ungetwc(wi, fp);
430 n = p - p0;
431 if (n == 0)
432 goto match_failure;
433 *p = 0;
434 nassigned++;
435 } else {
436 if (!(flags & SUPPRESS))
437 mbp = va_arg(ap, char *);
438 n = 0;
439 memset(&mbs, 0, sizeof(mbs));
440 while ((wi = __fgetwc_unlock(fp)) != WEOF &&
441 width != 0 && INCCL(wi)) {
442 if (width >= MB_CUR_MAX &&
443 !(flags & SUPPRESS)) {
444 nconv = wcrtomb(mbp, wi, &mbs);
445 if (nconv == (size_t)-1)
446 goto input_failure;
447 } else {
448 nconv = wcrtomb(mbbuf, wi,
449 &mbs);
450 if (nconv == (size_t)-1)
451 goto input_failure;
452 if (nconv > width)
453 break;
454 if (!(flags & SUPPRESS))
455 memcpy(mbp, mbbuf,
456 nconv);
457 }
458 if (!(flags & SUPPRESS))
459 mbp += nconv;
460 width -= nconv;
461 n++;
462 }
463 if (wi != WEOF)
464 __ungetwc(wi, fp);
465 if (!(flags & SUPPRESS)) {
466 *mbp = 0;
467 nassigned++;
468 }
469 }
470 nread += n;
471 nconversions++;
472 break;
473
474 case CT_STRING:
475 /* like CCL, but zero-length string OK, & no NOSKIP */
476 if (width == 0)
477 width = (size_t)~0;
478 if ((flags & SUPPRESS) && (flags & LONG)) {
479 while ((wi = __fgetwc_unlock(fp)) != WEOF &&
480 width-- != 0 &&
481 !iswspace(wi))
482 nread++;
483 if (wi != WEOF)
484 __ungetwc(wi, fp);
485 } else if (flags & LONG) {
486 p0 = p = va_arg(ap, wchar_t *);
487 while ((wi = __fgetwc_unlock(fp)) != WEOF &&
488 width-- != 0 &&
489 !iswspace(wi)) {
490 *p++ = (wchar_t)wi;
491 nread++;
492 }
493 if (wi != WEOF)
494 __ungetwc(wi, fp);
495 *p = 0;
496 nassigned++;
497 } else {
498 if (!(flags & SUPPRESS))
499 mbp = va_arg(ap, char *);
500 memset(&mbs, 0, sizeof(mbs));
501 while ((wi = __fgetwc_unlock(fp)) != WEOF &&
502 width != 0 &&
503 !iswspace(wi)) {
504 if (width >= MB_CUR_MAX &&
505 !(flags & SUPPRESS)) {
506 nconv = wcrtomb(mbp, wi, &mbs);
507 if (nconv == (size_t)-1)
508 goto input_failure;
509 } else {
510 nconv = wcrtomb(mbbuf, wi,
511 &mbs);
512 if (nconv == (size_t)-1)
513 goto input_failure;
514 if (nconv > width)
515 break;
516 if (!(flags & SUPPRESS))
517 memcpy(mbp, mbbuf,
518 nconv);
519 }
520 if (!(flags & SUPPRESS))
521 mbp += nconv;
522 width -= nconv;
523 nread++;
524 }
525 if (wi != WEOF)
526 __ungetwc(wi, fp);
527 if (!(flags & SUPPRESS)) {
528 *mbp = 0;
529 nassigned++;
530 }
531 }
532 nconversions++;
533 continue;
534
535 case CT_INT:
536 /* scan an integer as if by strtoimax/strtoumax */
537 if (width == 0 || width > sizeof(buf) /
538 sizeof(*buf) - 1)
539 width = sizeof(buf) / sizeof(*buf) - 1;
540 flags |= SIGNOK | NDIGITS | NZDIGITS;
541 for (p = buf; width; width--) {
542 c = __fgetwc_unlock(fp);
543 /*
544 * Switch on the character; `goto ok'
545 * if we accept it as a part of number.
546 */
547 switch (c) {
548
549 /*
550 * The digit 0 is always legal, but is
551 * special. For %i conversions, if no
552 * digits (zero or nonzero) have been
553 * scanned (only signs), we will have
554 * base==0. In that case, we should set
555 * it to 8 and enable 0x prefixing.
556 * Also, if we have not scanned zero digits
557 * before this, do not turn off prefixing
558 * (someone else will turn it off if we
559 * have scanned any nonzero digits).
560 */
561 case '0':
562 if (base == 0) {
563 base = 8;
564 flags |= PFXOK;
565 }
566 if (flags & NZDIGITS)
567 flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
568 else
569 flags &= ~(SIGNOK|PFXOK|NDIGITS);
570 goto ok;
571
572 /* 1 through 7 always legal */
573 case '1': case '2': case '3':
574 case '4': case '5': case '6': case '7':
575 base = basefix[base];
576 flags &= ~(SIGNOK | PFXOK | NDIGITS);
577 goto ok;
578
579 /* digits 8 and 9 ok iff decimal or hex */
580 case '8': case '9':
581 base = basefix[base];
582 if (base <= 8)
583 break; /* not legal here */
584 flags &= ~(SIGNOK | PFXOK | NDIGITS);
585 goto ok;
586
587 /* letters ok iff hex */
588 case 'A': case 'B': case 'C':
589 case 'D': case 'E': case 'F':
590 case 'a': case 'b': case 'c':
591 case 'd': case 'e': case 'f':
592 /* no need to fix base here */
593 if (base <= 10)
594 break; /* not legal here */
595 flags &= ~(SIGNOK | PFXOK | NDIGITS);
596 goto ok;
597
598 /* sign ok only as first character */
599 case '+': case '-':
600 if (flags & SIGNOK) {
601 flags &= ~SIGNOK;
602 flags |= HAVESIGN;
603 goto ok;
604 }
605 break;
606
607 /*
608 * x ok iff flag still set and 2nd char (or
609 * 3rd char if we have a sign).
610 */
611 case 'x': case 'X':
612 if ((flags & PFXOK) && p ==
613 buf + 1 + !!(flags & HAVESIGN)) {
614 base = 16; /* if %i */
615 flags &= ~PFXOK;
616 goto ok;
617 }
618 break;
619 }
620
621 /*
622 * If we got here, c is not a legal character
623 * for a number. Stop accumulating digits.
624 */
625 if (c != WEOF)
626 __ungetwc(c, fp);
627 break;
628 ok:
629 /*
630 * c is legal: store it and look at the next.
631 */
632 *p++ = (wchar_t)c;
633 }
634 /*
635 * If we had only a sign, it is no good; push
636 * back the sign. If the number ends in `x',
637 * it was [sign] '0' 'x', so push back the x
638 * and treat it as [sign] '0'.
639 */
640 if (flags & NDIGITS) {
641 if (p > buf)
642 __ungetwc(*--p, fp);
643 goto match_failure;
644 }
645 c = p[-1];
646 if (c == 'x' || c == 'X') {
647 --p;
648 __ungetwc(c, fp);
649 }
650 if ((flags & SUPPRESS) == 0) {
651 uintmax_t res;
652
653 *p = '\0';
654 if (flags & UNSIGNED)
655 res = wcstoimax(buf, NULL, base);
656 else
657 res = wcstoumax(buf, NULL, base);
658 if (flags & POINTER)
659 *va_arg(ap, void **) =
660 (void *)(uintptr_t)res;
661 else if (flags & MAXINT)
662 *va_arg(ap, intmax_t *) = res;
663 else if (flags & LLONG)
664 *va_arg(ap, long long *) = res;
665 else if (flags & SIZEINT)
666 *va_arg(ap, ssize_t *) = res;
667 else if (flags & PTRINT)
668 *va_arg(ap, ptrdiff_t *) = res;
669 else if (flags & LONG)
670 *va_arg(ap, long *) = res;
671 else if (flags & SHORT)
672 *va_arg(ap, short *) = res;
673 else if (flags & SHORTSHORT)
674 *va_arg(ap, signed char *) = res;
675 else
676 *va_arg(ap, int *) = res;
677 nassigned++;
678 }
679 nread += p - buf;
680 nconversions++;
681 break;
682
683 #ifdef FLOATING_POINT
684 case CT_FLOAT:
685 /* scan a floating point number as if by strtod */
686 if (width == 0 || width > sizeof(buf) /
687 sizeof(*buf) - 1)
688 width = sizeof(buf) / sizeof(*buf) - 1;
689 if ((width = wparsefloat(fp, buf, buf + width)) == 0)
690 goto match_failure;
691 if ((flags & SUPPRESS) == 0) {
692 if (flags & LONGDBL) {
693 long double res = wcstold(buf, &p);
694 *va_arg(ap, long double *) = res;
695 } else if (flags & LONG) {
696 double res = wcstod(buf, &p);
697 *va_arg(ap, double *) = res;
698 } else {
699 float res = wcstof(buf, &p);
700 *va_arg(ap, float *) = res;
701 }
702 if (p - buf != (ptrdiff_t)width) abort();
703 nassigned++;
704 }
705 nread += width;
706 nconversions++;
707 break;
708 #endif /* FLOATING_POINT */
709 }
710 }
711 input_failure:
712 return (nconversions != 0 ? nassigned : EOF);
713 match_failure:
714 return (nassigned);
715 }
716 #pragma GCC diagnostic pop
717
718 int
vfwscanf(FILE * __restrict fp,const wchar_t * __restrict fmt,__va_list ap)719 vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, __va_list ap)
720 {
721 int r;
722
723 FLOCKFILE(fp);
724 r = __vfwscanf(fp, fmt, ap);
725 FUNLOCKFILE(fp);
726 return (r);
727 }
728