1 /*-
2  * Copyright (c) 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Chris Torek.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <ctype.h>
34 #include <stdlib.h>
35 
36 #include "local.h"
37 #include "floatio.h"
38 
/*
 * Maximum length of numeric string.
 * NOTE(review): no function visible in this file references BUF;
 * presumably it mirrors the size of the callers' scan buffers -- confirm
 * before relying on it.
 */
#define	BUF		513
40 
parsefloat(FILE * fp,char * buf,char * end)41 size_t parsefloat(FILE *fp, char *buf, char *end) {
42 	char *commit, *p;
43 	int infnanpos = 0;
44 	enum {
45 		S_START, S_GOTSIGN, S_INF, S_NAN, S_MAYBEHEX,
46 		S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
47 	} state = S_START;
48 	unsigned char c;
49 	int gotmantdig = 0, ishex = 0;
50 
51 	/*
52 	 * We set commit = p whenever the string we have read so far
53 	 * constitutes a valid representation of a floating point
54 	 * number by itself.  At some point, the parse will complete
55 	 * or fail, and we will ungetc() back to the last commit point.
56 	 * To ensure that the file offset gets updated properly, it is
57 	 * always necessary to read at least one character that doesn't
58 	 * match; thus, we can't short-circuit "infinity" or "nan(...)".
59 	 */
60 	commit = buf - 1;
61 	for (p = buf; p < end; ) {
62 		c = *fp->_p;
63 reswitch:
64 		switch (state) {
65 		case S_START:
66 			state = S_GOTSIGN;
67 			if (c == '-' || c == '+')
68 				break;
69 			else
70 				goto reswitch;
71 		case S_GOTSIGN:
72 			switch (c) {
73 			case '0':
74 				state = S_MAYBEHEX;
75 				commit = p;
76 				break;
77 			case 'I':
78 			case 'i':
79 				state = S_INF;
80 				break;
81 			case 'N':
82 			case 'n':
83 				state = S_NAN;
84 				break;
85 			default:
86 				state = S_DIGITS;
87 				goto reswitch;
88 			}
89 			break;
90 		case S_INF:
91 			if (infnanpos > 6 ||
92 			    (c != "nfinity"[infnanpos] &&
93 			     c != "NFINITY"[infnanpos]))
94 				goto parsedone;
95 			if (infnanpos == 1 || infnanpos == 6)
96 				commit = p;	/* inf or infinity */
97 			infnanpos++;
98 			break;
99 		case S_NAN:
100 			switch (infnanpos) {
101 			case -1:	/* XXX kludge to deal with nan(...) */
102 				goto parsedone;
103 			case 0:
104 				if (c != 'A' && c != 'a')
105 					goto parsedone;
106 				break;
107 			case 1:
108 				if (c != 'N' && c != 'n')
109 					goto parsedone;
110 				else
111 					commit = p;
112 				break;
113 			case 2:
114 				if (c != '(')
115 					goto parsedone;
116 				break;
117 			default:
118 				if (c == ')') {
119 					commit = p;
120 					infnanpos = -2;
121 				} else if (!isalnum(c) && c != '_')
122 					goto parsedone;
123 				break;
124 			}
125 			infnanpos++;
126 			break;
127 		case S_MAYBEHEX:
128 			state = S_DIGITS;
129 			if (c == 'X' || c == 'x') {
130 				ishex = 1;
131 				break;
132 			} else {	/* we saw a '0', but no 'x' */
133 				gotmantdig = 1;
134 				goto reswitch;
135 			}
136 		case S_DIGITS:
137 			if ((ishex && isxdigit(c)) || isdigit(c))
138 				gotmantdig = 1;
139 			else {
140 				state = S_FRAC;
141 				if (c != '.')
142 					goto reswitch;
143 			}
144 			if (gotmantdig)
145 				commit = p;
146 			break;
147 		case S_FRAC:
148 			if (((c == 'E' || c == 'e') && !ishex) ||
149 			    ((c == 'P' || c == 'p') && ishex)) {
150 				if (!gotmantdig)
151 					goto parsedone;
152 				else
153 					state = S_EXP;
154 			} else if ((ishex && isxdigit(c)) || isdigit(c)) {
155 				commit = p;
156 				gotmantdig = 1;
157 			} else
158 				goto parsedone;
159 			break;
160 		case S_EXP:
161 			state = S_EXPDIGITS;
162 			if (c == '-' || c == '+')
163 				break;
164 			else
165 				goto reswitch;
166 		case S_EXPDIGITS:
167 			if (isdigit(c))
168 				commit = p;
169 			else
170 				goto parsedone;
171 			break;
172 		default:
173 			abort();
174 		}
175 		*p++ = c;
176 		if (--fp->_r > 0)
177 			fp->_p++;
178 		else if (__srefill(fp))
179 			break;	/* EOF */
180 	}
181 
182 parsedone:
183 	while (commit < --p)
184 		(void)ungetc(*(unsigned char *)p, fp);
185 	*++commit = '\0';
186 	return commit - buf;
187 }
188 
/*
 * Wide-character counterpart of parsefloat(): scan the longest prefix of
 * the input on fp that forms a floating point number and copy it into buf.
 *
 * The state machine below accepts, after an optional sign:
 *   - decimal digits with an optional '.' and an optional 'e'/'E'
 *     exponent (optionally signed, decimal digits);
 *   - a "0x"/"0X" prefix followed by hexadecimal digits with an optional
 *     '.' and an optional 'p'/'P' exponent (optionally signed, decimal
 *     digits);
 *   - "inf" or "infinity", matched case-insensitively;
 *   - "nan" or "nan(<alnum-or-underscore chars>)", matched
 *     case-insensitively.
 *
 * fp:  stream to read from; characters are fetched one at a time with
 *      __fgetwc_unlock().
 * buf: destination for the scanned wide characters.
 * end: scanning stops once p reaches end, so at most (end - buf)
 *      characters are stored.  The terminating L'\0' is written right
 *      after the accepted prefix, which can be at *end itself, so that
 *      location must be writable.
 *
 * Returns the length of the accepted prefix (0 if nothing matched).
 * Characters that were read but are not part of the accepted prefix are
 * pushed back onto fp with ungetwc().
 */
size_t wparsefloat(FILE *fp, wchar_t *buf, wchar_t *end) {
	wchar_t *commit, *p;
	int infnanpos = 0;
	enum {
		S_START, S_GOTSIGN, S_INF, S_NAN, S_MAYBEHEX,
		S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
	} state = S_START;
	wint_t c;
	int gotmantdig = 0, ishex = 0;

	/*
	 * commit always points one past the last character of the longest
	 * prefix read so far that is, by itself, a valid representation of
	 * a floating point number; we set commit = p + 1 whenever the
	 * current character completes such a prefix.  (Keeping commit in
	 * the range [buf, end] avoids ever forming a pointer before the
	 * start of buf, which would be undefined behaviour.)  At some point
	 * the parse will complete or fail, and we will ungetwc() back to
	 * the last commit point.
	 * To ensure that the file offset gets updated properly, it is
	 * always necessary to read at least one character that doesn't
	 * match; thus, we can't short-circuit "infinity" or "nan(...)".
	 */
	commit = buf;
	c = WEOF;
	for (p = buf; p < end; ) {
		if ((c = __fgetwc_unlock(fp)) == WEOF)
			break;
reswitch:
		switch (state) {
		case S_START:
			state = S_GOTSIGN;
			if (c == '-' || c == '+')
				break;
			else
				goto reswitch;
		case S_GOTSIGN:
			switch (c) {
			case '0':
				state = S_MAYBEHEX;
				commit = p + 1;
				break;
			case 'I':
			case 'i':
				state = S_INF;
				break;
			case 'N':
			case 'n':
				state = S_NAN;
				break;
			default:
				state = S_DIGITS;
				goto reswitch;
			}
			break;
		case S_INF:
			/* infnanpos indexes the text expected after the 'i'. */
			if (infnanpos > 6 ||
			    (c != (wint_t)"nfinity"[infnanpos] &&
			     c != (wint_t)"NFINITY"[infnanpos]))
				goto parsedone;
			if (infnanpos == 1 || infnanpos == 6)
				commit = p + 1;	/* inf or infinity */
			infnanpos++;
			break;
		case S_NAN:
			switch (infnanpos) {
			case -1:	/* XXX kludge to deal with nan(...) */
				goto parsedone;
			case 0:
				if (c != 'A' && c != 'a')
					goto parsedone;
				break;
			case 1:
				if (c != 'N' && c != 'n')
					goto parsedone;
				else
					commit = p + 1;
				break;
			case 2:
				if (c != '(')
					goto parsedone;
				break;
			default:
				if (c == ')') {
					commit = p + 1;
					/* becomes -1 after the increment below */
					infnanpos = -2;
				} else if (!iswalnum(c) && c != '_')
					goto parsedone;
				break;
			}
			infnanpos++;
			break;
		case S_MAYBEHEX:
			state = S_DIGITS;
			if (c == 'X' || c == 'x') {
				ishex = 1;
				break;
			} else {	/* we saw a '0', but no 'x' */
				gotmantdig = 1;
				goto reswitch;
			}
		case S_DIGITS:
			if ((ishex && iswxdigit(c)) || iswdigit(c))
				gotmantdig = 1;
			else {
				state = S_FRAC;
				if (c != L'.')
					goto reswitch;
			}
			/* A digit, or a '.' following at least one digit. */
			if (gotmantdig)
				commit = p + 1;
			break;
		case S_FRAC:
			if (((c == 'E' || c == 'e') && !ishex) ||
			    ((c == 'P' || c == 'p') && ishex)) {
				if (!gotmantdig)
					goto parsedone;
				else
					state = S_EXP;
			} else if ((ishex && iswxdigit(c)) || iswdigit(c)) {
				commit = p + 1;
				gotmantdig = 1;
			} else
				goto parsedone;
			break;
		case S_EXP:
			state = S_EXPDIGITS;
			if (c == '-' || c == '+')
				break;
			else
				goto reswitch;
		case S_EXPDIGITS:
			if (iswdigit(c))
				commit = p + 1;
			else
				goto parsedone;
			break;
		default:
			abort();
		}
		/*
		 * Accept c: store it, and mark it as consumed so that it is
		 * not pushed back if the loop ends because buf is full.
		 */
		*p++ = c;
		c = WEOF;
	}

parsedone:
	/* First return the character that stopped the scan, if any ... */
	if (c != WEOF)
		ungetwc(c, fp);
	/* ... then, last character first, everything past the commit point. */
	while (p > commit) {
		--p;
		ungetwc(*p, fp);
	}
	*commit = L'\0';
	return (size_t)(commit - buf);
}
337