1 /*  $OpenBSD: strptime.c,v 1.11 2005/08/08 08:05:38 espie Exp $ */
2 /*  $NetBSD: strptime.c,v 1.12 1998/01/20 21:39:40 mycroft Exp $    */
3 
4 /*-
5  * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code was contributed to The NetBSD Foundation by Klaus Klein.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *        This product includes software developed by the NetBSD
21  *        Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 //#include <sys/localedef.h>
40 #include <ctype.h>
41 #include <errno.h>
42 #include <locale.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <time.h>
46 #include "tzfile.h"
47 
/*
 * Built-in time locale: the English day/month names, AM/PM markers and
 * default date/time formats.  The parser reads it through _ctloc().
 */
static const struct {
    const char *abday[7];       /* abbreviated weekday names, Sunday first */
    const char *day[7];         /* full weekday names, Sunday first */
    const char *abmon[12];      /* abbreviated month names, January first */
    const char *mon[12];        /* full month names, January first */
    const char *am_pm[2];       /* ante/post meridiem markers */
    const char *d_t_fmt;        /* format used by %c */
    const char *d_fmt;          /* format used by %x */
    const char *t_fmt;          /* format used by %X */
    const char *t_fmt_ampm;     /* 12-hour clock time format */
} _DefaultTimeLocale = {
    .abday = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" },
    .day = {
        "Sunday", "Monday", "Tuesday", "Wednesday",
        "Thursday", "Friday", "Saturday"
    },
    .abmon = {
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
    },
    .mon = {
        "January", "February", "March", "April",
        "May", "June", "July", "August",
        "September", "October", "November", "December"
    },
    .am_pm = { "AM", "PM" },
    .d_t_fmt = "%a %b %d %H:%M:%S %Y",
    .d_fmt = "%m/%d/%y",
    .t_fmt = "%H:%M:%S",
    .t_fmt_ampm = "%I:%M:%S %p"
};
82 
/* Fetch field `x` of the built-in time locale table. */
#define _ctloc(x) (_DefaultTimeLocale.x)

/*
 * We do not implement alternate representations. However, conversions
 * use _LEGAL_ALT to reject modifiers they do not accept.
 *
 * _LEGAL_ALT(x) makes the enclosing function return 0 when `alt_format`
 * (a local variable of the caller) has any modifier bit set that is not
 * part of `x`.  It is wrapped in do/while(0) so that `_LEGAL_ALT(x);`
 * is a single statement, even inside an unbraced if/else.
 */
#define _ALT_E          0x01    /* "%E" modifier was seen */
#define _ALT_O          0x02    /* "%O" modifier was seen */
#define _LEGAL_ALT(x)       do { if (alt_format & ~(x)) return (0); } while (0)
92 
93 
/*
 * Year state shared by the %C and %y conversions.  The final tm_year is
 * only computed once the whole format has been processed, because the
 * century and the two-digit year may appear in either order.
 */
struct century_relyear {
    int century;    /* century in years (%C value * 100); strptime() starts it at TM_YEAR_BASE */
    int relyear;    /* two-digit year parsed by %y, or -1 when there is none */
};
98 
/* Zone names stored in tm_zone by %Z and %z. */
static char gmt[] = "GMT";
static char utc[] = "UTC";

/*
 * RFC-822/RFC-2822 North American zone abbreviations.  Only the first
 * four entries of each table are searched by %z; entry i of the standard
 * table is (-5 - i) hours from UTC, entry i of the daylight table is
 * (-4 - i) hours from UTC.
 */
static const char * const nast[5] = {
    "EST",
    "CST",
    "MST",
    "PST",
    "\0\0\0"
};
static const char * const nadt[5] = {
    "EDT",
    "CDT",
    "MDT",
    "PDT",
    "\0\0\0"
};
108 
/*
 * Forward declarations of the file-local helpers.  Buffers are spelled
 * `unsigned char` throughout (not the BSD typedef `u_char`), so the
 * declarations do not depend on a header providing that typedef.
 */
static  int _conv_num(const unsigned char **buf, int *dest, int llim, int ulim);
static  unsigned char *_strptime(const unsigned char *buf, const char *fmt,
        struct tm *tm, struct century_relyear *cr);
static  const unsigned char *_find_string(const unsigned char *bp, int *tgt,
        const char * const *n1, const char * const *n2, int c);
114 
115 
116 char *
strptime(const char * buf,const char * fmt,struct tm * tm)117 strptime(const char *buf, const char *fmt, struct tm *tm)
118 {
119     struct century_relyear cr;
120     cr.century = TM_YEAR_BASE;
121     cr.relyear = -1;
122     return (char*)(_strptime((const unsigned char*)buf, fmt, tm, &cr));
123 }
124 
125 static unsigned char *
_strptime(const unsigned char * buf,const char * fmt,struct tm * tm,struct century_relyear * cr)126 _strptime(const unsigned char *buf, const char *fmt, struct tm *tm, struct century_relyear *cr)
127 {
128     unsigned char c;
129     const unsigned char *bp, *ep;
130     size_t len = 0;
131     int alt_format, i, offs;
132     int neg = 0;
133 
134     bp = (unsigned char *)buf;
135     while ((c = *fmt) != '\0') {
136         /* Clear `alternate' modifier prior to new conversion. */
137         alt_format = 0;
138 
139         /* Eat up white-space. */
140         if (isspace(c)) {
141             while (isspace(*bp))
142                 bp++;
143 
144             fmt++;
145             continue;
146         }
147 
148         if ((c = *fmt++) != '%')
149             goto literal;
150 
151 
152 again:      switch (c = *fmt++) {
153         case '%':   /* "%%" is converted to "%". */
154 literal:
155         if (c != *bp++)
156             return (NULL);
157 
158         break;
159 
160         /*
161          * "Alternative" modifiers. Just set the appropriate flag
162          * and start over again.
163          */
164         case 'E':   /* "%E?" alternative conversion modifier. */
165             _LEGAL_ALT(0);
166             alt_format |= _ALT_E;
167             goto again;
168 
169         case 'O':   /* "%O?" alternative conversion modifier. */
170             _LEGAL_ALT(0);
171             alt_format |= _ALT_O;
172             goto again;
173 
174         /*
175          * "Complex" conversion rules, implemented through recursion.
176          */
177         case 'c':   /* Date and time, using the locale's format. */
178             _LEGAL_ALT(_ALT_E);
179             if (!(bp = _strptime(bp, _ctloc(d_t_fmt), tm, cr)))
180                 return (NULL);
181             break;
182 
183         case 'D':   /* The date as "%m/%d/%y". */
184             _LEGAL_ALT(0);
185             if (!(bp = _strptime(bp, "%m/%d/%y", tm, cr)))
186                 return (NULL);
187             break;
188 
189         case 'F':  /* The date as "%Y-%m-%d". */
190             _LEGAL_ALT(0);
191             if (!(bp = _strptime(bp, "%Y-%m-%d", tm, cr)))
192                 return (NULL);
193             continue;
194 
195         case 'R':   /* The time as "%H:%M". */
196             _LEGAL_ALT(0);
197             if (!(bp = _strptime(bp, "%H:%M", tm, cr)))
198                 return (NULL);
199             break;
200 
201         case 'r':   /* The time as "%I:%M:%S %p". */
202             _LEGAL_ALT(0);
203             if (!(bp = _strptime(bp, "%I:%M:%S %p", tm, cr)))
204                 return (NULL);
205             break;
206 
207         case 'T':   /* The time as "%H:%M:%S". */
208             _LEGAL_ALT(0);
209             if (!(bp = _strptime(bp, "%H:%M:%S", tm, cr)))
210                 return (NULL);
211             break;
212 
213         case 'v':  /* The date as "%e-%b-%Y". */
214             _LEGAL_ALT(0);
215             if (!(bp = _strptime(bp, "%e-%b-%Y", tm, cr)))
216                 return (NULL);
217             break;
218 
219         case 'X':   /* The time, using the locale's format. */
220             _LEGAL_ALT(_ALT_E);
221             if (!(bp = _strptime(bp, _ctloc(t_fmt), tm, cr)))
222                 return (NULL);
223             break;
224 
225         case 'x':   /* The date, using the locale's format. */
226             _LEGAL_ALT(_ALT_E);
227             if (!(bp = _strptime(bp, _ctloc(d_fmt), tm, cr)))
228                 return (NULL);
229             break;
230 
231         /*
232          * "Elementary" conversion rules.
233          */
234         case 'A':   /* The day of week, using the locale's form. */
235         case 'a':
236             _LEGAL_ALT(0);
237             for (i = 0; i < 7; i++) {
238                 /* Full name. */
239                 len = strlen(_ctloc(day[i]));
240                 if (strncasecmp(_ctloc(day[i]), (const char*)bp, len) == 0)
241                     break;
242 
243                 /* Abbreviated name. */
244                 len = strlen(_ctloc(abday[i]));
245                 if (strncasecmp(_ctloc(abday[i]), (const char*)bp, len) == 0)
246                     break;
247             }
248 
249             /* Nothing matched. */
250             if (i == 7)
251                 return (NULL);
252 
253             tm->tm_wday = i;
254             bp += len;
255             break;
256 
257         case 'B':   /* The month, using the locale's form. */
258         case 'b':
259         case 'h':
260             _LEGAL_ALT(0);
261             for (i = 0; i < 12; i++) {
262                 /* Full name. */
263                 len = strlen(_ctloc(mon[i]));
264                 if (strncasecmp(_ctloc(mon[i]), (const char*)bp, len) == 0)
265                     break;
266 
267                 /* Abbreviated name. */
268                 len = strlen(_ctloc(abmon[i]));
269                 if (strncasecmp(_ctloc(abmon[i]), (const char*)bp, len) == 0)
270                     break;
271             }
272 
273             /* Nothing matched. */
274             if (i == 12)
275                 return (NULL);
276 
277             tm->tm_mon = i;
278             bp += len;
279             break;
280 
281         case 'C':   /* The century number. */
282             _LEGAL_ALT(_ALT_E);
283             if (!(_conv_num(&bp, &i, 0, 99)))
284                 return (NULL);
285 
286             cr->century = i * 100;
287             break;
288 
289         case 'd':   /* The day of month. */
290         case 'e':
291             _LEGAL_ALT(_ALT_O);
292             if (!(_conv_num(&bp, &tm->tm_mday, 1, 31)))
293                 return (NULL);
294             break;
295 
296         case 'k':   /* The hour (24-hour clock representation). */
297             _LEGAL_ALT(0);
298             /* FALLTHROUGH */
299         case 'H':
300             _LEGAL_ALT(_ALT_O);
301             if (!(_conv_num(&bp, &tm->tm_hour, 0, 23)))
302                 return (NULL);
303             break;
304 
305         case 'l':   /* The hour (12-hour clock representation). */
306             _LEGAL_ALT(0);
307             /* FALLTHROUGH */
308         case 'I':
309             _LEGAL_ALT(_ALT_O);
310             if (!(_conv_num(&bp, &tm->tm_hour, 1, 12)))
311                 return (NULL);
312             break;
313 
314         case 'j':   /* The day of year. */
315             _LEGAL_ALT(0);
316             if (!(_conv_num(&bp, &tm->tm_yday, 1, 366)))
317                 return (NULL);
318             tm->tm_yday--;
319             break;
320 
321         case 'M':   /* The minute. */
322             _LEGAL_ALT(_ALT_O);
323             if (!(_conv_num(&bp, &tm->tm_min, 0, 59)))
324                 return (NULL);
325             break;
326 
327         case 'm':   /* The month. */
328             _LEGAL_ALT(_ALT_O);
329             if (!(_conv_num(&bp, &tm->tm_mon, 1, 12)))
330                 return (NULL);
331             tm->tm_mon--;
332             break;
333 
334         case 'P':
335         case 'p':   /* The locale's equivalent of AM/PM. */
336             _LEGAL_ALT(0);
337             /* AM? */
338             len = strlen(_ctloc(am_pm[0]));
339             if (strncasecmp(_ctloc(am_pm[0]), (const char*)bp, len) == 0) {
340                 if (tm->tm_hour > 12)   /* i.e., 13:00 AM ?! */
341                     return (NULL);
342                 else if (tm->tm_hour == 12)
343                     tm->tm_hour = 0;
344 
345                 bp += len;
346                 break;
347             }
348             /* PM? */
349             len = strlen(_ctloc(am_pm[1]));
350             if (strncasecmp(_ctloc(am_pm[1]), (const char*)bp, len) == 0) {
351                 if (tm->tm_hour > 12)   /* i.e., 13:00 PM ?! */
352                     return (NULL);
353                 else if (tm->tm_hour < 12)
354                     tm->tm_hour += 12;
355 
356                 bp += len;
357                 break;
358             }
359 
360             /* Nothing matched. */
361             return (NULL);
362 
363         case 'S':   /* The seconds. */
364             _LEGAL_ALT(_ALT_O);
365             if (!(_conv_num(&bp, &tm->tm_sec, 0, 61)))
366                 return (NULL);
367             break;
368 
369         case 's':
370             {
371                 // Android addition, based on FreeBSD's implementation.
372                 int saved_errno = errno;
373                 errno = 0;
374                 const unsigned char* old_bp = bp;
375                 long n = strtol((const char*) bp, (char**) &bp, 10);
376                 time_t t = n;
377                 if (bp == old_bp || errno == ERANGE || ((long) t) != n) {
378                     errno = saved_errno;
379                     return NULL;
380                 }
381                 errno = saved_errno;
382 
383                 if (localtime_r(&t, tm) == NULL) return NULL;
384             }
385             break;
386 
387 
388         case 'U':   /* The week of year, beginning on sunday. */
389         case 'W':   /* The week of year, beginning on monday. */
390             _LEGAL_ALT(_ALT_O);
391             /*
392              * XXX This is bogus, as we can not assume any valid
393              * information present in the tm structure at this
394              * point to calculate a real value, so just check the
395              * range for now.
396              */
397              if (!(_conv_num(&bp, &i, 0, 53)))
398                 return (NULL);
399              break;
400 
401         case 'w':   /* The day of week, beginning on sunday. */
402             _LEGAL_ALT(_ALT_O);
403             if (!(_conv_num(&bp, &tm->tm_wday, 0, 6)))
404                 return (NULL);
405             break;
406 
407         case 'u':  /* The day of week, monday = 1. */
408             _LEGAL_ALT(_ALT_O);
409             if (!(_conv_num(&bp, &i, 1, 7)))
410                 return (NULL);
411             tm->tm_wday = i % 7;
412             continue;
413 
414         case 'g':  /* The year corresponding to the ISO week
415                     * number but without the century.
416                     */
417             if (!(_conv_num(&bp, &i, 0, 99)))
418                 return (NULL);
419             continue;
420 
421         case 'G':  /* The year corresponding to the ISO week
422                     * number with century.
423                     */
424             do
425                 bp++;
426             while (isdigit(*bp));
427             continue;
428 
429         case 'V':  /* The ISO 8601:1988 week number as decimal */
430             if (!(_conv_num(&bp, &i, 0, 53)))
431                 return (NULL);
432             continue;
433 
434         case 'Y':   /* The year. */
435             _LEGAL_ALT(_ALT_E);
436             if (!(_conv_num(&bp, &i, 0, 9999)))
437                 return (NULL);
438 
439             cr->relyear = -1;
440             tm->tm_year = i - TM_YEAR_BASE;
441             break;
442 
443         case 'y':   /* The year within the century (2 digits). */
444             _LEGAL_ALT(_ALT_E | _ALT_O);
445             if (!(_conv_num(&bp, &cr->relyear, 0, 99)))
446                 return (NULL);
447             break;
448 
449 		case 'Z':
450 			tzset();
451 			if (strncmp((const char *)bp, gmt, 3) == 0) {
452 				tm->tm_isdst = 0;
453 				tm->tm_gmtoff = 0;
454 				tm->tm_zone = gmt;
455 				bp += 3;
456 			} else if (strncmp((const char *)bp, utc, 3) == 0) {
457 				tm->tm_isdst = 0;
458 				tm->tm_gmtoff = 0;
459 				tm->tm_zone = utc;
460 				bp += 3;
461 			} else {
462 				ep = _find_string(bp, &i,
463 						 (const char * const *)tzname,
464 						  NULL, 2);
465 				if (ep == NULL)
466 					return (NULL);
467 
468 				tm->tm_isdst = i;
469 				tm->tm_gmtoff = -(timezone);
470 				tm->tm_zone = tzname[i];
471 				bp = ep;
472 			}
473 			continue;
474 
475 		case 'z':
476 			/*
477 			 * We recognize all ISO 8601 formats:
478 			 * Z	= Zulu time/UTC
479 			 * [+-]hhmm
480 			 * [+-]hh:mm
481 			 * [+-]hh
482 			 * We recognize all RFC-822/RFC-2822 formats:
483 			 * UT|GMT
484 			 *          North American : UTC offsets
485 			 * E[DS]T = Eastern : -4 | -5
486 			 * C[DS]T = Central : -5 | -6
487 			 * M[DS]T = Mountain: -6 | -7
488 			 * P[DS]T = Pacific : -7 | -8
489 			 */
490 			while (isspace(*bp))
491 				bp++;
492 
493 			switch (*bp++) {
494 			case 'G':
495 				if (*bp++ != 'M')
496 					return NULL;
497 				/*FALLTHROUGH*/
498 			case 'U':
499 				if (*bp++ != 'T')
500 					return NULL;
501 				/*FALLTHROUGH*/
502 			case 'Z':
503 				tm->tm_isdst = 0;
504 				tm->tm_gmtoff = 0;
505 				tm->tm_zone = utc;
506 				continue;
507 			case '+':
508 				neg = 0;
509 				break;
510 			case '-':
511 				neg = 1;
512 				break;
513 			default:
514 				--bp;
515 				ep = _find_string(bp, &i, nast, NULL, 4);
516 				if (ep != NULL) {
517 					tm->tm_gmtoff = (-5 - i) * SECSPERHOUR;
518 					tm->tm_zone = (char *)nast[i];
519 					bp = ep;
520 					continue;
521 				}
522 				ep = _find_string(bp, &i, nadt, NULL, 4);
523 				if (ep != NULL) {
524 					tm->tm_isdst = 1;
525 					tm->tm_gmtoff = (-4 - i) * SECSPERHOUR;
526 					tm->tm_zone = (char *)nadt[i];
527 					bp = ep;
528 					continue;
529 				}
530 				return NULL;
531 			}
532 			if (!isdigit(bp[0]) || !isdigit(bp[1]))
533 				return NULL;
534 			offs = ((bp[0]-'0') * 10 + (bp[1]-'0')) * SECSPERHOUR;
535 			bp += 2;
536 			if (*bp == ':')
537 				bp++;
538 			if (isdigit(*bp)) {
539 				offs += (*bp++ - '0') * 10 * SECSPERMIN;
540 				if (!isdigit(*bp))
541 					return NULL;
542 				offs += (*bp++ - '0') * SECSPERMIN;
543 			}
544 			if (neg)
545 				offs = -offs;
546 			tm->tm_isdst = 0;	/* XXX */
547 			tm->tm_gmtoff = offs;
548 			tm->tm_zone = NULL;	/* XXX */
549 			continue;
550 
551         /*
552          * Miscellaneous conversions.
553          */
554         case 'n':   /* Any kind of white-space. */
555         case 't':
556             _LEGAL_ALT(0);
557             while (isspace(*bp))
558                 bp++;
559             break;
560 
561 
562         default:    /* Unknown/unsupported conversion. */
563             return (NULL);
564         }
565 
566 
567     }
568 
569     /*
570      * We need to evaluate the two digit year spec (%y)
571      * last as we can get a century spec (%C) at any time.
572      */
573     if (cr->relyear != -1) {
574         if (cr->century == TM_YEAR_BASE) {
575             if (cr->relyear <= 68)
576                 tm->tm_year = cr->relyear + 2000 - TM_YEAR_BASE;
577             else
578                 tm->tm_year = cr->relyear + 1900 - TM_YEAR_BASE;
579         } else {
580             tm->tm_year = cr->relyear + cr->century - TM_YEAR_BASE;
581         }
582     }
583 
584     return (unsigned char*)bp;
585 }
586 
/*
 * Read an unsigned decimal number from the text at *buf.
 *
 * Digits are consumed while another digit could still keep the value
 * within `ulim` and while no more digits than `ulim` itself has have been
 * read.  *buf is advanced past every digit that was consumed, including
 * when the range check below fails.
 *
 * Returns 1 and stores the value in *dest when it lies in [llim, ulim].
 * Returns 0, leaving *dest untouched, when the text does not start with a
 * digit or the value is out of range.
 */
static int
_conv_num(const unsigned char **buf, int *dest, int llim, int ulim)
{
	const unsigned char *cursor = *buf;
	int value = 0;
	int digit_budget = ulim;	/* reaches 0 after as many digits as ulim has */

	if (*cursor < '0' || *cursor > '9')
		return (0);

	for (;;) {
		value = value * 10 + (*cursor++ - '0');
		digit_budget /= 10;

		/* Stop when one more digit would necessarily overshoot ulim. */
		if (value * 10 > ulim || digit_budget == 0)
			break;
		/* Stop at the first non-digit. */
		if (*cursor < '0' || *cursor > '9')
			break;
	}
	*buf = cursor;

	if (value < llim || value > ulim)
		return (0);

	*dest = value;
	return (1);
}
609 
/*
 * Case-insensitively match the beginning of `bp` against the names in up
 * to two tables of `c` entries each.
 *
 * `n1` is searched first; when nothing in it matches and `n2` is not
 * NULL, `n2` is searched as well.  On the first match the index of the
 * name within its own table is stored in *tgt and a pointer just past the
 * matched text is returned.  When nothing matches, NULL is returned and
 * *tgt is left untouched.
 */
static const unsigned char *
_find_string(const unsigned char *bp, int *tgt, const char * const *n1,
		const char * const *n2, int c)
{
	int i;
	size_t len;	/* strlen() result; kept as size_t to avoid narrowing */

	/* check full name - then abbreviated ones */
	for (; n1 != NULL; n1 = n2, n2 = NULL) {
		for (i = 0; i < c; i++, n1++) {
			len = strlen(*n1);
			if (strncasecmp(*n1, (const char *)bp, len) == 0) {
				*tgt = i;
				return bp + len;
			}
		}
	}

	/* Nothing matched */
	return NULL;
}
631 
/*
 * Variant of strptime() that takes a locale argument.  The locale `l` is
 * not consulted; the call is forwarded unchanged to strptime().
 */
char *
strptime_l(const char *buf, const char *fmt, struct tm *tm, locale_t l)
{
    (void)l;    /* deliberately unused */
    return strptime(buf, fmt, tm);
}
635