1 /* $OpenBSD: strptime.c,v 1.31 2023/03/02 16:21:51 millert Exp $ */
2 /* $NetBSD: strptime.c,v 1.12 1998/01/20 21:39:40 mycroft Exp $ */
3 /*-
4 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code was contributed to The NetBSD Foundation by Klaus Klein.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include "private.h"
32
33 #include <ctype.h>
34 #include <errno.h>
35 #include <limits.h>
36 #include <locale.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <time.h>
40
41 #include "localedef.h"
42 #include "tzfile.h"
43
44 // Android: ignore OpenBSD's DEF_WEAK() stuff.
45 #define DEF_WEAK(sym) /* */
46 // Android: this code is not pointer-sign clean.
47 #pragma clang diagnostic ignored "-Wpointer-sign"
48 #pragma clang diagnostic ignored "-Wunused-function"
49 // Android: clang thinks people don't know && has higher precedence than ||.
50 #pragma clang diagnostic ignored "-Wlogical-op-parentheses"
51
52 #define _ctloc(x) (_CurrentTimeLocale->x)
53
54 /*
55 * We do not implement alternate representations. However, we always
56 * check whether a given modifier is allowed for a certain conversion.
57 */
58 #define _ALT_E 0x01
59 #define _ALT_O 0x02
60 #define _LEGAL_ALT(x) { if (alt_format & ~(x)) return (0); }
61
62 /*
63 * We keep track of some of the fields we set in order to compute missing ones.
64 */
65 #define FIELD_TM_MON (1 << 0)
66 #define FIELD_TM_MDAY (1 << 1)
67 #define FIELD_TM_WDAY (1 << 2)
68 #define FIELD_TM_YDAY (1 << 3)
69 #define FIELD_TM_YEAR (1 << 4)
70
71 static char gmt[] = { "GMT" };
72 static char utc[] = { "UTC" };
73 /* RFC-822/RFC-2822 */
74 static const char * const nast[5] = {
75 "EST", "CST", "MST", "PST", "\0\0\0"
76 };
77 static const char * const nadt[5] = {
78 "EDT", "CDT", "MDT", "PDT", "\0\0\0"
79 };
80
81 static const int mon_lengths[2][MONSPERYEAR] = {
82 { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 },
83 { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }
84 };
85
86 static int _conv_num(const unsigned char **, int *, int, int);
87 static int epoch_to_tm(const unsigned char **, struct tm *);
88 static int leaps_thru_end_of(const int y);
89 static char *_strptime(const char *, const char *, struct tm *, int);
90 static const u_char *_find_string(const u_char *, int *, const char * const *,
91 const char * const *, int);
92
93
/*
 * Public entry point: parse `buf' according to `fmt' into `tm'.
 *
 * Delegates to _strptime() as the outermost (non-recursive) call, which
 * tells it to reset its century / two-digit-year / seen-fields state.
 * Returns whatever _strptime() returns.
 */
char *
strptime(const char *buf, const char *fmt, struct tm *tm)
{
	const int reset_state = 1;

	return _strptime(buf, fmt, tm, reset_state);
}
99 DEF_WEAK(strptime);
100
101 static char *
_strptime(const char * buf,const char * fmt,struct tm * tm,int initialize)102 _strptime(const char *buf, const char *fmt, struct tm *tm, int initialize)
103 {
104 unsigned char c;
105 const unsigned char *bp, *ep;
106 size_t len;
107 int alt_format, i, offs;
108 int neg = 0;
109 static int century, relyear, fields;
110
111 if (initialize) {
112 century = TM_YEAR_BASE;
113 relyear = -1;
114 fields = 0;
115 }
116
117 bp = (const unsigned char *)buf;
118 while ((c = *fmt) != '\0') {
119 /* Clear `alternate' modifier prior to new conversion. */
120 alt_format = 0;
121
122 /* Eat up white-space. */
123 if (isspace(c)) {
124 while (isspace(*bp))
125 bp++;
126
127 fmt++;
128 continue;
129 }
130
131 if ((c = *fmt++) != '%')
132 goto literal;
133
134
135 again: switch (c = *fmt++) {
136 case '%': /* "%%" is converted to "%". */
137 literal:
138 if (c != *bp++)
139 return (NULL);
140
141 break;
142
143 /*
144 * "Alternative" modifiers. Just set the appropriate flag
145 * and start over again.
146 */
147 case 'E': /* "%E?" alternative conversion modifier. */
148 _LEGAL_ALT(0);
149 alt_format |= _ALT_E;
150 goto again;
151
152 case 'O': /* "%O?" alternative conversion modifier. */
153 _LEGAL_ALT(0);
154 alt_format |= _ALT_O;
155 goto again;
156
157 /*
158 * "Complex" conversion rules, implemented through recursion.
159 */
160 case 'c': /* Date and time, using the locale's format. */
161 _LEGAL_ALT(_ALT_E);
162 if (!(bp = _strptime(bp, _ctloc(d_t_fmt), tm, 0)))
163 return (NULL);
164 break;
165
166 case 'D': /* The date as "%m/%d/%y". */
167 _LEGAL_ALT(0);
168 if (!(bp = _strptime(bp, "%m/%d/%y", tm, 0)))
169 return (NULL);
170 break;
171
172 case 'F': /* The date as "%Y-%m-%d". */
173 _LEGAL_ALT(0);
174 if (!(bp = _strptime(bp, "%Y-%m-%d", tm, 0)))
175 return (NULL);
176 continue;
177
178 case 'R': /* The time as "%H:%M". */
179 _LEGAL_ALT(0);
180 if (!(bp = _strptime(bp, "%H:%M", tm, 0)))
181 return (NULL);
182 break;
183
184 case 'r': /* The time as "%I:%M:%S %p". */
185 _LEGAL_ALT(0);
186 if (!(bp = _strptime(bp, "%I:%M:%S %p", tm, 0)))
187 return (NULL);
188 break;
189
190 case 'T': /* The time as "%H:%M:%S". */
191 _LEGAL_ALT(0);
192 if (!(bp = _strptime(bp, "%H:%M:%S", tm, 0)))
193 return (NULL);
194 break;
195
196 case 'v': /* Android: the date as "%e-%b-%Y" for strftime() compat; glibc does this too. */
197 _LEGAL_ALT(0);
198 if (!(bp = _strptime(bp, "%e-%b-%Y", tm, 0)))
199 return (NULL);
200 break;
201
202 case 'X': /* The time, using the locale's format. */
203 _LEGAL_ALT(_ALT_E);
204 if (!(bp = _strptime(bp, _ctloc(t_fmt), tm, 0)))
205 return (NULL);
206 break;
207
208 case 'x': /* The date, using the locale's format. */
209 _LEGAL_ALT(_ALT_E);
210 if (!(bp = _strptime(bp, _ctloc(d_fmt), tm, 0)))
211 return (NULL);
212 break;
213
214 /*
215 * "Elementary" conversion rules.
216 */
217 case 'A': /* The day of week, using the locale's form. */
218 case 'a':
219 _LEGAL_ALT(0);
220 for (i = 0; i < 7; i++) {
221 /* Full name. */
222 len = strlen(_ctloc(day[i]));
223 if (strncasecmp(_ctloc(day[i]), bp, len) == 0)
224 break;
225
226 /* Abbreviated name. */
227 len = strlen(_ctloc(abday[i]));
228 if (strncasecmp(_ctloc(abday[i]), bp, len) == 0)
229 break;
230 }
231
232 /* Nothing matched. */
233 if (i == 7)
234 return (NULL);
235
236 tm->tm_wday = i;
237 bp += len;
238 fields |= FIELD_TM_WDAY;
239 break;
240
241 case 'B': /* The month, using the locale's form. */
242 case 'b':
243 case 'h':
244 _LEGAL_ALT(0);
245 for (i = 0; i < 12; i++) {
246 /* Full name. */
247 len = strlen(_ctloc(mon[i]));
248 if (strncasecmp(_ctloc(mon[i]), bp, len) == 0)
249 break;
250
251 /* Abbreviated name. */
252 len = strlen(_ctloc(abmon[i]));
253 if (strncasecmp(_ctloc(abmon[i]), bp, len) == 0)
254 break;
255 }
256
257 /* Nothing matched. */
258 if (i == 12)
259 return (NULL);
260
261 tm->tm_mon = i;
262 bp += len;
263 fields |= FIELD_TM_MON;
264 break;
265
266 case 'C': /* The century number. */
267 _LEGAL_ALT(_ALT_E);
268 if (!(_conv_num(&bp, &i, 0, 99)))
269 return (NULL);
270
271 century = i * 100;
272 break;
273
274 case 'e': /* The day of month. */
275 if (isspace(*bp))
276 bp++;
277 /* FALLTHROUGH */
278 case 'd':
279 _LEGAL_ALT(_ALT_O);
280 if (!(_conv_num(&bp, &tm->tm_mday, 1, 31)))
281 return (NULL);
282 fields |= FIELD_TM_MDAY;
283 break;
284
285 case 'k': /* The hour (24-hour clock representation). */
286 _LEGAL_ALT(0);
287 /* FALLTHROUGH */
288 case 'H':
289 _LEGAL_ALT(_ALT_O);
290 if (!(_conv_num(&bp, &tm->tm_hour, 0, 23)))
291 return (NULL);
292 break;
293
294 case 'l': /* The hour (12-hour clock representation). */
295 _LEGAL_ALT(0);
296 /* FALLTHROUGH */
297 case 'I':
298 _LEGAL_ALT(_ALT_O);
299 if (!(_conv_num(&bp, &tm->tm_hour, 1, 12)))
300 return (NULL);
301 break;
302
303 case 'j': /* The day of year. */
304 _LEGAL_ALT(0);
305 if (!(_conv_num(&bp, &tm->tm_yday, 1, 366)))
306 return (NULL);
307 tm->tm_yday--;
308 fields |= FIELD_TM_YDAY;
309 break;
310
311 case 'M': /* The minute. */
312 _LEGAL_ALT(_ALT_O);
313 if (!(_conv_num(&bp, &tm->tm_min, 0, 59)))
314 return (NULL);
315 break;
316
317 case 'm': /* The month. */
318 _LEGAL_ALT(_ALT_O);
319 if (!(_conv_num(&bp, &tm->tm_mon, 1, 12)))
320 return (NULL);
321 tm->tm_mon--;
322 fields |= FIELD_TM_MON;
323 break;
324
325 case 'P': /* Android addition for strftime() compat; glibc does this too. */
326 case 'p': /* The locale's equivalent of AM/PM. */
327 _LEGAL_ALT(0);
328 /* AM? */
329 len = strlen(_ctloc(am_pm[0]));
330 if (strncasecmp(_ctloc(am_pm[0]), bp, len) == 0) {
331 if (tm->tm_hour > 12) /* i.e., 13:00 AM ?! */
332 return (NULL);
333 else if (tm->tm_hour == 12)
334 tm->tm_hour = 0;
335
336 bp += len;
337 break;
338 }
339 /* PM? */
340 len = strlen(_ctloc(am_pm[1]));
341 if (strncasecmp(_ctloc(am_pm[1]), bp, len) == 0) {
342 if (tm->tm_hour > 12) /* i.e., 13:00 PM ?! */
343 return (NULL);
344 else if (tm->tm_hour < 12)
345 tm->tm_hour += 12;
346
347 bp += len;
348 break;
349 }
350
351 /* Nothing matched. */
352 return (NULL);
353
354 case 'S': /* The seconds. */
355 _LEGAL_ALT(_ALT_O);
356 if (!(_conv_num(&bp, &tm->tm_sec, 0, 60)))
357 return (NULL);
358 break;
359 case 's': /* Seconds since epoch. */
360 if (!(epoch_to_tm(&bp, tm)))
361 return (NULL);
362 fields = 0xffff; /* everything */
363 break;
364 case 'U': /* The week of year, beginning on sunday. */
365 case 'W': /* The week of year, beginning on monday. */
366 _LEGAL_ALT(_ALT_O);
367 /*
368 * XXX This is bogus, as we can not assume any valid
369 * information present in the tm structure at this
370 * point to calculate a real value, so just check the
371 * range for now.
372 */
373 if (!(_conv_num(&bp, &i, 0, 53)))
374 return (NULL);
375 break;
376
377 case 'w': /* The day of week, beginning on sunday. */
378 _LEGAL_ALT(_ALT_O);
379 if (!(_conv_num(&bp, &tm->tm_wday, 0, 6)))
380 return (NULL);
381 fields |= FIELD_TM_WDAY;
382 break;
383
384 case 'u': /* The day of week, monday = 1. */
385 _LEGAL_ALT(_ALT_O);
386 if (!(_conv_num(&bp, &i, 1, 7)))
387 return (NULL);
388 tm->tm_wday = i % 7;
389 fields |= FIELD_TM_WDAY;
390 continue;
391
392 case 'g': /* The year corresponding to the ISO week
393 * number but without the century.
394 */
395 if (!(_conv_num(&bp, &i, 0, 99)))
396 return (NULL);
397 continue;
398
399 case 'G': /* The year corresponding to the ISO week
400 * number with century.
401 */
402 do
403 bp++;
404 while (isdigit(*bp));
405 continue;
406
407 case 'V': /* The ISO 8601:1988 week number as decimal */
408 if (!(_conv_num(&bp, &i, 0, 53)))
409 return (NULL);
410 continue;
411
412 case 'Y': /* The year. */
413 _LEGAL_ALT(_ALT_E);
414 if (!(_conv_num(&bp, &i, 0, 9999)))
415 return (NULL);
416
417 relyear = -1;
418 tm->tm_year = i - TM_YEAR_BASE;
419 fields |= FIELD_TM_YEAR;
420 break;
421
422 case 'y': /* The year within the century (2 digits). */
423 _LEGAL_ALT(_ALT_E | _ALT_O);
424 if (!(_conv_num(&bp, &relyear, 0, 99)))
425 return (NULL);
426 break;
427
428 case 'Z':
429 tzset();
430 if (strncmp((const char *)bp, gmt, 3) == 0) {
431 tm->tm_isdst = 0;
432 tm->tm_gmtoff = 0;
433 tm->tm_zone = gmt;
434 bp += 3;
435 } else if (strncmp((const char *)bp, utc, 3) == 0) {
436 tm->tm_isdst = 0;
437 tm->tm_gmtoff = 0;
438 tm->tm_zone = utc;
439 bp += 3;
440 } else {
441 ep = _find_string(bp, &i,
442 (const char * const *)tzname,
443 NULL, 2);
444 if (ep == NULL)
445 return (NULL);
446
447 tm->tm_isdst = i;
448 tm->tm_gmtoff = -(timezone);
449 tm->tm_zone = tzname[i];
450 bp = ep;
451 }
452 continue;
453
454 case 'z':
455 /*
456 * We recognize all ISO 8601 formats:
457 * Z = Zulu time/UTC
458 * [+-]hhmm
459 * [+-]hh:mm
460 * [+-]hh
461 * We recognize all RFC-822/RFC-2822 formats:
462 * UT|GMT
463 * North American : UTC offsets
464 * E[DS]T = Eastern : -4 | -5
465 * C[DS]T = Central : -5 | -6
466 * M[DS]T = Mountain: -6 | -7
467 * P[DS]T = Pacific : -7 | -8
468 */
469 while (isspace(*bp))
470 bp++;
471
472 switch (*bp++) {
473 case 'G':
474 if (*bp++ != 'M')
475 return NULL;
476 /*FALLTHROUGH*/
477 case 'U':
478 if (*bp++ != 'T')
479 return NULL;
480 /*FALLTHROUGH*/
481 case 'Z':
482 tm->tm_isdst = 0;
483 tm->tm_gmtoff = 0;
484 tm->tm_zone = utc;
485 continue;
486 case '+':
487 neg = 0;
488 break;
489 case '-':
490 neg = 1;
491 break;
492 default:
493 --bp;
494 ep = _find_string(bp, &i, nast, NULL, 4);
495 if (ep != NULL) {
496 tm->tm_gmtoff = (-5 - i) * SECSPERHOUR;
497 tm->tm_zone = (char *)nast[i];
498 bp = ep;
499 continue;
500 }
501 ep = _find_string(bp, &i, nadt, NULL, 4);
502 if (ep != NULL) {
503 tm->tm_isdst = 1;
504 tm->tm_gmtoff = (-4 - i) * SECSPERHOUR;
505 tm->tm_zone = (char *)nadt[i];
506 bp = ep;
507 continue;
508 }
509 return NULL;
510 }
511 if (!isdigit(bp[0]) || !isdigit(bp[1]))
512 return NULL;
513 offs = ((bp[0]-'0') * 10 + (bp[1]-'0')) * SECSPERHOUR;
514 bp += 2;
515 if (*bp == ':')
516 bp++;
517 if (isdigit(*bp)) {
518 offs += (*bp++ - '0') * 10 * SECSPERMIN;
519 if (!isdigit(*bp))
520 return NULL;
521 offs += (*bp++ - '0') * SECSPERMIN;
522 }
523 if (neg)
524 offs = -offs;
525 tm->tm_isdst = 0; /* XXX */
526 tm->tm_gmtoff = offs;
527 tm->tm_zone = NULL; /* XXX */
528 continue;
529
530 /*
531 * Miscellaneous conversions.
532 */
533 case 'n': /* Any kind of white-space. */
534 case 't':
535 _LEGAL_ALT(0);
536 while (isspace(*bp))
537 bp++;
538 break;
539
540
541 default: /* Unknown/unsupported conversion. */
542 return (NULL);
543 }
544
545
546 }
547
548 /*
549 * We need to evaluate the two digit year spec (%y)
550 * last as we can get a century spec (%C) at any time.
551 */
552 if (relyear != -1) {
553 if (century == TM_YEAR_BASE) {
554 if (relyear <= 68)
555 tm->tm_year = relyear + 2000 - TM_YEAR_BASE;
556 else
557 tm->tm_year = relyear + 1900 - TM_YEAR_BASE;
558 } else {
559 tm->tm_year = relyear + century - TM_YEAR_BASE;
560 }
561 fields |= FIELD_TM_YEAR;
562 }
563
564 /* Compute some missing values when possible. */
565 if (fields & FIELD_TM_YEAR) {
566 const int year = tm->tm_year + TM_YEAR_BASE;
567 const int *mon_lens = mon_lengths[isleap(year)];
568 if (!(fields & FIELD_TM_YDAY) &&
569 (fields & FIELD_TM_MON) && (fields & FIELD_TM_MDAY)) {
570 tm->tm_yday = tm->tm_mday - 1;
571 for (i = 0; i < tm->tm_mon; i++)
572 tm->tm_yday += mon_lens[i];
573 fields |= FIELD_TM_YDAY;
574 }
575 if (fields & FIELD_TM_YDAY) {
576 int days = tm->tm_yday;
577 if (!(fields & FIELD_TM_WDAY)) {
578 tm->tm_wday = EPOCH_WDAY +
579 ((year - EPOCH_YEAR) % DAYSPERWEEK) *
580 (DAYSPERNYEAR % DAYSPERWEEK) +
581 leaps_thru_end_of(year - 1) -
582 leaps_thru_end_of(EPOCH_YEAR - 1) +
583 tm->tm_yday;
584 tm->tm_wday %= DAYSPERWEEK;
585 if (tm->tm_wday < 0)
586 tm->tm_wday += DAYSPERWEEK;
587 }
588 if (!(fields & FIELD_TM_MON)) {
589 tm->tm_mon = 0;
590 while (tm->tm_mon < MONSPERYEAR && days >= mon_lens[tm->tm_mon])
591 days -= mon_lens[tm->tm_mon++];
592 }
593 if (!(fields & FIELD_TM_MDAY))
594 tm->tm_mday = days + 1;
595 }
596 }
597
598 return ((char *)bp);
599 }
600
601
/*
 * Convert a run of decimal digits at *buf into an int.
 *
 * Digits are consumed while appending another one could still stay within
 * `ulim' and while the digit budget derived from `ulim' (one digit per
 * decimal place of `ulim') is not used up.  *buf is advanced past the
 * digits consumed, including when the final range check fails.
 *
 * Returns 1 and stores the value in *dest when it lies in [llim, ulim];
 * returns 0 (leaving *dest untouched) when the input does not start with
 * a digit or the value is out of range.
 */
static int
_conv_num(const unsigned char **buf, int *dest, int llim, int ulim)
{
	const unsigned char *cur = *buf;
	int value = 0;
	int budget = ulim;

	if (!(*cur >= '0' && *cur <= '9'))
		return (0);

	for (;;) {
		value = value * 10 + (*cur++ - '0');
		budget /= 10;

		/* Another digit would necessarily exceed the upper limit. */
		if (value * 10 > ulim)
			break;
		/* As many digits as `ulim' has have been read. */
		if (budget == 0)
			break;
		/* Next character is not a digit. */
		if (*cur < '0' || *cur > '9')
			break;
	}
	*buf = cur;

	if (value < llim || value > ulim)
		return (0);

	*dest = value;
	return (1);
}
624
/*
 * Handle "%s": parse a non-negative decimal number of seconds since the
 * Epoch at *buf and break it down into *tm with localtime_r().
 *
 * *buf is always left pointing at the end position reported by strtoll()
 * (the start of the input when nothing could be parsed), and the caller's
 * errno is preserved.
 *
 * Returns 1 on success; 0 when no number is present, the number is
 * negative, it overflows long long, it cannot be represented as a time_t,
 * or localtime_r() fails.
 */
static int
epoch_to_tm(const unsigned char **buf, struct tm *tm)
{
	int saved_errno = errno;
	int ret = 0;
	long long val;
	time_t secs;
	char *ep;

	errno = 0;
	val = strtoll((const char *)*buf, &ep, 10);
	if (*buf == (const unsigned char *)ep)
		goto done;
	if (val < 0 || (val == LLONG_MAX && errno == ERANGE))
		goto done;
	/*
	 * Check the value in its full width first and only then narrow it:
	 * where time_t is smaller than long long, a direct assignment would
	 * silently truncate an out-of-range value into a bogus valid time.
	 */
	secs = (time_t)val;
	if ((long long)secs != val)
		goto done;
	if (localtime_r(&secs, tm) == NULL)
		goto done;
	ret = 1;
done:
	*buf = (const unsigned char *)ep;
	errno = saved_errno;
	return (ret);
}
648
/*
 * Case-insensitively match the beginning of `bp' against a list of names.
 *
 * The `c' entries of `n1' are tried in order and then, when `n2' is not
 * NULL, the `c' entries of `n2'.  Nothing is searched if `n1' is NULL.
 * On the first match, the index of the name within its list is stored in
 * *tgt and a pointer just past the matched text is returned.  When no
 * name matches, NULL is returned and *tgt is left untouched.
 */
static const u_char *
_find_string(const u_char *bp, int *tgt, const char * const *n1,
    const char * const *n2, int c)
{
	const char * const *lists[2];
	int which, idx;

	lists[0] = n1;
	lists[1] = n2;

	/* check full name - then abbreviated ones */
	for (which = 0; which < 2; which++) {
		const char * const *names = lists[which];

		if (names == NULL)
			break;

		for (idx = 0; idx < c; idx++) {
			const char *name = names[idx];
			unsigned int len = strlen(name);

			if (strncasecmp(name, (const char *)bp, len) != 0)
				continue;

			*tgt = idx;
			return bp + len;
		}
	}

	/* Nothing matched */
	return NULL;
}
670
/*
 * Evaluate y/4 - y/100 + y/400 (the Gregorian leap-year count formula)
 * for non-negative `y'.  A negative `y' is mirrored: the same formula is
 * evaluated on -(y + 1), then one is added and the result is negated.
 */
static int
leaps_thru_end_of(const int y)
{
	int mirrored;

	if (y >= 0)
		return y / 4 - y / 100 + y / 400;

	/* -(y + 1) is non-negative here, so no further recursion is needed. */
	mirrored = -(y + 1);
	return -((mirrored / 4 - mirrored / 100 + mirrored / 400) + 1);
}
677