1 #include "Python.h"
2 #include "structmember.h"
3 
4 #include <ctype.h>
5 #include <stddef.h>
6 #include <stdint.h>
7 
8 #include "datetime.h"
9 
10 // Imports
11 static PyObject *io_open = NULL;
12 static PyObject *_tzpath_find_tzfile = NULL;
13 static PyObject *_common_mod = NULL;
14 
15 typedef struct TransitionRuleType TransitionRuleType;
16 typedef struct StrongCacheNode StrongCacheNode;
17 
18 typedef struct {
19     PyObject *utcoff;
20     PyObject *dstoff;
21     PyObject *tzname;
22     long utcoff_seconds;
23 } _ttinfo;
24 
25 typedef struct {
26     _ttinfo std;
27     _ttinfo dst;
28     int dst_diff;
29     TransitionRuleType *start;
30     TransitionRuleType *end;
31     unsigned char std_only;
32 } _tzrule;
33 
34 typedef struct {
35     PyDateTime_TZInfo base;
36     PyObject *key;
37     PyObject *file_repr;
38     PyObject *weakreflist;
39     size_t num_transitions;
40     size_t num_ttinfos;
41     int64_t *trans_list_utc;
42     int64_t *trans_list_wall[2];
43     _ttinfo **trans_ttinfos;  // References to the ttinfo for each transition
44     _ttinfo *ttinfo_before;
45     _tzrule tzrule_after;
46     _ttinfo *_ttinfos;  // Unique array of ttinfos for ease of deallocation
47     unsigned char fixed_offset;
48     unsigned char source;
49 } PyZoneInfo_ZoneInfo;
50 
51 struct TransitionRuleType {
52     int64_t (*year_to_timestamp)(TransitionRuleType *, int);
53 };
54 
55 typedef struct {
56     TransitionRuleType base;
57     uint8_t month;
58     uint8_t week;
59     uint8_t day;
60     int8_t hour;
61     int8_t minute;
62     int8_t second;
63 } CalendarRule;
64 
65 typedef struct {
66     TransitionRuleType base;
67     uint8_t julian;
68     unsigned int day;
69     int8_t hour;
70     int8_t minute;
71     int8_t second;
72 } DayRule;
73 
74 struct StrongCacheNode {
75     StrongCacheNode *next;
76     StrongCacheNode *prev;
77     PyObject *key;
78     PyObject *zone;
79 };
80 
81 static PyTypeObject PyZoneInfo_ZoneInfoType;
82 
83 // Globals
84 static PyObject *TIMEDELTA_CACHE = NULL;
85 static PyObject *ZONEINFO_WEAK_CACHE = NULL;
86 static StrongCacheNode *ZONEINFO_STRONG_CACHE = NULL;
87 static size_t ZONEINFO_STRONG_CACHE_MAX_SIZE = 8;
88 
89 static _ttinfo NO_TTINFO = {NULL, NULL, NULL, 0};
90 
91 // Constants
92 static const int EPOCHORDINAL = 719163;
93 static int DAYS_IN_MONTH[] = {
94     -1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
95 };
96 
97 static int DAYS_BEFORE_MONTH[] = {
98     -1, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334,
99 };
100 
101 static const int SOURCE_NOCACHE = 0;
102 static const int SOURCE_CACHE = 1;
103 static const int SOURCE_FILE = 2;
104 
105 // Forward declarations
106 static int
107 load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj);
108 static void
109 utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
110                  unsigned char *isdsts, size_t num_transitions,
111                  size_t num_ttinfos);
112 static int
113 ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
114             int64_t *trans_local[2], size_t num_ttinfos,
115             size_t num_transitions);
116 
117 static int
118 parse_tz_str(PyObject *tz_str_obj, _tzrule *out);
119 
120 static Py_ssize_t
121 parse_abbr(const char *const p, PyObject **abbr);
122 static Py_ssize_t
123 parse_tz_delta(const char *const p, long *total_seconds);
124 static Py_ssize_t
125 parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
126                       int8_t *second);
127 static Py_ssize_t
128 parse_transition_rule(const char *const p, TransitionRuleType **out);
129 
130 static _ttinfo *
131 find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year);
132 static _ttinfo *
133 find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
134                            unsigned char *fold);
135 
136 static int
137 build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out);
138 static void
139 xdecref_ttinfo(_ttinfo *ttinfo);
140 static int
141 ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1);
142 
143 static int
144 build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
145              long dst_offset, TransitionRuleType *start,
146              TransitionRuleType *end, _tzrule *out);
147 static void
148 free_tzrule(_tzrule *tzrule);
149 
150 static PyObject *
151 load_timedelta(long seconds);
152 
153 static int
154 get_local_timestamp(PyObject *dt, int64_t *local_ts);
155 static _ttinfo *
156 find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt);
157 
158 static int
159 ymd_to_ord(int y, int m, int d);
160 static int
161 is_leap_year(int year);
162 
163 static size_t
164 _bisect(const int64_t value, const int64_t *arr, size_t size);
165 
166 static void
167 eject_from_strong_cache(const PyTypeObject *const type, PyObject *key);
168 static void
169 clear_strong_cache(const PyTypeObject *const type);
170 static void
171 update_strong_cache(const PyTypeObject *const type, PyObject *key,
172                     PyObject *zone);
173 static PyObject *
174 zone_from_strong_cache(const PyTypeObject *const type, PyObject *key);
175 
176 static PyObject *
zoneinfo_new_instance(PyTypeObject * type,PyObject * key)177 zoneinfo_new_instance(PyTypeObject *type, PyObject *key)
178 {
179     PyObject *file_obj = NULL;
180     PyObject *file_path = NULL;
181 
182     file_path = PyObject_CallFunctionObjArgs(_tzpath_find_tzfile, key, NULL);
183     if (file_path == NULL) {
184         return NULL;
185     }
186     else if (file_path == Py_None) {
187         file_obj = PyObject_CallMethod(_common_mod, "load_tzdata", "O", key);
188         if (file_obj == NULL) {
189             Py_DECREF(file_path);
190             return NULL;
191         }
192     }
193 
194     PyObject *self = (PyObject *)(type->tp_alloc(type, 0));
195     if (self == NULL) {
196         goto error;
197     }
198 
199     if (file_obj == NULL) {
200         file_obj = PyObject_CallFunction(io_open, "Os", file_path, "rb");
201         if (file_obj == NULL) {
202             goto error;
203         }
204     }
205 
206     if (load_data((PyZoneInfo_ZoneInfo *)self, file_obj)) {
207         goto error;
208     }
209 
210     PyObject *rv = PyObject_CallMethod(file_obj, "close", NULL);
211     Py_DECREF(file_obj);
212     file_obj = NULL;
213     if (rv == NULL) {
214         goto error;
215     }
216     Py_DECREF(rv);
217 
218     ((PyZoneInfo_ZoneInfo *)self)->key = key;
219     Py_INCREF(key);
220 
221     goto cleanup;
222 error:
223     Py_XDECREF(self);
224     self = NULL;
225 cleanup:
226     if (file_obj != NULL) {
227         PyObject *exc, *val, *tb;
228         PyErr_Fetch(&exc, &val, &tb);
229         PyObject *tmp = PyObject_CallMethod(file_obj, "close", NULL);
230         _PyErr_ChainExceptions(exc, val, tb);
231         if (tmp == NULL) {
232             Py_CLEAR(self);
233         }
234         Py_XDECREF(tmp);
235         Py_DECREF(file_obj);
236     }
237     Py_DECREF(file_path);
238     return self;
239 }
240 
241 static PyObject *
get_weak_cache(PyTypeObject * type)242 get_weak_cache(PyTypeObject *type)
243 {
244     if (type == &PyZoneInfo_ZoneInfoType) {
245         return ZONEINFO_WEAK_CACHE;
246     }
247     else {
248         PyObject *cache =
249             PyObject_GetAttrString((PyObject *)type, "_weak_cache");
250         // We are assuming that the type lives at least as long as the function
251         // that calls get_weak_cache, and that it holds a reference to the
252         // cache, so we'll return a "borrowed reference".
253         Py_XDECREF(cache);
254         return cache;
255     }
256 }
257 
258 static PyObject *
zoneinfo_new(PyTypeObject * type,PyObject * args,PyObject * kw)259 zoneinfo_new(PyTypeObject *type, PyObject *args, PyObject *kw)
260 {
261     PyObject *key = NULL;
262     static char *kwlist[] = {"key", NULL};
263     if (PyArg_ParseTupleAndKeywords(args, kw, "O", kwlist, &key) == 0) {
264         return NULL;
265     }
266 
267     PyObject *instance = zone_from_strong_cache(type, key);
268     if (instance != NULL) {
269         return instance;
270     }
271 
272     PyObject *weak_cache = get_weak_cache(type);
273     instance = PyObject_CallMethod(weak_cache, "get", "O", key, Py_None);
274     if (instance == NULL) {
275         return NULL;
276     }
277 
278     if (instance == Py_None) {
279         Py_DECREF(instance);
280         PyObject *tmp = zoneinfo_new_instance(type, key);
281         if (tmp == NULL) {
282             return NULL;
283         }
284 
285         instance =
286             PyObject_CallMethod(weak_cache, "setdefault", "OO", key, tmp);
287         Py_DECREF(tmp);
288         if (instance == NULL) {
289             return NULL;
290         }
291         ((PyZoneInfo_ZoneInfo *)instance)->source = SOURCE_CACHE;
292     }
293 
294     update_strong_cache(type, key, instance);
295     return instance;
296 }
297 
298 static void
zoneinfo_dealloc(PyObject * obj_self)299 zoneinfo_dealloc(PyObject *obj_self)
300 {
301     PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
302 
303     if (self->weakreflist != NULL) {
304         PyObject_ClearWeakRefs(obj_self);
305     }
306 
307     if (self->trans_list_utc != NULL) {
308         PyMem_Free(self->trans_list_utc);
309     }
310 
311     for (size_t i = 0; i < 2; i++) {
312         if (self->trans_list_wall[i] != NULL) {
313             PyMem_Free(self->trans_list_wall[i]);
314         }
315     }
316 
317     if (self->_ttinfos != NULL) {
318         for (size_t i = 0; i < self->num_ttinfos; ++i) {
319             xdecref_ttinfo(&(self->_ttinfos[i]));
320         }
321         PyMem_Free(self->_ttinfos);
322     }
323 
324     if (self->trans_ttinfos != NULL) {
325         PyMem_Free(self->trans_ttinfos);
326     }
327 
328     free_tzrule(&(self->tzrule_after));
329 
330     Py_XDECREF(self->key);
331     Py_XDECREF(self->file_repr);
332 
333     Py_TYPE(self)->tp_free((PyObject *)self);
334 }
335 
336 static PyObject *
zoneinfo_from_file(PyTypeObject * type,PyObject * args,PyObject * kwargs)337 zoneinfo_from_file(PyTypeObject *type, PyObject *args, PyObject *kwargs)
338 {
339     PyObject *file_obj = NULL;
340     PyObject *file_repr = NULL;
341     PyObject *key = Py_None;
342     PyZoneInfo_ZoneInfo *self = NULL;
343 
344     static char *kwlist[] = {"", "key", NULL};
345     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", kwlist, &file_obj,
346                                      &key)) {
347         return NULL;
348     }
349 
350     PyObject *obj_self = (PyObject *)(type->tp_alloc(type, 0));
351     self = (PyZoneInfo_ZoneInfo *)obj_self;
352     if (self == NULL) {
353         return NULL;
354     }
355 
356     file_repr = PyUnicode_FromFormat("%R", file_obj);
357     if (file_repr == NULL) {
358         goto error;
359     }
360 
361     if (load_data(self, file_obj)) {
362         goto error;
363     }
364 
365     self->source = SOURCE_FILE;
366     self->file_repr = file_repr;
367     self->key = key;
368     Py_INCREF(key);
369 
370     return obj_self;
371 error:
372     Py_XDECREF(file_repr);
373     Py_XDECREF(self);
374     return NULL;
375 }
376 
377 static PyObject *
zoneinfo_no_cache(PyTypeObject * cls,PyObject * args,PyObject * kwargs)378 zoneinfo_no_cache(PyTypeObject *cls, PyObject *args, PyObject *kwargs)
379 {
380     static char *kwlist[] = {"key", NULL};
381     PyObject *key = NULL;
382     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kwlist, &key)) {
383         return NULL;
384     }
385 
386     PyObject *out = zoneinfo_new_instance(cls, key);
387     if (out != NULL) {
388         ((PyZoneInfo_ZoneInfo *)out)->source = SOURCE_NOCACHE;
389     }
390 
391     return out;
392 }
393 
394 static PyObject *
zoneinfo_clear_cache(PyObject * cls,PyObject * args,PyObject * kwargs)395 zoneinfo_clear_cache(PyObject *cls, PyObject *args, PyObject *kwargs)
396 {
397     PyObject *only_keys = NULL;
398     static char *kwlist[] = {"only_keys", NULL};
399 
400     if (!(PyArg_ParseTupleAndKeywords(args, kwargs, "|$O", kwlist,
401                                       &only_keys))) {
402         return NULL;
403     }
404 
405     PyTypeObject *type = (PyTypeObject *)cls;
406     PyObject *weak_cache = get_weak_cache(type);
407 
408     if (only_keys == NULL || only_keys == Py_None) {
409         PyObject *rv = PyObject_CallMethod(weak_cache, "clear", NULL);
410         if (rv != NULL) {
411             Py_DECREF(rv);
412         }
413 
414         clear_strong_cache(type);
415     }
416     else {
417         PyObject *item = NULL;
418         PyObject *pop = PyUnicode_FromString("pop");
419         if (pop == NULL) {
420             return NULL;
421         }
422 
423         PyObject *iter = PyObject_GetIter(only_keys);
424         if (iter == NULL) {
425             Py_DECREF(pop);
426             return NULL;
427         }
428 
429         while ((item = PyIter_Next(iter))) {
430             // Remove from strong cache
431             eject_from_strong_cache(type, item);
432 
433             // Remove from weak cache
434             PyObject *tmp = PyObject_CallMethodObjArgs(weak_cache, pop, item,
435                                                        Py_None, NULL);
436 
437             Py_DECREF(item);
438             if (tmp == NULL) {
439                 break;
440             }
441             Py_DECREF(tmp);
442         }
443         Py_DECREF(iter);
444         Py_DECREF(pop);
445     }
446 
447     if (PyErr_Occurred()) {
448         return NULL;
449     }
450 
451     Py_RETURN_NONE;
452 }
453 
454 static PyObject *
zoneinfo_utcoffset(PyObject * self,PyObject * dt)455 zoneinfo_utcoffset(PyObject *self, PyObject *dt)
456 {
457     _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
458     if (tti == NULL) {
459         return NULL;
460     }
461     Py_INCREF(tti->utcoff);
462     return tti->utcoff;
463 }
464 
465 static PyObject *
zoneinfo_dst(PyObject * self,PyObject * dt)466 zoneinfo_dst(PyObject *self, PyObject *dt)
467 {
468     _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
469     if (tti == NULL) {
470         return NULL;
471     }
472     Py_INCREF(tti->dstoff);
473     return tti->dstoff;
474 }
475 
476 static PyObject *
zoneinfo_tzname(PyObject * self,PyObject * dt)477 zoneinfo_tzname(PyObject *self, PyObject *dt)
478 {
479     _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
480     if (tti == NULL) {
481         return NULL;
482     }
483     Py_INCREF(tti->tzname);
484     return tti->tzname;
485 }
486 
487 #define HASTZINFO(p) (((_PyDateTime_BaseTZInfo *)(p))->hastzinfo)
488 #define GET_DT_TZINFO(p) \
489     (HASTZINFO(p) ? ((PyDateTime_DateTime *)(p))->tzinfo : Py_None)
490 
491 static PyObject *
zoneinfo_fromutc(PyObject * obj_self,PyObject * dt)492 zoneinfo_fromutc(PyObject *obj_self, PyObject *dt)
493 {
494     if (!PyDateTime_Check(dt)) {
495         PyErr_SetString(PyExc_TypeError,
496                         "fromutc: argument must be a datetime");
497         return NULL;
498     }
499     if (GET_DT_TZINFO(dt) != obj_self) {
500         PyErr_SetString(PyExc_ValueError,
501                         "fromutc: dt.tzinfo "
502                         "is not self");
503         return NULL;
504     }
505 
506     PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
507 
508     int64_t timestamp;
509     if (get_local_timestamp(dt, &timestamp)) {
510         return NULL;
511     }
512     size_t num_trans = self->num_transitions;
513 
514     _ttinfo *tti = NULL;
515     unsigned char fold = 0;
516 
517     if (num_trans >= 1 && timestamp < self->trans_list_utc[0]) {
518         tti = self->ttinfo_before;
519     }
520     else if (num_trans == 0 ||
521              timestamp > self->trans_list_utc[num_trans - 1]) {
522         tti = find_tzrule_ttinfo_fromutc(&(self->tzrule_after), timestamp,
523                                          PyDateTime_GET_YEAR(dt), &fold);
524 
525         // Immediately after the last manual transition, the fold/gap is
526         // between self->trans_ttinfos[num_transitions - 1] and whatever
527         // ttinfo applies immediately after the last transition, not between
528         // the STD and DST rules in the tzrule_after, so we may need to
529         // adjust the fold value.
530         if (num_trans) {
531             _ttinfo *tti_prev = NULL;
532             if (num_trans == 1) {
533                 tti_prev = self->ttinfo_before;
534             }
535             else {
536                 tti_prev = self->trans_ttinfos[num_trans - 2];
537             }
538             int64_t diff = tti_prev->utcoff_seconds - tti->utcoff_seconds;
539             if (diff > 0 &&
540                 timestamp < (self->trans_list_utc[num_trans - 1] + diff)) {
541                 fold = 1;
542             }
543         }
544     }
545     else {
546         size_t idx = _bisect(timestamp, self->trans_list_utc, num_trans);
547         _ttinfo *tti_prev = NULL;
548 
549         if (idx >= 2) {
550             tti_prev = self->trans_ttinfos[idx - 2];
551             tti = self->trans_ttinfos[idx - 1];
552         }
553         else {
554             tti_prev = self->ttinfo_before;
555             tti = self->trans_ttinfos[0];
556         }
557 
558         // Detect fold
559         int64_t shift =
560             (int64_t)(tti_prev->utcoff_seconds - tti->utcoff_seconds);
561         if (shift > (timestamp - self->trans_list_utc[idx - 1])) {
562             fold = 1;
563         }
564     }
565 
566     PyObject *tmp = PyNumber_Add(dt, tti->utcoff);
567     if (tmp == NULL) {
568         return NULL;
569     }
570 
571     if (fold) {
572         if (PyDateTime_CheckExact(tmp)) {
573             ((PyDateTime_DateTime *)tmp)->fold = 1;
574             dt = tmp;
575         }
576         else {
577             PyObject *replace = PyObject_GetAttrString(tmp, "replace");
578             PyObject *args = PyTuple_New(0);
579             PyObject *kwargs = PyDict_New();
580 
581             Py_DECREF(tmp);
582             if (args == NULL || kwargs == NULL || replace == NULL) {
583                 Py_XDECREF(args);
584                 Py_XDECREF(kwargs);
585                 Py_XDECREF(replace);
586                 return NULL;
587             }
588 
589             dt = NULL;
590             if (!PyDict_SetItemString(kwargs, "fold", _PyLong_One)) {
591                 dt = PyObject_Call(replace, args, kwargs);
592             }
593 
594             Py_DECREF(args);
595             Py_DECREF(kwargs);
596             Py_DECREF(replace);
597 
598             if (dt == NULL) {
599                 return NULL;
600             }
601         }
602     }
603     else {
604         dt = tmp;
605     }
606     return dt;
607 }
608 
609 static PyObject *
zoneinfo_repr(PyZoneInfo_ZoneInfo * self)610 zoneinfo_repr(PyZoneInfo_ZoneInfo *self)
611 {
612     PyObject *rv = NULL;
613     const char *type_name = Py_TYPE((PyObject *)self)->tp_name;
614     if (!(self->key == Py_None)) {
615         rv = PyUnicode_FromFormat("%s(key=%R)", type_name, self->key);
616     }
617     else {
618         assert(PyUnicode_Check(self->file_repr));
619         rv = PyUnicode_FromFormat("%s.from_file(%U)", type_name,
620                                   self->file_repr);
621     }
622 
623     return rv;
624 }
625 
626 static PyObject *
zoneinfo_str(PyZoneInfo_ZoneInfo * self)627 zoneinfo_str(PyZoneInfo_ZoneInfo *self)
628 {
629     if (!(self->key == Py_None)) {
630         Py_INCREF(self->key);
631         return self->key;
632     }
633     else {
634         return zoneinfo_repr(self);
635     }
636 }
637 
638 /* Pickles the ZoneInfo object by key and source.
639  *
640  * ZoneInfo objects are pickled by reference to the TZif file that they came
641  * from, which means that the exact transitions may be different or the file
642  * may not un-pickle if the data has changed on disk in the interim.
643  *
644  * It is necessary to include a bit indicating whether or not the object
645  * was constructed from the cache, because from-cache objects will hit the
646  * unpickling process's cache, whereas no-cache objects will bypass it.
647  *
648  * Objects constructed from ZoneInfo.from_file cannot be pickled.
649  */
650 static PyObject *
zoneinfo_reduce(PyObject * obj_self,PyObject * unused)651 zoneinfo_reduce(PyObject *obj_self, PyObject *unused)
652 {
653     PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
654     if (self->source == SOURCE_FILE) {
655         // Objects constructed from files cannot be pickled.
656         PyObject *pickle = PyImport_ImportModule("pickle");
657         if (pickle == NULL) {
658             return NULL;
659         }
660 
661         PyObject *pickle_error =
662             PyObject_GetAttrString(pickle, "PicklingError");
663         Py_DECREF(pickle);
664         if (pickle_error == NULL) {
665             return NULL;
666         }
667 
668         PyErr_Format(pickle_error,
669                      "Cannot pickle a ZoneInfo file from a file stream.");
670         Py_DECREF(pickle_error);
671         return NULL;
672     }
673 
674     unsigned char from_cache = self->source == SOURCE_CACHE ? 1 : 0;
675     PyObject *constructor = PyObject_GetAttrString(obj_self, "_unpickle");
676 
677     if (constructor == NULL) {
678         return NULL;
679     }
680 
681     PyObject *rv = Py_BuildValue("O(OB)", constructor, self->key, from_cache);
682     Py_DECREF(constructor);
683     return rv;
684 }
685 
686 static PyObject *
zoneinfo__unpickle(PyTypeObject * cls,PyObject * args)687 zoneinfo__unpickle(PyTypeObject *cls, PyObject *args)
688 {
689     PyObject *key;
690     unsigned char from_cache;
691     if (!PyArg_ParseTuple(args, "OB", &key, &from_cache)) {
692         return NULL;
693     }
694 
695     if (from_cache) {
696         PyObject *val_args = Py_BuildValue("(O)", key);
697         if (val_args == NULL) {
698             return NULL;
699         }
700 
701         PyObject *rv = zoneinfo_new(cls, val_args, NULL);
702 
703         Py_DECREF(val_args);
704         return rv;
705     }
706     else {
707         return zoneinfo_new_instance(cls, key);
708     }
709 }
710 
711 /* It is relatively expensive to construct new timedelta objects, and in most
712  * cases we're looking at a relatively small number of timedeltas, such as
713  * integer number of hours, etc. We will keep a cache so that we construct
714  * a minimal number of these.
715  *
716  * Possibly this should be replaced with an LRU cache so that it's not possible
717  * for the memory usage to explode from this, but in order for this to be a
718  * serious problem, one would need to deliberately craft a malicious time zone
719  * file with many distinct offsets. As of tzdb 2019c, loading every single zone
720  * fills the cache with ~450 timedeltas for a total size of ~12kB.
721  *
722  * This returns a new reference to the timedelta.
723  */
724 static PyObject *
load_timedelta(long seconds)725 load_timedelta(long seconds)
726 {
727     PyObject *rv = NULL;
728     PyObject *pyoffset = PyLong_FromLong(seconds);
729     if (pyoffset == NULL) {
730         return NULL;
731     }
732     int contains = PyDict_Contains(TIMEDELTA_CACHE, pyoffset);
733     if (contains == -1) {
734         goto error;
735     }
736 
737     if (!contains) {
738         PyObject *tmp = PyDateTimeAPI->Delta_FromDelta(
739             0, seconds, 0, 1, PyDateTimeAPI->DeltaType);
740 
741         if (tmp == NULL) {
742             goto error;
743         }
744 
745         rv = PyDict_SetDefault(TIMEDELTA_CACHE, pyoffset, tmp);
746         Py_DECREF(tmp);
747     }
748     else {
749         rv = PyDict_GetItem(TIMEDELTA_CACHE, pyoffset);
750     }
751 
752     Py_DECREF(pyoffset);
753     Py_INCREF(rv);
754     return rv;
755 error:
756     Py_DECREF(pyoffset);
757     return NULL;
758 }
759 
760 /* Constructor for _ttinfo object - this starts by initializing the _ttinfo
761  * to { NULL, NULL, NULL }, so that Py_XDECREF will work on partially
762  * initialized _ttinfo objects.
763  */
764 static int
build_ttinfo(long utcoffset,long dstoffset,PyObject * tzname,_ttinfo * out)765 build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out)
766 {
767     out->utcoff = NULL;
768     out->dstoff = NULL;
769     out->tzname = NULL;
770 
771     out->utcoff_seconds = utcoffset;
772     out->utcoff = load_timedelta(utcoffset);
773     if (out->utcoff == NULL) {
774         return -1;
775     }
776 
777     out->dstoff = load_timedelta(dstoffset);
778     if (out->dstoff == NULL) {
779         return -1;
780     }
781 
782     out->tzname = tzname;
783     Py_INCREF(tzname);
784 
785     return 0;
786 }
787 
788 /* Decrease reference count on any non-NULL members of a _ttinfo  */
789 static void
xdecref_ttinfo(_ttinfo * ttinfo)790 xdecref_ttinfo(_ttinfo *ttinfo)
791 {
792     if (ttinfo != NULL) {
793         Py_XDECREF(ttinfo->utcoff);
794         Py_XDECREF(ttinfo->dstoff);
795         Py_XDECREF(ttinfo->tzname);
796     }
797 }
798 
799 /* Equality function for _ttinfo. */
800 static int
ttinfo_eq(const _ttinfo * const tti0,const _ttinfo * const tti1)801 ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1)
802 {
803     int rv;
804     if ((rv = PyObject_RichCompareBool(tti0->utcoff, tti1->utcoff, Py_EQ)) <
805         1) {
806         goto end;
807     }
808 
809     if ((rv = PyObject_RichCompareBool(tti0->dstoff, tti1->dstoff, Py_EQ)) <
810         1) {
811         goto end;
812     }
813 
814     if ((rv = PyObject_RichCompareBool(tti0->tzname, tti1->tzname, Py_EQ)) <
815         1) {
816         goto end;
817     }
818 end:
819     return rv;
820 }
821 
822 /* Given a file-like object, this populates a ZoneInfo object
823  *
824  * The current version calls into a Python function to read the data from
825  * file into Python objects, and this translates those Python objects into
826  * C values and calculates derived values (e.g. dstoff) in C.
827  *
828  * This returns 0 on success and -1 on failure.
829  *
830  * The function will never return while `self` is partially initialized —
831  * the object only needs to be freed / deallocated if this succeeds.
832  */
833 static int
load_data(PyZoneInfo_ZoneInfo * self,PyObject * file_obj)834 load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj)
835 {
836     PyObject *data_tuple = NULL;
837 
838     long *utcoff = NULL;
839     long *dstoff = NULL;
840     size_t *trans_idx = NULL;
841     unsigned char *isdst = NULL;
842 
843     self->trans_list_utc = NULL;
844     self->trans_list_wall[0] = NULL;
845     self->trans_list_wall[1] = NULL;
846     self->trans_ttinfos = NULL;
847     self->_ttinfos = NULL;
848     self->file_repr = NULL;
849 
850     size_t ttinfos_allocated = 0;
851 
852     data_tuple = PyObject_CallMethod(_common_mod, "load_data", "O", file_obj);
853 
854     if (data_tuple == NULL) {
855         goto error;
856     }
857 
858     if (!PyTuple_CheckExact(data_tuple)) {
859         PyErr_Format(PyExc_TypeError, "Invalid data result type: %r",
860                      data_tuple);
861         goto error;
862     }
863 
864     // Unpack the data tuple
865     PyObject *trans_idx_list = PyTuple_GetItem(data_tuple, 0);
866     if (trans_idx_list == NULL) {
867         goto error;
868     }
869 
870     PyObject *trans_utc = PyTuple_GetItem(data_tuple, 1);
871     if (trans_utc == NULL) {
872         goto error;
873     }
874 
875     PyObject *utcoff_list = PyTuple_GetItem(data_tuple, 2);
876     if (utcoff_list == NULL) {
877         goto error;
878     }
879 
880     PyObject *isdst_list = PyTuple_GetItem(data_tuple, 3);
881     if (isdst_list == NULL) {
882         goto error;
883     }
884 
885     PyObject *abbr = PyTuple_GetItem(data_tuple, 4);
886     if (abbr == NULL) {
887         goto error;
888     }
889 
890     PyObject *tz_str = PyTuple_GetItem(data_tuple, 5);
891     if (tz_str == NULL) {
892         goto error;
893     }
894 
895     // Load the relevant sizes
896     Py_ssize_t num_transitions = PyTuple_Size(trans_utc);
897     if (num_transitions < 0) {
898         goto error;
899     }
900 
901     Py_ssize_t num_ttinfos = PyTuple_Size(utcoff_list);
902     if (num_ttinfos < 0) {
903         goto error;
904     }
905 
906     self->num_transitions = (size_t)num_transitions;
907     self->num_ttinfos = (size_t)num_ttinfos;
908 
909     // Load the transition indices and list
910     self->trans_list_utc =
911         PyMem_Malloc(self->num_transitions * sizeof(int64_t));
912     trans_idx = PyMem_Malloc(self->num_transitions * sizeof(Py_ssize_t));
913 
914     for (size_t i = 0; i < self->num_transitions; ++i) {
915         PyObject *num = PyTuple_GetItem(trans_utc, i);
916         if (num == NULL) {
917             goto error;
918         }
919         self->trans_list_utc[i] = PyLong_AsLongLong(num);
920         if (self->trans_list_utc[i] == -1 && PyErr_Occurred()) {
921             goto error;
922         }
923 
924         num = PyTuple_GetItem(trans_idx_list, i);
925         if (num == NULL) {
926             goto error;
927         }
928 
929         Py_ssize_t cur_trans_idx = PyLong_AsSsize_t(num);
930         if (cur_trans_idx == -1) {
931             goto error;
932         }
933 
934         trans_idx[i] = (size_t)cur_trans_idx;
935         if (trans_idx[i] > self->num_ttinfos) {
936             PyErr_Format(
937                 PyExc_ValueError,
938                 "Invalid transition index found while reading TZif: %zd",
939                 cur_trans_idx);
940 
941             goto error;
942         }
943     }
944 
945     // Load UTC offsets and isdst (size num_ttinfos)
946     utcoff = PyMem_Malloc(self->num_ttinfos * sizeof(long));
947     isdst = PyMem_Malloc(self->num_ttinfos * sizeof(unsigned char));
948 
949     if (utcoff == NULL || isdst == NULL) {
950         goto error;
951     }
952     for (size_t i = 0; i < self->num_ttinfos; ++i) {
953         PyObject *num = PyTuple_GetItem(utcoff_list, i);
954         if (num == NULL) {
955             goto error;
956         }
957 
958         utcoff[i] = PyLong_AsLong(num);
959         if (utcoff[i] == -1 && PyErr_Occurred()) {
960             goto error;
961         }
962 
963         num = PyTuple_GetItem(isdst_list, i);
964         if (num == NULL) {
965             goto error;
966         }
967 
968         int isdst_with_error = PyObject_IsTrue(num);
969         if (isdst_with_error == -1) {
970             goto error;
971         }
972         else {
973             isdst[i] = (unsigned char)isdst_with_error;
974         }
975     }
976 
977     dstoff = PyMem_Calloc(self->num_ttinfos, sizeof(long));
978     if (dstoff == NULL) {
979         goto error;
980     }
981 
982     // Derive dstoff and trans_list_wall from the information we've loaded
983     utcoff_to_dstoff(trans_idx, utcoff, dstoff, isdst, self->num_transitions,
984                      self->num_ttinfos);
985 
986     if (ts_to_local(trans_idx, self->trans_list_utc, utcoff,
987                     self->trans_list_wall, self->num_ttinfos,
988                     self->num_transitions)) {
989         goto error;
990     }
991 
992     // Build _ttinfo objects from utcoff, dstoff and abbr
993     self->_ttinfos = PyMem_Malloc(self->num_ttinfos * sizeof(_ttinfo));
994     for (size_t i = 0; i < self->num_ttinfos; ++i) {
995         PyObject *tzname = PyTuple_GetItem(abbr, i);
996         if (tzname == NULL) {
997             goto error;
998         }
999 
1000         ttinfos_allocated++;
1001         if (build_ttinfo(utcoff[i], dstoff[i], tzname, &(self->_ttinfos[i]))) {
1002             goto error;
1003         }
1004     }
1005 
1006     // Build our mapping from transition to the ttinfo that applies
1007     self->trans_ttinfos =
1008         PyMem_Calloc(self->num_transitions, sizeof(_ttinfo *));
1009     for (size_t i = 0; i < self->num_transitions; ++i) {
1010         size_t ttinfo_idx = trans_idx[i];
1011         assert(ttinfo_idx < self->num_ttinfos);
1012         self->trans_ttinfos[i] = &(self->_ttinfos[ttinfo_idx]);
1013     }
1014 
1015     // Set ttinfo_before to the first non-DST transition
1016     for (size_t i = 0; i < self->num_ttinfos; ++i) {
1017         if (!isdst[i]) {
1018             self->ttinfo_before = &(self->_ttinfos[i]);
1019             break;
1020         }
1021     }
1022 
1023     // If there are only DST ttinfos, pick the first one, if there are no
1024     // ttinfos at all, set ttinfo_before to NULL
1025     if (self->ttinfo_before == NULL && self->num_ttinfos > 0) {
1026         self->ttinfo_before = &(self->_ttinfos[0]);
1027     }
1028 
1029     if (tz_str != Py_None && PyObject_IsTrue(tz_str)) {
1030         if (parse_tz_str(tz_str, &(self->tzrule_after))) {
1031             goto error;
1032         }
1033     }
1034     else {
1035         if (!self->num_ttinfos) {
1036             PyErr_Format(PyExc_ValueError, "No time zone information found.");
1037             goto error;
1038         }
1039 
1040         size_t idx;
1041         if (!self->num_transitions) {
1042             idx = self->num_ttinfos - 1;
1043         }
1044         else {
1045             idx = trans_idx[self->num_transitions - 1];
1046         }
1047 
1048         _ttinfo *tti = &(self->_ttinfos[idx]);
1049         build_tzrule(tti->tzname, NULL, tti->utcoff_seconds, 0, NULL, NULL,
1050                      &(self->tzrule_after));
1051 
1052         // We've abused the build_tzrule constructor to construct an STD-only
1053         // rule mimicking whatever ttinfo we've picked up, but it's possible
1054         // that the one we've picked up is a DST zone, so we need to make sure
1055         // that the dstoff is set correctly in that case.
1056         if (PyObject_IsTrue(tti->dstoff)) {
1057             _ttinfo *tti_after = &(self->tzrule_after.std);
1058             Py_DECREF(tti_after->dstoff);
1059             tti_after->dstoff = tti->dstoff;
1060             Py_INCREF(tti_after->dstoff);
1061         }
1062     }
1063 
1064     // Determine if this is a "fixed offset" zone, meaning that the output of
1065     // the utcoffset, dst and tzname functions does not depend on the specific
1066     // datetime passed.
1067     //
1068     // We make three simplifying assumptions here:
1069     //
1070     // 1. If tzrule_after is not std_only, it has transitions that might occur
1071     //    (it is possible to construct TZ strings that specify STD and DST but
1072     //    no transitions ever occur, such as AAA0BBB,0/0,J365/25).
1073     // 2. If self->_ttinfos contains more than one _ttinfo object, the objects
1074     //    represent different offsets.
1075     // 3. self->ttinfos contains no unused _ttinfos (in which case an otherwise
1076     //    fixed-offset zone with extra _ttinfos defined may appear to *not* be
1077     //    a fixed offset zone).
1078     //
1079     // Violations to these assumptions would be fairly exotic, and exotic
1080     // zones should almost certainly not be used with datetime.time (the
1081     // only thing that would be affected by this).
1082     if (self->num_ttinfos > 1 || !self->tzrule_after.std_only) {
1083         self->fixed_offset = 0;
1084     }
1085     else if (self->num_ttinfos == 0) {
1086         self->fixed_offset = 1;
1087     }
1088     else {
1089         int constant_offset =
1090             ttinfo_eq(&(self->_ttinfos[0]), &self->tzrule_after.std);
1091         if (constant_offset < 0) {
1092             goto error;
1093         }
1094         else {
1095             self->fixed_offset = constant_offset;
1096         }
1097     }
1098 
1099     int rv = 0;
1100     goto cleanup;
1101 error:
1102     // These resources only need to be freed if we have failed, if we succeed
1103     // in initializing a PyZoneInfo_ZoneInfo object, we can rely on its dealloc
1104     // method to free the relevant resources.
1105     if (self->trans_list_utc != NULL) {
1106         PyMem_Free(self->trans_list_utc);
1107         self->trans_list_utc = NULL;
1108     }
1109 
1110     for (size_t i = 0; i < 2; ++i) {
1111         if (self->trans_list_wall[i] != NULL) {
1112             PyMem_Free(self->trans_list_wall[i]);
1113             self->trans_list_wall[i] = NULL;
1114         }
1115     }
1116 
1117     if (self->_ttinfos != NULL) {
1118         for (size_t i = 0; i < ttinfos_allocated; ++i) {
1119             xdecref_ttinfo(&(self->_ttinfos[i]));
1120         }
1121         PyMem_Free(self->_ttinfos);
1122         self->_ttinfos = NULL;
1123     }
1124 
1125     if (self->trans_ttinfos != NULL) {
1126         PyMem_Free(self->trans_ttinfos);
1127         self->trans_ttinfos = NULL;
1128     }
1129 
1130     rv = -1;
1131 cleanup:
1132     Py_XDECREF(data_tuple);
1133 
1134     if (utcoff != NULL) {
1135         PyMem_Free(utcoff);
1136     }
1137 
1138     if (dstoff != NULL) {
1139         PyMem_Free(dstoff);
1140     }
1141 
1142     if (isdst != NULL) {
1143         PyMem_Free(isdst);
1144     }
1145 
1146     if (trans_idx != NULL) {
1147         PyMem_Free(trans_idx);
1148     }
1149 
1150     return rv;
1151 }
1152 
1153 /* Function to calculate the local timestamp of a transition from the year. */
1154 int64_t
calendarrule_year_to_timestamp(TransitionRuleType * base_self,int year)1155 calendarrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1156 {
1157     CalendarRule *self = (CalendarRule *)base_self;
1158 
1159     // We want (year, month, day of month); we have year and month, but we
1160     // need to turn (week, day-of-week) into day-of-month
1161     //
1162     // Week 1 is the first week in which day `day` (where 0 = Sunday) appears.
1163     // Week 5 represents the last occurrence of day `day`, so we need to know
1164     // the first weekday of the month and the number of days in the month.
1165     int8_t first_day = (ymd_to_ord(year, self->month, 1) + 6) % 7;
1166     uint8_t days_in_month = DAYS_IN_MONTH[self->month];
1167     if (self->month == 2 && is_leap_year(year)) {
1168         days_in_month += 1;
1169     }
1170 
1171     // This equation seems magical, so I'll break it down:
1172     // 1. calendar says 0 = Monday, POSIX says 0 = Sunday so we need first_day
1173     //    + 1 to get 1 = Monday -> 7 = Sunday, which is still equivalent
1174     //    because this math is mod 7
1175     // 2. Get first day - desired day mod 7 (adjusting by 7 for negative
1176     //    numbers so that -1 % 7 = 6).
1177     // 3. Add 1 because month days are a 1-based index.
1178     int8_t month_day = ((int8_t)(self->day) - (first_day + 1)) % 7;
1179     if (month_day < 0) {
1180         month_day += 7;
1181     }
1182     month_day += 1;
1183 
1184     // Now use a 0-based index version of `week` to calculate the w-th
1185     // occurrence of `day`
1186     month_day += ((int8_t)(self->week) - 1) * 7;
1187 
1188     // month_day will only be > days_in_month if w was 5, and `w` means "last
1189     // occurrence of `d`", so now we just check if we over-shot the end of the
1190     // month and if so knock off 1 week.
1191     if (month_day > days_in_month) {
1192         month_day -= 7;
1193     }
1194 
1195     int64_t ordinal = ymd_to_ord(year, self->month, month_day) - EPOCHORDINAL;
1196     return ((ordinal * 86400) + (int64_t)(self->hour * 3600) +
1197             (int64_t)(self->minute * 60) + (int64_t)(self->second));
1198 }
1199 
1200 /* Constructor for CalendarRule. */
1201 int
calendarrule_new(uint8_t month,uint8_t week,uint8_t day,int8_t hour,int8_t minute,int8_t second,CalendarRule * out)1202 calendarrule_new(uint8_t month, uint8_t week, uint8_t day, int8_t hour,
1203                  int8_t minute, int8_t second, CalendarRule *out)
1204 {
1205     // These bounds come from the POSIX standard, which describes an Mm.n.d
1206     // rule as:
1207     //
1208     //   The d'th day (0 <= d <= 6) of week n of month m of the year (1 <= n <=
1209     //   5, 1 <= m <= 12, where week 5 means "the last d day in month m" which
1210     //   may occur in either the fourth or the fifth week). Week 1 is the first
1211     //   week in which the d'th day occurs. Day zero is Sunday.
1212     if (month <= 0 || month > 12) {
1213         PyErr_Format(PyExc_ValueError, "Month must be in (0, 12]");
1214         return -1;
1215     }
1216 
1217     if (week <= 0 || week > 5) {
1218         PyErr_Format(PyExc_ValueError, "Week must be in (0, 5]");
1219         return -1;
1220     }
1221 
1222     // day is an unsigned integer, so day < 0 should always return false, but
1223     // if day's type changes to a signed integer *without* changing this value,
1224     // it may create a bug. Considering that the compiler should be able to
1225     // optimize out the first comparison if day is an unsigned integer anyway,
1226     // we will leave this comparison in place and disable the compiler warning.
1227 #pragma GCC diagnostic push
1228 #pragma GCC diagnostic ignored "-Wtype-limits"
1229     if (day < 0 || day > 6) {
1230 #pragma GCC diagnostic pop
1231         PyErr_Format(PyExc_ValueError, "Day must be in [0, 6]");
1232         return -1;
1233     }
1234 
1235     TransitionRuleType base = {&calendarrule_year_to_timestamp};
1236 
1237     CalendarRule new_offset = {
1238         .base = base,
1239         .month = month,
1240         .week = week,
1241         .day = day,
1242         .hour = hour,
1243         .minute = minute,
1244         .second = second,
1245     };
1246 
1247     *out = new_offset;
1248     return 0;
1249 }
1250 
1251 /* Function to calculate the local timestamp of a transition from the year.
1252  *
1253  * This translates the day of the year into a local timestamp — either a
1254  * 1-based Julian day, not including leap days, or the 0-based year-day,
1255  * including leap days.
1256  * */
1257 int64_t
dayrule_year_to_timestamp(TransitionRuleType * base_self,int year)1258 dayrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1259 {
1260     // The function signature requires a TransitionRuleType pointer, but this
1261     // function is only applicable to DayRule* objects.
1262     DayRule *self = (DayRule *)base_self;
1263 
1264     // ymd_to_ord calculates the number of days since 0001-01-01, but we want
1265     // to know the number of days since 1970-01-01, so we must subtract off
1266     // the equivalent of ymd_to_ord(1970, 1, 1).
1267     //
1268     // We subtract off an additional 1 day to account for January 1st (we want
1269     // the number of full days *before* the date of the transition - partial
1270     // days are accounted for in the hour, minute and second portions.
1271     int64_t days_before_year = ymd_to_ord(year, 1, 1) - EPOCHORDINAL - 1;
1272 
1273     // The Julian day specification skips over February 29th in leap years,
1274     // from the POSIX standard:
1275     //
1276     //   Leap days shall not be counted. That is, in all years-including leap
1277     //   years-February 28 is day 59 and March 1 is day 60. It is impossible to
1278     //   refer explicitly to the occasional February 29.
1279     //
1280     // This is actually more useful than you'd think — if you want a rule that
1281     // always transitions on a given calendar day (other than February 29th),
1282     // you would use a Julian day, e.g. J91 always refers to April 1st and J365
1283     // always refers to December 31st.
1284     unsigned int day = self->day;
1285     if (self->julian && day >= 59 && is_leap_year(year)) {
1286         day += 1;
1287     }
1288 
1289     return ((days_before_year + day) * 86400) + (self->hour * 3600) +
1290            (self->minute * 60) + self->second;
1291 }
1292 
1293 /* Constructor for DayRule. */
1294 static int
dayrule_new(uint8_t julian,unsigned int day,int8_t hour,int8_t minute,int8_t second,DayRule * out)1295 dayrule_new(uint8_t julian, unsigned int day, int8_t hour, int8_t minute,
1296             int8_t second, DayRule *out)
1297 {
1298     // The POSIX standard specifies that Julian days must be in the range (1 <=
1299     // n <= 365) and that non-Julian (they call it "0-based Julian") days must
1300     // be in the range (0 <= n <= 365).
1301     if (day < julian || day > 365) {
1302         PyErr_Format(PyExc_ValueError, "day must be in [%u, 365], not: %u",
1303                      julian, day);
1304         return -1;
1305     }
1306 
1307     TransitionRuleType base = {
1308         &dayrule_year_to_timestamp,
1309     };
1310 
1311     DayRule tmp = {
1312         .base = base,
1313         .julian = julian,
1314         .day = day,
1315         .hour = hour,
1316         .minute = minute,
1317         .second = second,
1318     };
1319 
1320     *out = tmp;
1321 
1322     return 0;
1323 }
1324 
1325 /* Calculate the start and end rules for a _tzrule in the given year. */
1326 static void
tzrule_transitions(_tzrule * rule,int year,int64_t * start,int64_t * end)1327 tzrule_transitions(_tzrule *rule, int year, int64_t *start, int64_t *end)
1328 {
1329     assert(rule->start != NULL);
1330     assert(rule->end != NULL);
1331     *start = rule->start->year_to_timestamp(rule->start, year);
1332     *end = rule->end->year_to_timestamp(rule->end, year);
1333 }
1334 
1335 /* Calculate the _ttinfo that applies at a given local time from a _tzrule.
1336  *
1337  * This takes a local timestamp and fold for disambiguation purposes; the year
1338  * could technically be calculated from the timestamp, but given that the
1339  * callers of this function already have the year information accessible from
1340  * the datetime struct, it is taken as an additional parameter to reduce
1341  * unncessary calculation.
1342  * */
1343 static _ttinfo *
find_tzrule_ttinfo(_tzrule * rule,int64_t ts,unsigned char fold,int year)1344 find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year)
1345 {
1346     if (rule->std_only) {
1347         return &(rule->std);
1348     }
1349 
1350     int64_t start, end;
1351     uint8_t isdst;
1352 
1353     tzrule_transitions(rule, year, &start, &end);
1354 
1355     // With fold = 0, the period (denominated in local time) with the smaller
1356     // offset starts at the end of the gap and ends at the end of the fold;
1357     // with fold = 1, it runs from the start of the gap to the beginning of the
1358     // fold.
1359     //
1360     // So in order to determine the DST boundaries we need to know both the
1361     // fold and whether DST is positive or negative (rare), and it turns out
1362     // that this boils down to fold XOR is_positive.
1363     if (fold == (rule->dst_diff >= 0)) {
1364         end -= rule->dst_diff;
1365     }
1366     else {
1367         start += rule->dst_diff;
1368     }
1369 
1370     if (start < end) {
1371         isdst = (ts >= start) && (ts < end);
1372     }
1373     else {
1374         isdst = (ts < end) || (ts >= start);
1375     }
1376 
1377     if (isdst) {
1378         return &(rule->dst);
1379     }
1380     else {
1381         return &(rule->std);
1382     }
1383 }
1384 
1385 /* Calculate the ttinfo and fold that applies for a _tzrule at an epoch time.
1386  *
1387  * This function can determine the _ttinfo that applies at a given epoch time,
1388  * (analogous to trans_list_utc), and whether or not the datetime is in a fold.
1389  * This is to be used in the .fromutc() function.
1390  *
1391  * The year is technically a redundant parameter, because it can be calculated
1392  * from the timestamp, but all callers of this function should have the year
1393  * in the datetime struct anyway, so taking it as a parameter saves unnecessary
1394  * calculation.
1395  **/
1396 static _ttinfo *
find_tzrule_ttinfo_fromutc(_tzrule * rule,int64_t ts,int year,unsigned char * fold)1397 find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
1398                            unsigned char *fold)
1399 {
1400     if (rule->std_only) {
1401         *fold = 0;
1402         return &(rule->std);
1403     }
1404 
1405     int64_t start, end;
1406     uint8_t isdst;
1407     tzrule_transitions(rule, year, &start, &end);
1408     start -= rule->std.utcoff_seconds;
1409     end -= rule->dst.utcoff_seconds;
1410 
1411     if (start < end) {
1412         isdst = (ts >= start) && (ts < end);
1413     }
1414     else {
1415         isdst = (ts < end) || (ts >= start);
1416     }
1417 
1418     // For positive DST, the ambiguous period is one dst_diff after the end of
1419     // DST; for negative DST, the ambiguous period is one dst_diff before the
1420     // start of DST.
1421     int64_t ambig_start, ambig_end;
1422     if (rule->dst_diff > 0) {
1423         ambig_start = end;
1424         ambig_end = end + rule->dst_diff;
1425     }
1426     else {
1427         ambig_start = start;
1428         ambig_end = start - rule->dst_diff;
1429     }
1430 
1431     *fold = (ts >= ambig_start) && (ts < ambig_end);
1432 
1433     if (isdst) {
1434         return &(rule->dst);
1435     }
1436     else {
1437         return &(rule->std);
1438     }
1439 }
1440 
1441 /* Parse a TZ string in the format specified by the POSIX standard:
1442  *
1443  *  std offset[dst[offset],start[/time],end[/time]]
1444  *
1445  *  std and dst must be 3 or more characters long and must not contain a
1446  *  leading colon, embedded digits, commas, nor a plus or minus signs; The
1447  *  spaces between "std" and "offset" are only for display and are not actually
1448  *  present in the string.
1449  *
1450  *  The format of the offset is ``[+|-]hh[:mm[:ss]]``
1451  *
1452  * See the POSIX.1 spec: IEE Std 1003.1-2018 §8.3:
1453  *
1454  * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html
1455  */
1456 static int
parse_tz_str(PyObject * tz_str_obj,_tzrule * out)1457 parse_tz_str(PyObject *tz_str_obj, _tzrule *out)
1458 {
1459     PyObject *std_abbr = NULL;
1460     PyObject *dst_abbr = NULL;
1461     TransitionRuleType *start = NULL;
1462     TransitionRuleType *end = NULL;
1463     // Initialize offsets to invalid value (> 24 hours)
1464     long std_offset = 1 << 20;
1465     long dst_offset = 1 << 20;
1466 
1467     char *tz_str = PyBytes_AsString(tz_str_obj);
1468     if (tz_str == NULL) {
1469         return -1;
1470     }
1471     char *p = tz_str;
1472 
1473     // Read the `std` abbreviation, which must be at least 3 characters long.
1474     Py_ssize_t num_chars = parse_abbr(p, &std_abbr);
1475     if (num_chars < 1) {
1476         PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj);
1477         goto error;
1478     }
1479 
1480     p += num_chars;
1481 
1482     // Now read the STD offset, which is required
1483     num_chars = parse_tz_delta(p, &std_offset);
1484     if (num_chars < 0) {
1485         PyErr_Format(PyExc_ValueError, "Invalid STD offset in %R", tz_str_obj);
1486         goto error;
1487     }
1488     p += num_chars;
1489 
1490     // If the string ends here, there is no DST, otherwise we must parse the
1491     // DST abbreviation and start and end dates and times.
1492     if (*p == '\0') {
1493         goto complete;
1494     }
1495 
1496     num_chars = parse_abbr(p, &dst_abbr);
1497     if (num_chars < 1) {
1498         PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj);
1499         goto error;
1500     }
1501     p += num_chars;
1502 
1503     if (*p == ',') {
1504         // From the POSIX standard:
1505         //
1506         // If no offset follows dst, the alternative time is assumed to be one
1507         // hour ahead of standard time.
1508         dst_offset = std_offset + 3600;
1509     }
1510     else {
1511         num_chars = parse_tz_delta(p, &dst_offset);
1512         if (num_chars < 0) {
1513             PyErr_Format(PyExc_ValueError, "Invalid DST offset in %R",
1514                          tz_str_obj);
1515             goto error;
1516         }
1517 
1518         p += num_chars;
1519     }
1520 
1521     TransitionRuleType **transitions[2] = {&start, &end};
1522     for (size_t i = 0; i < 2; ++i) {
1523         if (*p != ',') {
1524             PyErr_Format(PyExc_ValueError,
1525                          "Missing transition rules in TZ string: %R",
1526                          tz_str_obj);
1527             goto error;
1528         }
1529         p++;
1530 
1531         num_chars = parse_transition_rule(p, transitions[i]);
1532         if (num_chars < 0) {
1533             PyErr_Format(PyExc_ValueError,
1534                          "Malformed transition rule in TZ string: %R",
1535                          tz_str_obj);
1536             goto error;
1537         }
1538         p += num_chars;
1539     }
1540 
1541     if (*p != '\0') {
1542         PyErr_Format(PyExc_ValueError,
1543                      "Extraneous characters at end of TZ string: %R",
1544                      tz_str_obj);
1545         goto error;
1546     }
1547 
1548 complete:
1549     build_tzrule(std_abbr, dst_abbr, std_offset, dst_offset, start, end, out);
1550     Py_DECREF(std_abbr);
1551     Py_XDECREF(dst_abbr);
1552 
1553     return 0;
1554 error:
1555     Py_XDECREF(std_abbr);
1556     if (dst_abbr != NULL && dst_abbr != Py_None) {
1557         Py_DECREF(dst_abbr);
1558     }
1559 
1560     if (start != NULL) {
1561         PyMem_Free(start);
1562     }
1563 
1564     if (end != NULL) {
1565         PyMem_Free(end);
1566     }
1567 
1568     return -1;
1569 }
1570 
1571 static int
parse_uint(const char * const p,uint8_t * value)1572 parse_uint(const char *const p, uint8_t *value)
1573 {
1574     if (!isdigit(*p)) {
1575         return -1;
1576     }
1577 
1578     *value = (*p) - '0';
1579     return 0;
1580 }
1581 
1582 /* Parse the STD and DST abbreviations from a TZ string. */
1583 static Py_ssize_t
parse_abbr(const char * const p,PyObject ** abbr)1584 parse_abbr(const char *const p, PyObject **abbr)
1585 {
1586     const char *ptr = p;
1587     char buff = *ptr;
1588     const char *str_start;
1589     const char *str_end;
1590 
1591     if (*ptr == '<') {
1592         ptr++;
1593         str_start = ptr;
1594         while ((buff = *ptr) != '>') {
1595             // From the POSIX standard:
1596             //
1597             //   In the quoted form, the first character shall be the less-than
1598             //   ( '<' ) character and the last character shall be the
1599             //   greater-than ( '>' ) character. All characters between these
1600             //   quoting characters shall be alphanumeric characters from the
1601             //   portable character set in the current locale, the plus-sign (
1602             //   '+' ) character, or the minus-sign ( '-' ) character. The std
1603             //   and dst fields in this case shall not include the quoting
1604             //   characters.
1605             if (!isalpha(buff) && !isdigit(buff) && buff != '+' &&
1606                 buff != '-') {
1607                 return -1;
1608             }
1609             ptr++;
1610         }
1611         str_end = ptr;
1612         ptr++;
1613     }
1614     else {
1615         str_start = p;
1616         // From the POSIX standard:
1617         //
1618         //   In the unquoted form, all characters in these fields shall be
1619         //   alphabetic characters from the portable character set in the
1620         //   current locale.
1621         while (isalpha(*ptr)) {
1622             ptr++;
1623         }
1624         str_end = ptr;
1625     }
1626 
1627     *abbr = PyUnicode_FromStringAndSize(str_start, str_end - str_start);
1628     if (*abbr == NULL) {
1629         return -1;
1630     }
1631 
1632     return ptr - p;
1633 }
1634 
1635 /* Parse a UTC offset from a TZ str. */
1636 static Py_ssize_t
parse_tz_delta(const char * const p,long * total_seconds)1637 parse_tz_delta(const char *const p, long *total_seconds)
1638 {
1639     // From the POSIX spec:
1640     //
1641     //   Indicates the value added to the local time to arrive at Coordinated
1642     //   Universal Time. The offset has the form:
1643     //
1644     //   hh[:mm[:ss]]
1645     //
1646     //   One or more digits may be used; the value is always interpreted as a
1647     //   decimal number.
1648     //
1649     // The POSIX spec says that the values for `hour` must be between 0 and 24
1650     // hours, but RFC 8536 §3.3.1 specifies that the hours part of the
1651     // transition times may be signed and range from -167 to 167.
1652     long sign = -1;
1653     long hours = 0;
1654     long minutes = 0;
1655     long seconds = 0;
1656 
1657     const char *ptr = p;
1658     char buff = *ptr;
1659     if (buff == '-' || buff == '+') {
1660         // Negative numbers correspond to *positive* offsets, from the spec:
1661         //
1662         //   If preceded by a '-', the timezone shall be east of the Prime
1663         //   Meridian; otherwise, it shall be west (which may be indicated by
1664         //   an optional preceding '+' ).
1665         if (buff == '-') {
1666             sign = 1;
1667         }
1668 
1669         ptr++;
1670     }
1671 
1672     // The hour can be 1 or 2 numeric characters
1673     for (size_t i = 0; i < 2; ++i) {
1674         buff = *ptr;
1675         if (!isdigit(buff)) {
1676             if (i == 0) {
1677                 return -1;
1678             }
1679             else {
1680                 break;
1681             }
1682         }
1683 
1684         hours *= 10;
1685         hours += buff - '0';
1686         ptr++;
1687     }
1688 
1689     if (hours > 24 || hours < 0) {
1690         return -1;
1691     }
1692 
1693     // Minutes and seconds always of the format ":dd"
1694     long *outputs[2] = {&minutes, &seconds};
1695     for (size_t i = 0; i < 2; ++i) {
1696         if (*ptr != ':') {
1697             goto complete;
1698         }
1699         ptr++;
1700 
1701         for (size_t j = 0; j < 2; ++j) {
1702             buff = *ptr;
1703             if (!isdigit(buff)) {
1704                 return -1;
1705             }
1706             *(outputs[i]) *= 10;
1707             *(outputs[i]) += buff - '0';
1708             ptr++;
1709         }
1710     }
1711 
1712 complete:
1713     *total_seconds = sign * ((hours * 3600) + (minutes * 60) + seconds);
1714 
1715     return ptr - p;
1716 }
1717 
1718 /* Parse the date portion of a transition rule. */
1719 static Py_ssize_t
parse_transition_rule(const char * const p,TransitionRuleType ** out)1720 parse_transition_rule(const char *const p, TransitionRuleType **out)
1721 {
1722     // The full transition rule indicates when to change back and forth between
1723     // STD and DST, and has the form:
1724     //
1725     //   date[/time],date[/time]
1726     //
1727     // This function parses an individual date[/time] section, and returns
1728     // the number of characters that contributed to the transition rule. This
1729     // does not include the ',' at the end of the first rule.
1730     //
1731     // The POSIX spec states that if *time* is not given, the default is 02:00.
1732     const char *ptr = p;
1733     int8_t hour = 2;
1734     int8_t minute = 0;
1735     int8_t second = 0;
1736 
1737     // Rules come in one of three flavors:
1738     //
1739     //   1. Jn: Julian day n, with no leap days.
1740     //   2. n: Day of year (0-based, with leap days)
1741     //   3. Mm.n.d: Specifying by month, week and day-of-week.
1742 
1743     if (*ptr == 'M') {
1744         uint8_t month, week, day;
1745         ptr++;
1746         if (parse_uint(ptr, &month)) {
1747             return -1;
1748         }
1749         ptr++;
1750         if (*ptr != '.') {
1751             uint8_t tmp;
1752             if (parse_uint(ptr, &tmp)) {
1753                 return -1;
1754             }
1755 
1756             month *= 10;
1757             month += tmp;
1758             ptr++;
1759         }
1760 
1761         uint8_t *values[2] = {&week, &day};
1762         for (size_t i = 0; i < 2; ++i) {
1763             if (*ptr != '.') {
1764                 return -1;
1765             }
1766             ptr++;
1767 
1768             if (parse_uint(ptr, values[i])) {
1769                 return -1;
1770             }
1771             ptr++;
1772         }
1773 
1774         if (*ptr == '/') {
1775             ptr++;
1776             Py_ssize_t num_chars =
1777                 parse_transition_time(ptr, &hour, &minute, &second);
1778             if (num_chars < 0) {
1779                 return -1;
1780             }
1781             ptr += num_chars;
1782         }
1783 
1784         CalendarRule *rv = PyMem_Calloc(1, sizeof(CalendarRule));
1785         if (rv == NULL) {
1786             return -1;
1787         }
1788 
1789         if (calendarrule_new(month, week, day, hour, minute, second, rv)) {
1790             PyMem_Free(rv);
1791             return -1;
1792         }
1793 
1794         *out = (TransitionRuleType *)rv;
1795     }
1796     else {
1797         uint8_t julian = 0;
1798         unsigned int day = 0;
1799         if (*ptr == 'J') {
1800             julian = 1;
1801             ptr++;
1802         }
1803 
1804         for (size_t i = 0; i < 3; ++i) {
1805             if (!isdigit(*ptr)) {
1806                 if (i == 0) {
1807                     return -1;
1808                 }
1809                 break;
1810             }
1811             day *= 10;
1812             day += (*ptr) - '0';
1813             ptr++;
1814         }
1815 
1816         if (*ptr == '/') {
1817             ptr++;
1818             Py_ssize_t num_chars =
1819                 parse_transition_time(ptr, &hour, &minute, &second);
1820             if (num_chars < 0) {
1821                 return -1;
1822             }
1823             ptr += num_chars;
1824         }
1825 
1826         DayRule *rv = PyMem_Calloc(1, sizeof(DayRule));
1827         if (rv == NULL) {
1828             return -1;
1829         }
1830 
1831         if (dayrule_new(julian, day, hour, minute, second, rv)) {
1832             PyMem_Free(rv);
1833             return -1;
1834         }
1835         *out = (TransitionRuleType *)rv;
1836     }
1837 
1838     return ptr - p;
1839 }
1840 
1841 /* Parse the time portion of a transition rule (e.g. following an /) */
1842 static Py_ssize_t
parse_transition_time(const char * const p,int8_t * hour,int8_t * minute,int8_t * second)1843 parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
1844                       int8_t *second)
1845 {
1846     // From the spec:
1847     //
1848     //   The time has the same format as offset except that no leading sign
1849     //   ( '-' or '+' ) is allowed.
1850     //
1851     // The format for the offset is:
1852     //
1853     //   h[h][:mm[:ss]]
1854     //
1855     // RFC 8536 also allows transition times to be signed and to range from
1856     // -167 to +167, but the current version only supports [0, 99].
1857     //
1858     // TODO: Support the full range of transition hours.
1859     int8_t *components[3] = {hour, minute, second};
1860     const char *ptr = p;
1861     int8_t sign = 1;
1862 
1863     if (*ptr == '-' || *ptr == '+') {
1864         if (*ptr == '-') {
1865             sign = -1;
1866         }
1867         ptr++;
1868     }
1869 
1870     for (size_t i = 0; i < 3; ++i) {
1871         if (i > 0) {
1872             if (*ptr != ':') {
1873                 break;
1874             }
1875             ptr++;
1876         }
1877 
1878         uint8_t buff = 0;
1879         for (size_t j = 0; j < 2; j++) {
1880             if (!isdigit(*ptr)) {
1881                 if (i == 0 && j > 0) {
1882                     break;
1883                 }
1884                 return -1;
1885             }
1886 
1887             buff *= 10;
1888             buff += (*ptr) - '0';
1889             ptr++;
1890         }
1891 
1892         *(components[i]) = sign * buff;
1893     }
1894 
1895     return ptr - p;
1896 }
1897 
1898 /* Constructor for a _tzrule.
1899  *
1900  * If `dst_abbr` is NULL, this will construct an "STD-only" _tzrule, in which
1901  * case `dst_offset` will be ignored and `start` and `end` are expected to be
1902  * NULL as well.
1903  *
1904  * Returns 0 on success.
1905  */
1906 static int
build_tzrule(PyObject * std_abbr,PyObject * dst_abbr,long std_offset,long dst_offset,TransitionRuleType * start,TransitionRuleType * end,_tzrule * out)1907 build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
1908              long dst_offset, TransitionRuleType *start,
1909              TransitionRuleType *end, _tzrule *out)
1910 {
1911     _tzrule rv = {{0}};
1912 
1913     rv.start = start;
1914     rv.end = end;
1915 
1916     if (build_ttinfo(std_offset, 0, std_abbr, &rv.std)) {
1917         goto error;
1918     }
1919 
1920     if (dst_abbr != NULL) {
1921         rv.dst_diff = dst_offset - std_offset;
1922         if (build_ttinfo(dst_offset, rv.dst_diff, dst_abbr, &rv.dst)) {
1923             goto error;
1924         }
1925     }
1926     else {
1927         rv.std_only = 1;
1928     }
1929 
1930     *out = rv;
1931 
1932     return 0;
1933 error:
1934     xdecref_ttinfo(&rv.std);
1935     xdecref_ttinfo(&rv.dst);
1936     return -1;
1937 }
1938 
1939 /* Destructor for _tzrule. */
1940 static void
free_tzrule(_tzrule * tzrule)1941 free_tzrule(_tzrule *tzrule)
1942 {
1943     xdecref_ttinfo(&(tzrule->std));
1944     if (!tzrule->std_only) {
1945         xdecref_ttinfo(&(tzrule->dst));
1946     }
1947 
1948     if (tzrule->start != NULL) {
1949         PyMem_Free(tzrule->start);
1950     }
1951 
1952     if (tzrule->end != NULL) {
1953         PyMem_Free(tzrule->end);
1954     }
1955 }
1956 
1957 /* Calculate DST offsets from transitions and UTC offsets
1958  *
1959  * This is necessary because each C `ttinfo` only contains the UTC offset,
1960  * time zone abbreviation and an isdst boolean - it does not include the
1961  * amount of the DST offset, but we need the amount for the dst() function.
1962  *
1963  * Thus function uses heuristics to infer what the offset should be, so it
1964  * is not guaranteed that this will work for all zones. If we cannot assign
1965  * a value for a given DST offset, we'll assume it's 1H rather than 0H, so
1966  * bool(dt.dst()) will always match ttinfo.isdst.
1967  */
1968 static void
utcoff_to_dstoff(size_t * trans_idx,long * utcoffs,long * dstoffs,unsigned char * isdsts,size_t num_transitions,size_t num_ttinfos)1969 utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
1970                  unsigned char *isdsts, size_t num_transitions,
1971                  size_t num_ttinfos)
1972 {
1973     size_t dst_count = 0;
1974     size_t dst_found = 0;
1975     for (size_t i = 0; i < num_ttinfos; ++i) {
1976         dst_count++;
1977     }
1978 
1979     for (size_t i = 1; i < num_transitions; ++i) {
1980         if (dst_count == dst_found) {
1981             break;
1982         }
1983 
1984         size_t idx = trans_idx[i];
1985         size_t comp_idx = trans_idx[i - 1];
1986 
1987         // Only look at DST offsets that have nto been assigned already
1988         if (!isdsts[idx] || dstoffs[idx] != 0) {
1989             continue;
1990         }
1991 
1992         long dstoff = 0;
1993         long utcoff = utcoffs[idx];
1994 
1995         if (!isdsts[comp_idx]) {
1996             dstoff = utcoff - utcoffs[comp_idx];
1997         }
1998 
1999         if (!dstoff && idx < (num_ttinfos - 1)) {
2000             comp_idx = trans_idx[i + 1];
2001 
2002             // If the following transition is also DST and we couldn't find
2003             // the DST offset by this point, we're going to have to skip it
2004             // and hope this transition gets assigned later
2005             if (isdsts[comp_idx]) {
2006                 continue;
2007             }
2008 
2009             dstoff = utcoff - utcoffs[comp_idx];
2010         }
2011 
2012         if (dstoff) {
2013             dst_found++;
2014             dstoffs[idx] = dstoff;
2015         }
2016     }
2017 
2018     if (dst_found < dst_count) {
2019         // If there are time zones we didn't find a value for, we'll end up
2020         // with dstoff = 0 for something where isdst=1. This is obviously
2021         // wrong — one hour will be a much better guess than 0.
2022         for (size_t idx = 0; idx < num_ttinfos; ++idx) {
2023             if (isdsts[idx] && !dstoffs[idx]) {
2024                 dstoffs[idx] = 3600;
2025             }
2026         }
2027     }
2028 }
2029 
2030 #define _swap(x, y, buffer) \
2031     buffer = x;             \
2032     x = y;                  \
2033     y = buffer;
2034 
2035 /* Calculate transitions in local time from UTC time and offsets.
2036  *
2037  * We want to know when each transition occurs, denominated in the number of
2038  * nominal wall-time seconds between 1970-01-01T00:00:00 and the transition in
2039  * *local time* (note: this is *not* equivalent to the output of
2040  * datetime.timestamp, which is the total number of seconds actual elapsed
2041  * since 1970-01-01T00:00:00Z in UTC).
2042  *
2043  * This is an ambiguous question because "local time" can be ambiguous — but it
2044  * is disambiguated by the `fold` parameter, so we allocate two arrays:
2045  *
2046  *  trans_local[0]: The wall-time transitions for fold=0
2047  *  trans_local[1]: The wall-time transitions for fold=1
2048  *
2049  * This returns 0 on success and a negative number of failure. The trans_local
2050  * arrays must be freed if they are not NULL.
2051  */
2052 static int
ts_to_local(size_t * trans_idx,int64_t * trans_utc,long * utcoff,int64_t * trans_local[2],size_t num_ttinfos,size_t num_transitions)2053 ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
2054             int64_t *trans_local[2], size_t num_ttinfos,
2055             size_t num_transitions)
2056 {
2057     if (num_transitions == 0) {
2058         return 0;
2059     }
2060 
2061     // Copy the UTC transitions into each array to be modified in place later
2062     for (size_t i = 0; i < 2; ++i) {
2063         trans_local[i] = PyMem_Malloc(num_transitions * sizeof(int64_t));
2064         if (trans_local[i] == NULL) {
2065             return -1;
2066         }
2067 
2068         memcpy(trans_local[i], trans_utc, num_transitions * sizeof(int64_t));
2069     }
2070 
2071     int64_t offset_0, offset_1, buff;
2072     if (num_ttinfos > 1) {
2073         offset_0 = utcoff[0];
2074         offset_1 = utcoff[trans_idx[0]];
2075 
2076         if (offset_1 > offset_0) {
2077             _swap(offset_0, offset_1, buff);
2078         }
2079     }
2080     else {
2081         offset_0 = utcoff[0];
2082         offset_1 = utcoff[0];
2083     }
2084 
2085     trans_local[0][0] += offset_0;
2086     trans_local[1][0] += offset_1;
2087 
2088     for (size_t i = 1; i < num_transitions; ++i) {
2089         offset_0 = utcoff[trans_idx[i - 1]];
2090         offset_1 = utcoff[trans_idx[i]];
2091 
2092         if (offset_1 > offset_0) {
2093             _swap(offset_1, offset_0, buff);
2094         }
2095 
2096         trans_local[0][i] += offset_0;
2097         trans_local[1][i] += offset_1;
2098     }
2099 
2100     return 0;
2101 }
2102 
2103 /* Simple bisect_right binary search implementation */
2104 static size_t
_bisect(const int64_t value,const int64_t * arr,size_t size)2105 _bisect(const int64_t value, const int64_t *arr, size_t size)
2106 {
2107     size_t lo = 0;
2108     size_t hi = size;
2109     size_t m;
2110 
2111     while (lo < hi) {
2112         m = (lo + hi) / 2;
2113         if (arr[m] > value) {
2114             hi = m;
2115         }
2116         else {
2117             lo = m + 1;
2118         }
2119     }
2120 
2121     return hi;
2122 }
2123 
2124 /* Find the ttinfo rules that apply at a given local datetime. */
2125 static _ttinfo *
find_ttinfo(PyZoneInfo_ZoneInfo * self,PyObject * dt)2126 find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt)
2127 {
2128     // datetime.time has a .tzinfo attribute that passes None as the dt
2129     // argument; it only really has meaning for fixed-offset zones.
2130     if (dt == Py_None) {
2131         if (self->fixed_offset) {
2132             return &(self->tzrule_after.std);
2133         }
2134         else {
2135             return &NO_TTINFO;
2136         }
2137     }
2138 
2139     int64_t ts;
2140     if (get_local_timestamp(dt, &ts)) {
2141         return NULL;
2142     }
2143 
2144     unsigned char fold = PyDateTime_DATE_GET_FOLD(dt);
2145     assert(fold < 2);
2146     int64_t *local_transitions = self->trans_list_wall[fold];
2147     size_t num_trans = self->num_transitions;
2148 
2149     if (num_trans && ts < local_transitions[0]) {
2150         return self->ttinfo_before;
2151     }
2152     else if (!num_trans || ts > local_transitions[self->num_transitions - 1]) {
2153         return find_tzrule_ttinfo(&(self->tzrule_after), ts, fold,
2154                                   PyDateTime_GET_YEAR(dt));
2155     }
2156     else {
2157         size_t idx = _bisect(ts, local_transitions, self->num_transitions) - 1;
2158         assert(idx < self->num_transitions);
2159         return self->trans_ttinfos[idx];
2160     }
2161 }
2162 
2163 static int
is_leap_year(int year)2164 is_leap_year(int year)
2165 {
2166     const unsigned int ayear = (unsigned int)year;
2167     return ayear % 4 == 0 && (ayear % 100 != 0 || ayear % 400 == 0);
2168 }
2169 
2170 /* Calculates ordinal datetime from year, month and day. */
2171 static int
ymd_to_ord(int y,int m,int d)2172 ymd_to_ord(int y, int m, int d)
2173 {
2174     y -= 1;
2175     int days_before_year = (y * 365) + (y / 4) - (y / 100) + (y / 400);
2176     int yearday = DAYS_BEFORE_MONTH[m];
2177     if (m > 2 && is_leap_year(y + 1)) {
2178         yearday += 1;
2179     }
2180 
2181     return days_before_year + yearday + d;
2182 }
2183 
2184 /* Calculate the number of seconds since 1970-01-01 in local time.
2185  *
2186  * This gets a datetime in the same "units" as self->trans_list_wall so that we
2187  * can easily determine which transitions a datetime falls between. See the
2188  * comment above ts_to_local for more information.
2189  * */
2190 static int
get_local_timestamp(PyObject * dt,int64_t * local_ts)2191 get_local_timestamp(PyObject *dt, int64_t *local_ts)
2192 {
2193     assert(local_ts != NULL);
2194 
2195     int hour, minute, second;
2196     int ord;
2197     if (PyDateTime_CheckExact(dt)) {
2198         int y = PyDateTime_GET_YEAR(dt);
2199         int m = PyDateTime_GET_MONTH(dt);
2200         int d = PyDateTime_GET_DAY(dt);
2201         hour = PyDateTime_DATE_GET_HOUR(dt);
2202         minute = PyDateTime_DATE_GET_MINUTE(dt);
2203         second = PyDateTime_DATE_GET_SECOND(dt);
2204 
2205         ord = ymd_to_ord(y, m, d);
2206     }
2207     else {
2208         PyObject *num = PyObject_CallMethod(dt, "toordinal", NULL);
2209         if (num == NULL) {
2210             return -1;
2211         }
2212 
2213         ord = PyLong_AsLong(num);
2214         Py_DECREF(num);
2215         if (ord == -1 && PyErr_Occurred()) {
2216             return -1;
2217         }
2218 
2219         num = PyObject_GetAttrString(dt, "hour");
2220         if (num == NULL) {
2221             return -1;
2222         }
2223         hour = PyLong_AsLong(num);
2224         Py_DECREF(num);
2225         if (hour == -1) {
2226             return -1;
2227         }
2228 
2229         num = PyObject_GetAttrString(dt, "minute");
2230         if (num == NULL) {
2231             return -1;
2232         }
2233         minute = PyLong_AsLong(num);
2234         Py_DECREF(num);
2235         if (minute == -1) {
2236             return -1;
2237         }
2238 
2239         num = PyObject_GetAttrString(dt, "second");
2240         if (num == NULL) {
2241             return -1;
2242         }
2243         second = PyLong_AsLong(num);
2244         Py_DECREF(num);
2245         if (second == -1) {
2246             return -1;
2247         }
2248     }
2249 
2250     *local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400 +
2251                 (int64_t)(hour * 3600 + minute * 60 + second);
2252 
2253     return 0;
2254 }
2255 
2256 /////
2257 // Functions for cache handling
2258 
2259 /* Constructor for StrongCacheNode */
2260 static StrongCacheNode *
strong_cache_node_new(PyObject * key,PyObject * zone)2261 strong_cache_node_new(PyObject *key, PyObject *zone)
2262 {
2263     StrongCacheNode *node = PyMem_Malloc(sizeof(StrongCacheNode));
2264     if (node == NULL) {
2265         return NULL;
2266     }
2267 
2268     Py_INCREF(key);
2269     Py_INCREF(zone);
2270 
2271     node->next = NULL;
2272     node->prev = NULL;
2273     node->key = key;
2274     node->zone = zone;
2275 
2276     return node;
2277 }
2278 
2279 /* Destructor for StrongCacheNode */
2280 void
strong_cache_node_free(StrongCacheNode * node)2281 strong_cache_node_free(StrongCacheNode *node)
2282 {
2283     Py_XDECREF(node->key);
2284     Py_XDECREF(node->zone);
2285 
2286     PyMem_Free(node);
2287 }
2288 
2289 /* Frees all nodes at or after a specified root in the strong cache.
2290  *
2291  * This can be used on the root node to free the entire cache or it can be used
2292  * to clear all nodes that have been expired (which, if everything is going
2293  * right, will actually only be 1 node at a time).
2294  */
2295 void
strong_cache_free(StrongCacheNode * root)2296 strong_cache_free(StrongCacheNode *root)
2297 {
2298     StrongCacheNode *node = root;
2299     StrongCacheNode *next_node;
2300     while (node != NULL) {
2301         next_node = node->next;
2302         strong_cache_node_free(node);
2303 
2304         node = next_node;
2305     }
2306 }
2307 
2308 /* Removes a node from the cache and update its neighbors.
2309  *
2310  * This is used both when ejecting a node from the cache and when moving it to
2311  * the front of the cache.
2312  */
2313 static void
remove_from_strong_cache(StrongCacheNode * node)2314 remove_from_strong_cache(StrongCacheNode *node)
2315 {
2316     if (ZONEINFO_STRONG_CACHE == node) {
2317         ZONEINFO_STRONG_CACHE = node->next;
2318     }
2319 
2320     if (node->prev != NULL) {
2321         node->prev->next = node->next;
2322     }
2323 
2324     if (node->next != NULL) {
2325         node->next->prev = node->prev;
2326     }
2327 
2328     node->next = NULL;
2329     node->prev = NULL;
2330 }
2331 
2332 /* Retrieves the node associated with a key, if it exists.
2333  *
2334  * This traverses the strong cache until it finds a matching key and returns a
2335  * pointer to the relevant node if found. Returns NULL if no node is found.
2336  *
2337  * root may be NULL, indicating an empty cache.
2338  */
2339 static StrongCacheNode *
find_in_strong_cache(const StrongCacheNode * const root,PyObject * const key)2340 find_in_strong_cache(const StrongCacheNode *const root, PyObject *const key)
2341 {
2342     const StrongCacheNode *node = root;
2343     while (node != NULL) {
2344         if (PyObject_RichCompareBool(key, node->key, Py_EQ)) {
2345             return (StrongCacheNode *)node;
2346         }
2347 
2348         node = node->next;
2349     }
2350 
2351     return NULL;
2352 }
2353 
2354 /* Ejects a given key from the class's strong cache, if applicable.
2355  *
2356  * This function is used to enable the per-key functionality in clear_cache.
2357  */
2358 static void
eject_from_strong_cache(const PyTypeObject * const type,PyObject * key)2359 eject_from_strong_cache(const PyTypeObject *const type, PyObject *key)
2360 {
2361     if (type != &PyZoneInfo_ZoneInfoType) {
2362         return;
2363     }
2364 
2365     StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key);
2366     if (node != NULL) {
2367         remove_from_strong_cache(node);
2368 
2369         strong_cache_node_free(node);
2370     }
2371 }
2372 
2373 /* Moves a node to the front of the LRU cache.
2374  *
2375  * The strong cache is an LRU cache, so whenever a given node is accessed, if
2376  * it is not at the front of the cache, it needs to be moved there.
2377  */
2378 static void
move_strong_cache_node_to_front(StrongCacheNode ** root,StrongCacheNode * node)2379 move_strong_cache_node_to_front(StrongCacheNode **root, StrongCacheNode *node)
2380 {
2381     StrongCacheNode *root_p = *root;
2382     if (root_p == node) {
2383         return;
2384     }
2385 
2386     remove_from_strong_cache(node);
2387 
2388     node->prev = NULL;
2389     node->next = root_p;
2390 
2391     if (root_p != NULL) {
2392         root_p->prev = node;
2393     }
2394 
2395     *root = node;
2396 }
2397 
2398 /* Retrieves a ZoneInfo from the strong cache if it's present.
2399  *
2400  * This function finds the ZoneInfo by key and if found will move the node to
2401  * the front of the LRU cache and return a new reference to it. It returns NULL
2402  * if the key is not in the cache.
2403  *
2404  * The strong cache is currently only implemented for the base class, so this
2405  * always returns a cache miss for subclasses.
2406  */
2407 static PyObject *
zone_from_strong_cache(const PyTypeObject * const type,PyObject * const key)2408 zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key)
2409 {
2410     if (type != &PyZoneInfo_ZoneInfoType) {
2411         return NULL;  // Strong cache currently only implemented for base class
2412     }
2413 
2414     StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key);
2415 
2416     if (node != NULL) {
2417         move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, node);
2418         Py_INCREF(node->zone);
2419         return node->zone;
2420     }
2421 
2422     return NULL;  // Cache miss
2423 }
2424 
2425 /* Inserts a new key into the strong LRU cache.
2426  *
2427  * This function is only to be used after a cache miss — it creates a new node
2428  * at the front of the cache and ejects any stale entries (keeping the size of
2429  * the cache to at most ZONEINFO_STRONG_CACHE_MAX_SIZE).
2430  */
2431 static void
update_strong_cache(const PyTypeObject * const type,PyObject * key,PyObject * zone)2432 update_strong_cache(const PyTypeObject *const type, PyObject *key,
2433                     PyObject *zone)
2434 {
2435     if (type != &PyZoneInfo_ZoneInfoType) {
2436         return;
2437     }
2438 
2439     StrongCacheNode *new_node = strong_cache_node_new(key, zone);
2440 
2441     move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, new_node);
2442 
2443     StrongCacheNode *node = new_node->next;
2444     for (size_t i = 1; i < ZONEINFO_STRONG_CACHE_MAX_SIZE; ++i) {
2445         if (node == NULL) {
2446             return;
2447         }
2448         node = node->next;
2449     }
2450 
2451     // Everything beyond this point needs to be freed
2452     if (node != NULL) {
2453         if (node->prev != NULL) {
2454             node->prev->next = NULL;
2455         }
2456         strong_cache_free(node);
2457     }
2458 }
2459 
2460 /* Clears all entries into a type's strong cache.
2461  *
2462  * Because the strong cache is not implemented for subclasses, this is a no-op
2463  * for everything except the base class.
2464  */
2465 void
clear_strong_cache(const PyTypeObject * const type)2466 clear_strong_cache(const PyTypeObject *const type)
2467 {
2468     if (type != &PyZoneInfo_ZoneInfoType) {
2469         return;
2470     }
2471 
2472     strong_cache_free(ZONEINFO_STRONG_CACHE);
2473     ZONEINFO_STRONG_CACHE = NULL;
2474 }
2475 
2476 static PyObject *
new_weak_cache(void)2477 new_weak_cache(void)
2478 {
2479     PyObject *weakref_module = PyImport_ImportModule("weakref");
2480     if (weakref_module == NULL) {
2481         return NULL;
2482     }
2483 
2484     PyObject *weak_cache =
2485         PyObject_CallMethod(weakref_module, "WeakValueDictionary", "");
2486     Py_DECREF(weakref_module);
2487     return weak_cache;
2488 }
2489 
2490 static int
initialize_caches(void)2491 initialize_caches(void)
2492 {
2493     // TODO: Move to a PyModule_GetState / PEP 573 based caching system.
2494     if (TIMEDELTA_CACHE == NULL) {
2495         TIMEDELTA_CACHE = PyDict_New();
2496     }
2497     else {
2498         Py_INCREF(TIMEDELTA_CACHE);
2499     }
2500 
2501     if (TIMEDELTA_CACHE == NULL) {
2502         return -1;
2503     }
2504 
2505     if (ZONEINFO_WEAK_CACHE == NULL) {
2506         ZONEINFO_WEAK_CACHE = new_weak_cache();
2507     }
2508     else {
2509         Py_INCREF(ZONEINFO_WEAK_CACHE);
2510     }
2511 
2512     if (ZONEINFO_WEAK_CACHE == NULL) {
2513         return -1;
2514     }
2515 
2516     return 0;
2517 }
2518 
2519 static PyObject *
zoneinfo_init_subclass(PyTypeObject * cls,PyObject * args,PyObject ** kwargs)2520 zoneinfo_init_subclass(PyTypeObject *cls, PyObject *args, PyObject **kwargs)
2521 {
2522     PyObject *weak_cache = new_weak_cache();
2523     if (weak_cache == NULL) {
2524         return NULL;
2525     }
2526 
2527     PyObject_SetAttrString((PyObject *)cls, "_weak_cache", weak_cache);
2528     Py_DECREF(weak_cache);
2529     Py_RETURN_NONE;
2530 }
2531 
2532 /////
2533 // Specify the ZoneInfo type
2534 static PyMethodDef zoneinfo_methods[] = {
2535     {"clear_cache", (PyCFunction)(void (*)(void))zoneinfo_clear_cache,
2536      METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2537      PyDoc_STR("Clear the ZoneInfo cache.")},
2538     {"no_cache", (PyCFunction)(void (*)(void))zoneinfo_no_cache,
2539      METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2540      PyDoc_STR("Get a new instance of ZoneInfo, bypassing the cache.")},
2541     {"from_file", (PyCFunction)(void (*)(void))zoneinfo_from_file,
2542      METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2543      PyDoc_STR("Create a ZoneInfo file from a file object.")},
2544     {"utcoffset", (PyCFunction)zoneinfo_utcoffset, METH_O,
2545      PyDoc_STR("Retrieve a timedelta representing the UTC offset in a zone at "
2546                "the given datetime.")},
2547     {"dst", (PyCFunction)zoneinfo_dst, METH_O,
2548      PyDoc_STR("Retrieve a timedelta representing the amount of DST applied "
2549                "in a zone at the given datetime.")},
2550     {"tzname", (PyCFunction)zoneinfo_tzname, METH_O,
2551      PyDoc_STR("Retrieve a string containing the abbreviation for the time "
2552                "zone that applies in a zone at a given datetime.")},
2553     {"fromutc", (PyCFunction)zoneinfo_fromutc, METH_O,
2554      PyDoc_STR("Given a datetime with local time in UTC, retrieve an adjusted "
2555                "datetime in local time.")},
2556     {"__reduce__", (PyCFunction)zoneinfo_reduce, METH_NOARGS,
2557      PyDoc_STR("Function for serialization with the pickle protocol.")},
2558     {"_unpickle", (PyCFunction)zoneinfo__unpickle, METH_VARARGS | METH_CLASS,
2559      PyDoc_STR("Private method used in unpickling.")},
2560     {"__init_subclass__", (PyCFunction)(void (*)(void))zoneinfo_init_subclass,
2561      METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2562      PyDoc_STR("Function to initialize subclasses.")},
2563     {NULL} /* Sentinel */
2564 };
2565 
2566 static PyMemberDef zoneinfo_members[] = {
2567     {.name = "key",
2568      .offset = offsetof(PyZoneInfo_ZoneInfo, key),
2569      .type = T_OBJECT_EX,
2570      .flags = READONLY,
2571      .doc = NULL},
2572     {NULL}, /* Sentinel */
2573 };
2574 
2575 static PyTypeObject PyZoneInfo_ZoneInfoType = {
2576     PyVarObject_HEAD_INIT(NULL, 0)  //
2577         .tp_name = "zoneinfo.ZoneInfo",
2578     .tp_basicsize = sizeof(PyZoneInfo_ZoneInfo),
2579     .tp_weaklistoffset = offsetof(PyZoneInfo_ZoneInfo, weakreflist),
2580     .tp_repr = (reprfunc)zoneinfo_repr,
2581     .tp_str = (reprfunc)zoneinfo_str,
2582     .tp_getattro = PyObject_GenericGetAttr,
2583     .tp_flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE),
2584     /* .tp_doc = zoneinfo_doc, */
2585     .tp_methods = zoneinfo_methods,
2586     .tp_members = zoneinfo_members,
2587     .tp_new = zoneinfo_new,
2588     .tp_dealloc = zoneinfo_dealloc,
2589 };
2590 
2591 /////
2592 // Specify the _zoneinfo module
2593 static PyMethodDef module_methods[] = {{NULL, NULL}};
2594 static void
module_free()2595 module_free()
2596 {
2597     Py_XDECREF(_tzpath_find_tzfile);
2598     _tzpath_find_tzfile = NULL;
2599 
2600     Py_XDECREF(_common_mod);
2601     _common_mod = NULL;
2602 
2603     Py_XDECREF(io_open);
2604     io_open = NULL;
2605 
2606     xdecref_ttinfo(&NO_TTINFO);
2607 
2608     if (TIMEDELTA_CACHE != NULL && Py_REFCNT(TIMEDELTA_CACHE) > 1) {
2609         Py_DECREF(TIMEDELTA_CACHE);
2610     } else {
2611         Py_CLEAR(TIMEDELTA_CACHE);
2612     }
2613 
2614     if (ZONEINFO_WEAK_CACHE != NULL && Py_REFCNT(ZONEINFO_WEAK_CACHE) > 1) {
2615         Py_DECREF(ZONEINFO_WEAK_CACHE);
2616     } else {
2617         Py_CLEAR(ZONEINFO_WEAK_CACHE);
2618     }
2619 
2620     clear_strong_cache(&PyZoneInfo_ZoneInfoType);
2621 }
2622 
2623 static int
zoneinfomodule_exec(PyObject * m)2624 zoneinfomodule_exec(PyObject *m)
2625 {
2626     PyDateTime_IMPORT;
2627     PyZoneInfo_ZoneInfoType.tp_base = PyDateTimeAPI->TZInfoType;
2628     if (PyType_Ready(&PyZoneInfo_ZoneInfoType) < 0) {
2629         goto error;
2630     }
2631 
2632     Py_INCREF(&PyZoneInfo_ZoneInfoType);
2633     PyModule_AddObject(m, "ZoneInfo", (PyObject *)&PyZoneInfo_ZoneInfoType);
2634 
2635     /* Populate imports */
2636     PyObject *_tzpath_module = PyImport_ImportModule("zoneinfo._tzpath");
2637     if (_tzpath_module == NULL) {
2638         goto error;
2639     }
2640 
2641     _tzpath_find_tzfile =
2642         PyObject_GetAttrString(_tzpath_module, "find_tzfile");
2643     Py_DECREF(_tzpath_module);
2644     if (_tzpath_find_tzfile == NULL) {
2645         goto error;
2646     }
2647 
2648     PyObject *io_module = PyImport_ImportModule("io");
2649     if (io_module == NULL) {
2650         goto error;
2651     }
2652 
2653     io_open = PyObject_GetAttrString(io_module, "open");
2654     Py_DECREF(io_module);
2655     if (io_open == NULL) {
2656         goto error;
2657     }
2658 
2659     _common_mod = PyImport_ImportModule("zoneinfo._common");
2660     if (_common_mod == NULL) {
2661         goto error;
2662     }
2663 
2664     if (NO_TTINFO.utcoff == NULL) {
2665         NO_TTINFO.utcoff = Py_None;
2666         NO_TTINFO.dstoff = Py_None;
2667         NO_TTINFO.tzname = Py_None;
2668 
2669         for (size_t i = 0; i < 3; ++i) {
2670             Py_INCREF(Py_None);
2671         }
2672     }
2673 
2674     if (initialize_caches()) {
2675         goto error;
2676     }
2677 
2678     return 0;
2679 
2680 error:
2681     return -1;
2682 }
2683 
2684 static PyModuleDef_Slot zoneinfomodule_slots[] = {
2685     {Py_mod_exec, zoneinfomodule_exec}, {0, NULL}};
2686 
2687 static struct PyModuleDef zoneinfomodule = {
2688     PyModuleDef_HEAD_INIT,
2689     .m_name = "_zoneinfo",
2690     .m_doc = "C implementation of the zoneinfo module",
2691     .m_size = 0,
2692     .m_methods = module_methods,
2693     .m_slots = zoneinfomodule_slots,
2694     .m_free = (freefunc)module_free};
2695 
2696 PyMODINIT_FUNC
PyInit__zoneinfo(void)2697 PyInit__zoneinfo(void)
2698 {
2699     return PyModuleDef_Init(&zoneinfomodule);
2700 }
2701