diff options
author | shadchin <shadchin@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
commit | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch) | |
tree | 012bb94d777798f1f56ac1cec429509766d05181 /contrib/tools/python3/src/Modules/_zoneinfo.c | |
parent | 6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff) | |
download | ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz |
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/tools/python3/src/Modules/_zoneinfo.c')
-rw-r--r-- | contrib/tools/python3/src/Modules/_zoneinfo.c | 5448 |
1 files changed, 2724 insertions, 2724 deletions
diff --git a/contrib/tools/python3/src/Modules/_zoneinfo.c b/contrib/tools/python3/src/Modules/_zoneinfo.c index d7945d31af..3ac8bb0503 100644 --- a/contrib/tools/python3/src/Modules/_zoneinfo.c +++ b/contrib/tools/python3/src/Modules/_zoneinfo.c @@ -1,2724 +1,2724 @@ -#include "Python.h" -#include "structmember.h" - -#include <ctype.h> -#include <stddef.h> -#include <stdint.h> - -#include "datetime.h" - -// Imports -static PyObject *io_open = NULL; -static PyObject *_tzpath_find_tzfile = NULL; -static PyObject *_common_mod = NULL; - -typedef struct TransitionRuleType TransitionRuleType; -typedef struct StrongCacheNode StrongCacheNode; - -typedef struct { - PyObject *utcoff; - PyObject *dstoff; - PyObject *tzname; - long utcoff_seconds; -} _ttinfo; - -typedef struct { - _ttinfo std; - _ttinfo dst; - int dst_diff; - TransitionRuleType *start; - TransitionRuleType *end; - unsigned char std_only; -} _tzrule; - -typedef struct { - PyDateTime_TZInfo base; - PyObject *key; - PyObject *file_repr; - PyObject *weakreflist; - size_t num_transitions; - size_t num_ttinfos; - int64_t *trans_list_utc; - int64_t *trans_list_wall[2]; - _ttinfo **trans_ttinfos; // References to the ttinfo for each transition - _ttinfo *ttinfo_before; - _tzrule tzrule_after; - _ttinfo *_ttinfos; // Unique array of ttinfos for ease of deallocation - unsigned char fixed_offset; - unsigned char source; -} PyZoneInfo_ZoneInfo; - -struct TransitionRuleType { - int64_t (*year_to_timestamp)(TransitionRuleType *, int); -}; - -typedef struct { - TransitionRuleType base; - uint8_t month; - uint8_t week; - uint8_t day; - int8_t hour; - int8_t minute; - int8_t second; -} CalendarRule; - -typedef struct { - TransitionRuleType base; - uint8_t julian; - unsigned int day; - int8_t hour; - int8_t minute; - int8_t second; -} DayRule; - -struct StrongCacheNode { - StrongCacheNode *next; - StrongCacheNode *prev; - PyObject *key; - PyObject *zone; -}; - -static PyTypeObject PyZoneInfo_ZoneInfoType; - -// Globals -static 
PyObject *TIMEDELTA_CACHE = NULL; -static PyObject *ZONEINFO_WEAK_CACHE = NULL; -static StrongCacheNode *ZONEINFO_STRONG_CACHE = NULL; -static size_t ZONEINFO_STRONG_CACHE_MAX_SIZE = 8; - -static _ttinfo NO_TTINFO = {NULL, NULL, NULL, 0}; - -// Constants -static const int EPOCHORDINAL = 719163; -static int DAYS_IN_MONTH[] = { - -1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, -}; - -static int DAYS_BEFORE_MONTH[] = { - -1, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, -}; - -static const int SOURCE_NOCACHE = 0; -static const int SOURCE_CACHE = 1; -static const int SOURCE_FILE = 2; - -// Forward declarations -static int -load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj); -static void -utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs, - unsigned char *isdsts, size_t num_transitions, - size_t num_ttinfos); -static int -ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff, - int64_t *trans_local[2], size_t num_ttinfos, - size_t num_transitions); - -static int -parse_tz_str(PyObject *tz_str_obj, _tzrule *out); - -static Py_ssize_t -parse_abbr(const char *const p, PyObject **abbr); -static Py_ssize_t -parse_tz_delta(const char *const p, long *total_seconds); -static Py_ssize_t -parse_transition_time(const char *const p, int8_t *hour, int8_t *minute, - int8_t *second); -static Py_ssize_t -parse_transition_rule(const char *const p, TransitionRuleType **out); - -static _ttinfo * -find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year); -static _ttinfo * -find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year, - unsigned char *fold); - -static int -build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out); -static void -xdecref_ttinfo(_ttinfo *ttinfo); -static int -ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1); - -static int -build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset, - long dst_offset, TransitionRuleType *start, - TransitionRuleType *end, 
_tzrule *out); -static void -free_tzrule(_tzrule *tzrule); - -static PyObject * -load_timedelta(long seconds); - -static int -get_local_timestamp(PyObject *dt, int64_t *local_ts); -static _ttinfo * -find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt); - -static int -ymd_to_ord(int y, int m, int d); -static int -is_leap_year(int year); - -static size_t -_bisect(const int64_t value, const int64_t *arr, size_t size); - -static int -eject_from_strong_cache(const PyTypeObject *const type, PyObject *key); -static void -clear_strong_cache(const PyTypeObject *const type); -static void -update_strong_cache(const PyTypeObject *const type, PyObject *key, - PyObject *zone); -static PyObject * -zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key); - -static PyObject * -zoneinfo_new_instance(PyTypeObject *type, PyObject *key) -{ - PyObject *file_obj = NULL; - PyObject *file_path = NULL; - - file_path = PyObject_CallFunctionObjArgs(_tzpath_find_tzfile, key, NULL); - if (file_path == NULL) { - return NULL; - } - else if (file_path == Py_None) { - file_obj = PyObject_CallMethod(_common_mod, "load_tzdata", "O", key); - if (file_obj == NULL) { - Py_DECREF(file_path); - return NULL; - } - } - - PyObject *self = (PyObject *)(type->tp_alloc(type, 0)); - if (self == NULL) { - goto error; - } - - if (file_obj == NULL) { - file_obj = PyObject_CallFunction(io_open, "Os", file_path, "rb"); - if (file_obj == NULL) { - goto error; - } - } - - if (load_data((PyZoneInfo_ZoneInfo *)self, file_obj)) { - goto error; - } - - PyObject *rv = PyObject_CallMethod(file_obj, "close", NULL); - Py_DECREF(file_obj); - file_obj = NULL; - if (rv == NULL) { - goto error; - } - Py_DECREF(rv); - - ((PyZoneInfo_ZoneInfo *)self)->key = key; - Py_INCREF(key); - - goto cleanup; -error: - Py_XDECREF(self); - self = NULL; -cleanup: - if (file_obj != NULL) { - PyObject *exc, *val, *tb; - PyErr_Fetch(&exc, &val, &tb); - PyObject *tmp = PyObject_CallMethod(file_obj, "close", NULL); - 
_PyErr_ChainExceptions(exc, val, tb); - if (tmp == NULL) { - Py_CLEAR(self); - } - Py_XDECREF(tmp); - Py_DECREF(file_obj); - } - Py_DECREF(file_path); - return self; -} - -static PyObject * -get_weak_cache(PyTypeObject *type) -{ - if (type == &PyZoneInfo_ZoneInfoType) { - return ZONEINFO_WEAK_CACHE; - } - else { - PyObject *cache = - PyObject_GetAttrString((PyObject *)type, "_weak_cache"); - // We are assuming that the type lives at least as long as the function - // that calls get_weak_cache, and that it holds a reference to the - // cache, so we'll return a "borrowed reference". - Py_XDECREF(cache); - return cache; - } -} - -static PyObject * -zoneinfo_new(PyTypeObject *type, PyObject *args, PyObject *kw) -{ - PyObject *key = NULL; - static char *kwlist[] = {"key", NULL}; - if (PyArg_ParseTupleAndKeywords(args, kw, "O", kwlist, &key) == 0) { - return NULL; - } - - PyObject *instance = zone_from_strong_cache(type, key); - if (instance != NULL || PyErr_Occurred()) { - return instance; - } - - PyObject *weak_cache = get_weak_cache(type); - instance = PyObject_CallMethod(weak_cache, "get", "O", key, Py_None); - if (instance == NULL) { - return NULL; - } - - if (instance == Py_None) { - Py_DECREF(instance); - PyObject *tmp = zoneinfo_new_instance(type, key); - if (tmp == NULL) { - return NULL; - } - - instance = - PyObject_CallMethod(weak_cache, "setdefault", "OO", key, tmp); - Py_DECREF(tmp); - if (instance == NULL) { - return NULL; - } - ((PyZoneInfo_ZoneInfo *)instance)->source = SOURCE_CACHE; - } - - update_strong_cache(type, key, instance); - return instance; -} - -static void -zoneinfo_dealloc(PyObject *obj_self) -{ - PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self; - - if (self->weakreflist != NULL) { - PyObject_ClearWeakRefs(obj_self); - } - - if (self->trans_list_utc != NULL) { - PyMem_Free(self->trans_list_utc); - } - - for (size_t i = 0; i < 2; i++) { - if (self->trans_list_wall[i] != NULL) { - PyMem_Free(self->trans_list_wall[i]); - } - } - - 
if (self->_ttinfos != NULL) { - for (size_t i = 0; i < self->num_ttinfos; ++i) { - xdecref_ttinfo(&(self->_ttinfos[i])); - } - PyMem_Free(self->_ttinfos); - } - - if (self->trans_ttinfos != NULL) { - PyMem_Free(self->trans_ttinfos); - } - - free_tzrule(&(self->tzrule_after)); - - Py_XDECREF(self->key); - Py_XDECREF(self->file_repr); - - Py_TYPE(self)->tp_free((PyObject *)self); -} - -static PyObject * -zoneinfo_from_file(PyTypeObject *type, PyObject *args, PyObject *kwargs) -{ - PyObject *file_obj = NULL; - PyObject *file_repr = NULL; - PyObject *key = Py_None; - PyZoneInfo_ZoneInfo *self = NULL; - - static char *kwlist[] = {"", "key", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", kwlist, &file_obj, - &key)) { - return NULL; - } - - PyObject *obj_self = (PyObject *)(type->tp_alloc(type, 0)); - self = (PyZoneInfo_ZoneInfo *)obj_self; - if (self == NULL) { - return NULL; - } - - file_repr = PyUnicode_FromFormat("%R", file_obj); - if (file_repr == NULL) { - goto error; - } - - if (load_data(self, file_obj)) { - goto error; - } - - self->source = SOURCE_FILE; - self->file_repr = file_repr; - self->key = key; - Py_INCREF(key); - - return obj_self; -error: - Py_XDECREF(file_repr); - Py_XDECREF(self); - return NULL; -} - -static PyObject * -zoneinfo_no_cache(PyTypeObject *cls, PyObject *args, PyObject *kwargs) -{ - static char *kwlist[] = {"key", NULL}; - PyObject *key = NULL; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kwlist, &key)) { - return NULL; - } - - PyObject *out = zoneinfo_new_instance(cls, key); - if (out != NULL) { - ((PyZoneInfo_ZoneInfo *)out)->source = SOURCE_NOCACHE; - } - - return out; -} - -static PyObject * -zoneinfo_clear_cache(PyObject *cls, PyObject *args, PyObject *kwargs) -{ - PyObject *only_keys = NULL; - static char *kwlist[] = {"only_keys", NULL}; - - if (!(PyArg_ParseTupleAndKeywords(args, kwargs, "|$O", kwlist, - &only_keys))) { - return NULL; - } - - PyTypeObject *type = (PyTypeObject *)cls; - PyObject *weak_cache 
= get_weak_cache(type); - - if (only_keys == NULL || only_keys == Py_None) { - PyObject *rv = PyObject_CallMethod(weak_cache, "clear", NULL); - if (rv != NULL) { - Py_DECREF(rv); - } - - clear_strong_cache(type); - } - else { - PyObject *item = NULL; - PyObject *pop = PyUnicode_FromString("pop"); - if (pop == NULL) { - return NULL; - } - - PyObject *iter = PyObject_GetIter(only_keys); - if (iter == NULL) { - Py_DECREF(pop); - return NULL; - } - - while ((item = PyIter_Next(iter))) { - // Remove from strong cache - if (eject_from_strong_cache(type, item) < 0) { - Py_DECREF(item); - break; - } - - // Remove from weak cache - PyObject *tmp = PyObject_CallMethodObjArgs(weak_cache, pop, item, - Py_None, NULL); - - Py_DECREF(item); - if (tmp == NULL) { - break; - } - Py_DECREF(tmp); - } - Py_DECREF(iter); - Py_DECREF(pop); - } - - if (PyErr_Occurred()) { - return NULL; - } - - Py_RETURN_NONE; -} - -static PyObject * -zoneinfo_utcoffset(PyObject *self, PyObject *dt) -{ - _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt); - if (tti == NULL) { - return NULL; - } - Py_INCREF(tti->utcoff); - return tti->utcoff; -} - -static PyObject * -zoneinfo_dst(PyObject *self, PyObject *dt) -{ - _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt); - if (tti == NULL) { - return NULL; - } - Py_INCREF(tti->dstoff); - return tti->dstoff; -} - -static PyObject * -zoneinfo_tzname(PyObject *self, PyObject *dt) -{ - _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt); - if (tti == NULL) { - return NULL; - } - Py_INCREF(tti->tzname); - return tti->tzname; -} - -#define HASTZINFO(p) (((_PyDateTime_BaseTZInfo *)(p))->hastzinfo) -#define GET_DT_TZINFO(p) \ - (HASTZINFO(p) ? 
((PyDateTime_DateTime *)(p))->tzinfo : Py_None) - -static PyObject * -zoneinfo_fromutc(PyObject *obj_self, PyObject *dt) -{ - if (!PyDateTime_Check(dt)) { - PyErr_SetString(PyExc_TypeError, - "fromutc: argument must be a datetime"); - return NULL; - } - if (GET_DT_TZINFO(dt) != obj_self) { - PyErr_SetString(PyExc_ValueError, - "fromutc: dt.tzinfo " - "is not self"); - return NULL; - } - - PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self; - - int64_t timestamp; - if (get_local_timestamp(dt, &timestamp)) { - return NULL; - } - size_t num_trans = self->num_transitions; - - _ttinfo *tti = NULL; - unsigned char fold = 0; - - if (num_trans >= 1 && timestamp < self->trans_list_utc[0]) { - tti = self->ttinfo_before; - } - else if (num_trans == 0 || - timestamp > self->trans_list_utc[num_trans - 1]) { - tti = find_tzrule_ttinfo_fromutc(&(self->tzrule_after), timestamp, - PyDateTime_GET_YEAR(dt), &fold); - - // Immediately after the last manual transition, the fold/gap is - // between self->trans_ttinfos[num_transitions - 1] and whatever - // ttinfo applies immediately after the last transition, not between - // the STD and DST rules in the tzrule_after, so we may need to - // adjust the fold value. 
- if (num_trans) { - _ttinfo *tti_prev = NULL; - if (num_trans == 1) { - tti_prev = self->ttinfo_before; - } - else { - tti_prev = self->trans_ttinfos[num_trans - 2]; - } - int64_t diff = tti_prev->utcoff_seconds - tti->utcoff_seconds; - if (diff > 0 && - timestamp < (self->trans_list_utc[num_trans - 1] + diff)) { - fold = 1; - } - } - } - else { - size_t idx = _bisect(timestamp, self->trans_list_utc, num_trans); - _ttinfo *tti_prev = NULL; - - if (idx >= 2) { - tti_prev = self->trans_ttinfos[idx - 2]; - tti = self->trans_ttinfos[idx - 1]; - } - else { - tti_prev = self->ttinfo_before; - tti = self->trans_ttinfos[0]; - } - - // Detect fold - int64_t shift = - (int64_t)(tti_prev->utcoff_seconds - tti->utcoff_seconds); - if (shift > (timestamp - self->trans_list_utc[idx - 1])) { - fold = 1; - } - } - - PyObject *tmp = PyNumber_Add(dt, tti->utcoff); - if (tmp == NULL) { - return NULL; - } - - if (fold) { - if (PyDateTime_CheckExact(tmp)) { - ((PyDateTime_DateTime *)tmp)->fold = 1; - dt = tmp; - } - else { - PyObject *replace = PyObject_GetAttrString(tmp, "replace"); - PyObject *args = PyTuple_New(0); - PyObject *kwargs = PyDict_New(); - - Py_DECREF(tmp); - if (args == NULL || kwargs == NULL || replace == NULL) { - Py_XDECREF(args); - Py_XDECREF(kwargs); - Py_XDECREF(replace); - return NULL; - } - - dt = NULL; - if (!PyDict_SetItemString(kwargs, "fold", _PyLong_One)) { - dt = PyObject_Call(replace, args, kwargs); - } - - Py_DECREF(args); - Py_DECREF(kwargs); - Py_DECREF(replace); - - if (dt == NULL) { - return NULL; - } - } - } - else { - dt = tmp; - } - return dt; -} - -static PyObject * -zoneinfo_repr(PyZoneInfo_ZoneInfo *self) -{ - PyObject *rv = NULL; - const char *type_name = Py_TYPE((PyObject *)self)->tp_name; - if (!(self->key == Py_None)) { - rv = PyUnicode_FromFormat("%s(key=%R)", type_name, self->key); - } - else { - assert(PyUnicode_Check(self->file_repr)); - rv = PyUnicode_FromFormat("%s.from_file(%U)", type_name, - self->file_repr); - } - - return rv; -} - 
-static PyObject * -zoneinfo_str(PyZoneInfo_ZoneInfo *self) -{ - if (!(self->key == Py_None)) { - Py_INCREF(self->key); - return self->key; - } - else { - return zoneinfo_repr(self); - } -} - -/* Pickles the ZoneInfo object by key and source. - * - * ZoneInfo objects are pickled by reference to the TZif file that they came - * from, which means that the exact transitions may be different or the file - * may not un-pickle if the data has changed on disk in the interim. - * - * It is necessary to include a bit indicating whether or not the object - * was constructed from the cache, because from-cache objects will hit the - * unpickling process's cache, whereas no-cache objects will bypass it. - * - * Objects constructed from ZoneInfo.from_file cannot be pickled. - */ -static PyObject * -zoneinfo_reduce(PyObject *obj_self, PyObject *unused) -{ - PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self; - if (self->source == SOURCE_FILE) { - // Objects constructed from files cannot be pickled. - PyObject *pickle = PyImport_ImportModule("pickle"); - if (pickle == NULL) { - return NULL; - } - - PyObject *pickle_error = - PyObject_GetAttrString(pickle, "PicklingError"); - Py_DECREF(pickle); - if (pickle_error == NULL) { - return NULL; - } - - PyErr_Format(pickle_error, - "Cannot pickle a ZoneInfo file from a file stream."); - Py_DECREF(pickle_error); - return NULL; - } - - unsigned char from_cache = self->source == SOURCE_CACHE ? 
1 : 0; - PyObject *constructor = PyObject_GetAttrString(obj_self, "_unpickle"); - - if (constructor == NULL) { - return NULL; - } - - PyObject *rv = Py_BuildValue("O(OB)", constructor, self->key, from_cache); - Py_DECREF(constructor); - return rv; -} - -static PyObject * -zoneinfo__unpickle(PyTypeObject *cls, PyObject *args) -{ - PyObject *key; - unsigned char from_cache; - if (!PyArg_ParseTuple(args, "OB", &key, &from_cache)) { - return NULL; - } - - if (from_cache) { - PyObject *val_args = Py_BuildValue("(O)", key); - if (val_args == NULL) { - return NULL; - } - - PyObject *rv = zoneinfo_new(cls, val_args, NULL); - - Py_DECREF(val_args); - return rv; - } - else { - return zoneinfo_new_instance(cls, key); - } -} - -/* It is relatively expensive to construct new timedelta objects, and in most - * cases we're looking at a relatively small number of timedeltas, such as - * integer number of hours, etc. We will keep a cache so that we construct - * a minimal number of these. - * - * Possibly this should be replaced with an LRU cache so that it's not possible - * for the memory usage to explode from this, but in order for this to be a - * serious problem, one would need to deliberately craft a malicious time zone - * file with many distinct offsets. As of tzdb 2019c, loading every single zone - * fills the cache with ~450 timedeltas for a total size of ~12kB. - * - * This returns a new reference to the timedelta. 
- */ -static PyObject * -load_timedelta(long seconds) -{ - PyObject *rv = NULL; - PyObject *pyoffset = PyLong_FromLong(seconds); - if (pyoffset == NULL) { - return NULL; - } - int contains = PyDict_Contains(TIMEDELTA_CACHE, pyoffset); - if (contains == -1) { - goto error; - } - - if (!contains) { - PyObject *tmp = PyDateTimeAPI->Delta_FromDelta( - 0, seconds, 0, 1, PyDateTimeAPI->DeltaType); - - if (tmp == NULL) { - goto error; - } - - rv = PyDict_SetDefault(TIMEDELTA_CACHE, pyoffset, tmp); - Py_DECREF(tmp); - } - else { - rv = PyDict_GetItem(TIMEDELTA_CACHE, pyoffset); - } - - Py_DECREF(pyoffset); - Py_INCREF(rv); - return rv; -error: - Py_DECREF(pyoffset); - return NULL; -} - -/* Constructor for _ttinfo object - this starts by initializing the _ttinfo - * to { NULL, NULL, NULL }, so that Py_XDECREF will work on partially - * initialized _ttinfo objects. - */ -static int -build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out) -{ - out->utcoff = NULL; - out->dstoff = NULL; - out->tzname = NULL; - - out->utcoff_seconds = utcoffset; - out->utcoff = load_timedelta(utcoffset); - if (out->utcoff == NULL) { - return -1; - } - - out->dstoff = load_timedelta(dstoffset); - if (out->dstoff == NULL) { - return -1; - } - - out->tzname = tzname; - Py_INCREF(tzname); - - return 0; -} - -/* Decrease reference count on any non-NULL members of a _ttinfo */ -static void -xdecref_ttinfo(_ttinfo *ttinfo) -{ - if (ttinfo != NULL) { - Py_XDECREF(ttinfo->utcoff); - Py_XDECREF(ttinfo->dstoff); - Py_XDECREF(ttinfo->tzname); - } -} - -/* Equality function for _ttinfo. 
*/ -static int -ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1) -{ - int rv; - if ((rv = PyObject_RichCompareBool(tti0->utcoff, tti1->utcoff, Py_EQ)) < - 1) { - goto end; - } - - if ((rv = PyObject_RichCompareBool(tti0->dstoff, tti1->dstoff, Py_EQ)) < - 1) { - goto end; - } - - if ((rv = PyObject_RichCompareBool(tti0->tzname, tti1->tzname, Py_EQ)) < - 1) { - goto end; - } -end: - return rv; -} - -/* Given a file-like object, this populates a ZoneInfo object - * - * The current version calls into a Python function to read the data from - * file into Python objects, and this translates those Python objects into - * C values and calculates derived values (e.g. dstoff) in C. - * - * This returns 0 on success and -1 on failure. - * - * The function will never return while `self` is partially initialized — - * the object only needs to be freed / deallocated if this succeeds. - */ -static int -load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj) -{ - PyObject *data_tuple = NULL; - - long *utcoff = NULL; - long *dstoff = NULL; - size_t *trans_idx = NULL; - unsigned char *isdst = NULL; - - self->trans_list_utc = NULL; - self->trans_list_wall[0] = NULL; - self->trans_list_wall[1] = NULL; - self->trans_ttinfos = NULL; - self->_ttinfos = NULL; - self->file_repr = NULL; - - size_t ttinfos_allocated = 0; - - data_tuple = PyObject_CallMethod(_common_mod, "load_data", "O", file_obj); - - if (data_tuple == NULL) { - goto error; - } - - if (!PyTuple_CheckExact(data_tuple)) { - PyErr_Format(PyExc_TypeError, "Invalid data result type: %r", - data_tuple); - goto error; - } - - // Unpack the data tuple - PyObject *trans_idx_list = PyTuple_GetItem(data_tuple, 0); - if (trans_idx_list == NULL) { - goto error; - } - - PyObject *trans_utc = PyTuple_GetItem(data_tuple, 1); - if (trans_utc == NULL) { - goto error; - } - - PyObject *utcoff_list = PyTuple_GetItem(data_tuple, 2); - if (utcoff_list == NULL) { - goto error; - } - - PyObject *isdst_list = 
PyTuple_GetItem(data_tuple, 3); - if (isdst_list == NULL) { - goto error; - } - - PyObject *abbr = PyTuple_GetItem(data_tuple, 4); - if (abbr == NULL) { - goto error; - } - - PyObject *tz_str = PyTuple_GetItem(data_tuple, 5); - if (tz_str == NULL) { - goto error; - } - - // Load the relevant sizes - Py_ssize_t num_transitions = PyTuple_Size(trans_utc); - if (num_transitions < 0) { - goto error; - } - - Py_ssize_t num_ttinfos = PyTuple_Size(utcoff_list); - if (num_ttinfos < 0) { - goto error; - } - - self->num_transitions = (size_t)num_transitions; - self->num_ttinfos = (size_t)num_ttinfos; - - // Load the transition indices and list - self->trans_list_utc = - PyMem_Malloc(self->num_transitions * sizeof(int64_t)); - if (self->trans_list_utc == NULL) { - goto error; - } - trans_idx = PyMem_Malloc(self->num_transitions * sizeof(Py_ssize_t)); - if (trans_idx == NULL) { - goto error; - } - - for (size_t i = 0; i < self->num_transitions; ++i) { - PyObject *num = PyTuple_GetItem(trans_utc, i); - if (num == NULL) { - goto error; - } - self->trans_list_utc[i] = PyLong_AsLongLong(num); - if (self->trans_list_utc[i] == -1 && PyErr_Occurred()) { - goto error; - } - - num = PyTuple_GetItem(trans_idx_list, i); - if (num == NULL) { - goto error; - } - - Py_ssize_t cur_trans_idx = PyLong_AsSsize_t(num); - if (cur_trans_idx == -1) { - goto error; - } - - trans_idx[i] = (size_t)cur_trans_idx; - if (trans_idx[i] > self->num_ttinfos) { - PyErr_Format( - PyExc_ValueError, - "Invalid transition index found while reading TZif: %zd", - cur_trans_idx); - - goto error; - } - } - - // Load UTC offsets and isdst (size num_ttinfos) - utcoff = PyMem_Malloc(self->num_ttinfos * sizeof(long)); - isdst = PyMem_Malloc(self->num_ttinfos * sizeof(unsigned char)); - - if (utcoff == NULL || isdst == NULL) { - goto error; - } - for (size_t i = 0; i < self->num_ttinfos; ++i) { - PyObject *num = PyTuple_GetItem(utcoff_list, i); - if (num == NULL) { - goto error; - } - - utcoff[i] = PyLong_AsLong(num); - if 
(utcoff[i] == -1 && PyErr_Occurred()) { - goto error; - } - - num = PyTuple_GetItem(isdst_list, i); - if (num == NULL) { - goto error; - } - - int isdst_with_error = PyObject_IsTrue(num); - if (isdst_with_error == -1) { - goto error; - } - else { - isdst[i] = (unsigned char)isdst_with_error; - } - } - - dstoff = PyMem_Calloc(self->num_ttinfos, sizeof(long)); - if (dstoff == NULL) { - goto error; - } - - // Derive dstoff and trans_list_wall from the information we've loaded - utcoff_to_dstoff(trans_idx, utcoff, dstoff, isdst, self->num_transitions, - self->num_ttinfos); - - if (ts_to_local(trans_idx, self->trans_list_utc, utcoff, - self->trans_list_wall, self->num_ttinfos, - self->num_transitions)) { - goto error; - } - - // Build _ttinfo objects from utcoff, dstoff and abbr - self->_ttinfos = PyMem_Malloc(self->num_ttinfos * sizeof(_ttinfo)); - if (self->_ttinfos == NULL) { - goto error; - } - for (size_t i = 0; i < self->num_ttinfos; ++i) { - PyObject *tzname = PyTuple_GetItem(abbr, i); - if (tzname == NULL) { - goto error; - } - - ttinfos_allocated++; - if (build_ttinfo(utcoff[i], dstoff[i], tzname, &(self->_ttinfos[i]))) { - goto error; - } - } - - // Build our mapping from transition to the ttinfo that applies - self->trans_ttinfos = - PyMem_Calloc(self->num_transitions, sizeof(_ttinfo *)); - if (self->trans_ttinfos == NULL) { - goto error; - } - for (size_t i = 0; i < self->num_transitions; ++i) { - size_t ttinfo_idx = trans_idx[i]; - assert(ttinfo_idx < self->num_ttinfos); - self->trans_ttinfos[i] = &(self->_ttinfos[ttinfo_idx]); - } - - // Set ttinfo_before to the first non-DST transition - for (size_t i = 0; i < self->num_ttinfos; ++i) { - if (!isdst[i]) { - self->ttinfo_before = &(self->_ttinfos[i]); - break; - } - } - - // If there are only DST ttinfos, pick the first one, if there are no - // ttinfos at all, set ttinfo_before to NULL - if (self->ttinfo_before == NULL && self->num_ttinfos > 0) { - self->ttinfo_before = &(self->_ttinfos[0]); - } - - if 
(tz_str != Py_None && PyObject_IsTrue(tz_str)) { - if (parse_tz_str(tz_str, &(self->tzrule_after))) { - goto error; - } - } - else { - if (!self->num_ttinfos) { - PyErr_Format(PyExc_ValueError, "No time zone information found."); - goto error; - } - - size_t idx; - if (!self->num_transitions) { - idx = self->num_ttinfos - 1; - } - else { - idx = trans_idx[self->num_transitions - 1]; - } - - _ttinfo *tti = &(self->_ttinfos[idx]); - build_tzrule(tti->tzname, NULL, tti->utcoff_seconds, 0, NULL, NULL, - &(self->tzrule_after)); - - // We've abused the build_tzrule constructor to construct an STD-only - // rule mimicking whatever ttinfo we've picked up, but it's possible - // that the one we've picked up is a DST zone, so we need to make sure - // that the dstoff is set correctly in that case. - if (PyObject_IsTrue(tti->dstoff)) { - _ttinfo *tti_after = &(self->tzrule_after.std); - Py_DECREF(tti_after->dstoff); - tti_after->dstoff = tti->dstoff; - Py_INCREF(tti_after->dstoff); - } - } - - // Determine if this is a "fixed offset" zone, meaning that the output of - // the utcoffset, dst and tzname functions does not depend on the specific - // datetime passed. - // - // We make three simplifying assumptions here: - // - // 1. If tzrule_after is not std_only, it has transitions that might occur - // (it is possible to construct TZ strings that specify STD and DST but - // no transitions ever occur, such as AAA0BBB,0/0,J365/25). - // 2. If self->_ttinfos contains more than one _ttinfo object, the objects - // represent different offsets. - // 3. self->ttinfos contains no unused _ttinfos (in which case an otherwise - // fixed-offset zone with extra _ttinfos defined may appear to *not* be - // a fixed offset zone). - // - // Violations to these assumptions would be fairly exotic, and exotic - // zones should almost certainly not be used with datetime.time (the - // only thing that would be affected by this). 
- if (self->num_ttinfos > 1 || !self->tzrule_after.std_only) { - self->fixed_offset = 0; - } - else if (self->num_ttinfos == 0) { - self->fixed_offset = 1; - } - else { - int constant_offset = - ttinfo_eq(&(self->_ttinfos[0]), &self->tzrule_after.std); - if (constant_offset < 0) { - goto error; - } - else { - self->fixed_offset = constant_offset; - } - } - - int rv = 0; - goto cleanup; -error: - // These resources only need to be freed if we have failed, if we succeed - // in initializing a PyZoneInfo_ZoneInfo object, we can rely on its dealloc - // method to free the relevant resources. - if (self->trans_list_utc != NULL) { - PyMem_Free(self->trans_list_utc); - self->trans_list_utc = NULL; - } - - for (size_t i = 0; i < 2; ++i) { - if (self->trans_list_wall[i] != NULL) { - PyMem_Free(self->trans_list_wall[i]); - self->trans_list_wall[i] = NULL; - } - } - - if (self->_ttinfos != NULL) { - for (size_t i = 0; i < ttinfos_allocated; ++i) { - xdecref_ttinfo(&(self->_ttinfos[i])); - } - PyMem_Free(self->_ttinfos); - self->_ttinfos = NULL; - } - - if (self->trans_ttinfos != NULL) { - PyMem_Free(self->trans_ttinfos); - self->trans_ttinfos = NULL; - } - - rv = -1; -cleanup: - Py_XDECREF(data_tuple); - - if (utcoff != NULL) { - PyMem_Free(utcoff); - } - - if (dstoff != NULL) { - PyMem_Free(dstoff); - } - - if (isdst != NULL) { - PyMem_Free(isdst); - } - - if (trans_idx != NULL) { - PyMem_Free(trans_idx); - } - - return rv; -} - -/* Function to calculate the local timestamp of a transition from the year. */ -int64_t -calendarrule_year_to_timestamp(TransitionRuleType *base_self, int year) -{ - CalendarRule *self = (CalendarRule *)base_self; - - // We want (year, month, day of month); we have year and month, but we - // need to turn (week, day-of-week) into day-of-month - // - // Week 1 is the first week in which day `day` (where 0 = Sunday) appears. 
- // Week 5 represents the last occurrence of day `day`, so we need to know - // the first weekday of the month and the number of days in the month. - int8_t first_day = (ymd_to_ord(year, self->month, 1) + 6) % 7; - uint8_t days_in_month = DAYS_IN_MONTH[self->month]; - if (self->month == 2 && is_leap_year(year)) { - days_in_month += 1; - } - - // This equation seems magical, so I'll break it down: - // 1. calendar says 0 = Monday, POSIX says 0 = Sunday so we need first_day - // + 1 to get 1 = Monday -> 7 = Sunday, which is still equivalent - // because this math is mod 7 - // 2. Get first day - desired day mod 7 (adjusting by 7 for negative - // numbers so that -1 % 7 = 6). - // 3. Add 1 because month days are a 1-based index. - int8_t month_day = ((int8_t)(self->day) - (first_day + 1)) % 7; - if (month_day < 0) { - month_day += 7; - } - month_day += 1; - - // Now use a 0-based index version of `week` to calculate the w-th - // occurrence of `day` - month_day += ((int8_t)(self->week) - 1) * 7; - - // month_day will only be > days_in_month if w was 5, and `w` means "last - // occurrence of `d`", so now we just check if we over-shot the end of the - // month and if so knock off 1 week. - if (month_day > days_in_month) { - month_day -= 7; - } - - int64_t ordinal = ymd_to_ord(year, self->month, month_day) - EPOCHORDINAL; - return ((ordinal * 86400) + (int64_t)(self->hour * 3600) + - (int64_t)(self->minute * 60) + (int64_t)(self->second)); -} - -/* Constructor for CalendarRule. */ -int -calendarrule_new(uint8_t month, uint8_t week, uint8_t day, int8_t hour, - int8_t minute, int8_t second, CalendarRule *out) -{ - // These bounds come from the POSIX standard, which describes an Mm.n.d - // rule as: - // - // The d'th day (0 <= d <= 6) of week n of month m of the year (1 <= n <= - // 5, 1 <= m <= 12, where week 5 means "the last d day in month m" which - // may occur in either the fourth or the fifth week). Week 1 is the first - // week in which the d'th day occurs. 
Day zero is Sunday. - if (month <= 0 || month > 12) { - PyErr_Format(PyExc_ValueError, "Month must be in (0, 12]"); - return -1; - } - - if (week <= 0 || week > 5) { - PyErr_Format(PyExc_ValueError, "Week must be in (0, 5]"); - return -1; - } - - // If the 'day' parameter type is changed to a signed type, - // "day < 0" check must be added. - if (/* day < 0 || */ day > 6) { - PyErr_Format(PyExc_ValueError, "Day must be in [0, 6]"); - return -1; - } - - TransitionRuleType base = {&calendarrule_year_to_timestamp}; - - CalendarRule new_offset = { - .base = base, - .month = month, - .week = week, - .day = day, - .hour = hour, - .minute = minute, - .second = second, - }; - - *out = new_offset; - return 0; -} - -/* Function to calculate the local timestamp of a transition from the year. - * - * This translates the day of the year into a local timestamp — either a - * 1-based Julian day, not including leap days, or the 0-based year-day, - * including leap days. - * */ -int64_t -dayrule_year_to_timestamp(TransitionRuleType *base_self, int year) -{ - // The function signature requires a TransitionRuleType pointer, but this - // function is only applicable to DayRule* objects. - DayRule *self = (DayRule *)base_self; - - // ymd_to_ord calculates the number of days since 0001-01-01, but we want - // to know the number of days since 1970-01-01, so we must subtract off - // the equivalent of ymd_to_ord(1970, 1, 1). - // - // We subtract off an additional 1 day to account for January 1st (we want - // the number of full days *before* the date of the transition - partial - // days are accounted for in the hour, minute and second portions. - int64_t days_before_year = ymd_to_ord(year, 1, 1) - EPOCHORDINAL - 1; - - // The Julian day specification skips over February 29th in leap years, - // from the POSIX standard: - // - // Leap days shall not be counted. That is, in all years-including leap - // years-February 28 is day 59 and March 1 is day 60. 
It is impossible to - // refer explicitly to the occasional February 29. - // - // This is actually more useful than you'd think — if you want a rule that - // always transitions on a given calendar day (other than February 29th), - // you would use a Julian day, e.g. J91 always refers to April 1st and J365 - // always refers to December 31st. - unsigned int day = self->day; - if (self->julian && day >= 59 && is_leap_year(year)) { - day += 1; - } - - return ((days_before_year + day) * 86400) + (self->hour * 3600) + - (self->minute * 60) + self->second; -} - -/* Constructor for DayRule. */ -static int -dayrule_new(uint8_t julian, unsigned int day, int8_t hour, int8_t minute, - int8_t second, DayRule *out) -{ - // The POSIX standard specifies that Julian days must be in the range (1 <= - // n <= 365) and that non-Julian (they call it "0-based Julian") days must - // be in the range (0 <= n <= 365). - if (day < julian || day > 365) { - PyErr_Format(PyExc_ValueError, "day must be in [%u, 365], not: %u", - julian, day); - return -1; - } - - TransitionRuleType base = { - &dayrule_year_to_timestamp, - }; - - DayRule tmp = { - .base = base, - .julian = julian, - .day = day, - .hour = hour, - .minute = minute, - .second = second, - }; - - *out = tmp; - - return 0; -} - -/* Calculate the start and end rules for a _tzrule in the given year. */ -static void -tzrule_transitions(_tzrule *rule, int year, int64_t *start, int64_t *end) -{ - assert(rule->start != NULL); - assert(rule->end != NULL); - *start = rule->start->year_to_timestamp(rule->start, year); - *end = rule->end->year_to_timestamp(rule->end, year); -} - -/* Calculate the _ttinfo that applies at a given local time from a _tzrule. 
- * - * This takes a local timestamp and fold for disambiguation purposes; the year - * could technically be calculated from the timestamp, but given that the - * callers of this function already have the year information accessible from - * the datetime struct, it is taken as an additional parameter to reduce - * unnecessary calculation. - * */ -static _ttinfo * -find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year) -{ - if (rule->std_only) { - return &(rule->std); - } - - int64_t start, end; - uint8_t isdst; - - tzrule_transitions(rule, year, &start, &end); - - // With fold = 0, the period (denominated in local time) with the smaller - // offset starts at the end of the gap and ends at the end of the fold; - // with fold = 1, it runs from the start of the gap to the beginning of the - // fold. - // - // So in order to determine the DST boundaries we need to know both the - // fold and whether DST is positive or negative (rare), and it turns out - // that this boils down to fold XOR is_positive. - if (fold == (rule->dst_diff >= 0)) { - end -= rule->dst_diff; - } - else { - start += rule->dst_diff; - } - - if (start < end) { - isdst = (ts >= start) && (ts < end); - } - else { - isdst = (ts < end) || (ts >= start); - } - - if (isdst) { - return &(rule->dst); - } - else { - return &(rule->std); - } -} - -/* Calculate the ttinfo and fold that applies for a _tzrule at an epoch time. - * - * This function can determine the _ttinfo that applies at a given epoch time, - * (analogous to trans_list_utc), and whether or not the datetime is in a fold. - * This is to be used in the .fromutc() function. - * - * The year is technically a redundant parameter, because it can be calculated - * from the timestamp, but all callers of this function should have the year - * in the datetime struct anyway, so taking it as a parameter saves unnecessary - * calculation. 
- **/ -static _ttinfo * -find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year, - unsigned char *fold) -{ - if (rule->std_only) { - *fold = 0; - return &(rule->std); - } - - int64_t start, end; - uint8_t isdst; - tzrule_transitions(rule, year, &start, &end); - start -= rule->std.utcoff_seconds; - end -= rule->dst.utcoff_seconds; - - if (start < end) { - isdst = (ts >= start) && (ts < end); - } - else { - isdst = (ts < end) || (ts >= start); - } - - // For positive DST, the ambiguous period is one dst_diff after the end of - // DST; for negative DST, the ambiguous period is one dst_diff before the - // start of DST. - int64_t ambig_start, ambig_end; - if (rule->dst_diff > 0) { - ambig_start = end; - ambig_end = end + rule->dst_diff; - } - else { - ambig_start = start; - ambig_end = start - rule->dst_diff; - } - - *fold = (ts >= ambig_start) && (ts < ambig_end); - - if (isdst) { - return &(rule->dst); - } - else { - return &(rule->std); - } -} - -/* Parse a TZ string in the format specified by the POSIX standard: - * - * std offset[dst[offset],start[/time],end[/time]] - * - * std and dst must be 3 or more characters long and must not contain a - * leading colon, embedded digits, commas, nor a plus or minus signs; The - * spaces between "std" and "offset" are only for display and are not actually - * present in the string. 
- * - * The format of the offset is ``[+|-]hh[:mm[:ss]]`` - * - * See the POSIX.1 spec: IEE Std 1003.1-2018 §8.3: - * - * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html - */ -static int -parse_tz_str(PyObject *tz_str_obj, _tzrule *out) -{ - PyObject *std_abbr = NULL; - PyObject *dst_abbr = NULL; - TransitionRuleType *start = NULL; - TransitionRuleType *end = NULL; - // Initialize offsets to invalid value (> 24 hours) - long std_offset = 1 << 20; - long dst_offset = 1 << 20; - - char *tz_str = PyBytes_AsString(tz_str_obj); - if (tz_str == NULL) { - return -1; - } - char *p = tz_str; - - // Read the `std` abbreviation, which must be at least 3 characters long. - Py_ssize_t num_chars = parse_abbr(p, &std_abbr); - if (num_chars < 1) { - PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj); - goto error; - } - - p += num_chars; - - // Now read the STD offset, which is required - num_chars = parse_tz_delta(p, &std_offset); - if (num_chars < 0) { - PyErr_Format(PyExc_ValueError, "Invalid STD offset in %R", tz_str_obj); - goto error; - } - p += num_chars; - - // If the string ends here, there is no DST, otherwise we must parse the - // DST abbreviation and start and end dates and times. - if (*p == '\0') { - goto complete; - } - - num_chars = parse_abbr(p, &dst_abbr); - if (num_chars < 1) { - PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj); - goto error; - } - p += num_chars; - - if (*p == ',') { - // From the POSIX standard: - // - // If no offset follows dst, the alternative time is assumed to be one - // hour ahead of standard time. 
- dst_offset = std_offset + 3600; - } - else { - num_chars = parse_tz_delta(p, &dst_offset); - if (num_chars < 0) { - PyErr_Format(PyExc_ValueError, "Invalid DST offset in %R", - tz_str_obj); - goto error; - } - - p += num_chars; - } - - TransitionRuleType **transitions[2] = {&start, &end}; - for (size_t i = 0; i < 2; ++i) { - if (*p != ',') { - PyErr_Format(PyExc_ValueError, - "Missing transition rules in TZ string: %R", - tz_str_obj); - goto error; - } - p++; - - num_chars = parse_transition_rule(p, transitions[i]); - if (num_chars < 0) { - PyErr_Format(PyExc_ValueError, - "Malformed transition rule in TZ string: %R", - tz_str_obj); - goto error; - } - p += num_chars; - } - - if (*p != '\0') { - PyErr_Format(PyExc_ValueError, - "Extraneous characters at end of TZ string: %R", - tz_str_obj); - goto error; - } - -complete: - build_tzrule(std_abbr, dst_abbr, std_offset, dst_offset, start, end, out); - Py_DECREF(std_abbr); - Py_XDECREF(dst_abbr); - - return 0; -error: - Py_XDECREF(std_abbr); - if (dst_abbr != NULL && dst_abbr != Py_None) { - Py_DECREF(dst_abbr); - } - - if (start != NULL) { - PyMem_Free(start); - } - - if (end != NULL) { - PyMem_Free(end); - } - - return -1; -} - -static int -parse_uint(const char *const p, uint8_t *value) -{ - if (!isdigit(*p)) { - return -1; - } - - *value = (*p) - '0'; - return 0; -} - -/* Parse the STD and DST abbreviations from a TZ string. */ -static Py_ssize_t -parse_abbr(const char *const p, PyObject **abbr) -{ - const char *ptr = p; - char buff = *ptr; - const char *str_start; - const char *str_end; - - if (*ptr == '<') { - ptr++; - str_start = ptr; - while ((buff = *ptr) != '>') { - // From the POSIX standard: - // - // In the quoted form, the first character shall be the less-than - // ( '<' ) character and the last character shall be the - // greater-than ( '>' ) character. 
All characters between these - // quoting characters shall be alphanumeric characters from the - // portable character set in the current locale, the plus-sign ( - // '+' ) character, or the minus-sign ( '-' ) character. The std - // and dst fields in this case shall not include the quoting - // characters. - if (!isalpha(buff) && !isdigit(buff) && buff != '+' && - buff != '-') { - return -1; - } - ptr++; - } - str_end = ptr; - ptr++; - } - else { - str_start = p; - // From the POSIX standard: - // - // In the unquoted form, all characters in these fields shall be - // alphabetic characters from the portable character set in the - // current locale. - while (isalpha(*ptr)) { - ptr++; - } - str_end = ptr; - } - - *abbr = PyUnicode_FromStringAndSize(str_start, str_end - str_start); - if (*abbr == NULL) { - return -1; - } - - return ptr - p; -} - -/* Parse a UTC offset from a TZ str. */ -static Py_ssize_t -parse_tz_delta(const char *const p, long *total_seconds) -{ - // From the POSIX spec: - // - // Indicates the value added to the local time to arrive at Coordinated - // Universal Time. The offset has the form: - // - // hh[:mm[:ss]] - // - // One or more digits may be used; the value is always interpreted as a - // decimal number. - // - // The POSIX spec says that the values for `hour` must be between 0 and 24 - // hours, but RFC 8536 §3.3.1 specifies that the hours part of the - // transition times may be signed and range from -167 to 167. - long sign = -1; - long hours = 0; - long minutes = 0; - long seconds = 0; - - const char *ptr = p; - char buff = *ptr; - if (buff == '-' || buff == '+') { - // Negative numbers correspond to *positive* offsets, from the spec: - // - // If preceded by a '-', the timezone shall be east of the Prime - // Meridian; otherwise, it shall be west (which may be indicated by - // an optional preceding '+' ). 
- if (buff == '-') { - sign = 1; - } - - ptr++; - } - - // The hour can be 1 or 2 numeric characters - for (size_t i = 0; i < 2; ++i) { - buff = *ptr; - if (!isdigit(buff)) { - if (i == 0) { - return -1; - } - else { - break; - } - } - - hours *= 10; - hours += buff - '0'; - ptr++; - } - - if (hours > 24 || hours < 0) { - return -1; - } - - // Minutes and seconds always of the format ":dd" - long *outputs[2] = {&minutes, &seconds}; - for (size_t i = 0; i < 2; ++i) { - if (*ptr != ':') { - goto complete; - } - ptr++; - - for (size_t j = 0; j < 2; ++j) { - buff = *ptr; - if (!isdigit(buff)) { - return -1; - } - *(outputs[i]) *= 10; - *(outputs[i]) += buff - '0'; - ptr++; - } - } - -complete: - *total_seconds = sign * ((hours * 3600) + (minutes * 60) + seconds); - - return ptr - p; -} - -/* Parse the date portion of a transition rule. */ -static Py_ssize_t -parse_transition_rule(const char *const p, TransitionRuleType **out) -{ - // The full transition rule indicates when to change back and forth between - // STD and DST, and has the form: - // - // date[/time],date[/time] - // - // This function parses an individual date[/time] section, and returns - // the number of characters that contributed to the transition rule. This - // does not include the ',' at the end of the first rule. - // - // The POSIX spec states that if *time* is not given, the default is 02:00. - const char *ptr = p; - int8_t hour = 2; - int8_t minute = 0; - int8_t second = 0; - - // Rules come in one of three flavors: - // - // 1. Jn: Julian day n, with no leap days. - // 2. n: Day of year (0-based, with leap days) - // 3. Mm.n.d: Specifying by month, week and day-of-week. 
- - if (*ptr == 'M') { - uint8_t month, week, day; - ptr++; - if (parse_uint(ptr, &month)) { - return -1; - } - ptr++; - if (*ptr != '.') { - uint8_t tmp; - if (parse_uint(ptr, &tmp)) { - return -1; - } - - month *= 10; - month += tmp; - ptr++; - } - - uint8_t *values[2] = {&week, &day}; - for (size_t i = 0; i < 2; ++i) { - if (*ptr != '.') { - return -1; - } - ptr++; - - if (parse_uint(ptr, values[i])) { - return -1; - } - ptr++; - } - - if (*ptr == '/') { - ptr++; - Py_ssize_t num_chars = - parse_transition_time(ptr, &hour, &minute, &second); - if (num_chars < 0) { - return -1; - } - ptr += num_chars; - } - - CalendarRule *rv = PyMem_Calloc(1, sizeof(CalendarRule)); - if (rv == NULL) { - return -1; - } - - if (calendarrule_new(month, week, day, hour, minute, second, rv)) { - PyMem_Free(rv); - return -1; - } - - *out = (TransitionRuleType *)rv; - } - else { - uint8_t julian = 0; - unsigned int day = 0; - if (*ptr == 'J') { - julian = 1; - ptr++; - } - - for (size_t i = 0; i < 3; ++i) { - if (!isdigit(*ptr)) { - if (i == 0) { - return -1; - } - break; - } - day *= 10; - day += (*ptr) - '0'; - ptr++; - } - - if (*ptr == '/') { - ptr++; - Py_ssize_t num_chars = - parse_transition_time(ptr, &hour, &minute, &second); - if (num_chars < 0) { - return -1; - } - ptr += num_chars; - } - - DayRule *rv = PyMem_Calloc(1, sizeof(DayRule)); - if (rv == NULL) { - return -1; - } - - if (dayrule_new(julian, day, hour, minute, second, rv)) { - PyMem_Free(rv); - return -1; - } - *out = (TransitionRuleType *)rv; - } - - return ptr - p; -} - -/* Parse the time portion of a transition rule (e.g. following an /) */ -static Py_ssize_t -parse_transition_time(const char *const p, int8_t *hour, int8_t *minute, - int8_t *second) -{ - // From the spec: - // - // The time has the same format as offset except that no leading sign - // ( '-' or '+' ) is allowed. 
- // - // The format for the offset is: - // - // h[h][:mm[:ss]] - // - // RFC 8536 also allows transition times to be signed and to range from - // -167 to +167, but the current version only supports [0, 99]. - // - // TODO: Support the full range of transition hours. - int8_t *components[3] = {hour, minute, second}; - const char *ptr = p; - int8_t sign = 1; - - if (*ptr == '-' || *ptr == '+') { - if (*ptr == '-') { - sign = -1; - } - ptr++; - } - - for (size_t i = 0; i < 3; ++i) { - if (i > 0) { - if (*ptr != ':') { - break; - } - ptr++; - } - - uint8_t buff = 0; - for (size_t j = 0; j < 2; j++) { - if (!isdigit(*ptr)) { - if (i == 0 && j > 0) { - break; - } - return -1; - } - - buff *= 10; - buff += (*ptr) - '0'; - ptr++; - } - - *(components[i]) = sign * buff; - } - - return ptr - p; -} - -/* Constructor for a _tzrule. - * - * If `dst_abbr` is NULL, this will construct an "STD-only" _tzrule, in which - * case `dst_offset` will be ignored and `start` and `end` are expected to be - * NULL as well. - * - * Returns 0 on success. - */ -static int -build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset, - long dst_offset, TransitionRuleType *start, - TransitionRuleType *end, _tzrule *out) -{ - _tzrule rv = {{0}}; - - rv.start = start; - rv.end = end; - - if (build_ttinfo(std_offset, 0, std_abbr, &rv.std)) { - goto error; - } - - if (dst_abbr != NULL) { - rv.dst_diff = dst_offset - std_offset; - if (build_ttinfo(dst_offset, rv.dst_diff, dst_abbr, &rv.dst)) { - goto error; - } - } - else { - rv.std_only = 1; - } - - *out = rv; - - return 0; -error: - xdecref_ttinfo(&rv.std); - xdecref_ttinfo(&rv.dst); - return -1; -} - -/* Destructor for _tzrule. 
*/ -static void -free_tzrule(_tzrule *tzrule) -{ - xdecref_ttinfo(&(tzrule->std)); - if (!tzrule->std_only) { - xdecref_ttinfo(&(tzrule->dst)); - } - - if (tzrule->start != NULL) { - PyMem_Free(tzrule->start); - } - - if (tzrule->end != NULL) { - PyMem_Free(tzrule->end); - } -} - -/* Calculate DST offsets from transitions and UTC offsets - * - * This is necessary because each C `ttinfo` only contains the UTC offset, - * time zone abbreviation and an isdst boolean - it does not include the - * amount of the DST offset, but we need the amount for the dst() function. - * - * Thus function uses heuristics to infer what the offset should be, so it - * is not guaranteed that this will work for all zones. If we cannot assign - * a value for a given DST offset, we'll assume it's 1H rather than 0H, so - * bool(dt.dst()) will always match ttinfo.isdst. - */ -static void -utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs, - unsigned char *isdsts, size_t num_transitions, - size_t num_ttinfos) -{ - size_t dst_count = 0; - size_t dst_found = 0; - for (size_t i = 0; i < num_ttinfos; ++i) { - dst_count++; - } - - for (size_t i = 1; i < num_transitions; ++i) { - if (dst_count == dst_found) { - break; - } - - size_t idx = trans_idx[i]; - size_t comp_idx = trans_idx[i - 1]; - - // Only look at DST offsets that have nto been assigned already - if (!isdsts[idx] || dstoffs[idx] != 0) { - continue; - } - - long dstoff = 0; - long utcoff = utcoffs[idx]; - - if (!isdsts[comp_idx]) { - dstoff = utcoff - utcoffs[comp_idx]; - } - - if (!dstoff && idx < (num_ttinfos - 1)) { - comp_idx = trans_idx[i + 1]; - - // If the following transition is also DST and we couldn't find - // the DST offset by this point, we're going to have to skip it - // and hope this transition gets assigned later - if (isdsts[comp_idx]) { - continue; - } - - dstoff = utcoff - utcoffs[comp_idx]; - } - - if (dstoff) { - dst_found++; - dstoffs[idx] = dstoff; - } - } - - if (dst_found < dst_count) { - // If 
there are time zones we didn't find a value for, we'll end up - // with dstoff = 0 for something where isdst=1. This is obviously - // wrong — one hour will be a much better guess than 0. - for (size_t idx = 0; idx < num_ttinfos; ++idx) { - if (isdsts[idx] && !dstoffs[idx]) { - dstoffs[idx] = 3600; - } - } - } -} - -#define _swap(x, y, buffer) \ - buffer = x; \ - x = y; \ - y = buffer; - -/* Calculate transitions in local time from UTC time and offsets. - * - * We want to know when each transition occurs, denominated in the number of - * nominal wall-time seconds between 1970-01-01T00:00:00 and the transition in - * *local time* (note: this is *not* equivalent to the output of - * datetime.timestamp, which is the total number of seconds actual elapsed - * since 1970-01-01T00:00:00Z in UTC). - * - * This is an ambiguous question because "local time" can be ambiguous — but it - * is disambiguated by the `fold` parameter, so we allocate two arrays: - * - * trans_local[0]: The wall-time transitions for fold=0 - * trans_local[1]: The wall-time transitions for fold=1 - * - * This returns 0 on success and a negative number of failure. The trans_local - * arrays must be freed if they are not NULL. 
- */ -static int -ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff, - int64_t *trans_local[2], size_t num_ttinfos, - size_t num_transitions) -{ - if (num_transitions == 0) { - return 0; - } - - // Copy the UTC transitions into each array to be modified in place later - for (size_t i = 0; i < 2; ++i) { - trans_local[i] = PyMem_Malloc(num_transitions * sizeof(int64_t)); - if (trans_local[i] == NULL) { - return -1; - } - - memcpy(trans_local[i], trans_utc, num_transitions * sizeof(int64_t)); - } - - int64_t offset_0, offset_1, buff; - if (num_ttinfos > 1) { - offset_0 = utcoff[0]; - offset_1 = utcoff[trans_idx[0]]; - - if (offset_1 > offset_0) { - _swap(offset_0, offset_1, buff); - } - } - else { - offset_0 = utcoff[0]; - offset_1 = utcoff[0]; - } - - trans_local[0][0] += offset_0; - trans_local[1][0] += offset_1; - - for (size_t i = 1; i < num_transitions; ++i) { - offset_0 = utcoff[trans_idx[i - 1]]; - offset_1 = utcoff[trans_idx[i]]; - - if (offset_1 > offset_0) { - _swap(offset_1, offset_0, buff); - } - - trans_local[0][i] += offset_0; - trans_local[1][i] += offset_1; - } - - return 0; -} - -/* Simple bisect_right binary search implementation */ -static size_t -_bisect(const int64_t value, const int64_t *arr, size_t size) -{ - size_t lo = 0; - size_t hi = size; - size_t m; - - while (lo < hi) { - m = (lo + hi) / 2; - if (arr[m] > value) { - hi = m; - } - else { - lo = m + 1; - } - } - - return hi; -} - -/* Find the ttinfo rules that apply at a given local datetime. */ -static _ttinfo * -find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt) -{ - // datetime.time has a .tzinfo attribute that passes None as the dt - // argument; it only really has meaning for fixed-offset zones. 
- if (dt == Py_None) { - if (self->fixed_offset) { - return &(self->tzrule_after.std); - } - else { - return &NO_TTINFO; - } - } - - int64_t ts; - if (get_local_timestamp(dt, &ts)) { - return NULL; - } - - unsigned char fold = PyDateTime_DATE_GET_FOLD(dt); - assert(fold < 2); - int64_t *local_transitions = self->trans_list_wall[fold]; - size_t num_trans = self->num_transitions; - - if (num_trans && ts < local_transitions[0]) { - return self->ttinfo_before; - } - else if (!num_trans || ts > local_transitions[self->num_transitions - 1]) { - return find_tzrule_ttinfo(&(self->tzrule_after), ts, fold, - PyDateTime_GET_YEAR(dt)); - } - else { - size_t idx = _bisect(ts, local_transitions, self->num_transitions) - 1; - assert(idx < self->num_transitions); - return self->trans_ttinfos[idx]; - } -} - -static int -is_leap_year(int year) -{ - const unsigned int ayear = (unsigned int)year; - return ayear % 4 == 0 && (ayear % 100 != 0 || ayear % 400 == 0); -} - -/* Calculates ordinal datetime from year, month and day. */ -static int -ymd_to_ord(int y, int m, int d) -{ - y -= 1; - int days_before_year = (y * 365) + (y / 4) - (y / 100) + (y / 400); - int yearday = DAYS_BEFORE_MONTH[m]; - if (m > 2 && is_leap_year(y + 1)) { - yearday += 1; - } - - return days_before_year + yearday + d; -} - -/* Calculate the number of seconds since 1970-01-01 in local time. - * - * This gets a datetime in the same "units" as self->trans_list_wall so that we - * can easily determine which transitions a datetime falls between. See the - * comment above ts_to_local for more information. 
- * */ -static int -get_local_timestamp(PyObject *dt, int64_t *local_ts) -{ - assert(local_ts != NULL); - - int hour, minute, second; - int ord; - if (PyDateTime_CheckExact(dt)) { - int y = PyDateTime_GET_YEAR(dt); - int m = PyDateTime_GET_MONTH(dt); - int d = PyDateTime_GET_DAY(dt); - hour = PyDateTime_DATE_GET_HOUR(dt); - minute = PyDateTime_DATE_GET_MINUTE(dt); - second = PyDateTime_DATE_GET_SECOND(dt); - - ord = ymd_to_ord(y, m, d); - } - else { - PyObject *num = PyObject_CallMethod(dt, "toordinal", NULL); - if (num == NULL) { - return -1; - } - - ord = PyLong_AsLong(num); - Py_DECREF(num); - if (ord == -1 && PyErr_Occurred()) { - return -1; - } - - num = PyObject_GetAttrString(dt, "hour"); - if (num == NULL) { - return -1; - } - hour = PyLong_AsLong(num); - Py_DECREF(num); - if (hour == -1) { - return -1; - } - - num = PyObject_GetAttrString(dt, "minute"); - if (num == NULL) { - return -1; - } - minute = PyLong_AsLong(num); - Py_DECREF(num); - if (minute == -1) { - return -1; - } - - num = PyObject_GetAttrString(dt, "second"); - if (num == NULL) { - return -1; - } - second = PyLong_AsLong(num); - Py_DECREF(num); - if (second == -1) { - return -1; - } - } - - *local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400 + - (int64_t)(hour * 3600 + minute * 60 + second); - - return 0; -} - -///// -// Functions for cache handling - -/* Constructor for StrongCacheNode */ -static StrongCacheNode * -strong_cache_node_new(PyObject *key, PyObject *zone) -{ - StrongCacheNode *node = PyMem_Malloc(sizeof(StrongCacheNode)); - if (node == NULL) { - return NULL; - } - - Py_INCREF(key); - Py_INCREF(zone); - - node->next = NULL; - node->prev = NULL; - node->key = key; - node->zone = zone; - - return node; -} - -/* Destructor for StrongCacheNode */ -void -strong_cache_node_free(StrongCacheNode *node) -{ - Py_XDECREF(node->key); - Py_XDECREF(node->zone); - - PyMem_Free(node); -} - -/* Frees all nodes at or after a specified root in the strong cache. 
- * - * This can be used on the root node to free the entire cache or it can be used - * to clear all nodes that have been expired (which, if everything is going - * right, will actually only be 1 node at a time). - */ -void -strong_cache_free(StrongCacheNode *root) -{ - StrongCacheNode *node = root; - StrongCacheNode *next_node; - while (node != NULL) { - next_node = node->next; - strong_cache_node_free(node); - - node = next_node; - } -} - -/* Removes a node from the cache and update its neighbors. - * - * This is used both when ejecting a node from the cache and when moving it to - * the front of the cache. - */ -static void -remove_from_strong_cache(StrongCacheNode *node) -{ - if (ZONEINFO_STRONG_CACHE == node) { - ZONEINFO_STRONG_CACHE = node->next; - } - - if (node->prev != NULL) { - node->prev->next = node->next; - } - - if (node->next != NULL) { - node->next->prev = node->prev; - } - - node->next = NULL; - node->prev = NULL; -} - -/* Retrieves the node associated with a key, if it exists. - * - * This traverses the strong cache until it finds a matching key and returns a - * pointer to the relevant node if found. Returns NULL if no node is found. - * - * root may be NULL, indicating an empty cache. - */ -static StrongCacheNode * -find_in_strong_cache(const StrongCacheNode *const root, PyObject *const key) -{ - const StrongCacheNode *node = root; - while (node != NULL) { - int rv = PyObject_RichCompareBool(key, node->key, Py_EQ); - if (rv < 0) { - return NULL; - } - if (rv) { - return (StrongCacheNode *)node; - } - - node = node->next; - } - - return NULL; -} - -/* Ejects a given key from the class's strong cache, if applicable. - * - * This function is used to enable the per-key functionality in clear_cache. 
- */ -static int -eject_from_strong_cache(const PyTypeObject *const type, PyObject *key) -{ - if (type != &PyZoneInfo_ZoneInfoType) { - return 0; - } - - StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key); - if (node != NULL) { - remove_from_strong_cache(node); - - strong_cache_node_free(node); - } - else if (PyErr_Occurred()) { - return -1; - } - return 0; -} - -/* Moves a node to the front of the LRU cache. - * - * The strong cache is an LRU cache, so whenever a given node is accessed, if - * it is not at the front of the cache, it needs to be moved there. - */ -static void -move_strong_cache_node_to_front(StrongCacheNode **root, StrongCacheNode *node) -{ - StrongCacheNode *root_p = *root; - if (root_p == node) { - return; - } - - remove_from_strong_cache(node); - - node->prev = NULL; - node->next = root_p; - - if (root_p != NULL) { - root_p->prev = node; - } - - *root = node; -} - -/* Retrieves a ZoneInfo from the strong cache if it's present. - * - * This function finds the ZoneInfo by key and if found will move the node to - * the front of the LRU cache and return a new reference to it. It returns NULL - * if the key is not in the cache. - * - * The strong cache is currently only implemented for the base class, so this - * always returns a cache miss for subclasses. - */ -static PyObject * -zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key) -{ - if (type != &PyZoneInfo_ZoneInfoType) { - return NULL; // Strong cache currently only implemented for base class - } - - StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key); - - if (node != NULL) { - move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, node); - Py_INCREF(node->zone); - return node->zone; - } - - return NULL; // Cache miss -} - -/* Inserts a new key into the strong LRU cache. 
- * - * This function is only to be used after a cache miss — it creates a new node - * at the front of the cache and ejects any stale entries (keeping the size of - * the cache to at most ZONEINFO_STRONG_CACHE_MAX_SIZE). - */ -static void -update_strong_cache(const PyTypeObject *const type, PyObject *key, - PyObject *zone) -{ - if (type != &PyZoneInfo_ZoneInfoType) { - return; - } - - StrongCacheNode *new_node = strong_cache_node_new(key, zone); - - move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, new_node); - - StrongCacheNode *node = new_node->next; - for (size_t i = 1; i < ZONEINFO_STRONG_CACHE_MAX_SIZE; ++i) { - if (node == NULL) { - return; - } - node = node->next; - } - - // Everything beyond this point needs to be freed - if (node != NULL) { - if (node->prev != NULL) { - node->prev->next = NULL; - } - strong_cache_free(node); - } -} - -/* Clears all entries into a type's strong cache. - * - * Because the strong cache is not implemented for subclasses, this is a no-op - * for everything except the base class. - */ -void -clear_strong_cache(const PyTypeObject *const type) -{ - if (type != &PyZoneInfo_ZoneInfoType) { - return; - } - - strong_cache_free(ZONEINFO_STRONG_CACHE); - ZONEINFO_STRONG_CACHE = NULL; -} - -static PyObject * -new_weak_cache(void) -{ - PyObject *weakref_module = PyImport_ImportModule("weakref"); - if (weakref_module == NULL) { - return NULL; - } - - PyObject *weak_cache = - PyObject_CallMethod(weakref_module, "WeakValueDictionary", ""); - Py_DECREF(weakref_module); - return weak_cache; -} - -static int -initialize_caches(void) -{ - // TODO: Move to a PyModule_GetState / PEP 573 based caching system. 
- if (TIMEDELTA_CACHE == NULL) { - TIMEDELTA_CACHE = PyDict_New(); - } - else { - Py_INCREF(TIMEDELTA_CACHE); - } - - if (TIMEDELTA_CACHE == NULL) { - return -1; - } - - if (ZONEINFO_WEAK_CACHE == NULL) { - ZONEINFO_WEAK_CACHE = new_weak_cache(); - } - else { - Py_INCREF(ZONEINFO_WEAK_CACHE); - } - - if (ZONEINFO_WEAK_CACHE == NULL) { - return -1; - } - - return 0; -} - -static PyObject * -zoneinfo_init_subclass(PyTypeObject *cls, PyObject *args, PyObject **kwargs) -{ - PyObject *weak_cache = new_weak_cache(); - if (weak_cache == NULL) { - return NULL; - } - - if (PyObject_SetAttrString((PyObject *)cls, "_weak_cache", - weak_cache) < 0) { - Py_DECREF(weak_cache); - return NULL; - } - Py_DECREF(weak_cache); - Py_RETURN_NONE; -} - -///// -// Specify the ZoneInfo type -static PyMethodDef zoneinfo_methods[] = { - {"clear_cache", (PyCFunction)(void (*)(void))zoneinfo_clear_cache, - METH_VARARGS | METH_KEYWORDS | METH_CLASS, - PyDoc_STR("Clear the ZoneInfo cache.")}, - {"no_cache", (PyCFunction)(void (*)(void))zoneinfo_no_cache, - METH_VARARGS | METH_KEYWORDS | METH_CLASS, - PyDoc_STR("Get a new instance of ZoneInfo, bypassing the cache.")}, - {"from_file", (PyCFunction)(void (*)(void))zoneinfo_from_file, - METH_VARARGS | METH_KEYWORDS | METH_CLASS, - PyDoc_STR("Create a ZoneInfo file from a file object.")}, - {"utcoffset", (PyCFunction)zoneinfo_utcoffset, METH_O, - PyDoc_STR("Retrieve a timedelta representing the UTC offset in a zone at " - "the given datetime.")}, - {"dst", (PyCFunction)zoneinfo_dst, METH_O, - PyDoc_STR("Retrieve a timedelta representing the amount of DST applied " - "in a zone at the given datetime.")}, - {"tzname", (PyCFunction)zoneinfo_tzname, METH_O, - PyDoc_STR("Retrieve a string containing the abbreviation for the time " - "zone that applies in a zone at a given datetime.")}, - {"fromutc", (PyCFunction)zoneinfo_fromutc, METH_O, - PyDoc_STR("Given a datetime with local time in UTC, retrieve an adjusted " - "datetime in local time.")}, - 
{"__reduce__", (PyCFunction)zoneinfo_reduce, METH_NOARGS, - PyDoc_STR("Function for serialization with the pickle protocol.")}, - {"_unpickle", (PyCFunction)zoneinfo__unpickle, METH_VARARGS | METH_CLASS, - PyDoc_STR("Private method used in unpickling.")}, - {"__init_subclass__", (PyCFunction)(void (*)(void))zoneinfo_init_subclass, - METH_VARARGS | METH_KEYWORDS | METH_CLASS, - PyDoc_STR("Function to initialize subclasses.")}, - {NULL} /* Sentinel */ -}; - -static PyMemberDef zoneinfo_members[] = { - {.name = "key", - .offset = offsetof(PyZoneInfo_ZoneInfo, key), - .type = T_OBJECT_EX, - .flags = READONLY, - .doc = NULL}, - {NULL}, /* Sentinel */ -}; - -static PyTypeObject PyZoneInfo_ZoneInfoType = { - PyVarObject_HEAD_INIT(NULL, 0) // - .tp_name = "zoneinfo.ZoneInfo", - .tp_basicsize = sizeof(PyZoneInfo_ZoneInfo), - .tp_weaklistoffset = offsetof(PyZoneInfo_ZoneInfo, weakreflist), - .tp_repr = (reprfunc)zoneinfo_repr, - .tp_str = (reprfunc)zoneinfo_str, - .tp_getattro = PyObject_GenericGetAttr, - .tp_flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE), - /* .tp_doc = zoneinfo_doc, */ - .tp_methods = zoneinfo_methods, - .tp_members = zoneinfo_members, - .tp_new = zoneinfo_new, - .tp_dealloc = zoneinfo_dealloc, -}; - -///// -// Specify the _zoneinfo module -static PyMethodDef module_methods[] = {{NULL, NULL}}; -static void -module_free() -{ - Py_XDECREF(_tzpath_find_tzfile); - _tzpath_find_tzfile = NULL; - - Py_XDECREF(_common_mod); - _common_mod = NULL; - - Py_XDECREF(io_open); - io_open = NULL; - - xdecref_ttinfo(&NO_TTINFO); - - if (TIMEDELTA_CACHE != NULL && Py_REFCNT(TIMEDELTA_CACHE) > 1) { - Py_DECREF(TIMEDELTA_CACHE); - } else { - Py_CLEAR(TIMEDELTA_CACHE); - } - - if (ZONEINFO_WEAK_CACHE != NULL && Py_REFCNT(ZONEINFO_WEAK_CACHE) > 1) { - Py_DECREF(ZONEINFO_WEAK_CACHE); - } else { - Py_CLEAR(ZONEINFO_WEAK_CACHE); - } - - clear_strong_cache(&PyZoneInfo_ZoneInfoType); -} - -static int -zoneinfomodule_exec(PyObject *m) -{ - PyDateTime_IMPORT; - if (PyDateTimeAPI == 
NULL) { - goto error; - } - PyZoneInfo_ZoneInfoType.tp_base = PyDateTimeAPI->TZInfoType; - if (PyType_Ready(&PyZoneInfo_ZoneInfoType) < 0) { - goto error; - } - - Py_INCREF(&PyZoneInfo_ZoneInfoType); - PyModule_AddObject(m, "ZoneInfo", (PyObject *)&PyZoneInfo_ZoneInfoType); - - /* Populate imports */ - PyObject *_tzpath_module = PyImport_ImportModule("zoneinfo._tzpath"); - if (_tzpath_module == NULL) { - goto error; - } - - _tzpath_find_tzfile = - PyObject_GetAttrString(_tzpath_module, "find_tzfile"); - Py_DECREF(_tzpath_module); - if (_tzpath_find_tzfile == NULL) { - goto error; - } - - PyObject *io_module = PyImport_ImportModule("io"); - if (io_module == NULL) { - goto error; - } - - io_open = PyObject_GetAttrString(io_module, "open"); - Py_DECREF(io_module); - if (io_open == NULL) { - goto error; - } - - _common_mod = PyImport_ImportModule("zoneinfo._common"); - if (_common_mod == NULL) { - goto error; - } - - if (NO_TTINFO.utcoff == NULL) { - NO_TTINFO.utcoff = Py_None; - NO_TTINFO.dstoff = Py_None; - NO_TTINFO.tzname = Py_None; - - for (size_t i = 0; i < 3; ++i) { - Py_INCREF(Py_None); - } - } - - if (initialize_caches()) { - goto error; - } - - return 0; - -error: - return -1; -} - -static PyModuleDef_Slot zoneinfomodule_slots[] = { - {Py_mod_exec, zoneinfomodule_exec}, {0, NULL}}; - -static struct PyModuleDef zoneinfomodule = { - PyModuleDef_HEAD_INIT, - .m_name = "_zoneinfo", - .m_doc = "C implementation of the zoneinfo module", - .m_size = 0, - .m_methods = module_methods, - .m_slots = zoneinfomodule_slots, - .m_free = (freefunc)module_free}; - -PyMODINIT_FUNC -PyInit__zoneinfo(void) -{ - return PyModuleDef_Init(&zoneinfomodule); -} +#include "Python.h" +#include "structmember.h" + +#include <ctype.h> +#include <stddef.h> +#include <stdint.h> + +#include "datetime.h" + +// Imports +static PyObject *io_open = NULL; +static PyObject *_tzpath_find_tzfile = NULL; +static PyObject *_common_mod = NULL; + +typedef struct TransitionRuleType TransitionRuleType; 
/* Instance layout of a ZoneInfo object.
 *
 * The array members are allocated with PyMem_Malloc / PyMem_Calloc by
 * load_data() and released with PyMem_Free by zoneinfo_dealloc().
 */
typedef struct {
    PyDateTime_TZInfo base;
    PyObject *key;        // Strong reference; exposed read-only as "key"
    PyObject *file_repr;  // "%R" of the source file object (from_file only)
    PyObject *weakreflist;  // Slot named by tp_weaklistoffset
    size_t num_transitions;  // Length of the trans_list_* and trans_ttinfos
    size_t num_ttinfos;      // Length of _ttinfos
    int64_t *trans_list_utc;  // Transition times as loaded from the file
    int64_t *trans_list_wall[2];  // Filled in by ts_to_local()
    _ttinfo **trans_ttinfos;  // References to the ttinfo for each transition
    _ttinfo *ttinfo_before;  // First non-DST ttinfo, else _ttinfos[0], or NULL
    _tzrule tzrule_after;  // Rule applied after the last listed transition
    _ttinfo *_ttinfos;  // Unique array of ttinfos for ease of deallocation
    unsigned char fixed_offset;  // Set by load_data(); see the comment there
    unsigned char source;  // One of SOURCE_NOCACHE / SOURCE_CACHE / SOURCE_FILE
} PyZoneInfo_ZoneInfo;
= 2; + +// Forward declarations +static int +load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj); +static void +utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs, + unsigned char *isdsts, size_t num_transitions, + size_t num_ttinfos); +static int +ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff, + int64_t *trans_local[2], size_t num_ttinfos, + size_t num_transitions); + +static int +parse_tz_str(PyObject *tz_str_obj, _tzrule *out); + +static Py_ssize_t +parse_abbr(const char *const p, PyObject **abbr); +static Py_ssize_t +parse_tz_delta(const char *const p, long *total_seconds); +static Py_ssize_t +parse_transition_time(const char *const p, int8_t *hour, int8_t *minute, + int8_t *second); +static Py_ssize_t +parse_transition_rule(const char *const p, TransitionRuleType **out); + +static _ttinfo * +find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year); +static _ttinfo * +find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year, + unsigned char *fold); + +static int +build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out); +static void +xdecref_ttinfo(_ttinfo *ttinfo); +static int +ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1); + +static int +build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset, + long dst_offset, TransitionRuleType *start, + TransitionRuleType *end, _tzrule *out); +static void +free_tzrule(_tzrule *tzrule); + +static PyObject * +load_timedelta(long seconds); + +static int +get_local_timestamp(PyObject *dt, int64_t *local_ts); +static _ttinfo * +find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt); + +static int +ymd_to_ord(int y, int m, int d); +static int +is_leap_year(int year); + +static size_t +_bisect(const int64_t value, const int64_t *arr, size_t size); + +static int +eject_from_strong_cache(const PyTypeObject *const type, PyObject *key); +static void +clear_strong_cache(const PyTypeObject *const type); +static void 
+update_strong_cache(const PyTypeObject *const type, PyObject *key, + PyObject *zone); +static PyObject * +zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key); + +static PyObject * +zoneinfo_new_instance(PyTypeObject *type, PyObject *key) +{ + PyObject *file_obj = NULL; + PyObject *file_path = NULL; + + file_path = PyObject_CallFunctionObjArgs(_tzpath_find_tzfile, key, NULL); + if (file_path == NULL) { + return NULL; + } + else if (file_path == Py_None) { + file_obj = PyObject_CallMethod(_common_mod, "load_tzdata", "O", key); + if (file_obj == NULL) { + Py_DECREF(file_path); + return NULL; + } + } + + PyObject *self = (PyObject *)(type->tp_alloc(type, 0)); + if (self == NULL) { + goto error; + } + + if (file_obj == NULL) { + file_obj = PyObject_CallFunction(io_open, "Os", file_path, "rb"); + if (file_obj == NULL) { + goto error; + } + } + + if (load_data((PyZoneInfo_ZoneInfo *)self, file_obj)) { + goto error; + } + + PyObject *rv = PyObject_CallMethod(file_obj, "close", NULL); + Py_DECREF(file_obj); + file_obj = NULL; + if (rv == NULL) { + goto error; + } + Py_DECREF(rv); + + ((PyZoneInfo_ZoneInfo *)self)->key = key; + Py_INCREF(key); + + goto cleanup; +error: + Py_XDECREF(self); + self = NULL; +cleanup: + if (file_obj != NULL) { + PyObject *exc, *val, *tb; + PyErr_Fetch(&exc, &val, &tb); + PyObject *tmp = PyObject_CallMethod(file_obj, "close", NULL); + _PyErr_ChainExceptions(exc, val, tb); + if (tmp == NULL) { + Py_CLEAR(self); + } + Py_XDECREF(tmp); + Py_DECREF(file_obj); + } + Py_DECREF(file_path); + return self; +} + +static PyObject * +get_weak_cache(PyTypeObject *type) +{ + if (type == &PyZoneInfo_ZoneInfoType) { + return ZONEINFO_WEAK_CACHE; + } + else { + PyObject *cache = + PyObject_GetAttrString((PyObject *)type, "_weak_cache"); + // We are assuming that the type lives at least as long as the function + // that calls get_weak_cache, and that it holds a reference to the + // cache, so we'll return a "borrowed reference". 
+ Py_XDECREF(cache); + return cache; + } +} + +static PyObject * +zoneinfo_new(PyTypeObject *type, PyObject *args, PyObject *kw) +{ + PyObject *key = NULL; + static char *kwlist[] = {"key", NULL}; + if (PyArg_ParseTupleAndKeywords(args, kw, "O", kwlist, &key) == 0) { + return NULL; + } + + PyObject *instance = zone_from_strong_cache(type, key); + if (instance != NULL || PyErr_Occurred()) { + return instance; + } + + PyObject *weak_cache = get_weak_cache(type); + instance = PyObject_CallMethod(weak_cache, "get", "O", key, Py_None); + if (instance == NULL) { + return NULL; + } + + if (instance == Py_None) { + Py_DECREF(instance); + PyObject *tmp = zoneinfo_new_instance(type, key); + if (tmp == NULL) { + return NULL; + } + + instance = + PyObject_CallMethod(weak_cache, "setdefault", "OO", key, tmp); + Py_DECREF(tmp); + if (instance == NULL) { + return NULL; + } + ((PyZoneInfo_ZoneInfo *)instance)->source = SOURCE_CACHE; + } + + update_strong_cache(type, key, instance); + return instance; +} + +static void +zoneinfo_dealloc(PyObject *obj_self) +{ + PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self; + + if (self->weakreflist != NULL) { + PyObject_ClearWeakRefs(obj_self); + } + + if (self->trans_list_utc != NULL) { + PyMem_Free(self->trans_list_utc); + } + + for (size_t i = 0; i < 2; i++) { + if (self->trans_list_wall[i] != NULL) { + PyMem_Free(self->trans_list_wall[i]); + } + } + + if (self->_ttinfos != NULL) { + for (size_t i = 0; i < self->num_ttinfos; ++i) { + xdecref_ttinfo(&(self->_ttinfos[i])); + } + PyMem_Free(self->_ttinfos); + } + + if (self->trans_ttinfos != NULL) { + PyMem_Free(self->trans_ttinfos); + } + + free_tzrule(&(self->tzrule_after)); + + Py_XDECREF(self->key); + Py_XDECREF(self->file_repr); + + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static PyObject * +zoneinfo_from_file(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *file_obj = NULL; + PyObject *file_repr = NULL; + PyObject *key = Py_None; + PyZoneInfo_ZoneInfo 
*self = NULL; + + static char *kwlist[] = {"", "key", NULL}; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", kwlist, &file_obj, + &key)) { + return NULL; + } + + PyObject *obj_self = (PyObject *)(type->tp_alloc(type, 0)); + self = (PyZoneInfo_ZoneInfo *)obj_self; + if (self == NULL) { + return NULL; + } + + file_repr = PyUnicode_FromFormat("%R", file_obj); + if (file_repr == NULL) { + goto error; + } + + if (load_data(self, file_obj)) { + goto error; + } + + self->source = SOURCE_FILE; + self->file_repr = file_repr; + self->key = key; + Py_INCREF(key); + + return obj_self; +error: + Py_XDECREF(file_repr); + Py_XDECREF(self); + return NULL; +} + +static PyObject * +zoneinfo_no_cache(PyTypeObject *cls, PyObject *args, PyObject *kwargs) +{ + static char *kwlist[] = {"key", NULL}; + PyObject *key = NULL; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kwlist, &key)) { + return NULL; + } + + PyObject *out = zoneinfo_new_instance(cls, key); + if (out != NULL) { + ((PyZoneInfo_ZoneInfo *)out)->source = SOURCE_NOCACHE; + } + + return out; +} + +static PyObject * +zoneinfo_clear_cache(PyObject *cls, PyObject *args, PyObject *kwargs) +{ + PyObject *only_keys = NULL; + static char *kwlist[] = {"only_keys", NULL}; + + if (!(PyArg_ParseTupleAndKeywords(args, kwargs, "|$O", kwlist, + &only_keys))) { + return NULL; + } + + PyTypeObject *type = (PyTypeObject *)cls; + PyObject *weak_cache = get_weak_cache(type); + + if (only_keys == NULL || only_keys == Py_None) { + PyObject *rv = PyObject_CallMethod(weak_cache, "clear", NULL); + if (rv != NULL) { + Py_DECREF(rv); + } + + clear_strong_cache(type); + } + else { + PyObject *item = NULL; + PyObject *pop = PyUnicode_FromString("pop"); + if (pop == NULL) { + return NULL; + } + + PyObject *iter = PyObject_GetIter(only_keys); + if (iter == NULL) { + Py_DECREF(pop); + return NULL; + } + + while ((item = PyIter_Next(iter))) { + // Remove from strong cache + if (eject_from_strong_cache(type, item) < 0) { + Py_DECREF(item); + 
break; + } + + // Remove from weak cache + PyObject *tmp = PyObject_CallMethodObjArgs(weak_cache, pop, item, + Py_None, NULL); + + Py_DECREF(item); + if (tmp == NULL) { + break; + } + Py_DECREF(tmp); + } + Py_DECREF(iter); + Py_DECREF(pop); + } + + if (PyErr_Occurred()) { + return NULL; + } + + Py_RETURN_NONE; +} + +static PyObject * +zoneinfo_utcoffset(PyObject *self, PyObject *dt) +{ + _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt); + if (tti == NULL) { + return NULL; + } + Py_INCREF(tti->utcoff); + return tti->utcoff; +} + +static PyObject * +zoneinfo_dst(PyObject *self, PyObject *dt) +{ + _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt); + if (tti == NULL) { + return NULL; + } + Py_INCREF(tti->dstoff); + return tti->dstoff; +} + +static PyObject * +zoneinfo_tzname(PyObject *self, PyObject *dt) +{ + _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt); + if (tti == NULL) { + return NULL; + } + Py_INCREF(tti->tzname); + return tti->tzname; +} + +#define HASTZINFO(p) (((_PyDateTime_BaseTZInfo *)(p))->hastzinfo) +#define GET_DT_TZINFO(p) \ + (HASTZINFO(p) ? 
((PyDateTime_DateTime *)(p))->tzinfo : Py_None) + +static PyObject * +zoneinfo_fromutc(PyObject *obj_self, PyObject *dt) +{ + if (!PyDateTime_Check(dt)) { + PyErr_SetString(PyExc_TypeError, + "fromutc: argument must be a datetime"); + return NULL; + } + if (GET_DT_TZINFO(dt) != obj_self) { + PyErr_SetString(PyExc_ValueError, + "fromutc: dt.tzinfo " + "is not self"); + return NULL; + } + + PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self; + + int64_t timestamp; + if (get_local_timestamp(dt, ×tamp)) { + return NULL; + } + size_t num_trans = self->num_transitions; + + _ttinfo *tti = NULL; + unsigned char fold = 0; + + if (num_trans >= 1 && timestamp < self->trans_list_utc[0]) { + tti = self->ttinfo_before; + } + else if (num_trans == 0 || + timestamp > self->trans_list_utc[num_trans - 1]) { + tti = find_tzrule_ttinfo_fromutc(&(self->tzrule_after), timestamp, + PyDateTime_GET_YEAR(dt), &fold); + + // Immediately after the last manual transition, the fold/gap is + // between self->trans_ttinfos[num_transitions - 1] and whatever + // ttinfo applies immediately after the last transition, not between + // the STD and DST rules in the tzrule_after, so we may need to + // adjust the fold value. 
+ if (num_trans) { + _ttinfo *tti_prev = NULL; + if (num_trans == 1) { + tti_prev = self->ttinfo_before; + } + else { + tti_prev = self->trans_ttinfos[num_trans - 2]; + } + int64_t diff = tti_prev->utcoff_seconds - tti->utcoff_seconds; + if (diff > 0 && + timestamp < (self->trans_list_utc[num_trans - 1] + diff)) { + fold = 1; + } + } + } + else { + size_t idx = _bisect(timestamp, self->trans_list_utc, num_trans); + _ttinfo *tti_prev = NULL; + + if (idx >= 2) { + tti_prev = self->trans_ttinfos[idx - 2]; + tti = self->trans_ttinfos[idx - 1]; + } + else { + tti_prev = self->ttinfo_before; + tti = self->trans_ttinfos[0]; + } + + // Detect fold + int64_t shift = + (int64_t)(tti_prev->utcoff_seconds - tti->utcoff_seconds); + if (shift > (timestamp - self->trans_list_utc[idx - 1])) { + fold = 1; + } + } + + PyObject *tmp = PyNumber_Add(dt, tti->utcoff); + if (tmp == NULL) { + return NULL; + } + + if (fold) { + if (PyDateTime_CheckExact(tmp)) { + ((PyDateTime_DateTime *)tmp)->fold = 1; + dt = tmp; + } + else { + PyObject *replace = PyObject_GetAttrString(tmp, "replace"); + PyObject *args = PyTuple_New(0); + PyObject *kwargs = PyDict_New(); + + Py_DECREF(tmp); + if (args == NULL || kwargs == NULL || replace == NULL) { + Py_XDECREF(args); + Py_XDECREF(kwargs); + Py_XDECREF(replace); + return NULL; + } + + dt = NULL; + if (!PyDict_SetItemString(kwargs, "fold", _PyLong_One)) { + dt = PyObject_Call(replace, args, kwargs); + } + + Py_DECREF(args); + Py_DECREF(kwargs); + Py_DECREF(replace); + + if (dt == NULL) { + return NULL; + } + } + } + else { + dt = tmp; + } + return dt; +} + +static PyObject * +zoneinfo_repr(PyZoneInfo_ZoneInfo *self) +{ + PyObject *rv = NULL; + const char *type_name = Py_TYPE((PyObject *)self)->tp_name; + if (!(self->key == Py_None)) { + rv = PyUnicode_FromFormat("%s(key=%R)", type_name, self->key); + } + else { + assert(PyUnicode_Check(self->file_repr)); + rv = PyUnicode_FromFormat("%s.from_file(%U)", type_name, + self->file_repr); + } + + return rv; +} + 
+static PyObject * +zoneinfo_str(PyZoneInfo_ZoneInfo *self) +{ + if (!(self->key == Py_None)) { + Py_INCREF(self->key); + return self->key; + } + else { + return zoneinfo_repr(self); + } +} + +/* Pickles the ZoneInfo object by key and source. + * + * ZoneInfo objects are pickled by reference to the TZif file that they came + * from, which means that the exact transitions may be different or the file + * may not un-pickle if the data has changed on disk in the interim. + * + * It is necessary to include a bit indicating whether or not the object + * was constructed from the cache, because from-cache objects will hit the + * unpickling process's cache, whereas no-cache objects will bypass it. + * + * Objects constructed from ZoneInfo.from_file cannot be pickled. + */ +static PyObject * +zoneinfo_reduce(PyObject *obj_self, PyObject *unused) +{ + PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self; + if (self->source == SOURCE_FILE) { + // Objects constructed from files cannot be pickled. + PyObject *pickle = PyImport_ImportModule("pickle"); + if (pickle == NULL) { + return NULL; + } + + PyObject *pickle_error = + PyObject_GetAttrString(pickle, "PicklingError"); + Py_DECREF(pickle); + if (pickle_error == NULL) { + return NULL; + } + + PyErr_Format(pickle_error, + "Cannot pickle a ZoneInfo file from a file stream."); + Py_DECREF(pickle_error); + return NULL; + } + + unsigned char from_cache = self->source == SOURCE_CACHE ? 
1 : 0; + PyObject *constructor = PyObject_GetAttrString(obj_self, "_unpickle"); + + if (constructor == NULL) { + return NULL; + } + + PyObject *rv = Py_BuildValue("O(OB)", constructor, self->key, from_cache); + Py_DECREF(constructor); + return rv; +} + +static PyObject * +zoneinfo__unpickle(PyTypeObject *cls, PyObject *args) +{ + PyObject *key; + unsigned char from_cache; + if (!PyArg_ParseTuple(args, "OB", &key, &from_cache)) { + return NULL; + } + + if (from_cache) { + PyObject *val_args = Py_BuildValue("(O)", key); + if (val_args == NULL) { + return NULL; + } + + PyObject *rv = zoneinfo_new(cls, val_args, NULL); + + Py_DECREF(val_args); + return rv; + } + else { + return zoneinfo_new_instance(cls, key); + } +} + +/* It is relatively expensive to construct new timedelta objects, and in most + * cases we're looking at a relatively small number of timedeltas, such as + * integer number of hours, etc. We will keep a cache so that we construct + * a minimal number of these. + * + * Possibly this should be replaced with an LRU cache so that it's not possible + * for the memory usage to explode from this, but in order for this to be a + * serious problem, one would need to deliberately craft a malicious time zone + * file with many distinct offsets. As of tzdb 2019c, loading every single zone + * fills the cache with ~450 timedeltas for a total size of ~12kB. + * + * This returns a new reference to the timedelta. 
+ */ +static PyObject * +load_timedelta(long seconds) +{ + PyObject *rv = NULL; + PyObject *pyoffset = PyLong_FromLong(seconds); + if (pyoffset == NULL) { + return NULL; + } + int contains = PyDict_Contains(TIMEDELTA_CACHE, pyoffset); + if (contains == -1) { + goto error; + } + + if (!contains) { + PyObject *tmp = PyDateTimeAPI->Delta_FromDelta( + 0, seconds, 0, 1, PyDateTimeAPI->DeltaType); + + if (tmp == NULL) { + goto error; + } + + rv = PyDict_SetDefault(TIMEDELTA_CACHE, pyoffset, tmp); + Py_DECREF(tmp); + } + else { + rv = PyDict_GetItem(TIMEDELTA_CACHE, pyoffset); + } + + Py_DECREF(pyoffset); + Py_INCREF(rv); + return rv; +error: + Py_DECREF(pyoffset); + return NULL; +} + +/* Constructor for _ttinfo object - this starts by initializing the _ttinfo + * to { NULL, NULL, NULL }, so that Py_XDECREF will work on partially + * initialized _ttinfo objects. + */ +static int +build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out) +{ + out->utcoff = NULL; + out->dstoff = NULL; + out->tzname = NULL; + + out->utcoff_seconds = utcoffset; + out->utcoff = load_timedelta(utcoffset); + if (out->utcoff == NULL) { + return -1; + } + + out->dstoff = load_timedelta(dstoffset); + if (out->dstoff == NULL) { + return -1; + } + + out->tzname = tzname; + Py_INCREF(tzname); + + return 0; +} + +/* Decrease reference count on any non-NULL members of a _ttinfo */ +static void +xdecref_ttinfo(_ttinfo *ttinfo) +{ + if (ttinfo != NULL) { + Py_XDECREF(ttinfo->utcoff); + Py_XDECREF(ttinfo->dstoff); + Py_XDECREF(ttinfo->tzname); + } +} + +/* Equality function for _ttinfo. 
*/ +static int +ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1) +{ + int rv; + if ((rv = PyObject_RichCompareBool(tti0->utcoff, tti1->utcoff, Py_EQ)) < + 1) { + goto end; + } + + if ((rv = PyObject_RichCompareBool(tti0->dstoff, tti1->dstoff, Py_EQ)) < + 1) { + goto end; + } + + if ((rv = PyObject_RichCompareBool(tti0->tzname, tti1->tzname, Py_EQ)) < + 1) { + goto end; + } +end: + return rv; +} + +/* Given a file-like object, this populates a ZoneInfo object + * + * The current version calls into a Python function to read the data from + * file into Python objects, and this translates those Python objects into + * C values and calculates derived values (e.g. dstoff) in C. + * + * This returns 0 on success and -1 on failure. + * + * The function will never return while `self` is partially initialized — + * the object only needs to be freed / deallocated if this succeeds. + */ +static int +load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj) +{ + PyObject *data_tuple = NULL; + + long *utcoff = NULL; + long *dstoff = NULL; + size_t *trans_idx = NULL; + unsigned char *isdst = NULL; + + self->trans_list_utc = NULL; + self->trans_list_wall[0] = NULL; + self->trans_list_wall[1] = NULL; + self->trans_ttinfos = NULL; + self->_ttinfos = NULL; + self->file_repr = NULL; + + size_t ttinfos_allocated = 0; + + data_tuple = PyObject_CallMethod(_common_mod, "load_data", "O", file_obj); + + if (data_tuple == NULL) { + goto error; + } + + if (!PyTuple_CheckExact(data_tuple)) { + PyErr_Format(PyExc_TypeError, "Invalid data result type: %r", + data_tuple); + goto error; + } + + // Unpack the data tuple + PyObject *trans_idx_list = PyTuple_GetItem(data_tuple, 0); + if (trans_idx_list == NULL) { + goto error; + } + + PyObject *trans_utc = PyTuple_GetItem(data_tuple, 1); + if (trans_utc == NULL) { + goto error; + } + + PyObject *utcoff_list = PyTuple_GetItem(data_tuple, 2); + if (utcoff_list == NULL) { + goto error; + } + + PyObject *isdst_list = 
PyTuple_GetItem(data_tuple, 3); + if (isdst_list == NULL) { + goto error; + } + + PyObject *abbr = PyTuple_GetItem(data_tuple, 4); + if (abbr == NULL) { + goto error; + } + + PyObject *tz_str = PyTuple_GetItem(data_tuple, 5); + if (tz_str == NULL) { + goto error; + } + + // Load the relevant sizes + Py_ssize_t num_transitions = PyTuple_Size(trans_utc); + if (num_transitions < 0) { + goto error; + } + + Py_ssize_t num_ttinfos = PyTuple_Size(utcoff_list); + if (num_ttinfos < 0) { + goto error; + } + + self->num_transitions = (size_t)num_transitions; + self->num_ttinfos = (size_t)num_ttinfos; + + // Load the transition indices and list + self->trans_list_utc = + PyMem_Malloc(self->num_transitions * sizeof(int64_t)); + if (self->trans_list_utc == NULL) { + goto error; + } + trans_idx = PyMem_Malloc(self->num_transitions * sizeof(Py_ssize_t)); + if (trans_idx == NULL) { + goto error; + } + + for (size_t i = 0; i < self->num_transitions; ++i) { + PyObject *num = PyTuple_GetItem(trans_utc, i); + if (num == NULL) { + goto error; + } + self->trans_list_utc[i] = PyLong_AsLongLong(num); + if (self->trans_list_utc[i] == -1 && PyErr_Occurred()) { + goto error; + } + + num = PyTuple_GetItem(trans_idx_list, i); + if (num == NULL) { + goto error; + } + + Py_ssize_t cur_trans_idx = PyLong_AsSsize_t(num); + if (cur_trans_idx == -1) { + goto error; + } + + trans_idx[i] = (size_t)cur_trans_idx; + if (trans_idx[i] > self->num_ttinfos) { + PyErr_Format( + PyExc_ValueError, + "Invalid transition index found while reading TZif: %zd", + cur_trans_idx); + + goto error; + } + } + + // Load UTC offsets and isdst (size num_ttinfos) + utcoff = PyMem_Malloc(self->num_ttinfos * sizeof(long)); + isdst = PyMem_Malloc(self->num_ttinfos * sizeof(unsigned char)); + + if (utcoff == NULL || isdst == NULL) { + goto error; + } + for (size_t i = 0; i < self->num_ttinfos; ++i) { + PyObject *num = PyTuple_GetItem(utcoff_list, i); + if (num == NULL) { + goto error; + } + + utcoff[i] = PyLong_AsLong(num); + if 
(utcoff[i] == -1 && PyErr_Occurred()) { + goto error; + } + + num = PyTuple_GetItem(isdst_list, i); + if (num == NULL) { + goto error; + } + + int isdst_with_error = PyObject_IsTrue(num); + if (isdst_with_error == -1) { + goto error; + } + else { + isdst[i] = (unsigned char)isdst_with_error; + } + } + + dstoff = PyMem_Calloc(self->num_ttinfos, sizeof(long)); + if (dstoff == NULL) { + goto error; + } + + // Derive dstoff and trans_list_wall from the information we've loaded + utcoff_to_dstoff(trans_idx, utcoff, dstoff, isdst, self->num_transitions, + self->num_ttinfos); + + if (ts_to_local(trans_idx, self->trans_list_utc, utcoff, + self->trans_list_wall, self->num_ttinfos, + self->num_transitions)) { + goto error; + } + + // Build _ttinfo objects from utcoff, dstoff and abbr + self->_ttinfos = PyMem_Malloc(self->num_ttinfos * sizeof(_ttinfo)); + if (self->_ttinfos == NULL) { + goto error; + } + for (size_t i = 0; i < self->num_ttinfos; ++i) { + PyObject *tzname = PyTuple_GetItem(abbr, i); + if (tzname == NULL) { + goto error; + } + + ttinfos_allocated++; + if (build_ttinfo(utcoff[i], dstoff[i], tzname, &(self->_ttinfos[i]))) { + goto error; + } + } + + // Build our mapping from transition to the ttinfo that applies + self->trans_ttinfos = + PyMem_Calloc(self->num_transitions, sizeof(_ttinfo *)); + if (self->trans_ttinfos == NULL) { + goto error; + } + for (size_t i = 0; i < self->num_transitions; ++i) { + size_t ttinfo_idx = trans_idx[i]; + assert(ttinfo_idx < self->num_ttinfos); + self->trans_ttinfos[i] = &(self->_ttinfos[ttinfo_idx]); + } + + // Set ttinfo_before to the first non-DST transition + for (size_t i = 0; i < self->num_ttinfos; ++i) { + if (!isdst[i]) { + self->ttinfo_before = &(self->_ttinfos[i]); + break; + } + } + + // If there are only DST ttinfos, pick the first one, if there are no + // ttinfos at all, set ttinfo_before to NULL + if (self->ttinfo_before == NULL && self->num_ttinfos > 0) { + self->ttinfo_before = &(self->_ttinfos[0]); + } + + if 
(tz_str != Py_None && PyObject_IsTrue(tz_str)) { + if (parse_tz_str(tz_str, &(self->tzrule_after))) { + goto error; + } + } + else { + if (!self->num_ttinfos) { + PyErr_Format(PyExc_ValueError, "No time zone information found."); + goto error; + } + + size_t idx; + if (!self->num_transitions) { + idx = self->num_ttinfos - 1; + } + else { + idx = trans_idx[self->num_transitions - 1]; + } + + _ttinfo *tti = &(self->_ttinfos[idx]); + build_tzrule(tti->tzname, NULL, tti->utcoff_seconds, 0, NULL, NULL, + &(self->tzrule_after)); + + // We've abused the build_tzrule constructor to construct an STD-only + // rule mimicking whatever ttinfo we've picked up, but it's possible + // that the one we've picked up is a DST zone, so we need to make sure + // that the dstoff is set correctly in that case. + if (PyObject_IsTrue(tti->dstoff)) { + _ttinfo *tti_after = &(self->tzrule_after.std); + Py_DECREF(tti_after->dstoff); + tti_after->dstoff = tti->dstoff; + Py_INCREF(tti_after->dstoff); + } + } + + // Determine if this is a "fixed offset" zone, meaning that the output of + // the utcoffset, dst and tzname functions does not depend on the specific + // datetime passed. + // + // We make three simplifying assumptions here: + // + // 1. If tzrule_after is not std_only, it has transitions that might occur + // (it is possible to construct TZ strings that specify STD and DST but + // no transitions ever occur, such as AAA0BBB,0/0,J365/25). + // 2. If self->_ttinfos contains more than one _ttinfo object, the objects + // represent different offsets. + // 3. self->ttinfos contains no unused _ttinfos (in which case an otherwise + // fixed-offset zone with extra _ttinfos defined may appear to *not* be + // a fixed offset zone). + // + // Violations to these assumptions would be fairly exotic, and exotic + // zones should almost certainly not be used with datetime.time (the + // only thing that would be affected by this). 
+ if (self->num_ttinfos > 1 || !self->tzrule_after.std_only) { + self->fixed_offset = 0; + } + else if (self->num_ttinfos == 0) { + self->fixed_offset = 1; + } + else { + int constant_offset = + ttinfo_eq(&(self->_ttinfos[0]), &self->tzrule_after.std); + if (constant_offset < 0) { + goto error; + } + else { + self->fixed_offset = constant_offset; + } + } + + int rv = 0; + goto cleanup; +error: + // These resources only need to be freed if we have failed, if we succeed + // in initializing a PyZoneInfo_ZoneInfo object, we can rely on its dealloc + // method to free the relevant resources. + if (self->trans_list_utc != NULL) { + PyMem_Free(self->trans_list_utc); + self->trans_list_utc = NULL; + } + + for (size_t i = 0; i < 2; ++i) { + if (self->trans_list_wall[i] != NULL) { + PyMem_Free(self->trans_list_wall[i]); + self->trans_list_wall[i] = NULL; + } + } + + if (self->_ttinfos != NULL) { + for (size_t i = 0; i < ttinfos_allocated; ++i) { + xdecref_ttinfo(&(self->_ttinfos[i])); + } + PyMem_Free(self->_ttinfos); + self->_ttinfos = NULL; + } + + if (self->trans_ttinfos != NULL) { + PyMem_Free(self->trans_ttinfos); + self->trans_ttinfos = NULL; + } + + rv = -1; +cleanup: + Py_XDECREF(data_tuple); + + if (utcoff != NULL) { + PyMem_Free(utcoff); + } + + if (dstoff != NULL) { + PyMem_Free(dstoff); + } + + if (isdst != NULL) { + PyMem_Free(isdst); + } + + if (trans_idx != NULL) { + PyMem_Free(trans_idx); + } + + return rv; +} + +/* Function to calculate the local timestamp of a transition from the year. */ +int64_t +calendarrule_year_to_timestamp(TransitionRuleType *base_self, int year) +{ + CalendarRule *self = (CalendarRule *)base_self; + + // We want (year, month, day of month); we have year and month, but we + // need to turn (week, day-of-week) into day-of-month + // + // Week 1 is the first week in which day `day` (where 0 = Sunday) appears. 
+ // Week 5 represents the last occurrence of day `day`, so we need to know + // the first weekday of the month and the number of days in the month. + int8_t first_day = (ymd_to_ord(year, self->month, 1) + 6) % 7; + uint8_t days_in_month = DAYS_IN_MONTH[self->month]; + if (self->month == 2 && is_leap_year(year)) { + days_in_month += 1; + } + + // This equation seems magical, so I'll break it down: + // 1. calendar says 0 = Monday, POSIX says 0 = Sunday so we need first_day + // + 1 to get 1 = Monday -> 7 = Sunday, which is still equivalent + // because this math is mod 7 + // 2. Get first day - desired day mod 7 (adjusting by 7 for negative + // numbers so that -1 % 7 = 6). + // 3. Add 1 because month days are a 1-based index. + int8_t month_day = ((int8_t)(self->day) - (first_day + 1)) % 7; + if (month_day < 0) { + month_day += 7; + } + month_day += 1; + + // Now use a 0-based index version of `week` to calculate the w-th + // occurrence of `day` + month_day += ((int8_t)(self->week) - 1) * 7; + + // month_day will only be > days_in_month if w was 5, and `w` means "last + // occurrence of `d`", so now we just check if we over-shot the end of the + // month and if so knock off 1 week. + if (month_day > days_in_month) { + month_day -= 7; + } + + int64_t ordinal = ymd_to_ord(year, self->month, month_day) - EPOCHORDINAL; + return ((ordinal * 86400) + (int64_t)(self->hour * 3600) + + (int64_t)(self->minute * 60) + (int64_t)(self->second)); +} + +/* Constructor for CalendarRule. */ +int +calendarrule_new(uint8_t month, uint8_t week, uint8_t day, int8_t hour, + int8_t minute, int8_t second, CalendarRule *out) +{ + // These bounds come from the POSIX standard, which describes an Mm.n.d + // rule as: + // + // The d'th day (0 <= d <= 6) of week n of month m of the year (1 <= n <= + // 5, 1 <= m <= 12, where week 5 means "the last d day in month m" which + // may occur in either the fourth or the fifth week). Week 1 is the first + // week in which the d'th day occurs. 
Day zero is Sunday. + if (month <= 0 || month > 12) { + PyErr_Format(PyExc_ValueError, "Month must be in (0, 12]"); + return -1; + } + + if (week <= 0 || week > 5) { + PyErr_Format(PyExc_ValueError, "Week must be in (0, 5]"); + return -1; + } + + // If the 'day' parameter type is changed to a signed type, + // "day < 0" check must be added. + if (/* day < 0 || */ day > 6) { + PyErr_Format(PyExc_ValueError, "Day must be in [0, 6]"); + return -1; + } + + TransitionRuleType base = {&calendarrule_year_to_timestamp}; + + CalendarRule new_offset = { + .base = base, + .month = month, + .week = week, + .day = day, + .hour = hour, + .minute = minute, + .second = second, + }; + + *out = new_offset; + return 0; +} + +/* Function to calculate the local timestamp of a transition from the year. + * + * This translates the day of the year into a local timestamp — either a + * 1-based Julian day, not including leap days, or the 0-based year-day, + * including leap days. + * */ +int64_t +dayrule_year_to_timestamp(TransitionRuleType *base_self, int year) +{ + // The function signature requires a TransitionRuleType pointer, but this + // function is only applicable to DayRule* objects. + DayRule *self = (DayRule *)base_self; + + // ymd_to_ord calculates the number of days since 0001-01-01, but we want + // to know the number of days since 1970-01-01, so we must subtract off + // the equivalent of ymd_to_ord(1970, 1, 1). + // + // We subtract off an additional 1 day to account for January 1st (we want + // the number of full days *before* the date of the transition - partial + // days are accounted for in the hour, minute and second portions. + int64_t days_before_year = ymd_to_ord(year, 1, 1) - EPOCHORDINAL - 1; + + // The Julian day specification skips over February 29th in leap years, + // from the POSIX standard: + // + // Leap days shall not be counted. That is, in all years-including leap + // years-February 28 is day 59 and March 1 is day 60. 
It is impossible to + // refer explicitly to the occasional February 29. + // + // This is actually more useful than you'd think — if you want a rule that + // always transitions on a given calendar day (other than February 29th), + // you would use a Julian day, e.g. J91 always refers to April 1st and J365 + // always refers to December 31st. + unsigned int day = self->day; + if (self->julian && day >= 59 && is_leap_year(year)) { + day += 1; + } + + return ((days_before_year + day) * 86400) + (self->hour * 3600) + + (self->minute * 60) + self->second; +} + +/* Constructor for DayRule. */ +static int +dayrule_new(uint8_t julian, unsigned int day, int8_t hour, int8_t minute, + int8_t second, DayRule *out) +{ + // The POSIX standard specifies that Julian days must be in the range (1 <= + // n <= 365) and that non-Julian (they call it "0-based Julian") days must + // be in the range (0 <= n <= 365). + if (day < julian || day > 365) { + PyErr_Format(PyExc_ValueError, "day must be in [%u, 365], not: %u", + julian, day); + return -1; + } + + TransitionRuleType base = { + &dayrule_year_to_timestamp, + }; + + DayRule tmp = { + .base = base, + .julian = julian, + .day = day, + .hour = hour, + .minute = minute, + .second = second, + }; + + *out = tmp; + + return 0; +} + +/* Calculate the start and end rules for a _tzrule in the given year. */ +static void +tzrule_transitions(_tzrule *rule, int year, int64_t *start, int64_t *end) +{ + assert(rule->start != NULL); + assert(rule->end != NULL); + *start = rule->start->year_to_timestamp(rule->start, year); + *end = rule->end->year_to_timestamp(rule->end, year); +} + +/* Calculate the _ttinfo that applies at a given local time from a _tzrule. 
+ * + * This takes a local timestamp and fold for disambiguation purposes; the year + * could technically be calculated from the timestamp, but given that the + * callers of this function already have the year information accessible from + * the datetime struct, it is taken as an additional parameter to reduce + * unnecessary calculation. + * */ +static _ttinfo * +find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year) +{ + if (rule->std_only) { + return &(rule->std); + } + + int64_t start, end; + uint8_t isdst; + + tzrule_transitions(rule, year, &start, &end); + + // With fold = 0, the period (denominated in local time) with the smaller + // offset starts at the end of the gap and ends at the end of the fold; + // with fold = 1, it runs from the start of the gap to the beginning of the + // fold. + // + // So in order to determine the DST boundaries we need to know both the + // fold and whether DST is positive or negative (rare), and it turns out + // that this boils down to fold XOR is_positive. + if (fold == (rule->dst_diff >= 0)) { + end -= rule->dst_diff; + } + else { + start += rule->dst_diff; + } + + if (start < end) { + isdst = (ts >= start) && (ts < end); + } + else { + isdst = (ts < end) || (ts >= start); + } + + if (isdst) { + return &(rule->dst); + } + else { + return &(rule->std); + } +} + +/* Calculate the ttinfo and fold that applies for a _tzrule at an epoch time. + * + * This function can determine the _ttinfo that applies at a given epoch time, + * (analogous to trans_list_utc), and whether or not the datetime is in a fold. + * This is to be used in the .fromutc() function. + * + * The year is technically a redundant parameter, because it can be calculated + * from the timestamp, but all callers of this function should have the year + * in the datetime struct anyway, so taking it as a parameter saves unnecessary + * calculation. 
+ **/ +static _ttinfo * +find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year, + unsigned char *fold) +{ + if (rule->std_only) { + *fold = 0; + return &(rule->std); + } + + int64_t start, end; + uint8_t isdst; + tzrule_transitions(rule, year, &start, &end); + start -= rule->std.utcoff_seconds; + end -= rule->dst.utcoff_seconds; + + if (start < end) { + isdst = (ts >= start) && (ts < end); + } + else { + isdst = (ts < end) || (ts >= start); + } + + // For positive DST, the ambiguous period is one dst_diff after the end of + // DST; for negative DST, the ambiguous period is one dst_diff before the + // start of DST. + int64_t ambig_start, ambig_end; + if (rule->dst_diff > 0) { + ambig_start = end; + ambig_end = end + rule->dst_diff; + } + else { + ambig_start = start; + ambig_end = start - rule->dst_diff; + } + + *fold = (ts >= ambig_start) && (ts < ambig_end); + + if (isdst) { + return &(rule->dst); + } + else { + return &(rule->std); + } +} + +/* Parse a TZ string in the format specified by the POSIX standard: + * + * std offset[dst[offset],start[/time],end[/time]] + * + * std and dst must be 3 or more characters long and must not contain a + * leading colon, embedded digits, commas, nor a plus or minus signs; The + * spaces between "std" and "offset" are only for display and are not actually + * present in the string. 
+ * + * The format of the offset is ``[+|-]hh[:mm[:ss]]`` + * + * See the POSIX.1 spec: IEE Std 1003.1-2018 §8.3: + * + * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html + */ +static int +parse_tz_str(PyObject *tz_str_obj, _tzrule *out) +{ + PyObject *std_abbr = NULL; + PyObject *dst_abbr = NULL; + TransitionRuleType *start = NULL; + TransitionRuleType *end = NULL; + // Initialize offsets to invalid value (> 24 hours) + long std_offset = 1 << 20; + long dst_offset = 1 << 20; + + char *tz_str = PyBytes_AsString(tz_str_obj); + if (tz_str == NULL) { + return -1; + } + char *p = tz_str; + + // Read the `std` abbreviation, which must be at least 3 characters long. + Py_ssize_t num_chars = parse_abbr(p, &std_abbr); + if (num_chars < 1) { + PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj); + goto error; + } + + p += num_chars; + + // Now read the STD offset, which is required + num_chars = parse_tz_delta(p, &std_offset); + if (num_chars < 0) { + PyErr_Format(PyExc_ValueError, "Invalid STD offset in %R", tz_str_obj); + goto error; + } + p += num_chars; + + // If the string ends here, there is no DST, otherwise we must parse the + // DST abbreviation and start and end dates and times. + if (*p == '\0') { + goto complete; + } + + num_chars = parse_abbr(p, &dst_abbr); + if (num_chars < 1) { + PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj); + goto error; + } + p += num_chars; + + if (*p == ',') { + // From the POSIX standard: + // + // If no offset follows dst, the alternative time is assumed to be one + // hour ahead of standard time. 
+ dst_offset = std_offset + 3600; + } + else { + num_chars = parse_tz_delta(p, &dst_offset); + if (num_chars < 0) { + PyErr_Format(PyExc_ValueError, "Invalid DST offset in %R", + tz_str_obj); + goto error; + } + + p += num_chars; + } + + TransitionRuleType **transitions[2] = {&start, &end}; + for (size_t i = 0; i < 2; ++i) { + if (*p != ',') { + PyErr_Format(PyExc_ValueError, + "Missing transition rules in TZ string: %R", + tz_str_obj); + goto error; + } + p++; + + num_chars = parse_transition_rule(p, transitions[i]); + if (num_chars < 0) { + PyErr_Format(PyExc_ValueError, + "Malformed transition rule in TZ string: %R", + tz_str_obj); + goto error; + } + p += num_chars; + } + + if (*p != '\0') { + PyErr_Format(PyExc_ValueError, + "Extraneous characters at end of TZ string: %R", + tz_str_obj); + goto error; + } + +complete: + build_tzrule(std_abbr, dst_abbr, std_offset, dst_offset, start, end, out); + Py_DECREF(std_abbr); + Py_XDECREF(dst_abbr); + + return 0; +error: + Py_XDECREF(std_abbr); + if (dst_abbr != NULL && dst_abbr != Py_None) { + Py_DECREF(dst_abbr); + } + + if (start != NULL) { + PyMem_Free(start); + } + + if (end != NULL) { + PyMem_Free(end); + } + + return -1; +} + +static int +parse_uint(const char *const p, uint8_t *value) +{ + if (!isdigit(*p)) { + return -1; + } + + *value = (*p) - '0'; + return 0; +} + +/* Parse the STD and DST abbreviations from a TZ string. */ +static Py_ssize_t +parse_abbr(const char *const p, PyObject **abbr) +{ + const char *ptr = p; + char buff = *ptr; + const char *str_start; + const char *str_end; + + if (*ptr == '<') { + ptr++; + str_start = ptr; + while ((buff = *ptr) != '>') { + // From the POSIX standard: + // + // In the quoted form, the first character shall be the less-than + // ( '<' ) character and the last character shall be the + // greater-than ( '>' ) character. 
All characters between these + // quoting characters shall be alphanumeric characters from the + // portable character set in the current locale, the plus-sign ( + // '+' ) character, or the minus-sign ( '-' ) character. The std + // and dst fields in this case shall not include the quoting + // characters. + if (!isalpha(buff) && !isdigit(buff) && buff != '+' && + buff != '-') { + return -1; + } + ptr++; + } + str_end = ptr; + ptr++; + } + else { + str_start = p; + // From the POSIX standard: + // + // In the unquoted form, all characters in these fields shall be + // alphabetic characters from the portable character set in the + // current locale. + while (isalpha(*ptr)) { + ptr++; + } + str_end = ptr; + } + + *abbr = PyUnicode_FromStringAndSize(str_start, str_end - str_start); + if (*abbr == NULL) { + return -1; + } + + return ptr - p; +} + +/* Parse a UTC offset from a TZ str. */ +static Py_ssize_t +parse_tz_delta(const char *const p, long *total_seconds) +{ + // From the POSIX spec: + // + // Indicates the value added to the local time to arrive at Coordinated + // Universal Time. The offset has the form: + // + // hh[:mm[:ss]] + // + // One or more digits may be used; the value is always interpreted as a + // decimal number. + // + // The POSIX spec says that the values for `hour` must be between 0 and 24 + // hours, but RFC 8536 §3.3.1 specifies that the hours part of the + // transition times may be signed and range from -167 to 167. + long sign = -1; + long hours = 0; + long minutes = 0; + long seconds = 0; + + const char *ptr = p; + char buff = *ptr; + if (buff == '-' || buff == '+') { + // Negative numbers correspond to *positive* offsets, from the spec: + // + // If preceded by a '-', the timezone shall be east of the Prime + // Meridian; otherwise, it shall be west (which may be indicated by + // an optional preceding '+' ). 
+ if (buff == '-') { + sign = 1; + } + + ptr++; + } + + // The hour can be 1 or 2 numeric characters + for (size_t i = 0; i < 2; ++i) { + buff = *ptr; + if (!isdigit(buff)) { + if (i == 0) { + return -1; + } + else { + break; + } + } + + hours *= 10; + hours += buff - '0'; + ptr++; + } + + if (hours > 24 || hours < 0) { + return -1; + } + + // Minutes and seconds always of the format ":dd" + long *outputs[2] = {&minutes, &seconds}; + for (size_t i = 0; i < 2; ++i) { + if (*ptr != ':') { + goto complete; + } + ptr++; + + for (size_t j = 0; j < 2; ++j) { + buff = *ptr; + if (!isdigit(buff)) { + return -1; + } + *(outputs[i]) *= 10; + *(outputs[i]) += buff - '0'; + ptr++; + } + } + +complete: + *total_seconds = sign * ((hours * 3600) + (minutes * 60) + seconds); + + return ptr - p; +} + +/* Parse the date portion of a transition rule. */ +static Py_ssize_t +parse_transition_rule(const char *const p, TransitionRuleType **out) +{ + // The full transition rule indicates when to change back and forth between + // STD and DST, and has the form: + // + // date[/time],date[/time] + // + // This function parses an individual date[/time] section, and returns + // the number of characters that contributed to the transition rule. This + // does not include the ',' at the end of the first rule. + // + // The POSIX spec states that if *time* is not given, the default is 02:00. + const char *ptr = p; + int8_t hour = 2; + int8_t minute = 0; + int8_t second = 0; + + // Rules come in one of three flavors: + // + // 1. Jn: Julian day n, with no leap days. + // 2. n: Day of year (0-based, with leap days) + // 3. Mm.n.d: Specifying by month, week and day-of-week. 
+ + if (*ptr == 'M') { + uint8_t month, week, day; + ptr++; + if (parse_uint(ptr, &month)) { + return -1; + } + ptr++; + if (*ptr != '.') { + uint8_t tmp; + if (parse_uint(ptr, &tmp)) { + return -1; + } + + month *= 10; + month += tmp; + ptr++; + } + + uint8_t *values[2] = {&week, &day}; + for (size_t i = 0; i < 2; ++i) { + if (*ptr != '.') { + return -1; + } + ptr++; + + if (parse_uint(ptr, values[i])) { + return -1; + } + ptr++; + } + + if (*ptr == '/') { + ptr++; + Py_ssize_t num_chars = + parse_transition_time(ptr, &hour, &minute, &second); + if (num_chars < 0) { + return -1; + } + ptr += num_chars; + } + + CalendarRule *rv = PyMem_Calloc(1, sizeof(CalendarRule)); + if (rv == NULL) { + return -1; + } + + if (calendarrule_new(month, week, day, hour, minute, second, rv)) { + PyMem_Free(rv); + return -1; + } + + *out = (TransitionRuleType *)rv; + } + else { + uint8_t julian = 0; + unsigned int day = 0; + if (*ptr == 'J') { + julian = 1; + ptr++; + } + + for (size_t i = 0; i < 3; ++i) { + if (!isdigit(*ptr)) { + if (i == 0) { + return -1; + } + break; + } + day *= 10; + day += (*ptr) - '0'; + ptr++; + } + + if (*ptr == '/') { + ptr++; + Py_ssize_t num_chars = + parse_transition_time(ptr, &hour, &minute, &second); + if (num_chars < 0) { + return -1; + } + ptr += num_chars; + } + + DayRule *rv = PyMem_Calloc(1, sizeof(DayRule)); + if (rv == NULL) { + return -1; + } + + if (dayrule_new(julian, day, hour, minute, second, rv)) { + PyMem_Free(rv); + return -1; + } + *out = (TransitionRuleType *)rv; + } + + return ptr - p; +} + +/* Parse the time portion of a transition rule (e.g. following an /) */ +static Py_ssize_t +parse_transition_time(const char *const p, int8_t *hour, int8_t *minute, + int8_t *second) +{ + // From the spec: + // + // The time has the same format as offset except that no leading sign + // ( '-' or '+' ) is allowed. 
+ // + // The format for the offset is: + // + // h[h][:mm[:ss]] + // + // RFC 8536 also allows transition times to be signed and to range from + // -167 to +167, but the current version only supports [0, 99]. + // + // TODO: Support the full range of transition hours. + int8_t *components[3] = {hour, minute, second}; + const char *ptr = p; + int8_t sign = 1; + + if (*ptr == '-' || *ptr == '+') { + if (*ptr == '-') { + sign = -1; + } + ptr++; + } + + for (size_t i = 0; i < 3; ++i) { + if (i > 0) { + if (*ptr != ':') { + break; + } + ptr++; + } + + uint8_t buff = 0; + for (size_t j = 0; j < 2; j++) { + if (!isdigit(*ptr)) { + if (i == 0 && j > 0) { + break; + } + return -1; + } + + buff *= 10; + buff += (*ptr) - '0'; + ptr++; + } + + *(components[i]) = sign * buff; + } + + return ptr - p; +} + +/* Constructor for a _tzrule. + * + * If `dst_abbr` is NULL, this will construct an "STD-only" _tzrule, in which + * case `dst_offset` will be ignored and `start` and `end` are expected to be + * NULL as well. + * + * Returns 0 on success. + */ +static int +build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset, + long dst_offset, TransitionRuleType *start, + TransitionRuleType *end, _tzrule *out) +{ + _tzrule rv = {{0}}; + + rv.start = start; + rv.end = end; + + if (build_ttinfo(std_offset, 0, std_abbr, &rv.std)) { + goto error; + } + + if (dst_abbr != NULL) { + rv.dst_diff = dst_offset - std_offset; + if (build_ttinfo(dst_offset, rv.dst_diff, dst_abbr, &rv.dst)) { + goto error; + } + } + else { + rv.std_only = 1; + } + + *out = rv; + + return 0; +error: + xdecref_ttinfo(&rv.std); + xdecref_ttinfo(&rv.dst); + return -1; +} + +/* Destructor for _tzrule. 
*/ +static void +free_tzrule(_tzrule *tzrule) +{ + xdecref_ttinfo(&(tzrule->std)); + if (!tzrule->std_only) { + xdecref_ttinfo(&(tzrule->dst)); + } + + if (tzrule->start != NULL) { + PyMem_Free(tzrule->start); + } + + if (tzrule->end != NULL) { + PyMem_Free(tzrule->end); + } +} + +/* Calculate DST offsets from transitions and UTC offsets + * + * This is necessary because each C `ttinfo` only contains the UTC offset, + * time zone abbreviation and an isdst boolean - it does not include the + * amount of the DST offset, but we need the amount for the dst() function. + * + * Thus function uses heuristics to infer what the offset should be, so it + * is not guaranteed that this will work for all zones. If we cannot assign + * a value for a given DST offset, we'll assume it's 1H rather than 0H, so + * bool(dt.dst()) will always match ttinfo.isdst. + */ +static void +utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs, + unsigned char *isdsts, size_t num_transitions, + size_t num_ttinfos) +{ + size_t dst_count = 0; + size_t dst_found = 0; + for (size_t i = 0; i < num_ttinfos; ++i) { + dst_count++; + } + + for (size_t i = 1; i < num_transitions; ++i) { + if (dst_count == dst_found) { + break; + } + + size_t idx = trans_idx[i]; + size_t comp_idx = trans_idx[i - 1]; + + // Only look at DST offsets that have nto been assigned already + if (!isdsts[idx] || dstoffs[idx] != 0) { + continue; + } + + long dstoff = 0; + long utcoff = utcoffs[idx]; + + if (!isdsts[comp_idx]) { + dstoff = utcoff - utcoffs[comp_idx]; + } + + if (!dstoff && idx < (num_ttinfos - 1)) { + comp_idx = trans_idx[i + 1]; + + // If the following transition is also DST and we couldn't find + // the DST offset by this point, we're going to have to skip it + // and hope this transition gets assigned later + if (isdsts[comp_idx]) { + continue; + } + + dstoff = utcoff - utcoffs[comp_idx]; + } + + if (dstoff) { + dst_found++; + dstoffs[idx] = dstoff; + } + } + + if (dst_found < dst_count) { + // If 
there are time zones we didn't find a value for, we'll end up + // with dstoff = 0 for something where isdst=1. This is obviously + // wrong — one hour will be a much better guess than 0. + for (size_t idx = 0; idx < num_ttinfos; ++idx) { + if (isdsts[idx] && !dstoffs[idx]) { + dstoffs[idx] = 3600; + } + } + } +} + +#define _swap(x, y, buffer) \ + buffer = x; \ + x = y; \ + y = buffer; + +/* Calculate transitions in local time from UTC time and offsets. + * + * We want to know when each transition occurs, denominated in the number of + * nominal wall-time seconds between 1970-01-01T00:00:00 and the transition in + * *local time* (note: this is *not* equivalent to the output of + * datetime.timestamp, which is the total number of seconds actual elapsed + * since 1970-01-01T00:00:00Z in UTC). + * + * This is an ambiguous question because "local time" can be ambiguous — but it + * is disambiguated by the `fold` parameter, so we allocate two arrays: + * + * trans_local[0]: The wall-time transitions for fold=0 + * trans_local[1]: The wall-time transitions for fold=1 + * + * This returns 0 on success and a negative number of failure. The trans_local + * arrays must be freed if they are not NULL. 
+ */ +static int +ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff, + int64_t *trans_local[2], size_t num_ttinfos, + size_t num_transitions) +{ + if (num_transitions == 0) { + return 0; + } + + // Copy the UTC transitions into each array to be modified in place later + for (size_t i = 0; i < 2; ++i) { + trans_local[i] = PyMem_Malloc(num_transitions * sizeof(int64_t)); + if (trans_local[i] == NULL) { + return -1; + } + + memcpy(trans_local[i], trans_utc, num_transitions * sizeof(int64_t)); + } + + int64_t offset_0, offset_1, buff; + if (num_ttinfos > 1) { + offset_0 = utcoff[0]; + offset_1 = utcoff[trans_idx[0]]; + + if (offset_1 > offset_0) { + _swap(offset_0, offset_1, buff); + } + } + else { + offset_0 = utcoff[0]; + offset_1 = utcoff[0]; + } + + trans_local[0][0] += offset_0; + trans_local[1][0] += offset_1; + + for (size_t i = 1; i < num_transitions; ++i) { + offset_0 = utcoff[trans_idx[i - 1]]; + offset_1 = utcoff[trans_idx[i]]; + + if (offset_1 > offset_0) { + _swap(offset_1, offset_0, buff); + } + + trans_local[0][i] += offset_0; + trans_local[1][i] += offset_1; + } + + return 0; +} + +/* Simple bisect_right binary search implementation */ +static size_t +_bisect(const int64_t value, const int64_t *arr, size_t size) +{ + size_t lo = 0; + size_t hi = size; + size_t m; + + while (lo < hi) { + m = (lo + hi) / 2; + if (arr[m] > value) { + hi = m; + } + else { + lo = m + 1; + } + } + + return hi; +} + +/* Find the ttinfo rules that apply at a given local datetime. */ +static _ttinfo * +find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt) +{ + // datetime.time has a .tzinfo attribute that passes None as the dt + // argument; it only really has meaning for fixed-offset zones. 
+ if (dt == Py_None) { + if (self->fixed_offset) { + return &(self->tzrule_after.std); + } + else { + return &NO_TTINFO; + } + } + + int64_t ts; + if (get_local_timestamp(dt, &ts)) { + return NULL; + } + + unsigned char fold = PyDateTime_DATE_GET_FOLD(dt); + assert(fold < 2); + int64_t *local_transitions = self->trans_list_wall[fold]; + size_t num_trans = self->num_transitions; + + if (num_trans && ts < local_transitions[0]) { + return self->ttinfo_before; + } + else if (!num_trans || ts > local_transitions[self->num_transitions - 1]) { + return find_tzrule_ttinfo(&(self->tzrule_after), ts, fold, + PyDateTime_GET_YEAR(dt)); + } + else { + size_t idx = _bisect(ts, local_transitions, self->num_transitions) - 1; + assert(idx < self->num_transitions); + return self->trans_ttinfos[idx]; + } +} + +static int +is_leap_year(int year) +{ + const unsigned int ayear = (unsigned int)year; + return ayear % 4 == 0 && (ayear % 100 != 0 || ayear % 400 == 0); +} + +/* Calculates ordinal datetime from year, month and day. */ +static int +ymd_to_ord(int y, int m, int d) +{ + y -= 1; + int days_before_year = (y * 365) + (y / 4) - (y / 100) + (y / 400); + int yearday = DAYS_BEFORE_MONTH[m]; + if (m > 2 && is_leap_year(y + 1)) { + yearday += 1; + } + + return days_before_year + yearday + d; +} + +/* Calculate the number of seconds since 1970-01-01 in local time. + * + * This gets a datetime in the same "units" as self->trans_list_wall so that we + * can easily determine which transitions a datetime falls between. See the + * comment above ts_to_local for more information. 
+ * */ +static int +get_local_timestamp(PyObject *dt, int64_t *local_ts) +{ + assert(local_ts != NULL); + + int hour, minute, second; + int ord; + if (PyDateTime_CheckExact(dt)) { + int y = PyDateTime_GET_YEAR(dt); + int m = PyDateTime_GET_MONTH(dt); + int d = PyDateTime_GET_DAY(dt); + hour = PyDateTime_DATE_GET_HOUR(dt); + minute = PyDateTime_DATE_GET_MINUTE(dt); + second = PyDateTime_DATE_GET_SECOND(dt); + + ord = ymd_to_ord(y, m, d); + } + else { + PyObject *num = PyObject_CallMethod(dt, "toordinal", NULL); + if (num == NULL) { + return -1; + } + + ord = PyLong_AsLong(num); + Py_DECREF(num); + if (ord == -1 && PyErr_Occurred()) { + return -1; + } + + num = PyObject_GetAttrString(dt, "hour"); + if (num == NULL) { + return -1; + } + hour = PyLong_AsLong(num); + Py_DECREF(num); + if (hour == -1) { + return -1; + } + + num = PyObject_GetAttrString(dt, "minute"); + if (num == NULL) { + return -1; + } + minute = PyLong_AsLong(num); + Py_DECREF(num); + if (minute == -1) { + return -1; + } + + num = PyObject_GetAttrString(dt, "second"); + if (num == NULL) { + return -1; + } + second = PyLong_AsLong(num); + Py_DECREF(num); + if (second == -1) { + return -1; + } + } + + *local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400 + + (int64_t)(hour * 3600 + minute * 60 + second); + + return 0; +} + +///// +// Functions for cache handling + +/* Constructor for StrongCacheNode */ +static StrongCacheNode * +strong_cache_node_new(PyObject *key, PyObject *zone) +{ + StrongCacheNode *node = PyMem_Malloc(sizeof(StrongCacheNode)); + if (node == NULL) { + return NULL; + } + + Py_INCREF(key); + Py_INCREF(zone); + + node->next = NULL; + node->prev = NULL; + node->key = key; + node->zone = zone; + + return node; +} + +/* Destructor for StrongCacheNode */ +void +strong_cache_node_free(StrongCacheNode *node) +{ + Py_XDECREF(node->key); + Py_XDECREF(node->zone); + + PyMem_Free(node); +} + +/* Frees all nodes at or after a specified root in the strong cache. 
+ * + * This can be used on the root node to free the entire cache or it can be used + * to clear all nodes that have been expired (which, if everything is going + * right, will actually only be 1 node at a time). + */ +void +strong_cache_free(StrongCacheNode *root) +{ + StrongCacheNode *node = root; + StrongCacheNode *next_node; + while (node != NULL) { + next_node = node->next; + strong_cache_node_free(node); + + node = next_node; + } +} + +/* Removes a node from the cache and update its neighbors. + * + * This is used both when ejecting a node from the cache and when moving it to + * the front of the cache. + */ +static void +remove_from_strong_cache(StrongCacheNode *node) +{ + if (ZONEINFO_STRONG_CACHE == node) { + ZONEINFO_STRONG_CACHE = node->next; + } + + if (node->prev != NULL) { + node->prev->next = node->next; + } + + if (node->next != NULL) { + node->next->prev = node->prev; + } + + node->next = NULL; + node->prev = NULL; +} + +/* Retrieves the node associated with a key, if it exists. + * + * This traverses the strong cache until it finds a matching key and returns a + * pointer to the relevant node if found. Returns NULL if no node is found. + * + * root may be NULL, indicating an empty cache. + */ +static StrongCacheNode * +find_in_strong_cache(const StrongCacheNode *const root, PyObject *const key) +{ + const StrongCacheNode *node = root; + while (node != NULL) { + int rv = PyObject_RichCompareBool(key, node->key, Py_EQ); + if (rv < 0) { + return NULL; + } + if (rv) { + return (StrongCacheNode *)node; + } + + node = node->next; + } + + return NULL; +} + +/* Ejects a given key from the class's strong cache, if applicable. + * + * This function is used to enable the per-key functionality in clear_cache. 
+ */ +static int +eject_from_strong_cache(const PyTypeObject *const type, PyObject *key) +{ + if (type != &PyZoneInfo_ZoneInfoType) { + return 0; + } + + StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key); + if (node != NULL) { + remove_from_strong_cache(node); + + strong_cache_node_free(node); + } + else if (PyErr_Occurred()) { + return -1; + } + return 0; +} + +/* Moves a node to the front of the LRU cache. + * + * The strong cache is an LRU cache, so whenever a given node is accessed, if + * it is not at the front of the cache, it needs to be moved there. + */ +static void +move_strong_cache_node_to_front(StrongCacheNode **root, StrongCacheNode *node) +{ + StrongCacheNode *root_p = *root; + if (root_p == node) { + return; + } + + remove_from_strong_cache(node); + + node->prev = NULL; + node->next = root_p; + + if (root_p != NULL) { + root_p->prev = node; + } + + *root = node; +} + +/* Retrieves a ZoneInfo from the strong cache if it's present. + * + * This function finds the ZoneInfo by key and if found will move the node to + * the front of the LRU cache and return a new reference to it. It returns NULL + * if the key is not in the cache. + * + * The strong cache is currently only implemented for the base class, so this + * always returns a cache miss for subclasses. + */ +static PyObject * +zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key) +{ + if (type != &PyZoneInfo_ZoneInfoType) { + return NULL; // Strong cache currently only implemented for base class + } + + StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key); + + if (node != NULL) { + move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, node); + Py_INCREF(node->zone); + return node->zone; + } + + return NULL; // Cache miss +} + +/* Inserts a new key into the strong LRU cache. 
+ * + * This function is only to be used after a cache miss — it creates a new node + * at the front of the cache and ejects any stale entries (keeping the size of + * the cache to at most ZONEINFO_STRONG_CACHE_MAX_SIZE). + */ +static void +update_strong_cache(const PyTypeObject *const type, PyObject *key, + PyObject *zone) +{ + if (type != &PyZoneInfo_ZoneInfoType) { + return; + } + + StrongCacheNode *new_node = strong_cache_node_new(key, zone); + + move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, new_node); + + StrongCacheNode *node = new_node->next; + for (size_t i = 1; i < ZONEINFO_STRONG_CACHE_MAX_SIZE; ++i) { + if (node == NULL) { + return; + } + node = node->next; + } + + // Everything beyond this point needs to be freed + if (node != NULL) { + if (node->prev != NULL) { + node->prev->next = NULL; + } + strong_cache_free(node); + } +} + +/* Clears all entries into a type's strong cache. + * + * Because the strong cache is not implemented for subclasses, this is a no-op + * for everything except the base class. + */ +void +clear_strong_cache(const PyTypeObject *const type) +{ + if (type != &PyZoneInfo_ZoneInfoType) { + return; + } + + strong_cache_free(ZONEINFO_STRONG_CACHE); + ZONEINFO_STRONG_CACHE = NULL; +} + +static PyObject * +new_weak_cache(void) +{ + PyObject *weakref_module = PyImport_ImportModule("weakref"); + if (weakref_module == NULL) { + return NULL; + } + + PyObject *weak_cache = + PyObject_CallMethod(weakref_module, "WeakValueDictionary", ""); + Py_DECREF(weakref_module); + return weak_cache; +} + +static int +initialize_caches(void) +{ + // TODO: Move to a PyModule_GetState / PEP 573 based caching system. 
+ if (TIMEDELTA_CACHE == NULL) { + TIMEDELTA_CACHE = PyDict_New(); + } + else { + Py_INCREF(TIMEDELTA_CACHE); + } + + if (TIMEDELTA_CACHE == NULL) { + return -1; + } + + if (ZONEINFO_WEAK_CACHE == NULL) { + ZONEINFO_WEAK_CACHE = new_weak_cache(); + } + else { + Py_INCREF(ZONEINFO_WEAK_CACHE); + } + + if (ZONEINFO_WEAK_CACHE == NULL) { + return -1; + } + + return 0; +} + +static PyObject * +zoneinfo_init_subclass(PyTypeObject *cls, PyObject *args, PyObject **kwargs) +{ + PyObject *weak_cache = new_weak_cache(); + if (weak_cache == NULL) { + return NULL; + } + + if (PyObject_SetAttrString((PyObject *)cls, "_weak_cache", + weak_cache) < 0) { + Py_DECREF(weak_cache); + return NULL; + } + Py_DECREF(weak_cache); + Py_RETURN_NONE; +} + +///// +// Specify the ZoneInfo type +static PyMethodDef zoneinfo_methods[] = { + {"clear_cache", (PyCFunction)(void (*)(void))zoneinfo_clear_cache, + METH_VARARGS | METH_KEYWORDS | METH_CLASS, + PyDoc_STR("Clear the ZoneInfo cache.")}, + {"no_cache", (PyCFunction)(void (*)(void))zoneinfo_no_cache, + METH_VARARGS | METH_KEYWORDS | METH_CLASS, + PyDoc_STR("Get a new instance of ZoneInfo, bypassing the cache.")}, + {"from_file", (PyCFunction)(void (*)(void))zoneinfo_from_file, + METH_VARARGS | METH_KEYWORDS | METH_CLASS, + PyDoc_STR("Create a ZoneInfo file from a file object.")}, + {"utcoffset", (PyCFunction)zoneinfo_utcoffset, METH_O, + PyDoc_STR("Retrieve a timedelta representing the UTC offset in a zone at " + "the given datetime.")}, + {"dst", (PyCFunction)zoneinfo_dst, METH_O, + PyDoc_STR("Retrieve a timedelta representing the amount of DST applied " + "in a zone at the given datetime.")}, + {"tzname", (PyCFunction)zoneinfo_tzname, METH_O, + PyDoc_STR("Retrieve a string containing the abbreviation for the time " + "zone that applies in a zone at a given datetime.")}, + {"fromutc", (PyCFunction)zoneinfo_fromutc, METH_O, + PyDoc_STR("Given a datetime with local time in UTC, retrieve an adjusted " + "datetime in local time.")}, + 
{"__reduce__", (PyCFunction)zoneinfo_reduce, METH_NOARGS, + PyDoc_STR("Function for serialization with the pickle protocol.")}, + {"_unpickle", (PyCFunction)zoneinfo__unpickle, METH_VARARGS | METH_CLASS, + PyDoc_STR("Private method used in unpickling.")}, + {"__init_subclass__", (PyCFunction)(void (*)(void))zoneinfo_init_subclass, + METH_VARARGS | METH_KEYWORDS | METH_CLASS, + PyDoc_STR("Function to initialize subclasses.")}, + {NULL} /* Sentinel */ +}; + +static PyMemberDef zoneinfo_members[] = { + {.name = "key", + .offset = offsetof(PyZoneInfo_ZoneInfo, key), + .type = T_OBJECT_EX, + .flags = READONLY, + .doc = NULL}, + {NULL}, /* Sentinel */ +}; + +static PyTypeObject PyZoneInfo_ZoneInfoType = { + PyVarObject_HEAD_INIT(NULL, 0) // + .tp_name = "zoneinfo.ZoneInfo", + .tp_basicsize = sizeof(PyZoneInfo_ZoneInfo), + .tp_weaklistoffset = offsetof(PyZoneInfo_ZoneInfo, weakreflist), + .tp_repr = (reprfunc)zoneinfo_repr, + .tp_str = (reprfunc)zoneinfo_str, + .tp_getattro = PyObject_GenericGetAttr, + .tp_flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE), + /* .tp_doc = zoneinfo_doc, */ + .tp_methods = zoneinfo_methods, + .tp_members = zoneinfo_members, + .tp_new = zoneinfo_new, + .tp_dealloc = zoneinfo_dealloc, +}; + +///// +// Specify the _zoneinfo module +static PyMethodDef module_methods[] = {{NULL, NULL}}; +static void +module_free() +{ + Py_XDECREF(_tzpath_find_tzfile); + _tzpath_find_tzfile = NULL; + + Py_XDECREF(_common_mod); + _common_mod = NULL; + + Py_XDECREF(io_open); + io_open = NULL; + + xdecref_ttinfo(&NO_TTINFO); + + if (TIMEDELTA_CACHE != NULL && Py_REFCNT(TIMEDELTA_CACHE) > 1) { + Py_DECREF(TIMEDELTA_CACHE); + } else { + Py_CLEAR(TIMEDELTA_CACHE); + } + + if (ZONEINFO_WEAK_CACHE != NULL && Py_REFCNT(ZONEINFO_WEAK_CACHE) > 1) { + Py_DECREF(ZONEINFO_WEAK_CACHE); + } else { + Py_CLEAR(ZONEINFO_WEAK_CACHE); + } + + clear_strong_cache(&PyZoneInfo_ZoneInfoType); +} + +static int +zoneinfomodule_exec(PyObject *m) +{ + PyDateTime_IMPORT; + if (PyDateTimeAPI == 
NULL) { + goto error; + } + PyZoneInfo_ZoneInfoType.tp_base = PyDateTimeAPI->TZInfoType; + if (PyType_Ready(&PyZoneInfo_ZoneInfoType) < 0) { + goto error; + } + + Py_INCREF(&PyZoneInfo_ZoneInfoType); + PyModule_AddObject(m, "ZoneInfo", (PyObject *)&PyZoneInfo_ZoneInfoType); + + /* Populate imports */ + PyObject *_tzpath_module = PyImport_ImportModule("zoneinfo._tzpath"); + if (_tzpath_module == NULL) { + goto error; + } + + _tzpath_find_tzfile = + PyObject_GetAttrString(_tzpath_module, "find_tzfile"); + Py_DECREF(_tzpath_module); + if (_tzpath_find_tzfile == NULL) { + goto error; + } + + PyObject *io_module = PyImport_ImportModule("io"); + if (io_module == NULL) { + goto error; + } + + io_open = PyObject_GetAttrString(io_module, "open"); + Py_DECREF(io_module); + if (io_open == NULL) { + goto error; + } + + _common_mod = PyImport_ImportModule("zoneinfo._common"); + if (_common_mod == NULL) { + goto error; + } + + if (NO_TTINFO.utcoff == NULL) { + NO_TTINFO.utcoff = Py_None; + NO_TTINFO.dstoff = Py_None; + NO_TTINFO.tzname = Py_None; + + for (size_t i = 0; i < 3; ++i) { + Py_INCREF(Py_None); + } + } + + if (initialize_caches()) { + goto error; + } + + return 0; + +error: + return -1; +} + +static PyModuleDef_Slot zoneinfomodule_slots[] = { + {Py_mod_exec, zoneinfomodule_exec}, {0, NULL}}; + +static struct PyModuleDef zoneinfomodule = { + PyModuleDef_HEAD_INIT, + .m_name = "_zoneinfo", + .m_doc = "C implementation of the zoneinfo module", + .m_size = 0, + .m_methods = module_methods, + .m_slots = zoneinfomodule_slots, + .m_free = (freefunc)module_free}; + +PyMODINIT_FUNC +PyInit__zoneinfo(void) +{ + return PyModuleDef_Init(&zoneinfomodule); +} |