diff options
author | vvvv <vvvv@yandex-team.com> | 2024-11-07 12:29:36 +0300 |
---|---|---|
committer | vvvv <vvvv@yandex-team.com> | 2024-11-07 13:49:47 +0300 |
commit | d4c258e9431675bab6745c8638df6e3dfd4dca6b (patch) | |
tree | b5efcfa11351152a4c872fccaea35749141c0b11 /yql/essentials/parser/pg_wrapper/postgresql/src/timezone | |
parent | 13a4f274caef5cfdaf0263b24e4d6bdd5521472b (diff) | |
download | ydb-d4c258e9431675bab6745c8638df6e3dfd4dca6b.tar.gz |
Moved other yql/essentials libs YQL-19206
init
commit_hash:7d4c435602078407bbf20dd3c32f9c90d2bbcbc0
Diffstat (limited to 'yql/essentials/parser/pg_wrapper/postgresql/src/timezone')
6 files changed, 3329 insertions, 0 deletions
diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/timezone/localtime.c b/yql/essentials/parser/pg_wrapper/postgresql/src/timezone/localtime.c new file mode 100644 index 00000000000..f391fa3f41e --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/timezone/localtime.c @@ -0,0 +1,1906 @@ +/* Convert timestamp from pg_time_t to struct pg_tm. */ + +/* + * This file is in the public domain, so clarified as of + * 1996-06-05 by Arthur David Olson. + * + * IDENTIFICATION + * src/timezone/localtime.c + */ + +/* + * Leap second handling from Bradley White. + * POSIX-style TZ environment variable handling from Guy Harris. + */ + +/* this file needs to build in both frontend and backend contexts */ +#include "c.h" + +#include <fcntl.h> + +#include "datatype/timestamp.h" +#include "pgtz.h" + +#include "private.h" +#include "tzfile.h" + + +#ifndef WILDABBR +/* + * Someone might make incorrect use of a time zone abbreviation: + * 1. They might reference tzname[0] before calling tzset (explicitly + * or implicitly). + * 2. They might reference tzname[1] before calling tzset (explicitly + * or implicitly). + * 3. They might reference tzname[1] after setting to a time zone + * in which Daylight Saving Time is never observed. + * 4. They might reference tzname[0] after setting to a time zone + * in which Standard Time is never observed. + * 5. They might reference tm.tm_zone after calling offtime. + * What's best to do in the above cases is open to debate; + * for now, we just set things up so that in any of the five cases + * WILDABBR is used. Another possibility: initialize tzname[0] to the + * string "tzname[0] used before set", and similarly for the other cases. + * And another: initialize tzname[0] to "ERA", with an explanation in the + * manual page of what this "time zone abbreviation" means (doing this so + * that tzname[0] has the "normal" length of three characters). 
+ */ +#define WILDABBR " " +#endif /* !defined WILDABBR */ + +static const char wildabbr[] = WILDABBR; + +static const char gmt[] = "GMT"; + +/* + * The DST rules to use if a POSIX TZ string has no rules. + * Default to US rules as of 2017-05-07. + * POSIX does not specify the default DST rules; + * for historical reasons, US rules are a common default. + */ +#define TZDEFRULESTRING ",M3.2.0,M11.1.0" + +/* structs ttinfo, lsinfo, state have been moved to pgtz.h */ + +enum r_type +{ + JULIAN_DAY, /* Jn = Julian day */ + DAY_OF_YEAR, /* n = day of year */ + MONTH_NTH_DAY_OF_WEEK /* Mm.n.d = month, week, day of week */ +}; + +struct rule +{ + enum r_type r_type; /* type of rule */ + int r_day; /* day number of rule */ + int r_week; /* week number of rule */ + int r_mon; /* month number of rule */ + int32 r_time; /* transition time of rule */ +}; + +/* + * Prototypes for static functions. + */ + +static struct pg_tm *gmtsub(pg_time_t const *timep, int32 offset, + struct pg_tm *tmp); +static bool increment_overflow(int *ip, int j); +static bool increment_overflow_time(pg_time_t *tp, int32 j); +static int64 leapcorr(struct state const *sp, pg_time_t t); +static struct pg_tm *timesub(pg_time_t const *timep, + int32 offset, struct state const *sp, + struct pg_tm *tmp); +static bool typesequiv(struct state const *sp, int a, int b); + + +/* + * Section 4.12.3 of X3.159-1989 requires that + * Except for the strftime function, these functions [asctime, + * ctime, gmtime, localtime] return values in one of two static + * objects: a broken-down time structure and an array of char. + * Thanks to Paul Eggert for noting this. + */ + +static __thread struct pg_tm tm; + +/* Initialize *S to a value based on UTOFF, ISDST, and DESIGIDX. 
*/ +static void +init_ttinfo(struct ttinfo *s, int32 utoff, bool isdst, int desigidx) +{ + s->tt_utoff = utoff; + s->tt_isdst = isdst; + s->tt_desigidx = desigidx; + s->tt_ttisstd = false; + s->tt_ttisut = false; +} + +static int32 +detzcode(const char *const codep) +{ + int32 result; + int i; + int32 one = 1; + int32 halfmaxval = one << (32 - 2); + int32 maxval = halfmaxval - 1 + halfmaxval; + int32 minval = -1 - maxval; + + result = codep[0] & 0x7f; + for (i = 1; i < 4; ++i) + result = (result << 8) | (codep[i] & 0xff); + + if (codep[0] & 0x80) + { + /* + * Do two's-complement negation even on non-two's-complement machines. + * If the result would be minval - 1, return minval. + */ + result -= !TWOS_COMPLEMENT(int32) && result != 0; + result += minval; + } + return result; +} + +static int64 +detzcode64(const char *const codep) +{ + uint64 result; + int i; + int64 one = 1; + int64 halfmaxval = one << (64 - 2); + int64 maxval = halfmaxval - 1 + halfmaxval; + int64 minval = -TWOS_COMPLEMENT(int64) - maxval; + + result = codep[0] & 0x7f; + for (i = 1; i < 8; ++i) + result = (result << 8) | (codep[i] & 0xff); + + if (codep[0] & 0x80) + { + /* + * Do two's-complement negation even on non-two's-complement machines. + * If the result would be minval - 1, return minval. + */ + result -= !TWOS_COMPLEMENT(int64) && result != 0; + result += minval; + } + return result; +} + +static bool +differ_by_repeat(const pg_time_t t1, const pg_time_t t0) +{ + if (TYPE_BIT(pg_time_t) - TYPE_SIGNED(pg_time_t) < SECSPERREPEAT_BITS) + return 0; + return t1 - t0 == SECSPERREPEAT; +} + +/* Input buffer for data read from a compiled tz file. */ +union input_buffer +{ + /* The first part of the buffer, interpreted as a header. */ + struct tzhead tzhead; + + /* The entire buffer. */ + char buf[2 * sizeof(struct tzhead) + 2 * sizeof(struct state) + + 4 * TZ_MAX_TIMES]; +}; + +/* Local storage needed for 'tzloadbody'. 
/*
 * Local storage needed for 'tzloadbody'.  tzload() allocates one of these
 * per call and frees it afterwards.
 */
union local_storage
{
	/* The results of analyzing the file's contents after it is opened.  */
	struct file_analysis
	{
		/* The input buffer.  */
		union input_buffer u;

		/* A temporary state used for parsing a TZ string in the file.  */
		struct state st;
	}			u;

	/* We don't need the "fullname" member */
};

/* Load tz data from the file named NAME into *SP.  Read extended
 * format if DOEXTEND.  Use *LSP for temporary storage.  Return 0 on
 * success, an errno value on failure.
 * PG: If "canonname" is not NULL, then on success the canonical spelling of
 * given name is stored there (the buffer must be > TZ_STRLEN_MAX bytes!).
 *
 * Outline of the steps performed below:
 *	1. Open the file through pg_open_tzfile() and read it into the input
 *	   buffer with a single read() call, then close it.
 *	2. Decode one data block using 4-byte transition times; unless the
 *	   header's version byte is NUL, shift the remaining bytes to the front
 *	   of the buffer and decode a second block using 8-byte times.
 *	3. If DOEXTEND and the leftover bytes are a newline-delimited string,
 *	   parse it with tzparse() and append its transitions and types.
 *	4. Set sp->goback / sp->goahead and choose sp->defaulttype.
 *
 * On any failure *SP may have been partially overwritten.
 */
static int
tzloadbody(char const *name, char *canonname, struct state *sp, bool doextend,
		   union local_storage *lsp)
{
	int			i;
	int			fid;
	int			stored;
	ssize_t		nread;
	union input_buffer *up = &lsp->u.u;
	int			tzheadsize = sizeof(struct tzhead);

	sp->goback = sp->goahead = false;

	if (!name)
	{
		name = TZDEFAULT;
		if (!name)
			return EINVAL;
	}

	/* A single leading ':' in the name is skipped. */
	if (name[0] == ':')
		++name;

	fid = pg_open_tzfile(name, canonname);
	if (fid < 0)
		return ENOENT;			/* pg_open_tzfile may not set errno */

	/* A read shorter than the header (or a read error) is a failure. */
	nread = read(fid, up->buf, sizeof up->buf);
	if (nread < tzheadsize)
	{
		int			err = nread < 0 ? errno : EINVAL;

		close(fid);
		return err;
	}
	if (close(fid) < 0)
		return errno;

	/*
	 * "stored" is the width in bytes of each transition/leap time in the
	 * block being decoded: 4 on the first pass, 8 on the second.
	 */
	for (stored = 4; stored <= 8; stored *= 2)
	{
		int32		ttisstdcnt = detzcode(up->tzhead.tzh_ttisstdcnt);
		int32		ttisutcnt = detzcode(up->tzhead.tzh_ttisutcnt);
		int64		prevtr = 0;
		int32		prevcorr = 0;
		int32		leapcnt = detzcode(up->tzhead.tzh_leapcnt);
		int32		timecnt = detzcode(up->tzhead.tzh_timecnt);
		int32		typecnt = detzcode(up->tzhead.tzh_typecnt);
		int32		charcnt = detzcode(up->tzhead.tzh_charcnt);
		char const *p = up->buf + tzheadsize;

		/*
		 * Although tzfile(5) currently requires typecnt to be nonzero,
		 * support future formats that may allow zero typecnt in files that
		 * have a TZ string and no transitions.
		 */
		if (!(0 <= leapcnt && leapcnt < TZ_MAX_LEAPS
			  && 0 <= typecnt && typecnt < TZ_MAX_TYPES
			  && 0 <= timecnt && timecnt < TZ_MAX_TIMES
			  && 0 <= charcnt && charcnt < TZ_MAX_CHARS
			  && (ttisstdcnt == typecnt || ttisstdcnt == 0)
			  && (ttisutcnt == typecnt || ttisutcnt == 0)))
			return EINVAL;

		/* The bytes read must cover everything the header promises. */
		if (nread
			< (tzheadsize		/* struct tzhead */
			   + timecnt * stored	/* ats */
			   + timecnt		/* types */
			   + typecnt * 6	/* ttinfos */
			   + charcnt		/* chars */
			   + leapcnt * (stored + 4) /* lsinfos */
			   + ttisstdcnt		/* ttisstds */
			   + ttisutcnt))	/* ttisuts */
			return EINVAL;
		sp->leapcnt = leapcnt;
		sp->timecnt = timecnt;
		sp->typecnt = typecnt;
		sp->charcnt = charcnt;

		/*
		 * Read transitions, discarding those out of pg_time_t range. But
		 * pretend the last transition before TIME_T_MIN occurred at
		 * TIME_T_MIN.
		 *
		 * During this pass sp->types[i] is used as a keep/discard flag for
		 * transition i; the real type indexes are filled in by the next
		 * loop.
		 */
		timecnt = 0;
		for (i = 0; i < sp->timecnt; ++i)
		{
			int64		at
			= stored == 4 ? detzcode(p) : detzcode64(p);

			sp->types[i] = at <= TIME_T_MAX;
			if (sp->types[i])
			{
				pg_time_t	attime
				= ((TYPE_SIGNED(pg_time_t) ? at < TIME_T_MIN : at < 0)
				   ? TIME_T_MIN : at);

				/*
				 * Times must not decrease.  A time equal to the previously
				 * kept one replaces it.
				 */
				if (timecnt && attime <= sp->ats[timecnt - 1])
				{
					if (attime < sp->ats[timecnt - 1])
						return EINVAL;
					sp->types[i - 1] = 0;
					timecnt--;
				}
				sp->ats[timecnt++] = attime;
			}
			p += stored;
		}

		/* Read the type index of each transition, compacting kept ones. */
		timecnt = 0;
		for (i = 0; i < sp->timecnt; ++i)
		{
			unsigned char typ = *p++;

			if (sp->typecnt <= typ)
				return EINVAL;
			if (sp->types[i])
				sp->types[timecnt++] = typ;
		}
		sp->timecnt = timecnt;

		/* Read the ttinfo entries: 4-byte offset, isdst byte, abbr index. */
		for (i = 0; i < sp->typecnt; ++i)
		{
			struct ttinfo *ttisp;
			unsigned char isdst,
						desigidx;

			ttisp = &sp->ttis[i];
			ttisp->tt_utoff = detzcode(p);
			p += 4;
			isdst = *p++;
			if (!(isdst < 2))
				return EINVAL;
			ttisp->tt_isdst = isdst;
			desigidx = *p++;
			if (!(desigidx < sp->charcnt))
				return EINVAL;
			ttisp->tt_desigidx = desigidx;
		}

		/* Copy the abbreviation characters. */
		for (i = 0; i < sp->charcnt; ++i)
			sp->chars[i] = *p++;
		sp->chars[i] = '\0';	/* ensure '\0' at end */

		/* Read leap seconds, discarding those out of pg_time_t range.  */
		leapcnt = 0;
		for (i = 0; i < sp->leapcnt; ++i)
		{
			int64		tr = stored == 4 ? detzcode(p) : detzcode64(p);
			int32		corr = detzcode(p + stored);

			p += stored + 4;
			/* Leap seconds cannot occur before the Epoch.  */
			if (tr < 0)
				return EINVAL;
			if (tr <= TIME_T_MAX)
			{
				/*
				 * Leap seconds cannot occur more than once per UTC month, and
				 * UTC months are at least 28 days long (minus 1 second for a
				 * negative leap second).  Each leap second's correction must
				 * differ from the previous one's by 1 second.
				 */
				if (tr - prevtr < 28 * SECSPERDAY - 1
					|| (corr != prevcorr - 1 && corr != prevcorr + 1))
					return EINVAL;
				sp->lsis[leapcnt].ls_trans = prevtr = tr;
				sp->lsis[leapcnt].ls_corr = prevcorr = corr;
				leapcnt++;
			}
		}
		sp->leapcnt = leapcnt;

		/* Read the per-type "standard" indicators, if the file has any. */
		for (i = 0; i < sp->typecnt; ++i)
		{
			struct ttinfo *ttisp;

			ttisp = &sp->ttis[i];
			if (ttisstdcnt == 0)
				ttisp->tt_ttisstd = false;
			else
			{
				if (*p != true && *p != false)
					return EINVAL;
				ttisp->tt_ttisstd = *p++;
			}
		}

		/* Read the per-type "UT" indicators, if the file has any. */
		for (i = 0; i < sp->typecnt; ++i)
		{
			struct ttinfo *ttisp;

			ttisp = &sp->ttis[i];
			if (ttisutcnt == 0)
				ttisp->tt_ttisut = false;
			else
			{
				if (*p != true && *p != false)
					return EINVAL;
				ttisp->tt_ttisut = *p++;
			}
		}

		/*
		 * If this is an old file, we're done.
		 */
		if (up->tzhead.tzh_version[0] == '\0')
			break;

		/*
		 * Otherwise move the unconsumed bytes to the start of the buffer so
		 * that the next pass (or the TZ-string check below) finds them at
		 * up->buf.
		 */
		nread -= p - up->buf;
		memmove(up->buf, p, nread);
	}

	/*
	 * Whatever is left in the buffer is treated as a TZ string if it starts
	 * and ends with a newline and there is room for two more types.
	 */
	if (doextend && nread > 2 &&
		up->buf[0] == '\n' && up->buf[nread - 1] == '\n' &&
		sp->typecnt + 2 <= TZ_MAX_TYPES)
	{
		struct state *ts = &lsp->u.st;

		up->buf[nread - 1] = '\0';
		if (tzparse(&up->buf[1], ts, false))
		{
			/*
			 * Attempt to reuse existing abbreviations. Without this,
			 * America/Anchorage would be right on the edge after 2037 when
			 * TZ_MAX_CHARS is 50, as sp->charcnt equals 40 (for LMT AST AWT
			 * APT AHST AHDT YST AKDT AKST) and ts->charcnt equals 10 (for
			 * AKST AKDT).  Reusing means sp->charcnt can stay 40 in this
			 * example.
			 */
			int			gotabbr = 0;
			int			charcnt = sp->charcnt;

			for (i = 0; i < ts->typecnt; i++)
			{
				char	   *tsabbr = ts->chars + ts->ttis[i].tt_desigidx;
				int			j;

				/* Look for the abbreviation at any offset in sp->chars. */
				for (j = 0; j < charcnt; j++)
					if (strcmp(sp->chars + j, tsabbr) == 0)
					{
						ts->ttis[i].tt_desigidx = j;
						gotabbr++;
						break;
					}

				/* Not found: append it, provided it still fits. */
				if (!(j < charcnt))
				{
					int			tsabbrlen = strlen(tsabbr);

					if (j + tsabbrlen < TZ_MAX_CHARS)
					{
						strcpy(sp->chars + j, tsabbr);
						charcnt = j + tsabbrlen + 1;
						ts->ttis[i].tt_desigidx = j;
						gotabbr++;
					}
				}
			}

			/* Merge only if every abbreviation could be placed. */
			if (gotabbr == ts->typecnt)
			{
				sp->charcnt = charcnt;

				/*
				 * Ignore any trailing, no-op transitions generated by zic as
				 * they don't help here and can run afoul of bugs in zic 2016j
				 * or earlier.
				 */
				while (1 < sp->timecnt
					   && (sp->types[sp->timecnt - 1]
						   == sp->types[sp->timecnt - 2]))
					sp->timecnt--;

				/*
				 * Skip TZ-string transitions that are not later than the
				 * last transition already loaded, then append the rest.
				 */
				for (i = 0; i < ts->timecnt; i++)
					if (sp->timecnt == 0
						|| (sp->ats[sp->timecnt - 1]
							< ts->ats[i] + leapcorr(sp, ts->ats[i])))
						break;
				while (i < ts->timecnt
					   && sp->timecnt < TZ_MAX_TIMES)
				{
					sp->ats[sp->timecnt]
						= ts->ats[i] + leapcorr(sp, ts->ats[i]);
					sp->types[sp->timecnt] = (sp->typecnt
											  + ts->types[i]);
					sp->timecnt++;
					i++;
				}
				for (i = 0; i < ts->typecnt; i++)
					sp->ttis[sp->typecnt++] = ts->ttis[i];
			}
		}
	}
	if (sp->typecnt == 0)
		return EINVAL;

	/*
	 * goback/goahead are set when an equivalent type recurs exactly
	 * SECSPERREPEAT seconds away from the first/last transition.
	 */
	if (sp->timecnt > 1)
	{
		for (i = 1; i < sp->timecnt; ++i)
			if (typesequiv(sp, sp->types[i], sp->types[0]) &&
				differ_by_repeat(sp->ats[i], sp->ats[0]))
			{
				sp->goback = true;
				break;
			}
		for (i = sp->timecnt - 2; i >= 0; --i)
			if (typesequiv(sp, sp->types[sp->timecnt - 1],
						   sp->types[i]) &&
				differ_by_repeat(sp->ats[sp->timecnt - 1],
								 sp->ats[i]))
			{
				sp->goahead = true;
				break;
			}
	}

	/*
	 * Infer sp->defaulttype from the data.  Although this default type is
	 * always zero for data from recent tzdb releases, things are trickier for
	 * data from tzdb 2018e or earlier.
	 *
	 * The first set of heuristics work around bugs in 32-bit data generated
	 * by tzdb 2013c or earlier.  The workaround is for zones like
	 * Australia/Macquarie where timestamps before the first transition have a
	 * time type that is not the earliest standard-time type.  See:
	 * https://mm.icann.org/pipermail/tz/2013-May/019368.html
	 */

	/*
	 * If type 0 is unused in transitions, it's the type to use for early
	 * times.
	 */
	for (i = 0; i < sp->timecnt; ++i)
		if (sp->types[i] == 0)
			break;
	/* From here on, i < 0 means "no default type chosen yet". */
	i = i < sp->timecnt ? -1 : 0;

	/*
	 * Absent the above, if there are transition times and the first
	 * transition is to a daylight time find the standard type less than and
	 * closest to the type of the first transition.
	 */
	if (i < 0 && sp->timecnt > 0 && sp->ttis[sp->types[0]].tt_isdst)
	{
		i = sp->types[0];
		while (--i >= 0)
			if (!sp->ttis[i].tt_isdst)
				break;
	}

	/*
	 * The next heuristics are for data generated by tzdb 2018e or earlier,
	 * for zones like EST5EDT where the first transition is to DST.
	 */

	/*
	 * If no result yet, find the first standard type. If there is none, punt
	 * to type zero.
	 */
	if (i < 0)
	{
		i = 0;
		while (sp->ttis[i].tt_isdst)
			if (++i >= sp->typecnt)
			{
				i = 0;
				break;
			}
	}

	/*
	 * A simple 'sp->defaulttype = 0;' would suffice here if we didn't have to
	 * worry about 2018e-or-earlier data.  Even simpler would be to remove the
	 * defaulttype member and just use 0 in its place.
	 */
	sp->defaulttype = i;

	return 0;
}
+ */ +int +tzload(const char *name, char *canonname, struct state *sp, bool doextend) +{ + union local_storage *lsp = malloc(sizeof *lsp); + + if (!lsp) + return errno; + else + { + int err = tzloadbody(name, canonname, sp, doextend, lsp); + + free(lsp); + return err; + } +} + +static bool +typesequiv(const struct state *sp, int a, int b) +{ + bool result; + + if (sp == NULL || + a < 0 || a >= sp->typecnt || + b < 0 || b >= sp->typecnt) + result = false; + else + { + const struct ttinfo *ap = &sp->ttis[a]; + const struct ttinfo *bp = &sp->ttis[b]; + + result = (ap->tt_utoff == bp->tt_utoff + && ap->tt_isdst == bp->tt_isdst + && ap->tt_ttisstd == bp->tt_ttisstd + && ap->tt_ttisut == bp->tt_ttisut + && (strcmp(&sp->chars[ap->tt_desigidx], + &sp->chars[bp->tt_desigidx]) + == 0)); + } + return result; +} + +static const int mon_lengths[2][MONSPERYEAR] = { + {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, + {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31} +}; + +static const int year_lengths[2] = { + DAYSPERNYEAR, DAYSPERLYEAR +}; + +/* + * Given a pointer into a timezone string, scan until a character that is not + * a valid character in a time zone abbreviation is found. + * Return a pointer to that character. + */ + +static const char * +getzname(const char *strp) +{ + char c; + + while ((c = *strp) != '\0' && !is_digit(c) && c != ',' && c != '-' && + c != '+') + ++strp; + return strp; +} + +/* + * Given a pointer into an extended timezone string, scan until the ending + * delimiter of the time zone abbreviation is located. + * Return a pointer to the delimiter. + * + * As with getzname above, the legal character set is actually quite + * restricted, with other characters producing undefined results. + * We don't do any checking here; checking is done later in common-case code. 
+ */ + +static const char * +getqzname(const char *strp, const int delim) +{ + int c; + + while ((c = *strp) != '\0' && c != delim) + ++strp; + return strp; +} + +/* + * Given a pointer into a timezone string, extract a number from that string. + * Check that the number is within a specified range; if it is not, return + * NULL. + * Otherwise, return a pointer to the first character not part of the number. + */ + +static const char * +getnum(const char *strp, int *const nump, const int min, const int max) +{ + char c; + int num; + + if (strp == NULL || !is_digit(c = *strp)) + return NULL; + num = 0; + do + { + num = num * 10 + (c - '0'); + if (num > max) + return NULL; /* illegal value */ + c = *++strp; + } while (is_digit(c)); + if (num < min) + return NULL; /* illegal value */ + *nump = num; + return strp; +} + +/* + * Given a pointer into a timezone string, extract a number of seconds, + * in hh[:mm[:ss]] form, from the string. + * If any error occurs, return NULL. + * Otherwise, return a pointer to the first character not part of the number + * of seconds. + */ + +static const char * +getsecs(const char *strp, int32 *const secsp) +{ + int num; + + /* + * 'HOURSPERDAY * DAYSPERWEEK - 1' allows quasi-Posix rules like + * "M10.4.6/26", which does not conform to Posix, but which specifies the + * equivalent of "02:00 on the first Sunday on or after 23 Oct". + */ + strp = getnum(strp, &num, 0, HOURSPERDAY * DAYSPERWEEK - 1); + if (strp == NULL) + return NULL; + *secsp = num * (int32) SECSPERHOUR; + if (*strp == ':') + { + ++strp; + strp = getnum(strp, &num, 0, MINSPERHOUR - 1); + if (strp == NULL) + return NULL; + *secsp += num * SECSPERMIN; + if (*strp == ':') + { + ++strp; + /* 'SECSPERMIN' allows for leap seconds. */ + strp = getnum(strp, &num, 0, SECSPERMIN); + if (strp == NULL) + return NULL; + *secsp += num; + } + } + return strp; +} + +/* + * Given a pointer into a timezone string, extract an offset, in + * [+-]hh[:mm[:ss]] form, from the string. 
+ * If any error occurs, return NULL. + * Otherwise, return a pointer to the first character not part of the time. + */ + +static const char * +getoffset(const char *strp, int32 *const offsetp) +{ + bool neg = false; + + if (*strp == '-') + { + neg = true; + ++strp; + } + else if (*strp == '+') + ++strp; + strp = getsecs(strp, offsetp); + if (strp == NULL) + return NULL; /* illegal time */ + if (neg) + *offsetp = -*offsetp; + return strp; +} + +/* + * Given a pointer into a timezone string, extract a rule in the form + * date[/time]. See POSIX section 8 for the format of "date" and "time". + * If a valid rule is not found, return NULL. + * Otherwise, return a pointer to the first character not part of the rule. + */ + +static const char * +getrule(const char *strp, struct rule *const rulep) +{ + if (*strp == 'J') + { + /* + * Julian day. + */ + rulep->r_type = JULIAN_DAY; + ++strp; + strp = getnum(strp, &rulep->r_day, 1, DAYSPERNYEAR); + } + else if (*strp == 'M') + { + /* + * Month, week, day. + */ + rulep->r_type = MONTH_NTH_DAY_OF_WEEK; + ++strp; + strp = getnum(strp, &rulep->r_mon, 1, MONSPERYEAR); + if (strp == NULL) + return NULL; + if (*strp++ != '.') + return NULL; + strp = getnum(strp, &rulep->r_week, 1, 5); + if (strp == NULL) + return NULL; + if (*strp++ != '.') + return NULL; + strp = getnum(strp, &rulep->r_day, 0, DAYSPERWEEK - 1); + } + else if (is_digit(*strp)) + { + /* + * Day of year. + */ + rulep->r_type = DAY_OF_YEAR; + strp = getnum(strp, &rulep->r_day, 0, DAYSPERLYEAR - 1); + } + else + return NULL; /* invalid format */ + if (strp == NULL) + return NULL; + if (*strp == '/') + { + /* + * Time specified. + */ + ++strp; + strp = getoffset(strp, &rulep->r_time); + } + else + rulep->r_time = 2 * SECSPERHOUR; /* default = 2:00:00 */ + return strp; +} + +/* + * Given a year, a rule, and the offset from UT at the time that rule takes + * effect, calculate the year-relative time that rule takes effect. 
/*
 * Given a year, a rule, and the offset from UT at the time that rule takes
 * effect, calculate the year-relative time that rule takes effect.
 *
 * The result is in seconds counted from 00:00:00 on January 1 of YEAR.
 */

static int32
transtime(const int year, const struct rule *const rulep,
		  const int32 offset)
{
	bool		leapyear;
	int32		value;
	int			i;
	int			d,
				m1,
				yy0,
				yy1,
				yy2,
				dow;

	INITIALIZE(value);
	leapyear = isleap(year);
	switch (rulep->r_type)
	{

		case JULIAN_DAY:

			/*
			 * Jn - Julian day, 1 == January 1, 60 == March 1 even in leap
			 * years. In non-leap years, or if the day number is 59 or less,
			 * just add SECSPERDAY times the day number-1 to the time of
			 * January 1, midnight, to get the day.
			 */
			value = (rulep->r_day - 1) * SECSPERDAY;
			if (leapyear && rulep->r_day >= 60)
				value += SECSPERDAY;
			break;

		case DAY_OF_YEAR:

			/*
			 * n - day of year. Just add SECSPERDAY times the day number to
			 * the time of January 1, midnight, to get the day.
			 */
			value = rulep->r_day * SECSPERDAY;
			break;

		case MONTH_NTH_DAY_OF_WEEK:

			/*
			 * Mm.n.d - nth "dth day" of month m.
			 */

			/*
			 * Use Zeller's Congruence to get day-of-week of first day of
			 * month.
			 */
			m1 = (rulep->r_mon + 9) % 12 + 1;
			yy0 = (rulep->r_mon <= 2) ? (year - 1) : year;
			yy1 = yy0 / 100;
			yy2 = yy0 % 100;
			dow = ((26 * m1 - 2) / 10 +
				   1 + yy2 + yy2 / 4 + yy1 / 4 - 2 * yy1) % 7;
			if (dow < 0)
				dow += DAYSPERWEEK;

			/*
			 * "dow" is the day-of-week of the first day of the month. Get the
			 * day-of-month (zero-origin) of the first "dow" day of the month.
			 */
			d = rulep->r_day - dow;
			if (d < 0)
				d += DAYSPERWEEK;

			/* Advance by whole weeks, but never past the end of the month. */
			for (i = 1; i < rulep->r_week; ++i)
			{
				if (d + DAYSPERWEEK >=
					mon_lengths[(int) leapyear][rulep->r_mon - 1])
					break;
				d += DAYSPERWEEK;
			}

			/*
			 * "d" is the day-of-month (zero-origin) of the day we want.
			 */
			value = d * SECSPERDAY;
			for (i = 0; i < rulep->r_mon - 1; ++i)
				value += mon_lengths[(int) leapyear][i] * SECSPERDAY;
			break;
	}

	/*
	 * "value" is the year-relative time of 00:00:00 UT on the day in
	 * question. To get the year-relative time of the specified local time on
	 * that day, add the transition time and the current offset from UT.
	 */
	return value + rulep->r_time + offset;
}

/*
 * Given a POSIX section 8-style TZ string, fill in the rule tables as
 * appropriate.
 * Returns true on success, false on failure.
 *
 * If LASTDITCH is true, the whole of NAME is taken as the standard-time
 * abbreviation and the offset is zero; no offset, DST name or rules are
 * parsed in that case.
 *
 * On failure *SP may have been partially modified.
 */
bool
tzparse(const char *name, struct state *sp, bool lastditch)
{
	const char *stdname;
	const char *dstname = NULL;
	size_t		stdlen;
	size_t		dstlen;
	size_t		charcnt;
	int32		stdoffset;
	int32		dstoffset;
	char	   *cp;
	bool		load_ok;

	stdname = name;
	if (lastditch)
	{
		/* Unlike IANA, don't assume name is exactly "GMT" */
		stdlen = strlen(name);	/* length of standard zone name */
		name += stdlen;
		stdoffset = 0;
	}
	else
	{
		/* The abbreviation is either <quoted> or a bare run of characters. */
		if (*name == '<')
		{
			name++;
			stdname = name;
			name = getqzname(name, '>');
			if (*name != '>')
				return false;
			stdlen = name - stdname;
			name++;
		}
		else
		{
			name = getzname(name);
			stdlen = name - stdname;
		}
		if (*name == '\0')		/* we allow empty STD abbrev, unlike IANA */
			return false;
		name = getoffset(name, &stdoffset);
		if (name == NULL)
			return false;
	}
	charcnt = stdlen + 1;
	if (sizeof sp->chars < charcnt)
		return false;

	/*
	 * The IANA code always tries to tzload(TZDEFRULES) here.  We do not want
	 * to do that; it would be bad news in the lastditch case, where we can't
	 * assume pg_open_tzfile() is sane yet.  Moreover, if we did load it and
	 * it contains leap-second-dependent info, that would cause problems too.
	 * Finally, IANA has deprecated the TZDEFRULES feature, so it presumably
	 * will die at some point.  Desupporting it now seems like good
	 * future-proofing.
	 */
	load_ok = false;
	sp->goback = sp->goahead = false;	/* simulate failed tzload() */
	sp->leapcnt = 0;			/* intentionally assume no leap seconds */

	if (*name != '\0')
	{
		/* Something follows the standard offset: a DST abbreviation. */
		if (*name == '<')
		{
			dstname = ++name;
			name = getqzname(name, '>');
			if (*name != '>')
				return false;
			dstlen = name - dstname;
			name++;
		}
		else
		{
			dstname = name;
			name = getzname(name);
			dstlen = name - dstname;	/* length of DST abbr. */
		}
		if (!dstlen)
			return false;
		charcnt += dstlen + 1;
		if (sizeof sp->chars < charcnt)
			return false;

		/* An explicit DST offset is optional; the default is one hour. */
		if (*name != '\0' && *name != ',' && *name != ';')
		{
			name = getoffset(name, &dstoffset);
			if (name == NULL)
				return false;
		}
		else
			dstoffset = stdoffset - SECSPERHOUR;

		/* With no rules given, substitute the built-in default rules. */
		if (*name == '\0' && !load_ok)
			name = TZDEFRULESTRING;
		if (*name == ',' || *name == ';')
		{
			struct rule start;
			struct rule end;
			int			year;
			int			yearlim;
			int			timecnt;
			pg_time_t	janfirst;
			int32		janoffset = 0;
			int			yearbeg;

			++name;
			if ((name = getrule(name, &start)) == NULL)
				return false;
			if (*name++ != ',')
				return false;
			if ((name = getrule(name, &end)) == NULL)
				return false;
			if (*name != '\0')
				return false;
			sp->typecnt = 2;	/* standard time and DST */

			/*
			 * Two transitions per year, from EPOCH_YEAR forward.
			 */
			init_ttinfo(&sp->ttis[0], -stdoffset, false, 0);
			init_ttinfo(&sp->ttis[1], -dstoffset, true, stdlen + 1);
			sp->defaulttype = 0;
			timecnt = 0;
			janfirst = 0;
			yearbeg = EPOCH_YEAR;

			/*
			 * Step yearbeg/janfirst backwards one year at a time, stopping
			 * after YEARSPERREPEAT / 2 years or when janfirst would
			 * overflow; in the latter case the pending step is remembered
			 * in janoffset.
			 */
			do
			{
				int32		yearsecs
				= year_lengths[isleap(yearbeg - 1)] * SECSPERDAY;

				yearbeg--;
				if (increment_overflow_time(&janfirst, -yearsecs))
				{
					janoffset = -yearsecs;
					break;
				}
			} while (EPOCH_YEAR - YEARSPERREPEAT / 2 < yearbeg);

			yearlim = yearbeg + YEARSPERREPEAT + 1;
			for (year = yearbeg; year < yearlim; year++)
			{
				int32
							starttime = transtime(year, &start, stdoffset),
							endtime = transtime(year, &end, dstoffset);
				int32
							yearsecs = (year_lengths[isleap(year)]
										* SECSPERDAY);
				bool		reversed = endtime < starttime;

				/* Order the two transitions within the year. */
				if (reversed)
				{
					int32		swap = starttime;

					starttime = endtime;
					endtime = swap;
				}
				if (reversed
					|| (starttime < endtime
						&& (endtime - starttime
							< (yearsecs
							   + (stdoffset - dstoffset)))))
				{
					if (TZ_MAX_TIMES - 2 < timecnt)
						break;

					/* A transition is recorded only if it does not overflow. */
					sp->ats[timecnt] = janfirst;
					if (!increment_overflow_time
						(&sp->ats[timecnt],
						 janoffset + starttime))
						sp->types[timecnt++] = !reversed;
					sp->ats[timecnt] = janfirst;
					if (!increment_overflow_time
						(&sp->ats[timecnt],
						 janoffset + endtime))
					{
						sp->types[timecnt++] = reversed;
						yearlim = year + YEARSPERREPEAT + 1;
					}
				}
				if (increment_overflow_time
					(&janfirst, janoffset + yearsecs))
					break;
				janoffset = 0;
			}
			sp->timecnt = timecnt;
			if (!timecnt)
			{
				sp->ttis[0] = sp->ttis[1];
				sp->typecnt = 1;	/* Perpetual DST.  */
			}
			else if (YEARSPERREPEAT < year - yearbeg)
				sp->goback = sp->goahead = true;
		}
		else
		{
			/*
			 * NOTE(review): load_ok is assigned false above and never
			 * changed in this function, so *name cannot be '\0' when control
			 * arrives here and the check just below always returns false.
			 * The rest of this branch therefore looks unreachable;
			 * presumably it is kept to stay close to the upstream code.
			 */
			int32		theirstdoffset;
			int32		theirdstoffset;
			int32		theiroffset;
			bool		isdst;
			int			i;
			int			j;

			if (*name != '\0')
				return false;

			/*
			 * Initial values of theirstdoffset and theirdstoffset.
			 */
			theirstdoffset = 0;
			for (i = 0; i < sp->timecnt; ++i)
			{
				j = sp->types[i];
				if (!sp->ttis[j].tt_isdst)
				{
					theirstdoffset =
						-sp->ttis[j].tt_utoff;
					break;
				}
			}
			theirdstoffset = 0;
			for (i = 0; i < sp->timecnt; ++i)
			{
				j = sp->types[i];
				if (sp->ttis[j].tt_isdst)
				{
					theirdstoffset =
						-sp->ttis[j].tt_utoff;
					break;
				}
			}

			/*
			 * Initially we're assumed to be in standard time.
			 */
			isdst = false;
			theiroffset = theirstdoffset;

			/*
			 * Now juggle transition times and types tracking offsets as you
			 * do.
			 */
			for (i = 0; i < sp->timecnt; ++i)
			{
				j = sp->types[i];
				sp->types[i] = sp->ttis[j].tt_isdst;
				if (sp->ttis[j].tt_ttisut)
				{
					/* No adjustment to transition time */
				}
				else
				{
					/*
					 * If daylight saving time is in effect, and the
					 * transition time was not specified as standard time, add
					 * the daylight saving time offset to the transition time;
					 * otherwise, add the standard time offset to the
					 * transition time.
					 */
					/*
					 * Transitions from DST to DDST will effectively disappear
					 * since POSIX provides for only one DST offset.
					 */
					if (isdst && !sp->ttis[j].tt_ttisstd)
					{
						sp->ats[i] += dstoffset -
							theirdstoffset;
					}
					else
					{
						sp->ats[i] += stdoffset -
							theirstdoffset;
					}
				}
				theiroffset = -sp->ttis[j].tt_utoff;
				if (sp->ttis[j].tt_isdst)
					theirdstoffset = theiroffset;
				else
					theirstdoffset = theiroffset;
			}

			/*
			 * Finally, fill in ttis.
			 */
			init_ttinfo(&sp->ttis[0], -stdoffset, false, 0);
			init_ttinfo(&sp->ttis[1], -dstoffset, true, stdlen + 1);
			sp->typecnt = 2;
			sp->defaulttype = 0;
		}
	}
	else
	{
		dstlen = 0;
		sp->typecnt = 1;		/* only standard time */
		sp->timecnt = 0;
		init_ttinfo(&sp->ttis[0], -stdoffset, false, 0);
		sp->defaulttype = 0;
	}

	/*
	 * Store the abbreviations: the standard name at offset 0 and, if there
	 * is one, the DST name right after its terminating NUL (offset
	 * stdlen + 1, matching the index given to init_ttinfo above).
	 */
	sp->charcnt = charcnt;
	cp = sp->chars;
	memcpy(cp, stdname, stdlen);
	cp += stdlen;
	*cp++ = '\0';
	if (dstlen != 0)
	{
		memcpy(cp, dstname, dstlen);
		*(cp + dstlen) = '\0';
	}
	return true;
}

/*
 * Fill *SP with the state for the zone named by "gmt": try to load it from
 * a tz file first and, if that fails, build it with tzparse() in lastditch
 * mode.  The result of that fallback tzparse() call is not checked.
 */
static void
gmtload(struct state *const sp)
{
	if (tzload(gmt, NULL, sp, true) != 0)
		tzparse(gmt, sp, true);
}


/*
 * The easy way to behave "as if no library function calls" localtime
 * is to not call it, so we drop its guts into "localsub", which can be
 * freely called. (And no, the PANS doesn't require the above behavior,
 * but it *is* desirable.)
 *
 * Converts *TIMEP to broken-down time in *TMP using the zone data in *SP.
 * Returns TMP on success and NULL on failure.  A NULL SP falls back to
 * gmtsub() with a zero offset.
 */
static struct pg_tm *
localsub(struct state const *sp, pg_time_t const *timep,
		 struct pg_tm *const tmp)
{
	const struct ttinfo *ttisp;
	int			i;
	struct pg_tm *result;
	const pg_time_t t = *timep;

	if (sp == NULL)
		return gmtsub(timep, 0, tmp);

	/*
	 * The time lies outside the transition table but the table is flagged
	 * as repeating: shift the time by a whole number of repeat cycles so it
	 * lands inside the table, convert that, then adjust the year by the
	 * same number of years.
	 */
	if ((sp->goback && t < sp->ats[0]) ||
		(sp->goahead && t > sp->ats[sp->timecnt - 1]))
	{
		pg_time_t	newt = t;
		pg_time_t	seconds;
		pg_time_t	years;

		if (t < sp->ats[0])
			seconds = sp->ats[0] - t;
		else
			seconds = t - sp->ats[sp->timecnt - 1];
		--seconds;
		years = (seconds / SECSPERREPEAT + 1) * YEARSPERREPEAT;
		seconds = years * AVGSECSPERYEAR;
		if (t < sp->ats[0])
			newt += seconds;
		else
			newt -= seconds;
		if (newt < sp->ats[0] ||
			newt > sp->ats[sp->timecnt - 1])
			return NULL;		/* "cannot happen" */
		result = localsub(sp, &newt, tmp);
		if (result)
		{
			int64		newy;

			newy = result->tm_year;
			if (t < sp->ats[0])
				newy -= years;
			else
				newy += years;
			if (!(INT_MIN <= newy && newy <= INT_MAX))
				return NULL;
			result->tm_year = newy;
		}
		return result;
	}
	if (sp->timecnt == 0 || t < sp->ats[0])
	{
		/* Before the first transition (or no transitions at all). */
		i = sp->defaulttype;
	}
	else
	{
		/*
		 * Binary search for the first transition later than t; the type in
		 * effect is that of the transition just before it.
		 */
		int			lo = 1;
		int			hi = sp->timecnt;

		while (lo < hi)
		{
			int			mid = (lo + hi) >> 1;

			if (t < sp->ats[mid])
				hi = mid;
			else
				lo = mid + 1;
		}
		i = (int) sp->types[lo - 1];
	}
	ttisp = &sp->ttis[i];

	/*
	 * To get (wrong) behavior that's compatible with System V Release 2.0
	 * you'd replace the statement below with t += ttisp->tt_utoff;
	 * timesub(&t, 0L, sp, tmp);
	 */
	result = timesub(&t, ttisp->tt_utoff, sp, tmp);
	if (result)
	{
		result->tm_isdst = ttisp->tt_isdst;
		result->tm_zone = unconstify(char *, &sp->chars[ttisp->tt_desigidx]);
	}
	return result;
}


/*
 * Convert *TIMEP to broken-down local time in zone TZ.
 *
 * The result is written to the file-scope thread-local "tm" object, so it
 * is overwritten by the next pg_localtime() or pg_gmtime() call made on the
 * same thread.  Returns NULL if localsub() fails.
 */
struct pg_tm *
pg_localtime(const pg_time_t *timep, const pg_tz *tz)
{
	return localsub(&tz->state, timep, &tm);
}


/*
 * gmtsub is to gmtime as localsub is to localtime.
 *
 * Except we have a private "struct state" for GMT, so no sp is passed in.
 *
 * That state is allocated and loaded on the first call on each thread and
 * is not freed here.  Returns NULL if the allocation fails or if timesub()
 * fails.
 */

static struct pg_tm *
gmtsub(pg_time_t const *timep, int32 offset,
	   struct pg_tm *tmp)
{
	struct pg_tm *result;

	/* GMT timezone state data is kept here */
	static __thread struct state *gmtptr = NULL;

	if (gmtptr == NULL)
	{
		/* Allocate on first use */
		gmtptr = (struct state *) malloc(sizeof(struct state));
		if (gmtptr == NULL)
			return NULL;		/* errno should be set by malloc */
		gmtload(gmtptr);
	}

	result = timesub(timep, offset, gmtptr, tmp);

	/*
	 * Could get fancy here and deliver something such as "+xx" or "-xx" if
	 * offset is non-zero, but this is no time for a treasure hunt.
	 *
	 * Note that tm_zone is assigned even when timesub() returned NULL.
	 */
	if (offset != 0)
		tmp->tm_zone = wildabbr;
	else
		tmp->tm_zone = gmtptr->chars;

	return result;
}

/*
 * Convert *TIMEP to broken-down time with a zero UT offset.
 *
 * Like pg_localtime(), the result is written to the file-scope thread-local
 * "tm" object.
 */
struct pg_tm *
pg_gmtime(const pg_time_t *timep)
{
	return gmtsub(timep, 0, &tm);
}
+ */ + +static int +leaps_thru_end_of_nonneg(int y) +{ + return y / 4 - y / 100 + y / 400; +} + +static int +leaps_thru_end_of(const int y) +{ + return (y < 0 + ? -1 - leaps_thru_end_of_nonneg(-1 - y) + : leaps_thru_end_of_nonneg(y)); +} + +static struct pg_tm * +timesub(const pg_time_t *timep, int32 offset, + const struct state *sp, struct pg_tm *tmp) +{ + const struct lsinfo *lp; + pg_time_t tdays; + int idays; /* unsigned would be so 2003 */ + int64 rem; + int y; + const int *ip; + int64 corr; + bool hit; + int i; + + corr = 0; + hit = false; + i = (sp == NULL) ? 0 : sp->leapcnt; + while (--i >= 0) + { + lp = &sp->lsis[i]; + if (*timep >= lp->ls_trans) + { + corr = lp->ls_corr; + hit = (*timep == lp->ls_trans + && (i == 0 ? 0 : lp[-1].ls_corr) < corr); + break; + } + } + y = EPOCH_YEAR; + tdays = *timep / SECSPERDAY; + rem = *timep % SECSPERDAY; + while (tdays < 0 || tdays >= year_lengths[isleap(y)]) + { + int newy; + pg_time_t tdelta; + int idelta; + int leapdays; + + tdelta = tdays / DAYSPERLYEAR; + if (!((!TYPE_SIGNED(pg_time_t) || INT_MIN <= tdelta) + && tdelta <= INT_MAX)) + goto out_of_range; + idelta = tdelta; + if (idelta == 0) + idelta = (tdays < 0) ? -1 : 1; + newy = y; + if (increment_overflow(&newy, idelta)) + goto out_of_range; + leapdays = leaps_thru_end_of(newy - 1) - + leaps_thru_end_of(y - 1); + tdays -= ((pg_time_t) newy - y) * DAYSPERNYEAR; + tdays -= leapdays; + y = newy; + } + + /* + * Given the range, we can now fearlessly cast... 
+ */ + idays = tdays; + rem += offset - corr; + while (rem < 0) + { + rem += SECSPERDAY; + --idays; + } + while (rem >= SECSPERDAY) + { + rem -= SECSPERDAY; + ++idays; + } + while (idays < 0) + { + if (increment_overflow(&y, -1)) + goto out_of_range; + idays += year_lengths[isleap(y)]; + } + while (idays >= year_lengths[isleap(y)]) + { + idays -= year_lengths[isleap(y)]; + if (increment_overflow(&y, 1)) + goto out_of_range; + } + tmp->tm_year = y; + if (increment_overflow(&tmp->tm_year, -TM_YEAR_BASE)) + goto out_of_range; + tmp->tm_yday = idays; + + /* + * The "extra" mods below avoid overflow problems. + */ + tmp->tm_wday = EPOCH_WDAY + + ((y - EPOCH_YEAR) % DAYSPERWEEK) * + (DAYSPERNYEAR % DAYSPERWEEK) + + leaps_thru_end_of(y - 1) - + leaps_thru_end_of(EPOCH_YEAR - 1) + + idays; + tmp->tm_wday %= DAYSPERWEEK; + if (tmp->tm_wday < 0) + tmp->tm_wday += DAYSPERWEEK; + tmp->tm_hour = (int) (rem / SECSPERHOUR); + rem %= SECSPERHOUR; + tmp->tm_min = (int) (rem / SECSPERMIN); + + /* + * A positive leap second requires a special representation. This uses + * "... ??:59:60" et seq. + */ + tmp->tm_sec = (int) (rem % SECSPERMIN) + hit; + ip = mon_lengths[isleap(y)]; + for (tmp->tm_mon = 0; idays >= ip[tmp->tm_mon]; ++(tmp->tm_mon)) + idays -= ip[tmp->tm_mon]; + tmp->tm_mday = (int) (idays + 1); + tmp->tm_isdst = 0; + tmp->tm_gmtoff = offset; + return tmp; + +out_of_range: + errno = EOVERFLOW; + return NULL; +} + +/* + * Normalize logic courtesy Paul Eggert. + */ + +static bool +increment_overflow(int *ip, int j) +{ + int const i = *ip; + + /*---------- + * If i >= 0 there can only be overflow if i + j > INT_MAX + * or if j > INT_MAX - i; given i >= 0, INT_MAX - i cannot overflow. + * If i < 0 there can only be overflow if i + j < INT_MIN + * or if j < INT_MIN - i; given i < 0, INT_MIN - i cannot overflow. + *---------- + */ + if ((i >= 0) ? 
(j > INT_MAX - i) : (j < INT_MIN - i)) + return true; + *ip += j; + return false; +} + +static bool +increment_overflow_time(pg_time_t *tp, int32 j) +{ + /*---------- + * This is like + * 'if (! (TIME_T_MIN <= *tp + j && *tp + j <= TIME_T_MAX)) ...', + * except that it does the right thing even if *tp + j would overflow. + *---------- + */ + if (!(j < 0 + ? (TYPE_SIGNED(pg_time_t) ? TIME_T_MIN - j <= *tp : -1 - j < *tp) + : *tp <= TIME_T_MAX - j)) + return true; + *tp += j; + return false; +} + +static int64 +leapcorr(struct state const *sp, pg_time_t t) +{ + struct lsinfo const *lp; + int i; + + i = sp->leapcnt; + while (--i >= 0) + { + lp = &sp->lsis[i]; + if (t >= lp->ls_trans) + return lp->ls_corr; + } + return 0; +} + +/* + * Find the next DST transition time in the given zone after the given time + * + * *timep and *tz are input arguments, the other parameters are output values. + * + * When the function result is 1, *boundary is set to the pg_time_t + * representation of the next DST transition time after *timep, + * *before_gmtoff and *before_isdst are set to the GMT offset and isdst + * state prevailing just before that boundary (in particular, the state + * prevailing at *timep), and *after_gmtoff and *after_isdst are set to + * the state prevailing just after that boundary. + * + * When the function result is 0, there is no known DST transition + * after *timep, but *before_gmtoff and *before_isdst indicate the GMT + * offset and isdst state prevailing at *timep. (This would occur in + * DST-less time zones, or if a zone has permanently ceased using DST.) + * + * A function result of -1 indicates failure (this case does not actually + * occur in our current implementation). 
+ */ +int +pg_next_dst_boundary(const pg_time_t *timep, + long int *before_gmtoff, + int *before_isdst, + pg_time_t *boundary, + long int *after_gmtoff, + int *after_isdst, + const pg_tz *tz) +{ + const struct state *sp; + const struct ttinfo *ttisp; + int i; + int j; + const pg_time_t t = *timep; + + sp = &tz->state; + if (sp->timecnt == 0) + { + /* non-DST zone, use lowest-numbered standard type */ + i = 0; + while (sp->ttis[i].tt_isdst) + if (++i >= sp->typecnt) + { + i = 0; + break; + } + ttisp = &sp->ttis[i]; + *before_gmtoff = ttisp->tt_utoff; + *before_isdst = ttisp->tt_isdst; + return 0; + } + if ((sp->goback && t < sp->ats[0]) || + (sp->goahead && t > sp->ats[sp->timecnt - 1])) + { + /* For values outside the transition table, extrapolate */ + pg_time_t newt = t; + pg_time_t seconds; + pg_time_t tcycles; + int64 icycles; + int result; + + if (t < sp->ats[0]) + seconds = sp->ats[0] - t; + else + seconds = t - sp->ats[sp->timecnt - 1]; + --seconds; + tcycles = seconds / YEARSPERREPEAT / AVGSECSPERYEAR; + ++tcycles; + icycles = tcycles; + if (tcycles - icycles >= 1 || icycles - tcycles >= 1) + return -1; + seconds = icycles; + seconds *= YEARSPERREPEAT; + seconds *= AVGSECSPERYEAR; + if (t < sp->ats[0]) + newt += seconds; + else + newt -= seconds; + if (newt < sp->ats[0] || + newt > sp->ats[sp->timecnt - 1]) + return -1; /* "cannot happen" */ + + result = pg_next_dst_boundary(&newt, before_gmtoff, + before_isdst, + boundary, + after_gmtoff, + after_isdst, + tz); + if (t < sp->ats[0]) + *boundary -= seconds; + else + *boundary += seconds; + return result; + } + + if (t >= sp->ats[sp->timecnt - 1]) + { + /* No known transition > t, so use last known segment's type */ + i = sp->types[sp->timecnt - 1]; + ttisp = &sp->ttis[i]; + *before_gmtoff = ttisp->tt_utoff; + *before_isdst = ttisp->tt_isdst; + return 0; + } + if (t < sp->ats[0]) + { + /* For "before", use lowest-numbered standard type */ + i = 0; + while (sp->ttis[i].tt_isdst) + if (++i >= sp->typecnt) + { + 
i = 0; + break; + } + ttisp = &sp->ttis[i]; + *before_gmtoff = ttisp->tt_utoff; + *before_isdst = ttisp->tt_isdst; + *boundary = sp->ats[0]; + /* And for "after", use the first segment's type */ + i = sp->types[0]; + ttisp = &sp->ttis[i]; + *after_gmtoff = ttisp->tt_utoff; + *after_isdst = ttisp->tt_isdst; + return 1; + } + /* Else search to find the boundary following t */ + { + int lo = 1; + int hi = sp->timecnt - 1; + + while (lo < hi) + { + int mid = (lo + hi) >> 1; + + if (t < sp->ats[mid]) + hi = mid; + else + lo = mid + 1; + } + i = lo; + } + j = sp->types[i - 1]; + ttisp = &sp->ttis[j]; + *before_gmtoff = ttisp->tt_utoff; + *before_isdst = ttisp->tt_isdst; + *boundary = sp->ats[i]; + j = sp->types[i]; + ttisp = &sp->ttis[j]; + *after_gmtoff = ttisp->tt_utoff; + *after_isdst = ttisp->tt_isdst; + return 1; +} + +/* + * Identify a timezone abbreviation's meaning in the given zone + * + * Determine the GMT offset and DST flag associated with the abbreviation. + * This is generally used only when the abbreviation has actually changed + * meaning over time; therefore, we also take a UTC cutoff time, and return + * the meaning in use at or most recently before that time, or the meaning + * in first use after that time if the abbrev was never used before that. + * + * On success, returns true and sets *gmtoff and *isdst. If the abbreviation + * was never used at all in this zone, returns false. + * + * Note: abbrev is matched case-sensitively; it should be all-upper-case. + */ +bool +pg_interpret_timezone_abbrev(const char *abbrev, + const pg_time_t *timep, + long int *gmtoff, + int *isdst, + const pg_tz *tz) +{ + const struct state *sp; + const char *abbrs; + const struct ttinfo *ttisp; + int abbrind; + int cutoff; + int i; + const pg_time_t t = *timep; + + sp = &tz->state; + + /* + * Locate the abbreviation in the zone's abbreviation list. We assume + * there are not duplicates in the list. 
+ */ + abbrs = sp->chars; + abbrind = 0; + while (abbrind < sp->charcnt) + { + if (strcmp(abbrev, abbrs + abbrind) == 0) + break; + while (abbrs[abbrind] != '\0') + abbrind++; + abbrind++; + } + if (abbrind >= sp->charcnt) + return false; /* not there! */ + + /* + * Unlike pg_next_dst_boundary, we needn't sweat about extrapolation + * (goback/goahead zones). Finding the newest or oldest meaning of the + * abbreviation should get us what we want, since extrapolation would just + * be repeating the newest or oldest meanings. + * + * Use binary search to locate the first transition > cutoff time. + */ + { + int lo = 0; + int hi = sp->timecnt; + + while (lo < hi) + { + int mid = (lo + hi) >> 1; + + if (t < sp->ats[mid]) + hi = mid; + else + lo = mid + 1; + } + cutoff = lo; + } + + /* + * Scan backwards to find the latest interval using the given abbrev + * before the cutoff time. + */ + for (i = cutoff - 1; i >= 0; i--) + { + ttisp = &sp->ttis[sp->types[i]]; + if (ttisp->tt_desigidx == abbrind) + { + *gmtoff = ttisp->tt_utoff; + *isdst = ttisp->tt_isdst; + return true; + } + } + + /* + * Not there, so scan forwards to find the first one after. + */ + for (i = cutoff; i < sp->timecnt; i++) + { + ttisp = &sp->ttis[sp->types[i]]; + if (ttisp->tt_desigidx == abbrind) + { + *gmtoff = ttisp->tt_utoff; + *isdst = ttisp->tt_isdst; + return true; + } + } + + return false; /* hm, not actually used in any interval? */ +} + +/* + * If the given timezone uses only one GMT offset, store that offset + * into *gmtoff and return true, else return false. + */ +bool +pg_get_timezone_offset(const pg_tz *tz, long int *gmtoff) +{ + /* + * The zone could have more than one ttinfo, if it's historically used + * more than one abbreviation. We return true as long as they all have + * the same gmtoff. 
+ */ + const struct state *sp; + int i; + + sp = &tz->state; + for (i = 1; i < sp->typecnt; i++) + { + if (sp->ttis[i].tt_utoff != sp->ttis[0].tt_utoff) + return false; + } + *gmtoff = sp->ttis[0].tt_utoff; + return true; +} + +/* + * Return the name of the current timezone + */ +const char * +pg_get_timezone_name(pg_tz *tz) +{ + if (tz) + return tz->TZname; + return NULL; +} + +/* + * Check whether timezone is acceptable. + * + * What we are doing here is checking for leap-second-aware timekeeping. + * We need to reject such TZ settings because they'll wreak havoc with our + * date/time arithmetic. + */ +bool +pg_tz_acceptable(pg_tz *tz) +{ + struct pg_tm *tt; + pg_time_t time2000; + + /* + * To detect leap-second timekeeping, run pg_localtime for what should be + * GMT midnight, 2000-01-01. Insist that the tm_sec value be zero; any + * other result has to be due to leap seconds. + */ + time2000 = (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY; + tt = pg_localtime(&time2000, tz); + if (!tt || tt->tm_sec != 0) + return false; + + return true; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/timezone/pgtz.c b/yql/essentials/parser/pg_wrapper/postgresql/src/timezone/pgtz.c new file mode 100644 index 00000000000..ca8201cd3b5 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/timezone/pgtz.c @@ -0,0 +1,502 @@ +/*------------------------------------------------------------------------- + * + * pgtz.c + * Timezone Library Integration Functions + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/timezone/pgtz.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <ctype.h> +#include <fcntl.h> +#include <time.h> + +#include "common/file_utils.h" +#include "datatype/timestamp.h" +#include "miscadmin.h" +#include "pgtz.h" +#include "storage/fd.h" +#include "utils/hsearch.h" + + +/* Current session timezone 
(controlled by TimeZone GUC) */ +__thread pg_tz *session_timezone = NULL; + +/* Current log timezone (controlled by log_timezone GUC) */ +__thread pg_tz *log_timezone = NULL; + + +static bool scan_directory_ci(const char *dirname, + const char *fname, int fnamelen, + char *canonname, int canonnamelen); + + +/* + * Return full pathname of timezone data directory + */ +static const char * +pg_TZDIR(void) +{ +#ifndef SYSTEMTZDIR + /* normal case: timezone stuff is under our share dir */ + static __thread bool done_tzdir = false; + static __thread char tzdir[MAXPGPATH]; + + if (done_tzdir) + return tzdir; + + get_share_path(my_exec_path, tzdir); + strlcpy(tzdir + strlen(tzdir), "/timezone", MAXPGPATH - strlen(tzdir)); + + done_tzdir = true; + return tzdir; +#else + /* we're configured to use system's timezone database */ + return SYSTEMTZDIR; +#endif +} + + +/* + * Given a timezone name, open() the timezone data file. Return the + * file descriptor if successful, -1 if not. + * + * The input name is searched for case-insensitively (we assume that the + * timezone database does not contain case-equivalent names). + * + * If "canonname" is not NULL, then on success the canonical spelling of the + * given name is stored there (the buffer must be > TZ_STRLEN_MAX bytes!). + */ +int +pg_open_tzfile(const char *name, char *canonname) +{ + const char *fname; + char fullname[MAXPGPATH]; + int fullnamelen; + int orignamelen; + + /* Initialize fullname with base name of tzdata directory */ + strlcpy(fullname, pg_TZDIR(), sizeof(fullname)); + orignamelen = fullnamelen = strlen(fullname); + + if (fullnamelen + 1 + strlen(name) >= MAXPGPATH) + return -1; /* not gonna fit */ + + /* + * If the caller doesn't need the canonical spelling, first just try to + * open the name as-is. 
This can be expected to succeed if the given name + * is already case-correct, or if the filesystem is case-insensitive; and + * we don't need to distinguish those situations if we aren't tasked with + * reporting the canonical spelling. + */ + if (canonname == NULL) + { + int result; + + fullname[fullnamelen] = '/'; + /* test above ensured this will fit: */ + strcpy(fullname + fullnamelen + 1, name); + result = open(fullname, O_RDONLY | PG_BINARY, 0); + if (result >= 0) + return result; + /* If that didn't work, fall through to do it the hard way */ + fullname[fullnamelen] = '\0'; + } + + /* + * Loop to split the given name into directory levels; for each level, + * search using scan_directory_ci(). + */ + fname = name; + for (;;) + { + const char *slashptr; + int fnamelen; + + slashptr = strchr(fname, '/'); + if (slashptr) + fnamelen = slashptr - fname; + else + fnamelen = strlen(fname); + if (!scan_directory_ci(fullname, fname, fnamelen, + fullname + fullnamelen + 1, + MAXPGPATH - fullnamelen - 1)) + return -1; + fullname[fullnamelen++] = '/'; + fullnamelen += strlen(fullname + fullnamelen); + if (slashptr) + fname = slashptr + 1; + else + break; + } + + if (canonname) + strlcpy(canonname, fullname + orignamelen + 1, TZ_STRLEN_MAX + 1); + + return open(fullname, O_RDONLY | PG_BINARY, 0); +} + + +/* + * Scan specified directory for a case-insensitive match to fname + * (of length fnamelen --- fname may not be null terminated!). If found, + * copy the actual filename into canonname and return true. + */ +static bool +scan_directory_ci(const char *dirname, const char *fname, int fnamelen, + char *canonname, int canonnamelen) +{ + bool found = false; + DIR *dirdesc; + struct dirent *direntry; + + dirdesc = AllocateDir(dirname); + + while ((direntry = ReadDirExtended(dirdesc, dirname, LOG)) != NULL) + { + /* + * Ignore . and .., plus any other "hidden" files. This is a security + * measure to prevent access to files outside the timezone directory. 
+ */ + if (direntry->d_name[0] == '.') + continue; + + if (strlen(direntry->d_name) == fnamelen && + pg_strncasecmp(direntry->d_name, fname, fnamelen) == 0) + { + /* Found our match */ + strlcpy(canonname, direntry->d_name, canonnamelen); + found = true; + break; + } + } + + FreeDir(dirdesc); + + return found; +} + + +/* + * We keep loaded timezones in a hashtable so we don't have to + * load and parse the TZ definition file every time one is selected. + * Because we want timezone names to be found case-insensitively, + * the hash key is the uppercased name of the zone. + */ +typedef struct +{ + /* tznameupper contains the all-upper-case name of the timezone */ + char tznameupper[TZ_STRLEN_MAX + 1]; + pg_tz tz; +} pg_tz_cache; + +static __thread HTAB *timezone_cache = NULL; + +void destroy_timezone_hashtable() +{ + hash_destroy(timezone_cache); + timezone_cache = NULL; +} + +static bool +init_timezone_hashtable(void) +{ + HASHCTL hash_ctl; + + hash_ctl.keysize = TZ_STRLEN_MAX + 1; + hash_ctl.entrysize = sizeof(pg_tz_cache); + + timezone_cache = hash_create("Timezones", + 4, + &hash_ctl, + HASH_ELEM | HASH_STRINGS); + if (!timezone_cache) + return false; + + return true; +} + +/* + * Load a timezone from file or from cache. + * Does not verify that the timezone is acceptable! + * + * "GMT" is always interpreted as the tzparse() definition, without attempting + * to load a definition from the filesystem. This has a number of benefits: + * 1. It's guaranteed to succeed, so we don't have the failure mode wherein + * the bootstrap default timezone setting doesn't work (as could happen if + * the OS attempts to supply a leap-second-aware version of "GMT"). + * 2. Because we aren't accessing the filesystem, we can safely initialize + * the "GMT" zone definition before my_exec_path is known. + * 3. It's quick enough that we don't waste much time when the bootstrap + * default timezone setting is later overridden from postgresql.conf. 
+ */ +pg_tz * +pg_tzset(const char *tzname) +{ + pg_tz_cache *tzp; + struct state tzstate; + char uppername[TZ_STRLEN_MAX + 1]; + char canonname[TZ_STRLEN_MAX + 1]; + char *p; + + if (strlen(tzname) > TZ_STRLEN_MAX) + return NULL; /* not going to fit */ + + if (!timezone_cache) + if (!init_timezone_hashtable()) + return NULL; + + /* + * Upcase the given name to perform a case-insensitive hashtable search. + * (We could alternatively downcase it, but we prefer upcase so that we + * can get consistently upcased results from tzparse() in case the name is + * a POSIX-style timezone spec.) + */ + p = uppername; + while (*tzname) + *p++ = pg_toupper((unsigned char) *tzname++); + *p = '\0'; + + tzp = (pg_tz_cache *) hash_search(timezone_cache, + uppername, + HASH_FIND, + NULL); + if (tzp) + { + /* Timezone found in cache, nothing more to do */ + return &tzp->tz; + } + + /* + * "GMT" is always sent to tzparse(), as per discussion above. + */ + if (strcmp(uppername, "GMT") == 0 || strcmp(uppername, "Z") == 0) + { + if (!tzparse(uppername, &tzstate, true)) + { + /* This really, really should not happen ... */ + elog(ERROR, "could not initialize GMT time zone"); + } + /* Use uppercase name as canonical */ + strcpy(canonname, uppername); + } + else if (tzload(uppername, canonname, &tzstate, true) != 0) + { + if (uppername[0] == ':' || !tzparse(uppername, &tzstate, false)) + { + /* Unknown timezone. Fail our call instead of loading GMT! */ + return NULL; + } + /* For POSIX timezone specs, use uppercase name as canonical */ + strcpy(canonname, uppername); + } + + /* Save timezone in the cache */ + tzp = (pg_tz_cache *) hash_search(timezone_cache, + uppername, + HASH_ENTER, + NULL); + + /* hash_search already copied uppername into the hash key */ + strcpy(tzp->tz.TZname, canonname); + memcpy(&tzp->tz.state, &tzstate, sizeof(tzstate)); + + return &tzp->tz; +} + +/* + * Load a fixed-GMT-offset timezone. + * This is used for SQL-spec SET TIME ZONE INTERVAL 'foo' cases. 
+ * It's otherwise equivalent to pg_tzset(). + * + * The GMT offset is specified in seconds, positive values meaning west of + * Greenwich (ie, POSIX not ISO sign convention). However, we use ISO + * sign convention in the displayable abbreviation for the zone. + * + * Caution: this can fail (return NULL) if the specified offset is outside + * the range allowed by the zic library. + */ +pg_tz * +pg_tzset_offset(long gmtoffset) +{ + long absoffset = (gmtoffset < 0) ? -gmtoffset : gmtoffset; + char offsetstr[64]; + char tzname[128]; + + snprintf(offsetstr, sizeof(offsetstr), + "%02ld", absoffset / SECS_PER_HOUR); + absoffset %= SECS_PER_HOUR; + if (absoffset != 0) + { + snprintf(offsetstr + strlen(offsetstr), + sizeof(offsetstr) - strlen(offsetstr), + ":%02ld", absoffset / SECS_PER_MINUTE); + absoffset %= SECS_PER_MINUTE; + if (absoffset != 0) + snprintf(offsetstr + strlen(offsetstr), + sizeof(offsetstr) - strlen(offsetstr), + ":%02ld", absoffset); + } + if (gmtoffset > 0) + snprintf(tzname, sizeof(tzname), "<-%s>+%s", + offsetstr, offsetstr); + else + snprintf(tzname, sizeof(tzname), "<+%s>-%s", + offsetstr, offsetstr); + + return pg_tzset(tzname); +} + + +/* + * Initialize timezone library + * + * This is called before GUC variable initialization begins. Its purpose + * is to ensure that log_timezone has a valid value before any logging GUC + * variables could become set to values that require elog.c to provide + * timestamps (e.g., log_line_prefix). We may as well initialize + * session_timezone to something valid, too. + */ +void +pg_timezone_initialize(void) +{ + /* + * We may not yet know where PGSHAREDIR is (in particular this is true in + * an EXEC_BACKEND subprocess). So use "GMT", which pg_tzset forces to be + * interpreted without reference to the filesystem. This corresponds to + * the bootstrap default for these variables in guc_tables.c, although in + * principle it could be different. 
+ */ + session_timezone = pg_tzset("GMT"); + log_timezone = session_timezone; +} + + +/* + * Functions to enumerate available timezones + * + * Note that pg_tzenumerate_next() will return a pointer into the pg_tzenum + * structure, so the data is only valid up to the next call. + * + * All data is allocated using palloc in the current context. + */ +#define MAX_TZDIR_DEPTH 10 + +struct pg_tzenum +{ + int baselen; + int depth; + DIR *dirdesc[MAX_TZDIR_DEPTH]; + char *dirname[MAX_TZDIR_DEPTH]; + struct pg_tz tz; +}; + +/* typedef pg_tzenum is declared in pgtime.h */ + +pg_tzenum * +pg_tzenumerate_start(void) +{ + pg_tzenum *ret = (pg_tzenum *) palloc0(sizeof(pg_tzenum)); + char *startdir = pstrdup(pg_TZDIR()); + + ret->baselen = strlen(startdir) + 1; + ret->depth = 0; + ret->dirname[0] = startdir; + ret->dirdesc[0] = AllocateDir(startdir); + if (!ret->dirdesc[0]) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open directory \"%s\": %m", startdir))); + return ret; +} + +void +pg_tzenumerate_end(pg_tzenum *dir) +{ + while (dir->depth >= 0) + { + FreeDir(dir->dirdesc[dir->depth]); + pfree(dir->dirname[dir->depth]); + dir->depth--; + } + pfree(dir); +} + +pg_tz * +pg_tzenumerate_next(pg_tzenum *dir) +{ + while (dir->depth >= 0) + { + struct dirent *direntry; + char fullname[MAXPGPATH * 2]; + + direntry = ReadDir(dir->dirdesc[dir->depth], dir->dirname[dir->depth]); + + if (!direntry) + { + /* End of this directory */ + FreeDir(dir->dirdesc[dir->depth]); + pfree(dir->dirname[dir->depth]); + dir->depth--; + continue; + } + + if (direntry->d_name[0] == '.') + continue; + + snprintf(fullname, sizeof(fullname), "%s/%s", + dir->dirname[dir->depth], direntry->d_name); + + if (get_dirent_type(fullname, direntry, true, ERROR) == PGFILETYPE_DIR) + { + /* Step into the subdirectory */ + if (dir->depth >= MAX_TZDIR_DEPTH - 1) + ereport(ERROR, + (errmsg_internal("timezone directory stack overflow"))); + dir->depth++; + dir->dirname[dir->depth] = pstrdup(fullname); 
+ dir->dirdesc[dir->depth] = AllocateDir(fullname); + if (!dir->dirdesc[dir->depth]) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open directory \"%s\": %m", + fullname))); + + /* Start over reading in the new directory */ + continue; + } + + /* + * Load this timezone using tzload() not pg_tzset(), so we don't fill + * the cache. Also, don't ask for the canonical spelling: we already + * know it, and pg_open_tzfile's way of finding it out is pretty + * inefficient. + */ + if (tzload(fullname + dir->baselen, NULL, &dir->tz.state, true) != 0) + { + /* Zone could not be loaded, ignore it */ + continue; + } + + if (!pg_tz_acceptable(&dir->tz)) + { + /* Ignore leap-second zones */ + continue; + } + + /* OK, return the canonical zone name spelling. */ + strlcpy(dir->tz.TZname, fullname + dir->baselen, + sizeof(dir->tz.TZname)); + + /* Timezone loaded OK. */ + return &dir->tz; + } + + /* Nothing more found */ + return NULL; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/timezone/pgtz.h b/yql/essentials/parser/pg_wrapper/postgresql/src/timezone/pgtz.h new file mode 100644 index 00000000000..a5169aa2aa9 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/timezone/pgtz.h @@ -0,0 +1,81 @@ +/*------------------------------------------------------------------------- + * + * pgtz.h + * Timezone Library Integration Functions + * + * Note: this file contains only definitions that are private to the + * timezone library. Public definitions are in pgtime.h. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/timezone/pgtz.h + * + *------------------------------------------------------------------------- + */ +#ifndef _PGTZ_H +#define _PGTZ_H + +#include "pgtime.h" +#include "tzfile.h" + + +#define SMALLEST(a, b) (((a) < (b)) ? (a) : (b)) +#define BIGGEST(a, b) (((a) > (b)) ? 
(a) : (b)) + +struct ttinfo +{ /* time type information */ + int32 tt_utoff; /* UT offset in seconds */ + bool tt_isdst; /* used to set tm_isdst */ + int tt_desigidx; /* abbreviation list index */ + bool tt_ttisstd; /* transition is std time */ + bool tt_ttisut; /* transition is UT */ +}; + +struct lsinfo +{ /* leap second information */ + pg_time_t ls_trans; /* transition time */ + int64 ls_corr; /* correction to apply */ +}; + +struct state +{ + int leapcnt; + int timecnt; + int typecnt; + int charcnt; + bool goback; + bool goahead; + pg_time_t ats[TZ_MAX_TIMES]; + unsigned char types[TZ_MAX_TIMES]; + struct ttinfo ttis[TZ_MAX_TYPES]; + char chars[BIGGEST(BIGGEST(TZ_MAX_CHARS + 1, 4 /* sizeof gmt */ ), + (2 * (TZ_STRLEN_MAX + 1)))]; + struct lsinfo lsis[TZ_MAX_LEAPS]; + + /* + * The time type to use for early times or if no transitions. It is always + * zero for recent tzdb releases. It might be nonzero for data from tzdb + * 2018e or earlier. + */ + int defaulttype; +}; + + +struct pg_tz +{ + /* TZname contains the canonically-cased name of the timezone */ + char TZname[TZ_STRLEN_MAX + 1]; + struct state state; +}; + + +/* in pgtz.c */ +extern int pg_open_tzfile(const char *name, char *canonname); + +/* in localtime.c */ +extern int tzload(const char *name, char *canonname, struct state *sp, + bool doextend); +extern bool tzparse(const char *name, struct state *sp, bool lastditch); + +#endif /* _PGTZ_H */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/timezone/private.h b/yql/essentials/parser/pg_wrapper/postgresql/src/timezone/private.h new file mode 100644 index 00000000000..39d40e43a9f --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/timezone/private.h @@ -0,0 +1,159 @@ +/* Private header for tzdb code. */ + +#ifndef PRIVATE_H + +#define PRIVATE_H + +/* + * This file is in the public domain, so clarified as of + * 1996-06-05 by Arthur David Olson. 
 *
 * IDENTIFICATION
 *	  src/timezone/private.h
 */

/*
 * This header is for use ONLY with the time conversion code.
 * There is no guarantee that it will remain unchanged,
 * or that it will remain at all.
 * Do NOT copy it to any system include directory.
 * Thank you!
 */

#include <limits.h>				/* for CHAR_BIT et al. */
#include <sys/wait.h>			/* for WIFEXITED and WEXITSTATUS */
#include <unistd.h>				/* for F_OK and R_OK */

#include "pgtime.h"

/* This string was in the Factory zone through version 2016f.  */
#define GRANDPARENTED	"Local time zone must be set--see zic manual page"

/*
 * IANA has a bunch of HAVE_FOO #defines here, but in PG we want pretty
 * much all of that to be done by PG's configure script.
 */

/* Fall back to EINVAL where the platform lacks these error codes. */
#ifndef ENOTSUP
#define ENOTSUP EINVAL
#endif
#ifndef EOVERFLOW
#define EOVERFLOW EINVAL
#endif

/*
 * Unlike <ctype.h>'s isdigit, this also works if c < 0 or c > UCHAR_MAX:
 * the subtraction is done in unsigned arithmetic, so anything below '0'
 * wraps around to a large value and fails the <= 9 test.
 */
#define is_digit(c) ((unsigned)(c) - '0' <= 9)

/* PG doesn't currently rely on <inttypes.h>, so work around strtoimax() */
#undef strtoimax
#define strtoimax strtoll


/*
 * Finally, some convenience items.
 */

/* Width of "type" in bits. */
#define TYPE_BIT(type)	(sizeof (type) * CHAR_BIT)
/* Nonzero if "type" is signed, i.e. (type) -1 compares below zero. */
#define TYPE_SIGNED(type) (((type) -1) < 0)
/* Nonzero if the all-ones bit pattern of type t is a negative value. */
#define TWOS_COMPLEMENT(t) ((t) ~ (t) 0 < 0)

/*
 * Max and min values of the integer type T, of which only the bottom
 * B bits are used, and where the highest-order used bit is considered
 * to be a sign bit if T is signed.
 *
 * MAXVAL is written as (2^k - 1) + 2^k, with k one less than the number of
 * value bits, so that no intermediate result exceeds the maximum itself.
 */
#define MAXVAL(t, b) \
  ((t) (((t) 1 << ((b) - 1 - TYPE_SIGNED(t))) \
	- 1 + ((t) 1 << ((b) - 1 - TYPE_SIGNED(t)))))
#define MINVAL(t, b) \
  ((t) (TYPE_SIGNED(t) ? - TWOS_COMPLEMENT(t) - MAXVAL(t, b) : 0))

/* The extreme time values, assuming no padding.  */
#define TIME_T_MIN MINVAL(pg_time_t, TYPE_BIT(pg_time_t))
#define TIME_T_MAX MAXVAL(pg_time_t, TYPE_BIT(pg_time_t))

/*
 * 302 / 1000 is log10(2.0) rounded up.
 * Subtract one for the sign bit if the type is signed;
 * add one for integer division truncation;
 * add one more for a minus sign if the type is signed.
 */
#define INT_STRLEN_MAXIMUM(type) \
	((TYPE_BIT(type) - TYPE_SIGNED(type)) * 302 / 1000 + \
	1 + TYPE_SIGNED(type))

/*
 * INITIALIZE(x)
 *
 * Expands to an assignment of zero to x.
 */
#define INITIALIZE(x)	((x) = 0)

/* Message ids are passed through unchanged: no translation lookup here. */
#undef _
#define _(msgid) (msgid)

/* Handy macros that are independent of tzfile implementation.  */

#define YEARSPERREPEAT	400		/* years before a Gregorian repeat */

#define SECSPERMIN	60
#define MINSPERHOUR 60
#define HOURSPERDAY 24
#define DAYSPERWEEK 7
#define DAYSPERNYEAR	365		/* days in a non-leap year */
#define DAYSPERLYEAR	366		/* days in a leap year */
#define SECSPERHOUR (SECSPERMIN * MINSPERHOUR)
/* the cast makes the product an int32 computation */
#define SECSPERDAY	((int32) SECSPERHOUR * HOURSPERDAY)
#define MONSPERYEAR 12

/* Day-of-week numbers, Sunday first. */
#define TM_SUNDAY	0
#define TM_MONDAY	1
#define TM_TUESDAY	2
#define TM_WEDNESDAY	3
#define TM_THURSDAY 4
#define TM_FRIDAY	5
#define TM_SATURDAY 6

/* Month numbers, January first and zero-based. */
#define TM_JANUARY	0
#define TM_FEBRUARY 1
#define TM_MARCH	2
#define TM_APRIL	3
#define TM_MAY		4
#define TM_JUNE		5
#define TM_JULY		6
#define TM_AUGUST	7
#define TM_SEPTEMBER	8
#define TM_OCTOBER	9
#define TM_NOVEMBER 10
#define TM_DECEMBER 11

#define TM_YEAR_BASE	1900	/* tm_year is stored relative to this year */

#define EPOCH_YEAR	1970
#define EPOCH_WDAY	TM_THURSDAY	/* weekday paired with the start of EPOCH_YEAR */

/* Gregorian leap-year test; note that y is evaluated more than once. */
#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0))

/*
 * Since everything in isleap is modulo 400 (or a factor of 400), we know that
 *	isleap(y) == isleap(y % 400)
 * and so
 *	isleap(a + b) == isleap((a + b) % 400)
 * or
 *	isleap(a + b) == isleap(a % 400 + b % 400)
 * This is true even if % means modulo rather than Fortran remainder
 * (which is allowed by C89 but not by C99 or later).
 * We use this to avoid addition overflow problems.
 */

#define isleap_sum(a, b)	isleap((a) % 400 + (b) % 400)


/*
 * The Gregorian year averages 365.2425 days, which is 31556952 seconds.
+ */ + +#define AVGSECSPERYEAR 31556952L +#define SECSPERREPEAT \ + ((int64) YEARSPERREPEAT * (int64) AVGSECSPERYEAR) +#define SECSPERREPEAT_BITS 34 /* ceil(log2(SECSPERREPEAT)) */ + +#endif /* !defined PRIVATE_H */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/timezone/strftime.c b/yql/essentials/parser/pg_wrapper/postgresql/src/timezone/strftime.c new file mode 100644 index 00000000000..9247a34157f --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/timezone/strftime.c @@ -0,0 +1,571 @@ +/* Convert a broken-down timestamp to a string. */ + +/* + * Copyright 1989 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Based on the UCB version with the copyright notice appearing above. + * + * This is ANSIish only when "multibyte character == plain character". + * + * IDENTIFICATION + * src/timezone/strftime.c + */ + +#include "postgres.h" + +#include <fcntl.h> + +#include "private.h" + + +struct lc_time_T +{ + const char *mon[MONSPERYEAR]; + const char *month[MONSPERYEAR]; + const char *wday[DAYSPERWEEK]; + const char *weekday[DAYSPERWEEK]; + const char *X_fmt; + const char *x_fmt; + const char *c_fmt; + const char *am; + const char *pm; + const char *date_fmt; +}; + +#define Locale (&C_time_locale) + +static const struct lc_time_T C_time_locale = { + { + "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" + }, { + "January", "February", "March", "April", "May", "June", + "July", "August", "September", "October", "November", "December" + }, { + "Sun", "Mon", "Tue", "Wed", + "Thu", "Fri", "Sat" + }, { + "Sunday", "Monday", "Tuesday", "Wednesday", + "Thursday", "Friday", "Saturday" + }, + + /* X_fmt */ + "%H:%M:%S", + + /* + * x_fmt + * + * C99 and later require this format. Using just numbers (as here) makes + * Quakers happier; it's also compatible with SVR4. + */ + "%m/%d/%y", + + /* + * c_fmt + * + * C99 and later require this format. Previously this code used "%D %X", + * but we now conform to C99. Note that "%a %b %d %H:%M:%S %Y" is used by + * Solaris 2.3. 
+ */ + "%a %b %e %T %Y", + + /* am */ + "AM", + + /* pm */ + "PM", + + /* date_fmt */ + "%a %b %e %H:%M:%S %Z %Y" +}; + +enum warn +{ + IN_NONE, IN_SOME, IN_THIS, IN_ALL +}; + +static char *_add(const char *str, char *pt, const char *ptlim); +static char *_conv(int n, const char *format, char *pt, const char *ptlim); +static char *_fmt(const char *format, const struct pg_tm *t, char *pt, const char *ptlim, + enum warn *warnp); +static char *_yconv(int a, int b, bool convert_top, bool convert_yy, char *pt, char const *ptlim); + + +/* + * Convert timestamp t to string s, a caller-allocated buffer of size maxsize, + * using the given format pattern. + * + * On success, returns the number of bytes stored in s, not counting the + * terminating NUL, and restores errno to its value on entry. Returns 0 + * with errno set to ERANGE when the formatted text plus its terminating + * NUL does not fit in maxsize bytes, or with errno set to EOVERFLOW if + * _fmt returns NULL. + * + * See also timestamptz_to_str. + */ +size_t +pg_strftime(char *s, size_t maxsize, const char *format, const struct pg_tm *t) +{ + char *p; + int saved_errno = errno; + enum warn warn = IN_NONE; + + p = _fmt(format, t, s, s + maxsize, &warn); + if (!p) + { + errno = EOVERFLOW; + return 0; + } + if (p == s + maxsize) + { + errno = ERANGE; + return 0; + } + *p = '\0'; + errno = saved_errno; + return p - s; +} +/* + * Expand format into the output buffer, starting at pt and never writing at + * or beyond ptlim, taking field values from t. A percent sign introduces a + * conversion handled by the switch below; every other character is copied + * as is, and a conversion character the switch does not recognize is copied + * without its percent sign. Returns the position following the last byte + * written, which equals ptlim once the buffer is full. + * + * *warnp is set to IN_ALL by the two-digit-year conversions (%g and %y). + * %c and %x expand their locale formats through a recursive call and then + * raise *warnp to IN_SOME, or to IN_THIS when the nested expansion reported + * IN_ALL, unless *warnp is already at least that high. + */ +static char * +_fmt(const char *format, const struct pg_tm *t, char *pt, + const char *ptlim, enum warn *warnp) +{ + for (; *format; ++format) + { + if (*format == '%') + { + label: + switch (*++format) + { + case '\0': + --format; + break; + case 'A': + pt = _add((t->tm_wday < 0 || + t->tm_wday >= DAYSPERWEEK) ? + "?" : Locale->weekday[t->tm_wday], + pt, ptlim); + continue; + case 'a': + pt = _add((t->tm_wday < 0 || + t->tm_wday >= DAYSPERWEEK) ? + "?" : Locale->wday[t->tm_wday], + pt, ptlim); + continue; + case 'B': + pt = _add((t->tm_mon < 0 || + t->tm_mon >= MONSPERYEAR) ? + "?" : Locale->month[t->tm_mon], + pt, ptlim); + continue; + case 'b': + case 'h': + pt = _add((t->tm_mon < 0 || + t->tm_mon >= MONSPERYEAR) ? + "?" : Locale->mon[t->tm_mon], + pt, ptlim); + continue; + case 'C': + + /* + * %C used to do a... 
_fmt("%a %b %e %X %Y", t); + * ...whereas now POSIX 1003.2 calls for something + * completely different. (ado, 1993-05-24) + */ + pt = _yconv(t->tm_year, TM_YEAR_BASE, + true, false, pt, ptlim); + continue; + case 'c': + { + enum warn warn2 = IN_SOME; + + pt = _fmt(Locale->c_fmt, t, pt, ptlim, &warn2); + if (warn2 == IN_ALL) + warn2 = IN_THIS; + if (warn2 > *warnp) + *warnp = warn2; + } + continue; + case 'D': + pt = _fmt("%m/%d/%y", t, pt, ptlim, warnp); + continue; + case 'd': + pt = _conv(t->tm_mday, "%02d", pt, ptlim); + continue; + case 'E': + case 'O': + + /* + * Locale modifiers of C99 and later. The sequences %Ec + * %EC %Ex %EX %Ey %EY %Od %Oe %OH %OI %Om %OM %OS %Ou %OU + * %OV %Ow %OW %Oy are supposed to provide alternative + * representations. + */ + goto label; + case 'e': + pt = _conv(t->tm_mday, "%2d", pt, ptlim); + continue; + case 'F': + pt = _fmt("%Y-%m-%d", t, pt, ptlim, warnp); + continue; + case 'H': + pt = _conv(t->tm_hour, "%02d", pt, ptlim); + continue; + case 'I': + pt = _conv((t->tm_hour % 12) ? + (t->tm_hour % 12) : 12, + "%02d", pt, ptlim); + continue; + case 'j': + pt = _conv(t->tm_yday + 1, "%03d", pt, ptlim); + continue; + case 'k': + + /* + * This used to be... _conv(t->tm_hour % 12 ? t->tm_hour % + * 12 : 12, 2, ' '); ...and has been changed to the below + * to match SunOS 4.1.1 and Arnold Robbins' strftime + * version 3.0. That is, "%k" and "%l" have been swapped. + * (ado, 1993-05-24) + */ + pt = _conv(t->tm_hour, "%2d", pt, ptlim); + continue; +#ifdef KITCHEN_SINK + case 'K': + + /* + * After all this time, still unclaimed! + */ + pt = _add("kitchen sink", pt, ptlim); + continue; +#endif /* defined KITCHEN_SINK */ + case 'l': + + /* + * This used to be... _conv(t->tm_hour, 2, ' '); ...and + * has been changed to the below to match SunOS 4.1.1 and + * Arnold Robbins' strftime version 3.0. That is, "%k" and + * "%l" have been swapped. (ado, 1993-05-24) + */ + pt = _conv((t->tm_hour % 12) ? 
+ (t->tm_hour % 12) : 12, + "%2d", pt, ptlim); + continue; + case 'M': + pt = _conv(t->tm_min, "%02d", pt, ptlim); + continue; + case 'm': + pt = _conv(t->tm_mon + 1, "%02d", pt, ptlim); + continue; + case 'n': + pt = _add("\n", pt, ptlim); + continue; + case 'p': + pt = _add((t->tm_hour >= (HOURSPERDAY / 2)) ? + Locale->pm : + Locale->am, + pt, ptlim); + continue; + case 'R': + pt = _fmt("%H:%M", t, pt, ptlim, warnp); + continue; + case 'r': + pt = _fmt("%I:%M:%S %p", t, pt, ptlim, warnp); + continue; + case 'S': + pt = _conv(t->tm_sec, "%02d", pt, ptlim); + continue; + case 'T': + pt = _fmt("%H:%M:%S", t, pt, ptlim, warnp); + continue; + case 't': + pt = _add("\t", pt, ptlim); + continue; + case 'U': + pt = _conv((t->tm_yday + DAYSPERWEEK - + t->tm_wday) / DAYSPERWEEK, + "%02d", pt, ptlim); + continue; + case 'u': + + /* + * From Arnold Robbins' strftime version 3.0: "ISO 8601: + * Weekday as a decimal number [1 (Monday) - 7]" (ado, + * 1993-05-24) + */ + pt = _conv((t->tm_wday == 0) ? + DAYSPERWEEK : t->tm_wday, + "%d", pt, ptlim); + continue; + case 'V': /* ISO 8601 week number */ + case 'G': /* ISO 8601 year (four digits) */ + case 'g': /* ISO 8601 year (two digits) */ +/* + * From Arnold Robbins' strftime version 3.0: "the week number of the + * year (the first Monday as the first day of week 1) as a decimal number + * (01-53)." + * (ado, 1993-05-24) + * + * From <https://www.cl.cam.ac.uk/~mgk25/iso-time.html> by Markus Kuhn: + * "Week 01 of a year is per definition the first week which has the + * Thursday in this year, which is equivalent to the week which contains + * the fourth day of January. In other words, the first week of a new year + * is the week which has the majority of its days in the new year. Week 01 + * might also contain days from the previous year and the week before week + * 01 of a year is the last week (52 or 53) of the previous year even if + * it contains days from the new year. 
A week starts with Monday (day 1) + * and ends with Sunday (day 7). For example, the first week of the year + * 1997 lasts from 1996-12-30 to 1997-01-05..." + * (ado, 1996-01-02) + */ + { + int year; + int base; + int yday; + int wday; + int w; + + year = t->tm_year; + base = TM_YEAR_BASE; + yday = t->tm_yday; + wday = t->tm_wday; + for (;;) + { + int len; + int bot; + int top; + + len = isleap_sum(year, base) ? + DAYSPERLYEAR : + DAYSPERNYEAR; + + /* + * What yday (-3 ... 3) does the ISO year begin + * on? + */ + bot = ((yday + 11 - wday) % + DAYSPERWEEK) - 3; + + /* + * What yday does the NEXT ISO year begin on? + */ + top = bot - + (len % DAYSPERWEEK); + if (top < -3) + top += DAYSPERWEEK; + top += len; + if (yday >= top) + { + ++base; + w = 1; + break; + } + if (yday >= bot) + { + w = 1 + ((yday - bot) / + DAYSPERWEEK); + break; + } + --base; + yday += isleap_sum(year, base) ? + DAYSPERLYEAR : + DAYSPERNYEAR; + } + if (*format == 'V') + pt = _conv(w, "%02d", + pt, ptlim); + else if (*format == 'g') + { + *warnp = IN_ALL; + pt = _yconv(year, base, + false, true, + pt, ptlim); + } + else + pt = _yconv(year, base, + true, true, + pt, ptlim); + } + continue; + case 'v': + + /* + * From Arnold Robbins' strftime version 3.0: "date as + * dd-bbb-YYYY" (ado, 1993-05-24) + */ + pt = _fmt("%e-%b-%Y", t, pt, ptlim, warnp); + continue; + case 'W': + pt = _conv((t->tm_yday + DAYSPERWEEK - + (t->tm_wday ? 
+ (t->tm_wday - 1) : + (DAYSPERWEEK - 1))) / DAYSPERWEEK, + "%02d", pt, ptlim); + continue; + case 'w': + pt = _conv(t->tm_wday, "%d", pt, ptlim); + continue; + case 'X': + pt = _fmt(Locale->X_fmt, t, pt, ptlim, warnp); + continue; + case 'x': + { + enum warn warn2 = IN_SOME; + + pt = _fmt(Locale->x_fmt, t, pt, ptlim, &warn2); + if (warn2 == IN_ALL) + warn2 = IN_THIS; + if (warn2 > *warnp) + *warnp = warn2; + } + continue; + case 'y': + *warnp = IN_ALL; + pt = _yconv(t->tm_year, TM_YEAR_BASE, + false, true, + pt, ptlim); + continue; + case 'Y': + pt = _yconv(t->tm_year, TM_YEAR_BASE, + true, true, + pt, ptlim); + continue; + case 'Z': + if (t->tm_zone != NULL) + pt = _add(t->tm_zone, pt, ptlim); + + /* + * C99 and later say that %Z must be replaced by the empty + * string if the time zone abbreviation is not + * determinable. + */ + continue; + case 'z': + { + long diff; + char const *sign; + bool negative; + + if (t->tm_isdst < 0) + continue; + diff = t->tm_gmtoff; + negative = diff < 0; + if (diff == 0) + { + if (t->tm_zone != NULL) + negative = t->tm_zone[0] == '-'; + } + if (negative) + { + sign = "-"; + diff = -diff; + } + else + sign = "+"; + pt = _add(sign, pt, ptlim); + diff /= SECSPERMIN; + diff = (diff / MINSPERHOUR) * 100 + + (diff % MINSPERHOUR); + pt = _conv(diff, "%04d", pt, ptlim); + } + continue; + case '+': + pt = _fmt(Locale->date_fmt, t, pt, ptlim, + warnp); + continue; + case '%': + + /* + * X311J/88-090 (4.12.3.5): if conversion char is + * undefined, behavior is undefined. Print out the + * character itself as printf(3) also does. 
+ */ + default: + break; + } + } + if (pt == ptlim) + break; + *pt++ = *format; + } + return pt; +} +/* + * Format the integer n with the printf-style conversion in format into a + * local buffer sized by INT_STRLEN_MAXIMUM(int), then append the result to + * the output through _add and return the pointer _add returns. + * + * NOTE(review): sprintf is unbounded, so this relies on every caller + * passing a format whose output fits the buffer; the callers in this file + * use %d with a minimum width of at most 4. + */ +static char * +_conv(int n, const char *format, char *pt, const char *ptlim) +{ + char buf[INT_STRLEN_MAXIMUM(int) + 1]; + + sprintf(buf, format, n); + return _add(buf, pt, ptlim); +} +/* + * Copy the NUL-terminated string str to pt, stopping early if pt reaches + * ptlim. The terminating NUL is stored when there is room for it but is + * not counted: the return value points at it, or equals ptlim if the + * output space ran out. + */ +static char * +_add(const char *str, char *pt, const char *ptlim) +{ + while (pt < ptlim && (*pt = *str++) != '\0') + ++pt; + return pt; +} + +/* + * POSIX and the C Standard are unclear or inconsistent about + * what %C and %y do if the year is negative or exceeds 9999. + * Use the convention that %C concatenated with %y yields the + * same output as %Y, and that %Y contains at least 4 bytes, + * with more only if necessary. + * + * The year is given as the two addends a and b. It is split into lead + * (the hundreds and above) and trail (the last two digits), each computed + * piecewise from a and b rather than from their sum, and the pair is then + * adjusted so that lead and trail do not have opposite signs. + * convert_top requests lead, written as -0 when lead is zero and trail is + * negative; convert_yy requests the absolute value of trail. Both are + * formatted with %02d and appended at pt, and the updated output position + * is returned. + */ + +static char * +_yconv(int a, int b, bool convert_top, bool convert_yy, + char *pt, const char *ptlim) +{ + int lead; + int trail; + +#define DIVISOR 100 + trail = a % DIVISOR + b % DIVISOR; + lead = a / DIVISOR + b / DIVISOR + trail / DIVISOR; + trail %= DIVISOR; + if (trail < 0 && lead > 0) + { + trail += DIVISOR; + --lead; + } + else if (lead < 0 && trail > 0) + { + trail -= DIVISOR; + ++lead; + } + if (convert_top) + { + if (lead == 0 && trail < 0) + pt = _add("-0", pt, ptlim); + else + pt = _conv(lead, "%02d", pt, ptlim); + } + if (convert_yy) + pt = _conv(((trail < 0) ? -trail : trail), "%02d", pt, ptlim); + return pt; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/timezone/tzfile.h b/yql/essentials/parser/pg_wrapper/postgresql/src/timezone/tzfile.h new file mode 100644 index 00000000000..8f3eb6bd3f2 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/timezone/tzfile.h @@ -0,0 +1,110 @@ +/* Layout and location of TZif files. */ + +#ifndef TZFILE_H + +#define TZFILE_H + +/* + * This file is in the public domain, so clarified as of + * 1996-06-05 by Arthur David Olson. + * + * IDENTIFICATION + * src/timezone/tzfile.h + */ + +/* + * This header is for use ONLY with the time conversion code. 
+ * There is no guarantee that it will remain unchanged, + * or that it will remain at all. + * Do NOT copy it to any system include directory. + * Thank you! + */ + +/* + * Information about time zone files. + */ + +#define TZDEFAULT "/etc/localtime" +#define TZDEFRULES "posixrules" + + +/* See Internet RFC 8536 for more details about the following format. */ + +/* + * Each file begins with. . . + */ + +#define TZ_MAGIC "TZif" + +struct tzhead +{ + char tzh_magic[4]; /* TZ_MAGIC */ + char tzh_version[1]; /* '\0' or '2' or '3' as of 2013 */ + char tzh_reserved[15]; /* reserved; must be zero */ + char tzh_ttisutcnt[4]; /* coded number of trans. time flags */ + char tzh_ttisstdcnt[4]; /* coded number of trans. time flags */ + char tzh_leapcnt[4]; /* coded number of leap seconds */ + char tzh_timecnt[4]; /* coded number of transition times */ + char tzh_typecnt[4]; /* coded number of local time types */ + char tzh_charcnt[4]; /* coded number of abbr. chars */ +}; + +/* + * . . .followed by. . . + * + * tzh_timecnt (char [4])s coded transition times a la time(2) + * tzh_timecnt (unsigned char)s types of local time starting at above + * tzh_typecnt repetitions of + * one (char [4]) coded UT offset in seconds + * one (unsigned char) used to set tm_isdst + * one (unsigned char) that's an abbreviation list index + * tzh_charcnt (char)s '\0'-terminated zone abbreviations + * tzh_leapcnt repetitions of + * one (char [4]) coded leap second transition times + * one (char [4]) total correction after above + * tzh_ttisstdcnt (char)s indexed by type; if 1, transition + * time is standard time, if 0, + * transition time is local (wall clock) + * time; if absent, transition times are + * assumed to be local time + * tzh_ttisutcnt (char)s indexed by type; if 1, transition + * time is UT, if 0, transition time is + * local time; if absent, transition + * times are assumed to be local time. + * When this is 1, the corresponding + * std/wall indicator must also be 1. 
+ */ + +/* + * If tzh_version is '2' or greater, the above is followed by a second instance + * of tzhead and a second instance of the data in which each coded transition + * time uses 8 rather than 4 chars, + * then a newline-enclosed POSIX-TZ-environment-variable-style string for use + * in handling instants after the last transition time stored in the file + * (with nothing between the newlines if there is no POSIX representation for + * such instants). + * + * If tzh_version is '3' or greater, the above is extended as follows. + * First, the POSIX TZ string's hour offset may range from -167 + * through 167 as compared to the POSIX-required 0 through 24. + * Second, its DST start time may be January 1 at 00:00 and its stop + * time December 31 at 24:00 plus the difference between DST and + * standard time, indicating DST all year. + */ + +/* + * In the current implementation, "tzset()" refuses to deal with files that + * exceed any of the limits below. + */ + +#define TZ_MAX_TIMES 2000 + +/* This must be at least 17 for Europe/Samara and Europe/Vilnius. */ +#define TZ_MAX_TYPES 256 /* Limited by what (unsigned char)'s can hold */ + +#define TZ_MAX_CHARS 50 /* Maximum number of abbreviation characters */ + /* (limited by what unsigned chars can hold) */ + +#define TZ_MAX_LEAPS 50 /* Maximum number of leap second corrections */ + +#endif /* !defined TZFILE_H */ |