author | vvvv <vvvv@yandex-team.com> | 2024-11-07 12:29:36 +0300
committer | vvvv <vvvv@yandex-team.com> | 2024-11-07 13:49:47 +0300
commit | d4c258e9431675bab6745c8638df6e3dfd4dca6b (patch)
tree | b5efcfa11351152a4c872fccaea35749141c0b11 /yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt
parent | 13a4f274caef5cfdaf0263b24e4d6bdd5521472b (diff)
download | ydb-d4c258e9431675bab6745c8638df6e3dfd4dca6b.tar.gz
Moved other yql/essentials libs YQL-19206
init
commit_hash:7d4c435602078407bbf20dd3c32f9c90d2bbcbc0
Diffstat (limited to 'yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt')
111 files changed, 191360 insertions, 0 deletions
diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/acl.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/acl.c new file mode 100644 index 00000000000..2dcbcd7e2f0 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/acl.c @@ -0,0 +1,5410 @@ +/*------------------------------------------------------------------------- + * + * acl.c + * Basic access control list data structures manipulation routines. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/acl.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <ctype.h> + +#include "access/htup_details.h" +#include "catalog/catalog.h" +#include "catalog/namespace.h" +#include "catalog/pg_auth_members.h" +#include "catalog/pg_authid.h" +#include "catalog/pg_class.h" +#include "catalog/pg_database.h" +#include "catalog/pg_foreign_data_wrapper.h" +#include "catalog/pg_foreign_server.h" +#include "catalog/pg_language.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_parameter_acl.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_tablespace.h" +#include "catalog/pg_type.h" +#include "commands/dbcommands.h" +#include "commands/proclang.h" +#include "commands/tablespace.h" +#include "common/hashfn.h" +#include "foreign/foreign.h" +#include "funcapi.h" +#include "lib/qunique.h" +#include "miscadmin.h" +#include "utils/acl.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/catcache.h" +#include "utils/guc.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/syscache.h" +#include "utils/varlena.h" + +typedef struct +{ + const char *name; + AclMode value; +} priv_map; + +/* + * We frequently need to test whether a given role is a member of some other + * role. In most of these tests the "given role" is the same, namely the + * active current user. So we can optimize it by keeping cached lists of all + * the roles the "given role" is a member of, directly or indirectly. + * + * Possibly this mechanism should be generalized to allow caching membership + * info for multiple roles? + * + * Each element of cached_roles is an OID list of constituent roles for the + * corresponding element of cached_role (always including the cached_role + * itself). There's a separate cache for each RoleRecurseType, with the + * corresponding semantics. 
+ */ +enum RoleRecurseType +{ + ROLERECURSE_MEMBERS = 0, /* recurse unconditionally */ + ROLERECURSE_PRIVS = 1, /* recurse through inheritable grants */ + ROLERECURSE_SETROLE = 2 /* recurse through grants with set_option */ +}; +static __thread Oid cached_role[] = {InvalidOid, InvalidOid, InvalidOid}; +static __thread List *cached_roles[] = {NIL, NIL, NIL}; +static __thread uint32 cached_db_hash; + + +static const char *getid(const char *s, char *n, Node *escontext); +static void putid(char *p, const char *s); +static Acl *allocacl(int n); +static void check_acl(const Acl *acl); +static const char *aclparse(const char *s, AclItem *aip, Node *escontext); +static bool aclitem_match(const AclItem *a1, const AclItem *a2); +static int aclitemComparator(const void *arg1, const void *arg2); +static void check_circularity(const Acl *old_acl, const AclItem *mod_aip, + Oid ownerId); +static Acl *recursive_revoke(Acl *acl, Oid grantee, AclMode revoke_privs, + Oid ownerId, DropBehavior behavior); + +static AclMode convert_any_priv_string(text *priv_type_text, + const priv_map *privileges); + +static Oid convert_table_name(text *tablename); +static AclMode convert_table_priv_string(text *priv_type_text); +static AclMode convert_sequence_priv_string(text *priv_type_text); +static AttrNumber convert_column_name(Oid tableoid, text *column); +static AclMode convert_column_priv_string(text *priv_type_text); +static Oid convert_database_name(text *databasename); +static AclMode convert_database_priv_string(text *priv_type_text); +static Oid convert_foreign_data_wrapper_name(text *fdwname); +static AclMode convert_foreign_data_wrapper_priv_string(text *priv_type_text); +static Oid convert_function_name(text *functionname); +static AclMode convert_function_priv_string(text *priv_type_text); +static Oid convert_language_name(text *languagename); +static AclMode convert_language_priv_string(text *priv_type_text); +static Oid convert_schema_name(text *schemaname); +static AclMode convert_schema_priv_string(text *priv_type_text); +static Oid convert_server_name(text *servername); +static AclMode convert_server_priv_string(text *priv_type_text); +static Oid convert_tablespace_name(text *tablespacename); +static AclMode convert_tablespace_priv_string(text *priv_type_text); +static Oid convert_type_name(text *typename); +static AclMode convert_type_priv_string(text *priv_type_text); +static AclMode convert_parameter_priv_string(text *priv_text); +static AclMode convert_role_priv_string(text *priv_type_text); +static AclResult pg_role_aclcheck(Oid role_oid, Oid roleid, AclMode mode); + +static void RoleMembershipCacheCallback(Datum arg, int cacheid, uint32 hashvalue); + + +/* + * getid + * Consumes the first alphanumeric string (identifier) found in string + * 's', ignoring any leading white space. If it finds a double quote + * it returns the word inside the quotes. + * + * RETURNS: + * the string position in 's' that points to the next non-space character + * in 's', after any quotes. Also: + * - loads the identifier into 'n'. (If no identifier is found, 'n' + * contains an empty string.) 'n' must be NAMEDATALEN bytes. + * + * Errors are reported via ereport, unless escontext is an ErrorSaveData node, + * in which case we log the error there and return NULL. 
+ */ +static const char * +getid(const char *s, char *n, Node *escontext) +{ + int len = 0; + bool in_quotes = false; + + Assert(s && n); + + while (isspace((unsigned char) *s)) + s++; + /* This code had better match what putid() does, below */ + for (; + *s != '\0' && + (isalnum((unsigned char) *s) || + *s == '_' || + *s == '"' || + in_quotes); + s++) + { + if (*s == '"') + { + /* safe to look at next char (could be '\0' though) */ + if (*(s + 1) != '"') + { + in_quotes = !in_quotes; + continue; + } + /* it's an escaped double quote; skip the escaping char */ + s++; + } + + /* Add the character to the string */ + if (len >= NAMEDATALEN - 1) + ereturn(escontext, NULL, + (errcode(ERRCODE_NAME_TOO_LONG), + errmsg("identifier too long"), + errdetail("Identifier must be less than %d characters.", + NAMEDATALEN))); + + n[len++] = *s; + } + n[len] = '\0'; + while (isspace((unsigned char) *s)) + s++; + return s; +} + +/* + * Write a role name at *p, adding double quotes if needed. + * There must be at least (2*NAMEDATALEN)+2 bytes available at *p. + * This needs to be kept in sync with dequoteAclUserName in pg_dump/dumputils.c + */ +static void +putid(char *p, const char *s) +{ + const char *src; + bool safe = true; + + for (src = s; *src; src++) + { + /* This test had better match what getid() does, above */ + if (!isalnum((unsigned char) *src) && *src != '_') + { + safe = false; + break; + } + } + if (!safe) + *p++ = '"'; + for (src = s; *src; src++) + { + /* A double quote character in a username is encoded as "" */ + if (*src == '"') + *p++ = '"'; + *p++ = *src; + } + if (!safe) + *p++ = '"'; + *p = '\0'; +} + +/* + * aclparse + * Consumes and parses an ACL specification of the form: + * [group|user] [A-Za-z0-9]*=[rwaR]* + * from string 's', ignoring any leading white space or white space + * between the optional id type keyword (group|user) and the actual + * ACL specification. + * + * The group|user decoration is unnecessary in the roles world, + * but we still accept it for backward compatibility. + * + * This routine is called by the parser as well as aclitemin(), hence + * the added generality. + * + * RETURNS: + * the string position in 's' immediately following the ACL + * specification. Also: + * - loads the structure pointed to by 'aip' with the appropriate + * UID/GID, id type identifier and mode type values. + * + * Errors are reported via ereport, unless escontext is an ErrorSaveData node, + * in which case we log the error there and return NULL. 
+ */ +static const char * +aclparse(const char *s, AclItem *aip, Node *escontext) +{ + AclMode privs, + goption, + read; + char name[NAMEDATALEN]; + char name2[NAMEDATALEN]; + + Assert(s && aip); + + s = getid(s, name, escontext); + if (s == NULL) + return NULL; + if (*s != '=') + { + /* we just read a keyword, not a name */ + if (strcmp(name, "group") != 0 && strcmp(name, "user") != 0) + ereturn(escontext, NULL, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("unrecognized key word: \"%s\"", name), + errhint("ACL key word must be \"group\" or \"user\"."))); + /* move s to the name beyond the keyword */ + s = getid(s, name, escontext); + if (s == NULL) + return NULL; + if (name[0] == '\0') + ereturn(escontext, NULL, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("missing name"), + errhint("A name must follow the \"group\" or \"user\" key word."))); + } + + if (*s != '=') + ereturn(escontext, NULL, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("missing \"=\" sign"))); + + privs = goption = ACL_NO_RIGHTS; + + for (++s, read = 0; isalpha((unsigned char) *s) || *s == '*'; s++) + { + switch (*s) + { + case '*': + goption |= read; + break; + case ACL_INSERT_CHR: + read = ACL_INSERT; + break; + case ACL_SELECT_CHR: + read = ACL_SELECT; + break; + case ACL_UPDATE_CHR: + read = ACL_UPDATE; + break; + case ACL_DELETE_CHR: + read = ACL_DELETE; + break; + case ACL_TRUNCATE_CHR: + read = ACL_TRUNCATE; + break; + case ACL_REFERENCES_CHR: + read = ACL_REFERENCES; + break; + case ACL_TRIGGER_CHR: + read = ACL_TRIGGER; + break; + case ACL_EXECUTE_CHR: + read = ACL_EXECUTE; + break; + case ACL_USAGE_CHR: + read = ACL_USAGE; + break; + case ACL_CREATE_CHR: + read = ACL_CREATE; + break; + case ACL_CREATE_TEMP_CHR: + read = ACL_CREATE_TEMP; + break; + case ACL_CONNECT_CHR: + read = ACL_CONNECT; + break; + case ACL_SET_CHR: + read = ACL_SET; + break; + case ACL_ALTER_SYSTEM_CHR: + read = ACL_ALTER_SYSTEM; + break; + case 'R': /* ignore old RULE privileges */ + read = 0; + break; + default: + ereturn(escontext, NULL, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid mode character: must be one of \"%s\"", + ACL_ALL_RIGHTS_STR))); + } + + privs |= read; + } + + if (name[0] == '\0') + aip->ai_grantee = ACL_ID_PUBLIC; + else + { + aip->ai_grantee = get_role_oid(name, true); + if (!OidIsValid(aip->ai_grantee)) + ereturn(escontext, NULL, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("role \"%s\" does not exist", name))); + } + + /* + * XXX Allow a degree of backward compatibility by defaulting the grantor + * to the superuser. + */ + if (*s == '/') + { + s = getid(s + 1, name2, escontext); + if (s == NULL) + return NULL; + if (name2[0] == '\0') + ereturn(escontext, NULL, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("a name must follow the \"/\" sign"))); + aip->ai_grantor = get_role_oid(name2, true); + if (!OidIsValid(aip->ai_grantor)) + ereturn(escontext, NULL, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("role \"%s\" does not exist", name2))); + } + else + { + aip->ai_grantor = BOOTSTRAP_SUPERUSERID; + ereport(WARNING, + (errcode(ERRCODE_INVALID_GRANTOR), + errmsg("defaulting grantor to user ID %u", + BOOTSTRAP_SUPERUSERID))); + } + + ACLITEM_SET_PRIVS_GOPTIONS(*aip, privs, goption); + + return s; +} + +/* + * allocacl + * Allocates storage for a new Acl with 'n' entries. 
+ * + * RETURNS: + * the new Acl + */ +static Acl * +allocacl(int n) +{ + Acl *new_acl; + Size size; + + if (n < 0) + elog(ERROR, "invalid size: %d", n); + size = ACL_N_SIZE(n); + new_acl = (Acl *) palloc0(size); + SET_VARSIZE(new_acl, size); + new_acl->ndim = 1; + new_acl->dataoffset = 0; /* we never put in any nulls */ + new_acl->elemtype = ACLITEMOID; + ARR_LBOUND(new_acl)[0] = 1; + ARR_DIMS(new_acl)[0] = n; + return new_acl; +} + +/* + * Create a zero-entry ACL + */ +Acl * +make_empty_acl(void) +{ + return allocacl(0); +} + +/* + * Copy an ACL + */ +Acl * +aclcopy(const Acl *orig_acl) +{ + Acl *result_acl; + + result_acl = allocacl(ACL_NUM(orig_acl)); + + memcpy(ACL_DAT(result_acl), + ACL_DAT(orig_acl), + ACL_NUM(orig_acl) * sizeof(AclItem)); + + return result_acl; +} + +/* + * Concatenate two ACLs + * + * This is a bit cheesy, since we may produce an ACL with redundant entries. + * Be careful what the result is used for! + */ +Acl * +aclconcat(const Acl *left_acl, const Acl *right_acl) +{ + Acl *result_acl; + + result_acl = allocacl(ACL_NUM(left_acl) + ACL_NUM(right_acl)); + + memcpy(ACL_DAT(result_acl), + ACL_DAT(left_acl), + ACL_NUM(left_acl) * sizeof(AclItem)); + + memcpy(ACL_DAT(result_acl) + ACL_NUM(left_acl), + ACL_DAT(right_acl), + ACL_NUM(right_acl) * sizeof(AclItem)); + + return result_acl; +} + +/* + * Merge two ACLs + * + * This produces a properly merged ACL with no redundant entries. + * Returns NULL on NULL input. + */ +Acl * +aclmerge(const Acl *left_acl, const Acl *right_acl, Oid ownerId) +{ + Acl *result_acl; + AclItem *aip; + int i, + num; + + /* Check for cases where one or both are empty/null */ + if (left_acl == NULL || ACL_NUM(left_acl) == 0) + { + if (right_acl == NULL || ACL_NUM(right_acl) == 0) + return NULL; + else + return aclcopy(right_acl); + } + else + { + if (right_acl == NULL || ACL_NUM(right_acl) == 0) + return aclcopy(left_acl); + } + + /* Merge them the hard way, one item at a time */ + result_acl = aclcopy(left_acl); + + aip = ACL_DAT(right_acl); + num = ACL_NUM(right_acl); + + for (i = 0; i < num; i++, aip++) + { + Acl *tmp_acl; + + tmp_acl = aclupdate(result_acl, aip, ACL_MODECHG_ADD, + ownerId, DROP_RESTRICT); + pfree(result_acl); + result_acl = tmp_acl; + } + + return result_acl; +} + +/* + * Sort the items in an ACL (into an arbitrary but consistent order) + */ +void +aclitemsort(Acl *acl) +{ + if (acl != NULL && ACL_NUM(acl) > 1) + qsort(ACL_DAT(acl), ACL_NUM(acl), sizeof(AclItem), aclitemComparator); +} + +/* + * Check if two ACLs are exactly equal + * + * This will not detect equality if the two arrays contain the same items + * in different orders. To handle that case, sort both inputs first, + * using aclitemsort(). 
+ */ +bool +aclequal(const Acl *left_acl, const Acl *right_acl) +{ + /* Check for cases where one or both are empty/null */ + if (left_acl == NULL || ACL_NUM(left_acl) == 0) + { + if (right_acl == NULL || ACL_NUM(right_acl) == 0) + return true; + else + return false; + } + else + { + if (right_acl == NULL || ACL_NUM(right_acl) == 0) + return false; + } + + if (ACL_NUM(left_acl) != ACL_NUM(right_acl)) + return false; + + if (memcmp(ACL_DAT(left_acl), + ACL_DAT(right_acl), + ACL_NUM(left_acl) * sizeof(AclItem)) == 0) + return true; + + return false; +} + +/* + * Verify that an ACL array is acceptable (one-dimensional and has no nulls) + */ +static void +check_acl(const Acl *acl) +{ + if (ARR_ELEMTYPE(acl) != ACLITEMOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ACL array contains wrong data type"))); + if (ARR_NDIM(acl) != 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ACL arrays must be one-dimensional"))); + if (ARR_HASNULL(acl)) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("ACL arrays must not contain null values"))); +} + +/* + * aclitemin + * Allocates storage for, and fills in, a new AclItem given a string + * 's' that contains an ACL specification. See aclparse for details. + * + * RETURNS: + * the new AclItem + */ +Datum +aclitemin(PG_FUNCTION_ARGS) +{ + const char *s = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + AclItem *aip; + + aip = (AclItem *) palloc(sizeof(AclItem)); + + s = aclparse(s, aip, escontext); + if (s == NULL) + PG_RETURN_NULL(); + + while (isspace((unsigned char) *s)) + ++s; + if (*s) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("extra garbage at the end of the ACL specification"))); + + PG_RETURN_ACLITEM_P(aip); +} + +/* + * aclitemout + * Allocates storage for, and fills in, a new null-delimited string + * containing a formatted ACL specification. See aclparse for details. + * + * RETURNS: + * the new string + */ +Datum +aclitemout(PG_FUNCTION_ARGS) +{ + AclItem *aip = PG_GETARG_ACLITEM_P(0); + char *p; + char *out; + HeapTuple htup; + unsigned i; + + out = palloc(strlen("=/") + + 2 * N_ACL_RIGHTS + + 2 * (2 * NAMEDATALEN + 2) + + 1); + + p = out; + *p = '\0'; + + if (aip->ai_grantee != ACL_ID_PUBLIC) + { + htup = SearchSysCache1(AUTHOID, ObjectIdGetDatum(aip->ai_grantee)); + if (HeapTupleIsValid(htup)) + { + putid(p, NameStr(((Form_pg_authid) GETSTRUCT(htup))->rolname)); + ReleaseSysCache(htup); + } + else + { + /* Generate numeric OID if we don't find an entry */ + sprintf(p, "%u", aip->ai_grantee); + } + } + while (*p) + ++p; + + *p++ = '='; + + for (i = 0; i < N_ACL_RIGHTS; ++i) + { + if (ACLITEM_GET_PRIVS(*aip) & (UINT64CONST(1) << i)) + *p++ = ACL_ALL_RIGHTS_STR[i]; + if (ACLITEM_GET_GOPTIONS(*aip) & (UINT64CONST(1) << i)) + *p++ = '*'; + } + + *p++ = '/'; + *p = '\0'; + + htup = SearchSysCache1(AUTHOID, ObjectIdGetDatum(aip->ai_grantor)); + if (HeapTupleIsValid(htup)) + { + putid(p, NameStr(((Form_pg_authid) GETSTRUCT(htup))->rolname)); + ReleaseSysCache(htup); + } + else + { + /* Generate numeric OID if we don't find an entry */ + sprintf(p, "%u", aip->ai_grantor); + } + + PG_RETURN_CSTRING(out); +} + +/* + * aclitem_match + * Two AclItems are considered to match iff they have the same + * grantee and grantor; the privileges are ignored. 
+ */ +static bool +aclitem_match(const AclItem *a1, const AclItem *a2) +{ + return a1->ai_grantee == a2->ai_grantee && + a1->ai_grantor == a2->ai_grantor; +} + +/* + * aclitemComparator + * qsort comparison function for AclItems + */ +static int +aclitemComparator(const void *arg1, const void *arg2) +{ + const AclItem *a1 = (const AclItem *) arg1; + const AclItem *a2 = (const AclItem *) arg2; + + if (a1->ai_grantee > a2->ai_grantee) + return 1; + if (a1->ai_grantee < a2->ai_grantee) + return -1; + if (a1->ai_grantor > a2->ai_grantor) + return 1; + if (a1->ai_grantor < a2->ai_grantor) + return -1; + if (a1->ai_privs > a2->ai_privs) + return 1; + if (a1->ai_privs < a2->ai_privs) + return -1; + return 0; +} + +/* + * aclitem equality operator + */ +Datum +aclitem_eq(PG_FUNCTION_ARGS) +{ + AclItem *a1 = PG_GETARG_ACLITEM_P(0); + AclItem *a2 = PG_GETARG_ACLITEM_P(1); + bool result; + + result = a1->ai_privs == a2->ai_privs && + a1->ai_grantee == a2->ai_grantee && + a1->ai_grantor == a2->ai_grantor; + PG_RETURN_BOOL(result); +} + +/* + * aclitem hash function + * + * We make aclitems hashable not so much because anyone is likely to hash + * them, as because we want array equality to work on aclitem arrays, and + * with the typcache mechanism we must have a hash or btree opclass. + */ +Datum +hash_aclitem(PG_FUNCTION_ARGS) +{ + AclItem *a = PG_GETARG_ACLITEM_P(0); + + /* not very bright, but avoids any issue of padding in struct */ + PG_RETURN_UINT32((uint32) (a->ai_privs + a->ai_grantee + a->ai_grantor)); +} + +/* + * 64-bit hash function for aclitem. + * + * Similar to hash_aclitem, but accepts a seed and returns a uint64 value. + */ +Datum +hash_aclitem_extended(PG_FUNCTION_ARGS) +{ + AclItem *a = PG_GETARG_ACLITEM_P(0); + uint64 seed = PG_GETARG_INT64(1); + uint32 sum = (uint32) (a->ai_privs + a->ai_grantee + a->ai_grantor); + + return (seed == 0) ? UInt64GetDatum(sum) : hash_uint32_extended(sum, seed); +} + +/* + * acldefault() --- create an ACL describing default access permissions + * + * Change this routine if you want to alter the default access policy for + * newly-created objects (or any object with a NULL acl entry). When + * you make a change here, don't forget to update the GRANT man page, + * which explains all the default permissions. + * + * Note that these are the hard-wired "defaults" that are used in the + * absence of any pg_default_acl entry. 
+ */ +Acl * +acldefault(ObjectType objtype, Oid ownerId) +{ + AclMode world_default; + AclMode owner_default; + int nacl; + Acl *acl; + AclItem *aip; + + switch (objtype) + { + case OBJECT_COLUMN: + /* by default, columns have no extra privileges */ + world_default = ACL_NO_RIGHTS; + owner_default = ACL_NO_RIGHTS; + break; + case OBJECT_TABLE: + world_default = ACL_NO_RIGHTS; + owner_default = ACL_ALL_RIGHTS_RELATION; + break; + case OBJECT_SEQUENCE: + world_default = ACL_NO_RIGHTS; + owner_default = ACL_ALL_RIGHTS_SEQUENCE; + break; + case OBJECT_DATABASE: + /* for backwards compatibility, grant some rights by default */ + world_default = ACL_CREATE_TEMP | ACL_CONNECT; + owner_default = ACL_ALL_RIGHTS_DATABASE; + break; + case OBJECT_FUNCTION: + /* Grant EXECUTE by default, for now */ + world_default = ACL_EXECUTE; + owner_default = ACL_ALL_RIGHTS_FUNCTION; + break; + case OBJECT_LANGUAGE: + /* Grant USAGE by default, for now */ + world_default = ACL_USAGE; + owner_default = ACL_ALL_RIGHTS_LANGUAGE; + break; + case OBJECT_LARGEOBJECT: + world_default = ACL_NO_RIGHTS; + owner_default = ACL_ALL_RIGHTS_LARGEOBJECT; + break; + case OBJECT_SCHEMA: + world_default = ACL_NO_RIGHTS; + owner_default = ACL_ALL_RIGHTS_SCHEMA; + break; + case OBJECT_TABLESPACE: + world_default = ACL_NO_RIGHTS; + owner_default = ACL_ALL_RIGHTS_TABLESPACE; + break; + case OBJECT_FDW: + world_default = ACL_NO_RIGHTS; + owner_default = ACL_ALL_RIGHTS_FDW; + break; + case OBJECT_FOREIGN_SERVER: + world_default = ACL_NO_RIGHTS; + owner_default = ACL_ALL_RIGHTS_FOREIGN_SERVER; + break; + case OBJECT_DOMAIN: + case OBJECT_TYPE: + world_default = ACL_USAGE; + owner_default = ACL_ALL_RIGHTS_TYPE; + break; + case OBJECT_PARAMETER_ACL: + world_default = ACL_NO_RIGHTS; + owner_default = ACL_ALL_RIGHTS_PARAMETER_ACL; + break; + default: + elog(ERROR, "unrecognized object type: %d", (int) objtype); + world_default = ACL_NO_RIGHTS; /* keep compiler quiet */ + owner_default = ACL_NO_RIGHTS; + break; + } + + nacl = 0; + if (world_default != ACL_NO_RIGHTS) + nacl++; + if (owner_default != ACL_NO_RIGHTS) + nacl++; + + acl = allocacl(nacl); + aip = ACL_DAT(acl); + + if (world_default != ACL_NO_RIGHTS) + { + aip->ai_grantee = ACL_ID_PUBLIC; + aip->ai_grantor = ownerId; + ACLITEM_SET_PRIVS_GOPTIONS(*aip, world_default, ACL_NO_RIGHTS); + aip++; + } + + /* + * Note that the owner's entry shows all ordinary privileges but no grant + * options. This is because his grant options come "from the system" and + * not from his own efforts. (The SQL spec says that the owner's rights + * come from a "_SYSTEM" authid.) However, we do consider that the + * owner's ordinary privileges are self-granted; this lets him revoke + * them. We implement the owner's grant options without any explicit + * "_SYSTEM"-like ACL entry, by internally special-casing the owner + * wherever we are testing grant options. + */ + if (owner_default != ACL_NO_RIGHTS) + { + aip->ai_grantee = ownerId; + aip->ai_grantor = ownerId; + ACLITEM_SET_PRIVS_GOPTIONS(*aip, owner_default, ACL_NO_RIGHTS); + } + + return acl; +} + + +/* + * SQL-accessible version of acldefault(). Hackish mapping from "char" type to + * OBJECT_* values. 
+ */ +Datum +acldefault_sql(PG_FUNCTION_ARGS) +{ + char objtypec = PG_GETARG_CHAR(0); + Oid owner = PG_GETARG_OID(1); + ObjectType objtype = 0; + + switch (objtypec) + { + case 'c': + objtype = OBJECT_COLUMN; + break; + case 'r': + objtype = OBJECT_TABLE; + break; + case 's': + objtype = OBJECT_SEQUENCE; + break; + case 'd': + objtype = OBJECT_DATABASE; + break; + case 'f': + objtype = OBJECT_FUNCTION; + break; + case 'l': + objtype = OBJECT_LANGUAGE; + break; + case 'L': + objtype = OBJECT_LARGEOBJECT; + break; + case 'n': + objtype = OBJECT_SCHEMA; + break; + case 'p': + objtype = OBJECT_PARAMETER_ACL; + break; + case 't': + objtype = OBJECT_TABLESPACE; + break; + case 'F': + objtype = OBJECT_FDW; + break; + case 'S': + objtype = OBJECT_FOREIGN_SERVER; + break; + case 'T': + objtype = OBJECT_TYPE; + break; + default: + elog(ERROR, "unrecognized object type abbreviation: %c", objtypec); + } + + PG_RETURN_ACL_P(acldefault(objtype, owner)); +} + + +/* + * Update an ACL array to add or remove specified privileges. + * + * old_acl: the input ACL array + * mod_aip: defines the privileges to be added, removed, or substituted + * modechg: ACL_MODECHG_ADD, ACL_MODECHG_DEL, or ACL_MODECHG_EQL + * ownerId: Oid of object owner + * behavior: RESTRICT or CASCADE behavior for recursive removal + * + * ownerid and behavior are only relevant when the update operation specifies + * deletion of grant options. + * + * The result is a modified copy; the input object is not changed. + * + * NB: caller is responsible for having detoasted the input ACL, if needed. + */ +Acl * +aclupdate(const Acl *old_acl, const AclItem *mod_aip, + int modechg, Oid ownerId, DropBehavior behavior) +{ + Acl *new_acl = NULL; + AclItem *old_aip, + *new_aip = NULL; + AclMode old_rights, + old_goptions, + new_rights, + new_goptions; + int dst, + num; + + /* Caller probably already checked old_acl, but be safe */ + check_acl(old_acl); + + /* If granting grant options, check for circularity */ + if (modechg != ACL_MODECHG_DEL && + ACLITEM_GET_GOPTIONS(*mod_aip) != ACL_NO_RIGHTS) + check_circularity(old_acl, mod_aip, ownerId); + + num = ACL_NUM(old_acl); + old_aip = ACL_DAT(old_acl); + + /* + * Search the ACL for an existing entry for this grantee and grantor. If + * one exists, just modify the entry in-place (well, in the same position, + * since we actually return a copy); otherwise, insert the new entry at + * the end. 
+ */ + + for (dst = 0; dst < num; ++dst) + { + if (aclitem_match(mod_aip, old_aip + dst)) + { + /* found a match, so modify existing item */ + new_acl = allocacl(num); + new_aip = ACL_DAT(new_acl); + memcpy(new_acl, old_acl, ACL_SIZE(old_acl)); + break; + } + } + + if (dst == num) + { + /* need to append a new item */ + new_acl = allocacl(num + 1); + new_aip = ACL_DAT(new_acl); + memcpy(new_aip, old_aip, num * sizeof(AclItem)); + + /* initialize the new entry with no permissions */ + new_aip[dst].ai_grantee = mod_aip->ai_grantee; + new_aip[dst].ai_grantor = mod_aip->ai_grantor; + ACLITEM_SET_PRIVS_GOPTIONS(new_aip[dst], + ACL_NO_RIGHTS, ACL_NO_RIGHTS); + num++; /* set num to the size of new_acl */ + } + + old_rights = ACLITEM_GET_RIGHTS(new_aip[dst]); + old_goptions = ACLITEM_GET_GOPTIONS(new_aip[dst]); + + /* apply the specified permissions change */ + switch (modechg) + { + case ACL_MODECHG_ADD: + ACLITEM_SET_RIGHTS(new_aip[dst], + old_rights | ACLITEM_GET_RIGHTS(*mod_aip)); + break; + case ACL_MODECHG_DEL: + ACLITEM_SET_RIGHTS(new_aip[dst], + old_rights & ~ACLITEM_GET_RIGHTS(*mod_aip)); + break; + case ACL_MODECHG_EQL: + ACLITEM_SET_RIGHTS(new_aip[dst], + ACLITEM_GET_RIGHTS(*mod_aip)); + break; + } + + new_rights = ACLITEM_GET_RIGHTS(new_aip[dst]); + new_goptions = ACLITEM_GET_GOPTIONS(new_aip[dst]); + + /* + * If the adjusted entry has no permissions, delete it from the list. + */ + if (new_rights == ACL_NO_RIGHTS) + { + memmove(new_aip + dst, + new_aip + dst + 1, + (num - dst - 1) * sizeof(AclItem)); + /* Adjust array size to be 'num - 1' items */ + ARR_DIMS(new_acl)[0] = num - 1; + SET_VARSIZE(new_acl, ACL_N_SIZE(num - 1)); + } + + /* + * Remove abandoned privileges (cascading revoke). Currently we can only + * handle this when the grantee is not PUBLIC. + */ + if ((old_goptions & ~new_goptions) != 0) + { + Assert(mod_aip->ai_grantee != ACL_ID_PUBLIC); + new_acl = recursive_revoke(new_acl, mod_aip->ai_grantee, + (old_goptions & ~new_goptions), + ownerId, behavior); + } + + return new_acl; +} + +/* + * Update an ACL array to reflect a change of owner to the parent object + * + * old_acl: the input ACL array (must not be NULL) + * oldOwnerId: Oid of the old object owner + * newOwnerId: Oid of the new object owner + * + * The result is a modified copy; the input object is not changed. + * + * NB: caller is responsible for having detoasted the input ACL, if needed. + */ +Acl * +aclnewowner(const Acl *old_acl, Oid oldOwnerId, Oid newOwnerId) +{ + Acl *new_acl; + AclItem *new_aip; + AclItem *old_aip; + AclItem *dst_aip; + AclItem *src_aip; + AclItem *targ_aip; + bool newpresent = false; + int dst, + src, + targ, + num; + + check_acl(old_acl); + + /* + * Make a copy of the given ACL, substituting new owner ID for old + * wherever it appears as either grantor or grantee. Also note if the new + * owner ID is already present. + */ + num = ACL_NUM(old_acl); + old_aip = ACL_DAT(old_acl); + new_acl = allocacl(num); + new_aip = ACL_DAT(new_acl); + memcpy(new_aip, old_aip, num * sizeof(AclItem)); + for (dst = 0, dst_aip = new_aip; dst < num; dst++, dst_aip++) + { + if (dst_aip->ai_grantor == oldOwnerId) + dst_aip->ai_grantor = newOwnerId; + else if (dst_aip->ai_grantor == newOwnerId) + newpresent = true; + if (dst_aip->ai_grantee == oldOwnerId) + dst_aip->ai_grantee = newOwnerId; + else if (dst_aip->ai_grantee == newOwnerId) + newpresent = true; + } + + /* + * If the old ACL contained any references to the new owner, then we may + * now have generated an ACL containing duplicate entries. 
Find them and + * merge them so that there are not duplicates. (This is relatively + * expensive since we use a stupid O(N^2) algorithm, but it's unlikely to + * be the normal case.) + * + * To simplify deletion of duplicate entries, we temporarily leave them in + * the array but set their privilege masks to zero; when we reach such an + * entry it's just skipped. (Thus, a side effect of this code will be to + * remove privilege-free entries, should there be any in the input.) dst + * is the next output slot, targ is the currently considered input slot + * (always >= dst), and src scans entries to the right of targ looking for + * duplicates. Once an entry has been emitted to dst it is known + * duplicate-free and need not be considered anymore. + */ + if (newpresent) + { + dst = 0; + for (targ = 0, targ_aip = new_aip; targ < num; targ++, targ_aip++) + { + /* ignore if deleted in an earlier pass */ + if (ACLITEM_GET_RIGHTS(*targ_aip) == ACL_NO_RIGHTS) + continue; + /* find and merge any duplicates */ + for (src = targ + 1, src_aip = targ_aip + 1; src < num; + src++, src_aip++) + { + if (ACLITEM_GET_RIGHTS(*src_aip) == ACL_NO_RIGHTS) + continue; + if (aclitem_match(targ_aip, src_aip)) + { + ACLITEM_SET_RIGHTS(*targ_aip, + ACLITEM_GET_RIGHTS(*targ_aip) | + ACLITEM_GET_RIGHTS(*src_aip)); + /* mark the duplicate deleted */ + ACLITEM_SET_RIGHTS(*src_aip, ACL_NO_RIGHTS); + } + } + /* and emit to output */ + new_aip[dst] = *targ_aip; + dst++; + } + /* Adjust array size to be 'dst' items */ + ARR_DIMS(new_acl)[0] = dst; + SET_VARSIZE(new_acl, ACL_N_SIZE(dst)); + } + + return new_acl; +} + + +/* + * When granting grant options, we must disallow attempts to set up circular + * chains of grant options. Suppose A (the object owner) grants B some + * privileges with grant option, and B re-grants them to C. If C could + * grant the privileges to B as well, then A would be unable to effectively + * revoke the privileges from B, since recursive_revoke would consider that + * B still has 'em from C. + * + * We check for this by recursively deleting all grant options belonging to + * the target grantee, and then seeing if the would-be grantor still has the + * grant option or not. + */ +static void +check_circularity(const Acl *old_acl, const AclItem *mod_aip, + Oid ownerId) +{ + Acl *acl; + AclItem *aip; + int i, + num; + AclMode own_privs; + + check_acl(old_acl); + + /* + * For now, grant options can only be granted to roles, not PUBLIC. + * Otherwise we'd have to work a bit harder here. 
+ */ + Assert(mod_aip->ai_grantee != ACL_ID_PUBLIC); + + /* The owner always has grant options, no need to check */ + if (mod_aip->ai_grantor == ownerId) + return; + + /* Make a working copy */ + acl = allocacl(ACL_NUM(old_acl)); + memcpy(acl, old_acl, ACL_SIZE(old_acl)); + + /* Zap all grant options of target grantee, plus what depends on 'em */ +cc_restart: + num = ACL_NUM(acl); + aip = ACL_DAT(acl); + for (i = 0; i < num; i++) + { + if (aip[i].ai_grantee == mod_aip->ai_grantee && + ACLITEM_GET_GOPTIONS(aip[i]) != ACL_NO_RIGHTS) + { + Acl *new_acl; + + /* We'll actually zap ordinary privs too, but no matter */ + new_acl = aclupdate(acl, &aip[i], ACL_MODECHG_DEL, + ownerId, DROP_CASCADE); + + pfree(acl); + acl = new_acl; + + goto cc_restart; + } + } + + /* Now we can compute grantor's independently-derived privileges */ + own_privs = aclmask(acl, + mod_aip->ai_grantor, + ownerId, + ACL_GRANT_OPTION_FOR(ACLITEM_GET_GOPTIONS(*mod_aip)), + ACLMASK_ALL); + own_privs = ACL_OPTION_TO_PRIVS(own_privs); + + if ((ACLITEM_GET_GOPTIONS(*mod_aip) & ~own_privs) != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_GRANT_OPERATION), + errmsg("grant options cannot be granted back to your own grantor"))); + + pfree(acl); +} + + +/* + * Ensure that no privilege is "abandoned". A privilege is abandoned + * if the user that granted the privilege loses the grant option. (So + * the chain through which it was granted is broken.) Either the + * abandoned privileges are revoked as well, or an error message is + * printed, depending on the drop behavior option. + * + * acl: the input ACL list + * grantee: the user from whom some grant options have been revoked + * revoke_privs: the grant options being revoked + * ownerId: Oid of object owner + * behavior: RESTRICT or CASCADE behavior for recursive removal + * + * The input Acl object is pfree'd if replaced. + */ +static Acl * +recursive_revoke(Acl *acl, + Oid grantee, + AclMode revoke_privs, + Oid ownerId, + DropBehavior behavior) +{ + AclMode still_has; + AclItem *aip; + int i, + num; + + check_acl(acl); + + /* The owner can never truly lose grant options, so short-circuit */ + if (grantee == ownerId) + return acl; + + /* The grantee might still have some grant options via another grantor */ + still_has = aclmask(acl, grantee, ownerId, + ACL_GRANT_OPTION_FOR(revoke_privs), + ACLMASK_ALL); + revoke_privs &= ~ACL_OPTION_TO_PRIVS(still_has); + if (revoke_privs == ACL_NO_RIGHTS) + return acl; + +restart: + num = ACL_NUM(acl); + aip = ACL_DAT(acl); + for (i = 0; i < num; i++) + { + if (aip[i].ai_grantor == grantee + && (ACLITEM_GET_PRIVS(aip[i]) & revoke_privs) != 0) + { + AclItem mod_acl; + Acl *new_acl; + + if (behavior == DROP_RESTRICT) + ereport(ERROR, + (errcode(ERRCODE_DEPENDENT_OBJECTS_STILL_EXIST), + errmsg("dependent privileges exist"), + errhint("Use CASCADE to revoke them too."))); + + mod_acl.ai_grantor = grantee; + mod_acl.ai_grantee = aip[i].ai_grantee; + ACLITEM_SET_PRIVS_GOPTIONS(mod_acl, + revoke_privs, + revoke_privs); + + new_acl = aclupdate(acl, &mod_acl, ACL_MODECHG_DEL, + ownerId, behavior); + + pfree(acl); + acl = new_acl; + + goto restart; + } + } + + return acl; +} + + +/* + * aclmask --- compute bitmask of all privileges held by roleid. + * + * When 'how' = ACLMASK_ALL, this simply returns the privilege bits + * held by the given roleid according to the given ACL list, ANDed + * with 'mask'. (The point of passing 'mask' is to let the routine + * exit early if all privileges of interest have been found.) 
+ * + * When 'how' = ACLMASK_ANY, returns as soon as any bit in the mask + * is known true. (This lets us exit soonest in cases where the + * caller is only going to test for zero or nonzero result.) + * + * Usage patterns: + * + * To see if any of a set of privileges are held: + * if (aclmask(acl, roleid, ownerId, privs, ACLMASK_ANY) != 0) + * + * To see if all of a set of privileges are held: + * if (aclmask(acl, roleid, ownerId, privs, ACLMASK_ALL) == privs) + * + * To determine exactly which of a set of privileges are held: + * heldprivs = aclmask(acl, roleid, ownerId, privs, ACLMASK_ALL); + */ +AclMode +aclmask(const Acl *acl, Oid roleid, Oid ownerId, + AclMode mask, AclMaskHow how) +{ + AclMode result; + AclMode remaining; + AclItem *aidat; + int i, + num; + + /* + * Null ACL should not happen, since caller should have inserted + * appropriate default + */ + if (acl == NULL) + elog(ERROR, "null ACL"); + + check_acl(acl); + + /* Quick exit for mask == 0 */ + if (mask == 0) + return 0; + + result = 0; + + /* Owner always implicitly has all grant options */ + if ((mask & ACLITEM_ALL_GOPTION_BITS) && + has_privs_of_role(roleid, ownerId)) + { + result = mask & ACLITEM_ALL_GOPTION_BITS; + if ((how == ACLMASK_ALL) ? (result == mask) : (result != 0)) + return result; + } + + num = ACL_NUM(acl); + aidat = ACL_DAT(acl); + + /* + * Check privileges granted directly to roleid or to public + */ + for (i = 0; i < num; i++) + { + AclItem *aidata = &aidat[i]; + + if (aidata->ai_grantee == ACL_ID_PUBLIC || + aidata->ai_grantee == roleid) + { + result |= aidata->ai_privs & mask; + if ((how == ACLMASK_ALL) ? (result == mask) : (result != 0)) + return result; + } + } + + /* + * Check privileges granted indirectly via role memberships. We do this in + * a separate pass to minimize expensive indirect membership tests. In + * particular, it's worth testing whether a given ACL entry grants any + * privileges still of interest before we perform the has_privs_of_role + * test. + */ + remaining = mask & ~result; + for (i = 0; i < num; i++) + { + AclItem *aidata = &aidat[i]; + + if (aidata->ai_grantee == ACL_ID_PUBLIC || + aidata->ai_grantee == roleid) + continue; /* already checked it */ + + if ((aidata->ai_privs & remaining) && + has_privs_of_role(roleid, aidata->ai_grantee)) + { + result |= aidata->ai_privs & mask; + if ((how == ACLMASK_ALL) ? (result == mask) : (result != 0)) + return result; + remaining = mask & ~result; + } + } + + return result; +} + + +/* + * aclmask_direct --- compute bitmask of all privileges held by roleid. + * + * This is exactly like aclmask() except that we consider only privileges + * held *directly* by roleid, not those inherited via role membership. + */ +static AclMode +aclmask_direct(const Acl *acl, Oid roleid, Oid ownerId, + AclMode mask, AclMaskHow how) +{ + AclMode result; + AclItem *aidat; + int i, + num; + + /* + * Null ACL should not happen, since caller should have inserted + * appropriate default + */ + if (acl == NULL) + elog(ERROR, "null ACL"); + + check_acl(acl); + + /* Quick exit for mask == 0 */ + if (mask == 0) + return 0; + + result = 0; + + /* Owner always implicitly has all grant options */ + if ((mask & ACLITEM_ALL_GOPTION_BITS) && + roleid == ownerId) + { + result = mask & ACLITEM_ALL_GOPTION_BITS; + if ((how == ACLMASK_ALL) ? 
(result == mask) : (result != 0)) + return result; + } + + num = ACL_NUM(acl); + aidat = ACL_DAT(acl); + + /* + * Check privileges granted directly to roleid (and not to public) + */ + for (i = 0; i < num; i++) + { + AclItem *aidata = &aidat[i]; + + if (aidata->ai_grantee == roleid) + { + result |= aidata->ai_privs & mask; + if ((how == ACLMASK_ALL) ? (result == mask) : (result != 0)) + return result; + } + } + + return result; +} + + +/* + * aclmembers + * Find out all the roleids mentioned in an Acl. + * Note that we do not distinguish grantors from grantees. + * + * *roleids is set to point to a palloc'd array containing distinct OIDs + * in sorted order. The length of the array is the function result. + */ +int +aclmembers(const Acl *acl, Oid **roleids) +{ + Oid *list; + const AclItem *acldat; + int i, + j; + + if (acl == NULL || ACL_NUM(acl) == 0) + { + *roleids = NULL; + return 0; + } + + check_acl(acl); + + /* Allocate the worst-case space requirement */ + list = palloc(ACL_NUM(acl) * 2 * sizeof(Oid)); + acldat = ACL_DAT(acl); + + /* + * Walk the ACL collecting mentioned RoleIds. + */ + j = 0; + for (i = 0; i < ACL_NUM(acl); i++) + { + const AclItem *ai = &acldat[i]; + + if (ai->ai_grantee != ACL_ID_PUBLIC) + list[j++] = ai->ai_grantee; + /* grantor is currently never PUBLIC, but let's check anyway */ + if (ai->ai_grantor != ACL_ID_PUBLIC) + list[j++] = ai->ai_grantor; + } + + /* Sort the array */ + qsort(list, j, sizeof(Oid), oid_cmp); + + /* + * We could repalloc the array down to minimum size, but it's hardly worth + * it since it's only transient memory. + */ + *roleids = list; + + /* Remove duplicates from the array */ + return qunique(list, j, sizeof(Oid), oid_cmp); +} + + +/* + * aclinsert (exported function) + */ +Datum +aclinsert(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("aclinsert is no longer supported"))); + + PG_RETURN_NULL(); /* keep compiler quiet */ +} + +Datum +aclremove(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("aclremove is no longer supported"))); + + PG_RETURN_NULL(); /* keep compiler quiet */ +} + +Datum +aclcontains(PG_FUNCTION_ARGS) +{ + Acl *acl = PG_GETARG_ACL_P(0); + AclItem *aip = PG_GETARG_ACLITEM_P(1); + AclItem *aidat; + int i, + num; + + check_acl(acl); + num = ACL_NUM(acl); + aidat = ACL_DAT(acl); + for (i = 0; i < num; ++i) + { + if (aip->ai_grantee == aidat[i].ai_grantee && + aip->ai_grantor == aidat[i].ai_grantor && + (ACLITEM_GET_RIGHTS(*aip) & ACLITEM_GET_RIGHTS(aidat[i])) == ACLITEM_GET_RIGHTS(*aip)) + PG_RETURN_BOOL(true); + } + PG_RETURN_BOOL(false); +} + +Datum +makeaclitem(PG_FUNCTION_ARGS) +{ + Oid grantee = PG_GETARG_OID(0); + Oid grantor = PG_GETARG_OID(1); + text *privtext = PG_GETARG_TEXT_PP(2); + bool goption = PG_GETARG_BOOL(3); + AclItem *result; + AclMode priv; + static const priv_map any_priv_map[] = { + {"SELECT", ACL_SELECT}, + {"INSERT", ACL_INSERT}, + {"UPDATE", ACL_UPDATE}, + {"DELETE", ACL_DELETE}, + {"TRUNCATE", ACL_TRUNCATE}, + {"REFERENCES", ACL_REFERENCES}, + {"TRIGGER", ACL_TRIGGER}, + {"EXECUTE", ACL_EXECUTE}, + {"USAGE", ACL_USAGE}, + {"CREATE", ACL_CREATE}, + {"TEMP", ACL_CREATE_TEMP}, + {"TEMPORARY", ACL_CREATE_TEMP}, + {"CONNECT", ACL_CONNECT}, + {"SET", ACL_SET}, + {"ALTER SYSTEM", ACL_ALTER_SYSTEM}, + {"RULE", 0}, /* ignore old RULE privileges */ + {NULL, 0} + }; + + priv = convert_any_priv_string(privtext, any_priv_map); + + result = (AclItem *) palloc(sizeof(AclItem)); + + result->ai_grantee = grantee; + 
result->ai_grantor = grantor; + + ACLITEM_SET_PRIVS_GOPTIONS(*result, priv, + (goption ? priv : ACL_NO_RIGHTS)); + + PG_RETURN_ACLITEM_P(result); +} + + +/* + * convert_any_priv_string: recognize privilege strings for has_foo_privilege + * + * We accept a comma-separated list of case-insensitive privilege names, + * producing a bitmask of the OR'd privilege bits. We are liberal about + * whitespace between items, not so much about whitespace within items. + * The allowed privilege names are given as an array of priv_map structs, + * terminated by one with a NULL name pointer. + */ +static AclMode +convert_any_priv_string(text *priv_type_text, + const priv_map *privileges) +{ + AclMode result = 0; + char *priv_type = text_to_cstring(priv_type_text); + char *chunk; + char *next_chunk; + + /* We rely on priv_type being a private, modifiable string */ + for (chunk = priv_type; chunk; chunk = next_chunk) + { + int chunk_len; + const priv_map *this_priv; + + /* Split string at commas */ + next_chunk = strchr(chunk, ','); + if (next_chunk) + *next_chunk++ = '\0'; + + /* Drop leading/trailing whitespace in this chunk */ + while (*chunk && isspace((unsigned char) *chunk)) + chunk++; + chunk_len = strlen(chunk); + while (chunk_len > 0 && isspace((unsigned char) chunk[chunk_len - 1])) + chunk_len--; + chunk[chunk_len] = '\0'; + + /* Match to the privileges list */ + for (this_priv = privileges; this_priv->name; this_priv++) + { + if (pg_strcasecmp(this_priv->name, chunk) == 0) + { + result |= this_priv->value; + break; + } + } + if (!this_priv->name) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognized privilege type: \"%s\"", chunk))); + } + + pfree(priv_type); + return result; +} + + +static const char * +convert_aclright_to_string(int aclright) +{ + switch (aclright) + { + case ACL_INSERT: + return "INSERT"; + case ACL_SELECT: + return "SELECT"; + case ACL_UPDATE: + return "UPDATE"; + case ACL_DELETE: + return "DELETE"; + case ACL_TRUNCATE: + return "TRUNCATE"; + case ACL_REFERENCES: + return "REFERENCES"; + case ACL_TRIGGER: + return "TRIGGER"; + case ACL_EXECUTE: + return "EXECUTE"; + case ACL_USAGE: + return "USAGE"; + case ACL_CREATE: + return "CREATE"; + case ACL_CREATE_TEMP: + return "TEMPORARY"; + case ACL_CONNECT: + return "CONNECT"; + case ACL_SET: + return "SET"; + case ACL_ALTER_SYSTEM: + return "ALTER SYSTEM"; + default: + elog(ERROR, "unrecognized aclright: %d", aclright); + return NULL; + } +} + + +/*---------- + * Convert an aclitem[] to a table. 
+ * + * Example: + * + * aclexplode('{=r/joe,foo=a*w/joe}'::aclitem[]) + * + * returns the table + * + * {{ OID(joe), 0::OID, 'SELECT', false }, + * { OID(joe), OID(foo), 'INSERT', true }, + * { OID(joe), OID(foo), 'UPDATE', false }} + *---------- + */ +Datum +aclexplode(PG_FUNCTION_ARGS) +{ + Acl *acl = PG_GETARG_ACL_P(0); + FuncCallContext *funcctx; + int *idx; + AclItem *aidat; + + if (SRF_IS_FIRSTCALL()) + { + TupleDesc tupdesc; + MemoryContext oldcontext; + + check_acl(acl); + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* + * build tupdesc for result tuples (matches out parameters in pg_proc + * entry) + */ + tupdesc = CreateTemplateTupleDesc(4); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "grantor", + OIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "grantee", + OIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "privilege_type", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 4, "is_grantable", + BOOLOID, -1, 0); + + funcctx->tuple_desc = BlessTupleDesc(tupdesc); + + /* allocate memory for user context */ + idx = (int *) palloc(sizeof(int[2])); + idx[0] = 0; /* ACL array item index */ + idx[1] = -1; /* privilege type counter */ + funcctx->user_fctx = (void *) idx; + + MemoryContextSwitchTo(oldcontext); + } + + funcctx = SRF_PERCALL_SETUP(); + idx = (int *) funcctx->user_fctx; + aidat = ACL_DAT(acl); + + /* need test here in case acl has no items */ + while (idx[0] < ACL_NUM(acl)) + { + AclItem *aidata; + AclMode priv_bit; + + idx[1]++; + if (idx[1] == N_ACL_RIGHTS) + { + idx[1] = 0; + idx[0]++; + if (idx[0] >= ACL_NUM(acl)) /* done */ + break; + } + aidata = &aidat[idx[0]]; + priv_bit = UINT64CONST(1) << idx[1]; + + if (ACLITEM_GET_PRIVS(*aidata) & priv_bit) + { + Datum result; + Datum values[4]; + bool nulls[4] = {0}; + HeapTuple tuple; + + values[0] = ObjectIdGetDatum(aidata->ai_grantor); + values[1] = ObjectIdGetDatum(aidata->ai_grantee); + values[2] = CStringGetTextDatum(convert_aclright_to_string(priv_bit)); + values[3] = BoolGetDatum((ACLITEM_GET_GOPTIONS(*aidata) & priv_bit) != 0); + + tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); + result = HeapTupleGetDatum(tuple); + + SRF_RETURN_NEXT(funcctx, result); + } + } + + SRF_RETURN_DONE(funcctx); +} + + +/* + * has_table_privilege variants + * These are all named "has_table_privilege" at the SQL level. + * They take various combinations of relation name, relation OID, + * user name, user OID, or implicit user = current_user. + * + * The result is a boolean value: true if user has the indicated + * privilege, false if not. The variants that take a relation OID + * return NULL if the OID doesn't exist (rather than failing, as + * they did before Postgres 8.4). + */ + +/* + * has_table_privilege_name_name + * Check user privileges on a table given + * name username, text tablename, and text priv name. 
+ */ +Datum +has_table_privilege_name_name(PG_FUNCTION_ARGS) +{ + Name rolename = PG_GETARG_NAME(0); + text *tablename = PG_GETARG_TEXT_PP(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + Oid tableoid; + AclMode mode; + AclResult aclresult; + + roleid = get_role_oid_or_public(NameStr(*rolename)); + tableoid = convert_table_name(tablename); + mode = convert_table_priv_string(priv_type_text); + + aclresult = pg_class_aclcheck(tableoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_table_privilege_name + * Check user privileges on a table given + * text tablename and text priv name. + * current_user is assumed + */ +Datum +has_table_privilege_name(PG_FUNCTION_ARGS) +{ + text *tablename = PG_GETARG_TEXT_PP(0); + text *priv_type_text = PG_GETARG_TEXT_PP(1); + Oid roleid; + Oid tableoid; + AclMode mode; + AclResult aclresult; + + roleid = GetUserId(); + tableoid = convert_table_name(tablename); + mode = convert_table_priv_string(priv_type_text); + + aclresult = pg_class_aclcheck(tableoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_table_privilege_name_id + * Check user privileges on a table given + * name usename, table oid, and text priv name. + */ +Datum +has_table_privilege_name_id(PG_FUNCTION_ARGS) +{ + Name username = PG_GETARG_NAME(0); + Oid tableoid = PG_GETARG_OID(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + AclMode mode; + AclResult aclresult; + + roleid = get_role_oid_or_public(NameStr(*username)); + mode = convert_table_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(tableoid))) + PG_RETURN_NULL(); + + aclresult = pg_class_aclcheck(tableoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_table_privilege_id + * Check user privileges on a table given + * table oid, and text priv name. + * current_user is assumed + */ +Datum +has_table_privilege_id(PG_FUNCTION_ARGS) +{ + Oid tableoid = PG_GETARG_OID(0); + text *priv_type_text = PG_GETARG_TEXT_PP(1); + Oid roleid; + AclMode mode; + AclResult aclresult; + + roleid = GetUserId(); + mode = convert_table_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(tableoid))) + PG_RETURN_NULL(); + + aclresult = pg_class_aclcheck(tableoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_table_privilege_id_name + * Check user privileges on a table given + * roleid, text tablename, and text priv name. + */ +Datum +has_table_privilege_id_name(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + text *tablename = PG_GETARG_TEXT_PP(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid tableoid; + AclMode mode; + AclResult aclresult; + + tableoid = convert_table_name(tablename); + mode = convert_table_priv_string(priv_type_text); + + aclresult = pg_class_aclcheck(tableoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_table_privilege_id_id + * Check user privileges on a table given + * roleid, table oid, and text priv name. 
+ */ +Datum +has_table_privilege_id_id(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + Oid tableoid = PG_GETARG_OID(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + AclMode mode; + AclResult aclresult; + + mode = convert_table_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(tableoid))) + PG_RETURN_NULL(); + + aclresult = pg_class_aclcheck(tableoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * Support routines for has_table_privilege family. + */ + +/* + * Given a table name expressed as a string, look it up and return Oid + */ +static Oid +convert_table_name(text *tablename) +{ + RangeVar *relrv; + + relrv = makeRangeVarFromNameList(textToQualifiedNameList(tablename)); + + /* We might not even have permissions on this relation; don't lock it. */ + return RangeVarGetRelid(relrv, NoLock, false); +} + +/* + * convert_table_priv_string + * Convert text string to AclMode value. + */ +static AclMode +convert_table_priv_string(text *priv_type_text) +{ + static const priv_map table_priv_map[] = { + {"SELECT", ACL_SELECT}, + {"SELECT WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_SELECT)}, + {"INSERT", ACL_INSERT}, + {"INSERT WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_INSERT)}, + {"UPDATE", ACL_UPDATE}, + {"UPDATE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_UPDATE)}, + {"DELETE", ACL_DELETE}, + {"DELETE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_DELETE)}, + {"TRUNCATE", ACL_TRUNCATE}, + {"TRUNCATE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_TRUNCATE)}, + {"REFERENCES", ACL_REFERENCES}, + {"REFERENCES WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_REFERENCES)}, + {"TRIGGER", ACL_TRIGGER}, + {"TRIGGER WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_TRIGGER)}, + {"RULE", 0}, /* ignore old RULE privileges */ + {"RULE WITH GRANT OPTION", 0}, + {NULL, 0} + }; + + return convert_any_priv_string(priv_type_text, table_priv_map); +} + +/* + * has_sequence_privilege variants + * These are all named "has_sequence_privilege" at the SQL level. + * They take various combinations of relation name, relation OID, + * user name, user OID, or implicit user = current_user. + * + * The result is a boolean value: true if user has the indicated + * privilege, false if not. The variants that take a relation OID + * return NULL if the OID doesn't exist. + */ + +/* + * has_sequence_privilege_name_name + * Check user privileges on a sequence given + * name username, text sequencename, and text priv name. + */ +Datum +has_sequence_privilege_name_name(PG_FUNCTION_ARGS) +{ + Name rolename = PG_GETARG_NAME(0); + text *sequencename = PG_GETARG_TEXT_PP(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + Oid sequenceoid; + AclMode mode; + AclResult aclresult; + + roleid = get_role_oid_or_public(NameStr(*rolename)); + mode = convert_sequence_priv_string(priv_type_text); + sequenceoid = convert_table_name(sequencename); + if (get_rel_relkind(sequenceoid) != RELKIND_SEQUENCE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not a sequence", + text_to_cstring(sequencename)))); + + aclresult = pg_class_aclcheck(sequenceoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_sequence_privilege_name + * Check user privileges on a sequence given + * text sequencename and text priv name. 
+ * current_user is assumed + */ +Datum +has_sequence_privilege_name(PG_FUNCTION_ARGS) +{ + text *sequencename = PG_GETARG_TEXT_PP(0); + text *priv_type_text = PG_GETARG_TEXT_PP(1); + Oid roleid; + Oid sequenceoid; + AclMode mode; + AclResult aclresult; + + roleid = GetUserId(); + mode = convert_sequence_priv_string(priv_type_text); + sequenceoid = convert_table_name(sequencename); + if (get_rel_relkind(sequenceoid) != RELKIND_SEQUENCE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not a sequence", + text_to_cstring(sequencename)))); + + aclresult = pg_class_aclcheck(sequenceoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_sequence_privilege_name_id + * Check user privileges on a sequence given + * name usename, sequence oid, and text priv name. + */ +Datum +has_sequence_privilege_name_id(PG_FUNCTION_ARGS) +{ + Name username = PG_GETARG_NAME(0); + Oid sequenceoid = PG_GETARG_OID(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + AclMode mode; + AclResult aclresult; + char relkind; + + roleid = get_role_oid_or_public(NameStr(*username)); + mode = convert_sequence_priv_string(priv_type_text); + relkind = get_rel_relkind(sequenceoid); + if (relkind == '\0') + PG_RETURN_NULL(); + else if (relkind != RELKIND_SEQUENCE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not a sequence", + get_rel_name(sequenceoid)))); + + aclresult = pg_class_aclcheck(sequenceoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_sequence_privilege_id + * Check user privileges on a sequence given + * sequence oid, and text priv name. + * current_user is assumed + */ +Datum +has_sequence_privilege_id(PG_FUNCTION_ARGS) +{ + Oid sequenceoid = PG_GETARG_OID(0); + text *priv_type_text = PG_GETARG_TEXT_PP(1); + Oid roleid; + AclMode mode; + AclResult aclresult; + char relkind; + + roleid = GetUserId(); + mode = convert_sequence_priv_string(priv_type_text); + relkind = get_rel_relkind(sequenceoid); + if (relkind == '\0') + PG_RETURN_NULL(); + else if (relkind != RELKIND_SEQUENCE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not a sequence", + get_rel_name(sequenceoid)))); + + aclresult = pg_class_aclcheck(sequenceoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_sequence_privilege_id_name + * Check user privileges on a sequence given + * roleid, text sequencename, and text priv name. + */ +Datum +has_sequence_privilege_id_name(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + text *sequencename = PG_GETARG_TEXT_PP(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid sequenceoid; + AclMode mode; + AclResult aclresult; + + mode = convert_sequence_priv_string(priv_type_text); + sequenceoid = convert_table_name(sequencename); + if (get_rel_relkind(sequenceoid) != RELKIND_SEQUENCE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not a sequence", + text_to_cstring(sequencename)))); + + aclresult = pg_class_aclcheck(sequenceoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_sequence_privilege_id_id + * Check user privileges on a sequence given + * roleid, sequence oid, and text priv name. 
+ */ +Datum +has_sequence_privilege_id_id(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + Oid sequenceoid = PG_GETARG_OID(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + AclMode mode; + AclResult aclresult; + char relkind; + + mode = convert_sequence_priv_string(priv_type_text); + relkind = get_rel_relkind(sequenceoid); + if (relkind == '\0') + PG_RETURN_NULL(); + else if (relkind != RELKIND_SEQUENCE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not a sequence", + get_rel_name(sequenceoid)))); + + aclresult = pg_class_aclcheck(sequenceoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * convert_sequence_priv_string + * Convert text string to AclMode value. + */ +static AclMode +convert_sequence_priv_string(text *priv_type_text) +{ + static const priv_map sequence_priv_map[] = { + {"USAGE", ACL_USAGE}, + {"USAGE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_USAGE)}, + {"SELECT", ACL_SELECT}, + {"SELECT WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_SELECT)}, + {"UPDATE", ACL_UPDATE}, + {"UPDATE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_UPDATE)}, + {NULL, 0} + }; + + return convert_any_priv_string(priv_type_text, sequence_priv_map); +} + + +/* + * has_any_column_privilege variants + * These are all named "has_any_column_privilege" at the SQL level. + * They take various combinations of relation name, relation OID, + * user name, user OID, or implicit user = current_user. + * + * The result is a boolean value: true if user has the indicated + * privilege for any column of the table, false if not. The variants + * that take a relation OID return NULL if the OID doesn't exist. + */ + +/* + * has_any_column_privilege_name_name + * Check user privileges on any column of a table given + * name username, text tablename, and text priv name. + */ +Datum +has_any_column_privilege_name_name(PG_FUNCTION_ARGS) +{ + Name rolename = PG_GETARG_NAME(0); + text *tablename = PG_GETARG_TEXT_PP(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + Oid tableoid; + AclMode mode; + AclResult aclresult; + + roleid = get_role_oid_or_public(NameStr(*rolename)); + tableoid = convert_table_name(tablename); + mode = convert_column_priv_string(priv_type_text); + + /* First check at table level, then examine each column if needed */ + aclresult = pg_class_aclcheck(tableoid, roleid, mode); + if (aclresult != ACLCHECK_OK) + aclresult = pg_attribute_aclcheck_all(tableoid, roleid, mode, + ACLMASK_ANY); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_any_column_privilege_name + * Check user privileges on any column of a table given + * text tablename and text priv name. + * current_user is assumed + */ +Datum +has_any_column_privilege_name(PG_FUNCTION_ARGS) +{ + text *tablename = PG_GETARG_TEXT_PP(0); + text *priv_type_text = PG_GETARG_TEXT_PP(1); + Oid roleid; + Oid tableoid; + AclMode mode; + AclResult aclresult; + + roleid = GetUserId(); + tableoid = convert_table_name(tablename); + mode = convert_column_priv_string(priv_type_text); + + /* First check at table level, then examine each column if needed */ + aclresult = pg_class_aclcheck(tableoid, roleid, mode); + if (aclresult != ACLCHECK_OK) + aclresult = pg_attribute_aclcheck_all(tableoid, roleid, mode, + ACLMASK_ANY); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_any_column_privilege_name_id + * Check user privileges on any column of a table given + * name usename, table oid, and text priv name. 
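+ *
+ *		Illustrative usage ("alice" and "mytable" are placeholder names):
+ *			SELECT has_any_column_privilege('alice', 'mytable'::regclass, 'SELECT');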
+ */ +Datum +has_any_column_privilege_name_id(PG_FUNCTION_ARGS) +{ + Name username = PG_GETARG_NAME(0); + Oid tableoid = PG_GETARG_OID(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + AclMode mode; + AclResult aclresult; + + roleid = get_role_oid_or_public(NameStr(*username)); + mode = convert_column_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(tableoid))) + PG_RETURN_NULL(); + + /* First check at table level, then examine each column if needed */ + aclresult = pg_class_aclcheck(tableoid, roleid, mode); + if (aclresult != ACLCHECK_OK) + aclresult = pg_attribute_aclcheck_all(tableoid, roleid, mode, + ACLMASK_ANY); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_any_column_privilege_id + * Check user privileges on any column of a table given + * table oid, and text priv name. + * current_user is assumed + */ +Datum +has_any_column_privilege_id(PG_FUNCTION_ARGS) +{ + Oid tableoid = PG_GETARG_OID(0); + text *priv_type_text = PG_GETARG_TEXT_PP(1); + Oid roleid; + AclMode mode; + AclResult aclresult; + + roleid = GetUserId(); + mode = convert_column_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(tableoid))) + PG_RETURN_NULL(); + + /* First check at table level, then examine each column if needed */ + aclresult = pg_class_aclcheck(tableoid, roleid, mode); + if (aclresult != ACLCHECK_OK) + aclresult = pg_attribute_aclcheck_all(tableoid, roleid, mode, + ACLMASK_ANY); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_any_column_privilege_id_name + * Check user privileges on any column of a table given + * roleid, text tablename, and text priv name. + */ +Datum +has_any_column_privilege_id_name(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + text *tablename = PG_GETARG_TEXT_PP(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid tableoid; + AclMode mode; + AclResult aclresult; + + tableoid = convert_table_name(tablename); + mode = convert_column_priv_string(priv_type_text); + + /* First check at table level, then examine each column if needed */ + aclresult = pg_class_aclcheck(tableoid, roleid, mode); + if (aclresult != ACLCHECK_OK) + aclresult = pg_attribute_aclcheck_all(tableoid, roleid, mode, + ACLMASK_ANY); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_any_column_privilege_id_id + * Check user privileges on any column of a table given + * roleid, table oid, and text priv name. + */ +Datum +has_any_column_privilege_id_id(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + Oid tableoid = PG_GETARG_OID(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + AclMode mode; + AclResult aclresult; + + mode = convert_column_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(tableoid))) + PG_RETURN_NULL(); + + /* First check at table level, then examine each column if needed */ + aclresult = pg_class_aclcheck(tableoid, roleid, mode); + if (aclresult != ACLCHECK_OK) + aclresult = pg_attribute_aclcheck_all(tableoid, roleid, mode, + ACLMASK_ANY); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + + +/* + * has_column_privilege variants + * These are all named "has_column_privilege" at the SQL level. + * They take various combinations of relation name, relation OID, + * column name, column attnum, user name, user OID, or + * implicit user = current_user. + * + * The result is a boolean value: true if user has the indicated + * privilege, false if not. 
The variants that take a relation OID + * return NULL (rather than throwing an error) if that relation OID + * doesn't exist. Likewise, the variants that take an integer attnum + * return NULL (rather than throwing an error) if there is no such + * pg_attribute entry. All variants return NULL if an attisdropped + * column is selected. These rules are meant to avoid unnecessary + * failures in queries that scan pg_attribute. + */ + +/* + * column_privilege_check: check column privileges, but don't throw an error + * for dropped column or table + * + * Returns 1 if have the privilege, 0 if not, -1 if dropped column/table. + */ +static int +column_privilege_check(Oid tableoid, AttrNumber attnum, + Oid roleid, AclMode mode) +{ + AclResult aclresult; + bool is_missing = false; + + /* + * If convert_column_name failed, we can just return -1 immediately. + */ + if (attnum == InvalidAttrNumber) + return -1; + + /* + * Check for column-level privileges first. This serves in part as a check + * on whether the column even exists, so we need to do it before checking + * table-level privilege. + */ + aclresult = pg_attribute_aclcheck_ext(tableoid, attnum, roleid, + mode, &is_missing); + if (aclresult == ACLCHECK_OK) + return 1; + else if (is_missing) + return -1; + + /* Next check if we have the privilege at the table level */ + aclresult = pg_class_aclcheck_ext(tableoid, roleid, mode, &is_missing); + if (aclresult == ACLCHECK_OK) + return 1; + else if (is_missing) + return -1; + else + return 0; +} + +/* + * has_column_privilege_name_name_name + * Check user privileges on a column given + * name username, text tablename, text colname, and text priv name. + */ +Datum +has_column_privilege_name_name_name(PG_FUNCTION_ARGS) +{ + Name rolename = PG_GETARG_NAME(0); + text *tablename = PG_GETARG_TEXT_PP(1); + text *column = PG_GETARG_TEXT_PP(2); + text *priv_type_text = PG_GETARG_TEXT_PP(3); + Oid roleid; + Oid tableoid; + AttrNumber colattnum; + AclMode mode; + int privresult; + + roleid = get_role_oid_or_public(NameStr(*rolename)); + tableoid = convert_table_name(tablename); + colattnum = convert_column_name(tableoid, column); + mode = convert_column_priv_string(priv_type_text); + + privresult = column_privilege_check(tableoid, colattnum, roleid, mode); + if (privresult < 0) + PG_RETURN_NULL(); + PG_RETURN_BOOL(privresult); +} + +/* + * has_column_privilege_name_name_attnum + * Check user privileges on a column given + * name username, text tablename, int attnum, and text priv name. + */ +Datum +has_column_privilege_name_name_attnum(PG_FUNCTION_ARGS) +{ + Name rolename = PG_GETARG_NAME(0); + text *tablename = PG_GETARG_TEXT_PP(1); + AttrNumber colattnum = PG_GETARG_INT16(2); + text *priv_type_text = PG_GETARG_TEXT_PP(3); + Oid roleid; + Oid tableoid; + AclMode mode; + int privresult; + + roleid = get_role_oid_or_public(NameStr(*rolename)); + tableoid = convert_table_name(tablename); + mode = convert_column_priv_string(priv_type_text); + + privresult = column_privilege_check(tableoid, colattnum, roleid, mode); + if (privresult < 0) + PG_RETURN_NULL(); + PG_RETURN_BOOL(privresult); +} + +/* + * has_column_privilege_name_id_name + * Check user privileges on a column given + * name username, table oid, text colname, and text priv name. 
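+ *
+ *		Illustrative usage ("alice", "mytable" and "col1" are placeholder names):
+ *			SELECT has_column_privilege('alice', 'mytable'::regclass, 'col1', 'UPDATE');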
+ */ +Datum +has_column_privilege_name_id_name(PG_FUNCTION_ARGS) +{ + Name username = PG_GETARG_NAME(0); + Oid tableoid = PG_GETARG_OID(1); + text *column = PG_GETARG_TEXT_PP(2); + text *priv_type_text = PG_GETARG_TEXT_PP(3); + Oid roleid; + AttrNumber colattnum; + AclMode mode; + int privresult; + + roleid = get_role_oid_or_public(NameStr(*username)); + colattnum = convert_column_name(tableoid, column); + mode = convert_column_priv_string(priv_type_text); + + privresult = column_privilege_check(tableoid, colattnum, roleid, mode); + if (privresult < 0) + PG_RETURN_NULL(); + PG_RETURN_BOOL(privresult); +} + +/* + * has_column_privilege_name_id_attnum + * Check user privileges on a column given + * name username, table oid, int attnum, and text priv name. + */ +Datum +has_column_privilege_name_id_attnum(PG_FUNCTION_ARGS) +{ + Name username = PG_GETARG_NAME(0); + Oid tableoid = PG_GETARG_OID(1); + AttrNumber colattnum = PG_GETARG_INT16(2); + text *priv_type_text = PG_GETARG_TEXT_PP(3); + Oid roleid; + AclMode mode; + int privresult; + + roleid = get_role_oid_or_public(NameStr(*username)); + mode = convert_column_priv_string(priv_type_text); + + privresult = column_privilege_check(tableoid, colattnum, roleid, mode); + if (privresult < 0) + PG_RETURN_NULL(); + PG_RETURN_BOOL(privresult); +} + +/* + * has_column_privilege_id_name_name + * Check user privileges on a column given + * oid roleid, text tablename, text colname, and text priv name. + */ +Datum +has_column_privilege_id_name_name(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + text *tablename = PG_GETARG_TEXT_PP(1); + text *column = PG_GETARG_TEXT_PP(2); + text *priv_type_text = PG_GETARG_TEXT_PP(3); + Oid tableoid; + AttrNumber colattnum; + AclMode mode; + int privresult; + + tableoid = convert_table_name(tablename); + colattnum = convert_column_name(tableoid, column); + mode = convert_column_priv_string(priv_type_text); + + privresult = column_privilege_check(tableoid, colattnum, roleid, mode); + if (privresult < 0) + PG_RETURN_NULL(); + PG_RETURN_BOOL(privresult); +} + +/* + * has_column_privilege_id_name_attnum + * Check user privileges on a column given + * oid roleid, text tablename, int attnum, and text priv name. + */ +Datum +has_column_privilege_id_name_attnum(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + text *tablename = PG_GETARG_TEXT_PP(1); + AttrNumber colattnum = PG_GETARG_INT16(2); + text *priv_type_text = PG_GETARG_TEXT_PP(3); + Oid tableoid; + AclMode mode; + int privresult; + + tableoid = convert_table_name(tablename); + mode = convert_column_priv_string(priv_type_text); + + privresult = column_privilege_check(tableoid, colattnum, roleid, mode); + if (privresult < 0) + PG_RETURN_NULL(); + PG_RETURN_BOOL(privresult); +} + +/* + * has_column_privilege_id_id_name + * Check user privileges on a column given + * oid roleid, table oid, text colname, and text priv name. 
+ */ +Datum +has_column_privilege_id_id_name(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + Oid tableoid = PG_GETARG_OID(1); + text *column = PG_GETARG_TEXT_PP(2); + text *priv_type_text = PG_GETARG_TEXT_PP(3); + AttrNumber colattnum; + AclMode mode; + int privresult; + + colattnum = convert_column_name(tableoid, column); + mode = convert_column_priv_string(priv_type_text); + + privresult = column_privilege_check(tableoid, colattnum, roleid, mode); + if (privresult < 0) + PG_RETURN_NULL(); + PG_RETURN_BOOL(privresult); +} + +/* + * has_column_privilege_id_id_attnum + * Check user privileges on a column given + * oid roleid, table oid, int attnum, and text priv name. + */ +Datum +has_column_privilege_id_id_attnum(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + Oid tableoid = PG_GETARG_OID(1); + AttrNumber colattnum = PG_GETARG_INT16(2); + text *priv_type_text = PG_GETARG_TEXT_PP(3); + AclMode mode; + int privresult; + + mode = convert_column_priv_string(priv_type_text); + + privresult = column_privilege_check(tableoid, colattnum, roleid, mode); + if (privresult < 0) + PG_RETURN_NULL(); + PG_RETURN_BOOL(privresult); +} + +/* + * has_column_privilege_name_name + * Check user privileges on a column given + * text tablename, text colname, and text priv name. + * current_user is assumed + */ +Datum +has_column_privilege_name_name(PG_FUNCTION_ARGS) +{ + text *tablename = PG_GETARG_TEXT_PP(0); + text *column = PG_GETARG_TEXT_PP(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + Oid tableoid; + AttrNumber colattnum; + AclMode mode; + int privresult; + + roleid = GetUserId(); + tableoid = convert_table_name(tablename); + colattnum = convert_column_name(tableoid, column); + mode = convert_column_priv_string(priv_type_text); + + privresult = column_privilege_check(tableoid, colattnum, roleid, mode); + if (privresult < 0) + PG_RETURN_NULL(); + PG_RETURN_BOOL(privresult); +} + +/* + * has_column_privilege_name_attnum + * Check user privileges on a column given + * text tablename, int attnum, and text priv name. + * current_user is assumed + */ +Datum +has_column_privilege_name_attnum(PG_FUNCTION_ARGS) +{ + text *tablename = PG_GETARG_TEXT_PP(0); + AttrNumber colattnum = PG_GETARG_INT16(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + Oid tableoid; + AclMode mode; + int privresult; + + roleid = GetUserId(); + tableoid = convert_table_name(tablename); + mode = convert_column_priv_string(priv_type_text); + + privresult = column_privilege_check(tableoid, colattnum, roleid, mode); + if (privresult < 0) + PG_RETURN_NULL(); + PG_RETURN_BOOL(privresult); +} + +/* + * has_column_privilege_id_name + * Check user privileges on a column given + * table oid, text colname, and text priv name. + * current_user is assumed + */ +Datum +has_column_privilege_id_name(PG_FUNCTION_ARGS) +{ + Oid tableoid = PG_GETARG_OID(0); + text *column = PG_GETARG_TEXT_PP(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + AttrNumber colattnum; + AclMode mode; + int privresult; + + roleid = GetUserId(); + colattnum = convert_column_name(tableoid, column); + mode = convert_column_priv_string(priv_type_text); + + privresult = column_privilege_check(tableoid, colattnum, roleid, mode); + if (privresult < 0) + PG_RETURN_NULL(); + PG_RETURN_BOOL(privresult); +} + +/* + * has_column_privilege_id_attnum + * Check user privileges on a column given + * table oid, int attnum, and text priv name. 
+ * current_user is assumed + */ +Datum +has_column_privilege_id_attnum(PG_FUNCTION_ARGS) +{ + Oid tableoid = PG_GETARG_OID(0); + AttrNumber colattnum = PG_GETARG_INT16(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + AclMode mode; + int privresult; + + roleid = GetUserId(); + mode = convert_column_priv_string(priv_type_text); + + privresult = column_privilege_check(tableoid, colattnum, roleid, mode); + if (privresult < 0) + PG_RETURN_NULL(); + PG_RETURN_BOOL(privresult); +} + +/* + * Support routines for has_column_privilege family. + */ + +/* + * Given a table OID and a column name expressed as a string, look it up + * and return the column number. Returns InvalidAttrNumber in cases + * where caller should return NULL instead of failing. + */ +static AttrNumber +convert_column_name(Oid tableoid, text *column) +{ + char *colname; + HeapTuple attTuple; + AttrNumber attnum; + + colname = text_to_cstring(column); + + /* + * We don't use get_attnum() here because it will report that dropped + * columns don't exist. We need to treat dropped columns differently from + * nonexistent columns. + */ + attTuple = SearchSysCache2(ATTNAME, + ObjectIdGetDatum(tableoid), + CStringGetDatum(colname)); + if (HeapTupleIsValid(attTuple)) + { + Form_pg_attribute attributeForm; + + attributeForm = (Form_pg_attribute) GETSTRUCT(attTuple); + /* We want to return NULL for dropped columns */ + if (attributeForm->attisdropped) + attnum = InvalidAttrNumber; + else + attnum = attributeForm->attnum; + ReleaseSysCache(attTuple); + } + else + { + char *tablename = get_rel_name(tableoid); + + /* + * If the table OID is bogus, or it's just been dropped, we'll get + * NULL back. In such cases we want has_column_privilege to return + * NULL too, so just return InvalidAttrNumber. + */ + if (tablename != NULL) + { + /* tableoid exists, colname does not, so throw error */ + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" of relation \"%s\" does not exist", + colname, tablename))); + } + /* tableoid doesn't exist, so act like attisdropped case */ + attnum = InvalidAttrNumber; + } + + pfree(colname); + return attnum; +} + +/* + * convert_column_priv_string + * Convert text string to AclMode value. + */ +static AclMode +convert_column_priv_string(text *priv_type_text) +{ + static const priv_map column_priv_map[] = { + {"SELECT", ACL_SELECT}, + {"SELECT WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_SELECT)}, + {"INSERT", ACL_INSERT}, + {"INSERT WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_INSERT)}, + {"UPDATE", ACL_UPDATE}, + {"UPDATE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_UPDATE)}, + {"REFERENCES", ACL_REFERENCES}, + {"REFERENCES WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_REFERENCES)}, + {NULL, 0} + }; + + return convert_any_priv_string(priv_type_text, column_priv_map); +} + + +/* + * has_database_privilege variants + * These are all named "has_database_privilege" at the SQL level. + * They take various combinations of database name, database OID, + * user name, user OID, or implicit user = current_user. + * + * The result is a boolean value: true if user has the indicated + * privilege, false if not, or NULL if object doesn't exist. + */ + +/* + * has_database_privilege_name_name + * Check user privileges on a database given + * name username, text databasename, and text priv name. 
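+ *
+ *		Illustrative usage ("alice" is a placeholder role name):
+ *			SELECT has_database_privilege('alice', 'template1', 'CONNECT');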
+ */ +Datum +has_database_privilege_name_name(PG_FUNCTION_ARGS) +{ + Name username = PG_GETARG_NAME(0); + text *databasename = PG_GETARG_TEXT_PP(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + Oid databaseoid; + AclMode mode; + AclResult aclresult; + + roleid = get_role_oid_or_public(NameStr(*username)); + databaseoid = convert_database_name(databasename); + mode = convert_database_priv_string(priv_type_text); + + aclresult = object_aclcheck(DatabaseRelationId, databaseoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_database_privilege_name + * Check user privileges on a database given + * text databasename and text priv name. + * current_user is assumed + */ +Datum +has_database_privilege_name(PG_FUNCTION_ARGS) +{ + text *databasename = PG_GETARG_TEXT_PP(0); + text *priv_type_text = PG_GETARG_TEXT_PP(1); + Oid roleid; + Oid databaseoid; + AclMode mode; + AclResult aclresult; + + roleid = GetUserId(); + databaseoid = convert_database_name(databasename); + mode = convert_database_priv_string(priv_type_text); + + aclresult = object_aclcheck(DatabaseRelationId, databaseoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_database_privilege_name_id + * Check user privileges on a database given + * name usename, database oid, and text priv name. + */ +Datum +has_database_privilege_name_id(PG_FUNCTION_ARGS) +{ + Name username = PG_GETARG_NAME(0); + Oid databaseoid = PG_GETARG_OID(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + AclMode mode; + AclResult aclresult; + + roleid = get_role_oid_or_public(NameStr(*username)); + mode = convert_database_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(DATABASEOID, ObjectIdGetDatum(databaseoid))) + PG_RETURN_NULL(); + + aclresult = object_aclcheck(DatabaseRelationId, databaseoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_database_privilege_id + * Check user privileges on a database given + * database oid, and text priv name. + * current_user is assumed + */ +Datum +has_database_privilege_id(PG_FUNCTION_ARGS) +{ + Oid databaseoid = PG_GETARG_OID(0); + text *priv_type_text = PG_GETARG_TEXT_PP(1); + Oid roleid; + AclMode mode; + AclResult aclresult; + + roleid = GetUserId(); + mode = convert_database_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(DATABASEOID, ObjectIdGetDatum(databaseoid))) + PG_RETURN_NULL(); + + aclresult = object_aclcheck(DatabaseRelationId, databaseoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_database_privilege_id_name + * Check user privileges on a database given + * roleid, text databasename, and text priv name. + */ +Datum +has_database_privilege_id_name(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + text *databasename = PG_GETARG_TEXT_PP(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid databaseoid; + AclMode mode; + AclResult aclresult; + + databaseoid = convert_database_name(databasename); + mode = convert_database_priv_string(priv_type_text); + + aclresult = object_aclcheck(DatabaseRelationId, databaseoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_database_privilege_id_id + * Check user privileges on a database given + * roleid, database oid, and text priv name. 
+ */ +Datum +has_database_privilege_id_id(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + Oid databaseoid = PG_GETARG_OID(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + AclMode mode; + AclResult aclresult; + + mode = convert_database_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(DATABASEOID, ObjectIdGetDatum(databaseoid))) + PG_RETURN_NULL(); + + aclresult = object_aclcheck(DatabaseRelationId, databaseoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * Support routines for has_database_privilege family. + */ + +/* + * Given a database name expressed as a string, look it up and return Oid + */ +static Oid +convert_database_name(text *databasename) +{ + char *dbname = text_to_cstring(databasename); + + return get_database_oid(dbname, false); +} + +/* + * convert_database_priv_string + * Convert text string to AclMode value. + */ +static AclMode +convert_database_priv_string(text *priv_type_text) +{ + static const priv_map database_priv_map[] = { + {"CREATE", ACL_CREATE}, + {"CREATE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_CREATE)}, + {"TEMPORARY", ACL_CREATE_TEMP}, + {"TEMPORARY WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_CREATE_TEMP)}, + {"TEMP", ACL_CREATE_TEMP}, + {"TEMP WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_CREATE_TEMP)}, + {"CONNECT", ACL_CONNECT}, + {"CONNECT WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_CONNECT)}, + {NULL, 0} + }; + + return convert_any_priv_string(priv_type_text, database_priv_map); +} + + +/* + * has_foreign_data_wrapper_privilege variants + * These are all named "has_foreign_data_wrapper_privilege" at the SQL level. + * They take various combinations of foreign-data wrapper name, + * fdw OID, user name, user OID, or implicit user = current_user. + * + * The result is a boolean value: true if user has the indicated + * privilege, false if not. + */ + +/* + * has_foreign_data_wrapper_privilege_name_name + * Check user privileges on a foreign-data wrapper given + * name username, text fdwname, and text priv name. + */ +Datum +has_foreign_data_wrapper_privilege_name_name(PG_FUNCTION_ARGS) +{ + Name username = PG_GETARG_NAME(0); + text *fdwname = PG_GETARG_TEXT_PP(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + Oid fdwid; + AclMode mode; + AclResult aclresult; + + roleid = get_role_oid_or_public(NameStr(*username)); + fdwid = convert_foreign_data_wrapper_name(fdwname); + mode = convert_foreign_data_wrapper_priv_string(priv_type_text); + + aclresult = object_aclcheck(ForeignDataWrapperRelationId, fdwid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_foreign_data_wrapper_privilege_name + * Check user privileges on a foreign-data wrapper given + * text fdwname and text priv name. + * current_user is assumed + */ +Datum +has_foreign_data_wrapper_privilege_name(PG_FUNCTION_ARGS) +{ + text *fdwname = PG_GETARG_TEXT_PP(0); + text *priv_type_text = PG_GETARG_TEXT_PP(1); + Oid roleid; + Oid fdwid; + AclMode mode; + AclResult aclresult; + + roleid = GetUserId(); + fdwid = convert_foreign_data_wrapper_name(fdwname); + mode = convert_foreign_data_wrapper_priv_string(priv_type_text); + + aclresult = object_aclcheck(ForeignDataWrapperRelationId, fdwid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_foreign_data_wrapper_privilege_name_id + * Check user privileges on a foreign-data wrapper given + * name usename, foreign-data wrapper oid, and text priv name. 
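+ *
+ *		Illustrative usage ("alice" is a placeholder role name); the OID form
+ *		is convenient when scanning the catalog:
+ *			SELECT fdwname, has_foreign_data_wrapper_privilege('alice', oid, 'USAGE')
+ *			FROM pg_foreign_data_wrapper;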
+ */ +Datum +has_foreign_data_wrapper_privilege_name_id(PG_FUNCTION_ARGS) +{ + Name username = PG_GETARG_NAME(0); + Oid fdwid = PG_GETARG_OID(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + AclMode mode; + AclResult aclresult; + + roleid = get_role_oid_or_public(NameStr(*username)); + mode = convert_foreign_data_wrapper_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(FOREIGNDATAWRAPPEROID, ObjectIdGetDatum(fdwid))) + PG_RETURN_NULL(); + + aclresult = object_aclcheck(ForeignDataWrapperRelationId, fdwid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_foreign_data_wrapper_privilege_id + * Check user privileges on a foreign-data wrapper given + * foreign-data wrapper oid, and text priv name. + * current_user is assumed + */ +Datum +has_foreign_data_wrapper_privilege_id(PG_FUNCTION_ARGS) +{ + Oid fdwid = PG_GETARG_OID(0); + text *priv_type_text = PG_GETARG_TEXT_PP(1); + Oid roleid; + AclMode mode; + AclResult aclresult; + + roleid = GetUserId(); + mode = convert_foreign_data_wrapper_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(FOREIGNDATAWRAPPEROID, ObjectIdGetDatum(fdwid))) + PG_RETURN_NULL(); + + aclresult = object_aclcheck(ForeignDataWrapperRelationId, fdwid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_foreign_data_wrapper_privilege_id_name + * Check user privileges on a foreign-data wrapper given + * roleid, text fdwname, and text priv name. + */ +Datum +has_foreign_data_wrapper_privilege_id_name(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + text *fdwname = PG_GETARG_TEXT_PP(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid fdwid; + AclMode mode; + AclResult aclresult; + + fdwid = convert_foreign_data_wrapper_name(fdwname); + mode = convert_foreign_data_wrapper_priv_string(priv_type_text); + + aclresult = object_aclcheck(ForeignDataWrapperRelationId, fdwid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_foreign_data_wrapper_privilege_id_id + * Check user privileges on a foreign-data wrapper given + * roleid, fdw oid, and text priv name. + */ +Datum +has_foreign_data_wrapper_privilege_id_id(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + Oid fdwid = PG_GETARG_OID(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + AclMode mode; + AclResult aclresult; + + mode = convert_foreign_data_wrapper_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(FOREIGNDATAWRAPPEROID, ObjectIdGetDatum(fdwid))) + PG_RETURN_NULL(); + + aclresult = object_aclcheck(ForeignDataWrapperRelationId, fdwid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * Support routines for has_foreign_data_wrapper_privilege family. + */ + +/* + * Given a FDW name expressed as a string, look it up and return Oid + */ +static Oid +convert_foreign_data_wrapper_name(text *fdwname) +{ + char *fdwstr = text_to_cstring(fdwname); + + return get_foreign_data_wrapper_oid(fdwstr, false); +} + +/* + * convert_foreign_data_wrapper_priv_string + * Convert text string to AclMode value. + */ +static AclMode +convert_foreign_data_wrapper_priv_string(text *priv_type_text) +{ + static const priv_map foreign_data_wrapper_priv_map[] = { + {"USAGE", ACL_USAGE}, + {"USAGE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_USAGE)}, + {NULL, 0} + }; + + return convert_any_priv_string(priv_type_text, foreign_data_wrapper_priv_map); +} + + +/* + * has_function_privilege variants + * These are all named "has_function_privilege" at the SQL level. 
+ * They take various combinations of function name, function OID, + * user name, user OID, or implicit user = current_user. + * + * The result is a boolean value: true if user has the indicated + * privilege, false if not, or NULL if object doesn't exist. + */ + +/* + * has_function_privilege_name_name + * Check user privileges on a function given + * name username, text functionname, and text priv name. + */ +Datum +has_function_privilege_name_name(PG_FUNCTION_ARGS) +{ + Name username = PG_GETARG_NAME(0); + text *functionname = PG_GETARG_TEXT_PP(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + Oid functionoid; + AclMode mode; + AclResult aclresult; + + roleid = get_role_oid_or_public(NameStr(*username)); + functionoid = convert_function_name(functionname); + mode = convert_function_priv_string(priv_type_text); + + aclresult = object_aclcheck(ProcedureRelationId, functionoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_function_privilege_name + * Check user privileges on a function given + * text functionname and text priv name. + * current_user is assumed + */ +Datum +has_function_privilege_name(PG_FUNCTION_ARGS) +{ + text *functionname = PG_GETARG_TEXT_PP(0); + text *priv_type_text = PG_GETARG_TEXT_PP(1); + Oid roleid; + Oid functionoid; + AclMode mode; + AclResult aclresult; + + roleid = GetUserId(); + functionoid = convert_function_name(functionname); + mode = convert_function_priv_string(priv_type_text); + + aclresult = object_aclcheck(ProcedureRelationId, functionoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_function_privilege_name_id + * Check user privileges on a function given + * name usename, function oid, and text priv name. + */ +Datum +has_function_privilege_name_id(PG_FUNCTION_ARGS) +{ + Name username = PG_GETARG_NAME(0); + Oid functionoid = PG_GETARG_OID(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + AclMode mode; + AclResult aclresult; + + roleid = get_role_oid_or_public(NameStr(*username)); + mode = convert_function_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(PROCOID, ObjectIdGetDatum(functionoid))) + PG_RETURN_NULL(); + + aclresult = object_aclcheck(ProcedureRelationId, functionoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_function_privilege_id + * Check user privileges on a function given + * function oid, and text priv name. + * current_user is assumed + */ +Datum +has_function_privilege_id(PG_FUNCTION_ARGS) +{ + Oid functionoid = PG_GETARG_OID(0); + text *priv_type_text = PG_GETARG_TEXT_PP(1); + Oid roleid; + AclMode mode; + AclResult aclresult; + + roleid = GetUserId(); + mode = convert_function_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(PROCOID, ObjectIdGetDatum(functionoid))) + PG_RETURN_NULL(); + + aclresult = object_aclcheck(ProcedureRelationId, functionoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_function_privilege_id_name + * Check user privileges on a function given + * roleid, text functionname, and text priv name. 
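+ *
+ *		The function name is parsed by regprocedurein, so the argument types
+ *		must be spelled out.  Illustrative usage ("alice" is a placeholder
+ *		role name):
+ *			SELECT has_function_privilege('alice'::regrole, 'lower(text)', 'EXECUTE');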
+ */ +Datum +has_function_privilege_id_name(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + text *functionname = PG_GETARG_TEXT_PP(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid functionoid; + AclMode mode; + AclResult aclresult; + + functionoid = convert_function_name(functionname); + mode = convert_function_priv_string(priv_type_text); + + aclresult = object_aclcheck(ProcedureRelationId, functionoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_function_privilege_id_id + * Check user privileges on a function given + * roleid, function oid, and text priv name. + */ +Datum +has_function_privilege_id_id(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + Oid functionoid = PG_GETARG_OID(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + AclMode mode; + AclResult aclresult; + + mode = convert_function_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(PROCOID, ObjectIdGetDatum(functionoid))) + PG_RETURN_NULL(); + + aclresult = object_aclcheck(ProcedureRelationId, functionoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * Support routines for has_function_privilege family. + */ + +/* + * Given a function name expressed as a string, look it up and return Oid + */ +static Oid +convert_function_name(text *functionname) +{ + char *funcname = text_to_cstring(functionname); + Oid oid; + + oid = DatumGetObjectId(DirectFunctionCall1(regprocedurein, + CStringGetDatum(funcname))); + + if (!OidIsValid(oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("function \"%s\" does not exist", funcname))); + + return oid; +} + +/* + * convert_function_priv_string + * Convert text string to AclMode value. + */ +static AclMode +convert_function_priv_string(text *priv_type_text) +{ + static const priv_map function_priv_map[] = { + {"EXECUTE", ACL_EXECUTE}, + {"EXECUTE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_EXECUTE)}, + {NULL, 0} + }; + + return convert_any_priv_string(priv_type_text, function_priv_map); +} + + +/* + * has_language_privilege variants + * These are all named "has_language_privilege" at the SQL level. + * They take various combinations of language name, language OID, + * user name, user OID, or implicit user = current_user. + * + * The result is a boolean value: true if user has the indicated + * privilege, false if not, or NULL if object doesn't exist. + */ + +/* + * has_language_privilege_name_name + * Check user privileges on a language given + * name username, text languagename, and text priv name. + */ +Datum +has_language_privilege_name_name(PG_FUNCTION_ARGS) +{ + Name username = PG_GETARG_NAME(0); + text *languagename = PG_GETARG_TEXT_PP(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + Oid languageoid; + AclMode mode; + AclResult aclresult; + + roleid = get_role_oid_or_public(NameStr(*username)); + languageoid = convert_language_name(languagename); + mode = convert_language_priv_string(priv_type_text); + + aclresult = object_aclcheck(LanguageRelationId, languageoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_language_privilege_name + * Check user privileges on a language given + * text languagename and text priv name. 
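+ *
+ *		Illustrative usage, checking the calling user's own privilege:
+ *			SELECT has_language_privilege('plpgsql', 'USAGE');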
+ * current_user is assumed + */ +Datum +has_language_privilege_name(PG_FUNCTION_ARGS) +{ + text *languagename = PG_GETARG_TEXT_PP(0); + text *priv_type_text = PG_GETARG_TEXT_PP(1); + Oid roleid; + Oid languageoid; + AclMode mode; + AclResult aclresult; + + roleid = GetUserId(); + languageoid = convert_language_name(languagename); + mode = convert_language_priv_string(priv_type_text); + + aclresult = object_aclcheck(LanguageRelationId, languageoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_language_privilege_name_id + * Check user privileges on a language given + * name usename, language oid, and text priv name. + */ +Datum +has_language_privilege_name_id(PG_FUNCTION_ARGS) +{ + Name username = PG_GETARG_NAME(0); + Oid languageoid = PG_GETARG_OID(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + AclMode mode; + AclResult aclresult; + + roleid = get_role_oid_or_public(NameStr(*username)); + mode = convert_language_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(LANGOID, ObjectIdGetDatum(languageoid))) + PG_RETURN_NULL(); + + aclresult = object_aclcheck(LanguageRelationId, languageoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_language_privilege_id + * Check user privileges on a language given + * language oid, and text priv name. + * current_user is assumed + */ +Datum +has_language_privilege_id(PG_FUNCTION_ARGS) +{ + Oid languageoid = PG_GETARG_OID(0); + text *priv_type_text = PG_GETARG_TEXT_PP(1); + Oid roleid; + AclMode mode; + AclResult aclresult; + + roleid = GetUserId(); + mode = convert_language_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(LANGOID, ObjectIdGetDatum(languageoid))) + PG_RETURN_NULL(); + + aclresult = object_aclcheck(LanguageRelationId, languageoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_language_privilege_id_name + * Check user privileges on a language given + * roleid, text languagename, and text priv name. + */ +Datum +has_language_privilege_id_name(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + text *languagename = PG_GETARG_TEXT_PP(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid languageoid; + AclMode mode; + AclResult aclresult; + + languageoid = convert_language_name(languagename); + mode = convert_language_priv_string(priv_type_text); + + aclresult = object_aclcheck(LanguageRelationId, languageoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_language_privilege_id_id + * Check user privileges on a language given + * roleid, language oid, and text priv name. + */ +Datum +has_language_privilege_id_id(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + Oid languageoid = PG_GETARG_OID(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + AclMode mode; + AclResult aclresult; + + mode = convert_language_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(LANGOID, ObjectIdGetDatum(languageoid))) + PG_RETURN_NULL(); + + aclresult = object_aclcheck(LanguageRelationId, languageoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * Support routines for has_language_privilege family. + */ + +/* + * Given a language name expressed as a string, look it up and return Oid + */ +static Oid +convert_language_name(text *languagename) +{ + char *langname = text_to_cstring(languagename); + + return get_language_oid(langname, false); +} + +/* + * convert_language_priv_string + * Convert text string to AclMode value. 
+ */ +static AclMode +convert_language_priv_string(text *priv_type_text) +{ + static const priv_map language_priv_map[] = { + {"USAGE", ACL_USAGE}, + {"USAGE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_USAGE)}, + {NULL, 0} + }; + + return convert_any_priv_string(priv_type_text, language_priv_map); +} + + +/* + * has_schema_privilege variants + * These are all named "has_schema_privilege" at the SQL level. + * They take various combinations of schema name, schema OID, + * user name, user OID, or implicit user = current_user. + * + * The result is a boolean value: true if user has the indicated + * privilege, false if not, or NULL if object doesn't exist. + */ + +/* + * has_schema_privilege_name_name + * Check user privileges on a schema given + * name username, text schemaname, and text priv name. + */ +Datum +has_schema_privilege_name_name(PG_FUNCTION_ARGS) +{ + Name username = PG_GETARG_NAME(0); + text *schemaname = PG_GETARG_TEXT_PP(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + Oid schemaoid; + AclMode mode; + AclResult aclresult; + + roleid = get_role_oid_or_public(NameStr(*username)); + schemaoid = convert_schema_name(schemaname); + mode = convert_schema_priv_string(priv_type_text); + + aclresult = object_aclcheck(NamespaceRelationId, schemaoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_schema_privilege_name + * Check user privileges on a schema given + * text schemaname and text priv name. + * current_user is assumed + */ +Datum +has_schema_privilege_name(PG_FUNCTION_ARGS) +{ + text *schemaname = PG_GETARG_TEXT_PP(0); + text *priv_type_text = PG_GETARG_TEXT_PP(1); + Oid roleid; + Oid schemaoid; + AclMode mode; + AclResult aclresult; + + roleid = GetUserId(); + schemaoid = convert_schema_name(schemaname); + mode = convert_schema_priv_string(priv_type_text); + + aclresult = object_aclcheck(NamespaceRelationId, schemaoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_schema_privilege_name_id + * Check user privileges on a schema given + * name usename, schema oid, and text priv name. + */ +Datum +has_schema_privilege_name_id(PG_FUNCTION_ARGS) +{ + Name username = PG_GETARG_NAME(0); + Oid schemaoid = PG_GETARG_OID(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + AclMode mode; + AclResult aclresult; + + roleid = get_role_oid_or_public(NameStr(*username)); + mode = convert_schema_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(NAMESPACEOID, ObjectIdGetDatum(schemaoid))) + PG_RETURN_NULL(); + + aclresult = object_aclcheck(NamespaceRelationId, schemaoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_schema_privilege_id + * Check user privileges on a schema given + * schema oid, and text priv name. + * current_user is assumed + */ +Datum +has_schema_privilege_id(PG_FUNCTION_ARGS) +{ + Oid schemaoid = PG_GETARG_OID(0); + text *priv_type_text = PG_GETARG_TEXT_PP(1); + Oid roleid; + AclMode mode; + AclResult aclresult; + + roleid = GetUserId(); + mode = convert_schema_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(NAMESPACEOID, ObjectIdGetDatum(schemaoid))) + PG_RETURN_NULL(); + + aclresult = object_aclcheck(NamespaceRelationId, schemaoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_schema_privilege_id_name + * Check user privileges on a schema given + * roleid, text schemaname, and text priv name. 
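+ *
+ *		Illustrative usage ("alice" is a placeholder role name):
+ *			SELECT has_schema_privilege('alice'::regrole, 'public', 'CREATE');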
+ */ +Datum +has_schema_privilege_id_name(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + text *schemaname = PG_GETARG_TEXT_PP(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid schemaoid; + AclMode mode; + AclResult aclresult; + + schemaoid = convert_schema_name(schemaname); + mode = convert_schema_priv_string(priv_type_text); + + aclresult = object_aclcheck(NamespaceRelationId, schemaoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_schema_privilege_id_id + * Check user privileges on a schema given + * roleid, schema oid, and text priv name. + */ +Datum +has_schema_privilege_id_id(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + Oid schemaoid = PG_GETARG_OID(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + AclMode mode; + AclResult aclresult; + + mode = convert_schema_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(NAMESPACEOID, ObjectIdGetDatum(schemaoid))) + PG_RETURN_NULL(); + + aclresult = object_aclcheck(NamespaceRelationId, schemaoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * Support routines for has_schema_privilege family. + */ + +/* + * Given a schema name expressed as a string, look it up and return Oid + */ +static Oid +convert_schema_name(text *schemaname) +{ + char *nspname = text_to_cstring(schemaname); + + return get_namespace_oid(nspname, false); +} + +/* + * convert_schema_priv_string + * Convert text string to AclMode value. + */ +static AclMode +convert_schema_priv_string(text *priv_type_text) +{ + static const priv_map schema_priv_map[] = { + {"CREATE", ACL_CREATE}, + {"CREATE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_CREATE)}, + {"USAGE", ACL_USAGE}, + {"USAGE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_USAGE)}, + {NULL, 0} + }; + + return convert_any_priv_string(priv_type_text, schema_priv_map); +} + + +/* + * has_server_privilege variants + * These are all named "has_server_privilege" at the SQL level. + * They take various combinations of foreign server name, + * server OID, user name, user OID, or implicit user = current_user. + * + * The result is a boolean value: true if user has the indicated + * privilege, false if not. + */ + +/* + * has_server_privilege_name_name + * Check user privileges on a foreign server given + * name username, text servername, and text priv name. + */ +Datum +has_server_privilege_name_name(PG_FUNCTION_ARGS) +{ + Name username = PG_GETARG_NAME(0); + text *servername = PG_GETARG_TEXT_PP(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + Oid serverid; + AclMode mode; + AclResult aclresult; + + roleid = get_role_oid_or_public(NameStr(*username)); + serverid = convert_server_name(servername); + mode = convert_server_priv_string(priv_type_text); + + aclresult = object_aclcheck(ForeignServerRelationId, serverid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_server_privilege_name + * Check user privileges on a foreign server given + * text servername and text priv name. 
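+ *
+ *		Illustrative usage ("app_server" is a placeholder foreign server name):
+ *			SELECT has_server_privilege('app_server', 'USAGE');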
+ * current_user is assumed + */ +Datum +has_server_privilege_name(PG_FUNCTION_ARGS) +{ + text *servername = PG_GETARG_TEXT_PP(0); + text *priv_type_text = PG_GETARG_TEXT_PP(1); + Oid roleid; + Oid serverid; + AclMode mode; + AclResult aclresult; + + roleid = GetUserId(); + serverid = convert_server_name(servername); + mode = convert_server_priv_string(priv_type_text); + + aclresult = object_aclcheck(ForeignServerRelationId, serverid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_server_privilege_name_id + * Check user privileges on a foreign server given + * name usename, foreign server oid, and text priv name. + */ +Datum +has_server_privilege_name_id(PG_FUNCTION_ARGS) +{ + Name username = PG_GETARG_NAME(0); + Oid serverid = PG_GETARG_OID(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + AclMode mode; + AclResult aclresult; + + roleid = get_role_oid_or_public(NameStr(*username)); + mode = convert_server_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(FOREIGNSERVEROID, ObjectIdGetDatum(serverid))) + PG_RETURN_NULL(); + + aclresult = object_aclcheck(ForeignServerRelationId, serverid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_server_privilege_id + * Check user privileges on a foreign server given + * server oid, and text priv name. + * current_user is assumed + */ +Datum +has_server_privilege_id(PG_FUNCTION_ARGS) +{ + Oid serverid = PG_GETARG_OID(0); + text *priv_type_text = PG_GETARG_TEXT_PP(1); + Oid roleid; + AclMode mode; + AclResult aclresult; + + roleid = GetUserId(); + mode = convert_server_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(FOREIGNSERVEROID, ObjectIdGetDatum(serverid))) + PG_RETURN_NULL(); + + aclresult = object_aclcheck(ForeignServerRelationId, serverid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_server_privilege_id_name + * Check user privileges on a foreign server given + * roleid, text servername, and text priv name. + */ +Datum +has_server_privilege_id_name(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + text *servername = PG_GETARG_TEXT_PP(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid serverid; + AclMode mode; + AclResult aclresult; + + serverid = convert_server_name(servername); + mode = convert_server_priv_string(priv_type_text); + + aclresult = object_aclcheck(ForeignServerRelationId, serverid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_server_privilege_id_id + * Check user privileges on a foreign server given + * roleid, server oid, and text priv name. + */ +Datum +has_server_privilege_id_id(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + Oid serverid = PG_GETARG_OID(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + AclMode mode; + AclResult aclresult; + + mode = convert_server_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(FOREIGNSERVEROID, ObjectIdGetDatum(serverid))) + PG_RETURN_NULL(); + + aclresult = object_aclcheck(ForeignServerRelationId, serverid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * Support routines for has_server_privilege family. + */ + +/* + * Given a server name expressed as a string, look it up and return Oid + */ +static Oid +convert_server_name(text *servername) +{ + char *serverstr = text_to_cstring(servername); + + return get_foreign_server_oid(serverstr, false); +} + +/* + * convert_server_priv_string + * Convert text string to AclMode value. 
+ */ +static AclMode +convert_server_priv_string(text *priv_type_text) +{ + static const priv_map server_priv_map[] = { + {"USAGE", ACL_USAGE}, + {"USAGE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_USAGE)}, + {NULL, 0} + }; + + return convert_any_priv_string(priv_type_text, server_priv_map); +} + + +/* + * has_tablespace_privilege variants + * These are all named "has_tablespace_privilege" at the SQL level. + * They take various combinations of tablespace name, tablespace OID, + * user name, user OID, or implicit user = current_user. + * + * The result is a boolean value: true if user has the indicated + * privilege, false if not. + */ + +/* + * has_tablespace_privilege_name_name + * Check user privileges on a tablespace given + * name username, text tablespacename, and text priv name. + */ +Datum +has_tablespace_privilege_name_name(PG_FUNCTION_ARGS) +{ + Name username = PG_GETARG_NAME(0); + text *tablespacename = PG_GETARG_TEXT_PP(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + Oid tablespaceoid; + AclMode mode; + AclResult aclresult; + + roleid = get_role_oid_or_public(NameStr(*username)); + tablespaceoid = convert_tablespace_name(tablespacename); + mode = convert_tablespace_priv_string(priv_type_text); + + aclresult = object_aclcheck(TableSpaceRelationId, tablespaceoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_tablespace_privilege_name + * Check user privileges on a tablespace given + * text tablespacename and text priv name. + * current_user is assumed + */ +Datum +has_tablespace_privilege_name(PG_FUNCTION_ARGS) +{ + text *tablespacename = PG_GETARG_TEXT_PP(0); + text *priv_type_text = PG_GETARG_TEXT_PP(1); + Oid roleid; + Oid tablespaceoid; + AclMode mode; + AclResult aclresult; + + roleid = GetUserId(); + tablespaceoid = convert_tablespace_name(tablespacename); + mode = convert_tablespace_priv_string(priv_type_text); + + aclresult = object_aclcheck(TableSpaceRelationId, tablespaceoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_tablespace_privilege_name_id + * Check user privileges on a tablespace given + * name usename, tablespace oid, and text priv name. + */ +Datum +has_tablespace_privilege_name_id(PG_FUNCTION_ARGS) +{ + Name username = PG_GETARG_NAME(0); + Oid tablespaceoid = PG_GETARG_OID(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + AclMode mode; + AclResult aclresult; + + roleid = get_role_oid_or_public(NameStr(*username)); + mode = convert_tablespace_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(TABLESPACEOID, ObjectIdGetDatum(tablespaceoid))) + PG_RETURN_NULL(); + + aclresult = object_aclcheck(TableSpaceRelationId, tablespaceoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_tablespace_privilege_id + * Check user privileges on a tablespace given + * tablespace oid, and text priv name. 
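+ *
+ *		Illustrative usage, scanning pg_tablespace for the calling user:
+ *			SELECT spcname, has_tablespace_privilege(oid, 'CREATE') FROM pg_tablespace;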
+ * current_user is assumed + */ +Datum +has_tablespace_privilege_id(PG_FUNCTION_ARGS) +{ + Oid tablespaceoid = PG_GETARG_OID(0); + text *priv_type_text = PG_GETARG_TEXT_PP(1); + Oid roleid; + AclMode mode; + AclResult aclresult; + + roleid = GetUserId(); + mode = convert_tablespace_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(TABLESPACEOID, ObjectIdGetDatum(tablespaceoid))) + PG_RETURN_NULL(); + + aclresult = object_aclcheck(TableSpaceRelationId, tablespaceoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_tablespace_privilege_id_name + * Check user privileges on a tablespace given + * roleid, text tablespacename, and text priv name. + */ +Datum +has_tablespace_privilege_id_name(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + text *tablespacename = PG_GETARG_TEXT_PP(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid tablespaceoid; + AclMode mode; + AclResult aclresult; + + tablespaceoid = convert_tablespace_name(tablespacename); + mode = convert_tablespace_priv_string(priv_type_text); + + aclresult = object_aclcheck(TableSpaceRelationId, tablespaceoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_tablespace_privilege_id_id + * Check user privileges on a tablespace given + * roleid, tablespace oid, and text priv name. + */ +Datum +has_tablespace_privilege_id_id(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + Oid tablespaceoid = PG_GETARG_OID(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + AclMode mode; + AclResult aclresult; + + mode = convert_tablespace_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(TABLESPACEOID, ObjectIdGetDatum(tablespaceoid))) + PG_RETURN_NULL(); + + aclresult = object_aclcheck(TableSpaceRelationId, tablespaceoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * Support routines for has_tablespace_privilege family. + */ + +/* + * Given a tablespace name expressed as a string, look it up and return Oid + */ +static Oid +convert_tablespace_name(text *tablespacename) +{ + char *spcname = text_to_cstring(tablespacename); + + return get_tablespace_oid(spcname, false); +} + +/* + * convert_tablespace_priv_string + * Convert text string to AclMode value. + */ +static AclMode +convert_tablespace_priv_string(text *priv_type_text) +{ + static const priv_map tablespace_priv_map[] = { + {"CREATE", ACL_CREATE}, + {"CREATE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_CREATE)}, + {NULL, 0} + }; + + return convert_any_priv_string(priv_type_text, tablespace_priv_map); +} + +/* + * has_type_privilege variants + * These are all named "has_type_privilege" at the SQL level. + * They take various combinations of type name, type OID, + * user name, user OID, or implicit user = current_user. + * + * The result is a boolean value: true if user has the indicated + * privilege, false if not, or NULL if object doesn't exist. + */ + +/* + * has_type_privilege_name_name + * Check user privileges on a type given + * name username, text typename, and text priv name. 
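+ *
+ *		The type name is parsed by regtypein.  Illustrative usage ("alice" is
+ *		a placeholder role name):
+ *			SELECT has_type_privilege('alice', 'integer', 'USAGE');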
+ */ +Datum +has_type_privilege_name_name(PG_FUNCTION_ARGS) +{ + Name username = PG_GETARG_NAME(0); + text *typename = PG_GETARG_TEXT_PP(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + Oid typeoid; + AclMode mode; + AclResult aclresult; + + roleid = get_role_oid_or_public(NameStr(*username)); + typeoid = convert_type_name(typename); + mode = convert_type_priv_string(priv_type_text); + + aclresult = object_aclcheck(TypeRelationId, typeoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_type_privilege_name + * Check user privileges on a type given + * text typename and text priv name. + * current_user is assumed + */ +Datum +has_type_privilege_name(PG_FUNCTION_ARGS) +{ + text *typename = PG_GETARG_TEXT_PP(0); + text *priv_type_text = PG_GETARG_TEXT_PP(1); + Oid roleid; + Oid typeoid; + AclMode mode; + AclResult aclresult; + + roleid = GetUserId(); + typeoid = convert_type_name(typename); + mode = convert_type_priv_string(priv_type_text); + + aclresult = object_aclcheck(TypeRelationId, typeoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_type_privilege_name_id + * Check user privileges on a type given + * name usename, type oid, and text priv name. + */ +Datum +has_type_privilege_name_id(PG_FUNCTION_ARGS) +{ + Name username = PG_GETARG_NAME(0); + Oid typeoid = PG_GETARG_OID(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + AclMode mode; + AclResult aclresult; + + roleid = get_role_oid_or_public(NameStr(*username)); + mode = convert_type_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(TYPEOID, ObjectIdGetDatum(typeoid))) + PG_RETURN_NULL(); + + aclresult = object_aclcheck(TypeRelationId, typeoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_type_privilege_id + * Check user privileges on a type given + * type oid, and text priv name. + * current_user is assumed + */ +Datum +has_type_privilege_id(PG_FUNCTION_ARGS) +{ + Oid typeoid = PG_GETARG_OID(0); + text *priv_type_text = PG_GETARG_TEXT_PP(1); + Oid roleid; + AclMode mode; + AclResult aclresult; + + roleid = GetUserId(); + mode = convert_type_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(TYPEOID, ObjectIdGetDatum(typeoid))) + PG_RETURN_NULL(); + + aclresult = object_aclcheck(TypeRelationId, typeoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_type_privilege_id_name + * Check user privileges on a type given + * roleid, text typename, and text priv name. + */ +Datum +has_type_privilege_id_name(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + text *typename = PG_GETARG_TEXT_PP(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid typeoid; + AclMode mode; + AclResult aclresult; + + typeoid = convert_type_name(typename); + mode = convert_type_priv_string(priv_type_text); + + aclresult = object_aclcheck(TypeRelationId, typeoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * has_type_privilege_id_id + * Check user privileges on a type given + * roleid, type oid, and text priv name. 
+ */ +Datum +has_type_privilege_id_id(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + Oid typeoid = PG_GETARG_OID(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + AclMode mode; + AclResult aclresult; + + mode = convert_type_priv_string(priv_type_text); + + if (!SearchSysCacheExists1(TYPEOID, ObjectIdGetDatum(typeoid))) + PG_RETURN_NULL(); + + aclresult = object_aclcheck(TypeRelationId, typeoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * Support routines for has_type_privilege family. + */ + +/* + * Given a type name expressed as a string, look it up and return Oid + */ +static Oid +convert_type_name(text *typename) +{ + char *typname = text_to_cstring(typename); + Oid oid; + + oid = DatumGetObjectId(DirectFunctionCall1(regtypein, + CStringGetDatum(typname))); + + if (!OidIsValid(oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("type \"%s\" does not exist", typname))); + + return oid; +} + +/* + * convert_type_priv_string + * Convert text string to AclMode value. + */ +static AclMode +convert_type_priv_string(text *priv_type_text) +{ + static const priv_map type_priv_map[] = { + {"USAGE", ACL_USAGE}, + {"USAGE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_USAGE)}, + {NULL, 0} + }; + + return convert_any_priv_string(priv_type_text, type_priv_map); +} + +/* + * has_parameter_privilege variants + * These are all named "has_parameter_privilege" at the SQL level. + * They take various combinations of parameter name with + * user name, user OID, or implicit user = current_user. + * + * The result is a boolean value: true if user has been granted + * the indicated privilege or false if not. + */ + +/* + * has_param_priv_byname + * + * Helper function to check user privileges on a parameter given the + * role by Oid, parameter by text name, and privileges as AclMode. + */ +static bool +has_param_priv_byname(Oid roleid, const text *parameter, AclMode priv) +{ + char *paramstr = text_to_cstring(parameter); + + return pg_parameter_aclcheck(paramstr, roleid, priv) == ACLCHECK_OK; +} + +/* + * has_parameter_privilege_name_name + * Check user privileges on a parameter given name username, text + * parameter, and text priv name. + */ +Datum +has_parameter_privilege_name_name(PG_FUNCTION_ARGS) +{ + Name username = PG_GETARG_NAME(0); + text *parameter = PG_GETARG_TEXT_PP(1); + AclMode priv = convert_parameter_priv_string(PG_GETARG_TEXT_PP(2)); + Oid roleid = get_role_oid_or_public(NameStr(*username)); + + PG_RETURN_BOOL(has_param_priv_byname(roleid, parameter, priv)); +} + +/* + * has_parameter_privilege_name + * Check user privileges on a parameter given text parameter and text priv + * name. current_user is assumed + */ +Datum +has_parameter_privilege_name(PG_FUNCTION_ARGS) +{ + text *parameter = PG_GETARG_TEXT_PP(0); + AclMode priv = convert_parameter_priv_string(PG_GETARG_TEXT_PP(1)); + + PG_RETURN_BOOL(has_param_priv_byname(GetUserId(), parameter, priv)); +} + +/* + * has_parameter_privilege_id_name + * Check user privileges on a parameter given roleid, text parameter, and + * text priv name. + */ +Datum +has_parameter_privilege_id_name(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + text *parameter = PG_GETARG_TEXT_PP(1); + AclMode priv = convert_parameter_priv_string(PG_GETARG_TEXT_PP(2)); + + PG_RETURN_BOOL(has_param_priv_byname(roleid, parameter, priv)); +} + +/* + * Support routines for has_parameter_privilege family. + */ + +/* + * convert_parameter_priv_string + * Convert text string to AclMode value. 
+ */ +static AclMode +convert_parameter_priv_string(text *priv_text) +{ + static const priv_map parameter_priv_map[] = { + {"SET", ACL_SET}, + {"SET WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_SET)}, + {"ALTER SYSTEM", ACL_ALTER_SYSTEM}, + {"ALTER SYSTEM WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_ALTER_SYSTEM)}, + {NULL, 0} + }; + + return convert_any_priv_string(priv_text, parameter_priv_map); +} + +/* + * pg_has_role variants + * These are all named "pg_has_role" at the SQL level. + * They take various combinations of role name, role OID, + * user name, user OID, or implicit user = current_user. + * + * The result is a boolean value: true if user has the indicated + * privilege, false if not. + */ + +/* + * pg_has_role_name_name + * Check user privileges on a role given + * name username, name rolename, and text priv name. + */ +Datum +pg_has_role_name_name(PG_FUNCTION_ARGS) +{ + Name username = PG_GETARG_NAME(0); + Name rolename = PG_GETARG_NAME(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + Oid roleoid; + AclMode mode; + AclResult aclresult; + + roleid = get_role_oid(NameStr(*username), false); + roleoid = get_role_oid(NameStr(*rolename), false); + mode = convert_role_priv_string(priv_type_text); + + aclresult = pg_role_aclcheck(roleoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * pg_has_role_name + * Check user privileges on a role given + * name rolename and text priv name. + * current_user is assumed + */ +Datum +pg_has_role_name(PG_FUNCTION_ARGS) +{ + Name rolename = PG_GETARG_NAME(0); + text *priv_type_text = PG_GETARG_TEXT_PP(1); + Oid roleid; + Oid roleoid; + AclMode mode; + AclResult aclresult; + + roleid = GetUserId(); + roleoid = get_role_oid(NameStr(*rolename), false); + mode = convert_role_priv_string(priv_type_text); + + aclresult = pg_role_aclcheck(roleoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * pg_has_role_name_id + * Check user privileges on a role given + * name usename, role oid, and text priv name. + */ +Datum +pg_has_role_name_id(PG_FUNCTION_ARGS) +{ + Name username = PG_GETARG_NAME(0); + Oid roleoid = PG_GETARG_OID(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleid; + AclMode mode; + AclResult aclresult; + + roleid = get_role_oid(NameStr(*username), false); + mode = convert_role_priv_string(priv_type_text); + + aclresult = pg_role_aclcheck(roleoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * pg_has_role_id + * Check user privileges on a role given + * role oid, and text priv name. + * current_user is assumed + */ +Datum +pg_has_role_id(PG_FUNCTION_ARGS) +{ + Oid roleoid = PG_GETARG_OID(0); + text *priv_type_text = PG_GETARG_TEXT_PP(1); + Oid roleid; + AclMode mode; + AclResult aclresult; + + roleid = GetUserId(); + mode = convert_role_priv_string(priv_type_text); + + aclresult = pg_role_aclcheck(roleoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * pg_has_role_id_name + * Check user privileges on a role given + * roleid, name rolename, and text priv name. 
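/*
 * A minimal standalone sketch (not part of the committed acl.c) of the
 * priv_map lookup pattern shared by the convert_*_priv_string() helpers
 * above: a NULL-terminated table maps a case-insensitive privilege name to
 * a bit mask.  All demo_* names, the bit values and the grant-option shift
 * are illustrative stand-ins, not the real ACL constants.
 */
#include <stdio.h>
#include <stdint.h>
#include <strings.h>            /* strcasecmp(); assumes a POSIX libc */

typedef struct
{
    const char *name;
    uint32_t    value;
} demo_priv_map;

#define DEMO_ACL_USAGE              (1u << 0)
#define DEMO_ACL_CREATE             (1u << 1)
#define DEMO_GRANT_OPTION_FOR(bits) ((bits) << 16)  /* "WITH GRANT OPTION" form */

static const demo_priv_map demo_map[] = {
    {"USAGE", DEMO_ACL_USAGE},
    {"CREATE", DEMO_ACL_CREATE},
    {"CREATE WITH GRANT OPTION", DEMO_GRANT_OPTION_FOR(DEMO_ACL_CREATE)},
    {NULL, 0}
};

static uint32_t
demo_convert_priv_string(const char *priv)
{
    for (const demo_priv_map *p = demo_map; p->name != NULL; p++)
        if (strcasecmp(p->name, priv) == 0)
            return p->value;
    return 0;                   /* the real helper raises an error instead */
}

int
main(void)
{
    printf("usage -> 0x%x\n", (unsigned) demo_convert_priv_string("usage"));
    printf("create with grant option -> 0x%x\n",
           (unsigned) demo_convert_priv_string("create with grant option"));
    return 0;
}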
+ */ +Datum +pg_has_role_id_name(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + Name rolename = PG_GETARG_NAME(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + Oid roleoid; + AclMode mode; + AclResult aclresult; + + roleoid = get_role_oid(NameStr(*rolename), false); + mode = convert_role_priv_string(priv_type_text); + + aclresult = pg_role_aclcheck(roleoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * pg_has_role_id_id + * Check user privileges on a role given + * roleid, role oid, and text priv name. + */ +Datum +pg_has_role_id_id(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + Oid roleoid = PG_GETARG_OID(1); + text *priv_type_text = PG_GETARG_TEXT_PP(2); + AclMode mode; + AclResult aclresult; + + mode = convert_role_priv_string(priv_type_text); + + aclresult = pg_role_aclcheck(roleoid, roleid, mode); + + PG_RETURN_BOOL(aclresult == ACLCHECK_OK); +} + +/* + * Support routines for pg_has_role family. + */ + +/* + * convert_role_priv_string + * Convert text string to AclMode value. + * + * We use USAGE to denote whether the privileges of the role are accessible + * (has_privs_of_role), MEMBER to denote is_member, and MEMBER WITH GRANT + * (or ADMIN) OPTION to denote is_admin. There is no ACL bit corresponding + * to MEMBER so we cheat and use ACL_CREATE for that. This convention + * is shared only with pg_role_aclcheck, below. + */ +static AclMode +convert_role_priv_string(text *priv_type_text) +{ + static const priv_map role_priv_map[] = { + {"USAGE", ACL_USAGE}, + {"MEMBER", ACL_CREATE}, + {"SET", ACL_SET}, + {"USAGE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_CREATE)}, + {"USAGE WITH ADMIN OPTION", ACL_GRANT_OPTION_FOR(ACL_CREATE)}, + {"MEMBER WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_CREATE)}, + {"MEMBER WITH ADMIN OPTION", ACL_GRANT_OPTION_FOR(ACL_CREATE)}, + {"SET WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_CREATE)}, + {"SET WITH ADMIN OPTION", ACL_GRANT_OPTION_FOR(ACL_CREATE)}, + {NULL, 0} + }; + + return convert_any_priv_string(priv_type_text, role_priv_map); +} + +/* + * pg_role_aclcheck + * Quick-and-dirty support for pg_has_role + */ +static AclResult +pg_role_aclcheck(Oid role_oid, Oid roleid, AclMode mode) +{ + if (mode & ACL_GRANT_OPTION_FOR(ACL_CREATE)) + { + if (is_admin_of_role(roleid, role_oid)) + return ACLCHECK_OK; + } + if (mode & ACL_CREATE) + { + if (is_member_of_role(roleid, role_oid)) + return ACLCHECK_OK; + } + if (mode & ACL_USAGE) + { + if (has_privs_of_role(roleid, role_oid)) + return ACLCHECK_OK; + } + if (mode & ACL_SET) + { + if (member_can_set_role(roleid, role_oid)) + return ACLCHECK_OK; + } + return ACLCHECK_NO_PRIV; +} + + +/* + * initialization function (called by InitPostgres) + */ +void +initialize_acl(void) +{ + if (!IsBootstrapProcessingMode()) + { + cached_db_hash = + GetSysCacheHashValue1(DATABASEOID, + ObjectIdGetDatum(MyDatabaseId)); + + /* + * In normal mode, set a callback on any syscache invalidation of rows + * of pg_auth_members (for roles_is_member_of()) pg_database (for + * roles_is_member_of()) + */ + CacheRegisterSyscacheCallback(AUTHMEMROLEMEM, + RoleMembershipCacheCallback, + (Datum) 0); + CacheRegisterSyscacheCallback(AUTHOID, + RoleMembershipCacheCallback, + (Datum) 0); + CacheRegisterSyscacheCallback(DATABASEOID, + RoleMembershipCacheCallback, + (Datum) 0); + } +} + +/* + * RoleMembershipCacheCallback + * Syscache inval callback function + */ +static void +RoleMembershipCacheCallback(Datum arg, int cacheid, uint32 hashvalue) +{ + if (cacheid == DATABASEOID && + hashvalue != 
cached_db_hash && + hashvalue != 0) + { + return; /* ignore pg_database changes for other DBs */ + } + + /* Force membership caches to be recomputed on next use */ + cached_role[ROLERECURSE_MEMBERS] = InvalidOid; + cached_role[ROLERECURSE_PRIVS] = InvalidOid; + cached_role[ROLERECURSE_SETROLE] = InvalidOid; +} + +/* + * Get a list of roles that the specified roleid is a member of + * + * Type ROLERECURSE_MEMBERS recurses through all grants; ROLERECURSE_PRIVS + * recurses only through inheritable grants; and ROLERECURSE_SETROLE recurses + * only through grants with set_option. + * + * Since indirect membership testing is relatively expensive, we cache + * a list of memberships. Hence, the result is only guaranteed good until + * the next call of roles_is_member_of()! + * + * For the benefit of select_best_grantor, the result is defined to be + * in breadth-first order, ie, closer relationships earlier. + * + * If admin_of is not InvalidOid, this function sets *admin_role, either + * to the OID of the first role in the result list that directly possesses + * ADMIN OPTION on the role corresponding to admin_of, or to InvalidOid if + * there is no such role. + */ +static List * +roles_is_member_of(Oid roleid, enum RoleRecurseType type, + Oid admin_of, Oid *admin_role) +{ + Oid dba; + List *roles_list; + ListCell *l; + List *new_cached_roles; + MemoryContext oldctx; + + Assert(OidIsValid(admin_of) == PointerIsValid(admin_role)); + if (admin_role != NULL) + *admin_role = InvalidOid; + + /* If cache is valid and ADMIN OPTION not sought, just return the list */ + if (cached_role[type] == roleid && !OidIsValid(admin_of) && + OidIsValid(cached_role[type])) + return cached_roles[type]; + + /* + * Role expansion happens in a non-database backend when guc.c checks + * ROLE_PG_READ_ALL_SETTINGS for a physical walsender SHOW command. In + * that case, no role gets pg_database_owner. + */ + if (!OidIsValid(MyDatabaseId)) + dba = InvalidOid; + else + { + HeapTuple dbtup; + + dbtup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId)); + if (!HeapTupleIsValid(dbtup)) + elog(ERROR, "cache lookup failed for database %u", MyDatabaseId); + dba = ((Form_pg_database) GETSTRUCT(dbtup))->datdba; + ReleaseSysCache(dbtup); + } + + /* + * Find all the roles that roleid is a member of, including multi-level + * recursion. The role itself will always be the first element of the + * resulting list. + * + * Each element of the list is scanned to see if it adds any indirect + * memberships. We can use a single list as both the record of + * already-found memberships and the agenda of roles yet to be scanned. + * This is a bit tricky but works because the foreach() macro doesn't + * fetch the next list element until the bottom of the loop. + */ + roles_list = list_make1_oid(roleid); + + foreach(l, roles_list) + { + Oid memberid = lfirst_oid(l); + CatCList *memlist; + int i; + + /* Find roles that memberid is directly a member of */ + memlist = SearchSysCacheList1(AUTHMEMMEMROLE, + ObjectIdGetDatum(memberid)); + for (i = 0; i < memlist->n_members; i++) + { + HeapTuple tup = &memlist->members[i]->tuple; + Form_pg_auth_members form = (Form_pg_auth_members) GETSTRUCT(tup); + Oid otherid = form->roleid; + + /* + * While otherid==InvalidOid shouldn't appear in the catalog, the + * OidIsValid() avoids crashing if that arises. + */ + if (otherid == admin_of && form->admin_option && + OidIsValid(admin_of) && !OidIsValid(*admin_role)) + *admin_role = memberid; + + /* If we're supposed to ignore non-heritable grants, do so. 
*/ + if (type == ROLERECURSE_PRIVS && !form->inherit_option) + continue; + + /* If we're supposed to ignore non-SET grants, do so. */ + if (type == ROLERECURSE_SETROLE && !form->set_option) + continue; + + /* + * Even though there shouldn't be any loops in the membership + * graph, we must test for having already seen this role. It is + * legal for instance to have both A->B and A->C->B. + */ + roles_list = list_append_unique_oid(roles_list, otherid); + } + ReleaseSysCacheList(memlist); + + /* implement pg_database_owner implicit membership */ + if (memberid == dba && OidIsValid(dba)) + roles_list = list_append_unique_oid(roles_list, + ROLE_PG_DATABASE_OWNER); + } + + /* + * Copy the completed list into TopMemoryContext so it will persist. + */ + oldctx = MemoryContextSwitchTo(TopMemoryContext); + new_cached_roles = list_copy(roles_list); + MemoryContextSwitchTo(oldctx); + list_free(roles_list); + + /* + * Now safe to assign to state variable + */ + cached_role[type] = InvalidOid; /* just paranoia */ + list_free(cached_roles[type]); + cached_roles[type] = new_cached_roles; + cached_role[type] = roleid; + + /* And now we can return the answer */ + return cached_roles[type]; +} + + +/* + * Does member have the privileges of role (directly or indirectly)? + * + * This is defined not to recurse through grants that are not inherited, + * and only inherited grants confer the associated privileges automatically. + * + * See also member_can_set_role, below. + */ +bool +has_privs_of_role(Oid member, Oid role) +{ + /* Fast path for simple case */ + if (member == role) + return true; + + /* Superusers have every privilege, so are part of every role */ + if (superuser_arg(member)) + return true; + + /* + * Find all the roles that member has the privileges of, including + * multi-level recursion, then see if target role is any one of them. + */ + return list_member_oid(roles_is_member_of(member, ROLERECURSE_PRIVS, + InvalidOid, NULL), + role); +} + +/* + * Can member use SET ROLE to this role? + * + * There must be a chain of grants from 'member' to 'role' each of which + * permits SET ROLE; that is, each of which has set_option = true. + * + * It doesn't matter whether the grants are inheritable. That's a separate + * question; see has_privs_of_role. + * + * This function should be used to determine whether the session user can + * use SET ROLE to become the target user. We also use it to determine whether + * the session user can change an existing object to be owned by the target + * user, or create new objects owned by the target user. + */ +bool +member_can_set_role(Oid member, Oid role) +{ + /* Fast path for simple case */ + if (member == role) + return true; + + /* Superusers have every privilege, so can always SET ROLE */ + if (superuser_arg(member)) + return true; + + /* + * Find all the roles that member can access via SET ROLE, including + * multi-level recursion, then see if target role is any one of them. + */ + return list_member_oid(roles_is_member_of(member, ROLERECURSE_SETROLE, + InvalidOid, NULL), + role); +} + +/* + * Permission violation error unless able to SET ROLE to target role. + */ +void +check_can_set_role(Oid member, Oid role) +{ + if (!member_can_set_role(member, role)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be able to SET ROLE \"%s\"", + GetUserNameFromId(role, false)))); +} + +/* + * Is member a member of role (directly or indirectly)? + * + * This is defined to recurse through grants whether they are inherited or not. 
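/*
 * A standalone sketch (not part of the committed acl.c) of the worklist
 * technique described in roles_is_member_of() above: a single list serves
 * both as the set of memberships found so far and as the agenda of roles
 * still to be expanded, which yields a breadth-first ordering and tolerates
 * duplicate grant paths.  The fixed demo_grants table stands in for the
 * pg_auth_members lookups.
 */
#include <stdbool.h>
#include <stdio.h>

#define MAXROLES 16

/* demo_grants[m][r] == true means role m is directly granted role r */
static const bool demo_grants[MAXROLES][MAXROLES] = {
    [1][2] = true,              /* 1 -> 2 */
    [1][3] = true,              /* 1 -> 3 */
    [3][2] = true,              /* 3 -> 2: duplicate path, must not recurse forever */
    [2][4] = true,              /* 2 -> 4 */
};

static int
demo_roles_member_of(int roleid, int *out, int outsz)
{
    int n = 0;

    out[n++] = roleid;                      /* the role itself is always first */
    for (int i = 0; i < n; i++)             /* keep scanning while appending */
    {
        int member = out[i];

        for (int other = 0; other < MAXROLES && n < outsz; other++)
        {
            bool seen = false;

            if (!demo_grants[member][other])
                continue;
            for (int j = 0; j < n; j++)     /* append only if not already present */
                if (out[j] == other)
                    seen = true;
            if (!seen)
                out[n++] = other;
        }
    }
    return n;
}

int
main(void)
{
    int roles[MAXROLES];
    int n = demo_roles_member_of(1, roles, MAXROLES);

    for (int i = 0; i < n; i++)
        printf("%d ", roles[i]);            /* breadth-first: 1 2 3 4 */
    printf("\n");
    return 0;
}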
+ * + * Do not use this for privilege checking, instead use has_privs_of_role(). + * Don't use it for determining whether it's possible to SET ROLE to some + * other role; for that, use member_can_set_role(). And don't use it for + * determining whether it's OK to create an object owned by some other role: + * use member_can_set_role() for that, too. + * + * In short, calling this function is the wrong thing to do nearly everywhere. + */ +bool +is_member_of_role(Oid member, Oid role) +{ + /* Fast path for simple case */ + if (member == role) + return true; + + /* Superusers have every privilege, so are part of every role */ + if (superuser_arg(member)) + return true; + + /* + * Find all the roles that member is a member of, including multi-level + * recursion, then see if target role is any one of them. + */ + return list_member_oid(roles_is_member_of(member, ROLERECURSE_MEMBERS, + InvalidOid, NULL), + role); +} + +/* + * Is member a member of role, not considering superuserness? + * + * This is identical to is_member_of_role except we ignore superuser + * status. + * + * Do not use this for privilege checking, instead use has_privs_of_role() + */ +bool +is_member_of_role_nosuper(Oid member, Oid role) +{ + /* Fast path for simple case */ + if (member == role) + return true; + + /* + * Find all the roles that member is a member of, including multi-level + * recursion, then see if target role is any one of them. + */ + return list_member_oid(roles_is_member_of(member, ROLERECURSE_MEMBERS, + InvalidOid, NULL), + role); +} + + +/* + * Is member an admin of role? That is, is member the role itself (subject to + * restrictions below), a member (directly or indirectly) WITH ADMIN OPTION, + * or a superuser? + */ +bool +is_admin_of_role(Oid member, Oid role) +{ + Oid admin_role; + + if (superuser_arg(member)) + return true; + + /* By policy, a role cannot have WITH ADMIN OPTION on itself. */ + if (member == role) + return false; + + (void) roles_is_member_of(member, ROLERECURSE_MEMBERS, role, &admin_role); + return OidIsValid(admin_role); +} + +/* + * Find a role whose privileges "member" inherits which has ADMIN OPTION + * on "role", ignoring super-userness. + * + * There might be more than one such role; prefer one which involves fewer + * hops. That is, if member has ADMIN OPTION, prefer that over all other + * options; if not, prefer a role from which member inherits more directly + * over more indirect inheritance. + */ +Oid +select_best_admin(Oid member, Oid role) +{ + Oid admin_role; + + /* By policy, a role cannot have WITH ADMIN OPTION on itself. */ + if (member == role) + return InvalidOid; + + (void) roles_is_member_of(member, ROLERECURSE_PRIVS, role, &admin_role); + return admin_role; +} + + +/* does what it says ... */ +static int +count_one_bits(AclMode mask) +{ + int nbits = 0; + + /* this code relies on AclMode being an unsigned type */ + while (mask) + { + if (mask & 1) + nbits++; + mask >>= 1; + } + return nbits; +} + + +/* + * Select the effective grantor ID for a GRANT or REVOKE operation. + * + * The grantor must always be either the object owner or some role that has + * been explicitly granted grant options. This ensures that all granted + * privileges appear to flow from the object owner, and there are never + * multiple "original sources" of a privilege. Therefore, if the would-be + * grantor is a member of a role that has the needed grant options, we have + * to do the grant as that role instead. 
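/*
 * A standalone sketch (not part of the committed acl.c) of the bit
 * convention documented at convert_role_priv_string() and applied by
 * pg_role_aclcheck(): MEMBER borrows the CREATE bit, every "... WITH
 * ADMIN/GRANT OPTION" spelling borrows that bit's grant-option form, and
 * each bit is routed to one of the membership predicates defined above.
 * The demo_* bit values and predicates are illustrative stand-ins.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>

#define DEMO_ACL_USAGE              (1u << 0)
#define DEMO_ACL_CREATE             (1u << 1)   /* doubles as "MEMBER" */
#define DEMO_ACL_SET                (1u << 2)
#define DEMO_GRANT_OPTION_FOR(bits) ((bits) << 16)

/* Hard-wired stand-ins for the catalog-backed membership checks. */
static bool demo_is_admin(int member, int role)     { return false; }
static bool demo_is_member(int member, int role)    { return member == 1; }
static bool demo_has_privs(int member, int role)    { return member == 1; }
static bool demo_can_set_role(int member, int role) { return false; }

static bool
demo_role_aclcheck(int role, int member, uint32_t mode)
{
    if ((mode & DEMO_GRANT_OPTION_FOR(DEMO_ACL_CREATE)) && demo_is_admin(member, role))
        return true;
    if ((mode & DEMO_ACL_CREATE) && demo_is_member(member, role))
        return true;
    if ((mode & DEMO_ACL_USAGE) && demo_has_privs(member, role))
        return true;
    if ((mode & DEMO_ACL_SET) && demo_can_set_role(member, role))
        return true;
    return false;
}

int
main(void)
{
    /* "MEMBER" is spelled with the CREATE bit, as in the map above */
    printf("member 1 MEMBER of role 7? %d\n", demo_role_aclcheck(7, 1, DEMO_ACL_CREATE));
    printf("member 2 USAGE  of role 7? %d\n", demo_role_aclcheck(7, 2, DEMO_ACL_USAGE));
    return 0;
}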
+ * + * It is possible that the would-be grantor is a member of several roles + * that have different subsets of the desired grant options, but no one + * role has 'em all. In this case we pick a role with the largest number + * of desired options. Ties are broken in favor of closer ancestors. + * + * roleId: the role attempting to do the GRANT/REVOKE + * privileges: the privileges to be granted/revoked + * acl: the ACL of the object in question + * ownerId: the role owning the object in question + * *grantorId: receives the OID of the role to do the grant as + * *grantOptions: receives the grant options actually held by grantorId + * + * If no grant options exist, we set grantorId to roleId, grantOptions to 0. + */ +void +select_best_grantor(Oid roleId, AclMode privileges, + const Acl *acl, Oid ownerId, + Oid *grantorId, AclMode *grantOptions) +{ + AclMode needed_goptions = ACL_GRANT_OPTION_FOR(privileges); + List *roles_list; + int nrights; + ListCell *l; + + /* + * The object owner is always treated as having all grant options, so if + * roleId is the owner it's easy. Also, if roleId is a superuser it's + * easy: superusers are implicitly members of every role, so they act as + * the object owner. + */ + if (roleId == ownerId || superuser_arg(roleId)) + { + *grantorId = ownerId; + *grantOptions = needed_goptions; + return; + } + + /* + * Otherwise we have to do a careful search to see if roleId has the + * privileges of any suitable role. Note: we can hang onto the result of + * roles_is_member_of() throughout this loop, because aclmask_direct() + * doesn't query any role memberships. + */ + roles_list = roles_is_member_of(roleId, ROLERECURSE_PRIVS, + InvalidOid, NULL); + + /* initialize candidate result as default */ + *grantorId = roleId; + *grantOptions = ACL_NO_RIGHTS; + nrights = 0; + + foreach(l, roles_list) + { + Oid otherrole = lfirst_oid(l); + AclMode otherprivs; + + otherprivs = aclmask_direct(acl, otherrole, ownerId, + needed_goptions, ACLMASK_ALL); + if (otherprivs == needed_goptions) + { + /* Found a suitable grantor */ + *grantorId = otherrole; + *grantOptions = otherprivs; + return; + } + + /* + * If it has just some of the needed privileges, remember best + * candidate. + */ + if (otherprivs != ACL_NO_RIGHTS) + { + int nnewrights = count_one_bits(otherprivs); + + if (nnewrights > nrights) + { + *grantorId = otherrole; + *grantOptions = otherprivs; + nrights = nnewrights; + } + } + } +} + +/* + * get_role_oid - Given a role name, look up the role's OID. + * + * If missing_ok is false, throw an error if role name not found. If + * true, just return InvalidOid. + */ +Oid +get_role_oid(const char *rolname, bool missing_ok) +{ + Oid oid; + + oid = GetSysCacheOid1(AUTHNAME, Anum_pg_authid_oid, + CStringGetDatum(rolname)); + if (!OidIsValid(oid) && !missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("role \"%s\" does not exist", rolname))); + return oid; +} + +/* + * get_role_oid_or_public - As above, but return ACL_ID_PUBLIC if the + * role name is "public". + */ +Oid +get_role_oid_or_public(const char *rolname) +{ + if (strcmp(rolname, "public") == 0) + return ACL_ID_PUBLIC; + + return get_role_oid(rolname, false); +} + +/* + * Given a RoleSpec node, return the OID it corresponds to. If missing_ok is + * true, return InvalidOid if the role does not exist. + * + * PUBLIC is always disallowed here. Routines wanting to handle the PUBLIC + * case must check the case separately. 
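/*
 * A standalone sketch (not part of the committed acl.c) of the preference
 * rule in select_best_grantor() above: walk candidate roles in closeness
 * order, return the first one holding every needed grant-option bit, and
 * otherwise remember whichever covers the most bits (count_one_bits), with
 * ties going to the closer role.  The candidate masks are made-up inputs.
 */
#include <stdio.h>
#include <stdint.h>

static int
demo_count_one_bits(uint32_t mask)
{
    int nbits = 0;

    while (mask)
    {
        if (mask & 1)
            nbits++;
        mask >>= 1;
    }
    return nbits;
}

static int
demo_select_best(const uint32_t *held, int ncand, uint32_t needed)
{
    int best = -1;
    int best_bits = 0;

    for (int i = 0; i < ncand; i++)
    {
        uint32_t covered = held[i] & needed;

        if (covered == needed)
            return i;                       /* full coverage wins immediately */
        if (demo_count_one_bits(covered) > best_bits)   /* '>' keeps closer ties */
        {
            best = i;
            best_bits = demo_count_one_bits(covered);
        }
    }
    return best;                            /* -1: no candidate holds anything */
}

int
main(void)
{
    uint32_t held[] = {0x1, 0x6, 0x7};      /* closest candidate first */
    uint32_t needed = 0x7;

    printf("best grantor index: %d\n", demo_select_best(held, 3, needed));  /* 2 */
    return 0;
}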
+ */ +Oid +get_rolespec_oid(const RoleSpec *role, bool missing_ok) +{ + Oid oid; + + switch (role->roletype) + { + case ROLESPEC_CSTRING: + Assert(role->rolename); + oid = get_role_oid(role->rolename, missing_ok); + break; + + case ROLESPEC_CURRENT_ROLE: + case ROLESPEC_CURRENT_USER: + oid = GetUserId(); + break; + + case ROLESPEC_SESSION_USER: + oid = GetSessionUserId(); + break; + + case ROLESPEC_PUBLIC: + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("role \"%s\" does not exist", "public"))); + oid = InvalidOid; /* make compiler happy */ + break; + + default: + elog(ERROR, "unexpected role type %d", role->roletype); + } + + return oid; +} + +/* + * Given a RoleSpec node, return the pg_authid HeapTuple it corresponds to. + * Caller must ReleaseSysCache when done with the result tuple. + */ +HeapTuple +get_rolespec_tuple(const RoleSpec *role) +{ + HeapTuple tuple; + + switch (role->roletype) + { + case ROLESPEC_CSTRING: + Assert(role->rolename); + tuple = SearchSysCache1(AUTHNAME, CStringGetDatum(role->rolename)); + if (!HeapTupleIsValid(tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("role \"%s\" does not exist", role->rolename))); + break; + + case ROLESPEC_CURRENT_ROLE: + case ROLESPEC_CURRENT_USER: + tuple = SearchSysCache1(AUTHOID, GetUserId()); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for role %u", GetUserId()); + break; + + case ROLESPEC_SESSION_USER: + tuple = SearchSysCache1(AUTHOID, GetSessionUserId()); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for role %u", GetSessionUserId()); + break; + + case ROLESPEC_PUBLIC: + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("role \"%s\" does not exist", "public"))); + tuple = NULL; /* make compiler happy */ + break; + + default: + elog(ERROR, "unexpected role type %d", role->roletype); + } + + return tuple; +} + +/* + * Given a RoleSpec, returns a palloc'ed copy of the corresponding role's name. + */ +char * +get_rolespec_name(const RoleSpec *role) +{ + HeapTuple tp; + Form_pg_authid authForm; + char *rolename; + + tp = get_rolespec_tuple(role); + authForm = (Form_pg_authid) GETSTRUCT(tp); + rolename = pstrdup(NameStr(authForm->rolname)); + ReleaseSysCache(tp); + + return rolename; +} + +/* + * Given a RoleSpec, throw an error if the name is reserved, using detail_msg, + * if provided (which must be already translated). + * + * If node is NULL, no error is thrown. If detail_msg is NULL then no detail + * message is provided. + */ +void +check_rolespec_name(const RoleSpec *role, const char *detail_msg) +{ + if (!role) + return; + + if (role->roletype != ROLESPEC_CSTRING) + return; + + if (IsReservedName(role->rolename)) + { + if (detail_msg) + ereport(ERROR, + (errcode(ERRCODE_RESERVED_NAME), + errmsg("role name \"%s\" is reserved", + role->rolename), + errdetail_internal("%s", detail_msg))); + else + ereport(ERROR, + (errcode(ERRCODE_RESERVED_NAME), + errmsg("role name \"%s\" is reserved", + role->rolename))); + } +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/amutils.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/amutils.c new file mode 100644 index 00000000000..48852bf79e2 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/amutils.c @@ -0,0 +1,467 @@ +/*------------------------------------------------------------------------- + * + * amutils.c + * SQL-level APIs related to index access methods. 
+ * + * Copyright (c) 2016-2023, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/amutils.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/amapi.h" +#include "access/htup_details.h" +#include "catalog/pg_class.h" +#include "catalog/pg_index.h" +#include "utils/builtins.h" +#include "utils/syscache.h" + + +/* Convert string property name to enum, for efficiency */ +struct am_propname +{ + const char *name; + IndexAMProperty prop; +}; + +static const struct am_propname am_propnames[] = +{ + { + "asc", AMPROP_ASC + }, + { + "desc", AMPROP_DESC + }, + { + "nulls_first", AMPROP_NULLS_FIRST + }, + { + "nulls_last", AMPROP_NULLS_LAST + }, + { + "orderable", AMPROP_ORDERABLE + }, + { + "distance_orderable", AMPROP_DISTANCE_ORDERABLE + }, + { + "returnable", AMPROP_RETURNABLE + }, + { + "search_array", AMPROP_SEARCH_ARRAY + }, + { + "search_nulls", AMPROP_SEARCH_NULLS + }, + { + "clusterable", AMPROP_CLUSTERABLE + }, + { + "index_scan", AMPROP_INDEX_SCAN + }, + { + "bitmap_scan", AMPROP_BITMAP_SCAN + }, + { + "backward_scan", AMPROP_BACKWARD_SCAN + }, + { + "can_order", AMPROP_CAN_ORDER + }, + { + "can_unique", AMPROP_CAN_UNIQUE + }, + { + "can_multi_col", AMPROP_CAN_MULTI_COL + }, + { + "can_exclude", AMPROP_CAN_EXCLUDE + }, + { + "can_include", AMPROP_CAN_INCLUDE + }, +}; + +static IndexAMProperty +lookup_prop_name(const char *name) +{ + int i; + + for (i = 0; i < lengthof(am_propnames); i++) + { + if (pg_strcasecmp(am_propnames[i].name, name) == 0) + return am_propnames[i].prop; + } + + /* We do not throw an error, so that AMs can define their own properties */ + return AMPROP_UNKNOWN; +} + +/* + * Common code for properties that are just bit tests of indoptions. + * + * tuple: the pg_index heaptuple + * attno: identify the index column to test the indoptions of. + * guard: if false, a boolean false result is forced (saves code in caller). + * iopt_mask: mask for interesting indoption bit. + * iopt_expect: value for a "true" result (should be 0 or iopt_mask). + * + * Returns false to indicate a NULL result (for "unknown/inapplicable"), + * otherwise sets *res to the boolean value to return. + */ +static bool +test_indoption(HeapTuple tuple, int attno, bool guard, + int16 iopt_mask, int16 iopt_expect, + bool *res) +{ + Datum datum; + int2vector *indoption; + int16 indoption_val; + + if (!guard) + { + *res = false; + return true; + } + + datum = SysCacheGetAttrNotNull(INDEXRELID, tuple, Anum_pg_index_indoption); + + indoption = ((int2vector *) DatumGetPointer(datum)); + indoption_val = indoption->values[attno - 1]; + + *res = (indoption_val & iopt_mask) == iopt_expect; + + return true; +} + + +/* + * Test property of an index AM, index, or index column. + * + * This is common code for different SQL-level funcs, so the amoid and + * index_oid parameters are mutually exclusive; we look up the amoid from the + * index_oid if needed, or if no index oid is given, we're looking at AM-wide + * properties. 
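/*
 * A standalone sketch (not part of the committed amutils.c) of the
 * mask/expect comparison at the heart of test_indoption() above: each key
 * column carries a small flags word, and "asc", "desc", "nulls_first" and
 * "nulls_last" are all answered by testing one masked bit against the value
 * expected for "true".  The DEMO_INDOPTION_* values mirror the idea only;
 * they are not the catalog constants.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>

#define DEMO_INDOPTION_DESC        0x0001
#define DEMO_INDOPTION_NULLS_FIRST 0x0002

static bool
demo_indoption_test(uint16_t indoption_val, uint16_t mask, uint16_t expect)
{
    return (indoption_val & mask) == expect;
}

int
main(void)
{
    uint16_t col = DEMO_INDOPTION_DESC;     /* a DESC, NULLS LAST key column */

    printf("desc        = %d\n", demo_indoption_test(col, DEMO_INDOPTION_DESC, DEMO_INDOPTION_DESC));
    printf("asc         = %d\n", demo_indoption_test(col, DEMO_INDOPTION_DESC, 0));
    printf("nulls_first = %d\n", demo_indoption_test(col, DEMO_INDOPTION_NULLS_FIRST, DEMO_INDOPTION_NULLS_FIRST));
    printf("nulls_last  = %d\n", demo_indoption_test(col, DEMO_INDOPTION_NULLS_FIRST, 0));
    return 0;
}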
+ */ +static Datum +indexam_property(FunctionCallInfo fcinfo, + const char *propname, + Oid amoid, Oid index_oid, int attno) +{ + bool res = false; + bool isnull = false; + int natts = 0; + IndexAMProperty prop; + IndexAmRoutine *routine; + + /* Try to convert property name to enum (no error if not known) */ + prop = lookup_prop_name(propname); + + /* If we have an index OID, look up the AM, and get # of columns too */ + if (OidIsValid(index_oid)) + { + HeapTuple tuple; + Form_pg_class rd_rel; + + Assert(!OidIsValid(amoid)); + tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(index_oid)); + if (!HeapTupleIsValid(tuple)) + PG_RETURN_NULL(); + rd_rel = (Form_pg_class) GETSTRUCT(tuple); + if (rd_rel->relkind != RELKIND_INDEX && + rd_rel->relkind != RELKIND_PARTITIONED_INDEX) + { + ReleaseSysCache(tuple); + PG_RETURN_NULL(); + } + amoid = rd_rel->relam; + natts = rd_rel->relnatts; + ReleaseSysCache(tuple); + } + + /* + * At this point, either index_oid == InvalidOid or it's a valid index + * OID. Also, after this test and the one below, either attno == 0 for + * index-wide or AM-wide tests, or it's a valid column number in a valid + * index. + */ + if (attno < 0 || attno > natts) + PG_RETURN_NULL(); + + /* + * Get AM information. If we don't have a valid AM OID, return NULL. + */ + routine = GetIndexAmRoutineByAmId(amoid, true); + if (routine == NULL) + PG_RETURN_NULL(); + + /* + * If there's an AM property routine, give it a chance to override the + * generic logic. Proceed if it returns false. + */ + if (routine->amproperty && + routine->amproperty(index_oid, attno, prop, propname, + &res, &isnull)) + { + if (isnull) + PG_RETURN_NULL(); + PG_RETURN_BOOL(res); + } + + if (attno > 0) + { + HeapTuple tuple; + Form_pg_index rd_index; + bool iskey = true; + + /* + * Handle column-level properties. Many of these need the pg_index row + * (which we also need to use to check for nonkey atts) so we fetch + * that first. + */ + tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(index_oid)); + if (!HeapTupleIsValid(tuple)) + PG_RETURN_NULL(); + rd_index = (Form_pg_index) GETSTRUCT(tuple); + + Assert(index_oid == rd_index->indexrelid); + Assert(attno > 0 && attno <= rd_index->indnatts); + + isnull = true; + + /* + * If amcaninclude, we might be looking at an attno for a nonkey + * column, for which we (generically) assume that most properties are + * null. + */ + if (routine->amcaninclude + && attno > rd_index->indnkeyatts) + iskey = false; + + switch (prop) + { + case AMPROP_ASC: + if (iskey && + test_indoption(tuple, attno, routine->amcanorder, + INDOPTION_DESC, 0, &res)) + isnull = false; + break; + + case AMPROP_DESC: + if (iskey && + test_indoption(tuple, attno, routine->amcanorder, + INDOPTION_DESC, INDOPTION_DESC, &res)) + isnull = false; + break; + + case AMPROP_NULLS_FIRST: + if (iskey && + test_indoption(tuple, attno, routine->amcanorder, + INDOPTION_NULLS_FIRST, INDOPTION_NULLS_FIRST, &res)) + isnull = false; + break; + + case AMPROP_NULLS_LAST: + if (iskey && + test_indoption(tuple, attno, routine->amcanorder, + INDOPTION_NULLS_FIRST, 0, &res)) + isnull = false; + break; + + case AMPROP_ORDERABLE: + + /* + * generic assumption is that nonkey columns are not orderable + */ + res = iskey ? routine->amcanorder : false; + isnull = false; + break; + + case AMPROP_DISTANCE_ORDERABLE: + + /* + * The conditions for whether a column is distance-orderable + * are really up to the AM (at time of writing, only GiST + * supports it at all). 
The planner has its own idea based on + * whether it finds an operator with amoppurpose 'o', but + * getting there from just the index column type seems like a + * lot of work. So instead we expect the AM to handle this in + * its amproperty routine. The generic result is to return + * false if the AM says it never supports this, or if this is + * a nonkey column, and null otherwise (meaning we don't + * know). + */ + if (!iskey || !routine->amcanorderbyop) + { + res = false; + isnull = false; + } + break; + + case AMPROP_RETURNABLE: + + /* note that we ignore iskey for this property */ + + isnull = false; + res = false; + + if (routine->amcanreturn) + { + /* + * If possible, the AM should handle this test in its + * amproperty function without opening the rel. But this + * is the generic fallback if it does not. + */ + Relation indexrel = index_open(index_oid, AccessShareLock); + + res = index_can_return(indexrel, attno); + index_close(indexrel, AccessShareLock); + } + break; + + case AMPROP_SEARCH_ARRAY: + if (iskey) + { + res = routine->amsearcharray; + isnull = false; + } + break; + + case AMPROP_SEARCH_NULLS: + if (iskey) + { + res = routine->amsearchnulls; + isnull = false; + } + break; + + default: + break; + } + + ReleaseSysCache(tuple); + + if (!isnull) + PG_RETURN_BOOL(res); + PG_RETURN_NULL(); + } + + if (OidIsValid(index_oid)) + { + /* + * Handle index-level properties. Currently, these only depend on the + * AM, but that might not be true forever, so we make users name an + * index not just an AM. + */ + switch (prop) + { + case AMPROP_CLUSTERABLE: + PG_RETURN_BOOL(routine->amclusterable); + + case AMPROP_INDEX_SCAN: + PG_RETURN_BOOL(routine->amgettuple ? true : false); + + case AMPROP_BITMAP_SCAN: + PG_RETURN_BOOL(routine->amgetbitmap ? true : false); + + case AMPROP_BACKWARD_SCAN: + PG_RETURN_BOOL(routine->amcanbackward); + + default: + PG_RETURN_NULL(); + } + } + + /* + * Handle AM-level properties (those that control what you can say in + * CREATE INDEX). + */ + switch (prop) + { + case AMPROP_CAN_ORDER: + PG_RETURN_BOOL(routine->amcanorder); + + case AMPROP_CAN_UNIQUE: + PG_RETURN_BOOL(routine->amcanunique); + + case AMPROP_CAN_MULTI_COL: + PG_RETURN_BOOL(routine->amcanmulticol); + + case AMPROP_CAN_EXCLUDE: + PG_RETURN_BOOL(routine->amgettuple ? true : false); + + case AMPROP_CAN_INCLUDE: + PG_RETURN_BOOL(routine->amcaninclude); + + default: + PG_RETURN_NULL(); + } +} + +/* + * Test property of an AM specified by AM OID + */ +Datum +pg_indexam_has_property(PG_FUNCTION_ARGS) +{ + Oid amoid = PG_GETARG_OID(0); + char *propname = text_to_cstring(PG_GETARG_TEXT_PP(1)); + + return indexam_property(fcinfo, propname, amoid, InvalidOid, 0); +} + +/* + * Test property of an index specified by index OID + */ +Datum +pg_index_has_property(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + char *propname = text_to_cstring(PG_GETARG_TEXT_PP(1)); + + return indexam_property(fcinfo, propname, InvalidOid, relid, 0); +} + +/* + * Test property of an index column specified by index OID and column number + */ +Datum +pg_index_column_has_property(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + int32 attno = PG_GETARG_INT32(1); + char *propname = text_to_cstring(PG_GETARG_TEXT_PP(2)); + + /* Reject attno 0 immediately, so that attno > 0 identifies this case */ + if (attno <= 0) + PG_RETURN_NULL(); + + return indexam_property(fcinfo, propname, InvalidOid, relid, attno); +} + +/* + * Return the name of the given phase, as used for progress reporting by the + * given AM. 
+ */ +Datum +pg_indexam_progress_phasename(PG_FUNCTION_ARGS) +{ + Oid amoid = PG_GETARG_OID(0); + int32 phasenum = PG_GETARG_INT32(1); + IndexAmRoutine *routine; + char *name; + + routine = GetIndexAmRoutineByAmId(amoid, true); + if (routine == NULL || !routine->ambuildphasename) + PG_RETURN_NULL(); + + name = routine->ambuildphasename(phasenum); + if (!name) + PG_RETURN_NULL(); + + PG_RETURN_DATUM(CStringGetTextDatum(name)); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/array_expanded.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/array_expanded.c new file mode 100644 index 00000000000..4509fddeb91 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/array_expanded.c @@ -0,0 +1,453 @@ +/*------------------------------------------------------------------------- + * + * array_expanded.c + * Basic functions for manipulating expanded arrays. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/array_expanded.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/tupmacs.h" +#include "utils/array.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" + + +/* "Methods" required for an expanded object */ +static Size EA_get_flat_size(ExpandedObjectHeader *eohptr); +static void EA_flatten_into(ExpandedObjectHeader *eohptr, + void *result, Size allocated_size); + +static const ExpandedObjectMethods EA_methods = +{ + EA_get_flat_size, + EA_flatten_into +}; + +/* Other local functions */ +static void copy_byval_expanded_array(ExpandedArrayHeader *eah, + ExpandedArrayHeader *oldeah); + + +/* + * expand_array: convert an array Datum into an expanded array + * + * The expanded object will be a child of parentcontext. + * + * Some callers can provide cache space to avoid repeated lookups of element + * type data across calls; if so, pass a metacache pointer, making sure that + * metacache->element_type is initialized to InvalidOid before first call. + * If no cross-call caching is required, pass NULL for metacache. + */ +Datum +expand_array(Datum arraydatum, MemoryContext parentcontext, + ArrayMetaState *metacache) +{ + ArrayType *array; + ExpandedArrayHeader *eah; + MemoryContext objcxt; + MemoryContext oldcxt; + ArrayMetaState fakecache; + + /* + * Allocate private context for expanded object. We start by assuming + * that the array won't be very large; but if it does grow a lot, don't + * constrain aset.c's large-context behavior. + */ + objcxt = AllocSetContextCreate(parentcontext, + "expanded array", + ALLOCSET_START_SMALL_SIZES); + + /* Set up expanded array header */ + eah = (ExpandedArrayHeader *) + MemoryContextAlloc(objcxt, sizeof(ExpandedArrayHeader)); + + EOH_init_header(&eah->hdr, &EA_methods, objcxt); + eah->ea_magic = EA_MAGIC; + + /* If the source is an expanded array, we may be able to optimize */ + if (VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(arraydatum))) + { + ExpandedArrayHeader *oldeah = (ExpandedArrayHeader *) DatumGetEOHP(arraydatum); + + Assert(oldeah->ea_magic == EA_MAGIC); + + /* + * Update caller's cache if provided; we don't need it this time, but + * next call might be for a non-expanded source array. 
Furthermore, + * if the caller didn't provide a cache area, use some local storage + * to cache anyway, thereby avoiding a catalog lookup in the case + * where we fall through to the flat-copy code path. + */ + if (metacache == NULL) + metacache = &fakecache; + metacache->element_type = oldeah->element_type; + metacache->typlen = oldeah->typlen; + metacache->typbyval = oldeah->typbyval; + metacache->typalign = oldeah->typalign; + + /* + * If element type is pass-by-value and we have a Datum-array + * representation, just copy the source's metadata and Datum/isnull + * arrays. The original flat array, if present at all, adds no + * additional information so we need not copy it. + */ + if (oldeah->typbyval && oldeah->dvalues != NULL) + { + copy_byval_expanded_array(eah, oldeah); + /* return a R/W pointer to the expanded array */ + return EOHPGetRWDatum(&eah->hdr); + } + + /* + * Otherwise, either we have only a flat representation or the + * elements are pass-by-reference. In either case, the best thing + * seems to be to copy the source as a flat representation and then + * deconstruct that later if necessary. For the pass-by-ref case, we + * could perhaps save some cycles with custom code that generates the + * deconstructed representation in parallel with copying the values, + * but it would be a lot of extra code for fairly marginal gain. So, + * fall through into the flat-source code path. + */ + } + + /* + * Detoast and copy source array into private context, as a flat array. + * + * Note that this coding risks leaking some memory in the private context + * if we have to fetch data from a TOAST table; however, experimentation + * says that the leak is minimal. Doing it this way saves a copy step, + * which seems worthwhile, especially if the array is large enough to need + * external storage. + */ + oldcxt = MemoryContextSwitchTo(objcxt); + array = DatumGetArrayTypePCopy(arraydatum); + MemoryContextSwitchTo(oldcxt); + + eah->ndims = ARR_NDIM(array); + /* note these pointers point into the fvalue header! 
*/ + eah->dims = ARR_DIMS(array); + eah->lbound = ARR_LBOUND(array); + + /* Save array's element-type data for possible use later */ + eah->element_type = ARR_ELEMTYPE(array); + if (metacache && metacache->element_type == eah->element_type) + { + /* We have a valid cache of representational data */ + eah->typlen = metacache->typlen; + eah->typbyval = metacache->typbyval; + eah->typalign = metacache->typalign; + } + else + { + /* No, so look it up */ + get_typlenbyvalalign(eah->element_type, + &eah->typlen, + &eah->typbyval, + &eah->typalign); + /* Update cache if provided */ + if (metacache) + { + metacache->element_type = eah->element_type; + metacache->typlen = eah->typlen; + metacache->typbyval = eah->typbyval; + metacache->typalign = eah->typalign; + } + } + + /* we don't make a deconstructed representation now */ + eah->dvalues = NULL; + eah->dnulls = NULL; + eah->dvalueslen = 0; + eah->nelems = 0; + eah->flat_size = 0; + + /* remember we have a flat representation */ + eah->fvalue = array; + eah->fstartptr = ARR_DATA_PTR(array); + eah->fendptr = ((char *) array) + ARR_SIZE(array); + + /* return a R/W pointer to the expanded array */ + return EOHPGetRWDatum(&eah->hdr); +} + +/* + * helper for expand_array(): copy pass-by-value Datum-array representation + */ +static void +copy_byval_expanded_array(ExpandedArrayHeader *eah, + ExpandedArrayHeader *oldeah) +{ + MemoryContext objcxt = eah->hdr.eoh_context; + int ndims = oldeah->ndims; + int dvalueslen = oldeah->dvalueslen; + + /* Copy array dimensionality information */ + eah->ndims = ndims; + /* We can alloc both dimensionality arrays with one palloc */ + eah->dims = (int *) MemoryContextAlloc(objcxt, ndims * 2 * sizeof(int)); + eah->lbound = eah->dims + ndims; + /* .. but don't assume the source's arrays are contiguous */ + memcpy(eah->dims, oldeah->dims, ndims * sizeof(int)); + memcpy(eah->lbound, oldeah->lbound, ndims * sizeof(int)); + + /* Copy element-type data */ + eah->element_type = oldeah->element_type; + eah->typlen = oldeah->typlen; + eah->typbyval = oldeah->typbyval; + eah->typalign = oldeah->typalign; + + /* Copy the deconstructed representation */ + eah->dvalues = (Datum *) MemoryContextAlloc(objcxt, + dvalueslen * sizeof(Datum)); + memcpy(eah->dvalues, oldeah->dvalues, dvalueslen * sizeof(Datum)); + if (oldeah->dnulls) + { + eah->dnulls = (bool *) MemoryContextAlloc(objcxt, + dvalueslen * sizeof(bool)); + memcpy(eah->dnulls, oldeah->dnulls, dvalueslen * sizeof(bool)); + } + else + eah->dnulls = NULL; + eah->dvalueslen = dvalueslen; + eah->nelems = oldeah->nelems; + eah->flat_size = oldeah->flat_size; + + /* we don't make a flat representation */ + eah->fvalue = NULL; + eah->fstartptr = NULL; + eah->fendptr = NULL; +} + +/* + * get_flat_size method for expanded arrays + */ +static Size +EA_get_flat_size(ExpandedObjectHeader *eohptr) +{ + ExpandedArrayHeader *eah = (ExpandedArrayHeader *) eohptr; + int nelems; + int ndims; + Datum *dvalues; + bool *dnulls; + Size nbytes; + int i; + + Assert(eah->ea_magic == EA_MAGIC); + + /* Easy if we have a valid flattened value */ + if (eah->fvalue) + return ARR_SIZE(eah->fvalue); + + /* If we have a cached size value, believe that */ + if (eah->flat_size) + return eah->flat_size; + + /* + * Compute space needed by examining dvalues/dnulls. Note that the result + * array will have a nulls bitmap if dnulls isn't NULL, even if the array + * doesn't actually contain any nulls now. 
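/*
 * A standalone sketch (not part of the committed array_expanded.c) of the
 * cross-call element-type cache that expand_array() accepts via
 * ArrayMetaState: the caller invalidates the key once, the first call fills
 * in the per-type facts, and later calls skip the lookup.  The demo_* types
 * and the lookup stub are illustrative; the real code consults the syscache
 * through get_typlenbyvalalign().
 */
#include <stdbool.h>
#include <stdio.h>

#define DEMO_INVALID_OID 0

typedef struct
{
    unsigned element_type;      /* DEMO_INVALID_OID means "cache not filled" */
    int      typlen;
    bool     typbyval;
    char     typalign;
} demo_meta_cache;

static int demo_lookups;        /* counts how often the "catalog" was consulted */

static void
demo_get_type_info(unsigned element_type, int *typlen, bool *typbyval, char *typalign)
{
    (void) element_type;
    demo_lookups++;
    *typlen = 4;                /* pretend every type is a 4-byte pass-by-value int */
    *typbyval = true;
    *typalign = 'i';
}

static void
demo_expand(unsigned element_type, demo_meta_cache *cache)
{
    int  typlen;
    bool typbyval;
    char typalign;

    if (cache && cache->element_type == element_type)
    {
        typlen = cache->typlen;             /* cache hit: no lookup needed */
        typbyval = cache->typbyval;
        typalign = cache->typalign;
    }
    else
    {
        demo_get_type_info(element_type, &typlen, &typbyval, &typalign);
        if (cache)                          /* remember for the next call */
        {
            cache->element_type = element_type;
            cache->typlen = typlen;
            cache->typbyval = typbyval;
            cache->typalign = typalign;
        }
    }
    (void) typlen; (void) typbyval; (void) typalign;
}

int
main(void)
{
    demo_meta_cache cache = {DEMO_INVALID_OID, 0, false, 0};

    demo_expand(23, &cache);                /* first call consults the "catalog" */
    demo_expand(23, &cache);                /* second call is served from the cache */
    printf("catalog lookups: %d\n", demo_lookups);  /* prints 1 */
    return 0;
}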
+ */ + nelems = eah->nelems; + ndims = eah->ndims; + Assert(nelems == ArrayGetNItems(ndims, eah->dims)); + dvalues = eah->dvalues; + dnulls = eah->dnulls; + nbytes = 0; + for (i = 0; i < nelems; i++) + { + if (dnulls && dnulls[i]) + continue; + nbytes = att_addlength_datum(nbytes, eah->typlen, dvalues[i]); + nbytes = att_align_nominal(nbytes, eah->typalign); + /* check for overflow of total request */ + if (!AllocSizeIsValid(nbytes)) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("array size exceeds the maximum allowed (%d)", + (int) MaxAllocSize))); + } + + if (dnulls) + nbytes += ARR_OVERHEAD_WITHNULLS(ndims, nelems); + else + nbytes += ARR_OVERHEAD_NONULLS(ndims); + + /* cache for next time */ + eah->flat_size = nbytes; + + return nbytes; +} + +/* + * flatten_into method for expanded arrays + */ +static void +EA_flatten_into(ExpandedObjectHeader *eohptr, + void *result, Size allocated_size) +{ + ExpandedArrayHeader *eah = (ExpandedArrayHeader *) eohptr; + ArrayType *aresult = (ArrayType *) result; + int nelems; + int ndims; + int32 dataoffset; + + Assert(eah->ea_magic == EA_MAGIC); + + /* Easy if we have a valid flattened value */ + if (eah->fvalue) + { + Assert(allocated_size == ARR_SIZE(eah->fvalue)); + memcpy(result, eah->fvalue, allocated_size); + return; + } + + /* Else allocation should match previous get_flat_size result */ + Assert(allocated_size == eah->flat_size); + + /* Fill result array from dvalues/dnulls */ + nelems = eah->nelems; + ndims = eah->ndims; + + if (eah->dnulls) + dataoffset = ARR_OVERHEAD_WITHNULLS(ndims, nelems); + else + dataoffset = 0; /* marker for no null bitmap */ + + /* We must ensure that any pad space is zero-filled */ + memset(aresult, 0, allocated_size); + + SET_VARSIZE(aresult, allocated_size); + aresult->ndim = ndims; + aresult->dataoffset = dataoffset; + aresult->elemtype = eah->element_type; + memcpy(ARR_DIMS(aresult), eah->dims, ndims * sizeof(int)); + memcpy(ARR_LBOUND(aresult), eah->lbound, ndims * sizeof(int)); + + CopyArrayEls(aresult, + eah->dvalues, eah->dnulls, nelems, + eah->typlen, eah->typbyval, eah->typalign, + false); +} + +/* + * Argument fetching support code + */ + +/* + * DatumGetExpandedArray: get a writable expanded array from an input argument + * + * Caution: if the input is a read/write pointer, this returns the input + * argument; so callers must be sure that their changes are "safe", that is + * they cannot leave the array in a corrupt state. 
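/*
 * A standalone sketch (not part of the committed array_expanded.c) of the
 * size accounting in EA_get_flat_size() above: walk the Datum/isnull
 * arrays, skip NULL elements, add each element's length plus alignment
 * padding, then add a header overhead that is larger when a nulls bitmap is
 * reserved (which happens whenever an isnull array exists at all).  The
 * element size, alignment and overhead constants are simplified stand-ins,
 * not the real att_* and ARR_OVERHEAD_* macros.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stddef.h>

#define DEMO_ELEM_SIZE          4
#define DEMO_ALIGN              4
#define DEMO_OVERHEAD           24          /* made-up header size, no nulls bitmap */
#define DEMO_OVERHEAD_WITHNULLS 32          /* made-up header size incl. bitmap */

static size_t
demo_align(size_t len)
{
    return (len + DEMO_ALIGN - 1) & ~(size_t) (DEMO_ALIGN - 1);
}

static size_t
demo_flat_size(int nelems, const bool *isnull)
{
    size_t nbytes = 0;

    for (int i = 0; i < nelems; i++)
    {
        if (isnull && isnull[i])
            continue;                       /* NULL elements take no data space */
        nbytes = demo_align(nbytes + DEMO_ELEM_SIZE);
    }
    /* a nulls bitmap is reserved whenever an isnull array is present */
    return nbytes + (isnull ? DEMO_OVERHEAD_WITHNULLS : DEMO_OVERHEAD);
}

int
main(void)
{
    bool isnull[] = {false, true, false, false};

    printf("flat size: %zu bytes\n", demo_flat_size(4, isnull));    /* 3*4 + 32 = 44 */
    return 0;
}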
+ */ +ExpandedArrayHeader * +DatumGetExpandedArray(Datum d) +{ + /* If it's a writable expanded array already, just return it */ + if (VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(d))) + { + ExpandedArrayHeader *eah = (ExpandedArrayHeader *) DatumGetEOHP(d); + + Assert(eah->ea_magic == EA_MAGIC); + return eah; + } + + /* Else expand the hard way */ + d = expand_array(d, CurrentMemoryContext, NULL); + return (ExpandedArrayHeader *) DatumGetEOHP(d); +} + +/* + * As above, when caller has the ability to cache element type info + */ +ExpandedArrayHeader * +DatumGetExpandedArrayX(Datum d, ArrayMetaState *metacache) +{ + /* If it's a writable expanded array already, just return it */ + if (VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(d))) + { + ExpandedArrayHeader *eah = (ExpandedArrayHeader *) DatumGetEOHP(d); + + Assert(eah->ea_magic == EA_MAGIC); + /* Update cache if provided */ + if (metacache) + { + metacache->element_type = eah->element_type; + metacache->typlen = eah->typlen; + metacache->typbyval = eah->typbyval; + metacache->typalign = eah->typalign; + } + return eah; + } + + /* Else expand using caller's cache if any */ + d = expand_array(d, CurrentMemoryContext, metacache); + return (ExpandedArrayHeader *) DatumGetEOHP(d); +} + +/* + * DatumGetAnyArrayP: return either an expanded array or a detoasted varlena + * array. The result must not be modified in-place. + */ +AnyArrayType * +DatumGetAnyArrayP(Datum d) +{ + ExpandedArrayHeader *eah; + + /* + * If it's an expanded array (RW or RO), return the header pointer. + */ + if (VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(d))) + { + eah = (ExpandedArrayHeader *) DatumGetEOHP(d); + Assert(eah->ea_magic == EA_MAGIC); + return (AnyArrayType *) eah; + } + + /* Else do regular detoasting as needed */ + return (AnyArrayType *) PG_DETOAST_DATUM(d); +} + +/* + * Create the Datum/isnull representation of an expanded array object + * if we didn't do so previously + */ +void +deconstruct_expanded_array(ExpandedArrayHeader *eah) +{ + if (eah->dvalues == NULL) + { + MemoryContext oldcxt = MemoryContextSwitchTo(eah->hdr.eoh_context); + Datum *dvalues; + bool *dnulls; + int nelems; + + dnulls = NULL; + deconstruct_array(eah->fvalue, + eah->element_type, + eah->typlen, eah->typbyval, eah->typalign, + &dvalues, + ARR_HASNULL(eah->fvalue) ? &dnulls : NULL, + &nelems); + + /* + * Update header only after successful completion of this step. If + * deconstruct_array fails partway through, worst consequence is some + * leaked memory in the object's context. If the caller fails at a + * later point, that's fine, since the deconstructed representation is + * valid anyhow. 
+ */ + eah->dvalues = dvalues; + eah->dnulls = dnulls; + eah->dvalueslen = eah->nelems = nelems; + MemoryContextSwitchTo(oldcxt); + } +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/array_selfuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/array_selfuncs.c new file mode 100644 index 00000000000..9207a5ed193 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/array_selfuncs.c @@ -0,0 +1,1193 @@ +/*------------------------------------------------------------------------- + * + * array_selfuncs.c + * Functions for selectivity estimation of array operators + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/array_selfuncs.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <math.h> + +#include "access/htup_details.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_statistic.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/selfuncs.h" +#include "utils/typcache.h" + + +/* Default selectivity constant for "@>" and "<@" operators */ +#define DEFAULT_CONTAIN_SEL 0.005 + +/* Default selectivity constant for "&&" operator */ +#define DEFAULT_OVERLAP_SEL 0.01 + +/* Default selectivity for given operator */ +#define DEFAULT_SEL(operator) \ + ((operator) == OID_ARRAY_OVERLAP_OP ? \ + DEFAULT_OVERLAP_SEL : DEFAULT_CONTAIN_SEL) + +static Selectivity calc_arraycontsel(VariableStatData *vardata, Datum constval, + Oid elemtype, Oid operator); +static Selectivity mcelem_array_selec(ArrayType *array, + TypeCacheEntry *typentry, + Datum *mcelem, int nmcelem, + float4 *numbers, int nnumbers, + float4 *hist, int nhist, + Oid operator); +static Selectivity mcelem_array_contain_overlap_selec(Datum *mcelem, int nmcelem, + float4 *numbers, int nnumbers, + Datum *array_data, int nitems, + Oid operator, TypeCacheEntry *typentry); +static Selectivity mcelem_array_contained_selec(Datum *mcelem, int nmcelem, + float4 *numbers, int nnumbers, + Datum *array_data, int nitems, + float4 *hist, int nhist, + Oid operator, TypeCacheEntry *typentry); +static float *calc_hist(const float4 *hist, int nhist, int n); +static float *calc_distr(const float *p, int n, int m, float rest); +static int floor_log2(uint32 n); +static bool find_next_mcelem(Datum *mcelem, int nmcelem, Datum value, + int *index, TypeCacheEntry *typentry); +static int element_compare(const void *key1, const void *key2, void *arg); +static int float_compare_desc(const void *key1, const void *key2); + + +/* + * scalararraysel_containment + * Estimate selectivity of ScalarArrayOpExpr via array containment. + * + * If we have const =/<> ANY/ALL (array_var) then we can estimate the + * selectivity as though this were an array containment operator, + * array_var op ARRAY[const]. + * + * scalararraysel() has already verified that the ScalarArrayOpExpr's operator + * is the array element type's default equality or inequality operator, and + * has aggressively simplified both inputs to constants. + * + * Returns selectivity (0..1), or -1 if we fail to estimate selectivity. 
+ */ +Selectivity +scalararraysel_containment(PlannerInfo *root, + Node *leftop, Node *rightop, + Oid elemtype, bool isEquality, bool useOr, + int varRelid) +{ + Selectivity selec; + VariableStatData vardata; + Datum constval; + TypeCacheEntry *typentry; + FmgrInfo *cmpfunc; + + /* + * rightop must be a variable, else punt. + */ + examine_variable(root, rightop, varRelid, &vardata); + if (!vardata.rel) + { + ReleaseVariableStats(vardata); + return -1.0; + } + + /* + * leftop must be a constant, else punt. + */ + if (!IsA(leftop, Const)) + { + ReleaseVariableStats(vardata); + return -1.0; + } + if (((Const *) leftop)->constisnull) + { + /* qual can't succeed if null on left */ + ReleaseVariableStats(vardata); + return (Selectivity) 0.0; + } + constval = ((Const *) leftop)->constvalue; + + /* Get element type's default comparison function */ + typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO); + if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid)) + { + ReleaseVariableStats(vardata); + return -1.0; + } + cmpfunc = &typentry->cmp_proc_finfo; + + /* + * If the operator is <>, swap ANY/ALL, then invert the result later. + */ + if (!isEquality) + useOr = !useOr; + + /* Get array element stats for var, if available */ + if (HeapTupleIsValid(vardata.statsTuple) && + statistic_proc_security_check(&vardata, cmpfunc->fn_oid)) + { + Form_pg_statistic stats; + AttStatsSlot sslot; + AttStatsSlot hslot; + + stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); + + /* MCELEM will be an array of same type as element */ + if (get_attstatsslot(&sslot, vardata.statsTuple, + STATISTIC_KIND_MCELEM, InvalidOid, + ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)) + { + /* For ALL case, also get histogram of distinct-element counts */ + if (useOr || + !get_attstatsslot(&hslot, vardata.statsTuple, + STATISTIC_KIND_DECHIST, InvalidOid, + ATTSTATSSLOT_NUMBERS)) + memset(&hslot, 0, sizeof(hslot)); + + /* + * For = ANY, estimate as var @> ARRAY[const]. + * + * For = ALL, estimate as var <@ ARRAY[const]. + */ + if (useOr) + selec = mcelem_array_contain_overlap_selec(sslot.values, + sslot.nvalues, + sslot.numbers, + sslot.nnumbers, + &constval, 1, + OID_ARRAY_CONTAINS_OP, + typentry); + else + selec = mcelem_array_contained_selec(sslot.values, + sslot.nvalues, + sslot.numbers, + sslot.nnumbers, + &constval, 1, + hslot.numbers, + hslot.nnumbers, + OID_ARRAY_CONTAINED_OP, + typentry); + + free_attstatsslot(&hslot); + free_attstatsslot(&sslot); + } + else + { + /* No most-common-elements info, so do without */ + if (useOr) + selec = mcelem_array_contain_overlap_selec(NULL, 0, + NULL, 0, + &constval, 1, + OID_ARRAY_CONTAINS_OP, + typentry); + else + selec = mcelem_array_contained_selec(NULL, 0, + NULL, 0, + &constval, 1, + NULL, 0, + OID_ARRAY_CONTAINED_OP, + typentry); + } + + /* + * MCE stats count only non-null rows, so adjust for null rows. + */ + selec *= (1.0 - stats->stanullfrac); + } + else + { + /* No stats at all, so do without */ + if (useOr) + selec = mcelem_array_contain_overlap_selec(NULL, 0, + NULL, 0, + &constval, 1, + OID_ARRAY_CONTAINS_OP, + typentry); + else + selec = mcelem_array_contained_selec(NULL, 0, + NULL, 0, + &constval, 1, + NULL, 0, + OID_ARRAY_CONTAINED_OP, + typentry); + /* we assume no nulls here, so no stanullfrac correction */ + } + + ReleaseVariableStats(vardata); + + /* + * If the operator is <>, invert the results. 
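/*
 * A standalone arithmetic sketch (not part of the committed
 * array_selfuncs.c) of the final steps in scalararraysel_containment():
 * the containment-style estimate is scaled by the non-null row fraction,
 * inverted for the inequality operator, and clamped into [0, 1].  The input
 * numbers are made up for illustration.
 */
#include <stdio.h>

static double
demo_clamp_probability(double p)
{
    if (p < 0.0)
        return 0.0;
    if (p > 1.0)
        return 1.0;
    return p;
}

int
main(void)
{
    double selec = 0.12;        /* pretend estimate for "var @> ARRAY[const]" */
    double stanullfrac = 0.25;  /* fraction of rows where the array is NULL */
    int    is_equality = 0;     /* i.e. the "<> ANY (...)" case */

    selec *= (1.0 - stanullfrac);           /* MCE stats cover only non-null rows */
    if (!is_equality)
        selec = 1.0 - selec;                /* invert for the <> operator */
    selec = demo_clamp_probability(selec);

    printf("estimated selectivity: %.3f\n", selec);     /* 1 - 0.12 * 0.75 = 0.910 */
    return 0;
}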
+ */ + if (!isEquality) + selec = 1.0 - selec; + + CLAMP_PROBABILITY(selec); + + return selec; +} + +/* + * arraycontsel -- restriction selectivity for array @>, &&, <@ operators + */ +Datum +arraycontsel(PG_FUNCTION_ARGS) +{ + PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); + Oid operator = PG_GETARG_OID(1); + List *args = (List *) PG_GETARG_POINTER(2); + int varRelid = PG_GETARG_INT32(3); + VariableStatData vardata; + Node *other; + bool varonleft; + Selectivity selec; + Oid element_typeid; + + /* + * If expression is not (variable op something) or (something op + * variable), then punt and return a default estimate. + */ + if (!get_restriction_variable(root, args, varRelid, + &vardata, &other, &varonleft)) + PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); + + /* + * Can't do anything useful if the something is not a constant, either. + */ + if (!IsA(other, Const)) + { + ReleaseVariableStats(vardata); + PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); + } + + /* + * The "&&", "@>" and "<@" operators are strict, so we can cope with a + * NULL constant right away. + */ + if (((Const *) other)->constisnull) + { + ReleaseVariableStats(vardata); + PG_RETURN_FLOAT8(0.0); + } + + /* + * If var is on the right, commute the operator, so that we can assume the + * var is on the left in what follows. + */ + if (!varonleft) + { + if (operator == OID_ARRAY_CONTAINS_OP) + operator = OID_ARRAY_CONTAINED_OP; + else if (operator == OID_ARRAY_CONTAINED_OP) + operator = OID_ARRAY_CONTAINS_OP; + } + + /* + * OK, there's a Var and a Const we're dealing with here. We need the + * Const to be an array with same element type as column, else we can't do + * anything useful. (Such cases will likely fail at runtime, but here + * we'd rather just return a default estimate.) + */ + element_typeid = get_base_element_type(((Const *) other)->consttype); + if (element_typeid != InvalidOid && + element_typeid == get_base_element_type(vardata.vartype)) + { + selec = calc_arraycontsel(&vardata, ((Const *) other)->constvalue, + element_typeid, operator); + } + else + { + selec = DEFAULT_SEL(operator); + } + + ReleaseVariableStats(vardata); + + CLAMP_PROBABILITY(selec); + + PG_RETURN_FLOAT8((float8) selec); +} + +/* + * arraycontjoinsel -- join selectivity for array @>, &&, <@ operators + */ +Datum +arraycontjoinsel(PG_FUNCTION_ARGS) +{ + /* For the moment this is just a stub */ + Oid operator = PG_GETARG_OID(1); + + PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); +} + +/* + * Calculate selectivity for "arraycolumn @> const", "arraycolumn && const" + * or "arraycolumn <@ const" based on the statistics + * + * This function is mainly responsible for extracting the pg_statistic data + * to be used; we then pass the problem on to mcelem_array_selec(). 
+ */ +static Selectivity +calc_arraycontsel(VariableStatData *vardata, Datum constval, + Oid elemtype, Oid operator) +{ + Selectivity selec; + TypeCacheEntry *typentry; + FmgrInfo *cmpfunc; + ArrayType *array; + + /* Get element type's default comparison function */ + typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO); + if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid)) + return DEFAULT_SEL(operator); + cmpfunc = &typentry->cmp_proc_finfo; + + /* + * The caller made sure the const is an array with same element type, so + * get it now + */ + array = DatumGetArrayTypeP(constval); + + if (HeapTupleIsValid(vardata->statsTuple) && + statistic_proc_security_check(vardata, cmpfunc->fn_oid)) + { + Form_pg_statistic stats; + AttStatsSlot sslot; + AttStatsSlot hslot; + + stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple); + + /* MCELEM will be an array of same type as column */ + if (get_attstatsslot(&sslot, vardata->statsTuple, + STATISTIC_KIND_MCELEM, InvalidOid, + ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)) + { + /* + * For "array <@ const" case we also need histogram of distinct + * element counts. + */ + if (operator != OID_ARRAY_CONTAINED_OP || + !get_attstatsslot(&hslot, vardata->statsTuple, + STATISTIC_KIND_DECHIST, InvalidOid, + ATTSTATSSLOT_NUMBERS)) + memset(&hslot, 0, sizeof(hslot)); + + /* Use the most-common-elements slot for the array Var. */ + selec = mcelem_array_selec(array, typentry, + sslot.values, sslot.nvalues, + sslot.numbers, sslot.nnumbers, + hslot.numbers, hslot.nnumbers, + operator); + + free_attstatsslot(&hslot); + free_attstatsslot(&sslot); + } + else + { + /* No most-common-elements info, so do without */ + selec = mcelem_array_selec(array, typentry, + NULL, 0, NULL, 0, NULL, 0, + operator); + } + + /* + * MCE stats count only non-null rows, so adjust for null rows. + */ + selec *= (1.0 - stats->stanullfrac); + } + else + { + /* No stats at all, so do without */ + selec = mcelem_array_selec(array, typentry, + NULL, 0, NULL, 0, NULL, 0, + operator); + /* we assume no nulls here, so no stanullfrac correction */ + } + + /* If constant was toasted, release the copy we made */ + if (PointerGetDatum(array) != constval) + pfree(array); + + return selec; +} + +/* + * Array selectivity estimation based on most common elements statistics + * + * This function just deconstructs and sorts the array constant's contents, + * and then passes the problem on to mcelem_array_contain_overlap_selec or + * mcelem_array_contained_selec depending on the operator. + */ +static Selectivity +mcelem_array_selec(ArrayType *array, TypeCacheEntry *typentry, + Datum *mcelem, int nmcelem, + float4 *numbers, int nnumbers, + float4 *hist, int nhist, + Oid operator) +{ + Selectivity selec; + int num_elems; + Datum *elem_values; + bool *elem_nulls; + bool null_present; + int nonnull_nitems; + int i; + + /* + * Prepare constant array data for sorting. Sorting lets us find unique + * elements and efficiently merge with the MCELEM array. + */ + deconstruct_array(array, + typentry->type_id, + typentry->typlen, + typentry->typbyval, + typentry->typalign, + &elem_values, &elem_nulls, &num_elems); + + /* Collapse out any null elements */ + nonnull_nitems = 0; + null_present = false; + for (i = 0; i < num_elems; i++) + { + if (elem_nulls[i]) + null_present = true; + else + elem_values[nonnull_nitems++] = elem_values[i]; + } + + /* + * Query "column @> '{anything, null}'" matches nothing. For the other + * two operators, presence of a null in the constant can be ignored. 
+ */ + if (null_present && operator == OID_ARRAY_CONTAINS_OP) + { + pfree(elem_values); + pfree(elem_nulls); + return (Selectivity) 0.0; + } + + /* Sort extracted elements using their default comparison function. */ + qsort_arg(elem_values, nonnull_nitems, sizeof(Datum), + element_compare, typentry); + + /* Separate cases according to operator */ + if (operator == OID_ARRAY_CONTAINS_OP || operator == OID_ARRAY_OVERLAP_OP) + selec = mcelem_array_contain_overlap_selec(mcelem, nmcelem, + numbers, nnumbers, + elem_values, nonnull_nitems, + operator, typentry); + else if (operator == OID_ARRAY_CONTAINED_OP) + selec = mcelem_array_contained_selec(mcelem, nmcelem, + numbers, nnumbers, + elem_values, nonnull_nitems, + hist, nhist, + operator, typentry); + else + { + elog(ERROR, "arraycontsel called for unrecognized operator %u", + operator); + selec = 0.0; /* keep compiler quiet */ + } + + pfree(elem_values); + pfree(elem_nulls); + return selec; +} + +/* + * Estimate selectivity of "column @> const" and "column && const" based on + * most common element statistics. This estimation assumes element + * occurrences are independent. + * + * mcelem (of length nmcelem) and numbers (of length nnumbers) are from + * the array column's MCELEM statistics slot, or are NULL/0 if stats are + * not available. array_data (of length nitems) is the constant's elements. + * + * Both the mcelem and array_data arrays are assumed presorted according + * to the element type's cmpfunc. Null elements are not present. + * + * TODO: this estimate probably could be improved by using the distinct + * elements count histogram. For example, excepting the special case of + * "column @> '{}'", we can multiply the calculated selectivity by the + * fraction of nonempty arrays in the column. + */ +static Selectivity +mcelem_array_contain_overlap_selec(Datum *mcelem, int nmcelem, + float4 *numbers, int nnumbers, + Datum *array_data, int nitems, + Oid operator, TypeCacheEntry *typentry) +{ + Selectivity selec, + elem_selec; + int mcelem_index, + i; + bool use_bsearch; + float4 minfreq; + + /* + * There should be three more Numbers than Values, because the last three + * cells should hold minimal and maximal frequency among the non-null + * elements, and then the frequency of null elements. Ignore the Numbers + * if not right. + */ + if (nnumbers != nmcelem + 3) + { + numbers = NULL; + nnumbers = 0; + } + + if (numbers) + { + /* Grab the lowest observed frequency */ + minfreq = numbers[nmcelem]; + } + else + { + /* Without statistics make some default assumptions */ + minfreq = 2 * (float4) DEFAULT_CONTAIN_SEL; + } + + /* Decide whether it is faster to use binary search or not. */ + if (nitems * floor_log2((uint32) nmcelem) < nmcelem + nitems) + use_bsearch = true; + else + use_bsearch = false; + + if (operator == OID_ARRAY_CONTAINS_OP) + { + /* + * Initial selectivity for "column @> const" query is 1.0, and it will + * be decreased with each element of constant array. + */ + selec = 1.0; + } + else + { + /* + * Initial selectivity for "column && const" query is 0.0, and it will + * be increased with each element of constant array. + */ + selec = 0.0; + } + + /* Scan mcelem and array in parallel. */ + mcelem_index = 0; + for (i = 0; i < nitems; i++) + { + bool match = false; + + /* Ignore any duplicates in the array data. */ + if (i > 0 && + element_compare(&array_data[i - 1], &array_data[i], typentry) == 0) + continue; + + /* Find the smallest MCELEM >= this array item. 
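A quick numeric illustration of the independence combination described in the header comment above (the frequencies are invented, not real statistics): with per-element frequencies 0.10 and 0.20, "col @> const" multiplies them to about 0.02, while "col && const" combines them by inclusion-exclusion to 0.10 + 0.20 - 0.02 = 0.28. A stand-alone sketch of the same arithmetic:

#include <stdio.h>

int
main(void)
{
    double freqs[] = {0.10, 0.20};  /* per-element frequencies from MCELEM */
    double contains = 1.0;          /* "col @> const" starts at 1 ...      */
    double overlaps = 0.0;          /* "col && const" starts at 0 ...      */

    for (int i = 0; i < 2; i++)
    {
        contains *= freqs[i];                                 /* AND-like */
        overlaps = overlaps + freqs[i] - overlaps * freqs[i]; /* OR-like  */
    }
    printf("@> ~ %.3f   && ~ %.3f\n", contains, overlaps);    /* 0.020  0.280 */
    return 0;
}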
*/ + if (use_bsearch) + { + match = find_next_mcelem(mcelem, nmcelem, array_data[i], + &mcelem_index, typentry); + } + else + { + while (mcelem_index < nmcelem) + { + int cmp = element_compare(&mcelem[mcelem_index], + &array_data[i], + typentry); + + if (cmp < 0) + mcelem_index++; + else + { + if (cmp == 0) + match = true; /* mcelem is found */ + break; + } + } + } + + if (match && numbers) + { + /* MCELEM matches the array item; use its frequency. */ + elem_selec = numbers[mcelem_index]; + mcelem_index++; + } + else + { + /* + * The element is not in MCELEM. Punt, but assume that the + * selectivity cannot be more than minfreq / 2. + */ + elem_selec = Min(DEFAULT_CONTAIN_SEL, minfreq / 2); + } + + /* + * Update overall selectivity using the current element's selectivity + * and an assumption of element occurrence independence. + */ + if (operator == OID_ARRAY_CONTAINS_OP) + selec *= elem_selec; + else + selec = selec + elem_selec - selec * elem_selec; + + /* Clamp intermediate results to stay sane despite roundoff error */ + CLAMP_PROBABILITY(selec); + } + + return selec; +} + +/* + * Estimate selectivity of "column <@ const" based on most common element + * statistics. + * + * mcelem (of length nmcelem) and numbers (of length nnumbers) are from + * the array column's MCELEM statistics slot, or are NULL/0 if stats are + * not available. array_data (of length nitems) is the constant's elements. + * hist (of length nhist) is from the array column's DECHIST statistics slot, + * or is NULL/0 if those stats are not available. + * + * Both the mcelem and array_data arrays are assumed presorted according + * to the element type's cmpfunc. Null elements are not present. + * + * Independent element occurrence would imply a particular distribution of + * distinct element counts among matching rows. Real data usually falsifies + * that assumption. For example, in a set of 11-element integer arrays having + * elements in the range [0..10], element occurrences are typically not + * independent. If they were, a sufficiently-large set would include all + * distinct element counts 0 through 11. We correct for this using the + * histogram of distinct element counts. + * + * In the "column @> const" and "column && const" cases, we usually have a + * "const" with low number of elements (otherwise we have selectivity close + * to 0 or 1 respectively). That's why the effect of dependence related + * to distinct element count distribution is negligible there. In the + * "column <@ const" case, number of elements is usually high (otherwise we + * have selectivity close to 0). That's why we should do a correction with + * the array distinct element count distribution here. + * + * Using the histogram of distinct element counts produces a different + * distribution law than independent occurrences of elements. This + * distribution law can be described as follows: + * + * P(o1, o2, ..., on) = f1^o1 * (1 - f1)^(1 - o1) * f2^o2 * + * (1 - f2)^(1 - o2) * ... * fn^on * (1 - fn)^(1 - on) * hist[m] / ind[m] + * + * where: + * o1, o2, ..., on - occurrences of elements 1, 2, ..., n + * (1 - occurrence, 0 - no occurrence) in row + * f1, f2, ..., fn - frequencies of elements 1, 2, ..., n + * (scalar values in [0..1]) according to collected statistics + * m = o1 + o2 + ... + on = total number of distinct elements in row + * hist[m] - histogram data for occurrence of m elements. + * ind[m] - probability of m occurrences from n events assuming their + * probabilities to be equal to frequencies of array elements. 
+ * + * ind[m] = sum(f1^o1 * (1 - f1)^(1 - o1) * f2^o2 * (1 - f2)^(1 - o2) * + * ... * fn^on * (1 - fn)^(1 - on), o1, o2, ..., on) | o1 + o2 + .. on = m + */ +static Selectivity +mcelem_array_contained_selec(Datum *mcelem, int nmcelem, + float4 *numbers, int nnumbers, + Datum *array_data, int nitems, + float4 *hist, int nhist, + Oid operator, TypeCacheEntry *typentry) +{ + int mcelem_index, + i, + unique_nitems = 0; + float selec, + minfreq, + nullelem_freq; + float *dist, + *mcelem_dist, + *hist_part; + float avg_count, + mult, + rest; + float *elem_selec; + + /* + * There should be three more Numbers than Values in the MCELEM slot, + * because the last three cells should hold minimal and maximal frequency + * among the non-null elements, and then the frequency of null elements. + * Punt if not right, because we can't do much without the element freqs. + */ + if (numbers == NULL || nnumbers != nmcelem + 3) + return DEFAULT_CONTAIN_SEL; + + /* Can't do much without a count histogram, either */ + if (hist == NULL || nhist < 3) + return DEFAULT_CONTAIN_SEL; + + /* + * Grab some of the summary statistics that compute_array_stats() stores: + * lowest frequency, frequency of null elements, and average distinct + * element count. + */ + minfreq = numbers[nmcelem]; + nullelem_freq = numbers[nmcelem + 2]; + avg_count = hist[nhist - 1]; + + /* + * "rest" will be the sum of the frequencies of all elements not + * represented in MCELEM. The average distinct element count is the sum + * of the frequencies of *all* elements. Begin with that; we will proceed + * to subtract the MCELEM frequencies. + */ + rest = avg_count; + + /* + * mult is a multiplier representing estimate of probability that each + * mcelem that is not present in constant doesn't occur. + */ + mult = 1.0f; + + /* + * elem_selec is array of estimated frequencies for elements in the + * constant. + */ + elem_selec = (float *) palloc(sizeof(float) * nitems); + + /* Scan mcelem and array in parallel. */ + mcelem_index = 0; + for (i = 0; i < nitems; i++) + { + bool match = false; + + /* Ignore any duplicates in the array data. */ + if (i > 0 && + element_compare(&array_data[i - 1], &array_data[i], typentry) == 0) + continue; + + /* + * Iterate over MCELEM until we find an entry greater than or equal to + * this element of the constant. Update "rest" and "mult" for mcelem + * entries skipped over. + */ + while (mcelem_index < nmcelem) + { + int cmp = element_compare(&mcelem[mcelem_index], + &array_data[i], + typentry); + + if (cmp < 0) + { + mult *= (1.0f - numbers[mcelem_index]); + rest -= numbers[mcelem_index]; + mcelem_index++; + } + else + { + if (cmp == 0) + match = true; /* mcelem is found */ + break; + } + } + + if (match) + { + /* MCELEM matches the array item. */ + elem_selec[unique_nitems] = numbers[mcelem_index]; + /* "rest" is decremented for all mcelems, matched or not */ + rest -= numbers[mcelem_index]; + mcelem_index++; + } + else + { + /* + * The element is not in MCELEM. Punt, but assume that the + * selectivity cannot be more than minfreq / 2. + */ + elem_selec[unique_nitems] = Min(DEFAULT_CONTAIN_SEL, + minfreq / 2); + } + + unique_nitems++; + } + + /* + * If we handled all constant elements without exhausting the MCELEM + * array, finish walking it to complete calculation of "rest" and "mult". 
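Once the loop that follows finishes walking the remaining MCELEM entries, "rest" ends up as the average distinct-element count minus the frequencies of all MCELEM entries, and "mult" as the product of (1 - frequency) over the MCELEM entries that are absent from the constant. A tiny check with invented numbers (not drawn from any real pg_statistic row):

#include <math.h>
#include <stdio.h>

int
main(void)
{
    double avg_count = 5.0;                 /* hist[nhist - 1]              */
    double mcelem_freq[] = {0.5, 0.3, 0.1}; /* MCELEM frequencies           */
    int    in_const[]   = {0, 1, 0};        /* only the 0.3 element appears
                                             * in the constant array        */
    double rest = avg_count;
    double mult = 1.0;

    for (int i = 0; i < 3; i++)
    {
        rest -= mcelem_freq[i];             /* subtracted for every MCELEM  */
        if (!in_const[i])
            mult *= (1.0 - mcelem_freq[i]); /* only for absent MCELEMs      */
    }
    /* rest = 4.1, mult = 0.45; the real code later applies mult *= exp(-rest) */
    printf("rest=%.2f mult=%.2f poisson0=%.4f\n", rest, mult, exp(-rest));
    return 0;
}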
+ */ + while (mcelem_index < nmcelem) + { + mult *= (1.0f - numbers[mcelem_index]); + rest -= numbers[mcelem_index]; + mcelem_index++; + } + + /* + * The presence of many distinct rare elements materially decreases + * selectivity. Use the Poisson distribution to estimate the probability + * of a column value having zero occurrences of such elements. See above + * for the definition of "rest". + */ + mult *= exp(-rest); + + /*---------- + * Using the distinct element count histogram requires + * O(unique_nitems * (nmcelem + unique_nitems)) + * operations. Beyond a certain computational cost threshold, it's + * reasonable to sacrifice accuracy for decreased planning time. We limit + * the number of operations to EFFORT * nmcelem; since nmcelem is limited + * by the column's statistics target, the work done is user-controllable. + * + * If the number of operations would be too large, we can reduce it + * without losing all accuracy by reducing unique_nitems and considering + * only the most-common elements of the constant array. To make the + * results exactly match what we would have gotten with only those + * elements to start with, we'd have to remove any discarded elements' + * frequencies from "mult", but since this is only an approximation + * anyway, we don't bother with that. Therefore it's sufficient to qsort + * elem_selec[] and take the largest elements. (They will no longer match + * up with the elements of array_data[], but we don't care.) + *---------- + */ +#define EFFORT 100 + + if ((nmcelem + unique_nitems) > 0 && + unique_nitems > EFFORT * nmcelem / (nmcelem + unique_nitems)) + { + /* + * Use the quadratic formula to solve for largest allowable N. We + * have A = 1, B = nmcelem, C = - EFFORT * nmcelem. + */ + double b = (double) nmcelem; + int n; + + n = (int) ((sqrt(b * b + 4 * EFFORT * b) - b) / 2); + + /* Sort, then take just the first n elements */ + qsort(elem_selec, unique_nitems, sizeof(float), + float_compare_desc); + unique_nitems = n; + } + + /* + * Calculate probabilities of each distinct element count for both mcelems + * and constant elements. At this point, assume independent element + * occurrence. + */ + dist = calc_distr(elem_selec, unique_nitems, unique_nitems, 0.0f); + mcelem_dist = calc_distr(numbers, nmcelem, unique_nitems, rest); + + /* ignore hist[nhist-1], which is the average not a histogram member */ + hist_part = calc_hist(hist, nhist - 1, unique_nitems); + + selec = 0.0f; + for (i = 0; i <= unique_nitems; i++) + { + /* + * mult * dist[i] / mcelem_dist[i] gives us probability of qual + * matching from assumption of independent element occurrence with the + * condition that distinct element count = i. + */ + if (mcelem_dist[i] > 0) + selec += hist_part[i] * mult * dist[i] / mcelem_dist[i]; + } + + pfree(dist); + pfree(mcelem_dist); + pfree(hist_part); + pfree(elem_selec); + + /* Take into account occurrence of NULL element. */ + selec *= (1.0f - nullelem_freq); + + CLAMP_PROBABILITY(selec); + + return selec; +} + +/* + * Calculate the first n distinct element count probabilities from a + * histogram of distinct element counts. + * + * Returns a palloc'd array of n+1 entries, with array[k] being the + * probability of element count k, k in [0..n]. + * + * We assume that a histogram box with bounds a and b gives 1 / ((b - a + 1) * + * (nhist - 1)) probability to each value in (a,b) and an additional half of + * that to a and b themselves. 
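Returning briefly to the EFFORT cap in mcelem_array_contained_selec above: the largest allowable number of constant elements n satisfies n * (nmcelem + n) <= EFFORT * nmcelem, hence the quadratic-formula expression in the code. For example, with nmcelem = 1000 and EFFORT = 100, n works out to about 91. A small check (the inputs are just an example):

#include <math.h>
#include <stdio.h>

int
main(void)
{
    double b = 1000.0;              /* nmcelem */
    double effort = 100.0;          /* EFFORT  */
    int    n = (int) ((sqrt(b * b + 4.0 * effort * b) - b) / 2.0);

    /* n*(b + n) stays within the budget: 91 * 1091 = 99281 <= 100000 */
    printf("n=%d  n*(b+n)=%.0f  budget=%.0f\n", n, n * (b + n), effort * b);
    return 0;
}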
+ */ +static float * +calc_hist(const float4 *hist, int nhist, int n) +{ + float *hist_part; + int k, + i = 0; + float prev_interval = 0, + next_interval; + float frac; + + hist_part = (float *) palloc((n + 1) * sizeof(float)); + + /* + * frac is a probability contribution for each interval between histogram + * values. We have nhist - 1 intervals, so contribution of each one will + * be 1 / (nhist - 1). + */ + frac = 1.0f / ((float) (nhist - 1)); + + for (k = 0; k <= n; k++) + { + int count = 0; + + /* + * Count the histogram boundaries equal to k. (Although the histogram + * should theoretically contain only exact integers, entries are + * floats so there could be roundoff error in large values. Treat any + * fractional value as equal to the next larger k.) + */ + while (i < nhist && hist[i] <= k) + { + count++; + i++; + } + + if (count > 0) + { + /* k is an exact bound for at least one histogram box. */ + float val; + + /* Find length between current histogram value and the next one */ + if (i < nhist) + next_interval = hist[i] - hist[i - 1]; + else + next_interval = 0; + + /* + * count - 1 histogram boxes contain k exclusively. They + * contribute a total of (count - 1) * frac probability. Also + * factor in the partial histogram boxes on either side. + */ + val = (float) (count - 1); + if (next_interval > 0) + val += 0.5f / next_interval; + if (prev_interval > 0) + val += 0.5f / prev_interval; + hist_part[k] = frac * val; + + prev_interval = next_interval; + } + else + { + /* k does not appear as an exact histogram bound. */ + if (prev_interval > 0) + hist_part[k] = frac / prev_interval; + else + hist_part[k] = 0.0f; + } + } + + return hist_part; +} + +/* + * Consider n independent events with probabilities p[]. This function + * calculates probabilities of exact k of events occurrence for k in [0..m]. + * Returns a palloc'd array of size m+1. + * + * "rest" is the sum of the probabilities of all low-probability events not + * included in p. + * + * Imagine matrix M of size (n + 1) x (m + 1). Element M[i,j] denotes the + * probability that exactly j of first i events occur. Obviously M[0,0] = 1. + * For any constant j, each increment of i increases the probability iff the + * event occurs. So, by the law of total probability: + * M[i,j] = M[i - 1, j] * (1 - p[i]) + M[i - 1, j - 1] * p[i] + * for i > 0, j > 0. + * M[i,0] = M[i - 1, 0] * (1 - p[i]) for i > 0. + */ +static float * +calc_distr(const float *p, int n, int m, float rest) +{ + float *row, + *prev_row, + *tmp; + int i, + j; + + /* + * Since we return only the last row of the matrix and need only the + * current and previous row for calculations, allocate two rows. + */ + row = (float *) palloc((m + 1) * sizeof(float)); + prev_row = (float *) palloc((m + 1) * sizeof(float)); + + /* M[0,0] = 1 */ + row[0] = 1.0f; + for (i = 1; i <= n; i++) + { + float t = p[i - 1]; + + /* Swap rows */ + tmp = row; + row = prev_row; + prev_row = tmp; + + /* Calculate next row */ + for (j = 0; j <= i && j <= m; j++) + { + float val = 0.0f; + + if (j < i) + val += prev_row[j] * (1.0f - t); + if (j > 0) + val += prev_row[j - 1] * t; + row[j] = val; + } + } + + /* + * The presence of many distinct rare (not in "p") elements materially + * decreases selectivity. Model their collective occurrence with the + * Poisson distribution. 
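The M[i,j] recurrence described above is easy to sanity-check in isolation. The stand-alone sketch below recomputes it for two independent events with probabilities 0.5 and 0.25 (invented values), giving the distribution {0.375, 0.5, 0.125} over 0, 1 or 2 occurrences; it mirrors calc_distr's two-row technique but is not the PostgreSQL code itself:

#include <stdio.h>

int
main(void)
{
    double p[] = {0.5, 0.25};       /* independent event probabilities */
    int    n = 2, m = 2;
    double prev[3] = {0}, row[3] = {0};

    row[0] = 1.0;                   /* M[0,0] = 1 */
    for (int i = 1; i <= n; i++)
    {
        double t = p[i - 1];

        for (int j = 0; j <= m; j++) { prev[j] = row[j]; row[j] = 0.0; }
        for (int j = 0; j <= i && j <= m; j++)
        {
            if (j < i) row[j] += prev[j] * (1.0 - t);   /* event i absent  */
            if (j > 0) row[j] += prev[j - 1] * t;       /* event i present */
        }
    }
    /* expected: P(0)=0.375  P(1)=0.500  P(2)=0.125 */
    printf("%.3f %.3f %.3f\n", row[0], row[1], row[2]);
    return 0;
}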
+ */ + if (rest > DEFAULT_CONTAIN_SEL) + { + float t; + + /* Swap rows */ + tmp = row; + row = prev_row; + prev_row = tmp; + + for (i = 0; i <= m; i++) + row[i] = 0.0f; + + /* Value of Poisson distribution for 0 occurrences */ + t = exp(-rest); + + /* + * Calculate convolution of previously computed distribution and the + * Poisson distribution. + */ + for (i = 0; i <= m; i++) + { + for (j = 0; j <= m - i; j++) + row[j + i] += prev_row[j] * t; + + /* Get Poisson distribution value for (i + 1) occurrences */ + t *= rest / (float) (i + 1); + } + } + + pfree(prev_row); + return row; +} + +/* Fast function for floor value of 2 based logarithm calculation. */ +static int +floor_log2(uint32 n) +{ + int logval = 0; + + if (n == 0) + return -1; + if (n >= (1 << 16)) + { + n >>= 16; + logval += 16; + } + if (n >= (1 << 8)) + { + n >>= 8; + logval += 8; + } + if (n >= (1 << 4)) + { + n >>= 4; + logval += 4; + } + if (n >= (1 << 2)) + { + n >>= 2; + logval += 2; + } + if (n >= (1 << 1)) + { + logval += 1; + } + return logval; +} + +/* + * find_next_mcelem binary-searches a most common elements array, starting + * from *index, for the first member >= value. It saves the position of the + * match into *index and returns true if it's an exact match. (Note: we + * assume the mcelem elements are distinct so there can't be more than one + * exact match.) + */ +static bool +find_next_mcelem(Datum *mcelem, int nmcelem, Datum value, int *index, + TypeCacheEntry *typentry) +{ + int l = *index, + r = nmcelem - 1, + i, + res; + + while (l <= r) + { + i = (l + r) / 2; + res = element_compare(&mcelem[i], &value, typentry); + if (res == 0) + { + *index = i; + return true; + } + else if (res < 0) + l = i + 1; + else + r = i - 1; + } + *index = l; + return false; +} + +/* + * Comparison function for elements. + * + * We use the element type's default btree opclass, and its default collation + * if the type is collation-sensitive. + * + * XXX consider using SortSupport infrastructure + */ +static int +element_compare(const void *key1, const void *key2, void *arg) +{ + Datum d1 = *((const Datum *) key1); + Datum d2 = *((const Datum *) key2); + TypeCacheEntry *typentry = (TypeCacheEntry *) arg; + FmgrInfo *cmpfunc = &typentry->cmp_proc_finfo; + Datum c; + + c = FunctionCall2Coll(cmpfunc, typentry->typcollation, d1, d2); + return DatumGetInt32(c); +} + +/* + * Comparison function for sorting floats into descending order. 
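One detail of the Poisson fold-in above that is worth spelling out: the probability mass function is generated incrementally, starting from P(0) = exp(-rest) and stepping with P(k+1) = P(k) * rest / (k + 1). A tiny check with an invented rest value of 0.5:

#include <math.h>
#include <stdio.h>

int
main(void)
{
    double rest = 0.5;              /* total frequency of non-MCELEM elements */
    double t = exp(-rest);          /* P(0 occurrences) */

    for (int k = 0; k <= 3; k++)
    {
        printf("P(%d) = %.4f\n", k, t);     /* 0.6065, 0.3033, 0.0758, ... */
        t *= rest / (double) (k + 1);       /* advance to P(k + 1)         */
    }
    return 0;
}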
+ */ +static int +float_compare_desc(const void *key1, const void *key2) +{ + float d1 = *((const float *) key1); + float d2 = *((const float *) key2); + + if (d1 > d2) + return -1; + else if (d1 < d2) + return 1; + else + return 0; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/array_typanalyze.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/array_typanalyze.c new file mode 100644 index 00000000000..ce6a8179f6f --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/array_typanalyze.c @@ -0,0 +1,791 @@ +/*------------------------------------------------------------------------- + * + * array_typanalyze.c + * Functions for gathering statistics from array columns + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/array_typanalyze.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/detoast.h" +#include "commands/vacuum.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/lsyscache.h" +#include "utils/typcache.h" + + +/* + * To avoid consuming too much memory, IO and CPU load during analysis, and/or + * too much space in the resulting pg_statistic rows, we ignore arrays that + * are wider than ARRAY_WIDTH_THRESHOLD (after detoasting!). Note that this + * number is considerably more than the similar WIDTH_THRESHOLD limit used + * in analyze.c's standard typanalyze code. + */ +#define ARRAY_WIDTH_THRESHOLD 0x10000 + +/* Extra data for compute_array_stats function */ +typedef struct +{ + /* Information about array element type */ + Oid type_id; /* element type's OID */ + Oid eq_opr; /* default equality operator's OID */ + Oid coll_id; /* collation to use */ + bool typbyval; /* physical properties of element type */ + int16 typlen; + char typalign; + + /* + * Lookup data for element type's comparison and hash functions (these are + * in the type's typcache entry, which we expect to remain valid over the + * lifespan of the ANALYZE run) + */ + FmgrInfo *cmp; + FmgrInfo *hash; + + /* Saved state from std_typanalyze() */ + AnalyzeAttrComputeStatsFunc std_compute_stats; + void *std_extra_data; +} ArrayAnalyzeExtraData; + +/* + * While compute_array_stats is running, we keep a pointer to the extra data + * here for use by assorted subroutines. compute_array_stats doesn't + * currently need to be re-entrant, so avoiding this is not worth the extra + * notational cruft that would be needed. + */ +static __thread ArrayAnalyzeExtraData *array_extra_data; + +/* A hash table entry for the Lossy Counting algorithm */ +typedef struct +{ + Datum key; /* This is 'e' from the LC algorithm. */ + int frequency; /* This is 'f'. */ + int delta; /* And this is 'delta'. */ + int last_container; /* For de-duplication of array elements. 
*/ +} TrackItem; + +/* A hash table entry for distinct-elements counts */ +typedef struct +{ + int count; /* Count of distinct elements in an array */ + int frequency; /* Number of arrays seen with this count */ +} DECountItem; + +static void compute_array_stats(VacAttrStats *stats, + AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows); +static void prune_element_hashtable(HTAB *elements_tab, int b_current); +static uint32 element_hash(const void *key, Size keysize); +static int element_match(const void *key1, const void *key2, Size keysize); +static int element_compare(const void *key1, const void *key2); +static int trackitem_compare_frequencies_desc(const void *e1, const void *e2, void *arg); +static int trackitem_compare_element(const void *e1, const void *e2, void *arg); +static int countitem_compare_count(const void *e1, const void *e2, void *arg); + + +/* + * array_typanalyze -- typanalyze function for array columns + */ +Datum +array_typanalyze(PG_FUNCTION_ARGS) +{ + VacAttrStats *stats = (VacAttrStats *) PG_GETARG_POINTER(0); + Oid element_typeid; + TypeCacheEntry *typentry; + ArrayAnalyzeExtraData *extra_data; + + /* + * Call the standard typanalyze function. It may fail to find needed + * operators, in which case we also can't do anything, so just fail. + */ + if (!std_typanalyze(stats)) + PG_RETURN_BOOL(false); + + /* + * Check attribute data type is a varlena array (or a domain over one). + */ + element_typeid = get_base_element_type(stats->attrtypid); + if (!OidIsValid(element_typeid)) + elog(ERROR, "array_typanalyze was invoked for non-array type %u", + stats->attrtypid); + + /* + * Gather information about the element type. If we fail to find + * something, return leaving the state from std_typanalyze() in place. + */ + typentry = lookup_type_cache(element_typeid, + TYPECACHE_EQ_OPR | + TYPECACHE_CMP_PROC_FINFO | + TYPECACHE_HASH_PROC_FINFO); + + if (!OidIsValid(typentry->eq_opr) || + !OidIsValid(typentry->cmp_proc_finfo.fn_oid) || + !OidIsValid(typentry->hash_proc_finfo.fn_oid)) + PG_RETURN_BOOL(true); + + /* Store our findings for use by compute_array_stats() */ + extra_data = (ArrayAnalyzeExtraData *) palloc(sizeof(ArrayAnalyzeExtraData)); + extra_data->type_id = typentry->type_id; + extra_data->eq_opr = typentry->eq_opr; + extra_data->coll_id = stats->attrcollid; /* collation we should use */ + extra_data->typbyval = typentry->typbyval; + extra_data->typlen = typentry->typlen; + extra_data->typalign = typentry->typalign; + extra_data->cmp = &typentry->cmp_proc_finfo; + extra_data->hash = &typentry->hash_proc_finfo; + + /* Save old compute_stats and extra_data for scalar statistics ... */ + extra_data->std_compute_stats = stats->compute_stats; + extra_data->std_extra_data = stats->extra_data; + + /* ... and replace with our info */ + stats->compute_stats = compute_array_stats; + stats->extra_data = extra_data; + + /* + * Note we leave stats->minrows set as std_typanalyze set it. Should it + * be increased for array analysis purposes? + */ + + PG_RETURN_BOOL(true); +} + +/* + * compute_array_stats() -- compute statistics for an array column + * + * This function computes statistics useful for determining selectivity of + * the array operators <@, &&, and @>. It is invoked by ANALYZE via the + * compute_stats hook after sample rows have been collected. + * + * We also invoke the standard compute_stats function, which will compute + * "scalar" statistics relevant to the btree-style array comparison operators. 
+ * However, exact duplicates of an entire array may be rare despite many + * arrays sharing individual elements. This especially afflicts long arrays, + * which are also liable to lack all scalar statistics due to the low + * WIDTH_THRESHOLD used in analyze.c. So, in addition to the standard stats, + * we find the most common array elements and compute a histogram of distinct + * element counts. + * + * The algorithm used is Lossy Counting, as proposed in the paper "Approximate + * frequency counts over data streams" by G. S. Manku and R. Motwani, in + * Proceedings of the 28th International Conference on Very Large Data Bases, + * Hong Kong, China, August 2002, section 4.2. The paper is available at + * http://www.vldb.org/conf/2002/S10P03.pdf + * + * The Lossy Counting (aka LC) algorithm goes like this: + * Let s be the threshold frequency for an item (the minimum frequency we + * are interested in) and epsilon the error margin for the frequency. Let D + * be a set of triples (e, f, delta), where e is an element value, f is that + * element's frequency (actually, its current occurrence count) and delta is + * the maximum error in f. We start with D empty and process the elements in + * batches of size w. (The batch size is also known as "bucket size" and is + * equal to 1/epsilon.) Let the current batch number be b_current, starting + * with 1. For each element e we either increment its f count, if it's + * already in D, or insert a new triple into D with values (e, 1, b_current + * - 1). After processing each batch we prune D, by removing from it all + * elements with f + delta <= b_current. After the algorithm finishes we + * suppress all elements from D that do not satisfy f >= (s - epsilon) * N, + * where N is the total number of elements in the input. We emit the + * remaining elements with estimated frequency f/N. The LC paper proves + * that this algorithm finds all elements with true frequency at least s, + * and that no frequency is overestimated or is underestimated by more than + * epsilon. Furthermore, given reasonable assumptions about the input + * distribution, the required table size is no more than about 7 times w. + * + * In the absence of a principled basis for other particular values, we + * follow ts_typanalyze() and use parameters s = 0.07/K, epsilon = s/10. + * But we leave out the correction for stopwords, which do not apply to + * arrays. These parameters give bucket width w = K/0.007 and maximum + * expected hashtable size of about 1000 * K. + * + * Elements may repeat within an array. Since duplicates do not change the + * behavior of <@, && or @>, we want to count each element only once per + * array. Therefore, we store in the finished pg_statistic entry each + * element's frequency as the fraction of all non-null rows that contain it. + * We divide the raw counts by nonnull_cnt to get those figures. + */ +static void +compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, + int samplerows, double totalrows) +{ + ArrayAnalyzeExtraData *extra_data; + int num_mcelem; + int null_elem_cnt = 0; + int analyzed_rows = 0; + + /* This is D from the LC algorithm. 
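The Lossy Counting bookkeeping described in the long comment above can be sketched in a few dozen lines. The program below tracks (e, f, delta) triples for a small integer stream, using a flat array instead of a hash table; the lc_* names and the fixed capacity are simplifications for illustration only and do not appear in this file.

#include <stdio.h>

#define MAXTRACK 64

typedef struct { int e; int f; int delta; } Triple;

static Triple D[MAXTRACK];
static int    ntracked = 0;

static void
lc_insert(int e, int b_current)
{
    for (int i = 0; i < ntracked; i++)
        if (D[i].e == e) { D[i].f++; return; }      /* known value: bump f   */
    if (ntracked < MAXTRACK)                        /* new: (e, 1, b_cur - 1) */
        D[ntracked++] = (Triple) {e, 1, b_current - 1};
}

static void
lc_prune(int b_current)
{
    int keep = 0;

    for (int i = 0; i < ntracked; i++)
        if (D[i].f + D[i].delta > b_current)        /* drop f + delta <= b   */
            D[keep++] = D[i];
    ntracked = keep;
}

int
main(void)
{
    int stream[] = {1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 5};
    int N = (int) (sizeof(stream) / sizeof(stream[0]));
    int w = 4;                                      /* bucket width = 1/epsilon */
    int b_current = 1;

    for (int i = 0; i < N; i++)
    {
        lc_insert(stream[i], b_current);
        if ((i + 1) % w == 0)                       /* end of a bucket: prune  */
        {
            lc_prune(b_current);
            b_current++;
        }
    }
    for (int i = 0; i < ntracked; i++)              /* surviving estimates     */
        printf("e=%d f=%d delta=%d\n", D[i].e, D[i].f, D[i].delta);
    return 0;
}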
*/ + HTAB *elements_tab; + HASHCTL elem_hash_ctl; + HASH_SEQ_STATUS scan_status; + + /* This is the current bucket number from the LC algorithm */ + int b_current; + + /* This is 'w' from the LC algorithm */ + int bucket_width; + int array_no; + int64 element_no; + TrackItem *item; + int slot_idx; + HTAB *count_tab; + HASHCTL count_hash_ctl; + DECountItem *count_item; + + extra_data = (ArrayAnalyzeExtraData *) stats->extra_data; + + /* + * Invoke analyze.c's standard analysis function to create scalar-style + * stats for the column. It will expect its own extra_data pointer, so + * temporarily install that. + */ + stats->extra_data = extra_data->std_extra_data; + extra_data->std_compute_stats(stats, fetchfunc, samplerows, totalrows); + stats->extra_data = extra_data; + + /* + * Set up static pointer for use by subroutines. We wait till here in + * case std_compute_stats somehow recursively invokes us (probably not + * possible, but ...) + */ + array_extra_data = extra_data; + + /* + * We want statistics_target * 10 elements in the MCELEM array. This + * multiplier is pretty arbitrary, but is meant to reflect the fact that + * the number of individual elements tracked in pg_statistic ought to be + * more than the number of values for a simple scalar column. + */ + num_mcelem = stats->attr->attstattarget * 10; + + /* + * We set bucket width equal to num_mcelem / 0.007 as per the comment + * above. + */ + bucket_width = num_mcelem * 1000 / 7; + + /* + * Create the hashtable. It will be in local memory, so we don't need to + * worry about overflowing the initial size. Also we don't need to pay any + * attention to locking and memory management. + */ + elem_hash_ctl.keysize = sizeof(Datum); + elem_hash_ctl.entrysize = sizeof(TrackItem); + elem_hash_ctl.hash = element_hash; + elem_hash_ctl.match = element_match; + elem_hash_ctl.hcxt = CurrentMemoryContext; + elements_tab = hash_create("Analyzed elements table", + num_mcelem, + &elem_hash_ctl, + HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT); + + /* hashtable for array distinct elements counts */ + count_hash_ctl.keysize = sizeof(int); + count_hash_ctl.entrysize = sizeof(DECountItem); + count_hash_ctl.hcxt = CurrentMemoryContext; + count_tab = hash_create("Array distinct element count table", + 64, + &count_hash_ctl, + HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); + + /* Initialize counters. */ + b_current = 1; + element_no = 0; + + /* Loop over the arrays. */ + for (array_no = 0; array_no < samplerows; array_no++) + { + Datum value; + bool isnull; + ArrayType *array; + int num_elems; + Datum *elem_values; + bool *elem_nulls; + bool null_present; + int j; + int64 prev_element_no = element_no; + int distinct_count; + bool count_item_found; + + vacuum_delay_point(); + + value = fetchfunc(stats, array_no, &isnull); + if (isnull) + { + /* ignore arrays that are null overall */ + continue; + } + + /* Skip too-large values. */ + if (toast_raw_datum_size(value) > ARRAY_WIDTH_THRESHOLD) + continue; + else + analyzed_rows++; + + /* + * Now detoast the array if needed, and deconstruct into datums. + */ + array = DatumGetArrayTypeP(value); + + Assert(ARR_ELEMTYPE(array) == extra_data->type_id); + deconstruct_array(array, + extra_data->type_id, + extra_data->typlen, + extra_data->typbyval, + extra_data->typalign, + &elem_values, &elem_nulls, &num_elems); + + /* + * We loop through the elements in the array and add them to our + * tracking hashtable. 
+ */ + null_present = false; + for (j = 0; j < num_elems; j++) + { + Datum elem_value; + bool found; + + /* No null element processing other than flag setting here */ + if (elem_nulls[j]) + { + null_present = true; + continue; + } + + /* Lookup current element in hashtable, adding it if new */ + elem_value = elem_values[j]; + item = (TrackItem *) hash_search(elements_tab, + &elem_value, + HASH_ENTER, &found); + + if (found) + { + /* The element value is already on the tracking list */ + + /* + * The operators we assist ignore duplicate array elements, so + * count a given distinct element only once per array. + */ + if (item->last_container == array_no) + continue; + + item->frequency++; + item->last_container = array_no; + } + else + { + /* Initialize new tracking list element */ + + /* + * If element type is pass-by-reference, we must copy it into + * palloc'd space, so that we can release the array below. (We + * do this so that the space needed for element values is + * limited by the size of the hashtable; if we kept all the + * array values around, it could be much more.) + */ + item->key = datumCopy(elem_value, + extra_data->typbyval, + extra_data->typlen); + + item->frequency = 1; + item->delta = b_current - 1; + item->last_container = array_no; + } + + /* element_no is the number of elements processed (ie N) */ + element_no++; + + /* We prune the D structure after processing each bucket */ + if (element_no % bucket_width == 0) + { + prune_element_hashtable(elements_tab, b_current); + b_current++; + } + } + + /* Count null element presence once per array. */ + if (null_present) + null_elem_cnt++; + + /* Update frequency of the particular array distinct element count. */ + distinct_count = (int) (element_no - prev_element_no); + count_item = (DECountItem *) hash_search(count_tab, &distinct_count, + HASH_ENTER, + &count_item_found); + + if (count_item_found) + count_item->frequency++; + else + count_item->frequency = 1; + + /* Free memory allocated while detoasting. */ + if (PointerGetDatum(array) != value) + pfree(array); + pfree(elem_values); + pfree(elem_nulls); + } + + /* Skip pg_statistic slots occupied by standard statistics */ + slot_idx = 0; + while (slot_idx < STATISTIC_NUM_SLOTS && stats->stakind[slot_idx] != 0) + slot_idx++; + if (slot_idx > STATISTIC_NUM_SLOTS - 2) + elog(ERROR, "insufficient pg_statistic slots for array stats"); + + /* We can only compute real stats if we found some non-null values. */ + if (analyzed_rows > 0) + { + int nonnull_cnt = analyzed_rows; + int count_items_count; + int i; + TrackItem **sort_table; + int track_len; + int64 cutoff_freq; + int64 minfreq, + maxfreq; + + /* + * We assume the standard stats code already took care of setting + * stats_valid, stanullfrac, stawidth, stadistinct. We'd have to + * re-compute those values if we wanted to not store the standard + * stats. + */ + + /* + * Construct an array of the interesting hashtable items, that is, + * those meeting the cutoff frequency (s - epsilon)*N. Also identify + * the minimum and maximum frequencies among these items. + * + * Since epsilon = s/10 and bucket_width = 1/epsilon, the cutoff + * frequency is 9*N / bucket_width. 
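Putting the parameters together: with the default statistics target of 100, num_mcelem is 1000, bucket_width is 1000 * 1000 / 7 = 142857, and after scanning ten million non-null elements the cutoff frequency 9 * N / bucket_width comes to 630. A small arithmetic check (the element count is an invented example):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    int     stattarget = 100;                       /* default statistics target */
    int     num_mcelem = stattarget * 10;           /* K = 1000                   */
    int     bucket_width = num_mcelem * 1000 / 7;   /* w = K / 0.007 = 142857     */
    int64_t element_no = 10000000;                  /* N: non-null elements seen  */
    int64_t cutoff_freq = 9 * element_no / bucket_width;   /* (s - eps) * N      */

    printf("w=%d cutoff=%lld\n", bucket_width, (long long) cutoff_freq); /* 142857, 630 */
    return 0;
}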
+ */ + cutoff_freq = 9 * element_no / bucket_width; + + i = hash_get_num_entries(elements_tab); /* surely enough space */ + sort_table = (TrackItem **) palloc(sizeof(TrackItem *) * i); + + hash_seq_init(&scan_status, elements_tab); + track_len = 0; + minfreq = element_no; + maxfreq = 0; + while ((item = (TrackItem *) hash_seq_search(&scan_status)) != NULL) + { + if (item->frequency > cutoff_freq) + { + sort_table[track_len++] = item; + minfreq = Min(minfreq, item->frequency); + maxfreq = Max(maxfreq, item->frequency); + } + } + Assert(track_len <= i); + + /* emit some statistics for debug purposes */ + elog(DEBUG3, "compute_array_stats: target # mces = %d, " + "bucket width = %d, " + "# elements = " INT64_FORMAT ", hashtable size = %d, " + "usable entries = %d", + num_mcelem, bucket_width, element_no, i, track_len); + + /* + * If we obtained more elements than we really want, get rid of those + * with least frequencies. The easiest way is to qsort the array into + * descending frequency order and truncate the array. + */ + if (num_mcelem < track_len) + { + qsort_interruptible(sort_table, track_len, sizeof(TrackItem *), + trackitem_compare_frequencies_desc, NULL); + /* reset minfreq to the smallest frequency we're keeping */ + minfreq = sort_table[num_mcelem - 1]->frequency; + } + else + num_mcelem = track_len; + + /* Generate MCELEM slot entry */ + if (num_mcelem > 0) + { + MemoryContext old_context; + Datum *mcelem_values; + float4 *mcelem_freqs; + + /* + * We want to store statistics sorted on the element value using + * the element type's default comparison function. This permits + * fast binary searches in selectivity estimation functions. + */ + qsort_interruptible(sort_table, num_mcelem, sizeof(TrackItem *), + trackitem_compare_element, NULL); + + /* Must copy the target values into anl_context */ + old_context = MemoryContextSwitchTo(stats->anl_context); + + /* + * We sorted statistics on the element value, but we want to be + * able to find the minimal and maximal frequencies without going + * through all the values. We also want the frequency of null + * elements. Store these three values at the end of mcelem_freqs. + */ + mcelem_values = (Datum *) palloc(num_mcelem * sizeof(Datum)); + mcelem_freqs = (float4 *) palloc((num_mcelem + 3) * sizeof(float4)); + + /* + * See comments above about use of nonnull_cnt as the divisor for + * the final frequency estimates. 
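The loop that follows fills mcelem_values and mcelem_freqs, so the finished MCELEM slot carries num_mcelem sorted element values and num_mcelem + 3 numbers, the last three being the minimum frequency, the maximum frequency and the null-element frequency. That is the nnumbers == nmcelem + 3 layout the selectivity code in array_selfuncs.c checks for. A toy picture with invented values:

#include <stdio.h>

int
main(void)
{
    /* stavalues: sorted most-common elements (three of them here) */
    int   mcelem_values[] = {7, 19, 42};
    /* stanumbers: one frequency per value, then min, max, null-element freq */
    float mcelem_freqs[]  = {0.30f, 0.05f, 0.20f,   /* per-element            */
                             0.05f, 0.30f,          /* min and max frequency  */
                             0.01f};                /* frequency of NULL elem */
    int   nmcelem = 3;

    for (int i = 0; i < nmcelem; i++)
        printf("value %d -> freq %.2f\n", mcelem_values[i], mcelem_freqs[i]);
    printf("min=%.2f max=%.2f nullfreq=%.2f\n",
           mcelem_freqs[nmcelem], mcelem_freqs[nmcelem + 1],
           mcelem_freqs[nmcelem + 2]);
    return 0;
}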
+ */ + for (i = 0; i < num_mcelem; i++) + { + TrackItem *titem = sort_table[i]; + + mcelem_values[i] = datumCopy(titem->key, + extra_data->typbyval, + extra_data->typlen); + mcelem_freqs[i] = (double) titem->frequency / + (double) nonnull_cnt; + } + mcelem_freqs[i++] = (double) minfreq / (double) nonnull_cnt; + mcelem_freqs[i++] = (double) maxfreq / (double) nonnull_cnt; + mcelem_freqs[i++] = (double) null_elem_cnt / (double) nonnull_cnt; + + MemoryContextSwitchTo(old_context); + + stats->stakind[slot_idx] = STATISTIC_KIND_MCELEM; + stats->staop[slot_idx] = extra_data->eq_opr; + stats->stacoll[slot_idx] = extra_data->coll_id; + stats->stanumbers[slot_idx] = mcelem_freqs; + /* See above comment about extra stanumber entries */ + stats->numnumbers[slot_idx] = num_mcelem + 3; + stats->stavalues[slot_idx] = mcelem_values; + stats->numvalues[slot_idx] = num_mcelem; + /* We are storing values of element type */ + stats->statypid[slot_idx] = extra_data->type_id; + stats->statyplen[slot_idx] = extra_data->typlen; + stats->statypbyval[slot_idx] = extra_data->typbyval; + stats->statypalign[slot_idx] = extra_data->typalign; + slot_idx++; + } + + /* Generate DECHIST slot entry */ + count_items_count = hash_get_num_entries(count_tab); + if (count_items_count > 0) + { + int num_hist = stats->attr->attstattarget; + DECountItem **sorted_count_items; + int j; + int delta; + int64 frac; + float4 *hist; + + /* num_hist must be at least 2 for the loop below to work */ + num_hist = Max(num_hist, 2); + + /* + * Create an array of DECountItem pointers, and sort them into + * increasing count order. + */ + sorted_count_items = (DECountItem **) + palloc(sizeof(DECountItem *) * count_items_count); + hash_seq_init(&scan_status, count_tab); + j = 0; + while ((count_item = (DECountItem *) hash_seq_search(&scan_status)) != NULL) + { + sorted_count_items[j++] = count_item; + } + qsort_interruptible(sorted_count_items, count_items_count, + sizeof(DECountItem *), + countitem_compare_count, NULL); + + /* + * Prepare to fill stanumbers with the histogram, followed by the + * average count. This array must be stored in anl_context. + */ + hist = (float4 *) + MemoryContextAlloc(stats->anl_context, + sizeof(float4) * (num_hist + 1)); + hist[num_hist] = (double) element_no / (double) nonnull_cnt; + + /*---------- + * Construct the histogram of distinct-element counts (DECs). + * + * The object of this loop is to copy the min and max DECs to + * hist[0] and hist[num_hist - 1], along with evenly-spaced DECs + * in between (where "evenly-spaced" is with reference to the + * whole input population of arrays). If we had a complete sorted + * array of DECs, one per analyzed row, the i'th hist value would + * come from DECs[i * (analyzed_rows - 1) / (num_hist - 1)] + * (compare the histogram-making loop in compute_scalar_stats()). + * But instead of that we have the sorted_count_items[] array, + * which holds unique DEC values with their frequencies (that is, + * a run-length-compressed version of the full array). So we + * control advancing through sorted_count_items[] with the + * variable "frac", which is defined as (x - y) * (num_hist - 1), + * where x is the index in the notional DECs array corresponding + * to the start of the next sorted_count_items[] element's run, + * and y is the index in DECs from which we should take the next + * histogram value. We have to advance whenever x <= y, that is + * frac <= 0. 
The x component is the sum of the frequencies seen + * so far (up through the current sorted_count_items[] element), + * and of course y * (num_hist - 1) = i * (analyzed_rows - 1), + * per the subscript calculation above. (The subscript calculation + * implies dropping any fractional part of y; in this formulation + * that's handled by not advancing until frac reaches 1.) + * + * Even though frac has a bounded range, it could overflow int32 + * when working with very large statistics targets, so we do that + * math in int64. + *---------- + */ + delta = analyzed_rows - 1; + j = 0; /* current index in sorted_count_items */ + /* Initialize frac for sorted_count_items[0]; y is initially 0 */ + frac = (int64) sorted_count_items[0]->frequency * (num_hist - 1); + for (i = 0; i < num_hist; i++) + { + while (frac <= 0) + { + /* Advance, and update x component of frac */ + j++; + frac += (int64) sorted_count_items[j]->frequency * (num_hist - 1); + } + hist[i] = sorted_count_items[j]->count; + frac -= delta; /* update y for upcoming i increment */ + } + Assert(j == count_items_count - 1); + + stats->stakind[slot_idx] = STATISTIC_KIND_DECHIST; + stats->staop[slot_idx] = extra_data->eq_opr; + stats->stacoll[slot_idx] = extra_data->coll_id; + stats->stanumbers[slot_idx] = hist; + stats->numnumbers[slot_idx] = num_hist + 1; + slot_idx++; + } + } + + /* + * We don't need to bother cleaning up any of our temporary palloc's. The + * hashtable should also go away, as it used a child memory context. + */ +} + +/* + * A function to prune the D structure from the Lossy Counting algorithm. + * Consult compute_tsvector_stats() for wider explanation. + */ +static void +prune_element_hashtable(HTAB *elements_tab, int b_current) +{ + HASH_SEQ_STATUS scan_status; + TrackItem *item; + + hash_seq_init(&scan_status, elements_tab); + while ((item = (TrackItem *) hash_seq_search(&scan_status)) != NULL) + { + if (item->frequency + item->delta <= b_current) + { + Datum value = item->key; + + if (hash_search(elements_tab, &item->key, + HASH_REMOVE, NULL) == NULL) + elog(ERROR, "hash table corrupted"); + /* We should free memory if element is not passed by value */ + if (!array_extra_data->typbyval) + pfree(DatumGetPointer(value)); + } + } +} + +/* + * Hash function for elements. + * + * We use the element type's default hash opclass, and the column collation + * if the type is collation-sensitive. + */ +static uint32 +element_hash(const void *key, Size keysize) +{ + Datum d = *((const Datum *) key); + Datum h; + + h = FunctionCall1Coll(array_extra_data->hash, + array_extra_data->coll_id, + d); + return DatumGetUInt32(h); +} + +/* + * Matching function for elements, to be used in hashtable lookups. + */ +static int +element_match(const void *key1, const void *key2, Size keysize) +{ + /* The keysize parameter is superfluous here */ + return element_compare(key1, key2); +} + +/* + * Comparison function for elements. + * + * We use the element type's default btree opclass, and the column collation + * if the type is collation-sensitive. 
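Stepping back to the DECHIST construction above: per its own comment, the frac-based walk emulates sampling from a fully expanded, sorted array of distinct-element counts, taking DECs[i * (analyzed_rows - 1) / (num_hist - 1)] for each histogram cell. The naive sketch below performs that notional computation directly, with invented counts; it trades the memory savings of the run-length form for clarity.

#include <stdio.h>

int
main(void)
{
    /* distinct-element counts and how many sample arrays had each count */
    int counts[] = {1, 2, 4, 7};
    int freqs[]  = {3, 5, 1, 1};        /* analyzed_rows = 3 + 5 + 1 + 1 = 10 */
    int analyzed_rows = 10;
    int num_hist = 4;
    int decs[10];
    int k = 0;

    for (int i = 0; i < 4; i++)          /* expand runs into a sorted array */
        for (int j = 0; j < freqs[i]; j++)
            decs[k++] = counts[i];

    for (int i = 0; i < num_hist; i++)   /* hist[i] = DECs[i*(rows-1)/(nh-1)] */
        printf("hist[%d] = %d\n", i,
               decs[i * (analyzed_rows - 1) / (num_hist - 1)]);
    /* prints 1, 2, 2, 7: the minimum, evenly spaced interior values, maximum */
    return 0;
}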
+ * + * XXX consider using SortSupport infrastructure + */ +static int +element_compare(const void *key1, const void *key2) +{ + Datum d1 = *((const Datum *) key1); + Datum d2 = *((const Datum *) key2); + Datum c; + + c = FunctionCall2Coll(array_extra_data->cmp, + array_extra_data->coll_id, + d1, d2); + return DatumGetInt32(c); +} + +/* + * Comparator for sorting TrackItems by frequencies (descending sort) + */ +static int +trackitem_compare_frequencies_desc(const void *e1, const void *e2, void *arg) +{ + const TrackItem *const *t1 = (const TrackItem *const *) e1; + const TrackItem *const *t2 = (const TrackItem *const *) e2; + + return (*t2)->frequency - (*t1)->frequency; +} + +/* + * Comparator for sorting TrackItems by element values + */ +static int +trackitem_compare_element(const void *e1, const void *e2, void *arg) +{ + const TrackItem *const *t1 = (const TrackItem *const *) e1; + const TrackItem *const *t2 = (const TrackItem *const *) e2; + + return element_compare(&(*t1)->key, &(*t2)->key); +} + +/* + * Comparator for sorting DECountItems by count + */ +static int +countitem_compare_count(const void *e1, const void *e2, void *arg) +{ + const DECountItem *const *t1 = (const DECountItem *const *) e1; + const DECountItem *const *t2 = (const DECountItem *const *) e2; + + if ((*t1)->count < (*t2)->count) + return -1; + else if ((*t1)->count == (*t2)->count) + return 0; + else + return 1; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/array_userfuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/array_userfuncs.c new file mode 100644 index 00000000000..5c4fdcfba46 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/array_userfuncs.c @@ -0,0 +1,1701 @@ +/*------------------------------------------------------------------------- + * + * array_userfuncs.c + * Misc user-visible array support functions + * + * Copyright (c) 2003-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/adt/array_userfuncs.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "catalog/pg_type.h" +#include "libpq/pqformat.h" +#include "common/int.h" +#include "common/pg_prng.h" +#include "port/pg_bitutils.h" +#include "utils/array.h" +#include "utils/datum.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/typcache.h" + +/* + * SerialIOData + * Used for caching element-type data in array_agg_serialize + */ +typedef struct SerialIOData +{ + FmgrInfo typsend; +} SerialIOData; + +/* + * DeserialIOData + * Used for caching element-type data in array_agg_deserialize + */ +typedef struct DeserialIOData +{ + FmgrInfo typreceive; + Oid typioparam; +} DeserialIOData; + +static Datum array_position_common(FunctionCallInfo fcinfo); + + +/* + * fetch_array_arg_replace_nulls + * + * Fetch an array-valued argument in expanded form; if it's null, construct an + * empty array value of the proper data type. Also cache basic element type + * information in fn_extra. + * + * Caution: if the input is a read/write pointer, this returns the input + * argument; so callers must be sure that their changes are "safe", that is + * they cannot leave the array in a corrupt state. + * + * If we're being called as an aggregate function, make sure any newly-made + * expanded array is allocated in the aggregate state context, so as to save + * copying operations. 
+ */ +static ExpandedArrayHeader * +fetch_array_arg_replace_nulls(FunctionCallInfo fcinfo, int argno) +{ + ExpandedArrayHeader *eah; + Oid element_type; + ArrayMetaState *my_extra; + MemoryContext resultcxt; + + /* If first time through, create datatype cache struct */ + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL) + { + my_extra = (ArrayMetaState *) + MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(ArrayMetaState)); + my_extra->element_type = InvalidOid; + fcinfo->flinfo->fn_extra = my_extra; + } + + /* Figure out which context we want the result in */ + if (!AggCheckCallContext(fcinfo, &resultcxt)) + resultcxt = CurrentMemoryContext; + + /* Now collect the array value */ + if (!PG_ARGISNULL(argno)) + { + MemoryContext oldcxt = MemoryContextSwitchTo(resultcxt); + + eah = PG_GETARG_EXPANDED_ARRAYX(argno, my_extra); + MemoryContextSwitchTo(oldcxt); + } + else + { + /* We have to look up the array type and element type */ + Oid arr_typeid = get_fn_expr_argtype(fcinfo->flinfo, argno); + + if (!OidIsValid(arr_typeid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not determine input data type"))); + element_type = get_element_type(arr_typeid); + if (!OidIsValid(element_type)) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("input data type is not an array"))); + + eah = construct_empty_expanded_array(element_type, + resultcxt, + my_extra); + } + + return eah; +} + +/*----------------------------------------------------------------------------- + * array_append : + * push an element onto the end of a one-dimensional array + *---------------------------------------------------------------------------- + */ +Datum +array_append(PG_FUNCTION_ARGS) +{ + ExpandedArrayHeader *eah; + Datum newelem; + bool isNull; + Datum result; + int *dimv, + *lb; + int indx; + ArrayMetaState *my_extra; + + eah = fetch_array_arg_replace_nulls(fcinfo, 0); + isNull = PG_ARGISNULL(1); + if (isNull) + newelem = (Datum) 0; + else + newelem = PG_GETARG_DATUM(1); + + if (eah->ndims == 1) + { + /* append newelem */ + lb = eah->lbound; + dimv = eah->dims; + + /* index of added elem is at lb[0] + (dimv[0] - 1) + 1 */ + if (pg_add_s32_overflow(lb[0], dimv[0], &indx)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + } + else if (eah->ndims == 0) + indx = 1; + else + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("argument must be empty or one-dimensional array"))); + + /* Perform element insertion */ + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + + result = array_set_element(EOHPGetRWDatum(&eah->hdr), + 1, &indx, newelem, isNull, + -1, my_extra->typlen, my_extra->typbyval, my_extra->typalign); + + PG_RETURN_DATUM(result); +} + +/*----------------------------------------------------------------------------- + * array_prepend : + * push an element onto the front of a one-dimensional array + *---------------------------------------------------------------------------- + */ +Datum +array_prepend(PG_FUNCTION_ARGS) +{ + ExpandedArrayHeader *eah; + Datum newelem; + bool isNull; + Datum result; + int *lb; + int indx; + int lb0; + ArrayMetaState *my_extra; + + isNull = PG_ARGISNULL(0); + if (isNull) + newelem = (Datum) 0; + else + newelem = PG_GETARG_DATUM(0); + eah = fetch_array_arg_replace_nulls(fcinfo, 1); + + if (eah->ndims == 1) + { + /* prepend newelem */ + lb = eah->lbound; + lb0 = lb[0]; + + if (pg_sub_s32_overflow(lb0, 1, &indx)) + ereport(ERROR, + 
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + } + else if (eah->ndims == 0) + { + indx = 1; + lb0 = 1; + } + else + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("argument must be empty or one-dimensional array"))); + + /* Perform element insertion */ + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + + result = array_set_element(EOHPGetRWDatum(&eah->hdr), + 1, &indx, newelem, isNull, + -1, my_extra->typlen, my_extra->typbyval, my_extra->typalign); + + /* Readjust result's LB to match the input's, as expected for prepend */ + Assert(result == EOHPGetRWDatum(&eah->hdr)); + if (eah->ndims == 1) + { + /* This is ok whether we've deconstructed or not */ + eah->lbound[0] = lb0; + } + + PG_RETURN_DATUM(result); +} + +/*----------------------------------------------------------------------------- + * array_cat : + * concatenate two nD arrays to form an nD array, or + * push an (n-1)D array onto the end of an nD array + *---------------------------------------------------------------------------- + */ +Datum +array_cat(PG_FUNCTION_ARGS) +{ + ArrayType *v1, + *v2; + ArrayType *result; + int *dims, + *lbs, + ndims, + nitems, + ndatabytes, + nbytes; + int *dims1, + *lbs1, + ndims1, + nitems1, + ndatabytes1; + int *dims2, + *lbs2, + ndims2, + nitems2, + ndatabytes2; + int i; + char *dat1, + *dat2; + bits8 *bitmap1, + *bitmap2; + Oid element_type; + Oid element_type1; + Oid element_type2; + int32 dataoffset; + + /* Concatenating a null array is a no-op, just return the other input */ + if (PG_ARGISNULL(0)) + { + if (PG_ARGISNULL(1)) + PG_RETURN_NULL(); + result = PG_GETARG_ARRAYTYPE_P(1); + PG_RETURN_ARRAYTYPE_P(result); + } + if (PG_ARGISNULL(1)) + { + result = PG_GETARG_ARRAYTYPE_P(0); + PG_RETURN_ARRAYTYPE_P(result); + } + + v1 = PG_GETARG_ARRAYTYPE_P(0); + v2 = PG_GETARG_ARRAYTYPE_P(1); + + element_type1 = ARR_ELEMTYPE(v1); + element_type2 = ARR_ELEMTYPE(v2); + + /* Check we have matching element types */ + if (element_type1 != element_type2) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("cannot concatenate incompatible arrays"), + errdetail("Arrays with element types %s and %s are not " + "compatible for concatenation.", + format_type_be(element_type1), + format_type_be(element_type2)))); + + /* OK, use it */ + element_type = element_type1; + + /*---------- + * We must have one of the following combinations of inputs: + * 1) one empty array, and one non-empty array + * 2) both arrays empty + * 3) two arrays with ndims1 == ndims2 + * 4) ndims1 == ndims2 - 1 + * 5) ndims1 == ndims2 + 1 + *---------- + */ + ndims1 = ARR_NDIM(v1); + ndims2 = ARR_NDIM(v2); + + /* + * short circuit - if one input array is empty, and the other is not, we + * return the non-empty one as the result + * + * if both are empty, return the first one + */ + if (ndims1 == 0 && ndims2 > 0) + PG_RETURN_ARRAYTYPE_P(v2); + + if (ndims2 == 0) + PG_RETURN_ARRAYTYPE_P(v1); + + /* the rest fall under rule 3, 4, or 5 */ + if (ndims1 != ndims2 && + ndims1 != ndims2 - 1 && + ndims1 != ndims2 + 1) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("cannot concatenate incompatible arrays"), + errdetail("Arrays of %d and %d dimensions are not " + "compatible for concatenation.", + ndims1, ndims2))); + + /* get argument array details */ + lbs1 = ARR_LBOUND(v1); + lbs2 = ARR_LBOUND(v2); + dims1 = ARR_DIMS(v1); + dims2 = ARR_DIMS(v2); + dat1 = ARR_DATA_PTR(v1); + dat2 = ARR_DATA_PTR(v2); + bitmap1 = ARR_NULLBITMAP(v1); + bitmap2 = 
ARR_NULLBITMAP(v2); + nitems1 = ArrayGetNItems(ndims1, dims1); + nitems2 = ArrayGetNItems(ndims2, dims2); + ndatabytes1 = ARR_SIZE(v1) - ARR_DATA_OFFSET(v1); + ndatabytes2 = ARR_SIZE(v2) - ARR_DATA_OFFSET(v2); + + if (ndims1 == ndims2) + { + /* + * resulting array is made up of the elements (possibly arrays + * themselves) of the input argument arrays + */ + ndims = ndims1; + dims = (int *) palloc(ndims * sizeof(int)); + lbs = (int *) palloc(ndims * sizeof(int)); + + dims[0] = dims1[0] + dims2[0]; + lbs[0] = lbs1[0]; + + for (i = 1; i < ndims; i++) + { + if (dims1[i] != dims2[i] || lbs1[i] != lbs2[i]) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("cannot concatenate incompatible arrays"), + errdetail("Arrays with differing element dimensions are " + "not compatible for concatenation."))); + + dims[i] = dims1[i]; + lbs[i] = lbs1[i]; + } + } + else if (ndims1 == ndims2 - 1) + { + /* + * resulting array has the second argument as the outer array, with + * the first argument inserted at the front of the outer dimension + */ + ndims = ndims2; + dims = (int *) palloc(ndims * sizeof(int)); + lbs = (int *) palloc(ndims * sizeof(int)); + memcpy(dims, dims2, ndims * sizeof(int)); + memcpy(lbs, lbs2, ndims * sizeof(int)); + + /* increment number of elements in outer array */ + dims[0] += 1; + + /* make sure the added element matches our existing elements */ + for (i = 0; i < ndims1; i++) + { + if (dims1[i] != dims[i + 1] || lbs1[i] != lbs[i + 1]) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("cannot concatenate incompatible arrays"), + errdetail("Arrays with differing dimensions are not " + "compatible for concatenation."))); + } + } + else + { + /* + * (ndims1 == ndims2 + 1) + * + * resulting array has the first argument as the outer array, with the + * second argument appended to the end of the outer dimension + */ + ndims = ndims1; + dims = (int *) palloc(ndims * sizeof(int)); + lbs = (int *) palloc(ndims * sizeof(int)); + memcpy(dims, dims1, ndims * sizeof(int)); + memcpy(lbs, lbs1, ndims * sizeof(int)); + + /* increment number of elements in outer array */ + dims[0] += 1; + + /* make sure the added element matches our existing elements */ + for (i = 0; i < ndims2; i++) + { + if (dims2[i] != dims[i + 1] || lbs2[i] != lbs[i + 1]) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("cannot concatenate incompatible arrays"), + errdetail("Arrays with differing dimensions are not " + "compatible for concatenation."))); + } + } + + /* Do this mainly for overflow checking */ + nitems = ArrayGetNItems(ndims, dims); + ArrayCheckBounds(ndims, dims, lbs); + + /* build the result array */ + ndatabytes = ndatabytes1 + ndatabytes2; + if (ARR_HASNULL(v1) || ARR_HASNULL(v2)) + { + dataoffset = ARR_OVERHEAD_WITHNULLS(ndims, nitems); + nbytes = ndatabytes + dataoffset; + } + else + { + dataoffset = 0; /* marker for no null bitmap */ + nbytes = ndatabytes + ARR_OVERHEAD_NONULLS(ndims); + } + result = (ArrayType *) palloc0(nbytes); + SET_VARSIZE(result, nbytes); + result->ndim = ndims; + result->dataoffset = dataoffset; + result->elemtype = element_type; + memcpy(ARR_DIMS(result), dims, ndims * sizeof(int)); + memcpy(ARR_LBOUND(result), lbs, ndims * sizeof(int)); + /* data area is arg1 then arg2 */ + memcpy(ARR_DATA_PTR(result), dat1, ndatabytes1); + memcpy(ARR_DATA_PTR(result) + ndatabytes1, dat2, ndatabytes2); + /* handle the null bitmap if needed */ + if (ARR_HASNULL(result)) + { + array_bitmap_copy(ARR_NULLBITMAP(result), 0, + bitmap1, 0, + nitems1); 
+ array_bitmap_copy(ARR_NULLBITMAP(result), nitems1, + bitmap2, 0, + nitems2); + } + + PG_RETURN_ARRAYTYPE_P(result); +} + + +/* + * ARRAY_AGG(anynonarray) aggregate function + */ +Datum +array_agg_transfn(PG_FUNCTION_ARGS) +{ + Oid arg1_typeid = get_fn_expr_argtype(fcinfo->flinfo, 1); + MemoryContext aggcontext; + ArrayBuildState *state; + Datum elem; + + if (arg1_typeid == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not determine input data type"))); + + /* + * Note: we do not need a run-time check about whether arg1_typeid is a + * valid array element type, because the parser would have verified that + * while resolving the input/result types of this polymorphic aggregate. + */ + + if (!AggCheckCallContext(fcinfo, &aggcontext)) + { + /* cannot be called directly because of internal-type argument */ + elog(ERROR, "array_agg_transfn called in non-aggregate context"); + } + + if (PG_ARGISNULL(0)) + state = initArrayResult(arg1_typeid, aggcontext, false); + else + state = (ArrayBuildState *) PG_GETARG_POINTER(0); + + elem = PG_ARGISNULL(1) ? (Datum) 0 : PG_GETARG_DATUM(1); + + state = accumArrayResult(state, + elem, + PG_ARGISNULL(1), + arg1_typeid, + aggcontext); + + /* + * The transition type for array_agg() is declared to be "internal", which + * is a pass-by-value type the same size as a pointer. So we can safely + * pass the ArrayBuildState pointer through nodeAgg.c's machinations. + */ + PG_RETURN_POINTER(state); +} + +Datum +array_agg_combine(PG_FUNCTION_ARGS) +{ + ArrayBuildState *state1; + ArrayBuildState *state2; + MemoryContext agg_context; + MemoryContext old_context; + + if (!AggCheckCallContext(fcinfo, &agg_context)) + elog(ERROR, "aggregate function called in non-aggregate context"); + + state1 = PG_ARGISNULL(0) ? NULL : (ArrayBuildState *) PG_GETARG_POINTER(0); + state2 = PG_ARGISNULL(1) ? 
NULL : (ArrayBuildState *) PG_GETARG_POINTER(1); + + if (state2 == NULL) + { + /* + * NULL state2 is easy, just return state1, which we know is already + * in the agg_context + */ + if (state1 == NULL) + PG_RETURN_NULL(); + PG_RETURN_POINTER(state1); + } + + if (state1 == NULL) + { + /* We must copy state2's data into the agg_context */ + state1 = initArrayResultWithSize(state2->element_type, agg_context, + false, state2->alen); + + old_context = MemoryContextSwitchTo(agg_context); + + for (int i = 0; i < state2->nelems; i++) + { + if (!state2->dnulls[i]) + state1->dvalues[i] = datumCopy(state2->dvalues[i], + state1->typbyval, + state1->typlen); + else + state1->dvalues[i] = (Datum) 0; + } + + MemoryContextSwitchTo(old_context); + + memcpy(state1->dnulls, state2->dnulls, sizeof(bool) * state2->nelems); + + state1->nelems = state2->nelems; + + PG_RETURN_POINTER(state1); + } + else if (state2->nelems > 0) + { + /* We only need to combine the two states if state2 has any elements */ + int reqsize = state1->nelems + state2->nelems; + MemoryContext oldContext = MemoryContextSwitchTo(state1->mcontext); + + Assert(state1->element_type == state2->element_type); + + /* Enlarge state1 arrays if needed */ + if (state1->alen < reqsize) + { + /* Use a power of 2 size rather than allocating just reqsize */ + state1->alen = pg_nextpower2_32(reqsize); + state1->dvalues = (Datum *) repalloc(state1->dvalues, + state1->alen * sizeof(Datum)); + state1->dnulls = (bool *) repalloc(state1->dnulls, + state1->alen * sizeof(bool)); + } + + /* Copy in the state2 elements to the end of the state1 arrays */ + for (int i = 0; i < state2->nelems; i++) + { + if (!state2->dnulls[i]) + state1->dvalues[i + state1->nelems] = + datumCopy(state2->dvalues[i], + state1->typbyval, + state1->typlen); + else + state1->dvalues[i + state1->nelems] = (Datum) 0; + } + + memcpy(&state1->dnulls[state1->nelems], state2->dnulls, + sizeof(bool) * state2->nelems); + + state1->nelems = reqsize; + + MemoryContextSwitchTo(oldContext); + } + + PG_RETURN_POINTER(state1); +} + +/* + * array_agg_serialize + * Serialize ArrayBuildState into bytea. + */ +Datum +array_agg_serialize(PG_FUNCTION_ARGS) +{ + ArrayBuildState *state; + StringInfoData buf; + bytea *result; + + /* cannot be called directly because of internal-type argument */ + Assert(AggCheckCallContext(fcinfo, NULL)); + + state = (ArrayBuildState *) PG_GETARG_POINTER(0); + + pq_begintypsend(&buf); + + /* + * element_type. Putting this first is more convenient in deserialization + */ + pq_sendint32(&buf, state->element_type); + + /* + * nelems -- send first so we know how large to make the dvalues and + * dnulls array during deserialization. + */ + pq_sendint64(&buf, state->nelems); + + /* alen can be decided during deserialization */ + + /* typlen */ + pq_sendint16(&buf, state->typlen); + + /* typbyval */ + pq_sendbyte(&buf, state->typbyval); + + /* typalign */ + pq_sendbyte(&buf, state->typalign); + + /* dnulls */ + pq_sendbytes(&buf, state->dnulls, sizeof(bool) * state->nelems); + + /* + * dvalues. By agreement with array_agg_deserialize, when the element + * type is byval, we just transmit the Datum array as-is, including any + * null elements. For by-ref types, we must invoke the element type's + * send function, and we skip null elements (which is why the nulls flags + * must be sent first). 
+ */ + if (state->typbyval) + pq_sendbytes(&buf, state->dvalues, sizeof(Datum) * state->nelems); + else + { + SerialIOData *iodata; + int i; + + /* Avoid repeat catalog lookups for typsend function */ + iodata = (SerialIOData *) fcinfo->flinfo->fn_extra; + if (iodata == NULL) + { + Oid typsend; + bool typisvarlena; + + iodata = (SerialIOData *) + MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(SerialIOData)); + getTypeBinaryOutputInfo(state->element_type, &typsend, + &typisvarlena); + fmgr_info_cxt(typsend, &iodata->typsend, + fcinfo->flinfo->fn_mcxt); + fcinfo->flinfo->fn_extra = (void *) iodata; + } + + for (i = 0; i < state->nelems; i++) + { + bytea *outputbytes; + + if (state->dnulls[i]) + continue; + outputbytes = SendFunctionCall(&iodata->typsend, + state->dvalues[i]); + pq_sendint32(&buf, VARSIZE(outputbytes) - VARHDRSZ); + pq_sendbytes(&buf, VARDATA(outputbytes), + VARSIZE(outputbytes) - VARHDRSZ); + } + } + + result = pq_endtypsend(&buf); + + PG_RETURN_BYTEA_P(result); +} + +Datum +array_agg_deserialize(PG_FUNCTION_ARGS) +{ + bytea *sstate; + ArrayBuildState *result; + StringInfoData buf; + Oid element_type; + int64 nelems; + const char *temp; + + if (!AggCheckCallContext(fcinfo, NULL)) + elog(ERROR, "aggregate function called in non-aggregate context"); + + sstate = PG_GETARG_BYTEA_PP(0); + + /* + * Copy the bytea into a StringInfo so that we can "receive" it using the + * standard recv-function infrastructure. + */ + initStringInfo(&buf); + appendBinaryStringInfo(&buf, + VARDATA_ANY(sstate), VARSIZE_ANY_EXHDR(sstate)); + + /* element_type */ + element_type = pq_getmsgint(&buf, 4); + + /* nelems */ + nelems = pq_getmsgint64(&buf); + + /* Create output ArrayBuildState with the needed number of elements */ + result = initArrayResultWithSize(element_type, CurrentMemoryContext, + false, nelems); + result->nelems = nelems; + + /* typlen */ + result->typlen = pq_getmsgint(&buf, 2); + + /* typbyval */ + result->typbyval = pq_getmsgbyte(&buf); + + /* typalign */ + result->typalign = pq_getmsgbyte(&buf); + + /* dnulls */ + temp = pq_getmsgbytes(&buf, sizeof(bool) * nelems); + memcpy(result->dnulls, temp, sizeof(bool) * nelems); + + /* dvalues --- see comment in array_agg_serialize */ + if (result->typbyval) + { + temp = pq_getmsgbytes(&buf, sizeof(Datum) * nelems); + memcpy(result->dvalues, temp, sizeof(Datum) * nelems); + } + else + { + DeserialIOData *iodata; + + /* Avoid repeat catalog lookups for typreceive function */ + iodata = (DeserialIOData *) fcinfo->flinfo->fn_extra; + if (iodata == NULL) + { + Oid typreceive; + + iodata = (DeserialIOData *) + MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(DeserialIOData)); + getTypeBinaryInputInfo(element_type, &typreceive, + &iodata->typioparam); + fmgr_info_cxt(typreceive, &iodata->typreceive, + fcinfo->flinfo->fn_mcxt); + fcinfo->flinfo->fn_extra = (void *) iodata; + } + + for (int i = 0; i < nelems; i++) + { + int itemlen; + StringInfoData elem_buf; + char csave; + + if (result->dnulls[i]) + { + result->dvalues[i] = (Datum) 0; + continue; + } + + itemlen = pq_getmsgint(&buf, 4); + if (itemlen < 0 || itemlen > (buf.len - buf.cursor)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("insufficient data left in message"))); + + /* + * Rather than copying data around, we just set up a phony + * StringInfo pointing to the correct portion of the input buffer. + * We assume we can scribble on the input buffer so as to maintain + * the convention that StringInfos have a trailing null. 
+ */ + elem_buf.data = &buf.data[buf.cursor]; + elem_buf.maxlen = itemlen + 1; + elem_buf.len = itemlen; + elem_buf.cursor = 0; + + buf.cursor += itemlen; + + csave = buf.data[buf.cursor]; + buf.data[buf.cursor] = '\0'; + + /* Now call the element's receiveproc */ + result->dvalues[i] = ReceiveFunctionCall(&iodata->typreceive, + &elem_buf, + iodata->typioparam, + -1); + + buf.data[buf.cursor] = csave; + } + } + + pq_getmsgend(&buf); + pfree(buf.data); + + PG_RETURN_POINTER(result); +} + +Datum +array_agg_finalfn(PG_FUNCTION_ARGS) +{ + Datum result; + ArrayBuildState *state; + int dims[1]; + int lbs[1]; + + /* cannot be called directly because of internal-type argument */ + Assert(AggCheckCallContext(fcinfo, NULL)); + + state = PG_ARGISNULL(0) ? NULL : (ArrayBuildState *) PG_GETARG_POINTER(0); + + if (state == NULL) + PG_RETURN_NULL(); /* returns null iff no input values */ + + dims[0] = state->nelems; + lbs[0] = 1; + + /* + * Make the result. We cannot release the ArrayBuildState because + * sometimes aggregate final functions are re-executed. Rather, it is + * nodeAgg.c's responsibility to reset the aggcontext when it's safe to do + * so. + */ + result = makeMdArrayResult(state, 1, dims, lbs, + CurrentMemoryContext, + false); + + PG_RETURN_DATUM(result); +} + +/* + * ARRAY_AGG(anyarray) aggregate function + */ +Datum +array_agg_array_transfn(PG_FUNCTION_ARGS) +{ + Oid arg1_typeid = get_fn_expr_argtype(fcinfo->flinfo, 1); + MemoryContext aggcontext; + ArrayBuildStateArr *state; + + if (arg1_typeid == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not determine input data type"))); + + /* + * Note: we do not need a run-time check about whether arg1_typeid is a + * valid array type, because the parser would have verified that while + * resolving the input/result types of this polymorphic aggregate. + */ + + if (!AggCheckCallContext(fcinfo, &aggcontext)) + { + /* cannot be called directly because of internal-type argument */ + elog(ERROR, "array_agg_array_transfn called in non-aggregate context"); + } + + + if (PG_ARGISNULL(0)) + state = initArrayResultArr(arg1_typeid, InvalidOid, aggcontext, false); + else + state = (ArrayBuildStateArr *) PG_GETARG_POINTER(0); + + state = accumArrayResultArr(state, + PG_GETARG_DATUM(1), + PG_ARGISNULL(1), + arg1_typeid, + aggcontext); + + /* + * The transition type for array_agg() is declared to be "internal", which + * is a pass-by-value type the same size as a pointer. So we can safely + * pass the ArrayBuildStateArr pointer through nodeAgg.c's machinations. + */ + PG_RETURN_POINTER(state); +} + +Datum +array_agg_array_combine(PG_FUNCTION_ARGS) +{ + ArrayBuildStateArr *state1; + ArrayBuildStateArr *state2; + MemoryContext agg_context; + MemoryContext old_context; + + if (!AggCheckCallContext(fcinfo, &agg_context)) + elog(ERROR, "aggregate function called in non-aggregate context"); + + state1 = PG_ARGISNULL(0) ? NULL : (ArrayBuildStateArr *) PG_GETARG_POINTER(0); + state2 = PG_ARGISNULL(1) ? 
NULL : (ArrayBuildStateArr *) PG_GETARG_POINTER(1); + + if (state2 == NULL) + { + /* + * NULL state2 is easy, just return state1, which we know is already + * in the agg_context + */ + if (state1 == NULL) + PG_RETURN_NULL(); + PG_RETURN_POINTER(state1); + } + + if (state1 == NULL) + { + /* We must copy state2's data into the agg_context */ + old_context = MemoryContextSwitchTo(agg_context); + + state1 = initArrayResultArr(state2->array_type, InvalidOid, + agg_context, false); + + state1->abytes = state2->abytes; + state1->data = (char *) palloc(state1->abytes); + + if (state2->nullbitmap) + { + int size = (state2->aitems + 7) / 8; + + state1->nullbitmap = (bits8 *) palloc(size); + memcpy(state1->nullbitmap, state2->nullbitmap, size); + } + + memcpy(state1->data, state2->data, state2->nbytes); + state1->nbytes = state2->nbytes; + state1->aitems = state2->aitems; + state1->nitems = state2->nitems; + state1->ndims = state2->ndims; + memcpy(state1->dims, state2->dims, sizeof(state2->dims)); + memcpy(state1->lbs, state2->lbs, sizeof(state2->lbs)); + state1->array_type = state2->array_type; + state1->element_type = state2->element_type; + + MemoryContextSwitchTo(old_context); + + PG_RETURN_POINTER(state1); + } + + /* We only need to combine the two states if state2 has any items */ + else if (state2->nitems > 0) + { + MemoryContext oldContext; + int reqsize = state1->nbytes + state2->nbytes; + int i; + + /* + * Check the states are compatible with each other. Ensure we use the + * same error messages that are listed in accumArrayResultArr so that + * the same error is shown as would have been if we'd not used the + * combine function for the aggregation. + */ + if (state1->ndims != state2->ndims) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("cannot accumulate arrays of different dimensionality"))); + + /* Check dimensions match ignoring the first dimension. */ + for (i = 1; i < state1->ndims; i++) + { + if (state1->dims[i] != state2->dims[i] || state1->lbs[i] != state2->lbs[i]) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("cannot accumulate arrays of different dimensionality"))); + } + + + oldContext = MemoryContextSwitchTo(state1->mcontext); + + /* + * If there's not enough space in state1 then we'll need to reallocate + * more. + */ + if (state1->abytes < reqsize) + { + /* use a power of 2 size rather than allocating just reqsize */ + state1->abytes = pg_nextpower2_32(reqsize); + state1->data = (char *) repalloc(state1->data, state1->abytes); + } + + if (state2->nullbitmap) + { + int newnitems = state1->nitems + state2->nitems; + + if (state1->nullbitmap == NULL) + { + /* + * First input with nulls; we must retrospectively handle any + * previous inputs by marking all their items non-null. 
+ */ + state1->aitems = pg_nextpower2_32(Max(256, newnitems + 1)); + state1->nullbitmap = (bits8 *) palloc((state1->aitems + 7) / 8); + array_bitmap_copy(state1->nullbitmap, 0, + NULL, 0, + state1->nitems); + } + else if (newnitems > state1->aitems) + { + int newaitems = state1->aitems + state2->aitems; + + state1->aitems = pg_nextpower2_32(newaitems); + state1->nullbitmap = (bits8 *) + repalloc(state1->nullbitmap, (state1->aitems + 7) / 8); + } + array_bitmap_copy(state1->nullbitmap, state1->nitems, + state2->nullbitmap, 0, + state2->nitems); + } + + memcpy(state1->data + state1->nbytes, state2->data, state2->nbytes); + state1->nbytes += state2->nbytes; + state1->nitems += state2->nitems; + + state1->dims[0] += state2->dims[0]; + /* remaining dims already match, per test above */ + + Assert(state1->array_type == state2->array_type); + Assert(state1->element_type == state2->element_type); + + MemoryContextSwitchTo(oldContext); + } + + PG_RETURN_POINTER(state1); +} + +/* + * array_agg_array_serialize + * Serialize ArrayBuildStateArr into bytea. + */ +Datum +array_agg_array_serialize(PG_FUNCTION_ARGS) +{ + ArrayBuildStateArr *state; + StringInfoData buf; + bytea *result; + + /* cannot be called directly because of internal-type argument */ + Assert(AggCheckCallContext(fcinfo, NULL)); + + state = (ArrayBuildStateArr *) PG_GETARG_POINTER(0); + + pq_begintypsend(&buf); + + /* + * element_type. Putting this first is more convenient in deserialization + * so that we can init the new state sooner. + */ + pq_sendint32(&buf, state->element_type); + + /* array_type */ + pq_sendint32(&buf, state->array_type); + + /* nbytes */ + pq_sendint32(&buf, state->nbytes); + + /* data */ + pq_sendbytes(&buf, state->data, state->nbytes); + + /* abytes */ + pq_sendint32(&buf, state->abytes); + + /* aitems */ + pq_sendint32(&buf, state->aitems); + + /* nullbitmap */ + if (state->nullbitmap) + { + Assert(state->aitems > 0); + pq_sendbytes(&buf, state->nullbitmap, (state->aitems + 7) / 8); + } + + /* nitems */ + pq_sendint32(&buf, state->nitems); + + /* ndims */ + pq_sendint32(&buf, state->ndims); + + /* dims: XXX should we just send ndims elements? */ + pq_sendbytes(&buf, state->dims, sizeof(state->dims)); + + /* lbs */ + pq_sendbytes(&buf, state->lbs, sizeof(state->lbs)); + + result = pq_endtypsend(&buf); + + PG_RETURN_BYTEA_P(result); +} + +Datum +array_agg_array_deserialize(PG_FUNCTION_ARGS) +{ + bytea *sstate; + ArrayBuildStateArr *result; + StringInfoData buf; + Oid element_type; + Oid array_type; + int nbytes; + const char *temp; + + /* cannot be called directly because of internal-type argument */ + Assert(AggCheckCallContext(fcinfo, NULL)); + + sstate = PG_GETARG_BYTEA_PP(0); + + /* + * Copy the bytea into a StringInfo so that we can "receive" it using the + * standard recv-function infrastructure. 
+ */ + initStringInfo(&buf); + appendBinaryStringInfo(&buf, + VARDATA_ANY(sstate), VARSIZE_ANY_EXHDR(sstate)); + + /* element_type */ + element_type = pq_getmsgint(&buf, 4); + + /* array_type */ + array_type = pq_getmsgint(&buf, 4); + + /* nbytes */ + nbytes = pq_getmsgint(&buf, 4); + + result = initArrayResultArr(array_type, element_type, + CurrentMemoryContext, false); + + result->abytes = 1024; + while (result->abytes < nbytes) + result->abytes *= 2; + + result->data = (char *) palloc(result->abytes); + + /* data */ + temp = pq_getmsgbytes(&buf, nbytes); + memcpy(result->data, temp, nbytes); + result->nbytes = nbytes; + + /* abytes */ + result->abytes = pq_getmsgint(&buf, 4); + + /* aitems: might be 0 */ + result->aitems = pq_getmsgint(&buf, 4); + + /* nullbitmap */ + if (result->aitems > 0) + { + int size = (result->aitems + 7) / 8; + + result->nullbitmap = (bits8 *) palloc(size); + temp = pq_getmsgbytes(&buf, size); + memcpy(result->nullbitmap, temp, size); + } + else + result->nullbitmap = NULL; + + /* nitems */ + result->nitems = pq_getmsgint(&buf, 4); + + /* ndims */ + result->ndims = pq_getmsgint(&buf, 4); + + /* dims */ + temp = pq_getmsgbytes(&buf, sizeof(result->dims)); + memcpy(result->dims, temp, sizeof(result->dims)); + + /* lbs */ + temp = pq_getmsgbytes(&buf, sizeof(result->lbs)); + memcpy(result->lbs, temp, sizeof(result->lbs)); + + pq_getmsgend(&buf); + pfree(buf.data); + + PG_RETURN_POINTER(result); +} + +Datum +array_agg_array_finalfn(PG_FUNCTION_ARGS) +{ + Datum result; + ArrayBuildStateArr *state; + + /* cannot be called directly because of internal-type argument */ + Assert(AggCheckCallContext(fcinfo, NULL)); + + state = PG_ARGISNULL(0) ? NULL : (ArrayBuildStateArr *) PG_GETARG_POINTER(0); + + if (state == NULL) + PG_RETURN_NULL(); /* returns null iff no input values */ + + /* + * Make the result. We cannot release the ArrayBuildStateArr because + * sometimes aggregate final functions are re-executed. Rather, it is + * nodeAgg.c's responsibility to reset the aggcontext when it's safe to do + * so. + */ + result = makeArrayResultArr(state, CurrentMemoryContext, false); + + PG_RETURN_DATUM(result); +} + +/*----------------------------------------------------------------------------- + * array_position, array_position_start : + * return the offset of a value in an array. + * + * IS NOT DISTINCT FROM semantics are used for comparisons. Return NULL when + * the value is not found. + *----------------------------------------------------------------------------- + */ +Datum +array_position(PG_FUNCTION_ARGS) +{ + return array_position_common(fcinfo); +} + +Datum +array_position_start(PG_FUNCTION_ARGS) +{ + return array_position_common(fcinfo); +} + +/* + * array_position_common + * Common code for array_position and array_position_start + * + * These are separate wrappers for the sake of opr_sanity regression test. + * They are not strict so we have to test for null inputs explicitly. + */ +static Datum +array_position_common(FunctionCallInfo fcinfo) +{ + ArrayType *array; + Oid collation = PG_GET_COLLATION(); + Oid element_type; + Datum searched_element, + value; + bool isnull; + int position, + position_min; + bool found = false; + TypeCacheEntry *typentry; + ArrayMetaState *my_extra; + bool null_search; + ArrayIterator array_iterator; + + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + + array = PG_GETARG_ARRAYTYPE_P(0); + + /* + * We refuse to search for elements in multi-dimensional arrays, since we + * have no good way to report the element's location in the array. 
+ */ + if (ARR_NDIM(array) > 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("searching for elements in multidimensional arrays is not supported"))); + + /* Searching in an empty array is well-defined, though: it always fails */ + if (ARR_NDIM(array) < 1) + PG_RETURN_NULL(); + + if (PG_ARGISNULL(1)) + { + /* fast return when the array doesn't have nulls */ + if (!array_contains_nulls(array)) + PG_RETURN_NULL(); + searched_element = (Datum) 0; + null_search = true; + } + else + { + searched_element = PG_GETARG_DATUM(1); + null_search = false; + } + + element_type = ARR_ELEMTYPE(array); + position = (ARR_LBOUND(array))[0] - 1; + + /* figure out where to start */ + if (PG_NARGS() == 3) + { + if (PG_ARGISNULL(2)) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("initial position must not be null"))); + + position_min = PG_GETARG_INT32(2); + } + else + position_min = (ARR_LBOUND(array))[0]; + + /* + * We arrange to look up type info for array_create_iterator only once per + * series of calls, assuming the element type doesn't change underneath + * us. + */ + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL) + { + fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(ArrayMetaState)); + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + my_extra->element_type = ~element_type; + } + + if (my_extra->element_type != element_type) + { + get_typlenbyvalalign(element_type, + &my_extra->typlen, + &my_extra->typbyval, + &my_extra->typalign); + + typentry = lookup_type_cache(element_type, TYPECACHE_EQ_OPR_FINFO); + + if (!OidIsValid(typentry->eq_opr_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify an equality operator for type %s", + format_type_be(element_type)))); + + my_extra->element_type = element_type; + fmgr_info_cxt(typentry->eq_opr_finfo.fn_oid, &my_extra->proc, + fcinfo->flinfo->fn_mcxt); + } + + /* Examine each array element until we find a match. */ + array_iterator = array_create_iterator(array, 0, my_extra); + while (array_iterate(array_iterator, &value, &isnull)) + { + position++; + + /* skip initial elements if caller requested so */ + if (position < position_min) + continue; + + /* + * Can't look at the array element's value if it's null; but if we + * search for null, we have a hit and are done. + */ + if (isnull || null_search) + { + if (isnull && null_search) + { + found = true; + break; + } + else + continue; + } + + /* not nulls, so run the operator */ + if (DatumGetBool(FunctionCall2Coll(&my_extra->proc, collation, + searched_element, value))) + { + found = true; + break; + } + } + + array_free_iterator(array_iterator); + + /* Avoid leaking memory when handed toasted input */ + PG_FREE_IF_COPY(array, 0); + + if (!found) + PG_RETURN_NULL(); + + PG_RETURN_INT32(position); +} + +/*----------------------------------------------------------------------------- + * array_positions : + * return an array of positions of a value in an array. + * + * IS NOT DISTINCT FROM semantics are used for comparisons. Returns NULL when + * the input array is NULL. When the value is not found in the array, returns + * an empty array. + * + * This is not strict so we have to test for null inputs explicitly. 
+ *----------------------------------------------------------------------------- + */ +Datum +array_positions(PG_FUNCTION_ARGS) +{ + ArrayType *array; + Oid collation = PG_GET_COLLATION(); + Oid element_type; + Datum searched_element, + value; + bool isnull; + int position; + TypeCacheEntry *typentry; + ArrayMetaState *my_extra; + bool null_search; + ArrayIterator array_iterator; + ArrayBuildState *astate = NULL; + + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + + array = PG_GETARG_ARRAYTYPE_P(0); + + /* + * We refuse to search for elements in multi-dimensional arrays, since we + * have no good way to report the element's location in the array. + */ + if (ARR_NDIM(array) > 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("searching for elements in multidimensional arrays is not supported"))); + + astate = initArrayResult(INT4OID, CurrentMemoryContext, false); + + /* Searching in an empty array is well-defined, though: it always fails */ + if (ARR_NDIM(array) < 1) + PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext)); + + if (PG_ARGISNULL(1)) + { + /* fast return when the array doesn't have nulls */ + if (!array_contains_nulls(array)) + PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext)); + searched_element = (Datum) 0; + null_search = true; + } + else + { + searched_element = PG_GETARG_DATUM(1); + null_search = false; + } + + element_type = ARR_ELEMTYPE(array); + position = (ARR_LBOUND(array))[0] - 1; + + /* + * We arrange to look up type info for array_create_iterator only once per + * series of calls, assuming the element type doesn't change underneath + * us. + */ + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL) + { + fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(ArrayMetaState)); + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + my_extra->element_type = ~element_type; + } + + if (my_extra->element_type != element_type) + { + get_typlenbyvalalign(element_type, + &my_extra->typlen, + &my_extra->typbyval, + &my_extra->typalign); + + typentry = lookup_type_cache(element_type, TYPECACHE_EQ_OPR_FINFO); + + if (!OidIsValid(typentry->eq_opr_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify an equality operator for type %s", + format_type_be(element_type)))); + + my_extra->element_type = element_type; + fmgr_info_cxt(typentry->eq_opr_finfo.fn_oid, &my_extra->proc, + fcinfo->flinfo->fn_mcxt); + } + + /* + * Accumulate each array position iff the element matches the given + * element. + */ + array_iterator = array_create_iterator(array, 0, my_extra); + while (array_iterate(array_iterator, &value, &isnull)) + { + position += 1; + + /* + * Can't look at the array element's value if it's null; but if we + * search for null, we have a hit. 
+ */ + if (isnull || null_search) + { + if (isnull && null_search) + astate = + accumArrayResult(astate, Int32GetDatum(position), false, + INT4OID, CurrentMemoryContext); + + continue; + } + + /* not nulls, so run the operator */ + if (DatumGetBool(FunctionCall2Coll(&my_extra->proc, collation, + searched_element, value))) + astate = + accumArrayResult(astate, Int32GetDatum(position), false, + INT4OID, CurrentMemoryContext); + } + + array_free_iterator(array_iterator); + + /* Avoid leaking memory when handed toasted input */ + PG_FREE_IF_COPY(array, 0); + + PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext)); +} + +/* + * array_shuffle_n + * Return a copy of array with n randomly chosen items. + * + * The number of items must not exceed the size of the first dimension of the + * array. We preserve the first dimension's lower bound if keep_lb, + * else it's set to 1. Lower-order dimensions are preserved in any case. + * + * NOTE: it would be cleaner to look up the elmlen/elmbval/elmalign info + * from the system catalogs, given only the elmtyp. However, the caller is + * in a better position to cache this info across multiple calls. + */ +static ArrayType * +array_shuffle_n(ArrayType *array, int n, bool keep_lb, + Oid elmtyp, TypeCacheEntry *typentry) +{ + ArrayType *result; + int ndim, + *dims, + *lbs, + nelm, + nitem, + rdims[MAXDIM], + rlbs[MAXDIM]; + int16 elmlen; + bool elmbyval; + char elmalign; + Datum *elms, + *ielms; + bool *nuls, + *inuls; + + ndim = ARR_NDIM(array); + dims = ARR_DIMS(array); + lbs = ARR_LBOUND(array); + + elmlen = typentry->typlen; + elmbyval = typentry->typbyval; + elmalign = typentry->typalign; + + /* If the target array is empty, exit fast */ + if (ndim < 1 || dims[0] < 1 || n < 1) + return construct_empty_array(elmtyp); + + deconstruct_array(array, elmtyp, elmlen, elmbyval, elmalign, + &elms, &nuls, &nelm); + + nitem = dims[0]; /* total number of items */ + nelm /= nitem; /* number of elements per item */ + + Assert(n <= nitem); /* else it's caller error */ + + /* + * Shuffle array using Fisher-Yates algorithm. Scan the array and swap + * current item (nelm datums starting at ielms) with a randomly chosen + * later item (nelm datums starting at jelms) in each iteration. We can + * stop once we've done n iterations; then first n items are the result. + */ + ielms = elms; + inuls = nuls; + for (int i = 0; i < n; i++) + { + int j = (int) pg_prng_uint64_range(&pg_global_prng_state, i, nitem - 1) * nelm; + Datum *jelms = elms + j; + bool *jnuls = nuls + j; + + /* Swap i'th and j'th items; advance ielms/inuls to next item */ + for (int k = 0; k < nelm; k++) + { + Datum elm = *ielms; + bool nul = *inuls; + + *ielms++ = *jelms; + *inuls++ = *jnuls; + *jelms++ = elm; + *jnuls++ = nul; + } + } + + /* Set up dimensions of the result */ + memcpy(rdims, dims, ndim * sizeof(int)); + memcpy(rlbs, lbs, ndim * sizeof(int)); + rdims[0] = n; + if (!keep_lb) + rlbs[0] = 1; + + result = construct_md_array(elms, nuls, ndim, rdims, rlbs, + elmtyp, elmlen, elmbyval, elmalign); + + pfree(elms); + pfree(nuls); + + return result; +} + +/* + * array_shuffle + * + * Returns an array with the same dimensions as the input array, with its + * first-dimension elements in random order. + */ +Datum +array_shuffle(PG_FUNCTION_ARGS) +{ + ArrayType *array = PG_GETARG_ARRAYTYPE_P(0); + ArrayType *result; + Oid elmtyp; + TypeCacheEntry *typentry; + + /* + * There is no point in shuffling empty arrays or arrays with less than + * two items. 
+ */ + if (ARR_NDIM(array) < 1 || ARR_DIMS(array)[0] < 2) + PG_RETURN_ARRAYTYPE_P(array); + + elmtyp = ARR_ELEMTYPE(array); + typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; + if (typentry == NULL || typentry->type_id != elmtyp) + { + typentry = lookup_type_cache(elmtyp, 0); + fcinfo->flinfo->fn_extra = (void *) typentry; + } + + result = array_shuffle_n(array, ARR_DIMS(array)[0], true, elmtyp, typentry); + + PG_RETURN_ARRAYTYPE_P(result); +} + +/* + * array_sample + * + * Returns an array of n randomly chosen first-dimension elements + * from the input array. + */ +Datum +array_sample(PG_FUNCTION_ARGS) +{ + ArrayType *array = PG_GETARG_ARRAYTYPE_P(0); + int n = PG_GETARG_INT32(1); + ArrayType *result; + Oid elmtyp; + TypeCacheEntry *typentry; + int nitem; + + nitem = (ARR_NDIM(array) < 1) ? 0 : ARR_DIMS(array)[0]; + + if (n < 0 || n > nitem) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("sample size must be between 0 and %d", nitem))); + + elmtyp = ARR_ELEMTYPE(array); + typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; + if (typentry == NULL || typentry->type_id != elmtyp) + { + typentry = lookup_type_cache(elmtyp, 0); + fcinfo->flinfo->fn_extra = (void *) typentry; + } + + result = array_shuffle_n(array, n, false, elmtyp, typentry); + + PG_RETURN_ARRAYTYPE_P(result); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/arrayfuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/arrayfuncs.c new file mode 100644 index 00000000000..807030da997 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/arrayfuncs.c @@ -0,0 +1,6961 @@ +/*------------------------------------------------------------------------- + * + * arrayfuncs.c + * Support functions for arrays. 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/arrayfuncs.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <ctype.h> +#include <math.h> + +#include "access/htup_details.h" +#include "catalog/pg_type.h" +#include "common/int.h" +#include "funcapi.h" +#include "libpq/pqformat.h" +#include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" +#include "optimizer/optimizer.h" +#include "port/pg_bitutils.h" +#include "utils/array.h" +#include "utils/arrayaccess.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/fmgroids.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/selfuncs.h" +#include "utils/typcache.h" + + +/* + * GUC parameter + */ +__thread bool Array_nulls = true; + +/* + * Local definitions + */ +#define ASSGN "=" + +#define AARR_FREE_IF_COPY(array,n) \ + do { \ + if (!VARATT_IS_EXPANDED_HEADER(array)) \ + PG_FREE_IF_COPY(array, n); \ + } while (0) + +typedef enum +{ + ARRAY_NO_LEVEL, + ARRAY_LEVEL_STARTED, + ARRAY_ELEM_STARTED, + ARRAY_ELEM_COMPLETED, + ARRAY_QUOTED_ELEM_STARTED, + ARRAY_QUOTED_ELEM_COMPLETED, + ARRAY_ELEM_DELIMITED, + ARRAY_LEVEL_COMPLETED, + ARRAY_LEVEL_DELIMITED +} ArrayParseState; + +/* Working state for array_iterate() */ +typedef struct ArrayIteratorData +{ + /* basic info about the array, set up during array_create_iterator() */ + ArrayType *arr; /* array we're iterating through */ + bits8 *nullbitmap; /* its null bitmap, if any */ + int nitems; /* total number of elements in array */ + int16 typlen; /* element type's length */ + bool typbyval; /* element type's byval property */ + char typalign; /* element type's align property */ + + /* information about the requested slice size */ + int slice_ndim; /* slice dimension, or 0 if not slicing */ + int slice_len; /* number of elements per slice */ + int *slice_dims; /* slice dims array */ + int *slice_lbound; /* slice lbound array */ + Datum *slice_values; /* workspace of length slice_len */ + bool *slice_nulls; /* workspace of length slice_len */ + + /* current position information, updated on each iteration */ + char *data_ptr; /* our current position in the array */ + int current_item; /* the item # we're at in the array */ +} ArrayIteratorData; + +static bool array_isspace(char ch); +static int ArrayCount(const char *str, int *dim, char typdelim, + Node *escontext); +static bool ReadArrayStr(char *arrayStr, const char *origStr, + int nitems, int ndim, int *dim, + FmgrInfo *inputproc, Oid typioparam, int32 typmod, + char typdelim, + int typlen, bool typbyval, char typalign, + Datum *values, bool *nulls, + bool *hasnulls, int32 *nbytes, Node *escontext); +static void ReadArrayBinary(StringInfo buf, int nitems, + FmgrInfo *receiveproc, Oid typioparam, int32 typmod, + int typlen, bool typbyval, char typalign, + Datum *values, bool *nulls, + bool *hasnulls, int32 *nbytes); +static Datum array_get_element_expanded(Datum arraydatum, + int nSubscripts, int *indx, + int arraytyplen, + int elmlen, bool elmbyval, char elmalign, + bool *isNull); +static Datum array_set_element_expanded(Datum arraydatum, + int nSubscripts, int *indx, + Datum dataValue, bool isNull, + int arraytyplen, + int elmlen, bool elmbyval, char elmalign); +static bool array_get_isnull(const bits8 *nullbitmap, int offset); +static void array_set_isnull(bits8 *nullbitmap, 
int offset, bool isNull); +static Datum ArrayCast(char *value, bool byval, int len); +static int ArrayCastAndSet(Datum src, + int typlen, bool typbyval, char typalign, + char *dest); +static char *array_seek(char *ptr, int offset, bits8 *nullbitmap, int nitems, + int typlen, bool typbyval, char typalign); +static int array_nelems_size(char *ptr, int offset, bits8 *nullbitmap, + int nitems, int typlen, bool typbyval, char typalign); +static int array_copy(char *destptr, int nitems, + char *srcptr, int offset, bits8 *nullbitmap, + int typlen, bool typbyval, char typalign); +static int array_slice_size(char *arraydataptr, bits8 *arraynullsptr, + int ndim, int *dim, int *lb, + int *st, int *endp, + int typlen, bool typbyval, char typalign); +static void array_extract_slice(ArrayType *newarray, + int ndim, int *dim, int *lb, + char *arraydataptr, bits8 *arraynullsptr, + int *st, int *endp, + int typlen, bool typbyval, char typalign); +static void array_insert_slice(ArrayType *destArray, ArrayType *origArray, + ArrayType *srcArray, + int ndim, int *dim, int *lb, + int *st, int *endp, + int typlen, bool typbyval, char typalign); +static int array_cmp(FunctionCallInfo fcinfo); +static ArrayType *create_array_envelope(int ndims, int *dimv, int *lbsv, int nbytes, + Oid elmtype, int dataoffset); +static ArrayType *array_fill_internal(ArrayType *dims, ArrayType *lbs, + Datum value, bool isnull, Oid elmtype, + FunctionCallInfo fcinfo); +static ArrayType *array_replace_internal(ArrayType *array, + Datum search, bool search_isnull, + Datum replace, bool replace_isnull, + bool remove, Oid collation, + FunctionCallInfo fcinfo); +static int width_bucket_array_float8(Datum operand, ArrayType *thresholds); +static int width_bucket_array_fixed(Datum operand, + ArrayType *thresholds, + Oid collation, + TypeCacheEntry *typentry); +static int width_bucket_array_variable(Datum operand, + ArrayType *thresholds, + Oid collation, + TypeCacheEntry *typentry); + + +/* + * array_in : + * converts an array from the external format in "string" to + * its internal format. + * + * return value : + * the internal representation of the input array + */ +Datum +array_in(PG_FUNCTION_ARGS) +{ + char *string = PG_GETARG_CSTRING(0); /* external form */ + Oid element_type = PG_GETARG_OID(1); /* type of an array + * element */ + int32 typmod = PG_GETARG_INT32(2); /* typmod for array elements */ + Node *escontext = fcinfo->context; + int typlen; + bool typbyval; + char typalign; + char typdelim; + Oid typioparam; + char *string_save, + *p; + int i, + nitems; + Datum *dataPtr; + bool *nullsPtr; + bool hasnulls; + int32 nbytes; + int32 dataoffset; + ArrayType *retval; + int ndim, + dim[MAXDIM], + lBound[MAXDIM]; + ArrayMetaState *my_extra; + + /* + * We arrange to look up info about element type, including its input + * conversion proc, only once per series of calls, assuming the element + * type doesn't change underneath us. 
+ */ + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL) + { + fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(ArrayMetaState)); + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + my_extra->element_type = ~element_type; + } + + if (my_extra->element_type != element_type) + { + /* + * Get info about element type, including its input conversion proc + */ + get_type_io_data(element_type, IOFunc_input, + &my_extra->typlen, &my_extra->typbyval, + &my_extra->typalign, &my_extra->typdelim, + &my_extra->typioparam, &my_extra->typiofunc); + fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc, + fcinfo->flinfo->fn_mcxt); + my_extra->element_type = element_type; + } + typlen = my_extra->typlen; + typbyval = my_extra->typbyval; + typalign = my_extra->typalign; + typdelim = my_extra->typdelim; + typioparam = my_extra->typioparam; + + /* Make a modifiable copy of the input */ + string_save = pstrdup(string); + + /* + * If the input string starts with dimension info, read and use that. + * Otherwise, we require the input to be in curly-brace style, and we + * prescan the input to determine dimensions. + * + * Dimension info takes the form of one or more [n] or [m:n] items. The + * outer loop iterates once per dimension item. + */ + p = string_save; + ndim = 0; + for (;;) + { + char *q; + int ub; + + /* + * Note: we currently allow whitespace between, but not within, + * dimension items. + */ + while (array_isspace(*p)) + p++; + if (*p != '[') + break; /* no more dimension items */ + p++; + if (ndim >= MAXDIM) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)", + ndim + 1, MAXDIM))); + + for (q = p; isdigit((unsigned char) *q) || (*q == '-') || (*q == '+'); q++) + /* skip */ ; + if (q == p) /* no digits? */ + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", string), + errdetail("\"[\" must introduce explicitly-specified array dimensions."))); + + if (*q == ':') + { + /* [m:n] format */ + *q = '\0'; + lBound[ndim] = atoi(p); + p = q + 1; + for (q = p; isdigit((unsigned char) *q) || (*q == '-') || (*q == '+'); q++) + /* skip */ ; + if (q == p) /* no digits? 
*/ + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", string), + errdetail("Missing array dimension value."))); + } + else + { + /* [n] format */ + lBound[ndim] = 1; + } + if (*q != ']') + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", string), + errdetail("Missing \"%s\" after array dimensions.", + "]"))); + + *q = '\0'; + ub = atoi(p); + p = q + 1; + if (ub < lBound[ndim]) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("upper bound cannot be less than lower bound"))); + + dim[ndim] = ub - lBound[ndim] + 1; + ndim++; + } + + if (ndim == 0) + { + /* No array dimensions, so intuit dimensions from brace structure */ + if (*p != '{') + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", string), + errdetail("Array value must start with \"{\" or dimension information."))); + ndim = ArrayCount(p, dim, typdelim, escontext); + if (ndim < 0) + PG_RETURN_NULL(); + for (i = 0; i < ndim; i++) + lBound[i] = 1; + } + else + { + int ndim_braces, + dim_braces[MAXDIM]; + + /* If array dimensions are given, expect '=' operator */ + if (strncmp(p, ASSGN, strlen(ASSGN)) != 0) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", string), + errdetail("Missing \"%s\" after array dimensions.", + ASSGN))); + p += strlen(ASSGN); + while (array_isspace(*p)) + p++; + + /* + * intuit dimensions from brace structure -- it better match what we + * were given + */ + if (*p != '{') + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", string), + errdetail("Array contents must start with \"{\"."))); + ndim_braces = ArrayCount(p, dim_braces, typdelim, escontext); + if (ndim_braces < 0) + PG_RETURN_NULL(); + if (ndim_braces != ndim) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", string), + errdetail("Specified array dimensions do not match array contents."))); + for (i = 0; i < ndim; ++i) + { + if (dim[i] != dim_braces[i]) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", string), + errdetail("Specified array dimensions do not match array contents."))); + } + } + +#ifdef ARRAYDEBUG + printf("array_in- ndim %d (", ndim); + for (i = 0; i < ndim; i++) + { + printf(" %d", dim[i]); + }; + printf(") for %s\n", string); +#endif + + /* This checks for overflow of the array dimensions */ + nitems = ArrayGetNItemsSafe(ndim, dim, escontext); + if (nitems < 0) + PG_RETURN_NULL(); + if (!ArrayCheckBoundsSafe(ndim, dim, lBound, escontext)) + PG_RETURN_NULL(); + + /* Empty array? 
*/ + if (nitems == 0) + PG_RETURN_ARRAYTYPE_P(construct_empty_array(element_type)); + + dataPtr = (Datum *) palloc(nitems * sizeof(Datum)); + nullsPtr = (bool *) palloc(nitems * sizeof(bool)); + if (!ReadArrayStr(p, string, + nitems, ndim, dim, + &my_extra->proc, typioparam, typmod, + typdelim, + typlen, typbyval, typalign, + dataPtr, nullsPtr, + &hasnulls, &nbytes, escontext)) + PG_RETURN_NULL(); + if (hasnulls) + { + dataoffset = ARR_OVERHEAD_WITHNULLS(ndim, nitems); + nbytes += dataoffset; + } + else + { + dataoffset = 0; /* marker for no null bitmap */ + nbytes += ARR_OVERHEAD_NONULLS(ndim); + } + retval = (ArrayType *) palloc0(nbytes); + SET_VARSIZE(retval, nbytes); + retval->ndim = ndim; + retval->dataoffset = dataoffset; + + /* + * This comes from the array's pg_type.typelem (which points to the base + * data type's pg_type.oid) and stores system oids in user tables. This + * oid must be preserved by binary upgrades. + */ + retval->elemtype = element_type; + memcpy(ARR_DIMS(retval), dim, ndim * sizeof(int)); + memcpy(ARR_LBOUND(retval), lBound, ndim * sizeof(int)); + + CopyArrayEls(retval, + dataPtr, nullsPtr, nitems, + typlen, typbyval, typalign, + true); + + pfree(dataPtr); + pfree(nullsPtr); + pfree(string_save); + + PG_RETURN_ARRAYTYPE_P(retval); +} + +/* + * array_isspace() --- a non-locale-dependent isspace() + * + * We used to use isspace() for parsing array values, but that has + * undesirable results: an array value might be silently interpreted + * differently depending on the locale setting. Now we just hard-wire + * the traditional ASCII definition of isspace(). + */ +static bool +array_isspace(char ch) +{ + if (ch == ' ' || + ch == '\t' || + ch == '\n' || + ch == '\r' || + ch == '\v' || + ch == '\f') + return true; + return false; +} + +/* + * ArrayCount + * Determines the dimensions for an array string. + * + * Returns number of dimensions as function result. The axis lengths are + * returned in dim[], which must be of size MAXDIM. + * + * If we detect an error, fill *escontext with error details and return -1 + * (unless escontext isn't provided, in which case errors will be thrown). + */ +static int +ArrayCount(const char *str, int *dim, char typdelim, Node *escontext) +{ + int nest_level = 0, + i; + int ndim = 1, + temp[MAXDIM], + nelems[MAXDIM], + nelems_last[MAXDIM]; + bool in_quotes = false; + bool eoArray = false; + bool empty_array = true; + const char *ptr; + ArrayParseState parse_state = ARRAY_NO_LEVEL; + + for (i = 0; i < MAXDIM; ++i) + { + temp[i] = dim[i] = nelems_last[i] = 0; + nelems[i] = 1; + } + + ptr = str; + while (!eoArray) + { + bool itemdone = false; + + while (!itemdone) + { + if (parse_state == ARRAY_ELEM_STARTED || + parse_state == ARRAY_QUOTED_ELEM_STARTED) + empty_array = false; + + switch (*ptr) + { + case '\0': + /* Signal a premature end of the string */ + ereturn(escontext, -1, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str), + errdetail("Unexpected end of input."))); + case '\\': + + /* + * An escape must be after a level start, after an element + * start, or after an element delimiter. In any case we + * now must be past an element start. 
+ */ + if (parse_state != ARRAY_LEVEL_STARTED && + parse_state != ARRAY_ELEM_STARTED && + parse_state != ARRAY_QUOTED_ELEM_STARTED && + parse_state != ARRAY_ELEM_DELIMITED) + ereturn(escontext, -1, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str), + errdetail("Unexpected \"%c\" character.", + '\\'))); + if (parse_state != ARRAY_QUOTED_ELEM_STARTED) + parse_state = ARRAY_ELEM_STARTED; + /* skip the escaped character */ + if (*(ptr + 1)) + ptr++; + else + ereturn(escontext, -1, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str), + errdetail("Unexpected end of input."))); + break; + case '"': + + /* + * A quote must be after a level start, after a quoted + * element start, or after an element delimiter. In any + * case we now must be past an element start. + */ + if (parse_state != ARRAY_LEVEL_STARTED && + parse_state != ARRAY_QUOTED_ELEM_STARTED && + parse_state != ARRAY_ELEM_DELIMITED) + ereturn(escontext, -1, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str), + errdetail("Unexpected array element."))); + in_quotes = !in_quotes; + if (in_quotes) + parse_state = ARRAY_QUOTED_ELEM_STARTED; + else + parse_state = ARRAY_QUOTED_ELEM_COMPLETED; + break; + case '{': + if (!in_quotes) + { + /* + * A left brace can occur if no nesting has occurred + * yet, after a level start, or after a level + * delimiter. + */ + if (parse_state != ARRAY_NO_LEVEL && + parse_state != ARRAY_LEVEL_STARTED && + parse_state != ARRAY_LEVEL_DELIMITED) + ereturn(escontext, -1, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str), + errdetail("Unexpected \"%c\" character.", + '{'))); + parse_state = ARRAY_LEVEL_STARTED; + if (nest_level >= MAXDIM) + ereturn(escontext, -1, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)", + nest_level + 1, MAXDIM))); + temp[nest_level] = 0; + nest_level++; + if (ndim < nest_level) + ndim = nest_level; + } + break; + case '}': + if (!in_quotes) + { + /* + * A right brace can occur after an element start, an + * element completion, a quoted element completion, or + * a level completion. 
+ */ + if (parse_state != ARRAY_ELEM_STARTED && + parse_state != ARRAY_ELEM_COMPLETED && + parse_state != ARRAY_QUOTED_ELEM_COMPLETED && + parse_state != ARRAY_LEVEL_COMPLETED && + !(nest_level == 1 && parse_state == ARRAY_LEVEL_STARTED)) + ereturn(escontext, -1, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str), + errdetail("Unexpected \"%c\" character.", + '}'))); + parse_state = ARRAY_LEVEL_COMPLETED; + if (nest_level == 0) + ereturn(escontext, -1, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str), + errdetail("Unmatched \"%c\" character.", '}'))); + nest_level--; + + if (nelems_last[nest_level] != 0 && + nelems[nest_level] != nelems_last[nest_level]) + ereturn(escontext, -1, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str), + errdetail("Multidimensional arrays must have " + "sub-arrays with matching " + "dimensions."))); + nelems_last[nest_level] = nelems[nest_level]; + nelems[nest_level] = 1; + if (nest_level == 0) + eoArray = itemdone = true; + else + { + /* + * We don't set itemdone here; see comments in + * ReadArrayStr + */ + temp[nest_level - 1]++; + } + } + break; + default: + if (!in_quotes) + { + if (*ptr == typdelim) + { + /* + * Delimiters can occur after an element start, an + * element completion, a quoted element + * completion, or a level completion. + */ + if (parse_state != ARRAY_ELEM_STARTED && + parse_state != ARRAY_ELEM_COMPLETED && + parse_state != ARRAY_QUOTED_ELEM_COMPLETED && + parse_state != ARRAY_LEVEL_COMPLETED) + ereturn(escontext, -1, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str), + errdetail("Unexpected \"%c\" character.", + typdelim))); + if (parse_state == ARRAY_LEVEL_COMPLETED) + parse_state = ARRAY_LEVEL_DELIMITED; + else + parse_state = ARRAY_ELEM_DELIMITED; + itemdone = true; + nelems[nest_level - 1]++; + } + else if (!array_isspace(*ptr)) + { + /* + * Other non-space characters must be after a + * level start, after an element start, or after + * an element delimiter. In any case we now must + * be past an element start. + */ + if (parse_state != ARRAY_LEVEL_STARTED && + parse_state != ARRAY_ELEM_STARTED && + parse_state != ARRAY_ELEM_DELIMITED) + ereturn(escontext, -1, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str), + errdetail("Unexpected array element."))); + parse_state = ARRAY_ELEM_STARTED; + } + } + break; + } + if (!itemdone) + ptr++; + } + temp[ndim - 1]++; + ptr++; + } + + /* only whitespace is allowed after the closing brace */ + while (*ptr) + { + if (!array_isspace(*ptr++)) + ereturn(escontext, -1, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str), + errdetail("Junk after closing right brace."))); + } + + /* special case for an empty array */ + if (empty_array) + return 0; + + for (i = 0; i < ndim; ++i) + dim[i] = temp[i]; + + return ndim; +} + +/* + * ReadArrayStr : + * parses the array string pointed to by "arrayStr" and converts the values + * to internal format. Unspecified elements are initialized to nulls. + * The array dimensions must already have been determined. + * + * Inputs: + * arrayStr: the string to parse. + * CAUTION: the contents of "arrayStr" will be modified! + * origStr: the unmodified input string, used only in error messages. + * nitems: total number of array elements, as already determined. 
+ * ndim: number of array dimensions + * dim[]: array axis lengths + * inputproc: type-specific input procedure for element datatype. + * typioparam, typmod: auxiliary values to pass to inputproc. + * typdelim: the value delimiter (type-specific). + * typlen, typbyval, typalign: storage parameters of element datatype. + * + * Outputs: + * values[]: filled with converted data values. + * nulls[]: filled with is-null markers. + * *hasnulls: set true iff there are any null elements. + * *nbytes: set to total size of data area needed (including alignment + * padding but not including array header overhead). + * *escontext: if this points to an ErrorSaveContext, details of + * any error are reported there. + * + * Result: + * true for success, false for failure (if escontext is provided). + * + * Note that values[] and nulls[] are allocated by the caller, and must have + * nitems elements. + */ +static bool +ReadArrayStr(char *arrayStr, + const char *origStr, + int nitems, + int ndim, + int *dim, + FmgrInfo *inputproc, + Oid typioparam, + int32 typmod, + char typdelim, + int typlen, + bool typbyval, + char typalign, + Datum *values, + bool *nulls, + bool *hasnulls, + int32 *nbytes, + Node *escontext) +{ + int i, + nest_level = 0; + char *srcptr; + bool in_quotes = false; + bool eoArray = false; + bool hasnull; + int32 totbytes; + int indx[MAXDIM] = {0}, + prod[MAXDIM]; + + mda_get_prod(ndim, dim, prod); + + /* Initialize is-null markers to true */ + memset(nulls, true, nitems * sizeof(bool)); + + /* + * We have to remove " and \ characters to create a clean item value to + * pass to the datatype input routine. We overwrite each item value + * in-place within arrayStr to do this. srcptr is the current scan point, + * and dstptr is where we are copying to. + * + * We also want to suppress leading and trailing unquoted whitespace. We + * use the leadingspace flag to suppress leading space. Trailing space is + * tracked by using dstendptr to point to the last significant output + * character. + * + * The error checking in this routine is mostly pro-forma, since we expect + * that ArrayCount() already validated the string. So we don't bother + * with errdetail messages. + */ + srcptr = arrayStr; + while (!eoArray) + { + bool itemdone = false; + bool leadingspace = true; + bool hasquoting = false; + char *itemstart; + char *dstptr; + char *dstendptr; + + i = -1; + itemstart = dstptr = dstendptr = srcptr; + + while (!itemdone) + { + switch (*srcptr) + { + case '\0': + /* Signal a premature end of the string */ + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", + origStr))); + break; + case '\\': + /* Skip backslash, copy next character as-is. */ + srcptr++; + if (*srcptr == '\0') + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", + origStr))); + *dstptr++ = *srcptr++; + /* Treat the escaped character as non-whitespace */ + leadingspace = false; + dstendptr = dstptr; + hasquoting = true; /* can't be a NULL marker */ + break; + case '"': + in_quotes = !in_quotes; + if (in_quotes) + leadingspace = false; + else + { + /* + * Advance dstendptr when we exit in_quotes; this + * saves having to do it in all the other in_quotes + * cases. 
+ */ + dstendptr = dstptr; + } + hasquoting = true; /* can't be a NULL marker */ + srcptr++; + break; + case '{': + if (!in_quotes) + { + if (nest_level >= ndim) + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", + origStr))); + nest_level++; + indx[nest_level - 1] = 0; + srcptr++; + } + else + *dstptr++ = *srcptr++; + break; + case '}': + if (!in_quotes) + { + if (nest_level == 0) + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", + origStr))); + if (i == -1) + i = ArrayGetOffset0(ndim, indx, prod); + indx[nest_level - 1] = 0; + nest_level--; + if (nest_level == 0) + eoArray = itemdone = true; + else + indx[nest_level - 1]++; + srcptr++; + } + else + *dstptr++ = *srcptr++; + break; + default: + if (in_quotes) + *dstptr++ = *srcptr++; + else if (*srcptr == typdelim) + { + if (i == -1) + i = ArrayGetOffset0(ndim, indx, prod); + itemdone = true; + indx[ndim - 1]++; + srcptr++; + } + else if (array_isspace(*srcptr)) + { + /* + * If leading space, drop it immediately. Else, copy + * but don't advance dstendptr. + */ + if (leadingspace) + srcptr++; + else + *dstptr++ = *srcptr++; + } + else + { + *dstptr++ = *srcptr++; + leadingspace = false; + dstendptr = dstptr; + } + break; + } + } + + Assert(dstptr < srcptr); + *dstendptr = '\0'; + + if (i < 0 || i >= nitems) + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", + origStr))); + + if (Array_nulls && !hasquoting && + pg_strcasecmp(itemstart, "NULL") == 0) + { + /* it's a NULL item */ + if (!InputFunctionCallSafe(inputproc, NULL, + typioparam, typmod, + escontext, + &values[i])) + return false; + nulls[i] = true; + } + else + { + if (!InputFunctionCallSafe(inputproc, itemstart, + typioparam, typmod, + escontext, + &values[i])) + return false; + nulls[i] = false; + } + } + + /* + * Check for nulls, compute total data space needed + */ + hasnull = false; + totbytes = 0; + for (i = 0; i < nitems; i++) + { + if (nulls[i]) + hasnull = true; + else + { + /* let's just make sure data is not toasted */ + if (typlen == -1) + values[i] = PointerGetDatum(PG_DETOAST_DATUM(values[i])); + totbytes = att_addlength_datum(totbytes, typlen, values[i]); + totbytes = att_align_nominal(totbytes, typalign); + /* check for overflow of total request */ + if (!AllocSizeIsValid(totbytes)) + ereturn(escontext, false, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("array size exceeds the maximum allowed (%d)", + (int) MaxAllocSize))); + } + } + *hasnulls = hasnull; + *nbytes = totbytes; + return true; +} + + +/* + * Copy data into an array object from a temporary array of Datums. + * + * array: array object (with header fields already filled in) + * values: array of Datums to be copied + * nulls: array of is-null flags (can be NULL if no nulls) + * nitems: number of Datums to be copied + * typbyval, typlen, typalign: info about element datatype + * freedata: if true and element type is pass-by-ref, pfree data values + * referenced by Datums after copying them. + * + * If the input data is of varlena type, the caller must have ensured that + * the values are not toasted. (Doing it here doesn't work since the + * caller has already allocated space for the array...) 
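+ *
+ * For example, array_recv below builds its result this way: it palloc0's
+ * the array, fills in the header (SET_VARSIZE, ndim, dataoffset, elemtype)
+ * and the dims/lbound arrays, and then calls CopyArrayEls to copy the
+ * element Datums and fill in the null bitmap.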
+ */ +void +CopyArrayEls(ArrayType *array, + Datum *values, + bool *nulls, + int nitems, + int typlen, + bool typbyval, + char typalign, + bool freedata) +{ + char *p = ARR_DATA_PTR(array); + bits8 *bitmap = ARR_NULLBITMAP(array); + int bitval = 0; + int bitmask = 1; + int i; + + if (typbyval) + freedata = false; + + for (i = 0; i < nitems; i++) + { + if (nulls && nulls[i]) + { + if (!bitmap) /* shouldn't happen */ + elog(ERROR, "null array element where not supported"); + /* bitmap bit stays 0 */ + } + else + { + bitval |= bitmask; + p += ArrayCastAndSet(values[i], typlen, typbyval, typalign, p); + if (freedata) + pfree(DatumGetPointer(values[i])); + } + if (bitmap) + { + bitmask <<= 1; + if (bitmask == 0x100) + { + *bitmap++ = bitval; + bitval = 0; + bitmask = 1; + } + } + } + + if (bitmap && bitmask != 1) + *bitmap = bitval; +} + +/* + * array_out : + * takes the internal representation of an array and returns a string + * containing the array in its external format. + */ +Datum +array_out(PG_FUNCTION_ARGS) +{ + AnyArrayType *v = PG_GETARG_ANY_ARRAY_P(0); + Oid element_type = AARR_ELEMTYPE(v); + int typlen; + bool typbyval; + char typalign; + char typdelim; + char *p, + *tmp, + *retval, + **values, + dims_str[(MAXDIM * 33) + 2]; + + /* + * 33 per dim since we assume 15 digits per number + ':' +'[]' + * + * +2 allows for assignment operator + trailing null + */ + bool *needquotes, + needdims = false; + size_t overall_length; + int nitems, + i, + j, + k, + indx[MAXDIM]; + int ndim, + *dims, + *lb; + array_iter iter; + ArrayMetaState *my_extra; + + /* + * We arrange to look up info about element type, including its output + * conversion proc, only once per series of calls, assuming the element + * type doesn't change underneath us. + */ + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL) + { + fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(ArrayMetaState)); + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + my_extra->element_type = ~element_type; + } + + if (my_extra->element_type != element_type) + { + /* + * Get info about element type, including its output conversion proc + */ + get_type_io_data(element_type, IOFunc_output, + &my_extra->typlen, &my_extra->typbyval, + &my_extra->typalign, &my_extra->typdelim, + &my_extra->typioparam, &my_extra->typiofunc); + fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc, + fcinfo->flinfo->fn_mcxt); + my_extra->element_type = element_type; + } + typlen = my_extra->typlen; + typbyval = my_extra->typbyval; + typalign = my_extra->typalign; + typdelim = my_extra->typdelim; + + ndim = AARR_NDIM(v); + dims = AARR_DIMS(v); + lb = AARR_LBOUND(v); + nitems = ArrayGetNItems(ndim, dims); + + if (nitems == 0) + { + retval = pstrdup("{}"); + PG_RETURN_CSTRING(retval); + } + + /* + * we will need to add explicit dimensions if any dimension has a lower + * bound other than one + */ + for (i = 0; i < ndim; i++) + { + if (lb[i] != 1) + { + needdims = true; + break; + } + } + + /* + * Convert all values to string form, count total space needed (including + * any overhead such as escaping backslashes), and detect whether each + * item needs double quotes. 
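+ *
+ * For example, an empty string, the word NULL in any letter case, and any
+ * value containing a double quote, backslash, curly brace, the delimiter
+ * character, or whitespace is emitted inside double quotes, with any
+ * embedded double quote or backslash preceded by a backslash.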
+ */ + values = (char **) palloc(nitems * sizeof(char *)); + needquotes = (bool *) palloc(nitems * sizeof(bool)); + overall_length = 0; + + array_iter_setup(&iter, v); + + for (i = 0; i < nitems; i++) + { + Datum itemvalue; + bool isnull; + bool needquote; + + /* Get source element, checking for NULL */ + itemvalue = array_iter_next(&iter, &isnull, i, + typlen, typbyval, typalign); + + if (isnull) + { + values[i] = pstrdup("NULL"); + overall_length += 4; + needquote = false; + } + else + { + values[i] = OutputFunctionCall(&my_extra->proc, itemvalue); + + /* count data plus backslashes; detect chars needing quotes */ + if (values[i][0] == '\0') + needquote = true; /* force quotes for empty string */ + else if (pg_strcasecmp(values[i], "NULL") == 0) + needquote = true; /* force quotes for literal NULL */ + else + needquote = false; + + for (tmp = values[i]; *tmp != '\0'; tmp++) + { + char ch = *tmp; + + overall_length += 1; + if (ch == '"' || ch == '\\') + { + needquote = true; + overall_length += 1; + } + else if (ch == '{' || ch == '}' || ch == typdelim || + array_isspace(ch)) + needquote = true; + } + } + + needquotes[i] = needquote; + + /* Count the pair of double quotes, if needed */ + if (needquote) + overall_length += 2; + /* and the comma (or other typdelim delimiter) */ + overall_length += 1; + } + + /* + * The very last array element doesn't have a typdelim delimiter after it, + * but that's OK; that space is needed for the trailing '\0'. + * + * Now count total number of curly brace pairs in output string. + */ + for (i = j = 0, k = 1; i < ndim; i++) + { + j += k, k *= dims[i]; + } + overall_length += 2 * j; + + /* Format explicit dimensions if required */ + dims_str[0] = '\0'; + if (needdims) + { + char *ptr = dims_str; + + for (i = 0; i < ndim; i++) + { + sprintf(ptr, "[%d:%d]", lb[i], lb[i] + dims[i] - 1); + ptr += strlen(ptr); + } + *ptr++ = *ASSGN; + *ptr = '\0'; + overall_length += ptr - dims_str; + } + + /* Now construct the output string */ + retval = (char *) palloc(overall_length); + p = retval; + +#define APPENDSTR(str) (strcpy(p, (str)), p += strlen(p)) +#define APPENDCHAR(ch) (*p++ = (ch), *p = '\0') + + if (needdims) + APPENDSTR(dims_str); + APPENDCHAR('{'); + for (i = 0; i < ndim; i++) + indx[i] = 0; + j = 0; + k = 0; + do + { + for (i = j; i < ndim - 1; i++) + APPENDCHAR('{'); + + if (needquotes[k]) + { + APPENDCHAR('"'); + for (tmp = values[k]; *tmp; tmp++) + { + char ch = *tmp; + + if (ch == '"' || ch == '\\') + *p++ = '\\'; + *p++ = ch; + } + *p = '\0'; + APPENDCHAR('"'); + } + else + APPENDSTR(values[k]); + pfree(values[k++]); + + for (i = ndim - 1; i >= 0; i--) + { + if (++(indx[i]) < dims[i]) + { + APPENDCHAR(typdelim); + break; + } + else + { + indx[i] = 0; + APPENDCHAR('}'); + } + } + j = i; + } while (j != -1); + +#undef APPENDSTR +#undef APPENDCHAR + + /* Assert that we calculated the string length accurately */ + Assert(overall_length == (p - retval + 1)); + + pfree(values); + pfree(needquotes); + + PG_RETURN_CSTRING(retval); +} + +/* + * array_recv : + * converts an array from the external binary format to + * its internal format. 
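+ *
+ * The expected wire format, as read below, is: an int32 number of
+ * dimensions, an int32 flags word (0 or 1, 1 meaning the sender's array
+ * contained nulls), the element type OID, then an int32 length and an
+ * int32 lower bound for each dimension, followed by each element as an
+ * int32 byte length (-1 denoting a NULL) and that many bytes of the
+ * element type's binary representation.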
+ * + * return value : + * the internal representation of the input array + */ +Datum +array_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + Oid spec_element_type = PG_GETARG_OID(1); /* type of an array + * element */ + int32 typmod = PG_GETARG_INT32(2); /* typmod for array elements */ + Oid element_type; + int typlen; + bool typbyval; + char typalign; + Oid typioparam; + int i, + nitems; + Datum *dataPtr; + bool *nullsPtr; + bool hasnulls; + int32 nbytes; + int32 dataoffset; + ArrayType *retval; + int ndim, + flags, + dim[MAXDIM], + lBound[MAXDIM]; + ArrayMetaState *my_extra; + + /* Get the array header information */ + ndim = pq_getmsgint(buf, 4); + if (ndim < 0) /* we do allow zero-dimension arrays */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("invalid number of dimensions: %d", ndim))); + if (ndim > MAXDIM) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)", + ndim, MAXDIM))); + + flags = pq_getmsgint(buf, 4); + if (flags != 0 && flags != 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("invalid array flags"))); + + /* Check element type recorded in the data */ + element_type = pq_getmsgint(buf, sizeof(Oid)); + + /* + * From a security standpoint, it doesn't matter whether the input's + * element type matches what we expect: the element type's receive + * function has to be robust enough to cope with invalid data. However, + * from a user-friendliness standpoint, it's nicer to complain about type + * mismatches than to throw "improper binary format" errors. But there's + * a problem: only built-in types have OIDs that are stable enough to + * believe that a mismatch is a real issue. So complain only if both OIDs + * are in the built-in range. Otherwise, carry on with the element type + * we "should" be getting. + */ + if (element_type != spec_element_type) + { + if (element_type < FirstGenbkiObjectId && + spec_element_type < FirstGenbkiObjectId) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("binary data has array element type %u (%s) instead of expected %u (%s)", + element_type, + format_type_extended(element_type, -1, + FORMAT_TYPE_ALLOW_INVALID), + spec_element_type, + format_type_extended(spec_element_type, -1, + FORMAT_TYPE_ALLOW_INVALID)))); + element_type = spec_element_type; + } + + for (i = 0; i < ndim; i++) + { + dim[i] = pq_getmsgint(buf, 4); + lBound[i] = pq_getmsgint(buf, 4); + } + + /* This checks for overflow of array dimensions */ + nitems = ArrayGetNItems(ndim, dim); + ArrayCheckBounds(ndim, dim, lBound); + + /* + * We arrange to look up info about element type, including its receive + * conversion proc, only once per series of calls, assuming the element + * type doesn't change underneath us. 
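+ *
+ * (A freshly created cache entry has element_type initialized to the
+ * bitwise complement of the requested type OID, which can never compare
+ * equal to it, so the lookup below always runs on the first call.)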
+ */ + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL) + { + fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(ArrayMetaState)); + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + my_extra->element_type = ~element_type; + } + + if (my_extra->element_type != element_type) + { + /* Get info about element type, including its receive proc */ + get_type_io_data(element_type, IOFunc_receive, + &my_extra->typlen, &my_extra->typbyval, + &my_extra->typalign, &my_extra->typdelim, + &my_extra->typioparam, &my_extra->typiofunc); + if (!OidIsValid(my_extra->typiofunc)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("no binary input function available for type %s", + format_type_be(element_type)))); + fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc, + fcinfo->flinfo->fn_mcxt); + my_extra->element_type = element_type; + } + + if (nitems == 0) + { + /* Return empty array ... but not till we've validated element_type */ + PG_RETURN_ARRAYTYPE_P(construct_empty_array(element_type)); + } + + typlen = my_extra->typlen; + typbyval = my_extra->typbyval; + typalign = my_extra->typalign; + typioparam = my_extra->typioparam; + + dataPtr = (Datum *) palloc(nitems * sizeof(Datum)); + nullsPtr = (bool *) palloc(nitems * sizeof(bool)); + ReadArrayBinary(buf, nitems, + &my_extra->proc, typioparam, typmod, + typlen, typbyval, typalign, + dataPtr, nullsPtr, + &hasnulls, &nbytes); + if (hasnulls) + { + dataoffset = ARR_OVERHEAD_WITHNULLS(ndim, nitems); + nbytes += dataoffset; + } + else + { + dataoffset = 0; /* marker for no null bitmap */ + nbytes += ARR_OVERHEAD_NONULLS(ndim); + } + retval = (ArrayType *) palloc0(nbytes); + SET_VARSIZE(retval, nbytes); + retval->ndim = ndim; + retval->dataoffset = dataoffset; + retval->elemtype = element_type; + memcpy(ARR_DIMS(retval), dim, ndim * sizeof(int)); + memcpy(ARR_LBOUND(retval), lBound, ndim * sizeof(int)); + + CopyArrayEls(retval, + dataPtr, nullsPtr, nitems, + typlen, typbyval, typalign, + true); + + pfree(dataPtr); + pfree(nullsPtr); + + PG_RETURN_ARRAYTYPE_P(retval); +} + +/* + * ReadArrayBinary: + * collect the data elements of an array being read in binary style. + * + * Inputs: + * buf: the data buffer to read from. + * nitems: total number of array elements (already read). + * receiveproc: type-specific receive procedure for element datatype. + * typioparam, typmod: auxiliary values to pass to receiveproc. + * typlen, typbyval, typalign: storage parameters of element datatype. + * + * Outputs: + * values[]: filled with converted data values. + * nulls[]: filled with is-null markers. + * *hasnulls: set true iff there are any null elements. + * *nbytes: set to total size of data area needed (including alignment + * padding but not including array header overhead). + * + * Note that values[] and nulls[] are allocated by the caller, and must have + * nitems elements. 
+ */ +static void +ReadArrayBinary(StringInfo buf, + int nitems, + FmgrInfo *receiveproc, + Oid typioparam, + int32 typmod, + int typlen, + bool typbyval, + char typalign, + Datum *values, + bool *nulls, + bool *hasnulls, + int32 *nbytes) +{ + int i; + bool hasnull; + int32 totbytes; + + for (i = 0; i < nitems; i++) + { + int itemlen; + StringInfoData elem_buf; + + /* Get and check the item length */ + itemlen = pq_getmsgint(buf, 4); + if (itemlen < -1 || itemlen > (buf->len - buf->cursor)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("insufficient data left in message"))); + + if (itemlen == -1) + { + /* -1 length means NULL */ + values[i] = ReceiveFunctionCall(receiveproc, NULL, + typioparam, typmod); + nulls[i] = true; + continue; + } + + /* + * Rather than copying data around, we just set up a phony StringInfo + * pointing to the correct portion of the input buffer. We assume we + * can scribble on the input buffer so as to maintain the convention + * that StringInfos have a trailing null. + */ + elem_buf.data = &buf->data[buf->cursor]; + elem_buf.maxlen = itemlen + 1; + elem_buf.len = itemlen; + elem_buf.cursor = 0; + + buf->cursor += itemlen; + + /* Now call the element's receiveproc */ + values[i] = ReceiveFunctionCall(receiveproc, &elem_buf, + typioparam, typmod); + nulls[i] = false; + + /* Trouble if it didn't eat the whole buffer */ + if (elem_buf.cursor != itemlen) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("improper binary format in array element %d", + i + 1))); + } + + /* + * Check for nulls, compute total data space needed + */ + hasnull = false; + totbytes = 0; + for (i = 0; i < nitems; i++) + { + if (nulls[i]) + hasnull = true; + else + { + /* let's just make sure data is not toasted */ + if (typlen == -1) + values[i] = PointerGetDatum(PG_DETOAST_DATUM(values[i])); + totbytes = att_addlength_datum(totbytes, typlen, values[i]); + totbytes = att_align_nominal(totbytes, typalign); + /* check for overflow of total request */ + if (!AllocSizeIsValid(totbytes)) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("array size exceeds the maximum allowed (%d)", + (int) MaxAllocSize))); + } + } + *hasnulls = hasnull; + *nbytes = totbytes; +} + + +/* + * array_send : + * takes the internal representation of an array and returns a bytea + * containing the array in its external binary format. + */ +Datum +array_send(PG_FUNCTION_ARGS) +{ + AnyArrayType *v = PG_GETARG_ANY_ARRAY_P(0); + Oid element_type = AARR_ELEMTYPE(v); + int typlen; + bool typbyval; + char typalign; + int nitems, + i; + int ndim, + *dim, + *lb; + StringInfoData buf; + array_iter iter; + ArrayMetaState *my_extra; + + /* + * We arrange to look up info about element type, including its send + * conversion proc, only once per series of calls, assuming the element + * type doesn't change underneath us. 
+ */ + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL) + { + fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(ArrayMetaState)); + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + my_extra->element_type = ~element_type; + } + + if (my_extra->element_type != element_type) + { + /* Get info about element type, including its send proc */ + get_type_io_data(element_type, IOFunc_send, + &my_extra->typlen, &my_extra->typbyval, + &my_extra->typalign, &my_extra->typdelim, + &my_extra->typioparam, &my_extra->typiofunc); + if (!OidIsValid(my_extra->typiofunc)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("no binary output function available for type %s", + format_type_be(element_type)))); + fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc, + fcinfo->flinfo->fn_mcxt); + my_extra->element_type = element_type; + } + typlen = my_extra->typlen; + typbyval = my_extra->typbyval; + typalign = my_extra->typalign; + + ndim = AARR_NDIM(v); + dim = AARR_DIMS(v); + lb = AARR_LBOUND(v); + nitems = ArrayGetNItems(ndim, dim); + + pq_begintypsend(&buf); + + /* Send the array header information */ + pq_sendint32(&buf, ndim); + pq_sendint32(&buf, AARR_HASNULL(v) ? 1 : 0); + pq_sendint32(&buf, element_type); + for (i = 0; i < ndim; i++) + { + pq_sendint32(&buf, dim[i]); + pq_sendint32(&buf, lb[i]); + } + + /* Send the array elements using the element's own sendproc */ + array_iter_setup(&iter, v); + + for (i = 0; i < nitems; i++) + { + Datum itemvalue; + bool isnull; + + /* Get source element, checking for NULL */ + itemvalue = array_iter_next(&iter, &isnull, i, + typlen, typbyval, typalign); + + if (isnull) + { + /* -1 length means a NULL */ + pq_sendint32(&buf, -1); + } + else + { + bytea *outputbytes; + + outputbytes = SendFunctionCall(&my_extra->proc, itemvalue); + pq_sendint32(&buf, VARSIZE(outputbytes) - VARHDRSZ); + pq_sendbytes(&buf, VARDATA(outputbytes), + VARSIZE(outputbytes) - VARHDRSZ); + pfree(outputbytes); + } + } + + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * array_ndims : + * returns the number of dimensions of the array pointed to by "v" + */ +Datum +array_ndims(PG_FUNCTION_ARGS) +{ + AnyArrayType *v = PG_GETARG_ANY_ARRAY_P(0); + + /* Sanity check: does it look like an array at all? */ + if (AARR_NDIM(v) <= 0 || AARR_NDIM(v) > MAXDIM) + PG_RETURN_NULL(); + + PG_RETURN_INT32(AARR_NDIM(v)); +} + +/* + * array_dims : + * returns the dimensions of the array pointed to by "v", as a "text" + */ +Datum +array_dims(PG_FUNCTION_ARGS) +{ + AnyArrayType *v = PG_GETARG_ANY_ARRAY_P(0); + char *p; + int i; + int *dimv, + *lb; + + /* + * 33 since we assume 15 digits per number + ':' +'[]' + * + * +1 for trailing null + */ + char buf[MAXDIM * 33 + 1]; + + /* Sanity check: does it look like an array at all? */ + if (AARR_NDIM(v) <= 0 || AARR_NDIM(v) > MAXDIM) + PG_RETURN_NULL(); + + dimv = AARR_DIMS(v); + lb = AARR_LBOUND(v); + + p = buf; + for (i = 0; i < AARR_NDIM(v); i++) + { + sprintf(p, "[%d:%d]", lb[i], dimv[i] + lb[i] - 1); + p += strlen(p); + } + + PG_RETURN_TEXT_P(cstring_to_text(buf)); +} + +/* + * array_lower : + * returns the lower dimension, of the DIM requested, for + * the array pointed to by "v", as an int4 + */ +Datum +array_lower(PG_FUNCTION_ARGS) +{ + AnyArrayType *v = PG_GETARG_ANY_ARRAY_P(0); + int reqdim = PG_GETARG_INT32(1); + int *lb; + int result; + + /* Sanity check: does it look like an array at all? 
*/ + if (AARR_NDIM(v) <= 0 || AARR_NDIM(v) > MAXDIM) + PG_RETURN_NULL(); + + /* Sanity check: was the requested dim valid */ + if (reqdim <= 0 || reqdim > AARR_NDIM(v)) + PG_RETURN_NULL(); + + lb = AARR_LBOUND(v); + result = lb[reqdim - 1]; + + PG_RETURN_INT32(result); +} + +/* + * array_upper : + * returns the upper dimension, of the DIM requested, for + * the array pointed to by "v", as an int4 + */ +Datum +array_upper(PG_FUNCTION_ARGS) +{ + AnyArrayType *v = PG_GETARG_ANY_ARRAY_P(0); + int reqdim = PG_GETARG_INT32(1); + int *dimv, + *lb; + int result; + + /* Sanity check: does it look like an array at all? */ + if (AARR_NDIM(v) <= 0 || AARR_NDIM(v) > MAXDIM) + PG_RETURN_NULL(); + + /* Sanity check: was the requested dim valid */ + if (reqdim <= 0 || reqdim > AARR_NDIM(v)) + PG_RETURN_NULL(); + + lb = AARR_LBOUND(v); + dimv = AARR_DIMS(v); + + result = dimv[reqdim - 1] + lb[reqdim - 1] - 1; + + PG_RETURN_INT32(result); +} + +/* + * array_length : + * returns the length, of the dimension requested, for + * the array pointed to by "v", as an int4 + */ +Datum +array_length(PG_FUNCTION_ARGS) +{ + AnyArrayType *v = PG_GETARG_ANY_ARRAY_P(0); + int reqdim = PG_GETARG_INT32(1); + int *dimv; + int result; + + /* Sanity check: does it look like an array at all? */ + if (AARR_NDIM(v) <= 0 || AARR_NDIM(v) > MAXDIM) + PG_RETURN_NULL(); + + /* Sanity check: was the requested dim valid */ + if (reqdim <= 0 || reqdim > AARR_NDIM(v)) + PG_RETURN_NULL(); + + dimv = AARR_DIMS(v); + + result = dimv[reqdim - 1]; + + PG_RETURN_INT32(result); +} + +/* + * array_cardinality: + * returns the total number of elements in an array + */ +Datum +array_cardinality(PG_FUNCTION_ARGS) +{ + AnyArrayType *v = PG_GETARG_ANY_ARRAY_P(0); + + PG_RETURN_INT32(ArrayGetNItems(AARR_NDIM(v), AARR_DIMS(v))); +} + + +/* + * array_get_element : + * This routine takes an array datum and a subscript array and returns + * the referenced item as a Datum. Note that for a pass-by-reference + * datatype, the returned Datum is a pointer into the array object. + * + * This handles both ordinary varlena arrays and fixed-length arrays. + * + * Inputs: + * arraydatum: the array object (mustn't be NULL) + * nSubscripts: number of subscripts supplied + * indx[]: the subscript values + * arraytyplen: pg_type.typlen for the array type + * elmlen: pg_type.typlen for the array's element type + * elmbyval: pg_type.typbyval for the array's element type + * elmalign: pg_type.typalign for the array's element type + * + * Outputs: + * The return value is the element Datum. + * *isNull is set to indicate whether the element is NULL. 
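+ *
+ * For example, fetching a[3] from a one-dimensional int4 array passes
+ * nSubscripts = 1, indx[0] = 3, arraytyplen = -1, elmlen = 4,
+ * elmbyval = true and elmalign = 'i'.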
+ */ +Datum +array_get_element(Datum arraydatum, + int nSubscripts, + int *indx, + int arraytyplen, + int elmlen, + bool elmbyval, + char elmalign, + bool *isNull) +{ + int i, + ndim, + *dim, + *lb, + offset, + fixedDim[1], + fixedLb[1]; + char *arraydataptr, + *retptr; + bits8 *arraynullsptr; + + if (arraytyplen > 0) + { + /* + * fixed-length arrays -- these are assumed to be 1-d, 0-based + */ + ndim = 1; + fixedDim[0] = arraytyplen / elmlen; + fixedLb[0] = 0; + dim = fixedDim; + lb = fixedLb; + arraydataptr = (char *) DatumGetPointer(arraydatum); + arraynullsptr = NULL; + } + else if (VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(arraydatum))) + { + /* expanded array: let's do this in a separate function */ + return array_get_element_expanded(arraydatum, + nSubscripts, + indx, + arraytyplen, + elmlen, + elmbyval, + elmalign, + isNull); + } + else + { + /* detoast array if necessary, producing normal varlena input */ + ArrayType *array = DatumGetArrayTypeP(arraydatum); + + ndim = ARR_NDIM(array); + dim = ARR_DIMS(array); + lb = ARR_LBOUND(array); + arraydataptr = ARR_DATA_PTR(array); + arraynullsptr = ARR_NULLBITMAP(array); + } + + /* + * Return NULL for invalid subscript + */ + if (ndim != nSubscripts || ndim <= 0 || ndim > MAXDIM) + { + *isNull = true; + return (Datum) 0; + } + for (i = 0; i < ndim; i++) + { + if (indx[i] < lb[i] || indx[i] >= (dim[i] + lb[i])) + { + *isNull = true; + return (Datum) 0; + } + } + + /* + * Calculate the element number + */ + offset = ArrayGetOffset(nSubscripts, dim, lb, indx); + + /* + * Check for NULL array element + */ + if (array_get_isnull(arraynullsptr, offset)) + { + *isNull = true; + return (Datum) 0; + } + + /* + * OK, get the element + */ + *isNull = false; + retptr = array_seek(arraydataptr, 0, arraynullsptr, offset, + elmlen, elmbyval, elmalign); + return ArrayCast(retptr, elmbyval, elmlen); +} + +/* + * Implementation of array_get_element() for an expanded array + */ +static Datum +array_get_element_expanded(Datum arraydatum, + int nSubscripts, int *indx, + int arraytyplen, + int elmlen, bool elmbyval, char elmalign, + bool *isNull) +{ + ExpandedArrayHeader *eah; + int i, + ndim, + *dim, + *lb, + offset; + Datum *dvalues; + bool *dnulls; + + eah = (ExpandedArrayHeader *) DatumGetEOHP(arraydatum); + Assert(eah->ea_magic == EA_MAGIC); + + /* sanity-check caller's info against object */ + Assert(arraytyplen == -1); + Assert(elmlen == eah->typlen); + Assert(elmbyval == eah->typbyval); + Assert(elmalign == eah->typalign); + + ndim = eah->ndims; + dim = eah->dims; + lb = eah->lbound; + + /* + * Return NULL for invalid subscript + */ + if (ndim != nSubscripts || ndim <= 0 || ndim > MAXDIM) + { + *isNull = true; + return (Datum) 0; + } + for (i = 0; i < ndim; i++) + { + if (indx[i] < lb[i] || indx[i] >= (dim[i] + lb[i])) + { + *isNull = true; + return (Datum) 0; + } + } + + /* + * Calculate the element number + */ + offset = ArrayGetOffset(nSubscripts, dim, lb, indx); + + /* + * Deconstruct array if we didn't already. Note that we apply this even + * if the input is nominally read-only: it should be safe enough. + */ + deconstruct_expanded_array(eah); + + dvalues = eah->dvalues; + dnulls = eah->dnulls; + + /* + * Check for NULL array element + */ + if (dnulls && dnulls[offset]) + { + *isNull = true; + return (Datum) 0; + } + + /* + * OK, get the element. 
It's OK to return a pass-by-ref value as a + * pointer into the expanded array, for the same reason that regular + * array_get_element can return a pointer into flat arrays: the value is + * assumed not to change for as long as the Datum reference can exist. + */ + *isNull = false; + return dvalues[offset]; +} + +/* + * array_get_slice : + * This routine takes an array and a range of indices (upperIndx and + * lowerIndx), creates a new array structure for the referred elements + * and returns a pointer to it. + * + * This handles both ordinary varlena arrays and fixed-length arrays. + * + * Inputs: + * arraydatum: the array object (mustn't be NULL) + * nSubscripts: number of subscripts supplied (must be same for upper/lower) + * upperIndx[]: the upper subscript values + * lowerIndx[]: the lower subscript values + * upperProvided[]: true for provided upper subscript values + * lowerProvided[]: true for provided lower subscript values + * arraytyplen: pg_type.typlen for the array type + * elmlen: pg_type.typlen for the array's element type + * elmbyval: pg_type.typbyval for the array's element type + * elmalign: pg_type.typalign for the array's element type + * + * Outputs: + * The return value is the new array Datum (it's never NULL) + * + * Omitted upper and lower subscript values are replaced by the corresponding + * array bound. + * + * NOTE: we assume it is OK to scribble on the provided subscript arrays + * lowerIndx[] and upperIndx[]; also, these arrays must be of size MAXDIM + * even when nSubscripts is less. These are generally just temporaries. + */ +Datum +array_get_slice(Datum arraydatum, + int nSubscripts, + int *upperIndx, + int *lowerIndx, + bool *upperProvided, + bool *lowerProvided, + int arraytyplen, + int elmlen, + bool elmbyval, + char elmalign) +{ + ArrayType *array; + ArrayType *newarray; + int i, + ndim, + *dim, + *lb, + *newlb; + int fixedDim[1], + fixedLb[1]; + Oid elemtype; + char *arraydataptr; + bits8 *arraynullsptr; + int32 dataoffset; + int bytes, + span[MAXDIM]; + + if (arraytyplen > 0) + { + /* + * fixed-length arrays -- currently, cannot slice these because parser + * labels output as being of the fixed-length array type! Code below + * shows how we could support it if the parser were changed to label + * output as a suitable varlena array type. + */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("slices of fixed-length arrays not implemented"))); + + /* + * fixed-length arrays -- these are assumed to be 1-d, 0-based + * + * XXX where would we get the correct ELEMTYPE from? + */ + ndim = 1; + fixedDim[0] = arraytyplen / elmlen; + fixedLb[0] = 0; + dim = fixedDim; + lb = fixedLb; + elemtype = InvalidOid; /* XXX */ + arraydataptr = (char *) DatumGetPointer(arraydatum); + arraynullsptr = NULL; + } + else + { + /* detoast input array if necessary */ + array = DatumGetArrayTypeP(arraydatum); + + ndim = ARR_NDIM(array); + dim = ARR_DIMS(array); + lb = ARR_LBOUND(array); + elemtype = ARR_ELEMTYPE(array); + arraydataptr = ARR_DATA_PTR(array); + arraynullsptr = ARR_NULLBITMAP(array); + } + + /* + * Check provided subscripts. A slice exceeding the current array limits + * is silently truncated to the array limits. If we end up with an empty + * slice, return an empty array. 
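+ *
+ * For instance, requesting the slice [3:10] of a one-dimensional array
+ * with bounds [1:5] yields just the three elements 3..5 (and, per the
+ * note below, the result's lower bound is reset to 1).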
+ */ + if (ndim < nSubscripts || ndim <= 0 || ndim > MAXDIM) + return PointerGetDatum(construct_empty_array(elemtype)); + + for (i = 0; i < nSubscripts; i++) + { + if (!lowerProvided[i] || lowerIndx[i] < lb[i]) + lowerIndx[i] = lb[i]; + if (!upperProvided[i] || upperIndx[i] >= (dim[i] + lb[i])) + upperIndx[i] = dim[i] + lb[i] - 1; + if (lowerIndx[i] > upperIndx[i]) + return PointerGetDatum(construct_empty_array(elemtype)); + } + /* fill any missing subscript positions with full array range */ + for (; i < ndim; i++) + { + lowerIndx[i] = lb[i]; + upperIndx[i] = dim[i] + lb[i] - 1; + if (lowerIndx[i] > upperIndx[i]) + return PointerGetDatum(construct_empty_array(elemtype)); + } + + mda_get_range(ndim, span, lowerIndx, upperIndx); + + bytes = array_slice_size(arraydataptr, arraynullsptr, + ndim, dim, lb, + lowerIndx, upperIndx, + elmlen, elmbyval, elmalign); + + /* + * Currently, we put a null bitmap in the result if the source has one; + * could be smarter ... + */ + if (arraynullsptr) + { + dataoffset = ARR_OVERHEAD_WITHNULLS(ndim, ArrayGetNItems(ndim, span)); + bytes += dataoffset; + } + else + { + dataoffset = 0; /* marker for no null bitmap */ + bytes += ARR_OVERHEAD_NONULLS(ndim); + } + + newarray = (ArrayType *) palloc0(bytes); + SET_VARSIZE(newarray, bytes); + newarray->ndim = ndim; + newarray->dataoffset = dataoffset; + newarray->elemtype = elemtype; + memcpy(ARR_DIMS(newarray), span, ndim * sizeof(int)); + + /* + * Lower bounds of the new array are set to 1. Formerly (before 7.3) we + * copied the given lowerIndx values ... but that seems confusing. + */ + newlb = ARR_LBOUND(newarray); + for (i = 0; i < ndim; i++) + newlb[i] = 1; + + array_extract_slice(newarray, + ndim, dim, lb, + arraydataptr, arraynullsptr, + lowerIndx, upperIndx, + elmlen, elmbyval, elmalign); + + return PointerGetDatum(newarray); +} + +/* + * array_set_element : + * This routine sets the value of one array element (specified by + * a subscript array) to a new value specified by "dataValue". + * + * This handles both ordinary varlena arrays and fixed-length arrays. + * + * Inputs: + * arraydatum: the initial array object (mustn't be NULL) + * nSubscripts: number of subscripts supplied + * indx[]: the subscript values + * dataValue: the datum to be inserted at the given position + * isNull: whether dataValue is NULL + * arraytyplen: pg_type.typlen for the array type + * elmlen: pg_type.typlen for the array's element type + * elmbyval: pg_type.typbyval for the array's element type + * elmalign: pg_type.typalign for the array's element type + * + * Result: + * A new array is returned, just like the old except for the one + * modified entry. The original array object is not changed, + * unless what is passed is a read-write reference to an expanded + * array object; in that case the expanded array is updated in-place. + * + * For one-dimensional arrays only, we allow the array to be extended + * by assigning to a position outside the existing subscript range; any + * positions between the existing elements and the new one are set to NULLs. + * (XXX TODO: allow a corresponding behavior for multidimensional arrays) + * + * NOTE: For assignments, we throw an error for invalid subscripts etc, + * rather than returning a NULL as the fetch operations do. 
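+ *
+ * As an example of the one-dimensional extension behavior described above,
+ * assigning to position 6 of an array with bounds [1:3] produces an array
+ * with bounds [1:6] in which positions 4 and 5 are NULL.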
+ */ +Datum +array_set_element(Datum arraydatum, + int nSubscripts, + int *indx, + Datum dataValue, + bool isNull, + int arraytyplen, + int elmlen, + bool elmbyval, + char elmalign) +{ + ArrayType *array; + ArrayType *newarray; + int i, + ndim, + dim[MAXDIM], + lb[MAXDIM], + offset; + char *elt_ptr; + bool newhasnulls; + bits8 *oldnullbitmap; + int oldnitems, + newnitems, + olddatasize, + newsize, + olditemlen, + newitemlen, + overheadlen, + oldoverheadlen, + addedbefore, + addedafter, + lenbefore, + lenafter; + + if (arraytyplen > 0) + { + /* + * fixed-length arrays -- these are assumed to be 1-d, 0-based. We + * cannot extend them, either. + */ + char *resultarray; + + if (nSubscripts != 1) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("wrong number of array subscripts"))); + + if (indx[0] < 0 || indx[0] >= arraytyplen / elmlen) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("array subscript out of range"))); + + if (isNull) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("cannot assign null value to an element of a fixed-length array"))); + + resultarray = (char *) palloc(arraytyplen); + memcpy(resultarray, DatumGetPointer(arraydatum), arraytyplen); + elt_ptr = (char *) resultarray + indx[0] * elmlen; + ArrayCastAndSet(dataValue, elmlen, elmbyval, elmalign, elt_ptr); + return PointerGetDatum(resultarray); + } + + if (nSubscripts <= 0 || nSubscripts > MAXDIM) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("wrong number of array subscripts"))); + + /* make sure item to be inserted is not toasted */ + if (elmlen == -1 && !isNull) + dataValue = PointerGetDatum(PG_DETOAST_DATUM(dataValue)); + + if (VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(arraydatum))) + { + /* expanded array: let's do this in a separate function */ + return array_set_element_expanded(arraydatum, + nSubscripts, + indx, + dataValue, + isNull, + arraytyplen, + elmlen, + elmbyval, + elmalign); + } + + /* detoast input array if necessary */ + array = DatumGetArrayTypeP(arraydatum); + + ndim = ARR_NDIM(array); + + /* + * if number of dims is zero, i.e. an empty array, create an array with + * nSubscripts dimensions, and set the lower bounds to the supplied + * subscripts + */ + if (ndim == 0) + { + Oid elmtype = ARR_ELEMTYPE(array); + + for (i = 0; i < nSubscripts; i++) + { + dim[i] = 1; + lb[i] = indx[i]; + } + + return PointerGetDatum(construct_md_array(&dataValue, &isNull, + nSubscripts, dim, lb, + elmtype, + elmlen, elmbyval, elmalign)); + } + + if (ndim != nSubscripts) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("wrong number of array subscripts"))); + + /* copy dim/lb since we may modify them */ + memcpy(dim, ARR_DIMS(array), ndim * sizeof(int)); + memcpy(lb, ARR_LBOUND(array), ndim * sizeof(int)); + + newhasnulls = (ARR_HASNULL(array) || isNull); + addedbefore = addedafter = 0; + + /* + * Check subscripts. We assume the existing subscripts passed + * ArrayCheckBounds, so that dim[i] + lb[i] can be computed without + * overflow. But we must beware of other overflows in our calculations of + * new dim[] values. 
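+ *
+ * (For instance, a subscript of INT_MIN combined with a positive lower
+ * bound would overflow a naive lb[0] - indx[0] computation, hence the
+ * pg_sub_s32_overflow/pg_add_s32_overflow checks below.)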
+ */ + if (ndim == 1) + { + if (indx[0] < lb[0]) + { + /* addedbefore = lb[0] - indx[0]; */ + /* dim[0] += addedbefore; */ + if (pg_sub_s32_overflow(lb[0], indx[0], &addedbefore) || + pg_add_s32_overflow(dim[0], addedbefore, &dim[0])) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("array size exceeds the maximum allowed (%d)", + (int) MaxArraySize))); + lb[0] = indx[0]; + if (addedbefore > 1) + newhasnulls = true; /* will insert nulls */ + } + if (indx[0] >= (dim[0] + lb[0])) + { + /* addedafter = indx[0] - (dim[0] + lb[0]) + 1; */ + /* dim[0] += addedafter; */ + if (pg_sub_s32_overflow(indx[0], dim[0] + lb[0], &addedafter) || + pg_add_s32_overflow(addedafter, 1, &addedafter) || + pg_add_s32_overflow(dim[0], addedafter, &dim[0])) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("array size exceeds the maximum allowed (%d)", + (int) MaxArraySize))); + if (addedafter > 1) + newhasnulls = true; /* will insert nulls */ + } + } + else + { + /* + * XXX currently we do not support extending multi-dimensional arrays + * during assignment + */ + for (i = 0; i < ndim; i++) + { + if (indx[i] < lb[i] || + indx[i] >= (dim[i] + lb[i])) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("array subscript out of range"))); + } + } + + /* This checks for overflow of the array dimensions */ + newnitems = ArrayGetNItems(ndim, dim); + ArrayCheckBounds(ndim, dim, lb); + + /* + * Compute sizes of items and areas to copy + */ + if (newhasnulls) + overheadlen = ARR_OVERHEAD_WITHNULLS(ndim, newnitems); + else + overheadlen = ARR_OVERHEAD_NONULLS(ndim); + oldnitems = ArrayGetNItems(ndim, ARR_DIMS(array)); + oldnullbitmap = ARR_NULLBITMAP(array); + oldoverheadlen = ARR_DATA_OFFSET(array); + olddatasize = ARR_SIZE(array) - oldoverheadlen; + if (addedbefore) + { + offset = 0; + lenbefore = 0; + olditemlen = 0; + lenafter = olddatasize; + } + else if (addedafter) + { + offset = oldnitems; + lenbefore = olddatasize; + olditemlen = 0; + lenafter = 0; + } + else + { + offset = ArrayGetOffset(nSubscripts, dim, lb, indx); + elt_ptr = array_seek(ARR_DATA_PTR(array), 0, oldnullbitmap, offset, + elmlen, elmbyval, elmalign); + lenbefore = (int) (elt_ptr - ARR_DATA_PTR(array)); + if (array_get_isnull(oldnullbitmap, offset)) + olditemlen = 0; + else + { + olditemlen = att_addlength_pointer(0, elmlen, elt_ptr); + olditemlen = att_align_nominal(olditemlen, elmalign); + } + lenafter = (int) (olddatasize - lenbefore - olditemlen); + } + + if (isNull) + newitemlen = 0; + else + { + newitemlen = att_addlength_datum(0, elmlen, dataValue); + newitemlen = att_align_nominal(newitemlen, elmalign); + } + + newsize = overheadlen + lenbefore + newitemlen + lenafter; + + /* + * OK, create the new array and fill in header/dimensions + */ + newarray = (ArrayType *) palloc0(newsize); + SET_VARSIZE(newarray, newsize); + newarray->ndim = ndim; + newarray->dataoffset = newhasnulls ? 
overheadlen : 0; + newarray->elemtype = ARR_ELEMTYPE(array); + memcpy(ARR_DIMS(newarray), dim, ndim * sizeof(int)); + memcpy(ARR_LBOUND(newarray), lb, ndim * sizeof(int)); + + /* + * Fill in data + */ + memcpy((char *) newarray + overheadlen, + (char *) array + oldoverheadlen, + lenbefore); + if (!isNull) + ArrayCastAndSet(dataValue, elmlen, elmbyval, elmalign, + (char *) newarray + overheadlen + lenbefore); + memcpy((char *) newarray + overheadlen + lenbefore + newitemlen, + (char *) array + oldoverheadlen + lenbefore + olditemlen, + lenafter); + + /* + * Fill in nulls bitmap if needed + * + * Note: it's possible we just replaced the last NULL with a non-NULL, and + * could get rid of the bitmap. Seems not worth testing for though. + */ + if (newhasnulls) + { + bits8 *newnullbitmap = ARR_NULLBITMAP(newarray); + + /* palloc0 above already marked any inserted positions as nulls */ + /* Fix the inserted value */ + if (addedafter) + array_set_isnull(newnullbitmap, newnitems - 1, isNull); + else + array_set_isnull(newnullbitmap, offset, isNull); + /* Fix the copied range(s) */ + if (addedbefore) + array_bitmap_copy(newnullbitmap, addedbefore, + oldnullbitmap, 0, + oldnitems); + else + { + array_bitmap_copy(newnullbitmap, 0, + oldnullbitmap, 0, + offset); + if (addedafter == 0) + array_bitmap_copy(newnullbitmap, offset + 1, + oldnullbitmap, offset + 1, + oldnitems - offset - 1); + } + } + + return PointerGetDatum(newarray); +} + +/* + * Implementation of array_set_element() for an expanded array + * + * Note: as with any operation on a read/write expanded object, we must + * take pains not to leave the object in a corrupt state if we fail partway + * through. + */ +static Datum +array_set_element_expanded(Datum arraydatum, + int nSubscripts, int *indx, + Datum dataValue, bool isNull, + int arraytyplen, + int elmlen, bool elmbyval, char elmalign) +{ + ExpandedArrayHeader *eah; + Datum *dvalues; + bool *dnulls; + int i, + ndim, + dim[MAXDIM], + lb[MAXDIM], + offset; + bool dimschanged, + newhasnulls; + int addedbefore, + addedafter; + char *oldValue; + + /* Convert to R/W object if not so already */ + eah = DatumGetExpandedArray(arraydatum); + + /* Sanity-check caller's info against object; we don't use it otherwise */ + Assert(arraytyplen == -1); + Assert(elmlen == eah->typlen); + Assert(elmbyval == eah->typbyval); + Assert(elmalign == eah->typalign); + + /* + * Copy dimension info into local storage. This allows us to modify the + * dimensions if needed, while not messing up the expanded value if we + * fail partway through. + */ + ndim = eah->ndims; + Assert(ndim >= 0 && ndim <= MAXDIM); + memcpy(dim, eah->dims, ndim * sizeof(int)); + memcpy(lb, eah->lbound, ndim * sizeof(int)); + dimschanged = false; + + /* + * if number of dims is zero, i.e. an empty array, create an array with + * nSubscripts dimensions, and set the lower bounds to the supplied + * subscripts. + */ + if (ndim == 0) + { + /* + * Allocate adequate space for new dimension info. This is harmless + * if we fail later. 
+ */ + Assert(nSubscripts > 0 && nSubscripts <= MAXDIM); + eah->dims = (int *) MemoryContextAllocZero(eah->hdr.eoh_context, + nSubscripts * sizeof(int)); + eah->lbound = (int *) MemoryContextAllocZero(eah->hdr.eoh_context, + nSubscripts * sizeof(int)); + + /* Update local copies of dimension info */ + ndim = nSubscripts; + for (i = 0; i < nSubscripts; i++) + { + dim[i] = 0; + lb[i] = indx[i]; + } + dimschanged = true; + } + else if (ndim != nSubscripts) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("wrong number of array subscripts"))); + + /* + * Deconstruct array if we didn't already. (Someday maybe add a special + * case path for fixed-length, no-nulls cases, where we can overwrite an + * element in place without ever deconstructing. But today is not that + * day.) + */ + deconstruct_expanded_array(eah); + + /* + * Copy new element into array's context, if needed (we assume it's + * already detoasted, so no junk should be created). Doing this before + * we've made any significant changes ensures that our behavior is sane + * even when the source is a reference to some element of this same array. + * If we fail further down, this memory is leaked, but that's reasonably + * harmless. + */ + if (!eah->typbyval && !isNull) + { + MemoryContext oldcxt = MemoryContextSwitchTo(eah->hdr.eoh_context); + + dataValue = datumCopy(dataValue, false, eah->typlen); + MemoryContextSwitchTo(oldcxt); + } + + dvalues = eah->dvalues; + dnulls = eah->dnulls; + + newhasnulls = ((dnulls != NULL) || isNull); + addedbefore = addedafter = 0; + + /* + * Check subscripts (this logic must match array_set_element). We assume + * the existing subscripts passed ArrayCheckBounds, so that dim[i] + lb[i] + * can be computed without overflow. But we must beware of other + * overflows in our calculations of new dim[] values. 
+ */ + if (ndim == 1) + { + if (indx[0] < lb[0]) + { + /* addedbefore = lb[0] - indx[0]; */ + /* dim[0] += addedbefore; */ + if (pg_sub_s32_overflow(lb[0], indx[0], &addedbefore) || + pg_add_s32_overflow(dim[0], addedbefore, &dim[0])) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("array size exceeds the maximum allowed (%d)", + (int) MaxArraySize))); + lb[0] = indx[0]; + dimschanged = true; + if (addedbefore > 1) + newhasnulls = true; /* will insert nulls */ + } + if (indx[0] >= (dim[0] + lb[0])) + { + /* addedafter = indx[0] - (dim[0] + lb[0]) + 1; */ + /* dim[0] += addedafter; */ + if (pg_sub_s32_overflow(indx[0], dim[0] + lb[0], &addedafter) || + pg_add_s32_overflow(addedafter, 1, &addedafter) || + pg_add_s32_overflow(dim[0], addedafter, &dim[0])) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("array size exceeds the maximum allowed (%d)", + (int) MaxArraySize))); + dimschanged = true; + if (addedafter > 1) + newhasnulls = true; /* will insert nulls */ + } + } + else + { + /* + * XXX currently we do not support extending multi-dimensional arrays + * during assignment + */ + for (i = 0; i < ndim; i++) + { + if (indx[i] < lb[i] || + indx[i] >= (dim[i] + lb[i])) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("array subscript out of range"))); + } + } + + /* Check for overflow of the array dimensions */ + if (dimschanged) + { + (void) ArrayGetNItems(ndim, dim); + ArrayCheckBounds(ndim, dim, lb); + } + + /* Now we can calculate linear offset of target item in array */ + offset = ArrayGetOffset(nSubscripts, dim, lb, indx); + + /* Physically enlarge existing dvalues/dnulls arrays if needed */ + if (dim[0] > eah->dvalueslen) + { + /* We want some extra space if we're enlarging */ + int newlen = dim[0] + dim[0] / 8; + + newlen = Max(newlen, dim[0]); /* integer overflow guard */ + eah->dvalues = dvalues = (Datum *) + repalloc(dvalues, newlen * sizeof(Datum)); + if (dnulls) + eah->dnulls = dnulls = (bool *) + repalloc(dnulls, newlen * sizeof(bool)); + eah->dvalueslen = newlen; + } + + /* + * If we need a nulls bitmap and don't already have one, create it, being + * sure to mark all existing entries as not null. + */ + if (newhasnulls && dnulls == NULL) + eah->dnulls = dnulls = (bool *) + MemoryContextAllocZero(eah->hdr.eoh_context, + eah->dvalueslen * sizeof(bool)); + + /* + * We now have all the needed space allocated, so we're ready to make + * irreversible changes. Be very wary of allowing failure below here. 
+ */ + + /* Flattened value will no longer represent array accurately */ + eah->fvalue = NULL; + /* And we don't know the flattened size either */ + eah->flat_size = 0; + + /* Update dimensionality info if needed */ + if (dimschanged) + { + eah->ndims = ndim; + memcpy(eah->dims, dim, ndim * sizeof(int)); + memcpy(eah->lbound, lb, ndim * sizeof(int)); + } + + /* Reposition items if needed, and fill addedbefore items with nulls */ + if (addedbefore > 0) + { + memmove(dvalues + addedbefore, dvalues, eah->nelems * sizeof(Datum)); + for (i = 0; i < addedbefore; i++) + dvalues[i] = (Datum) 0; + if (dnulls) + { + memmove(dnulls + addedbefore, dnulls, eah->nelems * sizeof(bool)); + for (i = 0; i < addedbefore; i++) + dnulls[i] = true; + } + eah->nelems += addedbefore; + } + + /* fill addedafter items with nulls */ + if (addedafter > 0) + { + for (i = 0; i < addedafter; i++) + dvalues[eah->nelems + i] = (Datum) 0; + if (dnulls) + { + for (i = 0; i < addedafter; i++) + dnulls[eah->nelems + i] = true; + } + eah->nelems += addedafter; + } + + /* Grab old element value for pfree'ing, if needed. */ + if (!eah->typbyval && (dnulls == NULL || !dnulls[offset])) + oldValue = (char *) DatumGetPointer(dvalues[offset]); + else + oldValue = NULL; + + /* And finally we can insert the new element. */ + dvalues[offset] = dataValue; + if (dnulls) + dnulls[offset] = isNull; + + /* + * Free old element if needed; this keeps repeated element replacements + * from bloating the array's storage. If the pfree somehow fails, it + * won't corrupt the array. + */ + if (oldValue) + { + /* Don't try to pfree a part of the original flat array */ + if (oldValue < eah->fstartptr || oldValue >= eah->fendptr) + pfree(oldValue); + } + + /* Done, return standard TOAST pointer for object */ + return EOHPGetRWDatum(&eah->hdr); +} + +/* + * array_set_slice : + * This routine sets the value of a range of array locations (specified + * by upper and lower subscript values) to new values passed as + * another array. + * + * This handles both ordinary varlena arrays and fixed-length arrays. + * + * Inputs: + * arraydatum: the initial array object (mustn't be NULL) + * nSubscripts: number of subscripts supplied (must be same for upper/lower) + * upperIndx[]: the upper subscript values + * lowerIndx[]: the lower subscript values + * upperProvided[]: true for provided upper subscript values + * lowerProvided[]: true for provided lower subscript values + * srcArrayDatum: the source for the inserted values + * isNull: indicates whether srcArrayDatum is NULL + * arraytyplen: pg_type.typlen for the array type + * elmlen: pg_type.typlen for the array's element type + * elmbyval: pg_type.typbyval for the array's element type + * elmalign: pg_type.typalign for the array's element type + * + * Result: + * A new array is returned, just like the old except for the + * modified range. The original array object is not changed. + * + * Omitted upper and lower subscript values are replaced by the corresponding + * array bound. + * + * For one-dimensional arrays only, we allow the array to be extended + * by assigning to positions outside the existing subscript range; any + * positions between the existing elements and the new ones are set to NULLs. + * (XXX TODO: allow a corresponding behavior for multidimensional arrays) + * + * NOTE: we assume it is OK to scribble on the provided index arrays + * lowerIndx[] and upperIndx[]; also, these arrays must be of size MAXDIM + * even when nSubscripts is less. These are generally just temporaries. 
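+ *
+ * For example, an UPDATE that sets arr[2:4] = '{7,8,9}' on a
+ * one-dimensional array reaches this routine with lowerIndx[0] = 2,
+ * upperIndx[0] = 4 and a three-element source array.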
+ * + * NOTE: For assignments, we throw an error for silly subscripts etc, + * rather than returning a NULL or empty array as the fetch operations do. + */ +Datum +array_set_slice(Datum arraydatum, + int nSubscripts, + int *upperIndx, + int *lowerIndx, + bool *upperProvided, + bool *lowerProvided, + Datum srcArrayDatum, + bool isNull, + int arraytyplen, + int elmlen, + bool elmbyval, + char elmalign) +{ + ArrayType *array; + ArrayType *srcArray; + ArrayType *newarray; + int i, + ndim, + dim[MAXDIM], + lb[MAXDIM], + span[MAXDIM]; + bool newhasnulls; + int nitems, + nsrcitems, + olddatasize, + newsize, + olditemsize, + newitemsize, + overheadlen, + oldoverheadlen, + addedbefore, + addedafter, + lenbefore, + lenafter, + itemsbefore, + itemsafter, + nolditems; + + /* Currently, assignment from a NULL source array is a no-op */ + if (isNull) + return arraydatum; + + if (arraytyplen > 0) + { + /* + * fixed-length arrays -- not got round to doing this... + */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("updates on slices of fixed-length arrays not implemented"))); + } + + /* detoast arrays if necessary */ + array = DatumGetArrayTypeP(arraydatum); + srcArray = DatumGetArrayTypeP(srcArrayDatum); + + /* note: we assume srcArray contains no toasted elements */ + + ndim = ARR_NDIM(array); + + /* + * if number of dims is zero, i.e. an empty array, create an array with + * nSubscripts dimensions, and set the upper and lower bounds to the + * supplied subscripts + */ + if (ndim == 0) + { + Datum *dvalues; + bool *dnulls; + int nelems; + Oid elmtype = ARR_ELEMTYPE(array); + + deconstruct_array(srcArray, elmtype, elmlen, elmbyval, elmalign, + &dvalues, &dnulls, &nelems); + + for (i = 0; i < nSubscripts; i++) + { + if (!upperProvided[i] || !lowerProvided[i]) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("array slice subscript must provide both boundaries"), + errdetail("When assigning to a slice of an empty array value," + " slice boundaries must be fully specified."))); + + dim[i] = 1 + upperIndx[i] - lowerIndx[i]; + lb[i] = lowerIndx[i]; + } + + /* complain if too few source items; we ignore extras, however */ + if (nelems < ArrayGetNItems(nSubscripts, dim)) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("source array too small"))); + + return PointerGetDatum(construct_md_array(dvalues, dnulls, nSubscripts, + dim, lb, elmtype, + elmlen, elmbyval, elmalign)); + } + + if (ndim < nSubscripts || ndim <= 0 || ndim > MAXDIM) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("wrong number of array subscripts"))); + + /* copy dim/lb since we may modify them */ + memcpy(dim, ARR_DIMS(array), ndim * sizeof(int)); + memcpy(lb, ARR_LBOUND(array), ndim * sizeof(int)); + + newhasnulls = (ARR_HASNULL(array) || ARR_HASNULL(srcArray)); + addedbefore = addedafter = 0; + + /* + * Check subscripts. We assume the existing subscripts passed + * ArrayCheckBounds, so that dim[i] + lb[i] can be computed without + * overflow. But we must beware of other overflows in our calculations of + * new dim[] values. 
+ */ + if (ndim == 1) + { + Assert(nSubscripts == 1); + if (!lowerProvided[0]) + lowerIndx[0] = lb[0]; + if (!upperProvided[0]) + upperIndx[0] = dim[0] + lb[0] - 1; + if (lowerIndx[0] > upperIndx[0]) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("upper bound cannot be less than lower bound"))); + if (lowerIndx[0] < lb[0]) + { + /* addedbefore = lb[0] - lowerIndx[0]; */ + /* dim[0] += addedbefore; */ + if (pg_sub_s32_overflow(lb[0], lowerIndx[0], &addedbefore) || + pg_add_s32_overflow(dim[0], addedbefore, &dim[0])) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("array size exceeds the maximum allowed (%d)", + (int) MaxArraySize))); + lb[0] = lowerIndx[0]; + if (addedbefore > 1) + newhasnulls = true; /* will insert nulls */ + } + if (upperIndx[0] >= (dim[0] + lb[0])) + { + /* addedafter = upperIndx[0] - (dim[0] + lb[0]) + 1; */ + /* dim[0] += addedafter; */ + if (pg_sub_s32_overflow(upperIndx[0], dim[0] + lb[0], &addedafter) || + pg_add_s32_overflow(addedafter, 1, &addedafter) || + pg_add_s32_overflow(dim[0], addedafter, &dim[0])) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("array size exceeds the maximum allowed (%d)", + (int) MaxArraySize))); + if (addedafter > 1) + newhasnulls = true; /* will insert nulls */ + } + } + else + { + /* + * XXX currently we do not support extending multi-dimensional arrays + * during assignment + */ + for (i = 0; i < nSubscripts; i++) + { + if (!lowerProvided[i]) + lowerIndx[i] = lb[i]; + if (!upperProvided[i]) + upperIndx[i] = dim[i] + lb[i] - 1; + if (lowerIndx[i] > upperIndx[i]) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("upper bound cannot be less than lower bound"))); + if (lowerIndx[i] < lb[i] || + upperIndx[i] >= (dim[i] + lb[i])) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("array subscript out of range"))); + } + /* fill any missing subscript positions with full array range */ + for (; i < ndim; i++) + { + lowerIndx[i] = lb[i]; + upperIndx[i] = dim[i] + lb[i] - 1; + if (lowerIndx[i] > upperIndx[i]) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("upper bound cannot be less than lower bound"))); + } + } + + /* Do this mainly to check for overflow */ + nitems = ArrayGetNItems(ndim, dim); + ArrayCheckBounds(ndim, dim, lb); + + /* + * Make sure source array has enough entries. Note we ignore the shape of + * the source array and just read entries serially. + */ + mda_get_range(ndim, span, lowerIndx, upperIndx); + nsrcitems = ArrayGetNItems(ndim, span); + if (nsrcitems > ArrayGetNItems(ARR_NDIM(srcArray), ARR_DIMS(srcArray))) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("source array too small"))); + + /* + * Compute space occupied by new entries, space occupied by replaced + * entries, and required space for new array. + */ + if (newhasnulls) + overheadlen = ARR_OVERHEAD_WITHNULLS(ndim, nitems); + else + overheadlen = ARR_OVERHEAD_NONULLS(ndim); + newitemsize = array_nelems_size(ARR_DATA_PTR(srcArray), 0, + ARR_NULLBITMAP(srcArray), nsrcitems, + elmlen, elmbyval, elmalign); + oldoverheadlen = ARR_DATA_OFFSET(array); + olddatasize = ARR_SIZE(array) - oldoverheadlen; + if (ndim > 1) + { + /* + * here we do not need to cope with extension of the array; it would + * be a lot more complicated if we had to do so... 
+ */ + olditemsize = array_slice_size(ARR_DATA_PTR(array), + ARR_NULLBITMAP(array), + ndim, dim, lb, + lowerIndx, upperIndx, + elmlen, elmbyval, elmalign); + lenbefore = lenafter = 0; /* keep compiler quiet */ + itemsbefore = itemsafter = nolditems = 0; + } + else + { + /* + * here we must allow for possibility of slice larger than orig array + * and/or not adjacent to orig array subscripts + */ + int oldlb = ARR_LBOUND(array)[0]; + int oldub = oldlb + ARR_DIMS(array)[0] - 1; + int slicelb = Max(oldlb, lowerIndx[0]); + int sliceub = Min(oldub, upperIndx[0]); + char *oldarraydata = ARR_DATA_PTR(array); + bits8 *oldarraybitmap = ARR_NULLBITMAP(array); + + /* count/size of old array entries that will go before the slice */ + itemsbefore = Min(slicelb, oldub + 1) - oldlb; + lenbefore = array_nelems_size(oldarraydata, 0, oldarraybitmap, + itemsbefore, + elmlen, elmbyval, elmalign); + /* count/size of old array entries that will be replaced by slice */ + if (slicelb > sliceub) + { + nolditems = 0; + olditemsize = 0; + } + else + { + nolditems = sliceub - slicelb + 1; + olditemsize = array_nelems_size(oldarraydata + lenbefore, + itemsbefore, oldarraybitmap, + nolditems, + elmlen, elmbyval, elmalign); + } + /* count/size of old array entries that will go after the slice */ + itemsafter = oldub + 1 - Max(sliceub + 1, oldlb); + lenafter = olddatasize - lenbefore - olditemsize; + } + + newsize = overheadlen + olddatasize - olditemsize + newitemsize; + + newarray = (ArrayType *) palloc0(newsize); + SET_VARSIZE(newarray, newsize); + newarray->ndim = ndim; + newarray->dataoffset = newhasnulls ? overheadlen : 0; + newarray->elemtype = ARR_ELEMTYPE(array); + memcpy(ARR_DIMS(newarray), dim, ndim * sizeof(int)); + memcpy(ARR_LBOUND(newarray), lb, ndim * sizeof(int)); + + if (ndim > 1) + { + /* + * here we do not need to cope with extension of the array; it would + * be a lot more complicated if we had to do so... + */ + array_insert_slice(newarray, array, srcArray, + ndim, dim, lb, + lowerIndx, upperIndx, + elmlen, elmbyval, elmalign); + } + else + { + /* fill in data */ + memcpy((char *) newarray + overheadlen, + (char *) array + oldoverheadlen, + lenbefore); + memcpy((char *) newarray + overheadlen + lenbefore, + ARR_DATA_PTR(srcArray), + newitemsize); + memcpy((char *) newarray + overheadlen + lenbefore + newitemsize, + (char *) array + oldoverheadlen + lenbefore + olditemsize, + lenafter); + /* fill in nulls bitmap if needed */ + if (newhasnulls) + { + bits8 *newnullbitmap = ARR_NULLBITMAP(newarray); + bits8 *oldnullbitmap = ARR_NULLBITMAP(array); + + /* palloc0 above already marked any inserted positions as nulls */ + array_bitmap_copy(newnullbitmap, addedbefore, + oldnullbitmap, 0, + itemsbefore); + array_bitmap_copy(newnullbitmap, lowerIndx[0] - lb[0], + ARR_NULLBITMAP(srcArray), 0, + nsrcitems); + array_bitmap_copy(newnullbitmap, addedbefore + itemsbefore + nolditems, + oldnullbitmap, itemsbefore + nolditems, + itemsafter); + } + } + + return PointerGetDatum(newarray); +} + +/* + * array_ref : backwards compatibility wrapper for array_get_element + * + * This only works for detoasted/flattened varlena arrays, since the array + * argument is declared as "ArrayType *". However there's enough code like + * that to justify preserving this API. 
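+ *
+ * Illustrative call (editor's sketch with a hypothetical int4 array "arr",
+ * not part of the original source): fetching the element at subscript 3:
+ *
+ *    int   indx[1] = {3};
+ *    bool  isnull;
+ *    Datum d;
+ *
+ *    d = array_ref(arr, 1, indx, -1, sizeof(int32), true, TYPALIGN_INT,
+ *                  &isnull);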
+ */ +Datum +array_ref(ArrayType *array, int nSubscripts, int *indx, + int arraytyplen, int elmlen, bool elmbyval, char elmalign, + bool *isNull) +{ + return array_get_element(PointerGetDatum(array), nSubscripts, indx, + arraytyplen, elmlen, elmbyval, elmalign, + isNull); +} + +/* + * array_set : backwards compatibility wrapper for array_set_element + * + * This only works for detoasted/flattened varlena arrays, since the array + * argument and result are declared as "ArrayType *". However there's enough + * code like that to justify preserving this API. + */ +ArrayType * +array_set(ArrayType *array, int nSubscripts, int *indx, + Datum dataValue, bool isNull, + int arraytyplen, int elmlen, bool elmbyval, char elmalign) +{ + return DatumGetArrayTypeP(array_set_element(PointerGetDatum(array), + nSubscripts, indx, + dataValue, isNull, + arraytyplen, + elmlen, elmbyval, elmalign)); +} + +/* + * array_map() + * + * Map an array through an arbitrary expression. Return a new array with + * the same dimensions and each source element transformed by the given, + * already-compiled expression. Each source element is placed in the + * innermost_caseval/innermost_casenull fields of the ExprState. + * + * Parameters are: + * * arrayd: Datum representing array argument. + * * exprstate: ExprState representing the per-element transformation. + * * econtext: context for expression evaluation. + * * retType: OID of element type of output array. This must be the same as, + * or binary-compatible with, the result type of the expression. It might + * be different from the input array's element type. + * * amstate: workspace for array_map. Must be zeroed by caller before + * first call, and not touched after that. + * + * It is legitimate to pass a freshly-zeroed ArrayMapState on each call, + * but better performance can be had if the state can be preserved across + * a series of calls. + * + * NB: caller must assure that input array is not NULL. NULL elements in + * the array are OK however. + * NB: caller should be running in econtext's per-tuple memory context. + */ +Datum +array_map(Datum arrayd, + ExprState *exprstate, ExprContext *econtext, + Oid retType, ArrayMapState *amstate) +{ + AnyArrayType *v = DatumGetAnyArrayP(arrayd); + ArrayType *result; + Datum *values; + bool *nulls; + int *dim; + int ndim; + int nitems; + int i; + int32 nbytes = 0; + int32 dataoffset; + bool hasnulls; + Oid inpType; + int inp_typlen; + bool inp_typbyval; + char inp_typalign; + int typlen; + bool typbyval; + char typalign; + array_iter iter; + ArrayMetaState *inp_extra; + ArrayMetaState *ret_extra; + Datum *transform_source = exprstate->innermost_caseval; + bool *transform_source_isnull = exprstate->innermost_casenull; + + inpType = AARR_ELEMTYPE(v); + ndim = AARR_NDIM(v); + dim = AARR_DIMS(v); + nitems = ArrayGetNItems(ndim, dim); + + /* Check for empty array */ + if (nitems <= 0) + { + /* Return empty array */ + return PointerGetDatum(construct_empty_array(retType)); + } + + /* + * We arrange to look up info about input and return element types only + * once per series of calls, assuming the element type doesn't change + * underneath us. 
+ */ + inp_extra = &amstate->inp_extra; + ret_extra = &amstate->ret_extra; + + if (inp_extra->element_type != inpType) + { + get_typlenbyvalalign(inpType, + &inp_extra->typlen, + &inp_extra->typbyval, + &inp_extra->typalign); + inp_extra->element_type = inpType; + } + inp_typlen = inp_extra->typlen; + inp_typbyval = inp_extra->typbyval; + inp_typalign = inp_extra->typalign; + + if (ret_extra->element_type != retType) + { + get_typlenbyvalalign(retType, + &ret_extra->typlen, + &ret_extra->typbyval, + &ret_extra->typalign); + ret_extra->element_type = retType; + } + typlen = ret_extra->typlen; + typbyval = ret_extra->typbyval; + typalign = ret_extra->typalign; + + /* Allocate temporary arrays for new values */ + values = (Datum *) palloc(nitems * sizeof(Datum)); + nulls = (bool *) palloc(nitems * sizeof(bool)); + + /* Loop over source data */ + array_iter_setup(&iter, v); + hasnulls = false; + + for (i = 0; i < nitems; i++) + { + /* Get source element, checking for NULL */ + *transform_source = + array_iter_next(&iter, transform_source_isnull, i, + inp_typlen, inp_typbyval, inp_typalign); + + /* Apply the given expression to source element */ + values[i] = ExecEvalExpr(exprstate, econtext, &nulls[i]); + + if (nulls[i]) + hasnulls = true; + else + { + /* Ensure data is not toasted */ + if (typlen == -1) + values[i] = PointerGetDatum(PG_DETOAST_DATUM(values[i])); + /* Update total result size */ + nbytes = att_addlength_datum(nbytes, typlen, values[i]); + nbytes = att_align_nominal(nbytes, typalign); + /* check for overflow of total request */ + if (!AllocSizeIsValid(nbytes)) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("array size exceeds the maximum allowed (%d)", + (int) MaxAllocSize))); + } + } + + /* Allocate and fill the result array */ + if (hasnulls) + { + dataoffset = ARR_OVERHEAD_WITHNULLS(ndim, nitems); + nbytes += dataoffset; + } + else + { + dataoffset = 0; /* marker for no null bitmap */ + nbytes += ARR_OVERHEAD_NONULLS(ndim); + } + result = (ArrayType *) palloc0(nbytes); + SET_VARSIZE(result, nbytes); + result->ndim = ndim; + result->dataoffset = dataoffset; + result->elemtype = retType; + memcpy(ARR_DIMS(result), AARR_DIMS(v), ndim * sizeof(int)); + memcpy(ARR_LBOUND(result), AARR_LBOUND(v), ndim * sizeof(int)); + + CopyArrayEls(result, + values, nulls, nitems, + typlen, typbyval, typalign, + false); + + /* + * Note: do not risk trying to pfree the results of the called expression + */ + pfree(values); + pfree(nulls); + + return PointerGetDatum(result); +} + +/* + * construct_array --- simple method for constructing an array object + * + * elems: array of Datum items to become the array contents + * (NULL element values are not supported). + * nelems: number of items + * elmtype, elmlen, elmbyval, elmalign: info for the datatype of the items + * + * A palloc'd 1-D array object is constructed and returned. Note that + * elem values will be copied into the object even if pass-by-ref type. + * Also note the result will be 0-D not 1-D if nelems = 0. + * + * NOTE: it would be cleaner to look up the elmlen/elmbval/elmalign info + * from the system catalogs, given the elmtype. However, the caller is + * in a better position to cache this info across multiple uses, or even + * to hard-wire values if the element type is hard-wired. 
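+ *
+ * Illustrative call (editor's sketch, not part of the original source):
+ * building a two-element int4 array:
+ *
+ *    Datum elems[2];
+ *
+ *    elems[0] = Int32GetDatum(1);
+ *    elems[1] = Int32GetDatum(2);
+ *    result = construct_array(elems, 2, INT4OID,
+ *                             sizeof(int32), true, TYPALIGN_INT);
+ *
+ * For built-in element types, construct_array_builtin() below avoids having
+ * to spell out the elmlen/elmbyval/elmalign values.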
+ */ +ArrayType * +construct_array(Datum *elems, int nelems, + Oid elmtype, + int elmlen, bool elmbyval, char elmalign) +{ + int dims[1]; + int lbs[1]; + + dims[0] = nelems; + lbs[0] = 1; + + return construct_md_array(elems, NULL, 1, dims, lbs, + elmtype, elmlen, elmbyval, elmalign); +} + +/* + * Like construct_array(), where elmtype must be a built-in type, and + * elmlen/elmbyval/elmalign is looked up from hardcoded data. This is often + * useful when manipulating arrays from/for system catalogs. + */ +ArrayType * +construct_array_builtin(Datum *elems, int nelems, Oid elmtype) +{ + int elmlen; + bool elmbyval; + char elmalign; + + switch (elmtype) + { + case CHAROID: + elmlen = 1; + elmbyval = true; + elmalign = TYPALIGN_CHAR; + break; + + case CSTRINGOID: + elmlen = -2; + elmbyval = false; + elmalign = TYPALIGN_CHAR; + break; + + case FLOAT4OID: + elmlen = sizeof(float4); + elmbyval = true; + elmalign = TYPALIGN_INT; + break; + + case INT2OID: + elmlen = sizeof(int16); + elmbyval = true; + elmalign = TYPALIGN_SHORT; + break; + + case INT4OID: + elmlen = sizeof(int32); + elmbyval = true; + elmalign = TYPALIGN_INT; + break; + + case INT8OID: + elmlen = sizeof(int64); + elmbyval = FLOAT8PASSBYVAL; + elmalign = TYPALIGN_DOUBLE; + break; + + case NAMEOID: + elmlen = NAMEDATALEN; + elmbyval = false; + elmalign = TYPALIGN_CHAR; + break; + + case OIDOID: + case REGTYPEOID: + elmlen = sizeof(Oid); + elmbyval = true; + elmalign = TYPALIGN_INT; + break; + + case TEXTOID: + elmlen = -1; + elmbyval = false; + elmalign = TYPALIGN_INT; + break; + + case TIDOID: + elmlen = sizeof(ItemPointerData); + elmbyval = false; + elmalign = TYPALIGN_SHORT; + break; + + default: + elog(ERROR, "type %u not supported by construct_array_builtin()", elmtype); + /* keep compiler quiet */ + elmlen = 0; + elmbyval = false; + elmalign = 0; + } + + return construct_array(elems, nelems, elmtype, elmlen, elmbyval, elmalign); +} + +/* + * construct_md_array --- simple method for constructing an array object + * with arbitrary dimensions and possible NULLs + * + * elems: array of Datum items to become the array contents + * nulls: array of is-null flags (can be NULL if no nulls) + * ndims: number of dimensions + * dims: integer array with size of each dimension + * lbs: integer array with lower bound of each dimension + * elmtype, elmlen, elmbyval, elmalign: info for the datatype of the items + * + * A palloc'd ndims-D array object is constructed and returned. Note that + * elem values will be copied into the object even if pass-by-ref type. + * Also note the result will be 0-D not ndims-D if any dims[i] = 0. + * + * NOTE: it would be cleaner to look up the elmlen/elmbval/elmalign info + * from the system catalogs, given the elmtype. However, the caller is + * in a better position to cache this info across multiple uses, or even + * to hard-wire values if the element type is hard-wired. 
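+ *
+ * Illustrative call (editor's sketch with a hypothetical "elems" array of
+ * six Datums in row-major order, not part of the original source): building
+ * a 2-by-3 int4 matrix with default lower bounds:
+ *
+ *    int dims[2] = {2, 3};
+ *    int lbs[2] = {1, 1};
+ *
+ *    result = construct_md_array(elems, NULL, 2, dims, lbs, INT4OID,
+ *                                sizeof(int32), true, TYPALIGN_INT);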
+ */ +ArrayType * +construct_md_array(Datum *elems, + bool *nulls, + int ndims, + int *dims, + int *lbs, + Oid elmtype, int elmlen, bool elmbyval, char elmalign) +{ + ArrayType *result; + bool hasnulls; + int32 nbytes; + int32 dataoffset; + int i; + int nelems; + + if (ndims < 0) /* we do allow zero-dimension arrays */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid number of dimensions: %d", ndims))); + if (ndims > MAXDIM) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)", + ndims, MAXDIM))); + + /* This checks for overflow of the array dimensions */ + nelems = ArrayGetNItems(ndims, dims); + ArrayCheckBounds(ndims, dims, lbs); + + /* if ndims <= 0 or any dims[i] == 0, return empty array */ + if (nelems <= 0) + return construct_empty_array(elmtype); + + /* compute required space */ + nbytes = 0; + hasnulls = false; + for (i = 0; i < nelems; i++) + { + if (nulls && nulls[i]) + { + hasnulls = true; + continue; + } + /* make sure data is not toasted */ + if (elmlen == -1) + elems[i] = PointerGetDatum(PG_DETOAST_DATUM(elems[i])); + nbytes = att_addlength_datum(nbytes, elmlen, elems[i]); + nbytes = att_align_nominal(nbytes, elmalign); + /* check for overflow of total request */ + if (!AllocSizeIsValid(nbytes)) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("array size exceeds the maximum allowed (%d)", + (int) MaxAllocSize))); + } + + /* Allocate and initialize result array */ + if (hasnulls) + { + dataoffset = ARR_OVERHEAD_WITHNULLS(ndims, nelems); + nbytes += dataoffset; + } + else + { + dataoffset = 0; /* marker for no null bitmap */ + nbytes += ARR_OVERHEAD_NONULLS(ndims); + } + result = (ArrayType *) palloc0(nbytes); + SET_VARSIZE(result, nbytes); + result->ndim = ndims; + result->dataoffset = dataoffset; + result->elemtype = elmtype; + memcpy(ARR_DIMS(result), dims, ndims * sizeof(int)); + memcpy(ARR_LBOUND(result), lbs, ndims * sizeof(int)); + + CopyArrayEls(result, + elems, nulls, nelems, + elmlen, elmbyval, elmalign, + false); + + return result; +} + +/* + * construct_empty_array --- make a zero-dimensional array of given type + */ +ArrayType * +construct_empty_array(Oid elmtype) +{ + ArrayType *result; + + result = (ArrayType *) palloc0(sizeof(ArrayType)); + SET_VARSIZE(result, sizeof(ArrayType)); + result->ndim = 0; + result->dataoffset = 0; + result->elemtype = elmtype; + return result; +} + +/* + * construct_empty_expanded_array: make an empty expanded array + * given only type information. (metacache can be NULL if not needed.) + */ +ExpandedArrayHeader * +construct_empty_expanded_array(Oid element_type, + MemoryContext parentcontext, + ArrayMetaState *metacache) +{ + ArrayType *array = construct_empty_array(element_type); + Datum d; + + d = expand_array(PointerGetDatum(array), parentcontext, metacache); + pfree(array); + return (ExpandedArrayHeader *) DatumGetEOHP(d); +} + +/* + * deconstruct_array --- simple method for extracting data from an array + * + * array: array object to examine (must not be NULL) + * elmtype, elmlen, elmbyval, elmalign: info for the datatype of the items + * elemsp: return value, set to point to palloc'd array of Datum values + * nullsp: return value, set to point to palloc'd array of isnull markers + * nelemsp: return value, set to number of extracted values + * + * The caller may pass nullsp == NULL if it does not support NULLs in the + * array. 
Note that this produces a very uninformative error message, + * so do it only in cases where a NULL is really not expected. + * + * If array elements are pass-by-ref data type, the returned Datums will + * be pointers into the array object. + * + * NOTE: it would be cleaner to look up the elmlen/elmbval/elmalign info + * from the system catalogs, given the elmtype. However, the caller is + * in a better position to cache this info across multiple uses, or even + * to hard-wire values if the element type is hard-wired. + */ +void +deconstruct_array(ArrayType *array, + Oid elmtype, + int elmlen, bool elmbyval, char elmalign, + Datum **elemsp, bool **nullsp, int *nelemsp) +{ + Datum *elems; + bool *nulls; + int nelems; + char *p; + bits8 *bitmap; + int bitmask; + int i; + + Assert(ARR_ELEMTYPE(array) == elmtype); + + nelems = ArrayGetNItems(ARR_NDIM(array), ARR_DIMS(array)); + *elemsp = elems = (Datum *) palloc(nelems * sizeof(Datum)); + if (nullsp) + *nullsp = nulls = (bool *) palloc0(nelems * sizeof(bool)); + else + nulls = NULL; + *nelemsp = nelems; + + p = ARR_DATA_PTR(array); + bitmap = ARR_NULLBITMAP(array); + bitmask = 1; + + for (i = 0; i < nelems; i++) + { + /* Get source element, checking for NULL */ + if (bitmap && (*bitmap & bitmask) == 0) + { + elems[i] = (Datum) 0; + if (nulls) + nulls[i] = true; + else + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("null array element not allowed in this context"))); + } + else + { + elems[i] = fetch_att(p, elmbyval, elmlen); + p = att_addlength_pointer(p, elmlen, p); + p = (char *) att_align_nominal(p, elmalign); + } + + /* advance bitmap pointer if any */ + if (bitmap) + { + bitmask <<= 1; + if (bitmask == 0x100) + { + bitmap++; + bitmask = 1; + } + } + } +} + +/* + * Like deconstruct_array(), where elmtype must be a built-in type, and + * elmlen/elmbyval/elmalign is looked up from hardcoded data. This is often + * useful when manipulating arrays from/for system catalogs. + */ +void +deconstruct_array_builtin(ArrayType *array, + Oid elmtype, + Datum **elemsp, bool **nullsp, int *nelemsp) +{ + int elmlen; + bool elmbyval; + char elmalign; + + switch (elmtype) + { + case CHAROID: + elmlen = 1; + elmbyval = true; + elmalign = TYPALIGN_CHAR; + break; + + case CSTRINGOID: + elmlen = -2; + elmbyval = false; + elmalign = TYPALIGN_CHAR; + break; + + case FLOAT8OID: + elmlen = sizeof(float8); + elmbyval = FLOAT8PASSBYVAL; + elmalign = TYPALIGN_DOUBLE; + break; + + case INT2OID: + elmlen = sizeof(int16); + elmbyval = true; + elmalign = TYPALIGN_SHORT; + break; + + case OIDOID: + elmlen = sizeof(Oid); + elmbyval = true; + elmalign = TYPALIGN_INT; + break; + + case TEXTOID: + elmlen = -1; + elmbyval = false; + elmalign = TYPALIGN_INT; + break; + + case TIDOID: + elmlen = sizeof(ItemPointerData); + elmbyval = false; + elmalign = TYPALIGN_SHORT; + break; + + default: + elog(ERROR, "type %u not supported by deconstruct_array_builtin()", elmtype); + /* keep compiler quiet */ + elmlen = 0; + elmbyval = false; + elmalign = 0; + } + + deconstruct_array(array, elmtype, elmlen, elmbyval, elmalign, elemsp, nullsp, nelemsp); +} + +/* + * array_contains_nulls --- detect whether an array has any null elements + * + * This gives an accurate answer, whereas testing ARR_HASNULL only tells + * if the array *might* contain a null. 
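+ *
+ * Illustrative use (editor's sketch, not part of the original source): a
+ * caller that cannot handle nulls might reject them up front with
+ *
+ *    if (array_contains_nulls(arr))
+ *        ereport(ERROR,
+ *                (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ *                 errmsg("array must not contain nulls")));
+ *
+ * whereas testing only ARR_HASNULL(arr) could also reject an array whose
+ * null bitmap exists but no longer marks any element as null.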
+ */ +bool +array_contains_nulls(ArrayType *array) +{ + int nelems; + bits8 *bitmap; + int bitmask; + + /* Easy answer if there's no null bitmap */ + if (!ARR_HASNULL(array)) + return false; + + nelems = ArrayGetNItems(ARR_NDIM(array), ARR_DIMS(array)); + + bitmap = ARR_NULLBITMAP(array); + + /* check whole bytes of the bitmap byte-at-a-time */ + while (nelems >= 8) + { + if (*bitmap != 0xFF) + return true; + bitmap++; + nelems -= 8; + } + + /* check last partial byte */ + bitmask = 1; + while (nelems > 0) + { + if ((*bitmap & bitmask) == 0) + return true; + bitmask <<= 1; + nelems--; + } + + return false; +} + + +/* + * array_eq : + * compares two arrays for equality + * result : + * returns true if the arrays are equal, false otherwise. + * + * Note: we do not use array_cmp here, since equality may be meaningful in + * datatypes that don't have a total ordering (and hence no btree support). + */ +Datum +array_eq(PG_FUNCTION_ARGS) +{ + LOCAL_FCINFO(locfcinfo, 2); + AnyArrayType *array1 = PG_GETARG_ANY_ARRAY_P(0); + AnyArrayType *array2 = PG_GETARG_ANY_ARRAY_P(1); + Oid collation = PG_GET_COLLATION(); + int ndims1 = AARR_NDIM(array1); + int ndims2 = AARR_NDIM(array2); + int *dims1 = AARR_DIMS(array1); + int *dims2 = AARR_DIMS(array2); + int *lbs1 = AARR_LBOUND(array1); + int *lbs2 = AARR_LBOUND(array2); + Oid element_type = AARR_ELEMTYPE(array1); + bool result = true; + int nitems; + TypeCacheEntry *typentry; + int typlen; + bool typbyval; + char typalign; + array_iter it1; + array_iter it2; + int i; + + if (element_type != AARR_ELEMTYPE(array2)) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("cannot compare arrays of different element types"))); + + /* fast path if the arrays do not have the same dimensionality */ + if (ndims1 != ndims2 || + memcmp(dims1, dims2, ndims1 * sizeof(int)) != 0 || + memcmp(lbs1, lbs2, ndims1 * sizeof(int)) != 0) + result = false; + else + { + /* + * We arrange to look up the equality function only once per series of + * calls, assuming the element type doesn't change underneath us. The + * typcache is used so that we have no memory leakage when being used + * as an index support function. + */ + typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; + if (typentry == NULL || + typentry->type_id != element_type) + { + typentry = lookup_type_cache(element_type, + TYPECACHE_EQ_OPR_FINFO); + if (!OidIsValid(typentry->eq_opr_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify an equality operator for type %s", + format_type_be(element_type)))); + fcinfo->flinfo->fn_extra = (void *) typentry; + } + typlen = typentry->typlen; + typbyval = typentry->typbyval; + typalign = typentry->typalign; + + /* + * apply the operator to each pair of array elements. + */ + InitFunctionCallInfoData(*locfcinfo, &typentry->eq_opr_finfo, 2, + collation, NULL, NULL); + + /* Loop over source data */ + nitems = ArrayGetNItems(ndims1, dims1); + array_iter_setup(&it1, array1); + array_iter_setup(&it2, array2); + + for (i = 0; i < nitems; i++) + { + Datum elt1; + Datum elt2; + bool isnull1; + bool isnull2; + bool oprresult; + + /* Get elements, checking for NULL */ + elt1 = array_iter_next(&it1, &isnull1, i, + typlen, typbyval, typalign); + elt2 = array_iter_next(&it2, &isnull2, i, + typlen, typbyval, typalign); + + /* + * We consider two NULLs equal; NULL and not-NULL are unequal. 
+ */ + if (isnull1 && isnull2) + continue; + if (isnull1 || isnull2) + { + result = false; + break; + } + + /* + * Apply the operator to the element pair; treat NULL as false + */ + locfcinfo->args[0].value = elt1; + locfcinfo->args[0].isnull = false; + locfcinfo->args[1].value = elt2; + locfcinfo->args[1].isnull = false; + locfcinfo->isnull = false; + oprresult = DatumGetBool(FunctionCallInvoke(locfcinfo)); + if (locfcinfo->isnull || !oprresult) + { + result = false; + break; + } + } + } + + /* Avoid leaking memory when handed toasted input. */ + AARR_FREE_IF_COPY(array1, 0); + AARR_FREE_IF_COPY(array2, 1); + + PG_RETURN_BOOL(result); +} + + +/*----------------------------------------------------------------------------- + * array-array bool operators: + * Given two arrays, iterate comparison operators + * over the array. Uses logic similar to text comparison + * functions, except element-by-element instead of + * character-by-character. + *---------------------------------------------------------------------------- + */ + +Datum +array_ne(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(!DatumGetBool(array_eq(fcinfo))); +} + +Datum +array_lt(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(array_cmp(fcinfo) < 0); +} + +Datum +array_gt(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(array_cmp(fcinfo) > 0); +} + +Datum +array_le(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(array_cmp(fcinfo) <= 0); +} + +Datum +array_ge(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(array_cmp(fcinfo) >= 0); +} + +Datum +btarraycmp(PG_FUNCTION_ARGS) +{ + PG_RETURN_INT32(array_cmp(fcinfo)); +} + +/* + * array_cmp() + * Internal comparison function for arrays. + * + * Returns -1, 0 or 1 + */ +static int +array_cmp(FunctionCallInfo fcinfo) +{ + LOCAL_FCINFO(locfcinfo, 2); + AnyArrayType *array1 = PG_GETARG_ANY_ARRAY_P(0); + AnyArrayType *array2 = PG_GETARG_ANY_ARRAY_P(1); + Oid collation = PG_GET_COLLATION(); + int ndims1 = AARR_NDIM(array1); + int ndims2 = AARR_NDIM(array2); + int *dims1 = AARR_DIMS(array1); + int *dims2 = AARR_DIMS(array2); + int nitems1 = ArrayGetNItems(ndims1, dims1); + int nitems2 = ArrayGetNItems(ndims2, dims2); + Oid element_type = AARR_ELEMTYPE(array1); + int result = 0; + TypeCacheEntry *typentry; + int typlen; + bool typbyval; + char typalign; + int min_nitems; + array_iter it1; + array_iter it2; + int i; + + if (element_type != AARR_ELEMTYPE(array2)) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("cannot compare arrays of different element types"))); + + /* + * We arrange to look up the comparison function only once per series of + * calls, assuming the element type doesn't change underneath us. The + * typcache is used so that we have no memory leakage when being used as + * an index support function. + */ + typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; + if (typentry == NULL || + typentry->type_id != element_type) + { + typentry = lookup_type_cache(element_type, + TYPECACHE_CMP_PROC_FINFO); + if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify a comparison function for type %s", + format_type_be(element_type)))); + fcinfo->flinfo->fn_extra = (void *) typentry; + } + typlen = typentry->typlen; + typbyval = typentry->typbyval; + typalign = typentry->typalign; + + /* + * apply the operator to each pair of array elements. 
+ */ + InitFunctionCallInfoData(*locfcinfo, &typentry->cmp_proc_finfo, 2, + collation, NULL, NULL); + + /* Loop over source data */ + min_nitems = Min(nitems1, nitems2); + array_iter_setup(&it1, array1); + array_iter_setup(&it2, array2); + + for (i = 0; i < min_nitems; i++) + { + Datum elt1; + Datum elt2; + bool isnull1; + bool isnull2; + int32 cmpresult; + + /* Get elements, checking for NULL */ + elt1 = array_iter_next(&it1, &isnull1, i, typlen, typbyval, typalign); + elt2 = array_iter_next(&it2, &isnull2, i, typlen, typbyval, typalign); + + /* + * We consider two NULLs equal; NULL > not-NULL. + */ + if (isnull1 && isnull2) + continue; + if (isnull1) + { + /* arg1 is greater than arg2 */ + result = 1; + break; + } + if (isnull2) + { + /* arg1 is less than arg2 */ + result = -1; + break; + } + + /* Compare the pair of elements */ + locfcinfo->args[0].value = elt1; + locfcinfo->args[0].isnull = false; + locfcinfo->args[1].value = elt2; + locfcinfo->args[1].isnull = false; + cmpresult = DatumGetInt32(FunctionCallInvoke(locfcinfo)); + + /* We don't expect comparison support functions to return null */ + Assert(!locfcinfo->isnull); + + if (cmpresult == 0) + continue; /* equal */ + + if (cmpresult < 0) + { + /* arg1 is less than arg2 */ + result = -1; + break; + } + else + { + /* arg1 is greater than arg2 */ + result = 1; + break; + } + } + + /* + * If arrays contain same data (up to end of shorter one), apply + * additional rules to sort by dimensionality. The relative significance + * of the different bits of information is historical; mainly we just care + * that we don't say "equal" for arrays of different dimensionality. + */ + if (result == 0) + { + if (nitems1 != nitems2) + result = (nitems1 < nitems2) ? -1 : 1; + else if (ndims1 != ndims2) + result = (ndims1 < ndims2) ? -1 : 1; + else + { + for (i = 0; i < ndims1; i++) + { + if (dims1[i] != dims2[i]) + { + result = (dims1[i] < dims2[i]) ? -1 : 1; + break; + } + } + if (result == 0) + { + int *lbound1 = AARR_LBOUND(array1); + int *lbound2 = AARR_LBOUND(array2); + + for (i = 0; i < ndims1; i++) + { + if (lbound1[i] != lbound2[i]) + { + result = (lbound1[i] < lbound2[i]) ? -1 : 1; + break; + } + } + } + } + } + + /* Avoid leaking memory when handed toasted input. */ + AARR_FREE_IF_COPY(array1, 0); + AARR_FREE_IF_COPY(array2, 1); + + return result; +} + + +/*----------------------------------------------------------------------------- + * array hashing + * Hash the elements and combine the results. + *---------------------------------------------------------------------------- + */ + +Datum +hash_array(PG_FUNCTION_ARGS) +{ + LOCAL_FCINFO(locfcinfo, 1); + AnyArrayType *array = PG_GETARG_ANY_ARRAY_P(0); + int ndims = AARR_NDIM(array); + int *dims = AARR_DIMS(array); + Oid element_type = AARR_ELEMTYPE(array); + uint32 result = 1; + int nitems; + TypeCacheEntry *typentry; + int typlen; + bool typbyval; + char typalign; + int i; + array_iter iter; + + /* + * We arrange to look up the hash function only once per series of calls, + * assuming the element type doesn't change underneath us. The typcache + * is used so that we have no memory leakage when being used as an index + * support function. 
+ */ + typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; + if (typentry == NULL || + typentry->type_id != element_type) + { + typentry = lookup_type_cache(element_type, + TYPECACHE_HASH_PROC_FINFO); + if (!OidIsValid(typentry->hash_proc_finfo.fn_oid) && element_type != RECORDOID) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify a hash function for type %s", + format_type_be(element_type)))); + + /* + * The type cache doesn't believe that record is hashable (see + * cache_record_field_properties()), but since we're here, we're + * committed to hashing, so we can assume it does. Worst case, if any + * components of the record don't support hashing, we will fail at + * execution. + */ + if (element_type == RECORDOID) + { + MemoryContext oldcontext; + TypeCacheEntry *record_typentry; + + oldcontext = MemoryContextSwitchTo(fcinfo->flinfo->fn_mcxt); + + /* + * Make fake type cache entry structure. Note that we can't just + * modify typentry, since that points directly into the type + * cache. + */ + record_typentry = palloc0(sizeof(*record_typentry)); + record_typentry->type_id = element_type; + + /* fill in what we need below */ + record_typentry->typlen = typentry->typlen; + record_typentry->typbyval = typentry->typbyval; + record_typentry->typalign = typentry->typalign; + fmgr_info(F_HASH_RECORD, &record_typentry->hash_proc_finfo); + + MemoryContextSwitchTo(oldcontext); + + typentry = record_typentry; + } + + fcinfo->flinfo->fn_extra = (void *) typentry; + } + + typlen = typentry->typlen; + typbyval = typentry->typbyval; + typalign = typentry->typalign; + + /* + * apply the hash function to each array element. + */ + InitFunctionCallInfoData(*locfcinfo, &typentry->hash_proc_finfo, 1, + PG_GET_COLLATION(), NULL, NULL); + + /* Loop over source data */ + nitems = ArrayGetNItems(ndims, dims); + array_iter_setup(&iter, array); + + for (i = 0; i < nitems; i++) + { + Datum elt; + bool isnull; + uint32 elthash; + + /* Get element, checking for NULL */ + elt = array_iter_next(&iter, &isnull, i, typlen, typbyval, typalign); + + if (isnull) + { + /* Treat nulls as having hashvalue 0 */ + elthash = 0; + } + else + { + /* Apply the hash function */ + locfcinfo->args[0].value = elt; + locfcinfo->args[0].isnull = false; + elthash = DatumGetUInt32(FunctionCallInvoke(locfcinfo)); + /* We don't expect hash functions to return null */ + Assert(!locfcinfo->isnull); + } + + /* + * Combine hash values of successive elements by multiplying the + * current value by 31 and adding on the new element's hash value. + * + * The result is a sum in which each element's hash value is + * multiplied by a different power of 31. This is modulo 2^32 + * arithmetic, and the powers of 31 modulo 2^32 form a cyclic group of + * order 2^27. So for arrays of up to 2^27 elements, each element's + * hash value is multiplied by a different (odd) number, resulting in + * a good mixing of all the elements' hash values. + */ + result = (result << 5) - result + elthash; + } + + /* Avoid leaking memory when handed toasted input. */ + AARR_FREE_IF_COPY(array, 0); + + PG_RETURN_UINT32(result); +} + +/* + * Returns 64-bit value by hashing a value to a 64-bit value, with a seed. + * Otherwise, similar to hash_array. 
+ */ +Datum +hash_array_extended(PG_FUNCTION_ARGS) +{ + LOCAL_FCINFO(locfcinfo, 2); + AnyArrayType *array = PG_GETARG_ANY_ARRAY_P(0); + uint64 seed = PG_GETARG_INT64(1); + int ndims = AARR_NDIM(array); + int *dims = AARR_DIMS(array); + Oid element_type = AARR_ELEMTYPE(array); + uint64 result = 1; + int nitems; + TypeCacheEntry *typentry; + int typlen; + bool typbyval; + char typalign; + int i; + array_iter iter; + + typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; + if (typentry == NULL || + typentry->type_id != element_type) + { + typentry = lookup_type_cache(element_type, + TYPECACHE_HASH_EXTENDED_PROC_FINFO); + if (!OidIsValid(typentry->hash_extended_proc_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify an extended hash function for type %s", + format_type_be(element_type)))); + fcinfo->flinfo->fn_extra = (void *) typentry; + } + typlen = typentry->typlen; + typbyval = typentry->typbyval; + typalign = typentry->typalign; + + InitFunctionCallInfoData(*locfcinfo, &typentry->hash_extended_proc_finfo, 2, + PG_GET_COLLATION(), NULL, NULL); + + /* Loop over source data */ + nitems = ArrayGetNItems(ndims, dims); + array_iter_setup(&iter, array); + + for (i = 0; i < nitems; i++) + { + Datum elt; + bool isnull; + uint64 elthash; + + /* Get element, checking for NULL */ + elt = array_iter_next(&iter, &isnull, i, typlen, typbyval, typalign); + + if (isnull) + { + elthash = 0; + } + else + { + /* Apply the hash function */ + locfcinfo->args[0].value = elt; + locfcinfo->args[0].isnull = false; + locfcinfo->args[1].value = Int64GetDatum(seed); + locfcinfo->args[1].isnull = false; + elthash = DatumGetUInt64(FunctionCallInvoke(locfcinfo)); + /* We don't expect hash functions to return null */ + Assert(!locfcinfo->isnull); + } + + result = (result << 5) - result + elthash; + } + + AARR_FREE_IF_COPY(array, 0); + + PG_RETURN_UINT64(result); +} + + +/*----------------------------------------------------------------------------- + * array overlap/containment comparisons + * These use the same methods of comparing array elements as array_eq. + * We consider only the elements of the arrays, ignoring dimensionality. + *---------------------------------------------------------------------------- + */ + +/* + * array_contain_compare : + * compares two arrays for overlap/containment + * + * When matchall is true, return true if all members of array1 are in array2. + * When matchall is false, return true if any members of array1 are in array2. + */ +static bool +array_contain_compare(AnyArrayType *array1, AnyArrayType *array2, Oid collation, + bool matchall, void **fn_extra) +{ + LOCAL_FCINFO(locfcinfo, 2); + bool result = matchall; + Oid element_type = AARR_ELEMTYPE(array1); + TypeCacheEntry *typentry; + int nelems1; + Datum *values2; + bool *nulls2; + int nelems2; + int typlen; + bool typbyval; + char typalign; + int i; + int j; + array_iter it1; + + if (element_type != AARR_ELEMTYPE(array2)) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("cannot compare arrays of different element types"))); + + /* + * We arrange to look up the equality function only once per series of + * calls, assuming the element type doesn't change underneath us. The + * typcache is used so that we have no memory leakage when being used as + * an index support function. 
+ */ + typentry = (TypeCacheEntry *) *fn_extra; + if (typentry == NULL || + typentry->type_id != element_type) + { + typentry = lookup_type_cache(element_type, + TYPECACHE_EQ_OPR_FINFO); + if (!OidIsValid(typentry->eq_opr_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify an equality operator for type %s", + format_type_be(element_type)))); + *fn_extra = (void *) typentry; + } + typlen = typentry->typlen; + typbyval = typentry->typbyval; + typalign = typentry->typalign; + + /* + * Since we probably will need to scan array2 multiple times, it's + * worthwhile to use deconstruct_array on it. We scan array1 the hard way + * however, since we very likely won't need to look at all of it. + */ + if (VARATT_IS_EXPANDED_HEADER(array2)) + { + /* This should be safe even if input is read-only */ + deconstruct_expanded_array(&(array2->xpn)); + values2 = array2->xpn.dvalues; + nulls2 = array2->xpn.dnulls; + nelems2 = array2->xpn.nelems; + } + else + deconstruct_array((ArrayType *) array2, + element_type, typlen, typbyval, typalign, + &values2, &nulls2, &nelems2); + + /* + * Apply the comparison operator to each pair of array elements. + */ + InitFunctionCallInfoData(*locfcinfo, &typentry->eq_opr_finfo, 2, + collation, NULL, NULL); + + /* Loop over source data */ + nelems1 = ArrayGetNItems(AARR_NDIM(array1), AARR_DIMS(array1)); + array_iter_setup(&it1, array1); + + for (i = 0; i < nelems1; i++) + { + Datum elt1; + bool isnull1; + + /* Get element, checking for NULL */ + elt1 = array_iter_next(&it1, &isnull1, i, typlen, typbyval, typalign); + + /* + * We assume that the comparison operator is strict, so a NULL can't + * match anything. XXX this diverges from the "NULL=NULL" behavior of + * array_eq, should we act like that? + */ + if (isnull1) + { + if (matchall) + { + result = false; + break; + } + continue; + } + + for (j = 0; j < nelems2; j++) + { + Datum elt2 = values2[j]; + bool isnull2 = nulls2 ? nulls2[j] : false; + bool oprresult; + + if (isnull2) + continue; /* can't match */ + + /* + * Apply the operator to the element pair; treat NULL as false + */ + locfcinfo->args[0].value = elt1; + locfcinfo->args[0].isnull = false; + locfcinfo->args[1].value = elt2; + locfcinfo->args[1].isnull = false; + locfcinfo->isnull = false; + oprresult = DatumGetBool(FunctionCallInvoke(locfcinfo)); + if (!locfcinfo->isnull && oprresult) + break; + } + + if (j < nelems2) + { + /* found a match for elt1 */ + if (!matchall) + { + result = true; + break; + } + } + else + { + /* no match for elt1 */ + if (matchall) + { + result = false; + break; + } + } + } + + return result; +} + +Datum +arrayoverlap(PG_FUNCTION_ARGS) +{ + AnyArrayType *array1 = PG_GETARG_ANY_ARRAY_P(0); + AnyArrayType *array2 = PG_GETARG_ANY_ARRAY_P(1); + Oid collation = PG_GET_COLLATION(); + bool result; + + result = array_contain_compare(array1, array2, collation, false, + &fcinfo->flinfo->fn_extra); + + /* Avoid leaking memory when handed toasted input. */ + AARR_FREE_IF_COPY(array1, 0); + AARR_FREE_IF_COPY(array2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +arraycontains(PG_FUNCTION_ARGS) +{ + AnyArrayType *array1 = PG_GETARG_ANY_ARRAY_P(0); + AnyArrayType *array2 = PG_GETARG_ANY_ARRAY_P(1); + Oid collation = PG_GET_COLLATION(); + bool result; + + result = array_contain_compare(array2, array1, collation, true, + &fcinfo->flinfo->fn_extra); + + /* Avoid leaking memory when handed toasted input. 
*/ + AARR_FREE_IF_COPY(array1, 0); + AARR_FREE_IF_COPY(array2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +arraycontained(PG_FUNCTION_ARGS) +{ + AnyArrayType *array1 = PG_GETARG_ANY_ARRAY_P(0); + AnyArrayType *array2 = PG_GETARG_ANY_ARRAY_P(1); + Oid collation = PG_GET_COLLATION(); + bool result; + + result = array_contain_compare(array1, array2, collation, true, + &fcinfo->flinfo->fn_extra); + + /* Avoid leaking memory when handed toasted input. */ + AARR_FREE_IF_COPY(array1, 0); + AARR_FREE_IF_COPY(array2, 1); + + PG_RETURN_BOOL(result); +} + + +/*----------------------------------------------------------------------------- + * Array iteration functions + * These functions are used to iterate efficiently through arrays + *----------------------------------------------------------------------------- + */ + +/* + * array_create_iterator --- set up to iterate through an array + * + * If slice_ndim is zero, we will iterate element-by-element; the returned + * datums are of the array's element type. + * + * If slice_ndim is 1..ARR_NDIM(arr), we will iterate by slices: the + * returned datums are of the same array type as 'arr', but of size + * equal to the rightmost N dimensions of 'arr'. + * + * The passed-in array must remain valid for the lifetime of the iterator. + */ +ArrayIterator +array_create_iterator(ArrayType *arr, int slice_ndim, ArrayMetaState *mstate) +{ + ArrayIterator iterator = palloc0(sizeof(ArrayIteratorData)); + + /* + * Sanity-check inputs --- caller should have got this right already + */ + Assert(PointerIsValid(arr)); + if (slice_ndim < 0 || slice_ndim > ARR_NDIM(arr)) + elog(ERROR, "invalid arguments to array_create_iterator"); + + /* + * Remember basic info about the array and its element type + */ + iterator->arr = arr; + iterator->nullbitmap = ARR_NULLBITMAP(arr); + iterator->nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); + + if (mstate != NULL) + { + Assert(mstate->element_type == ARR_ELEMTYPE(arr)); + + iterator->typlen = mstate->typlen; + iterator->typbyval = mstate->typbyval; + iterator->typalign = mstate->typalign; + } + else + get_typlenbyvalalign(ARR_ELEMTYPE(arr), + &iterator->typlen, + &iterator->typbyval, + &iterator->typalign); + + /* + * Remember the slicing parameters. + */ + iterator->slice_ndim = slice_ndim; + + if (slice_ndim > 0) + { + /* + * Get pointers into the array's dims and lbound arrays to represent + * the dims/lbound arrays of a slice. These are the same as the + * rightmost N dimensions of the array. + */ + iterator->slice_dims = ARR_DIMS(arr) + ARR_NDIM(arr) - slice_ndim; + iterator->slice_lbound = ARR_LBOUND(arr) + ARR_NDIM(arr) - slice_ndim; + + /* + * Compute number of elements in a slice. + */ + iterator->slice_len = ArrayGetNItems(slice_ndim, + iterator->slice_dims); + + /* + * Create workspace for building sub-arrays. + */ + iterator->slice_values = (Datum *) + palloc(iterator->slice_len * sizeof(Datum)); + iterator->slice_nulls = (bool *) + palloc(iterator->slice_len * sizeof(bool)); + } + + /* + * Initialize our data pointer and linear element number. These will + * advance through the array during array_iterate(). + */ + iterator->data_ptr = ARR_DATA_PTR(arr); + iterator->current_item = 0; + + return iterator; +} + +/* + * Iterate through the array referenced by 'iterator'. + * + * As long as there is another element (or slice), return it into + * *value / *isnull, and return true. Return false when no more data. 
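+ *
+ * Illustrative loop (editor's sketch with a hypothetical array "arr", not
+ * part of the original source):
+ *
+ *    ArrayIterator it = array_create_iterator(arr, 0, NULL);
+ *    Datum value;
+ *    bool  isnull;
+ *
+ *    while (array_iterate(it, &value, &isnull))
+ *    {
+ *        ... use value / isnull ...
+ *    }
+ *    array_free_iterator(it);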
+ */ +bool +array_iterate(ArrayIterator iterator, Datum *value, bool *isnull) +{ + /* Done if we have reached the end of the array */ + if (iterator->current_item >= iterator->nitems) + return false; + + if (iterator->slice_ndim == 0) + { + /* + * Scalar case: return one element. + */ + if (array_get_isnull(iterator->nullbitmap, iterator->current_item++)) + { + *isnull = true; + *value = (Datum) 0; + } + else + { + /* non-NULL, so fetch the individual Datum to return */ + char *p = iterator->data_ptr; + + *isnull = false; + *value = fetch_att(p, iterator->typbyval, iterator->typlen); + + /* Move our data pointer forward to the next element */ + p = att_addlength_pointer(p, iterator->typlen, p); + p = (char *) att_align_nominal(p, iterator->typalign); + iterator->data_ptr = p; + } + } + else + { + /* + * Slice case: build and return an array of the requested size. + */ + ArrayType *result; + Datum *values = iterator->slice_values; + bool *nulls = iterator->slice_nulls; + char *p = iterator->data_ptr; + int i; + + for (i = 0; i < iterator->slice_len; i++) + { + if (array_get_isnull(iterator->nullbitmap, + iterator->current_item++)) + { + nulls[i] = true; + values[i] = (Datum) 0; + } + else + { + nulls[i] = false; + values[i] = fetch_att(p, iterator->typbyval, iterator->typlen); + + /* Move our data pointer forward to the next element */ + p = att_addlength_pointer(p, iterator->typlen, p); + p = (char *) att_align_nominal(p, iterator->typalign); + } + } + + iterator->data_ptr = p; + + result = construct_md_array(values, + nulls, + iterator->slice_ndim, + iterator->slice_dims, + iterator->slice_lbound, + ARR_ELEMTYPE(iterator->arr), + iterator->typlen, + iterator->typbyval, + iterator->typalign); + + *isnull = false; + *value = PointerGetDatum(result); + } + + return true; +} + +/* + * Release an ArrayIterator data structure + */ +void +array_free_iterator(ArrayIterator iterator) +{ + if (iterator->slice_ndim > 0) + { + pfree(iterator->slice_values); + pfree(iterator->slice_nulls); + } + pfree(iterator); +} + + +/***************************************************************************/ +/******************| Support Routines |*****************/ +/***************************************************************************/ + +/* + * Check whether a specific array element is NULL + * + * nullbitmap: pointer to array's null bitmap (NULL if none) + * offset: 0-based linear element number of array element + */ +static bool +array_get_isnull(const bits8 *nullbitmap, int offset) +{ + if (nullbitmap == NULL) + return false; /* assume not null */ + if (nullbitmap[offset / 8] & (1 << (offset % 8))) + return false; /* not null */ + return true; +} + +/* + * Set a specific array element's null-bitmap entry + * + * nullbitmap: pointer to array's null bitmap (mustn't be NULL) + * offset: 0-based linear element number of array element + * isNull: null status to set + */ +static void +array_set_isnull(bits8 *nullbitmap, int offset, bool isNull) +{ + int bitmask; + + nullbitmap += offset / 8; + bitmask = 1 << (offset % 8); + if (isNull) + *nullbitmap &= ~bitmask; + else + *nullbitmap |= bitmask; +} + +/* + * Fetch array element at pointer, converted correctly to a Datum + * + * Caller must have handled case of NULL element + */ +static Datum +ArrayCast(char *value, bool byval, int len) +{ + return fetch_att(value, byval, len); +} + +/* + * Copy datum to *dest and return total space used (including align padding) + * + * Caller must have handled case of NULL element + */ +static int +ArrayCastAndSet(Datum 
src, + int typlen, + bool typbyval, + char typalign, + char *dest) +{ + int inc; + + if (typlen > 0) + { + if (typbyval) + store_att_byval(dest, src, typlen); + else + memmove(dest, DatumGetPointer(src), typlen); + inc = att_align_nominal(typlen, typalign); + } + else + { + Assert(!typbyval); + inc = att_addlength_datum(0, typlen, src); + memmove(dest, DatumGetPointer(src), inc); + inc = att_align_nominal(inc, typalign); + } + + return inc; +} + +/* + * Advance ptr over nitems array elements + * + * ptr: starting location in array + * offset: 0-based linear element number of first element (the one at *ptr) + * nullbitmap: start of array's null bitmap, or NULL if none + * nitems: number of array elements to advance over (>= 0) + * typlen, typbyval, typalign: storage parameters of array element datatype + * + * It is caller's responsibility to ensure that nitems is within range + */ +static char * +array_seek(char *ptr, int offset, bits8 *nullbitmap, int nitems, + int typlen, bool typbyval, char typalign) +{ + int bitmask; + int i; + + /* easy if fixed-size elements and no NULLs */ + if (typlen > 0 && !nullbitmap) + return ptr + nitems * ((Size) att_align_nominal(typlen, typalign)); + + /* seems worth having separate loops for NULL and no-NULLs cases */ + if (nullbitmap) + { + nullbitmap += offset / 8; + bitmask = 1 << (offset % 8); + + for (i = 0; i < nitems; i++) + { + if (*nullbitmap & bitmask) + { + ptr = att_addlength_pointer(ptr, typlen, ptr); + ptr = (char *) att_align_nominal(ptr, typalign); + } + bitmask <<= 1; + if (bitmask == 0x100) + { + nullbitmap++; + bitmask = 1; + } + } + } + else + { + for (i = 0; i < nitems; i++) + { + ptr = att_addlength_pointer(ptr, typlen, ptr); + ptr = (char *) att_align_nominal(ptr, typalign); + } + } + return ptr; +} + +/* + * Compute total size of the nitems array elements starting at *ptr + * + * Parameters same as for array_seek + */ +static int +array_nelems_size(char *ptr, int offset, bits8 *nullbitmap, int nitems, + int typlen, bool typbyval, char typalign) +{ + return array_seek(ptr, offset, nullbitmap, nitems, + typlen, typbyval, typalign) - ptr; +} + +/* + * Copy nitems array elements from srcptr to destptr + * + * destptr: starting destination location (must be enough room!) + * nitems: number of array elements to copy (>= 0) + * srcptr: starting location in source array + * offset: 0-based linear element number of first element (the one at *srcptr) + * nullbitmap: start of source array's null bitmap, or NULL if none + * typlen, typbyval, typalign: storage parameters of array element datatype + * + * Returns number of bytes copied + * + * NB: this does not take care of setting up the destination's null bitmap! + */ +static int +array_copy(char *destptr, int nitems, + char *srcptr, int offset, bits8 *nullbitmap, + int typlen, bool typbyval, char typalign) +{ + int numbytes; + + numbytes = array_nelems_size(srcptr, offset, nullbitmap, nitems, + typlen, typbyval, typalign); + memcpy(destptr, srcptr, numbytes); + return numbytes; +} + +/* + * Copy nitems null-bitmap bits from source to destination + * + * destbitmap: start of destination array's null bitmap (mustn't be NULL) + * destoffset: 0-based linear element number of first dest element + * srcbitmap: start of source array's null bitmap, or NULL if none + * srcoffset: 0-based linear element number of first source element + * nitems: number of bits to copy (>= 0) + * + * If srcbitmap is NULL then we assume the source is all-non-NULL and + * fill 1's into the destination bitmap. 
Note that only the specified + * bits in the destination map are changed, not any before or after. + * + * Note: this could certainly be optimized using standard bitblt methods. + * However, it's not clear that the typical Postgres array has enough elements + * to make it worth worrying too much. For the moment, KISS. + */ +void +array_bitmap_copy(bits8 *destbitmap, int destoffset, + const bits8 *srcbitmap, int srcoffset, + int nitems) +{ + int destbitmask, + destbitval, + srcbitmask, + srcbitval; + + Assert(destbitmap); + if (nitems <= 0) + return; /* don't risk fetch off end of memory */ + destbitmap += destoffset / 8; + destbitmask = 1 << (destoffset % 8); + destbitval = *destbitmap; + if (srcbitmap) + { + srcbitmap += srcoffset / 8; + srcbitmask = 1 << (srcoffset % 8); + srcbitval = *srcbitmap; + while (nitems-- > 0) + { + if (srcbitval & srcbitmask) + destbitval |= destbitmask; + else + destbitval &= ~destbitmask; + destbitmask <<= 1; + if (destbitmask == 0x100) + { + *destbitmap++ = destbitval; + destbitmask = 1; + if (nitems > 0) + destbitval = *destbitmap; + } + srcbitmask <<= 1; + if (srcbitmask == 0x100) + { + srcbitmap++; + srcbitmask = 1; + if (nitems > 0) + srcbitval = *srcbitmap; + } + } + if (destbitmask != 1) + *destbitmap = destbitval; + } + else + { + while (nitems-- > 0) + { + destbitval |= destbitmask; + destbitmask <<= 1; + if (destbitmask == 0x100) + { + *destbitmap++ = destbitval; + destbitmask = 1; + if (nitems > 0) + destbitval = *destbitmap; + } + } + if (destbitmask != 1) + *destbitmap = destbitval; + } +} + +/* + * Compute space needed for a slice of an array + * + * We assume the caller has verified that the slice coordinates are valid. + */ +static int +array_slice_size(char *arraydataptr, bits8 *arraynullsptr, + int ndim, int *dim, int *lb, + int *st, int *endp, + int typlen, bool typbyval, char typalign) +{ + int src_offset, + span[MAXDIM], + prod[MAXDIM], + dist[MAXDIM], + indx[MAXDIM]; + char *ptr; + int i, + j, + inc; + int count = 0; + + mda_get_range(ndim, span, st, endp); + + /* Pretty easy for fixed element length without nulls ... */ + if (typlen > 0 && !arraynullsptr) + return ArrayGetNItems(ndim, span) * att_align_nominal(typlen, typalign); + + /* Else gotta do it the hard way */ + src_offset = ArrayGetOffset(ndim, dim, lb, st); + ptr = array_seek(arraydataptr, 0, arraynullsptr, src_offset, + typlen, typbyval, typalign); + mda_get_prod(ndim, dim, prod); + mda_get_offset_values(ndim, dist, prod, span); + for (i = 0; i < ndim; i++) + indx[i] = 0; + j = ndim - 1; + do + { + if (dist[j]) + { + ptr = array_seek(ptr, src_offset, arraynullsptr, dist[j], + typlen, typbyval, typalign); + src_offset += dist[j]; + } + if (!array_get_isnull(arraynullsptr, src_offset)) + { + inc = att_addlength_pointer(0, typlen, ptr); + inc = att_align_nominal(inc, typalign); + ptr += inc; + count += inc; + } + src_offset++; + } while ((j = mda_next_tuple(ndim, indx, span)) != -1); + return count; +} + +/* + * Extract a slice of an array into consecutive elements in the destination + * array. + * + * We assume the caller has verified that the slice coordinates are valid, + * allocated enough storage for the result, and initialized the header + * of the new array. 
+ */ +static void +array_extract_slice(ArrayType *newarray, + int ndim, + int *dim, + int *lb, + char *arraydataptr, + bits8 *arraynullsptr, + int *st, + int *endp, + int typlen, + bool typbyval, + char typalign) +{ + char *destdataptr = ARR_DATA_PTR(newarray); + bits8 *destnullsptr = ARR_NULLBITMAP(newarray); + char *srcdataptr; + int src_offset, + dest_offset, + prod[MAXDIM], + span[MAXDIM], + dist[MAXDIM], + indx[MAXDIM]; + int i, + j, + inc; + + src_offset = ArrayGetOffset(ndim, dim, lb, st); + srcdataptr = array_seek(arraydataptr, 0, arraynullsptr, src_offset, + typlen, typbyval, typalign); + mda_get_prod(ndim, dim, prod); + mda_get_range(ndim, span, st, endp); + mda_get_offset_values(ndim, dist, prod, span); + for (i = 0; i < ndim; i++) + indx[i] = 0; + dest_offset = 0; + j = ndim - 1; + do + { + if (dist[j]) + { + /* skip unwanted elements */ + srcdataptr = array_seek(srcdataptr, src_offset, arraynullsptr, + dist[j], + typlen, typbyval, typalign); + src_offset += dist[j]; + } + inc = array_copy(destdataptr, 1, + srcdataptr, src_offset, arraynullsptr, + typlen, typbyval, typalign); + if (destnullsptr) + array_bitmap_copy(destnullsptr, dest_offset, + arraynullsptr, src_offset, + 1); + destdataptr += inc; + srcdataptr += inc; + src_offset++; + dest_offset++; + } while ((j = mda_next_tuple(ndim, indx, span)) != -1); +} + +/* + * Insert a slice into an array. + * + * ndim/dim[]/lb[] are dimensions of the original array. A new array with + * those same dimensions is to be constructed. destArray must already + * have been allocated and its header initialized. + * + * st[]/endp[] identify the slice to be replaced. Elements within the slice + * volume are taken from consecutive elements of the srcArray; elements + * outside it are copied from origArray. + * + * We assume the caller has verified that the slice coordinates are valid. 
+ */ +static void +array_insert_slice(ArrayType *destArray, + ArrayType *origArray, + ArrayType *srcArray, + int ndim, + int *dim, + int *lb, + int *st, + int *endp, + int typlen, + bool typbyval, + char typalign) +{ + char *destPtr = ARR_DATA_PTR(destArray); + char *origPtr = ARR_DATA_PTR(origArray); + char *srcPtr = ARR_DATA_PTR(srcArray); + bits8 *destBitmap = ARR_NULLBITMAP(destArray); + bits8 *origBitmap = ARR_NULLBITMAP(origArray); + bits8 *srcBitmap = ARR_NULLBITMAP(srcArray); + int orignitems = ArrayGetNItems(ARR_NDIM(origArray), + ARR_DIMS(origArray)); + int dest_offset, + orig_offset, + src_offset, + prod[MAXDIM], + span[MAXDIM], + dist[MAXDIM], + indx[MAXDIM]; + int i, + j, + inc; + + dest_offset = ArrayGetOffset(ndim, dim, lb, st); + /* copy items before the slice start */ + inc = array_copy(destPtr, dest_offset, + origPtr, 0, origBitmap, + typlen, typbyval, typalign); + destPtr += inc; + origPtr += inc; + if (destBitmap) + array_bitmap_copy(destBitmap, 0, origBitmap, 0, dest_offset); + orig_offset = dest_offset; + mda_get_prod(ndim, dim, prod); + mda_get_range(ndim, span, st, endp); + mda_get_offset_values(ndim, dist, prod, span); + for (i = 0; i < ndim; i++) + indx[i] = 0; + src_offset = 0; + j = ndim - 1; + do + { + /* Copy/advance over elements between here and next part of slice */ + if (dist[j]) + { + inc = array_copy(destPtr, dist[j], + origPtr, orig_offset, origBitmap, + typlen, typbyval, typalign); + destPtr += inc; + origPtr += inc; + if (destBitmap) + array_bitmap_copy(destBitmap, dest_offset, + origBitmap, orig_offset, + dist[j]); + dest_offset += dist[j]; + orig_offset += dist[j]; + } + /* Copy new element at this slice position */ + inc = array_copy(destPtr, 1, + srcPtr, src_offset, srcBitmap, + typlen, typbyval, typalign); + if (destBitmap) + array_bitmap_copy(destBitmap, dest_offset, + srcBitmap, src_offset, + 1); + destPtr += inc; + srcPtr += inc; + dest_offset++; + src_offset++; + /* Advance over old element at this slice position */ + origPtr = array_seek(origPtr, orig_offset, origBitmap, 1, + typlen, typbyval, typalign); + orig_offset++; + } while ((j = mda_next_tuple(ndim, indx, span)) != -1); + + /* don't miss any data at the end */ + array_copy(destPtr, orignitems - orig_offset, + origPtr, orig_offset, origBitmap, + typlen, typbyval, typalign); + if (destBitmap) + array_bitmap_copy(destBitmap, dest_offset, + origBitmap, orig_offset, + orignitems - orig_offset); +} + +/* + * initArrayResult - initialize an empty ArrayBuildState + * + * element_type is the array element type (must be a valid array element type) + * rcontext is where to keep working state + * subcontext is a flag determining whether to use a separate memory context + * + * Note: there are two common schemes for using accumArrayResult(). + * In the older scheme, you start with a NULL ArrayBuildState pointer, and + * call accumArrayResult once per element. In this scheme you end up with + * a NULL pointer if there were no elements, which you need to special-case. + * In the newer scheme, call initArrayResult and then call accumArrayResult + * once per element. In this scheme you always end with a non-NULL pointer + * that you can pass to makeArrayResult; you get an empty array if there + * were no elements. This is preferred if an empty array is what you want. + * + * It's possible to choose whether to create a separate memory context for the + * array build state, or whether to allocate it directly within rcontext. + * + * When there are many concurrent small states (e.g. 
array_agg() using hash + * aggregation of many small groups), using a separate memory context for each + * one may result in severe memory bloat. In such cases, use the same memory + * context to initialize all such array build states, and pass + * subcontext=false. + * + * In cases when the array build states have different lifetimes, using a + * single memory context is impractical. Instead, pass subcontext=true so that + * the array build states can be freed individually. + */ +ArrayBuildState * +initArrayResult(Oid element_type, MemoryContext rcontext, bool subcontext) +{ + /* + * When using a subcontext, we can afford to start with a somewhat larger + * initial array size. Without subcontexts, we'd better hope that most of + * the states stay small ... + */ + return initArrayResultWithSize(element_type, rcontext, subcontext, + subcontext ? 64 : 8); +} + +/* + * initArrayResultWithSize + * As initArrayResult, but allow the initial size of the allocated arrays + * to be specified. + */ +ArrayBuildState * +initArrayResultWithSize(Oid element_type, MemoryContext rcontext, + bool subcontext, int initsize) +{ + ArrayBuildState *astate; + MemoryContext arr_context = rcontext; + + /* Make a temporary context to hold all the junk */ + if (subcontext) + arr_context = AllocSetContextCreate(rcontext, + "accumArrayResult", + ALLOCSET_DEFAULT_SIZES); + + astate = (ArrayBuildState *) + MemoryContextAlloc(arr_context, sizeof(ArrayBuildState)); + astate->mcontext = arr_context; + astate->private_cxt = subcontext; + astate->alen = initsize; + astate->dvalues = (Datum *) + MemoryContextAlloc(arr_context, astate->alen * sizeof(Datum)); + astate->dnulls = (bool *) + MemoryContextAlloc(arr_context, astate->alen * sizeof(bool)); + astate->nelems = 0; + astate->element_type = element_type; + get_typlenbyvalalign(element_type, + &astate->typlen, + &astate->typbyval, + &astate->typalign); + + return astate; +} + +/* + * accumArrayResult - accumulate one (more) Datum for an array result + * + * astate is working state (can be NULL on first call) + * dvalue/disnull represent the new Datum to append to the array + * element_type is the Datum's type (must be a valid array element type) + * rcontext is where to keep working state + */ +ArrayBuildState * +accumArrayResult(ArrayBuildState *astate, + Datum dvalue, bool disnull, + Oid element_type, + MemoryContext rcontext) +{ + MemoryContext oldcontext; + + if (astate == NULL) + { + /* First time through --- initialize */ + astate = initArrayResult(element_type, rcontext, true); + } + else + { + Assert(astate->element_type == element_type); + } + + oldcontext = MemoryContextSwitchTo(astate->mcontext); + + /* enlarge dvalues[]/dnulls[] if needed */ + if (astate->nelems >= astate->alen) + { + astate->alen *= 2; + astate->dvalues = (Datum *) + repalloc(astate->dvalues, astate->alen * sizeof(Datum)); + astate->dnulls = (bool *) + repalloc(astate->dnulls, astate->alen * sizeof(bool)); + } + + /* + * Ensure pass-by-ref stuff is copied into mcontext; and detoast it too if + * it's varlena. (You might think that detoasting is not needed here + * because construct_md_array can detoast the array elements later. + * However, we must not let construct_md_array modify the ArrayBuildState + * because that would mean array_agg_finalfn damages its input, which is + * verboten. Also, this way frequently saves one copying step.) 
+ */ + if (!disnull && !astate->typbyval) + { + if (astate->typlen == -1) + dvalue = PointerGetDatum(PG_DETOAST_DATUM_COPY(dvalue)); + else + dvalue = datumCopy(dvalue, astate->typbyval, astate->typlen); + } + + astate->dvalues[astate->nelems] = dvalue; + astate->dnulls[astate->nelems] = disnull; + astate->nelems++; + + MemoryContextSwitchTo(oldcontext); + + return astate; +} + +/* + * makeArrayResult - produce 1-D final result of accumArrayResult + * + * Note: only releases astate if it was initialized within a separate memory + * context (i.e. using subcontext=true when calling initArrayResult). + * + * astate is working state (must not be NULL) + * rcontext is where to construct result + */ +Datum +makeArrayResult(ArrayBuildState *astate, + MemoryContext rcontext) +{ + int ndims; + int dims[1]; + int lbs[1]; + + /* If no elements were presented, we want to create an empty array */ + ndims = (astate->nelems > 0) ? 1 : 0; + dims[0] = astate->nelems; + lbs[0] = 1; + + return makeMdArrayResult(astate, ndims, dims, lbs, rcontext, + astate->private_cxt); +} + +/* + * makeMdArrayResult - produce multi-D final result of accumArrayResult + * + * beware: no check that specified dimensions match the number of values + * accumulated. + * + * Note: if the astate was not initialized within a separate memory context + * (that is, initArrayResult was called with subcontext=false), then using + * release=true is illegal. Instead, release astate along with the rest of its + * context when appropriate. + * + * astate is working state (must not be NULL) + * rcontext is where to construct result + * release is true if okay to release working state + */ +Datum +makeMdArrayResult(ArrayBuildState *astate, + int ndims, + int *dims, + int *lbs, + MemoryContext rcontext, + bool release) +{ + ArrayType *result; + MemoryContext oldcontext; + + /* Build the final array result in rcontext */ + oldcontext = MemoryContextSwitchTo(rcontext); + + result = construct_md_array(astate->dvalues, + astate->dnulls, + ndims, + dims, + lbs, + astate->element_type, + astate->typlen, + astate->typbyval, + astate->typalign); + + MemoryContextSwitchTo(oldcontext); + + /* Clean up all the junk */ + if (release) + { + Assert(astate->private_cxt); + MemoryContextDelete(astate->mcontext); + } + + return PointerGetDatum(result); +} + +/* + * The following three functions provide essentially the same API as + * initArrayResult/accumArrayResult/makeArrayResult, but instead of accepting + * inputs that are array elements, they accept inputs that are arrays and + * produce an output array having N+1 dimensions. The inputs must all have + * identical dimensionality as well as element type. 
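+ *
+ * A hypothetical usage sketch (inputdatums, ninputs, arraytypid and
+ * resultdatum are illustrative names only, and at least one input is
+ * assumed, since otherwise astate would remain NULL):
+ *
+ *   ArrayBuildStateArr *astate = NULL;
+ *   for (int i = 0; i < ninputs; i++)
+ *       astate = accumArrayResultArr(astate, inputdatums[i], false,
+ *                                    arraytypid, CurrentMemoryContext);
+ *   resultdatum = makeArrayResultArr(astate, CurrentMemoryContext, true);
+ *
+ * Each accumulated N-dimensional input becomes one step along the new first
+ * dimension of the (N+1)-dimensional result.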
+ */ + +/* + * initArrayResultArr - initialize an empty ArrayBuildStateArr + * + * array_type is the array type (must be a valid varlena array type) + * element_type is the type of the array's elements (lookup if InvalidOid) + * rcontext is where to keep working state + * subcontext is a flag determining whether to use a separate memory context + */ +ArrayBuildStateArr * +initArrayResultArr(Oid array_type, Oid element_type, MemoryContext rcontext, + bool subcontext) +{ + ArrayBuildStateArr *astate; + MemoryContext arr_context = rcontext; /* by default use the parent ctx */ + + /* Lookup element type, unless element_type already provided */ + if (!OidIsValid(element_type)) + { + element_type = get_element_type(array_type); + + if (!OidIsValid(element_type)) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("data type %s is not an array type", + format_type_be(array_type)))); + } + + /* Make a temporary context to hold all the junk */ + if (subcontext) + arr_context = AllocSetContextCreate(rcontext, + "accumArrayResultArr", + ALLOCSET_DEFAULT_SIZES); + + /* Note we initialize all fields to zero */ + astate = (ArrayBuildStateArr *) + MemoryContextAllocZero(arr_context, sizeof(ArrayBuildStateArr)); + astate->mcontext = arr_context; + astate->private_cxt = subcontext; + + /* Save relevant datatype information */ + astate->array_type = array_type; + astate->element_type = element_type; + + return astate; +} + +/* + * accumArrayResultArr - accumulate one (more) sub-array for an array result + * + * astate is working state (can be NULL on first call) + * dvalue/disnull represent the new sub-array to append to the array + * array_type is the array type (must be a valid varlena array type) + * rcontext is where to keep working state + */ +ArrayBuildStateArr * +accumArrayResultArr(ArrayBuildStateArr *astate, + Datum dvalue, bool disnull, + Oid array_type, + MemoryContext rcontext) +{ + ArrayType *arg; + MemoryContext oldcontext; + int *dims, + *lbs, + ndims, + nitems, + ndatabytes; + char *data; + int i; + + /* + * We disallow accumulating null subarrays. Another plausible definition + * is to ignore them, but callers that want that can just skip calling + * this function. + */ + if (disnull) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("cannot accumulate null arrays"))); + + /* Detoast input array in caller's context */ + arg = DatumGetArrayTypeP(dvalue); + + if (astate == NULL) + astate = initArrayResultArr(array_type, InvalidOid, rcontext, true); + else + Assert(astate->array_type == array_type); + + oldcontext = MemoryContextSwitchTo(astate->mcontext); + + /* Collect this input's dimensions */ + ndims = ARR_NDIM(arg); + dims = ARR_DIMS(arg); + lbs = ARR_LBOUND(arg); + data = ARR_DATA_PTR(arg); + nitems = ArrayGetNItems(ndims, dims); + ndatabytes = ARR_SIZE(arg) - ARR_DATA_OFFSET(arg); + + if (astate->ndims == 0) + { + /* First input; check/save the dimensionality info */ + + /* Should we allow empty inputs and just produce an empty output? */ + if (ndims == 0) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("cannot accumulate empty arrays"))); + if (ndims + 1 > MAXDIM) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)", + ndims + 1, MAXDIM))); + + /* + * The output array will have n+1 dimensions, with the ones after the + * first matching the input's dimensions. 
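+ * For instance, after accumulating three inputs whose dims are each {3,4},
+ * the state describes a 3-D result with dims {3,3,4} (the leading 3 being
+ * the number of inputs accumulated so far) and a first lower bound of 1.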
+ */ + astate->ndims = ndims + 1; + astate->dims[0] = 0; + memcpy(&astate->dims[1], dims, ndims * sizeof(int)); + astate->lbs[0] = 1; + memcpy(&astate->lbs[1], lbs, ndims * sizeof(int)); + + /* Allocate at least enough data space for this item */ + astate->abytes = pg_nextpower2_32(Max(1024, ndatabytes + 1)); + astate->data = (char *) palloc(astate->abytes); + } + else + { + /* Second or later input: must match first input's dimensionality */ + if (astate->ndims != ndims + 1) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("cannot accumulate arrays of different dimensionality"))); + for (i = 0; i < ndims; i++) + { + if (astate->dims[i + 1] != dims[i] || astate->lbs[i + 1] != lbs[i]) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("cannot accumulate arrays of different dimensionality"))); + } + + /* Enlarge data space if needed */ + if (astate->nbytes + ndatabytes > astate->abytes) + { + astate->abytes = Max(astate->abytes * 2, + astate->nbytes + ndatabytes); + astate->data = (char *) repalloc(astate->data, astate->abytes); + } + } + + /* + * Copy the data portion of the sub-array. Note we assume that the + * advertised data length of the sub-array is properly aligned. We do not + * have to worry about detoasting elements since whatever's in the + * sub-array should be OK already. + */ + memcpy(astate->data + astate->nbytes, data, ndatabytes); + astate->nbytes += ndatabytes; + + /* Deal with null bitmap if needed */ + if (astate->nullbitmap || ARR_HASNULL(arg)) + { + int newnitems = astate->nitems + nitems; + + if (astate->nullbitmap == NULL) + { + /* + * First input with nulls; we must retrospectively handle any + * previous inputs by marking all their items non-null. + */ + astate->aitems = pg_nextpower2_32(Max(256, newnitems + 1)); + astate->nullbitmap = (bits8 *) palloc((astate->aitems + 7) / 8); + array_bitmap_copy(astate->nullbitmap, 0, + NULL, 0, + astate->nitems); + } + else if (newnitems > astate->aitems) + { + astate->aitems = Max(astate->aitems * 2, newnitems); + astate->nullbitmap = (bits8 *) + repalloc(astate->nullbitmap, (astate->aitems + 7) / 8); + } + array_bitmap_copy(astate->nullbitmap, astate->nitems, + ARR_NULLBITMAP(arg), 0, + nitems); + } + + astate->nitems += nitems; + astate->dims[0] += 1; + + MemoryContextSwitchTo(oldcontext); + + /* Release detoasted copy if any */ + if ((Pointer) arg != DatumGetPointer(dvalue)) + pfree(arg); + + return astate; +} + +/* + * makeArrayResultArr - produce N+1-D final result of accumArrayResultArr + * + * astate is working state (must not be NULL) + * rcontext is where to construct result + * release is true if okay to release working state + */ +Datum +makeArrayResultArr(ArrayBuildStateArr *astate, + MemoryContext rcontext, + bool release) +{ + ArrayType *result; + MemoryContext oldcontext; + + /* Build the final array result in rcontext */ + oldcontext = MemoryContextSwitchTo(rcontext); + + if (astate->ndims == 0) + { + /* No inputs, return empty array */ + result = construct_empty_array(astate->element_type); + } + else + { + int dataoffset, + nbytes; + + /* Check for overflow of the array dimensions */ + (void) ArrayGetNItems(astate->ndims, astate->dims); + ArrayCheckBounds(astate->ndims, astate->dims, astate->lbs); + + /* Compute required space */ + nbytes = astate->nbytes; + if (astate->nullbitmap != NULL) + { + dataoffset = ARR_OVERHEAD_WITHNULLS(astate->ndims, astate->nitems); + nbytes += dataoffset; + } + else + { + dataoffset = 0; + nbytes += ARR_OVERHEAD_NONULLS(astate->ndims); + } + 
+ result = (ArrayType *) palloc0(nbytes); + SET_VARSIZE(result, nbytes); + result->ndim = astate->ndims; + result->dataoffset = dataoffset; + result->elemtype = astate->element_type; + + memcpy(ARR_DIMS(result), astate->dims, astate->ndims * sizeof(int)); + memcpy(ARR_LBOUND(result), astate->lbs, astate->ndims * sizeof(int)); + memcpy(ARR_DATA_PTR(result), astate->data, astate->nbytes); + + if (astate->nullbitmap != NULL) + array_bitmap_copy(ARR_NULLBITMAP(result), 0, + astate->nullbitmap, 0, + astate->nitems); + } + + MemoryContextSwitchTo(oldcontext); + + /* Clean up all the junk */ + if (release) + { + Assert(astate->private_cxt); + MemoryContextDelete(astate->mcontext); + } + + return PointerGetDatum(result); +} + +/* + * The following three functions provide essentially the same API as + * initArrayResult/accumArrayResult/makeArrayResult, but can accept either + * scalar or array inputs, invoking the appropriate set of functions above. + */ + +/* + * initArrayResultAny - initialize an empty ArrayBuildStateAny + * + * input_type is the input datatype (either element or array type) + * rcontext is where to keep working state + * subcontext is a flag determining whether to use a separate memory context + */ +ArrayBuildStateAny * +initArrayResultAny(Oid input_type, MemoryContext rcontext, bool subcontext) +{ + ArrayBuildStateAny *astate; + Oid element_type = get_element_type(input_type); + + if (OidIsValid(element_type)) + { + /* Array case */ + ArrayBuildStateArr *arraystate; + + arraystate = initArrayResultArr(input_type, InvalidOid, rcontext, subcontext); + astate = (ArrayBuildStateAny *) + MemoryContextAlloc(arraystate->mcontext, + sizeof(ArrayBuildStateAny)); + astate->scalarstate = NULL; + astate->arraystate = arraystate; + } + else + { + /* Scalar case */ + ArrayBuildState *scalarstate; + + /* Let's just check that we have a type that can be put into arrays */ + Assert(OidIsValid(get_array_type(input_type))); + + scalarstate = initArrayResult(input_type, rcontext, subcontext); + astate = (ArrayBuildStateAny *) + MemoryContextAlloc(scalarstate->mcontext, + sizeof(ArrayBuildStateAny)); + astate->scalarstate = scalarstate; + astate->arraystate = NULL; + } + + return astate; +} + +/* + * accumArrayResultAny - accumulate one (more) input for an array result + * + * astate is working state (can be NULL on first call) + * dvalue/disnull represent the new input to append to the array + * input_type is the input datatype (either element or array type) + * rcontext is where to keep working state + */ +ArrayBuildStateAny * +accumArrayResultAny(ArrayBuildStateAny *astate, + Datum dvalue, bool disnull, + Oid input_type, + MemoryContext rcontext) +{ + if (astate == NULL) + astate = initArrayResultAny(input_type, rcontext, true); + + if (astate->scalarstate) + (void) accumArrayResult(astate->scalarstate, + dvalue, disnull, + input_type, rcontext); + else + (void) accumArrayResultArr(astate->arraystate, + dvalue, disnull, + input_type, rcontext); + + return astate; +} + +/* + * makeArrayResultAny - produce final result of accumArrayResultAny + * + * astate is working state (must not be NULL) + * rcontext is where to construct result + * release is true if okay to release working state + */ +Datum +makeArrayResultAny(ArrayBuildStateAny *astate, + MemoryContext rcontext, bool release) +{ + Datum result; + + if (astate->scalarstate) + { + /* Must use makeMdArrayResult to support "release" parameter */ + int ndims; + int dims[1]; + int lbs[1]; + + /* If no elements were presented, we want to create an 
empty array */ + ndims = (astate->scalarstate->nelems > 0) ? 1 : 0; + dims[0] = astate->scalarstate->nelems; + lbs[0] = 1; + + result = makeMdArrayResult(astate->scalarstate, ndims, dims, lbs, + rcontext, release); + } + else + { + result = makeArrayResultArr(astate->arraystate, + rcontext, release); + } + return result; +} + + +Datum +array_larger(PG_FUNCTION_ARGS) +{ + if (array_cmp(fcinfo) > 0) + PG_RETURN_DATUM(PG_GETARG_DATUM(0)); + else + PG_RETURN_DATUM(PG_GETARG_DATUM(1)); +} + +Datum +array_smaller(PG_FUNCTION_ARGS) +{ + if (array_cmp(fcinfo) < 0) + PG_RETURN_DATUM(PG_GETARG_DATUM(0)); + else + PG_RETURN_DATUM(PG_GETARG_DATUM(1)); +} + + +typedef struct generate_subscripts_fctx +{ + int32 lower; + int32 upper; + bool reverse; +} generate_subscripts_fctx; + +/* + * generate_subscripts(array anyarray, dim int [, reverse bool]) + * Returns all subscripts of the array for any dimension + */ +Datum +generate_subscripts(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + MemoryContext oldcontext; + generate_subscripts_fctx *fctx; + + /* stuff done only on the first call of the function */ + if (SRF_IS_FIRSTCALL()) + { + AnyArrayType *v = PG_GETARG_ANY_ARRAY_P(0); + int reqdim = PG_GETARG_INT32(1); + int *lb, + *dimv; + + /* create a function context for cross-call persistence */ + funcctx = SRF_FIRSTCALL_INIT(); + + /* Sanity check: does it look like an array at all? */ + if (AARR_NDIM(v) <= 0 || AARR_NDIM(v) > MAXDIM) + SRF_RETURN_DONE(funcctx); + + /* Sanity check: was the requested dim valid */ + if (reqdim <= 0 || reqdim > AARR_NDIM(v)) + SRF_RETURN_DONE(funcctx); + + /* + * switch to memory context appropriate for multiple function calls + */ + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + fctx = (generate_subscripts_fctx *) palloc(sizeof(generate_subscripts_fctx)); + + lb = AARR_LBOUND(v); + dimv = AARR_DIMS(v); + + fctx->lower = lb[reqdim - 1]; + fctx->upper = dimv[reqdim - 1] + lb[reqdim - 1] - 1; + fctx->reverse = (PG_NARGS() < 3) ? false : PG_GETARG_BOOL(2); + + funcctx->user_fctx = fctx; + + MemoryContextSwitchTo(oldcontext); + } + + funcctx = SRF_PERCALL_SETUP(); + + fctx = funcctx->user_fctx; + + if (fctx->lower <= fctx->upper) + { + if (!fctx->reverse) + SRF_RETURN_NEXT(funcctx, Int32GetDatum(fctx->lower++)); + else + SRF_RETURN_NEXT(funcctx, Int32GetDatum(fctx->upper--)); + } + else + /* done when there are no more elements left */ + SRF_RETURN_DONE(funcctx); +} + +/* + * generate_subscripts_nodir + * Implements the 2-argument version of generate_subscripts + */ +Datum +generate_subscripts_nodir(PG_FUNCTION_ARGS) +{ + /* just call the other one -- it can handle both cases */ + return generate_subscripts(fcinfo); +} + +/* + * array_fill_with_lower_bounds + * Create and fill array with defined lower bounds. 
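+ *
+ * At the SQL level, for example, array_fill(7, ARRAY[3], ARRAY[2]) produces
+ * a one-dimensional array of three 7s with subscripts running from 2 to 4,
+ * displayed as '[2:4]={7,7,7}'.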
+ */ +Datum +array_fill_with_lower_bounds(PG_FUNCTION_ARGS) +{ + ArrayType *dims; + ArrayType *lbs; + ArrayType *result; + Oid elmtype; + Datum value; + bool isnull; + + if (PG_ARGISNULL(1) || PG_ARGISNULL(2)) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("dimension array or low bound array cannot be null"))); + + dims = PG_GETARG_ARRAYTYPE_P(1); + lbs = PG_GETARG_ARRAYTYPE_P(2); + + if (!PG_ARGISNULL(0)) + { + value = PG_GETARG_DATUM(0); + isnull = false; + } + else + { + value = 0; + isnull = true; + } + + elmtype = get_fn_expr_argtype(fcinfo->flinfo, 0); + if (!OidIsValid(elmtype)) + elog(ERROR, "could not determine data type of input"); + + result = array_fill_internal(dims, lbs, value, isnull, elmtype, fcinfo); + PG_RETURN_ARRAYTYPE_P(result); +} + +/* + * array_fill + * Create and fill array with default lower bounds. + */ +Datum +array_fill(PG_FUNCTION_ARGS) +{ + ArrayType *dims; + ArrayType *result; + Oid elmtype; + Datum value; + bool isnull; + + if (PG_ARGISNULL(1)) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("dimension array or low bound array cannot be null"))); + + dims = PG_GETARG_ARRAYTYPE_P(1); + + if (!PG_ARGISNULL(0)) + { + value = PG_GETARG_DATUM(0); + isnull = false; + } + else + { + value = 0; + isnull = true; + } + + elmtype = get_fn_expr_argtype(fcinfo->flinfo, 0); + if (!OidIsValid(elmtype)) + elog(ERROR, "could not determine data type of input"); + + result = array_fill_internal(dims, NULL, value, isnull, elmtype, fcinfo); + PG_RETURN_ARRAYTYPE_P(result); +} + +static ArrayType * +create_array_envelope(int ndims, int *dimv, int *lbsv, int nbytes, + Oid elmtype, int dataoffset) +{ + ArrayType *result; + + result = (ArrayType *) palloc0(nbytes); + SET_VARSIZE(result, nbytes); + result->ndim = ndims; + result->dataoffset = dataoffset; + result->elemtype = elmtype; + memcpy(ARR_DIMS(result), dimv, ndims * sizeof(int)); + memcpy(ARR_LBOUND(result), lbsv, ndims * sizeof(int)); + + return result; +} + +static ArrayType * +array_fill_internal(ArrayType *dims, ArrayType *lbs, + Datum value, bool isnull, Oid elmtype, + FunctionCallInfo fcinfo) +{ + ArrayType *result; + int *dimv; + int *lbsv; + int ndims; + int nitems; + int deflbs[MAXDIM]; + int16 elmlen; + bool elmbyval; + char elmalign; + ArrayMetaState *my_extra; + + /* + * Params checks + */ + if (ARR_NDIM(dims) > 1) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("wrong number of array subscripts"), + errdetail("Dimension array must be one dimensional."))); + + if (array_contains_nulls(dims)) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("dimension values cannot be null"))); + + dimv = (int *) ARR_DATA_PTR(dims); + ndims = (ARR_NDIM(dims) > 0) ? ARR_DIMS(dims)[0] : 0; + + if (ndims < 0) /* we do allow zero-dimension arrays */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid number of dimensions: %d", ndims))); + if (ndims > MAXDIM) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)", + ndims, MAXDIM))); + + if (lbs != NULL) + { + if (ARR_NDIM(lbs) > 1) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("wrong number of array subscripts"), + errdetail("Dimension array must be one dimensional."))); + + if (array_contains_nulls(lbs)) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("dimension values cannot be null"))); + + if (ndims != ((ARR_NDIM(lbs) > 0) ? 
ARR_DIMS(lbs)[0] : 0)) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("wrong number of array subscripts"), + errdetail("Low bound array has different size than dimensions array."))); + + lbsv = (int *) ARR_DATA_PTR(lbs); + } + else + { + int i; + + for (i = 0; i < MAXDIM; i++) + deflbs[i] = 1; + + lbsv = deflbs; + } + + /* This checks for overflow of the array dimensions */ + nitems = ArrayGetNItems(ndims, dimv); + ArrayCheckBounds(ndims, dimv, lbsv); + + /* fast track for empty array */ + if (nitems <= 0) + return construct_empty_array(elmtype); + + /* + * We arrange to look up info about element type only once per series of + * calls, assuming the element type doesn't change underneath us. + */ + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL) + { + fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(ArrayMetaState)); + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + my_extra->element_type = InvalidOid; + } + + if (my_extra->element_type != elmtype) + { + /* Get info about element type */ + get_typlenbyvalalign(elmtype, + &my_extra->typlen, + &my_extra->typbyval, + &my_extra->typalign); + my_extra->element_type = elmtype; + } + + elmlen = my_extra->typlen; + elmbyval = my_extra->typbyval; + elmalign = my_extra->typalign; + + /* compute required space */ + if (!isnull) + { + int i; + char *p; + int nbytes; + int totbytes; + + /* make sure data is not toasted */ + if (elmlen == -1) + value = PointerGetDatum(PG_DETOAST_DATUM(value)); + + nbytes = att_addlength_datum(0, elmlen, value); + nbytes = att_align_nominal(nbytes, elmalign); + Assert(nbytes > 0); + + totbytes = nbytes * nitems; + + /* check for overflow of multiplication or total request */ + if (totbytes / nbytes != nitems || + !AllocSizeIsValid(totbytes)) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("array size exceeds the maximum allowed (%d)", + (int) MaxAllocSize))); + + /* + * This addition can't overflow, but it might cause us to go past + * MaxAllocSize. We leave it to palloc to complain in that case. + */ + totbytes += ARR_OVERHEAD_NONULLS(ndims); + + result = create_array_envelope(ndims, dimv, lbsv, totbytes, + elmtype, 0); + + p = ARR_DATA_PTR(result); + for (i = 0; i < nitems; i++) + p += ArrayCastAndSet(value, elmlen, elmbyval, elmalign, p); + } + else + { + int nbytes; + int dataoffset; + + dataoffset = ARR_OVERHEAD_WITHNULLS(ndims, nitems); + nbytes = dataoffset; + + result = create_array_envelope(ndims, dimv, lbsv, nbytes, + elmtype, dataoffset); + + /* create_array_envelope already zeroed the bitmap, so we're done */ + } + + return result; +} + + +/* + * UNNEST + */ +Datum +array_unnest(PG_FUNCTION_ARGS) +{ + typedef struct + { + array_iter iter; + int nextelem; + int numelems; + int16 elmlen; + bool elmbyval; + char elmalign; + } array_unnest_fctx; + + FuncCallContext *funcctx; + array_unnest_fctx *fctx; + MemoryContext oldcontext; + + /* stuff done only on the first call of the function */ + if (SRF_IS_FIRSTCALL()) + { + AnyArrayType *arr; + + /* create a function context for cross-call persistence */ + funcctx = SRF_FIRSTCALL_INIT(); + + /* + * switch to memory context appropriate for multiple function calls + */ + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* + * Get the array value and detoast if needed. 
We can't do this + * earlier because if we have to detoast, we want the detoasted copy + * to be in multi_call_memory_ctx, so it will go away when we're done + * and not before. (If no detoast happens, we assume the originally + * passed array will stick around till then.) + */ + arr = PG_GETARG_ANY_ARRAY_P(0); + + /* allocate memory for user context */ + fctx = (array_unnest_fctx *) palloc(sizeof(array_unnest_fctx)); + + /* initialize state */ + array_iter_setup(&fctx->iter, arr); + fctx->nextelem = 0; + fctx->numelems = ArrayGetNItems(AARR_NDIM(arr), AARR_DIMS(arr)); + + if (VARATT_IS_EXPANDED_HEADER(arr)) + { + /* we can just grab the type data from expanded array */ + fctx->elmlen = arr->xpn.typlen; + fctx->elmbyval = arr->xpn.typbyval; + fctx->elmalign = arr->xpn.typalign; + } + else + get_typlenbyvalalign(AARR_ELEMTYPE(arr), + &fctx->elmlen, + &fctx->elmbyval, + &fctx->elmalign); + + funcctx->user_fctx = fctx; + MemoryContextSwitchTo(oldcontext); + } + + /* stuff done on every call of the function */ + funcctx = SRF_PERCALL_SETUP(); + fctx = funcctx->user_fctx; + + if (fctx->nextelem < fctx->numelems) + { + int offset = fctx->nextelem++; + Datum elem; + + elem = array_iter_next(&fctx->iter, &fcinfo->isnull, offset, + fctx->elmlen, fctx->elmbyval, fctx->elmalign); + + SRF_RETURN_NEXT(funcctx, elem); + } + else + { + /* do when there is no more left */ + SRF_RETURN_DONE(funcctx); + } +} + +/* + * Planner support function for array_unnest(anyarray) + */ +Datum +array_unnest_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestRows)) + { + /* Try to estimate the number of rows returned */ + SupportRequestRows *req = (SupportRequestRows *) rawreq; + + if (is_funcclause(req->node)) /* be paranoid */ + { + List *args = ((FuncExpr *) req->node)->args; + Node *arg1; + + /* We can use estimated argument values here */ + arg1 = estimate_expression_value(req->root, linitial(args)); + + req->rows = estimate_array_length(arg1); + ret = (Node *) req; + } + } + + PG_RETURN_POINTER(ret); +} + + +/* + * array_replace/array_remove support + * + * Find all array entries matching (not distinct from) search/search_isnull, + * and delete them if remove is true, else replace them with + * replace/replace_isnull. Comparisons are done using the specified + * collation. fcinfo is passed only for caching purposes. + */ +static ArrayType * +array_replace_internal(ArrayType *array, + Datum search, bool search_isnull, + Datum replace, bool replace_isnull, + bool remove, Oid collation, + FunctionCallInfo fcinfo) +{ + LOCAL_FCINFO(locfcinfo, 2); + ArrayType *result; + Oid element_type; + Datum *values; + bool *nulls; + int *dim; + int ndim; + int nitems, + nresult; + int i; + int32 nbytes = 0; + int32 dataoffset; + bool hasnulls; + int typlen; + bool typbyval; + char typalign; + char *arraydataptr; + bits8 *bitmap; + int bitmask; + bool changed = false; + TypeCacheEntry *typentry; + + element_type = ARR_ELEMTYPE(array); + ndim = ARR_NDIM(array); + dim = ARR_DIMS(array); + nitems = ArrayGetNItems(ndim, dim); + + /* Return input array unmodified if it is empty */ + if (nitems <= 0) + return array; + + /* + * We can't remove elements from multi-dimensional arrays, since the + * result might not be rectangular. 
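+ * (For example, removing a single element from a 2x3 array would leave five
+ * elements, which cannot form a rectangular two-dimensional array.)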
+ */ + if (remove && ndim > 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("removing elements from multidimensional arrays is not supported"))); + + /* + * We arrange to look up the equality function only once per series of + * calls, assuming the element type doesn't change underneath us. + */ + typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; + if (typentry == NULL || + typentry->type_id != element_type) + { + typentry = lookup_type_cache(element_type, + TYPECACHE_EQ_OPR_FINFO); + if (!OidIsValid(typentry->eq_opr_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify an equality operator for type %s", + format_type_be(element_type)))); + fcinfo->flinfo->fn_extra = (void *) typentry; + } + typlen = typentry->typlen; + typbyval = typentry->typbyval; + typalign = typentry->typalign; + + /* + * Detoast values if they are toasted. The replacement value must be + * detoasted for insertion into the result array, while detoasting the + * search value only once saves cycles. + */ + if (typlen == -1) + { + if (!search_isnull) + search = PointerGetDatum(PG_DETOAST_DATUM(search)); + if (!replace_isnull) + replace = PointerGetDatum(PG_DETOAST_DATUM(replace)); + } + + /* Prepare to apply the comparison operator */ + InitFunctionCallInfoData(*locfcinfo, &typentry->eq_opr_finfo, 2, + collation, NULL, NULL); + + /* Allocate temporary arrays for new values */ + values = (Datum *) palloc(nitems * sizeof(Datum)); + nulls = (bool *) palloc(nitems * sizeof(bool)); + + /* Loop over source data */ + arraydataptr = ARR_DATA_PTR(array); + bitmap = ARR_NULLBITMAP(array); + bitmask = 1; + hasnulls = false; + nresult = 0; + + for (i = 0; i < nitems; i++) + { + Datum elt; + bool isNull; + bool oprresult; + bool skip = false; + + /* Get source element, checking for NULL */ + if (bitmap && (*bitmap & bitmask) == 0) + { + isNull = true; + /* If searching for NULL, we have a match */ + if (search_isnull) + { + if (remove) + { + skip = true; + changed = true; + } + else if (!replace_isnull) + { + values[nresult] = replace; + isNull = false; + changed = true; + } + } + } + else + { + isNull = false; + elt = fetch_att(arraydataptr, typbyval, typlen); + arraydataptr = att_addlength_datum(arraydataptr, typlen, elt); + arraydataptr = (char *) att_align_nominal(arraydataptr, typalign); + + if (search_isnull) + { + /* no match possible, keep element */ + values[nresult] = elt; + } + else + { + /* + * Apply the operator to the element pair; treat NULL as false + */ + locfcinfo->args[0].value = elt; + locfcinfo->args[0].isnull = false; + locfcinfo->args[1].value = search; + locfcinfo->args[1].isnull = false; + locfcinfo->isnull = false; + oprresult = DatumGetBool(FunctionCallInvoke(locfcinfo)); + if (locfcinfo->isnull || !oprresult) + { + /* no match, keep element */ + values[nresult] = elt; + } + else + { + /* match, so replace or delete */ + changed = true; + if (remove) + skip = true; + else + { + values[nresult] = replace; + isNull = replace_isnull; + } + } + } + } + + if (!skip) + { + nulls[nresult] = isNull; + if (isNull) + hasnulls = true; + else + { + /* Update total result size */ + nbytes = att_addlength_datum(nbytes, typlen, values[nresult]); + nbytes = att_align_nominal(nbytes, typalign); + /* check for overflow of total request */ + if (!AllocSizeIsValid(nbytes)) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("array size exceeds the maximum allowed (%d)", + (int) MaxAllocSize))); + } + nresult++; + } + + /* advance 
bitmap pointer if any */ + if (bitmap) + { + bitmask <<= 1; + if (bitmask == 0x100) + { + bitmap++; + bitmask = 1; + } + } + } + + /* + * If not changed just return the original array + */ + if (!changed) + { + pfree(values); + pfree(nulls); + return array; + } + + /* If all elements were removed return an empty array */ + if (nresult == 0) + { + pfree(values); + pfree(nulls); + return construct_empty_array(element_type); + } + + /* Allocate and initialize the result array */ + if (hasnulls) + { + dataoffset = ARR_OVERHEAD_WITHNULLS(ndim, nresult); + nbytes += dataoffset; + } + else + { + dataoffset = 0; /* marker for no null bitmap */ + nbytes += ARR_OVERHEAD_NONULLS(ndim); + } + result = (ArrayType *) palloc0(nbytes); + SET_VARSIZE(result, nbytes); + result->ndim = ndim; + result->dataoffset = dataoffset; + result->elemtype = element_type; + memcpy(ARR_DIMS(result), ARR_DIMS(array), ndim * sizeof(int)); + memcpy(ARR_LBOUND(result), ARR_LBOUND(array), ndim * sizeof(int)); + + if (remove) + { + /* Adjust the result length */ + ARR_DIMS(result)[0] = nresult; + } + + /* Insert data into result array */ + CopyArrayEls(result, + values, nulls, nresult, + typlen, typbyval, typalign, + false); + + pfree(values); + pfree(nulls); + + return result; +} + +/* + * Remove any occurrences of an element from an array + * + * If used on a multi-dimensional array this will raise an error. + */ +Datum +array_remove(PG_FUNCTION_ARGS) +{ + ArrayType *array; + Datum search = PG_GETARG_DATUM(1); + bool search_isnull = PG_ARGISNULL(1); + + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + array = PG_GETARG_ARRAYTYPE_P(0); + + array = array_replace_internal(array, + search, search_isnull, + (Datum) 0, true, + true, PG_GET_COLLATION(), + fcinfo); + PG_RETURN_ARRAYTYPE_P(array); +} + +/* + * Replace any occurrences of an element in an array + */ +Datum +array_replace(PG_FUNCTION_ARGS) +{ + ArrayType *array; + Datum search = PG_GETARG_DATUM(1); + bool search_isnull = PG_ARGISNULL(1); + Datum replace = PG_GETARG_DATUM(2); + bool replace_isnull = PG_ARGISNULL(2); + + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + array = PG_GETARG_ARRAYTYPE_P(0); + + array = array_replace_internal(array, + search, search_isnull, + replace, replace_isnull, + false, PG_GET_COLLATION(), + fcinfo); + PG_RETURN_ARRAYTYPE_P(array); +} + +/* + * Implements width_bucket(anyelement, anyarray). + * + * 'thresholds' is an array containing lower bound values for each bucket; + * these must be sorted from smallest to largest, or bogus results will be + * produced. If N thresholds are supplied, the output is from 0 to N: + * 0 is for inputs < first threshold, N is for inputs >= last threshold. 
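+ *
+ * For example, with thresholds {1, 3, 7, 10}, an operand of 0 falls in
+ * bucket 0, an operand of 5 in bucket 2, and an operand of 10 in bucket 4.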
+ */ +Datum +width_bucket_array(PG_FUNCTION_ARGS) +{ + Datum operand = PG_GETARG_DATUM(0); + ArrayType *thresholds = PG_GETARG_ARRAYTYPE_P(1); + Oid collation = PG_GET_COLLATION(); + Oid element_type = ARR_ELEMTYPE(thresholds); + int result; + + /* Check input */ + if (ARR_NDIM(thresholds) > 1) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("thresholds must be one-dimensional array"))); + + if (array_contains_nulls(thresholds)) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("thresholds array must not contain NULLs"))); + + /* We have a dedicated implementation for float8 data */ + if (element_type == FLOAT8OID) + result = width_bucket_array_float8(operand, thresholds); + else + { + TypeCacheEntry *typentry; + + /* Cache information about the input type */ + typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; + if (typentry == NULL || + typentry->type_id != element_type) + { + typentry = lookup_type_cache(element_type, + TYPECACHE_CMP_PROC_FINFO); + if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify a comparison function for type %s", + format_type_be(element_type)))); + fcinfo->flinfo->fn_extra = (void *) typentry; + } + + /* + * We have separate implementation paths for fixed- and variable-width + * types, since indexing the array is a lot cheaper in the first case. + */ + if (typentry->typlen > 0) + result = width_bucket_array_fixed(operand, thresholds, + collation, typentry); + else + result = width_bucket_array_variable(operand, thresholds, + collation, typentry); + } + + /* Avoid leaking memory when handed toasted input. */ + PG_FREE_IF_COPY(thresholds, 1); + + PG_RETURN_INT32(result); +} + +/* + * width_bucket_array for float8 data. + */ +static int +width_bucket_array_float8(Datum operand, ArrayType *thresholds) +{ + float8 op = DatumGetFloat8(operand); + float8 *thresholds_data; + int left; + int right; + + /* + * Since we know the array contains no NULLs, we can just index it + * directly. + */ + thresholds_data = (float8 *) ARR_DATA_PTR(thresholds); + + left = 0; + right = ArrayGetNItems(ARR_NDIM(thresholds), ARR_DIMS(thresholds)); + + /* + * If the probe value is a NaN, it's greater than or equal to all possible + * threshold values (including other NaNs), so we need not search. Note + * that this would give the same result as searching even if the array + * contains multiple NaNs (as long as they're correctly sorted), since the + * loop logic will find the rightmost of multiple equal threshold values. + */ + if (isnan(op)) + return right; + + /* Find the bucket */ + while (left < right) + { + int mid = (left + right) / 2; + + if (isnan(thresholds_data[mid]) || op < thresholds_data[mid]) + right = mid; + else + left = mid + 1; + } + + return left; +} + +/* + * width_bucket_array for generic fixed-width data types. + */ +static int +width_bucket_array_fixed(Datum operand, + ArrayType *thresholds, + Oid collation, + TypeCacheEntry *typentry) +{ + LOCAL_FCINFO(locfcinfo, 2); + char *thresholds_data; + int typlen = typentry->typlen; + bool typbyval = typentry->typbyval; + int left; + int right; + + /* + * Since we know the array contains no NULLs, we can just index it + * directly. 
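+ * The mid'th threshold therefore lives at thresholds_data + mid * typlen,
+ * so each probe of the binary search below is O(1); the variable-width
+ * variant instead has to walk the array element by element.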
+ */ + thresholds_data = (char *) ARR_DATA_PTR(thresholds); + + InitFunctionCallInfoData(*locfcinfo, &typentry->cmp_proc_finfo, 2, + collation, NULL, NULL); + + /* Find the bucket */ + left = 0; + right = ArrayGetNItems(ARR_NDIM(thresholds), ARR_DIMS(thresholds)); + while (left < right) + { + int mid = (left + right) / 2; + char *ptr; + int32 cmpresult; + + ptr = thresholds_data + mid * typlen; + + locfcinfo->args[0].value = operand; + locfcinfo->args[0].isnull = false; + locfcinfo->args[1].value = fetch_att(ptr, typbyval, typlen); + locfcinfo->args[1].isnull = false; + + cmpresult = DatumGetInt32(FunctionCallInvoke(locfcinfo)); + + /* We don't expect comparison support functions to return null */ + Assert(!locfcinfo->isnull); + + if (cmpresult < 0) + right = mid; + else + left = mid + 1; + } + + return left; +} + +/* + * width_bucket_array for generic variable-width data types. + */ +static int +width_bucket_array_variable(Datum operand, + ArrayType *thresholds, + Oid collation, + TypeCacheEntry *typentry) +{ + LOCAL_FCINFO(locfcinfo, 2); + char *thresholds_data; + int typlen = typentry->typlen; + bool typbyval = typentry->typbyval; + char typalign = typentry->typalign; + int left; + int right; + + thresholds_data = (char *) ARR_DATA_PTR(thresholds); + + InitFunctionCallInfoData(*locfcinfo, &typentry->cmp_proc_finfo, 2, + collation, NULL, NULL); + + /* Find the bucket */ + left = 0; + right = ArrayGetNItems(ARR_NDIM(thresholds), ARR_DIMS(thresholds)); + while (left < right) + { + int mid = (left + right) / 2; + char *ptr; + int i; + int32 cmpresult; + + /* Locate mid'th array element by advancing from left element */ + ptr = thresholds_data; + for (i = left; i < mid; i++) + { + ptr = att_addlength_pointer(ptr, typlen, ptr); + ptr = (char *) att_align_nominal(ptr, typalign); + } + + locfcinfo->args[0].value = operand; + locfcinfo->args[0].isnull = false; + locfcinfo->args[1].value = fetch_att(ptr, typbyval, typlen); + locfcinfo->args[1].isnull = false; + + cmpresult = DatumGetInt32(FunctionCallInvoke(locfcinfo)); + + /* We don't expect comparison support functions to return null */ + Assert(!locfcinfo->isnull); + + if (cmpresult < 0) + right = mid; + else + { + left = mid + 1; + + /* + * Move the thresholds pointer to match new "left" index, so we + * don't have to seek over those elements again. This trick + * ensures we do only O(N) array indexing work, not O(N^2). + */ + ptr = att_addlength_pointer(ptr, typlen, ptr); + thresholds_data = (char *) att_align_nominal(ptr, typalign); + } + } + + return left; +} + +/* + * Trim the last N elements from an array by building an appropriate slice. + * Only the first dimension is trimmed. + */ +Datum +trim_array(PG_FUNCTION_ARGS) +{ + ArrayType *v = PG_GETARG_ARRAYTYPE_P(0); + int n = PG_GETARG_INT32(1); + int array_length = (ARR_NDIM(v) > 0) ? 
ARR_DIMS(v)[0] : 0; + int16 elmlen; + bool elmbyval; + char elmalign; + int lower[MAXDIM]; + int upper[MAXDIM]; + bool lowerProvided[MAXDIM]; + bool upperProvided[MAXDIM]; + Datum result; + + /* Per spec, throw an error if out of bounds */ + if (n < 0 || n > array_length) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_ELEMENT_ERROR), + errmsg("number of elements to trim must be between 0 and %d", + array_length))); + + /* Set all the bounds as unprovided except the first upper bound */ + memset(lowerProvided, false, sizeof(lowerProvided)); + memset(upperProvided, false, sizeof(upperProvided)); + if (ARR_NDIM(v) > 0) + { + upper[0] = ARR_LBOUND(v)[0] + array_length - n - 1; + upperProvided[0] = true; + } + + /* Fetch the needed information about the element type */ + get_typlenbyvalalign(ARR_ELEMTYPE(v), &elmlen, &elmbyval, &elmalign); + + /* Get the slice */ + result = array_get_slice(PointerGetDatum(v), 1, + upper, lower, upperProvided, lowerProvided, + -1, elmlen, elmbyval, elmalign); + + PG_RETURN_DATUM(result); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/arraysubs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/arraysubs.c new file mode 100644 index 00000000000..66666fea98e --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/arraysubs.c @@ -0,0 +1,577 @@ +/*------------------------------------------------------------------------- + * + * arraysubs.c + * Subscripting support functions for arrays. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/arraysubs.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "executor/execExpr.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "nodes/subscripting.h" +#include "parser/parse_coerce.h" +#include "parser/parse_expr.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" + + +/* SubscriptingRefState.workspace for array subscripting execution */ +typedef struct ArraySubWorkspace +{ + /* Values determined during expression compilation */ + Oid refelemtype; /* OID of the array element type */ + int16 refattrlength; /* typlen of array type */ + int16 refelemlength; /* typlen of the array element type */ + bool refelembyval; /* is the element type pass-by-value? */ + char refelemalign; /* typalign of the element type */ + + /* + * Subscript values converted to integers. Note that these arrays must be + * of length MAXDIM even when dealing with fewer subscripts, because + * array_get/set_slice may scribble on the extra entries. + */ + int upperindex[MAXDIM]; + int lowerindex[MAXDIM]; +} ArraySubWorkspace; + + +/* + * Finish parse analysis of a SubscriptingRef expression for an array. + * + * Transform the subscript expressions, coerce them to integers, + * and determine the result type of the SubscriptingRef node. + */ +static void +array_subscript_transform(SubscriptingRef *sbsref, + List *indirection, + ParseState *pstate, + bool isSlice, + bool isAssignment) +{ + List *upperIndexpr = NIL; + List *lowerIndexpr = NIL; + ListCell *idx; + + /* + * Transform the subscript expressions, and separate upper and lower + * bounds into two lists. 
+ * + * If we have a container slice expression, we convert any non-slice + * indirection items to slices by treating the single subscript as the + * upper bound and supplying an assumed lower bound of 1. + */ + foreach(idx, indirection) + { + A_Indices *ai = lfirst_node(A_Indices, idx); + Node *subexpr; + + if (isSlice) + { + if (ai->lidx) + { + subexpr = transformExpr(pstate, ai->lidx, pstate->p_expr_kind); + /* If it's not int4 already, try to coerce */ + subexpr = coerce_to_target_type(pstate, + subexpr, exprType(subexpr), + INT4OID, -1, + COERCION_ASSIGNMENT, + COERCE_IMPLICIT_CAST, + -1); + if (subexpr == NULL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("array subscript must have type integer"), + parser_errposition(pstate, exprLocation(ai->lidx)))); + } + else if (!ai->is_slice) + { + /* Make a constant 1 */ + subexpr = (Node *) makeConst(INT4OID, + -1, + InvalidOid, + sizeof(int32), + Int32GetDatum(1), + false, + true); /* pass by value */ + } + else + { + /* Slice with omitted lower bound, put NULL into the list */ + subexpr = NULL; + } + lowerIndexpr = lappend(lowerIndexpr, subexpr); + } + else + Assert(ai->lidx == NULL && !ai->is_slice); + + if (ai->uidx) + { + subexpr = transformExpr(pstate, ai->uidx, pstate->p_expr_kind); + /* If it's not int4 already, try to coerce */ + subexpr = coerce_to_target_type(pstate, + subexpr, exprType(subexpr), + INT4OID, -1, + COERCION_ASSIGNMENT, + COERCE_IMPLICIT_CAST, + -1); + if (subexpr == NULL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("array subscript must have type integer"), + parser_errposition(pstate, exprLocation(ai->uidx)))); + } + else + { + /* Slice with omitted upper bound, put NULL into the list */ + Assert(isSlice && ai->is_slice); + subexpr = NULL; + } + upperIndexpr = lappend(upperIndexpr, subexpr); + } + + /* ... and store the transformed lists into the SubscriptRef node */ + sbsref->refupperindexpr = upperIndexpr; + sbsref->reflowerindexpr = lowerIndexpr; + + /* Verify subscript list lengths are within implementation limit */ + if (list_length(upperIndexpr) > MAXDIM) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)", + list_length(upperIndexpr), MAXDIM))); + /* We need not check lowerIndexpr separately */ + + /* + * Determine the result type of the subscripting operation. It's the same + * as the array type if we're slicing, else it's the element type. In + * either case, the typmod is the same as the array's, so we need not + * change reftypmod. + */ + if (isSlice) + sbsref->refrestype = sbsref->refcontainertype; + else + sbsref->refrestype = sbsref->refelemtype; +} + +/* + * During execution, process the subscripts in a SubscriptingRef expression. + * + * The subscript expressions are already evaluated in Datum form in the + * SubscriptingRefState's arrays. Check and convert them as necessary. + * + * If any subscript is NULL, we throw error in assignment cases, or in fetch + * cases set result to NULL and return false (instructing caller to skip the + * rest of the SubscriptingRef sequence). + * + * We convert all the subscripts to plain integers and save them in the + * sbsrefstate->workspace arrays. 
+ */ +static bool +array_subscript_check_subscripts(ExprState *state, + ExprEvalStep *op, + ExprContext *econtext) +{ + SubscriptingRefState *sbsrefstate = op->d.sbsref_subscript.state; + ArraySubWorkspace *workspace = (ArraySubWorkspace *) sbsrefstate->workspace; + + /* Process upper subscripts */ + for (int i = 0; i < sbsrefstate->numupper; i++) + { + if (sbsrefstate->upperprovided[i]) + { + /* If any index expr yields NULL, result is NULL or error */ + if (sbsrefstate->upperindexnull[i]) + { + if (sbsrefstate->isassignment) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("array subscript in assignment must not be null"))); + *op->resnull = true; + return false; + } + workspace->upperindex[i] = DatumGetInt32(sbsrefstate->upperindex[i]); + } + } + + /* Likewise for lower subscripts */ + for (int i = 0; i < sbsrefstate->numlower; i++) + { + if (sbsrefstate->lowerprovided[i]) + { + /* If any index expr yields NULL, result is NULL or error */ + if (sbsrefstate->lowerindexnull[i]) + { + if (sbsrefstate->isassignment) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("array subscript in assignment must not be null"))); + *op->resnull = true; + return false; + } + workspace->lowerindex[i] = DatumGetInt32(sbsrefstate->lowerindex[i]); + } + } + + return true; +} + +/* + * Evaluate SubscriptingRef fetch for an array element. + * + * Source container is in step's result variable (it's known not NULL, since + * we set fetch_strict to true), and indexes have already been evaluated into + * workspace array. + */ +static void +array_subscript_fetch(ExprState *state, + ExprEvalStep *op, + ExprContext *econtext) +{ + SubscriptingRefState *sbsrefstate = op->d.sbsref.state; + ArraySubWorkspace *workspace = (ArraySubWorkspace *) sbsrefstate->workspace; + + /* Should not get here if source array (or any subscript) is null */ + Assert(!(*op->resnull)); + + *op->resvalue = array_get_element(*op->resvalue, + sbsrefstate->numupper, + workspace->upperindex, + workspace->refattrlength, + workspace->refelemlength, + workspace->refelembyval, + workspace->refelemalign, + op->resnull); +} + +/* + * Evaluate SubscriptingRef fetch for an array slice. + * + * Source container is in step's result variable (it's known not NULL, since + * we set fetch_strict to true), and indexes have already been evaluated into + * workspace array. + */ +static void +array_subscript_fetch_slice(ExprState *state, + ExprEvalStep *op, + ExprContext *econtext) +{ + SubscriptingRefState *sbsrefstate = op->d.sbsref.state; + ArraySubWorkspace *workspace = (ArraySubWorkspace *) sbsrefstate->workspace; + + /* Should not get here if source array (or any subscript) is null */ + Assert(!(*op->resnull)); + + *op->resvalue = array_get_slice(*op->resvalue, + sbsrefstate->numupper, + workspace->upperindex, + workspace->lowerindex, + sbsrefstate->upperprovided, + sbsrefstate->lowerprovided, + workspace->refattrlength, + workspace->refelemlength, + workspace->refelembyval, + workspace->refelemalign); + /* The slice is never NULL, so no need to change *op->resnull */ +} + +/* + * Evaluate SubscriptingRef assignment for an array element assignment. + * + * Input container (possibly null) is in result area, replacement value is in + * SubscriptingRefState's replacevalue/replacenull. 
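+ *
+ * As the code below explains, a NULL source array of a varlena array type is
+ * treated as an empty array, so assigning to subscript 3 of a NULL array
+ * yields a singleton result with bounds [3:3] rather than an error.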
+ */ +static void +array_subscript_assign(ExprState *state, + ExprEvalStep *op, + ExprContext *econtext) +{ + SubscriptingRefState *sbsrefstate = op->d.sbsref.state; + ArraySubWorkspace *workspace = (ArraySubWorkspace *) sbsrefstate->workspace; + Datum arraySource = *op->resvalue; + + /* + * For an assignment to a fixed-length array type, both the original array + * and the value to be assigned into it must be non-NULL, else we punt and + * return the original array. + */ + if (workspace->refattrlength > 0) + { + if (*op->resnull || sbsrefstate->replacenull) + return; + } + + /* + * For assignment to varlena arrays, we handle a NULL original array by + * substituting an empty (zero-dimensional) array; insertion of the new + * element will result in a singleton array value. It does not matter + * whether the new element is NULL. + */ + if (*op->resnull) + { + arraySource = PointerGetDatum(construct_empty_array(workspace->refelemtype)); + *op->resnull = false; + } + + *op->resvalue = array_set_element(arraySource, + sbsrefstate->numupper, + workspace->upperindex, + sbsrefstate->replacevalue, + sbsrefstate->replacenull, + workspace->refattrlength, + workspace->refelemlength, + workspace->refelembyval, + workspace->refelemalign); + /* The result is never NULL, so no need to change *op->resnull */ +} + +/* + * Evaluate SubscriptingRef assignment for an array slice assignment. + * + * Input container (possibly null) is in result area, replacement value is in + * SubscriptingRefState's replacevalue/replacenull. + */ +static void +array_subscript_assign_slice(ExprState *state, + ExprEvalStep *op, + ExprContext *econtext) +{ + SubscriptingRefState *sbsrefstate = op->d.sbsref.state; + ArraySubWorkspace *workspace = (ArraySubWorkspace *) sbsrefstate->workspace; + Datum arraySource = *op->resvalue; + + /* + * For an assignment to a fixed-length array type, both the original array + * and the value to be assigned into it must be non-NULL, else we punt and + * return the original array. + */ + if (workspace->refattrlength > 0) + { + if (*op->resnull || sbsrefstate->replacenull) + return; + } + + /* + * For assignment to varlena arrays, we handle a NULL original array by + * substituting an empty (zero-dimensional) array; insertion of the new + * element will result in a singleton array value. It does not matter + * whether the new element is NULL. + */ + if (*op->resnull) + { + arraySource = PointerGetDatum(construct_empty_array(workspace->refelemtype)); + *op->resnull = false; + } + + *op->resvalue = array_set_slice(arraySource, + sbsrefstate->numupper, + workspace->upperindex, + workspace->lowerindex, + sbsrefstate->upperprovided, + sbsrefstate->lowerprovided, + sbsrefstate->replacevalue, + sbsrefstate->replacenull, + workspace->refattrlength, + workspace->refelemlength, + workspace->refelembyval, + workspace->refelemalign); + /* The result is never NULL, so no need to change *op->resnull */ +} + +/* + * Compute old array element value for a SubscriptingRef assignment + * expression. Will only be called if the new-value subexpression + * contains SubscriptingRef or FieldStore. This is the same as the + * regular fetch case, except that we have to handle a null array, + * and the value should be stored into the SubscriptingRefState's + * prevvalue/prevnull fields. 
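+ *
+ * Such a nested new-value subexpression arises, for example, when only part
+ * of an array element is being replaced (say, one field of a composite
+ * element via FieldStore): the old element must be fetched first so that its
+ * untouched parts carry over into the stored result.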
+ */ +static void +array_subscript_fetch_old(ExprState *state, + ExprEvalStep *op, + ExprContext *econtext) +{ + SubscriptingRefState *sbsrefstate = op->d.sbsref.state; + ArraySubWorkspace *workspace = (ArraySubWorkspace *) sbsrefstate->workspace; + + if (*op->resnull) + { + /* whole array is null, so any element is too */ + sbsrefstate->prevvalue = (Datum) 0; + sbsrefstate->prevnull = true; + } + else + sbsrefstate->prevvalue = array_get_element(*op->resvalue, + sbsrefstate->numupper, + workspace->upperindex, + workspace->refattrlength, + workspace->refelemlength, + workspace->refelembyval, + workspace->refelemalign, + &sbsrefstate->prevnull); +} + +/* + * Compute old array slice value for a SubscriptingRef assignment + * expression. Will only be called if the new-value subexpression + * contains SubscriptingRef or FieldStore. This is the same as the + * regular fetch case, except that we have to handle a null array, + * and the value should be stored into the SubscriptingRefState's + * prevvalue/prevnull fields. + * + * Note: this is presently dead code, because the new value for a + * slice would have to be an array, so it couldn't directly contain a + * FieldStore; nor could it contain a SubscriptingRef assignment, since + * we consider adjacent subscripts to index one multidimensional array + * not nested array types. Future generalizations might make this + * reachable, however. + */ +static void +array_subscript_fetch_old_slice(ExprState *state, + ExprEvalStep *op, + ExprContext *econtext) +{ + SubscriptingRefState *sbsrefstate = op->d.sbsref.state; + ArraySubWorkspace *workspace = (ArraySubWorkspace *) sbsrefstate->workspace; + + if (*op->resnull) + { + /* whole array is null, so any slice is too */ + sbsrefstate->prevvalue = (Datum) 0; + sbsrefstate->prevnull = true; + } + else + { + sbsrefstate->prevvalue = array_get_slice(*op->resvalue, + sbsrefstate->numupper, + workspace->upperindex, + workspace->lowerindex, + sbsrefstate->upperprovided, + sbsrefstate->lowerprovided, + workspace->refattrlength, + workspace->refelemlength, + workspace->refelembyval, + workspace->refelemalign); + /* slices of non-null arrays are never null */ + sbsrefstate->prevnull = false; + } +} + +/* + * Set up execution state for an array subscript operation. + */ +static void +array_exec_setup(const SubscriptingRef *sbsref, + SubscriptingRefState *sbsrefstate, + SubscriptExecSteps *methods) +{ + bool is_slice = (sbsrefstate->numlower != 0); + ArraySubWorkspace *workspace; + + /* + * Enforce the implementation limit on number of array subscripts. This + * check isn't entirely redundant with checking at parse time; conceivably + * the expression was stored by a backend with a different MAXDIM value. + */ + if (sbsrefstate->numupper > MAXDIM) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)", + sbsrefstate->numupper, MAXDIM))); + + /* Should be impossible if parser is sane, but check anyway: */ + if (sbsrefstate->numlower != 0 && + sbsrefstate->numupper != sbsrefstate->numlower) + elog(ERROR, "upper and lower index lists are not same length"); + + /* + * Allocate type-specific workspace. + */ + workspace = (ArraySubWorkspace *) palloc(sizeof(ArraySubWorkspace)); + sbsrefstate->workspace = workspace; + + /* + * Collect datatype details we'll need at execution. 
+ */ + workspace->refelemtype = sbsref->refelemtype; + workspace->refattrlength = get_typlen(sbsref->refcontainertype); + get_typlenbyvalalign(sbsref->refelemtype, + &workspace->refelemlength, + &workspace->refelembyval, + &workspace->refelemalign); + + /* + * Pass back pointers to appropriate step execution functions. + */ + methods->sbs_check_subscripts = array_subscript_check_subscripts; + if (is_slice) + { + methods->sbs_fetch = array_subscript_fetch_slice; + methods->sbs_assign = array_subscript_assign_slice; + methods->sbs_fetch_old = array_subscript_fetch_old_slice; + } + else + { + methods->sbs_fetch = array_subscript_fetch; + methods->sbs_assign = array_subscript_assign; + methods->sbs_fetch_old = array_subscript_fetch_old; + } +} + +/* + * array_subscript_handler + * Subscripting handler for standard varlena arrays. + * + * This should be used only for "true" array types, which have array headers + * as understood by the varlena array routines, and are referenced by the + * element type's pg_type.typarray field. + */ +Datum +array_subscript_handler(PG_FUNCTION_ARGS) +{ + static const SubscriptRoutines sbsroutines = { + .transform = array_subscript_transform, + .exec_setup = array_exec_setup, + .fetch_strict = true, /* fetch returns NULL for NULL inputs */ + .fetch_leakproof = true, /* fetch returns NULL for bad subscript */ + .store_leakproof = false /* ... but assignment throws error */ + }; + + PG_RETURN_POINTER(&sbsroutines); +} + +/* + * raw_array_subscript_handler + * Subscripting handler for "raw" arrays. + * + * A "raw" array just contains N independent instances of the element type. + * Currently we require both the element type and the array type to be fixed + * length, but it wouldn't be too hard to relax that for the array type. + * + * As of now, all the support code is shared with standard varlena arrays. + * We may split those into separate code paths, but probably that would yield + * only marginal speedups. The main point of having a separate handler is + * so that pg_type.typsubscript clearly indicates the type's semantics. + */ +Datum +raw_array_subscript_handler(PG_FUNCTION_ARGS) +{ + static const SubscriptRoutines sbsroutines = { + .transform = array_subscript_transform, + .exec_setup = array_exec_setup, + .fetch_strict = true, /* fetch returns NULL for NULL inputs */ + .fetch_leakproof = true, /* fetch returns NULL for bad subscript */ + .store_leakproof = false /* ... but assignment throws error */ + }; + + PG_RETURN_POINTER(&sbsroutines); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/arrayutils.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/arrayutils.c new file mode 100644 index 00000000000..aed799234cd --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/arrayutils.c @@ -0,0 +1,279 @@ +/*------------------------------------------------------------------------- + * + * arrayutils.c + * This file contains some support routines required for array functions. 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/arrayutils.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "catalog/pg_type.h" +#include "common/int.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/memutils.h" + + +/* + * Convert subscript list into linear element number (from 0) + * + * We assume caller has already range-checked the dimensions and subscripts, + * so no overflow is possible. + */ +int +ArrayGetOffset(int n, const int *dim, const int *lb, const int *indx) +{ + int i, + scale = 1, + offset = 0; + + for (i = n - 1; i >= 0; i--) + { + offset += (indx[i] - lb[i]) * scale; + scale *= dim[i]; + } + return offset; +} + +/* + * Same, but subscripts are assumed 0-based, and use a scale array + * instead of raw dimension data (see mda_get_prod to create scale array) + */ +int +ArrayGetOffset0(int n, const int *tup, const int *scale) +{ + int i, + lin = 0; + + for (i = 0; i < n; i++) + lin += tup[i] * scale[i]; + return lin; +} + +/* + * Convert array dimensions into number of elements + * + * This must do overflow checking, since it is used to validate that a user + * dimensionality request doesn't overflow what we can handle. + * + * The multiplication overflow check only works on machines that have int64 + * arithmetic, but that is nearly all platforms these days, and doing check + * divides for those that don't seems way too expensive. + */ +int +ArrayGetNItems(int ndim, const int *dims) +{ + return ArrayGetNItemsSafe(ndim, dims, NULL); +} + +/* + * This entry point can return the error into an ErrorSaveContext + * instead of throwing an exception. -1 is returned after an error. + */ +int +ArrayGetNItemsSafe(int ndim, const int *dims, struct Node *escontext) +{ + int32 ret; + int i; + + if (ndim <= 0) + return 0; + ret = 1; + for (i = 0; i < ndim; i++) + { + int64 prod; + + /* A negative dimension implies that UB-LB overflowed ... */ + if (dims[i] < 0) + ereturn(escontext, -1, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("array size exceeds the maximum allowed (%d)", + (int) MaxArraySize))); + + prod = (int64) ret * (int64) dims[i]; + + ret = (int32) prod; + if ((int64) ret != prod) + ereturn(escontext, -1, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("array size exceeds the maximum allowed (%d)", + (int) MaxArraySize))); + } + Assert(ret >= 0); + if ((Size) ret > MaxArraySize) + ereturn(escontext, -1, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("array size exceeds the maximum allowed (%d)", + (int) MaxArraySize))); + return (int) ret; +} + +/* + * Verify sanity of proposed lower-bound values for an array + * + * The lower-bound values must not be so large as to cause overflow when + * calculating subscripts, e.g. lower bound 2147483640 with length 10 + * must be disallowed. We actually insist that dims[i] + lb[i] be + * computable without overflow, meaning that an array with last subscript + * equal to INT_MAX will be disallowed. + * + * It is assumed that the caller already called ArrayGetNItems, so that + * overflowed (negative) dims[] values have been eliminated. 
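+ * To make the example above concrete: with lb[i] = 2147483640 and
+ * dims[i] = 10, dims[i] + lb[i] = 2147483650, which exceeds INT_MAX
+ * (2147483647), so pg_add_s32_overflow() reports overflow and the bound
+ * is rejected; the largest last subscript we accept is INT_MAX - 1.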
+ */ +void +ArrayCheckBounds(int ndim, const int *dims, const int *lb) +{ + (void) ArrayCheckBoundsSafe(ndim, dims, lb, NULL); +} + +/* + * This entry point can return the error into an ErrorSaveContext + * instead of throwing an exception. + */ +bool +ArrayCheckBoundsSafe(int ndim, const int *dims, const int *lb, + struct Node *escontext) +{ + int i; + + for (i = 0; i < ndim; i++) + { + /* PG_USED_FOR_ASSERTS_ONLY prevents variable-isn't-read warnings */ + int32 sum PG_USED_FOR_ASSERTS_ONLY; + + if (pg_add_s32_overflow(dims[i], lb[i], &sum)) + ereturn(escontext, false, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("array lower bound is too large: %d", + lb[i]))); + } + + return true; +} + +/* + * Compute ranges (sub-array dimensions) for an array slice + * + * We assume caller has validated slice endpoints, so overflow is impossible + */ +void +mda_get_range(int n, int *span, const int *st, const int *endp) +{ + int i; + + for (i = 0; i < n; i++) + span[i] = endp[i] - st[i] + 1; +} + +/* + * Compute products of array dimensions, ie, scale factors for subscripts + * + * We assume caller has validated dimensions, so overflow is impossible + */ +void +mda_get_prod(int n, const int *range, int *prod) +{ + int i; + + prod[n - 1] = 1; + for (i = n - 2; i >= 0; i--) + prod[i] = prod[i + 1] * range[i + 1]; +} + +/* + * From products of whole-array dimensions and spans of a sub-array, + * compute offset distances needed to step through subarray within array + * + * We assume caller has validated dimensions, so overflow is impossible + */ +void +mda_get_offset_values(int n, int *dist, const int *prod, const int *span) +{ + int i, + j; + + dist[n - 1] = 0; + for (j = n - 2; j >= 0; j--) + { + dist[j] = prod[j] - 1; + for (i = j + 1; i < n; i++) + dist[j] -= (span[i] - 1) * prod[i]; + } +} + +/* + * Generates the tuple that is lexicographically one greater than the current + * n-tuple in "curr", with the restriction that the i-th element of "curr" is + * less than the i-th element of "span". + * + * Returns -1 if no next tuple exists, else the subscript position (0..n-1) + * corresponding to the dimension to advance along. + * + * We assume caller has validated dimensions, so overflow is impossible + */ +int +mda_next_tuple(int n, int *curr, const int *span) +{ + int i; + + if (n <= 0) + return -1; + + curr[n - 1] = (curr[n - 1] + 1) % span[n - 1]; + for (i = n - 1; i && curr[i] == 0; i--) + curr[i - 1] = (curr[i - 1] + 1) % span[i - 1]; + + if (i) + return i; + if (curr[0]) + return 0; + + return -1; +} + +/* + * ArrayGetIntegerTypmods: verify that argument is a 1-D cstring array, + * and get the contents converted to integers. Returns a palloc'd array + * and places the length at *n. 
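+ * For example (an illustrative case, not an exhaustive contract): a typmod
+ * input function handling numeric(10,2) receives the cstring[] {"10","2"}
+ * and gets back the int32 array {10, 2} with *n set to 2.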
+ */ +int32 * +ArrayGetIntegerTypmods(ArrayType *arr, int *n) +{ + int32 *result; + Datum *elem_values; + int i; + + if (ARR_ELEMTYPE(arr) != CSTRINGOID) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_ELEMENT_ERROR), + errmsg("typmod array must be type cstring[]"))); + + if (ARR_NDIM(arr) != 1) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("typmod array must be one-dimensional"))); + + if (array_contains_nulls(arr)) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("typmod array must not contain nulls"))); + + deconstruct_array_builtin(arr, CSTRINGOID, &elem_values, NULL, n); + + result = (int32 *) palloc(*n * sizeof(int32)); + + for (i = 0; i < *n; i++) + result[i] = pg_strtoint32(DatumGetCString(elem_values[i])); + + pfree(elem_values); + + return result; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/ascii.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/ascii.c new file mode 100644 index 00000000000..b6944d80934 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/ascii.c @@ -0,0 +1,199 @@ +/*----------------------------------------------------------------------- + * ascii.c + * The PostgreSQL routine for string to ascii conversion. + * + * Portions Copyright (c) 1999-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/adt/ascii.c + * + *----------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "mb/pg_wchar.h" +#include "utils/ascii.h" +#include "utils/builtins.h" +#include "varatt.h" + +static void pg_to_ascii(unsigned char *src, unsigned char *src_end, + unsigned char *dest, int enc); +static text *encode_to_ascii(text *data, int enc); + + +/* ---------- + * to_ascii + * ---------- + */ +static void +pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc) +{ + unsigned char *x; + const unsigned char *ascii; + int range; + + /* + * relevant start for an encoding + */ +#define RANGE_128 128 +#define RANGE_160 160 + + if (enc == PG_LATIN1) + { + /* + * ISO-8859-1 <range: 160 -- 255> + */ + ascii = (const unsigned char *) " cL Y \"Ca -R 'u ., ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty"; + range = RANGE_160; + } + else if (enc == PG_LATIN2) + { + /* + * ISO-8859-2 <range: 160 -- 255> + */ + ascii = (const unsigned char *) " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt."; + range = RANGE_160; + } + else if (enc == PG_LATIN9) + { + /* + * ISO-8859-15 <range: 160 -- 255> + */ + ascii = (const unsigned char *) " cL YS sCa -R Zu .z EeY?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty"; + range = RANGE_160; + } + else if (enc == PG_WIN1250) + { + /* + * Window CP1250 <range: 128 -- 255> + */ + ascii = (const unsigned char *) " ' \" %S<STZZ `'\"\".-- s>stzz L A \"CS -RZ ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt "; + range = RANGE_128; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("encoding conversion from %s to ASCII not supported", + pg_encoding_to_char(enc)))); + return; /* keep compiler quiet */ + } + + /* + * Encode + */ + for (x = src; x < src_end; x++) + { + if (*x < 128) + *dest++ = *x; + else if (*x < range) + *dest++ = ' '; /* bogus 128 to 'range' */ + else + *dest++ = ascii[*x - range]; + } +} + +/* ---------- + * encode text + * + * The text datum is overwritten in-place, 
therefore this coding method + * cannot support conversions that change the string length! + * ---------- + */ +static text * +encode_to_ascii(text *data, int enc) +{ + pg_to_ascii((unsigned char *) VARDATA(data), /* src */ + (unsigned char *) (data) + VARSIZE(data), /* src end */ + (unsigned char *) VARDATA(data), /* dest */ + enc); /* encoding */ + + return data; +} + +/* ---------- + * convert to ASCII - enc is set as 'name' arg. + * ---------- + */ +Datum +to_ascii_encname(PG_FUNCTION_ARGS) +{ + text *data = PG_GETARG_TEXT_P_COPY(0); + char *encname = NameStr(*PG_GETARG_NAME(1)); + int enc = pg_char_to_encoding(encname); + + if (enc < 0) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("%s is not a valid encoding name", encname))); + + PG_RETURN_TEXT_P(encode_to_ascii(data, enc)); +} + +/* ---------- + * convert to ASCII - enc is set as int4 + * ---------- + */ +Datum +to_ascii_enc(PG_FUNCTION_ARGS) +{ + text *data = PG_GETARG_TEXT_P_COPY(0); + int enc = PG_GETARG_INT32(1); + + if (!PG_VALID_ENCODING(enc)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("%d is not a valid encoding code", enc))); + + PG_RETURN_TEXT_P(encode_to_ascii(data, enc)); +} + +/* ---------- + * convert to ASCII - current enc is DatabaseEncoding + * ---------- + */ +Datum +to_ascii_default(PG_FUNCTION_ARGS) +{ + text *data = PG_GETARG_TEXT_P_COPY(0); + int enc = GetDatabaseEncoding(); + + PG_RETURN_TEXT_P(encode_to_ascii(data, enc)); +} + +/* ---------- + * Copy a string in an arbitrary backend-safe encoding, converting it to a + * valid ASCII string by replacing non-ASCII bytes with '?'. Otherwise the + * behavior is identical to strlcpy(), except that we don't bother with a + * return value. + * + * This must not trigger ereport(ERROR), as it is called in postmaster. + * ---------- + */ +void +ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz) +{ + if (destsiz == 0) /* corner case: no room for trailing nul */ + return; + + while (--destsiz > 0) + { + /* use unsigned char here to avoid compiler warning */ + unsigned char ch = *src++; + + if (ch == '\0') + break; + /* Keep printable ASCII characters */ + if (32 <= ch && ch <= 127) + *dest = ch; + /* White-space is also OK */ + else if (ch == '\n' || ch == '\r' || ch == '\t') + *dest = ch; + /* Everything else is replaced with '?' */ + else + *dest = '?'; + dest++; + } + + *dest = '\0'; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/bool.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/bool.c new file mode 100644 index 00000000000..cc4bd550354 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/bool.c @@ -0,0 +1,401 @@ +/*------------------------------------------------------------------------- + * + * bool.c + * Functions for the built-in type "bool". + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/bool.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <ctype.h> + +#include "libpq/pqformat.h" +#include "utils/builtins.h" + +/* + * Try to interpret value as boolean value. Valid values are: true, + * false, yes, no, on, off, 1, 0; as well as unique prefixes thereof. + * If the string parses okay, return true, else false. + * If okay and result is not NULL, return the value in *result. 
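+ * Examples (illustrative): "t", "tru", "YES" and "off" are all accepted;
+ * a bare "o" is rejected as ambiguous between "on" and "off" (see the
+ * special case below); any unrecognized string just makes this return
+ * false rather than raising an error.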
+ */ +bool +parse_bool(const char *value, bool *result) +{ + return parse_bool_with_len(value, strlen(value), result); +} + +bool +parse_bool_with_len(const char *value, size_t len, bool *result) +{ + switch (*value) + { + case 't': + case 'T': + if (pg_strncasecmp(value, "true", len) == 0) + { + if (result) + *result = true; + return true; + } + break; + case 'f': + case 'F': + if (pg_strncasecmp(value, "false", len) == 0) + { + if (result) + *result = false; + return true; + } + break; + case 'y': + case 'Y': + if (pg_strncasecmp(value, "yes", len) == 0) + { + if (result) + *result = true; + return true; + } + break; + case 'n': + case 'N': + if (pg_strncasecmp(value, "no", len) == 0) + { + if (result) + *result = false; + return true; + } + break; + case 'o': + case 'O': + /* 'o' is not unique enough */ + if (pg_strncasecmp(value, "on", (len > 2 ? len : 2)) == 0) + { + if (result) + *result = true; + return true; + } + else if (pg_strncasecmp(value, "off", (len > 2 ? len : 2)) == 0) + { + if (result) + *result = false; + return true; + } + break; + case '1': + if (len == 1) + { + if (result) + *result = true; + return true; + } + break; + case '0': + if (len == 1) + { + if (result) + *result = false; + return true; + } + break; + default: + break; + } + + if (result) + *result = false; /* suppress compiler warning */ + return false; +} + +/***************************************************************************** + * USER I/O ROUTINES * + *****************************************************************************/ + +/* + * boolin - converts "t" or "f" to 1 or 0 + * + * Check explicitly for "true/false" and TRUE/FALSE, 1/0, YES/NO, ON/OFF. + * Reject other values. + * + * In the switch statement, check the most-used possibilities first. + */ +Datum +boolin(PG_FUNCTION_ARGS) +{ + const char *in_str = PG_GETARG_CSTRING(0); + const char *str; + size_t len; + bool result; + + /* + * Skip leading and trailing whitespace + */ + str = in_str; + while (isspace((unsigned char) *str)) + str++; + + len = strlen(str); + while (len > 0 && isspace((unsigned char) str[len - 1])) + len--; + + if (parse_bool_with_len(str, len, &result)) + PG_RETURN_BOOL(result); + + ereturn(fcinfo->context, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "boolean", in_str))); +} + +/* + * boolout - converts 1 or 0 to "t" or "f" + */ +Datum +boolout(PG_FUNCTION_ARGS) +{ + bool b = PG_GETARG_BOOL(0); + char *result = (char *) palloc(2); + + result[0] = (b) ? 't' : 'f'; + result[1] = '\0'; + PG_RETURN_CSTRING(result); +} + +/* + * boolrecv - converts external binary format to bool + * + * The external representation is one byte. Any nonzero value is taken + * as "true". + */ +Datum +boolrecv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + int ext; + + ext = pq_getmsgbyte(buf); + PG_RETURN_BOOL(ext != 0); +} + +/* + * boolsend - converts bool to binary format + */ +Datum +boolsend(PG_FUNCTION_ARGS) +{ + bool arg1 = PG_GETARG_BOOL(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendbyte(&buf, arg1 ? 
1 : 0); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * booltext - cast function for bool => text + * + * We need this because it's different from the behavior of boolout(); + * this function follows the SQL-spec result (except for producing lower case) + */ +Datum +booltext(PG_FUNCTION_ARGS) +{ + bool arg1 = PG_GETARG_BOOL(0); + const char *str; + + if (arg1) + str = "true"; + else + str = "false"; + + PG_RETURN_TEXT_P(cstring_to_text(str)); +} + + +/***************************************************************************** + * PUBLIC ROUTINES * + *****************************************************************************/ + +Datum +booleq(PG_FUNCTION_ARGS) +{ + bool arg1 = PG_GETARG_BOOL(0); + bool arg2 = PG_GETARG_BOOL(1); + + PG_RETURN_BOOL(arg1 == arg2); +} + +Datum +boolne(PG_FUNCTION_ARGS) +{ + bool arg1 = PG_GETARG_BOOL(0); + bool arg2 = PG_GETARG_BOOL(1); + + PG_RETURN_BOOL(arg1 != arg2); +} + +Datum +boollt(PG_FUNCTION_ARGS) +{ + bool arg1 = PG_GETARG_BOOL(0); + bool arg2 = PG_GETARG_BOOL(1); + + PG_RETURN_BOOL(arg1 < arg2); +} + +Datum +boolgt(PG_FUNCTION_ARGS) +{ + bool arg1 = PG_GETARG_BOOL(0); + bool arg2 = PG_GETARG_BOOL(1); + + PG_RETURN_BOOL(arg1 > arg2); +} + +Datum +boolle(PG_FUNCTION_ARGS) +{ + bool arg1 = PG_GETARG_BOOL(0); + bool arg2 = PG_GETARG_BOOL(1); + + PG_RETURN_BOOL(arg1 <= arg2); +} + +Datum +boolge(PG_FUNCTION_ARGS) +{ + bool arg1 = PG_GETARG_BOOL(0); + bool arg2 = PG_GETARG_BOOL(1); + + PG_RETURN_BOOL(arg1 >= arg2); +} + +/* + * boolean-and and boolean-or aggregates. + */ + +/* + * Function for standard EVERY aggregate conforming to SQL 2003. + * The aggregate is also named bool_and for consistency. + * + * Note: this is only used in plain aggregate mode, not moving-aggregate mode. + */ +Datum +booland_statefunc(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(PG_GETARG_BOOL(0) && PG_GETARG_BOOL(1)); +} + +/* + * Function for standard ANY/SOME aggregate conforming to SQL 2003. + * The aggregate is named bool_or, because ANY/SOME have parsing conflicts. + * + * Note: this is only used in plain aggregate mode, not moving-aggregate mode. + */ +Datum +boolor_statefunc(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(PG_GETARG_BOOL(0) || PG_GETARG_BOOL(1)); +} + +typedef struct BoolAggState +{ + int64 aggcount; /* number of non-null values aggregated */ + int64 aggtrue; /* number of values aggregated that are true */ +} BoolAggState; + +static BoolAggState * +makeBoolAggState(FunctionCallInfo fcinfo) +{ + BoolAggState *state; + MemoryContext agg_context; + + if (!AggCheckCallContext(fcinfo, &agg_context)) + elog(ERROR, "aggregate function called in non-aggregate context"); + + state = (BoolAggState *) MemoryContextAlloc(agg_context, + sizeof(BoolAggState)); + state->aggcount = 0; + state->aggtrue = 0; + + return state; +} + +Datum +bool_accum(PG_FUNCTION_ARGS) +{ + BoolAggState *state; + + state = PG_ARGISNULL(0) ? NULL : (BoolAggState *) PG_GETARG_POINTER(0); + + /* Create the state data on first call */ + if (state == NULL) + state = makeBoolAggState(fcinfo); + + if (!PG_ARGISNULL(1)) + { + state->aggcount++; + if (PG_GETARG_BOOL(1)) + state->aggtrue++; + } + + PG_RETURN_POINTER(state); +} + +Datum +bool_accum_inv(PG_FUNCTION_ARGS) +{ + BoolAggState *state; + + state = PG_ARGISNULL(0) ? 
NULL : (BoolAggState *) PG_GETARG_POINTER(0); + + /* bool_accum should have created the state data */ + if (state == NULL) + elog(ERROR, "bool_accum_inv called with NULL state"); + + if (!PG_ARGISNULL(1)) + { + state->aggcount--; + if (PG_GETARG_BOOL(1)) + state->aggtrue--; + } + + PG_RETURN_POINTER(state); +} + +Datum +bool_alltrue(PG_FUNCTION_ARGS) +{ + BoolAggState *state; + + state = PG_ARGISNULL(0) ? NULL : (BoolAggState *) PG_GETARG_POINTER(0); + + /* if there were no non-null values, return NULL */ + if (state == NULL || state->aggcount == 0) + PG_RETURN_NULL(); + + /* true if all non-null values are true */ + PG_RETURN_BOOL(state->aggtrue == state->aggcount); +} + +Datum +bool_anytrue(PG_FUNCTION_ARGS) +{ + BoolAggState *state; + + state = PG_ARGISNULL(0) ? NULL : (BoolAggState *) PG_GETARG_POINTER(0); + + /* if there were no non-null values, return NULL */ + if (state == NULL || state->aggcount == 0) + PG_RETURN_NULL(); + + /* true if any non-null value is true */ + PG_RETURN_BOOL(state->aggtrue > 0); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/cash.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/cash.c new file mode 100644 index 00000000000..f15448018fe --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/cash.c @@ -0,0 +1,1176 @@ +/* + * cash.c + * Written by D'Arcy J.M. Cain + * darcy@druid.net + * http://www.druid.net/darcy/ + * + * Functions to allow input and output of money normally but store + * and handle it as 64 bit ints + * + * A slightly modified version of this file and a discussion of the + * workings can be found in the book "Software Solutions in C" by + * Dale Schumacher, Academic Press, ISBN: 0-12-632360-7 except that + * this version handles 64 bit numbers and so can hold values up to + * $92,233,720,368,547,758.07. + * + * src/backend/utils/adt/cash.c + */ + +#include "postgres.h" + +#include <limits.h> +#include <ctype.h> +#include <math.h> + +#include "common/int.h" +#include "libpq/pqformat.h" +#include "utils/builtins.h" +#include "utils/cash.h" +#include "utils/numeric.h" +#include "utils/pg_locale.h" + + +/************************************************************************* + * Private routines + ************************************************************************/ + +static const char * +num_word(Cash value) +{ + static __thread char buf[128]; + static const char *const small[] = { + "zero", "one", "two", "three", "four", "five", "six", "seven", + "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", + "fifteen", "sixteen", "seventeen", "eighteen", "nineteen", "twenty", + "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety" + }; + const char *const *big = small + 18; + int tu = value % 100; + + /* deal with the simple cases first */ + if (value <= 20) + return small[value]; + + /* is it an even multiple of 100? */ + if (!tu) + { + sprintf(buf, "%s hundred", small[value / 100]); + return buf; + } + + /* more than 99? */ + if (value > 99) + { + /* is it an even multiple of 10 other than 10? */ + if (value % 10 == 0 && tu > 10) + sprintf(buf, "%s hundred %s", + small[value / 100], big[tu / 10]); + else if (tu < 20) + sprintf(buf, "%s hundred and %s", + small[value / 100], small[tu]); + else + sprintf(buf, "%s hundred %s %s", + small[value / 100], big[tu / 10], small[tu % 10]); + } + else + { + /* is it an even multiple of 10 other than 10? 
*/ + if (value % 10 == 0 && tu > 10) + sprintf(buf, "%s", big[tu / 10]); + else if (tu < 20) + sprintf(buf, "%s", small[tu]); + else + sprintf(buf, "%s %s", big[tu / 10], small[tu % 10]); + } + + return buf; +} /* num_word() */ + +/* cash_in() + * Convert a string to a cash data type. + * Format is [$]###[,]###[.##] + * Examples: 123.45 $123.45 $123,456.78 + * + */ +Datum +cash_in(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + Cash result; + Cash value = 0; + Cash dec = 0; + Cash sgn = 1; + bool seen_dot = false; + const char *s = str; + int fpoint; + char dsymbol; + const char *ssymbol, + *psymbol, + *nsymbol, + *csymbol; + struct lconv *lconvert = PGLC_localeconv(); + + /* + * frac_digits will be CHAR_MAX in some locales, notably C. However, just + * testing for == CHAR_MAX is risky, because of compilers like gcc that + * "helpfully" let you alter the platform-standard definition of whether + * char is signed or not. If we are so unfortunate as to get compiled + * with a nonstandard -fsigned-char or -funsigned-char switch, then our + * idea of CHAR_MAX will not agree with libc's. The safest course is not + * to test for CHAR_MAX at all, but to impose a range check for plausible + * frac_digits values. + */ + fpoint = lconvert->frac_digits; + if (fpoint < 0 || fpoint > 10) + fpoint = 2; /* best guess in this case, I think */ + + /* we restrict dsymbol to be a single byte, but not the other symbols */ + if (*lconvert->mon_decimal_point != '\0' && + lconvert->mon_decimal_point[1] == '\0') + dsymbol = *lconvert->mon_decimal_point; + else + dsymbol = '.'; + if (*lconvert->mon_thousands_sep != '\0') + ssymbol = lconvert->mon_thousands_sep; + else /* ssymbol should not equal dsymbol */ + ssymbol = (dsymbol != ',') ? "," : "."; + csymbol = (*lconvert->currency_symbol != '\0') ? lconvert->currency_symbol : "$"; + psymbol = (*lconvert->positive_sign != '\0') ? lconvert->positive_sign : "+"; + nsymbol = (*lconvert->negative_sign != '\0') ? lconvert->negative_sign : "-"; + +#ifdef CASHDEBUG + printf("cashin- precision '%d'; decimal '%c'; thousands '%s'; currency '%s'; positive '%s'; negative '%s'\n", + fpoint, dsymbol, ssymbol, csymbol, psymbol, nsymbol); +#endif + + /* we need to add all sorts of checking here. For now just */ + /* strip all leading whitespace and any leading currency symbol */ + while (isspace((unsigned char) *s)) + s++; + if (strncmp(s, csymbol, strlen(csymbol)) == 0) + s += strlen(csymbol); + while (isspace((unsigned char) *s)) + s++; + +#ifdef CASHDEBUG + printf("cashin- string is '%s'\n", s); +#endif + + /* a leading minus or paren signifies a negative number */ + /* again, better heuristics needed */ + /* XXX - doesn't properly check for balanced parens - djmc */ + if (strncmp(s, nsymbol, strlen(nsymbol)) == 0) + { + sgn = -1; + s += strlen(nsymbol); + } + else if (*s == '(') + { + sgn = -1; + s++; + } + else if (strncmp(s, psymbol, strlen(psymbol)) == 0) + s += strlen(psymbol); + +#ifdef CASHDEBUG + printf("cashin- string is '%s'\n", s); +#endif + + /* allow whitespace and currency symbol after the sign, too */ + while (isspace((unsigned char) *s)) + s++; + if (strncmp(s, csymbol, strlen(csymbol)) == 0) + s += strlen(csymbol); + while (isspace((unsigned char) *s)) + s++; + +#ifdef CASHDEBUG + printf("cashin- string is '%s'\n", s); +#endif + + /* + * We accumulate the absolute amount in "value" and then apply the sign at + * the end. 
(The sign can appear before or after the digits, so it would + * be more complicated to do otherwise.) Because of the larger range of + * negative signed integers, we build "value" in the negative and then + * flip the sign at the end, catching most-negative-number overflow if + * necessary. + */ + + for (; *s; s++) + { + /* + * We look for digits as long as we have found less than the required + * number of decimal places. + */ + if (isdigit((unsigned char) *s) && (!seen_dot || dec < fpoint)) + { + int8 digit = *s - '0'; + + if (pg_mul_s64_overflow(value, 10, &value) || + pg_sub_s64_overflow(value, digit, &value)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value \"%s\" is out of range for type %s", + str, "money"))); + + if (seen_dot) + dec++; + } + /* decimal point? then start counting fractions... */ + else if (*s == dsymbol && !seen_dot) + { + seen_dot = true; + } + /* ignore if "thousands" separator, else we're done */ + else if (strncmp(s, ssymbol, strlen(ssymbol)) == 0) + s += strlen(ssymbol) - 1; + else + break; + } + + /* round off if there's another digit */ + if (isdigit((unsigned char) *s) && *s >= '5') + { + /* remember we build the value in the negative */ + if (pg_sub_s64_overflow(value, 1, &value)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value \"%s\" is out of range for type %s", + str, "money"))); + } + + /* adjust for less than required decimal places */ + for (; dec < fpoint; dec++) + { + if (pg_mul_s64_overflow(value, 10, &value)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value \"%s\" is out of range for type %s", + str, "money"))); + } + + /* + * should only be trailing digits followed by whitespace, right paren, + * trailing sign, and/or trailing currency symbol + */ + while (isdigit((unsigned char) *s)) + s++; + + while (*s) + { + if (isspace((unsigned char) *s) || *s == ')') + s++; + else if (strncmp(s, nsymbol, strlen(nsymbol)) == 0) + { + sgn = -1; + s += strlen(nsymbol); + } + else if (strncmp(s, psymbol, strlen(psymbol)) == 0) + s += strlen(psymbol); + else if (strncmp(s, csymbol, strlen(csymbol)) == 0) + s += strlen(csymbol); + else + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "money", str))); + } + + /* + * If the value is supposed to be positive, flip the sign, but check for + * the most negative number. + */ + if (sgn > 0) + { + if (value == PG_INT64_MIN) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value \"%s\" is out of range for type %s", + str, "money"))); + result = -value; + } + else + result = value; + +#ifdef CASHDEBUG + printf("cashin- result is " INT64_FORMAT "\n", result); +#endif + + PG_RETURN_CASH(result); +} + + +/* cash_out() + * Function to convert cash to a dollars and cents representation, using + * the lc_monetary locale's formatting. 
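+ * Rough illustration only, since the output depends on lc_monetary: with
+ * typical en_US-style settings (frac_digits = 2, currency symbol "$",
+ * thousands separator ","), the stored value 123456 would come out as
+ * "$1,234.56", and -123456 as something like "-$1,234.56".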
+ */ +Datum +cash_out(PG_FUNCTION_ARGS) +{ + Cash value = PG_GETARG_CASH(0); + char *result; + char buf[128]; + char *bufptr; + int digit_pos; + int points, + mon_group; + char dsymbol; + const char *ssymbol, + *csymbol, + *signsymbol; + char sign_posn, + cs_precedes, + sep_by_space; + struct lconv *lconvert = PGLC_localeconv(); + + /* see comments about frac_digits in cash_in() */ + points = lconvert->frac_digits; + if (points < 0 || points > 10) + points = 2; /* best guess in this case, I think */ + + /* + * As with frac_digits, must apply a range check to mon_grouping to avoid + * being fooled by variant CHAR_MAX values. + */ + mon_group = *lconvert->mon_grouping; + if (mon_group <= 0 || mon_group > 6) + mon_group = 3; + + /* we restrict dsymbol to be a single byte, but not the other symbols */ + if (*lconvert->mon_decimal_point != '\0' && + lconvert->mon_decimal_point[1] == '\0') + dsymbol = *lconvert->mon_decimal_point; + else + dsymbol = '.'; + if (*lconvert->mon_thousands_sep != '\0') + ssymbol = lconvert->mon_thousands_sep; + else /* ssymbol should not equal dsymbol */ + ssymbol = (dsymbol != ',') ? "," : "."; + csymbol = (*lconvert->currency_symbol != '\0') ? lconvert->currency_symbol : "$"; + + if (value < 0) + { + /* make the amount positive for digit-reconstruction loop */ + value = -value; + /* set up formatting data */ + signsymbol = (*lconvert->negative_sign != '\0') ? lconvert->negative_sign : "-"; + sign_posn = lconvert->n_sign_posn; + cs_precedes = lconvert->n_cs_precedes; + sep_by_space = lconvert->n_sep_by_space; + } + else + { + signsymbol = lconvert->positive_sign; + sign_posn = lconvert->p_sign_posn; + cs_precedes = lconvert->p_cs_precedes; + sep_by_space = lconvert->p_sep_by_space; + } + + /* we build the digits+decimal-point+sep string right-to-left in buf[] */ + bufptr = buf + sizeof(buf) - 1; + *bufptr = '\0'; + + /* + * Generate digits till there are no non-zero digits left and we emitted + * at least one to the left of the decimal point. digit_pos is the + * current digit position, with zero as the digit just left of the decimal + * point, increasing to the right. + */ + digit_pos = points; + do + { + if (points && digit_pos == 0) + { + /* insert decimal point, but not if value cannot be fractional */ + *(--bufptr) = dsymbol; + } + else if (digit_pos < 0 && (digit_pos % mon_group) == 0) + { + /* insert thousands sep, but only to left of radix point */ + bufptr -= strlen(ssymbol); + memcpy(bufptr, ssymbol, strlen(ssymbol)); + } + + *(--bufptr) = ((uint64) value % 10) + '0'; + value = ((uint64) value) / 10; + digit_pos--; + } while (value || digit_pos >= 0); + + /*---------- + * Now, attach currency symbol and sign symbol in the correct order. + * + * The POSIX spec defines these values controlling this code: + * + * p/n_sign_posn: + * 0 Parentheses enclose the quantity and the currency_symbol. + * 1 The sign string precedes the quantity and the currency_symbol. + * 2 The sign string succeeds the quantity and the currency_symbol. + * 3 The sign string precedes the currency_symbol. + * 4 The sign string succeeds the currency_symbol. + * + * p/n_cs_precedes: 0 means currency symbol after value, else before it. + * + * p/n_sep_by_space: + * 0 No <space> separates the currency symbol and value. + * 1 If the currency symbol and sign string are adjacent, a <space> + * separates them from the value; otherwise, a <space> separates + * the currency symbol from the value. 
+ * 2 If the currency symbol and sign string are adjacent, a <space> + * separates them; otherwise, a <space> separates the sign string + * from the value. + *---------- + */ + switch (sign_posn) + { + case 0: + if (cs_precedes) + result = psprintf("(%s%s%s)", + csymbol, + (sep_by_space == 1) ? " " : "", + bufptr); + else + result = psprintf("(%s%s%s)", + bufptr, + (sep_by_space == 1) ? " " : "", + csymbol); + break; + case 1: + default: + if (cs_precedes) + result = psprintf("%s%s%s%s%s", + signsymbol, + (sep_by_space == 2) ? " " : "", + csymbol, + (sep_by_space == 1) ? " " : "", + bufptr); + else + result = psprintf("%s%s%s%s%s", + signsymbol, + (sep_by_space == 2) ? " " : "", + bufptr, + (sep_by_space == 1) ? " " : "", + csymbol); + break; + case 2: + if (cs_precedes) + result = psprintf("%s%s%s%s%s", + csymbol, + (sep_by_space == 1) ? " " : "", + bufptr, + (sep_by_space == 2) ? " " : "", + signsymbol); + else + result = psprintf("%s%s%s%s%s", + bufptr, + (sep_by_space == 1) ? " " : "", + csymbol, + (sep_by_space == 2) ? " " : "", + signsymbol); + break; + case 3: + if (cs_precedes) + result = psprintf("%s%s%s%s%s", + signsymbol, + (sep_by_space == 2) ? " " : "", + csymbol, + (sep_by_space == 1) ? " " : "", + bufptr); + else + result = psprintf("%s%s%s%s%s", + bufptr, + (sep_by_space == 1) ? " " : "", + signsymbol, + (sep_by_space == 2) ? " " : "", + csymbol); + break; + case 4: + if (cs_precedes) + result = psprintf("%s%s%s%s%s", + csymbol, + (sep_by_space == 2) ? " " : "", + signsymbol, + (sep_by_space == 1) ? " " : "", + bufptr); + else + result = psprintf("%s%s%s%s%s", + bufptr, + (sep_by_space == 1) ? " " : "", + csymbol, + (sep_by_space == 2) ? " " : "", + signsymbol); + break; + } + + PG_RETURN_CSTRING(result); +} + +/* + * cash_recv - converts external binary format to cash + */ +Datum +cash_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + + PG_RETURN_CASH((Cash) pq_getmsgint64(buf)); +} + +/* + * cash_send - converts cash to binary format + */ +Datum +cash_send(PG_FUNCTION_ARGS) +{ + Cash arg1 = PG_GETARG_CASH(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint64(&buf, arg1); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * Comparison functions + */ + +Datum +cash_eq(PG_FUNCTION_ARGS) +{ + Cash c1 = PG_GETARG_CASH(0); + Cash c2 = PG_GETARG_CASH(1); + + PG_RETURN_BOOL(c1 == c2); +} + +Datum +cash_ne(PG_FUNCTION_ARGS) +{ + Cash c1 = PG_GETARG_CASH(0); + Cash c2 = PG_GETARG_CASH(1); + + PG_RETURN_BOOL(c1 != c2); +} + +Datum +cash_lt(PG_FUNCTION_ARGS) +{ + Cash c1 = PG_GETARG_CASH(0); + Cash c2 = PG_GETARG_CASH(1); + + PG_RETURN_BOOL(c1 < c2); +} + +Datum +cash_le(PG_FUNCTION_ARGS) +{ + Cash c1 = PG_GETARG_CASH(0); + Cash c2 = PG_GETARG_CASH(1); + + PG_RETURN_BOOL(c1 <= c2); +} + +Datum +cash_gt(PG_FUNCTION_ARGS) +{ + Cash c1 = PG_GETARG_CASH(0); + Cash c2 = PG_GETARG_CASH(1); + + PG_RETURN_BOOL(c1 > c2); +} + +Datum +cash_ge(PG_FUNCTION_ARGS) +{ + Cash c1 = PG_GETARG_CASH(0); + Cash c2 = PG_GETARG_CASH(1); + + PG_RETURN_BOOL(c1 >= c2); +} + +Datum +cash_cmp(PG_FUNCTION_ARGS) +{ + Cash c1 = PG_GETARG_CASH(0); + Cash c2 = PG_GETARG_CASH(1); + + if (c1 > c2) + PG_RETURN_INT32(1); + else if (c1 == c2) + PG_RETURN_INT32(0); + else + PG_RETURN_INT32(-1); +} + + +/* cash_pl() + * Add two cash values. + */ +Datum +cash_pl(PG_FUNCTION_ARGS) +{ + Cash c1 = PG_GETARG_CASH(0); + Cash c2 = PG_GETARG_CASH(1); + Cash result; + + result = c1 + c2; + + PG_RETURN_CASH(result); +} + + +/* cash_mi() + * Subtract two cash values. 
+ */ +Datum +cash_mi(PG_FUNCTION_ARGS) +{ + Cash c1 = PG_GETARG_CASH(0); + Cash c2 = PG_GETARG_CASH(1); + Cash result; + + result = c1 - c2; + + PG_RETURN_CASH(result); +} + + +/* cash_div_cash() + * Divide cash by cash, returning float8. + */ +Datum +cash_div_cash(PG_FUNCTION_ARGS) +{ + Cash dividend = PG_GETARG_CASH(0); + Cash divisor = PG_GETARG_CASH(1); + float8 quotient; + + if (divisor == 0) + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + + quotient = (float8) dividend / (float8) divisor; + PG_RETURN_FLOAT8(quotient); +} + + +/* cash_mul_flt8() + * Multiply cash by float8. + */ +Datum +cash_mul_flt8(PG_FUNCTION_ARGS) +{ + Cash c = PG_GETARG_CASH(0); + float8 f = PG_GETARG_FLOAT8(1); + Cash result; + + result = rint(c * f); + PG_RETURN_CASH(result); +} + + +/* flt8_mul_cash() + * Multiply float8 by cash. + */ +Datum +flt8_mul_cash(PG_FUNCTION_ARGS) +{ + float8 f = PG_GETARG_FLOAT8(0); + Cash c = PG_GETARG_CASH(1); + Cash result; + + result = rint(f * c); + PG_RETURN_CASH(result); +} + + +/* cash_div_flt8() + * Divide cash by float8. + */ +Datum +cash_div_flt8(PG_FUNCTION_ARGS) +{ + Cash c = PG_GETARG_CASH(0); + float8 f = PG_GETARG_FLOAT8(1); + Cash result; + + if (f == 0.0) + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + + result = rint(c / f); + PG_RETURN_CASH(result); +} + + +/* cash_mul_flt4() + * Multiply cash by float4. + */ +Datum +cash_mul_flt4(PG_FUNCTION_ARGS) +{ + Cash c = PG_GETARG_CASH(0); + float4 f = PG_GETARG_FLOAT4(1); + Cash result; + + result = rint(c * (float8) f); + PG_RETURN_CASH(result); +} + + +/* flt4_mul_cash() + * Multiply float4 by cash. + */ +Datum +flt4_mul_cash(PG_FUNCTION_ARGS) +{ + float4 f = PG_GETARG_FLOAT4(0); + Cash c = PG_GETARG_CASH(1); + Cash result; + + result = rint((float8) f * c); + PG_RETURN_CASH(result); +} + + +/* cash_div_flt4() + * Divide cash by float4. + * + */ +Datum +cash_div_flt4(PG_FUNCTION_ARGS) +{ + Cash c = PG_GETARG_CASH(0); + float4 f = PG_GETARG_FLOAT4(1); + Cash result; + + if (f == 0.0) + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + + result = rint(c / (float8) f); + PG_RETURN_CASH(result); +} + + +/* cash_mul_int8() + * Multiply cash by int8. + */ +Datum +cash_mul_int8(PG_FUNCTION_ARGS) +{ + Cash c = PG_GETARG_CASH(0); + int64 i = PG_GETARG_INT64(1); + Cash result; + + result = c * i; + PG_RETURN_CASH(result); +} + + +/* int8_mul_cash() + * Multiply int8 by cash. + */ +Datum +int8_mul_cash(PG_FUNCTION_ARGS) +{ + int64 i = PG_GETARG_INT64(0); + Cash c = PG_GETARG_CASH(1); + Cash result; + + result = i * c; + PG_RETURN_CASH(result); +} + +/* cash_div_int8() + * Divide cash by 8-byte integer. + */ +Datum +cash_div_int8(PG_FUNCTION_ARGS) +{ + Cash c = PG_GETARG_CASH(0); + int64 i = PG_GETARG_INT64(1); + Cash result; + + if (i == 0) + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + + result = c / i; + + PG_RETURN_CASH(result); +} + + +/* cash_mul_int4() + * Multiply cash by int4. + */ +Datum +cash_mul_int4(PG_FUNCTION_ARGS) +{ + Cash c = PG_GETARG_CASH(0); + int32 i = PG_GETARG_INT32(1); + Cash result; + + result = c * i; + PG_RETURN_CASH(result); +} + + +/* int4_mul_cash() + * Multiply int4 by cash. + */ +Datum +int4_mul_cash(PG_FUNCTION_ARGS) +{ + int32 i = PG_GETARG_INT32(0); + Cash c = PG_GETARG_CASH(1); + Cash result; + + result = i * c; + PG_RETURN_CASH(result); +} + + +/* cash_div_int4() + * Divide cash by 4-byte integer. 
+ * + */ +Datum +cash_div_int4(PG_FUNCTION_ARGS) +{ + Cash c = PG_GETARG_CASH(0); + int32 i = PG_GETARG_INT32(1); + Cash result; + + if (i == 0) + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + + result = c / i; + + PG_RETURN_CASH(result); +} + + +/* cash_mul_int2() + * Multiply cash by int2. + */ +Datum +cash_mul_int2(PG_FUNCTION_ARGS) +{ + Cash c = PG_GETARG_CASH(0); + int16 s = PG_GETARG_INT16(1); + Cash result; + + result = c * s; + PG_RETURN_CASH(result); +} + +/* int2_mul_cash() + * Multiply int2 by cash. + */ +Datum +int2_mul_cash(PG_FUNCTION_ARGS) +{ + int16 s = PG_GETARG_INT16(0); + Cash c = PG_GETARG_CASH(1); + Cash result; + + result = s * c; + PG_RETURN_CASH(result); +} + +/* cash_div_int2() + * Divide cash by int2. + * + */ +Datum +cash_div_int2(PG_FUNCTION_ARGS) +{ + Cash c = PG_GETARG_CASH(0); + int16 s = PG_GETARG_INT16(1); + Cash result; + + if (s == 0) + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + + result = c / s; + PG_RETURN_CASH(result); +} + +/* cashlarger() + * Return larger of two cash values. + */ +Datum +cashlarger(PG_FUNCTION_ARGS) +{ + Cash c1 = PG_GETARG_CASH(0); + Cash c2 = PG_GETARG_CASH(1); + Cash result; + + result = (c1 > c2) ? c1 : c2; + + PG_RETURN_CASH(result); +} + +/* cashsmaller() + * Return smaller of two cash values. + */ +Datum +cashsmaller(PG_FUNCTION_ARGS) +{ + Cash c1 = PG_GETARG_CASH(0); + Cash c2 = PG_GETARG_CASH(1); + Cash result; + + result = (c1 < c2) ? c1 : c2; + + PG_RETURN_CASH(result); +} + +/* cash_words() + * This converts an int4 as well but to a representation using words + * Obviously way North American centric - sorry + */ +Datum +cash_words(PG_FUNCTION_ARGS) +{ + Cash value = PG_GETARG_CASH(0); + uint64 val; + char buf[256]; + char *p = buf; + Cash m0; + Cash m1; + Cash m2; + Cash m3; + Cash m4; + Cash m5; + Cash m6; + + /* work with positive numbers */ + if (value < 0) + { + value = -value; + strcpy(buf, "minus "); + p += 6; + } + else + buf[0] = '\0'; + + /* Now treat as unsigned, to avoid trouble at INT_MIN */ + val = (uint64) value; + + m0 = val % INT64CONST(100); /* cents */ + m1 = (val / INT64CONST(100)) % 1000; /* hundreds */ + m2 = (val / INT64CONST(100000)) % 1000; /* thousands */ + m3 = (val / INT64CONST(100000000)) % 1000; /* millions */ + m4 = (val / INT64CONST(100000000000)) % 1000; /* billions */ + m5 = (val / INT64CONST(100000000000000)) % 1000; /* trillions */ + m6 = (val / INT64CONST(100000000000000000)) % 1000; /* quadrillions */ + + if (m6) + { + strcat(buf, num_word(m6)); + strcat(buf, " quadrillion "); + } + + if (m5) + { + strcat(buf, num_word(m5)); + strcat(buf, " trillion "); + } + + if (m4) + { + strcat(buf, num_word(m4)); + strcat(buf, " billion "); + } + + if (m3) + { + strcat(buf, num_word(m3)); + strcat(buf, " million "); + } + + if (m2) + { + strcat(buf, num_word(m2)); + strcat(buf, " thousand "); + } + + if (m1) + strcat(buf, num_word(m1)); + + if (!*p) + strcat(buf, "zero"); + + strcat(buf, (val / 100) == 1 ? " dollar and " : " dollars and "); + strcat(buf, num_word(m0)); + strcat(buf, m0 == 1 ? " cent" : " cents"); + + /* capitalize output */ + buf[0] = pg_toupper((unsigned char) buf[0]); + + /* return as text datum */ + PG_RETURN_TEXT_P(cstring_to_text(buf)); +} + + +/* cash_numeric() + * Convert cash to numeric. 
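+ * For example, with frac_digits = 2 the stored value 12345 (i.e. $123.45)
+ * is divided by 10^2 and rounded to two fractional digits, producing the
+ * numeric 123.45.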
+ */ +Datum +cash_numeric(PG_FUNCTION_ARGS) +{ + Cash money = PG_GETARG_CASH(0); + Datum result; + int fpoint; + struct lconv *lconvert = PGLC_localeconv(); + + /* see comments about frac_digits in cash_in() */ + fpoint = lconvert->frac_digits; + if (fpoint < 0 || fpoint > 10) + fpoint = 2; + + /* convert the integral money value to numeric */ + result = NumericGetDatum(int64_to_numeric(money)); + + /* scale appropriately, if needed */ + if (fpoint > 0) + { + int64 scale; + int i; + Datum numeric_scale; + Datum quotient; + + /* compute required scale factor */ + scale = 1; + for (i = 0; i < fpoint; i++) + scale *= 10; + numeric_scale = NumericGetDatum(int64_to_numeric(scale)); + + /* + * Given integral inputs approaching INT64_MAX, select_div_scale() + * might choose a result scale of zero, causing loss of fractional + * digits in the quotient. We can ensure an exact result by setting + * the dscale of either input to be at least as large as the desired + * result scale. numeric_round() will do that for us. + */ + numeric_scale = DirectFunctionCall2(numeric_round, + numeric_scale, + Int32GetDatum(fpoint)); + + /* Now we can safely divide ... */ + quotient = DirectFunctionCall2(numeric_div, result, numeric_scale); + + /* ... and forcibly round to exactly the intended number of digits */ + result = DirectFunctionCall2(numeric_round, + quotient, + Int32GetDatum(fpoint)); + } + + PG_RETURN_DATUM(result); +} + +/* numeric_cash() + * Convert numeric to cash. + */ +Datum +numeric_cash(PG_FUNCTION_ARGS) +{ + Datum amount = PG_GETARG_DATUM(0); + Cash result; + int fpoint; + int64 scale; + int i; + Datum numeric_scale; + struct lconv *lconvert = PGLC_localeconv(); + + /* see comments about frac_digits in cash_in() */ + fpoint = lconvert->frac_digits; + if (fpoint < 0 || fpoint > 10) + fpoint = 2; + + /* compute required scale factor */ + scale = 1; + for (i = 0; i < fpoint; i++) + scale *= 10; + + /* multiply the input amount by scale factor */ + numeric_scale = NumericGetDatum(int64_to_numeric(scale)); + amount = DirectFunctionCall2(numeric_mul, amount, numeric_scale); + + /* note that numeric_int8 will round to nearest integer for us */ + result = DatumGetInt64(DirectFunctionCall1(numeric_int8, amount)); + + PG_RETURN_CASH(result); +} + +/* int4_cash() + * Convert int4 (int) to cash + */ +Datum +int4_cash(PG_FUNCTION_ARGS) +{ + int32 amount = PG_GETARG_INT32(0); + Cash result; + int fpoint; + int64 scale; + int i; + struct lconv *lconvert = PGLC_localeconv(); + + /* see comments about frac_digits in cash_in() */ + fpoint = lconvert->frac_digits; + if (fpoint < 0 || fpoint > 10) + fpoint = 2; + + /* compute required scale factor */ + scale = 1; + for (i = 0; i < fpoint; i++) + scale *= 10; + + /* compute amount * scale, checking for overflow */ + result = DatumGetInt64(DirectFunctionCall2(int8mul, Int64GetDatum(amount), + Int64GetDatum(scale))); + + PG_RETURN_CASH(result); +} + +/* int8_cash() + * Convert int8 (bigint) to cash + */ +Datum +int8_cash(PG_FUNCTION_ARGS) +{ + int64 amount = PG_GETARG_INT64(0); + Cash result; + int fpoint; + int64 scale; + int i; + struct lconv *lconvert = PGLC_localeconv(); + + /* see comments about frac_digits in cash_in() */ + fpoint = lconvert->frac_digits; + if (fpoint < 0 || fpoint > 10) + fpoint = 2; + + /* compute required scale factor */ + scale = 1; + for (i = 0; i < fpoint; i++) + scale *= 10; + + /* compute amount * scale, checking for overflow */ + result = DatumGetInt64(DirectFunctionCall2(int8mul, Int64GetDatum(amount), + Int64GetDatum(scale))); + + 
PG_RETURN_CASH(result); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/char.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/char.c new file mode 100644 index 00000000000..33662595398 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/char.c @@ -0,0 +1,254 @@ +/*------------------------------------------------------------------------- + * + * char.c + * Functions for the built-in type "char" (not to be confused with + * bpchar, which is the SQL CHAR(n) type). + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/char.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <limits.h> + +#include "libpq/pqformat.h" +#include "utils/builtins.h" +#include "varatt.h" + +#define ISOCTAL(c) (((c) >= '0') && ((c) <= '7')) +#define TOOCTAL(c) ((c) + '0') +#define FROMOCTAL(c) ((unsigned char) (c) - '0') + + +/***************************************************************************** + * USER I/O ROUTINES * + *****************************************************************************/ + +/* + * charin - converts "x" to 'x' + * + * This accepts the formats charout produces. If we have multibyte input + * that is not in the form '\ooo', then we take its first byte as the value + * and silently discard the rest; this is a backwards-compatibility provision. + */ +Datum +charin(PG_FUNCTION_ARGS) +{ + char *ch = PG_GETARG_CSTRING(0); + + if (strlen(ch) == 4 && ch[0] == '\\' && + ISOCTAL(ch[1]) && ISOCTAL(ch[2]) && ISOCTAL(ch[3])) + PG_RETURN_CHAR((FROMOCTAL(ch[1]) << 6) + + (FROMOCTAL(ch[2]) << 3) + + FROMOCTAL(ch[3])); + /* This will do the right thing for a zero-length input string */ + PG_RETURN_CHAR(ch[0]); +} + +/* + * charout - converts 'x' to "x" + * + * The possible output formats are: + * 1. 0x00 is represented as an empty string. + * 2. 0x01..0x7F are represented as a single ASCII byte. + * 3. 0x80..0xFF are represented as \ooo (backslash and 3 octal digits). + * Case 3 is meant to match the traditional "escape" format of bytea. + */ +Datum +charout(PG_FUNCTION_ARGS) +{ + char ch = PG_GETARG_CHAR(0); + char *result = (char *) palloc(5); + + if (IS_HIGHBIT_SET(ch)) + { + result[0] = '\\'; + result[1] = TOOCTAL(((unsigned char) ch) >> 6); + result[2] = TOOCTAL((((unsigned char) ch) >> 3) & 07); + result[3] = TOOCTAL(((unsigned char) ch) & 07); + result[4] = '\0'; + } + else + { + /* This produces acceptable results for 0x00 as well */ + result[0] = ch; + result[1] = '\0'; + } + PG_RETURN_CSTRING(result); +} + +/* + * charrecv - converts external binary format to char + * + * The external representation is one byte, with no character set + * conversion. This is somewhat dubious, perhaps, but in many + * cases people use char for a 1-byte binary type. 
+ */ +Datum +charrecv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + + PG_RETURN_CHAR(pq_getmsgbyte(buf)); +} + +/* + * charsend - converts char to binary format + */ +Datum +charsend(PG_FUNCTION_ARGS) +{ + char arg1 = PG_GETARG_CHAR(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendbyte(&buf, arg1); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/***************************************************************************** + * PUBLIC ROUTINES * + *****************************************************************************/ + +/* + * NOTE: comparisons are done as though char is unsigned (uint8). + * Conversions to and from integer are done as though char is signed (int8). + * + * You wanted consistency? + */ + +Datum +chareq(PG_FUNCTION_ARGS) +{ + char arg1 = PG_GETARG_CHAR(0); + char arg2 = PG_GETARG_CHAR(1); + + PG_RETURN_BOOL(arg1 == arg2); +} + +Datum +charne(PG_FUNCTION_ARGS) +{ + char arg1 = PG_GETARG_CHAR(0); + char arg2 = PG_GETARG_CHAR(1); + + PG_RETURN_BOOL(arg1 != arg2); +} + +Datum +charlt(PG_FUNCTION_ARGS) +{ + char arg1 = PG_GETARG_CHAR(0); + char arg2 = PG_GETARG_CHAR(1); + + PG_RETURN_BOOL((uint8) arg1 < (uint8) arg2); +} + +Datum +charle(PG_FUNCTION_ARGS) +{ + char arg1 = PG_GETARG_CHAR(0); + char arg2 = PG_GETARG_CHAR(1); + + PG_RETURN_BOOL((uint8) arg1 <= (uint8) arg2); +} + +Datum +chargt(PG_FUNCTION_ARGS) +{ + char arg1 = PG_GETARG_CHAR(0); + char arg2 = PG_GETARG_CHAR(1); + + PG_RETURN_BOOL((uint8) arg1 > (uint8) arg2); +} + +Datum +charge(PG_FUNCTION_ARGS) +{ + char arg1 = PG_GETARG_CHAR(0); + char arg2 = PG_GETARG_CHAR(1); + + PG_RETURN_BOOL((uint8) arg1 >= (uint8) arg2); +} + + +Datum +chartoi4(PG_FUNCTION_ARGS) +{ + char arg1 = PG_GETARG_CHAR(0); + + PG_RETURN_INT32((int32) ((int8) arg1)); +} + +Datum +i4tochar(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + + if (arg1 < SCHAR_MIN || arg1 > SCHAR_MAX) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("\"char\" out of range"))); + + PG_RETURN_CHAR((int8) arg1); +} + + +Datum +text_char(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_PP(0); + char *ch = VARDATA_ANY(arg1); + char result; + + /* + * Conversion rules are the same as in charin(), but here we need to + * handle the empty-string case honestly. + */ + if (VARSIZE_ANY_EXHDR(arg1) == 4 && ch[0] == '\\' && + ISOCTAL(ch[1]) && ISOCTAL(ch[2]) && ISOCTAL(ch[3])) + result = (FROMOCTAL(ch[1]) << 6) + + (FROMOCTAL(ch[2]) << 3) + + FROMOCTAL(ch[3]); + else if (VARSIZE_ANY_EXHDR(arg1) > 0) + result = ch[0]; + else + result = '\0'; + + PG_RETURN_CHAR(result); +} + +Datum +char_text(PG_FUNCTION_ARGS) +{ + char arg1 = PG_GETARG_CHAR(0); + text *result = palloc(VARHDRSZ + 4); + + /* + * Conversion rules are the same as in charout(), but here we need to be + * honest about converting 0x00 to an empty string. 
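+ * Concretely: 0x00 yields the empty string, 'A' (0x41) yields "A", and a
+ * high-bit byte such as 0xC3 yields the four-character text "\303".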
+ */ + if (IS_HIGHBIT_SET(arg1)) + { + SET_VARSIZE(result, VARHDRSZ + 4); + (VARDATA(result))[0] = '\\'; + (VARDATA(result))[1] = TOOCTAL(((unsigned char) arg1) >> 6); + (VARDATA(result))[2] = TOOCTAL((((unsigned char) arg1) >> 3) & 07); + (VARDATA(result))[3] = TOOCTAL(((unsigned char) arg1) & 07); + } + else if (arg1 != '\0') + { + SET_VARSIZE(result, VARHDRSZ + 1); + *(VARDATA(result)) = arg1; + } + else + SET_VARSIZE(result, VARHDRSZ); + + PG_RETURN_TEXT_P(result); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/cryptohashfuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/cryptohashfuncs.c new file mode 100644 index 00000000000..f9603279581 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/cryptohashfuncs.c @@ -0,0 +1,169 @@ +/*------------------------------------------------------------------------- + * + * cryptohashfuncs.c + * Cryptographic hash functions + * + * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/cryptohashfuncs.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "common/cryptohash.h" +#include "common/md5.h" +#include "common/sha2.h" +#include "utils/builtins.h" +#include "varatt.h" + + +/* + * MD5 + */ + +/* MD5 produces a 16 byte (128 bit) hash; double it for hex */ +#define MD5_HASH_LEN 32 + +/* + * Create an MD5 hash of a text value and return it as hex string. + */ +Datum +md5_text(PG_FUNCTION_ARGS) +{ + text *in_text = PG_GETARG_TEXT_PP(0); + size_t len; + char hexsum[MD5_HASH_LEN + 1]; + const char *errstr = NULL; + + /* Calculate the length of the buffer using varlena metadata */ + len = VARSIZE_ANY_EXHDR(in_text); + + /* get the hash result */ + if (pg_md5_hash(VARDATA_ANY(in_text), len, hexsum, &errstr) == false) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("could not compute %s hash: %s", "MD5", + errstr))); + + /* convert to text and return it */ + PG_RETURN_TEXT_P(cstring_to_text(hexsum)); +} + +/* + * Create an MD5 hash of a bytea value and return it as a hex string. + */ +Datum +md5_bytea(PG_FUNCTION_ARGS) +{ + bytea *in = PG_GETARG_BYTEA_PP(0); + size_t len; + char hexsum[MD5_HASH_LEN + 1]; + const char *errstr = NULL; + + len = VARSIZE_ANY_EXHDR(in); + if (pg_md5_hash(VARDATA_ANY(in), len, hexsum, &errstr) == false) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("could not compute %s hash: %s", "MD5", + errstr))); + + PG_RETURN_TEXT_P(cstring_to_text(hexsum)); +} + +/* + * Internal routine to compute a cryptohash with the given bytea input. 
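The hexsum buffers in md5_text() and md5_bytea() above are MD5_HASH_LEN + 1 bytes because a 16-byte MD5 digest expands to 32 hex characters plus a terminating NUL. A standalone sketch of that sizing (the digest contents here are a stand-in, not a real hash):

#include <stdio.h>
#include <string.h>

int main(void)
{
    unsigned char digest[16];   /* MD5 always yields 16 raw bytes */
    char hex[16 * 2 + 1];       /* 32 hex characters plus the NUL */

    memset(digest, 0xAB, sizeof(digest));   /* stand-in for a real digest */
    for (size_t i = 0; i < sizeof(digest); i++)
        snprintf(hex + 2 * i, 3, "%02x", digest[i]);

    printf("%zu hex chars: %s\n", strlen(hex), hex);   /* 32 hex chars */
    return 0;
}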
+ */ +static inline bytea * +cryptohash_internal(pg_cryptohash_type type, bytea *input) +{ + const uint8 *data; + const char *typestr = NULL; + int digest_len = 0; + size_t len; + pg_cryptohash_ctx *ctx; + bytea *result; + + switch (type) + { + case PG_SHA224: + typestr = "SHA224"; + digest_len = PG_SHA224_DIGEST_LENGTH; + break; + case PG_SHA256: + typestr = "SHA256"; + digest_len = PG_SHA256_DIGEST_LENGTH; + break; + case PG_SHA384: + typestr = "SHA384"; + digest_len = PG_SHA384_DIGEST_LENGTH; + break; + case PG_SHA512: + typestr = "SHA512"; + digest_len = PG_SHA512_DIGEST_LENGTH; + break; + case PG_MD5: + case PG_SHA1: + elog(ERROR, "unsupported cryptohash type %d", type); + break; + } + + result = palloc0(digest_len + VARHDRSZ); + len = VARSIZE_ANY_EXHDR(input); + data = (unsigned char *) VARDATA_ANY(input); + + ctx = pg_cryptohash_create(type); + if (pg_cryptohash_init(ctx) < 0) + elog(ERROR, "could not initialize %s context: %s", typestr, + pg_cryptohash_error(ctx)); + if (pg_cryptohash_update(ctx, data, len) < 0) + elog(ERROR, "could not update %s context: %s", typestr, + pg_cryptohash_error(ctx)); + if (pg_cryptohash_final(ctx, (unsigned char *) VARDATA(result), + digest_len) < 0) + elog(ERROR, "could not finalize %s context: %s", typestr, + pg_cryptohash_error(ctx)); + pg_cryptohash_free(ctx); + + SET_VARSIZE(result, digest_len + VARHDRSZ); + + return result; +} + +/* + * SHA-2 variants + */ + +Datum +sha224_bytea(PG_FUNCTION_ARGS) +{ + bytea *result = cryptohash_internal(PG_SHA224, PG_GETARG_BYTEA_PP(0)); + + PG_RETURN_BYTEA_P(result); +} + +Datum +sha256_bytea(PG_FUNCTION_ARGS) +{ + bytea *result = cryptohash_internal(PG_SHA256, PG_GETARG_BYTEA_PP(0)); + + PG_RETURN_BYTEA_P(result); +} + +Datum +sha384_bytea(PG_FUNCTION_ARGS) +{ + bytea *result = cryptohash_internal(PG_SHA384, PG_GETARG_BYTEA_PP(0)); + + PG_RETURN_BYTEA_P(result); +} + +Datum +sha512_bytea(PG_FUNCTION_ARGS) +{ + bytea *result = cryptohash_internal(PG_SHA512, PG_GETARG_BYTEA_PP(0)); + + PG_RETURN_BYTEA_P(result); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/date.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/date.c new file mode 100644 index 00000000000..3f4b5791841 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/date.c @@ -0,0 +1,3129 @@ +/*------------------------------------------------------------------------- + * + * date.c + * implements DATE and TIME data types specified in SQL standard + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994-5, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/date.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <ctype.h> +#include <limits.h> +#include <float.h> +#include <math.h> +#include <time.h> + +#include "access/xact.h" +#include "catalog/pg_type.h" +#include "common/hashfn.h" +#include "libpq/pqformat.h" +#include "miscadmin.h" +#include "nodes/supportnodes.h" +#include "parser/scansup.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/date.h" +#include "utils/datetime.h" +#include "utils/numeric.h" +#include "utils/sortsupport.h" + +/* + * gcc's -ffast-math switch breaks routines that expect exact results from + * expressions like timeval / SECS_PER_HOUR, where timeval is double. 
+ */ +#ifdef __FAST_MATH__ +#error -ffast-math is known to break this code +#endif + + +/* common code for timetypmodin and timetztypmodin */ +static int32 +anytime_typmodin(bool istz, ArrayType *ta) +{ + int32 *tl; + int n; + + tl = ArrayGetIntegerTypmods(ta, &n); + + /* + * we're not too tense about good error message here because grammar + * shouldn't allow wrong number of modifiers for TIME + */ + if (n != 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid type modifier"))); + + return anytime_typmod_check(istz, tl[0]); +} + +/* exported so parse_expr.c can use it */ +int32 +anytime_typmod_check(bool istz, int32 typmod) +{ + if (typmod < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("TIME(%d)%s precision must not be negative", + typmod, (istz ? " WITH TIME ZONE" : "")))); + if (typmod > MAX_TIME_PRECISION) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("TIME(%d)%s precision reduced to maximum allowed, %d", + typmod, (istz ? " WITH TIME ZONE" : ""), + MAX_TIME_PRECISION))); + typmod = MAX_TIME_PRECISION; + } + + return typmod; +} + +/* common code for timetypmodout and timetztypmodout */ +static char * +anytime_typmodout(bool istz, int32 typmod) +{ + const char *tz = istz ? " with time zone" : " without time zone"; + + if (typmod >= 0) + return psprintf("(%d)%s", (int) typmod, tz); + else + return pstrdup(tz); +} + + +/***************************************************************************** + * Date ADT + *****************************************************************************/ + + +/* date_in() + * Given date text string, convert to internal date format. + */ +Datum +date_in(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + DateADT date; + fsec_t fsec; + struct pg_tm tt, + *tm = &tt; + int tzp; + int dtype; + int nf; + int dterr; + char *field[MAXDATEFIELDS]; + int ftype[MAXDATEFIELDS]; + char workbuf[MAXDATELEN + 1]; + DateTimeErrorExtra extra; + + dterr = ParseDateTime(str, workbuf, sizeof(workbuf), + field, ftype, MAXDATEFIELDS, &nf); + if (dterr == 0) + dterr = DecodeDateTime(field, ftype, nf, + &dtype, tm, &fsec, &tzp, &extra); + if (dterr != 0) + { + DateTimeParseError(dterr, &extra, str, "date", escontext); + PG_RETURN_NULL(); + } + + switch (dtype) + { + case DTK_DATE: + break; + + case DTK_EPOCH: + GetEpochTime(tm); + break; + + case DTK_LATE: + DATE_NOEND(date); + PG_RETURN_DATEADT(date); + + case DTK_EARLY: + DATE_NOBEGIN(date); + PG_RETURN_DATEADT(date); + + default: + DateTimeParseError(DTERR_BAD_FORMAT, &extra, str, "date", escontext); + PG_RETURN_NULL(); + } + + /* Prevent overflow in Julian-day routines */ + if (!IS_VALID_JULIAN(tm->tm_year, tm->tm_mon, tm->tm_mday)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("date out of range: \"%s\"", str))); + + date = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - POSTGRES_EPOCH_JDATE; + + /* Now check for just-out-of-range dates */ + if (!IS_VALID_DATE(date)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("date out of range: \"%s\"", str))); + + PG_RETURN_DATEADT(date); +} + +/* date_out() + * Given internal format date, convert to text string. 
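date_in() above converts the parsed year/month/day with date2j() (declared in datetime.c, not shown in this diff) and then rebases on POSTGRES_EPOCH_JDATE, so the stored DateADT is simply days since 2000-01-01. A self-contained sketch of that conversion — an adapted copy of the standard Gregorian-to-Julian-day formula, shown for illustration only:

#include <assert.h>
#include <stdio.h>

/* Gregorian calendar date -> Julian day number (illustrative copy; the
 * authoritative version lives in datetime.c). */
static int
julian_from_date(int y, int m, int d)
{
    int century;

    if (m > 2)
    {
        m += 1;
        y += 4800;
    }
    else
    {
        m += 13;
        y += 4799;
    }
    century = y / 100;
    return y * 365 - 32167
        + y / 4 - century + century / 4
        + 7834 * m / 256 + d;
}

int main(void)
{
    assert(julian_from_date(2000, 1, 1) == 2451545);   /* POSTGRES_EPOCH_JDATE */
    /* internal DateADT value = Julian day - 2451545, i.e. days since 2000-01-01 */
    printf("2024-11-07 -> %d days since epoch\n",
           julian_from_date(2024, 11, 7) - 2451545);   /* 9077 */
    return 0;
}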
+ */ +Datum +date_out(PG_FUNCTION_ARGS) +{ + DateADT date = PG_GETARG_DATEADT(0); + char *result; + struct pg_tm tt, + *tm = &tt; + char buf[MAXDATELEN + 1]; + + if (DATE_NOT_FINITE(date)) + EncodeSpecialDate(date, buf); + else + { + j2date(date + POSTGRES_EPOCH_JDATE, + &(tm->tm_year), &(tm->tm_mon), &(tm->tm_mday)); + EncodeDateOnly(tm, DateStyle, buf); + } + + result = pstrdup(buf); + PG_RETURN_CSTRING(result); +} + +/* + * date_recv - converts external binary format to date + */ +Datum +date_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + DateADT result; + + result = (DateADT) pq_getmsgint(buf, sizeof(DateADT)); + + /* Limit to the same range that date_in() accepts. */ + if (DATE_NOT_FINITE(result)) + /* ok */ ; + else if (!IS_VALID_DATE(result)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("date out of range"))); + + PG_RETURN_DATEADT(result); +} + +/* + * date_send - converts date to binary format + */ +Datum +date_send(PG_FUNCTION_ARGS) +{ + DateADT date = PG_GETARG_DATEADT(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint32(&buf, date); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * make_date - date constructor + */ +Datum +make_date(PG_FUNCTION_ARGS) +{ + struct pg_tm tm; + DateADT date; + int dterr; + bool bc = false; + + tm.tm_year = PG_GETARG_INT32(0); + tm.tm_mon = PG_GETARG_INT32(1); + tm.tm_mday = PG_GETARG_INT32(2); + + /* Handle negative years as BC */ + if (tm.tm_year < 0) + { + bc = true; + tm.tm_year = -tm.tm_year; + } + + dterr = ValidateDate(DTK_DATE_M, false, false, bc, &tm); + + if (dterr != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_FIELD_OVERFLOW), + errmsg("date field value out of range: %d-%02d-%02d", + tm.tm_year, tm.tm_mon, tm.tm_mday))); + + /* Prevent overflow in Julian-day routines */ + if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("date out of range: %d-%02d-%02d", + tm.tm_year, tm.tm_mon, tm.tm_mday))); + + date = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE; + + /* Now check for just-out-of-range dates */ + if (!IS_VALID_DATE(date)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("date out of range: %d-%02d-%02d", + tm.tm_year, tm.tm_mon, tm.tm_mday))); + + PG_RETURN_DATEADT(date); +} + +/* + * Convert reserved date values to string. + */ +void +EncodeSpecialDate(DateADT dt, char *str) +{ + if (DATE_IS_NOBEGIN(dt)) + strcpy(str, EARLY); + else if (DATE_IS_NOEND(dt)) + strcpy(str, LATE); + else /* shouldn't happen */ + elog(ERROR, "invalid argument for EncodeSpecialDate"); +} + + +/* + * GetSQLCurrentDate -- implements CURRENT_DATE + */ +DateADT +GetSQLCurrentDate(void) +{ + struct pg_tm tm; + + static __thread int cache_year = 0; + static __thread int cache_mon = 0; + static __thread int cache_mday = 0; + static __thread DateADT cache_date; + + GetCurrentDateTime(&tm); + + /* + * date2j involves several integer divisions; moreover, unless our session + * lives across local midnight, we don't really have to do it more than + * once. So it seems worth having a separate cache here. 
+ */ + if (tm.tm_year != cache_year || + tm.tm_mon != cache_mon || + tm.tm_mday != cache_mday) + { + cache_date = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE; + cache_year = tm.tm_year; + cache_mon = tm.tm_mon; + cache_mday = tm.tm_mday; + } + + return cache_date; +} + +/* + * GetSQLCurrentTime -- implements CURRENT_TIME, CURRENT_TIME(n) + */ +TimeTzADT * +GetSQLCurrentTime(int32 typmod) +{ + TimeTzADT *result; + struct pg_tm tt, + *tm = &tt; + fsec_t fsec; + int tz; + + GetCurrentTimeUsec(tm, &fsec, &tz); + + result = (TimeTzADT *) palloc(sizeof(TimeTzADT)); + tm2timetz(tm, fsec, tz, result); + AdjustTimeForTypmod(&(result->time), typmod); + return result; +} + +/* + * GetSQLLocalTime -- implements LOCALTIME, LOCALTIME(n) + */ +TimeADT +GetSQLLocalTime(int32 typmod) +{ + TimeADT result; + struct pg_tm tt, + *tm = &tt; + fsec_t fsec; + int tz; + + GetCurrentTimeUsec(tm, &fsec, &tz); + + tm2time(tm, fsec, &result); + AdjustTimeForTypmod(&result, typmod); + return result; +} + + +/* + * Comparison functions for dates + */ + +Datum +date_eq(PG_FUNCTION_ARGS) +{ + DateADT dateVal1 = PG_GETARG_DATEADT(0); + DateADT dateVal2 = PG_GETARG_DATEADT(1); + + PG_RETURN_BOOL(dateVal1 == dateVal2); +} + +Datum +date_ne(PG_FUNCTION_ARGS) +{ + DateADT dateVal1 = PG_GETARG_DATEADT(0); + DateADT dateVal2 = PG_GETARG_DATEADT(1); + + PG_RETURN_BOOL(dateVal1 != dateVal2); +} + +Datum +date_lt(PG_FUNCTION_ARGS) +{ + DateADT dateVal1 = PG_GETARG_DATEADT(0); + DateADT dateVal2 = PG_GETARG_DATEADT(1); + + PG_RETURN_BOOL(dateVal1 < dateVal2); +} + +Datum +date_le(PG_FUNCTION_ARGS) +{ + DateADT dateVal1 = PG_GETARG_DATEADT(0); + DateADT dateVal2 = PG_GETARG_DATEADT(1); + + PG_RETURN_BOOL(dateVal1 <= dateVal2); +} + +Datum +date_gt(PG_FUNCTION_ARGS) +{ + DateADT dateVal1 = PG_GETARG_DATEADT(0); + DateADT dateVal2 = PG_GETARG_DATEADT(1); + + PG_RETURN_BOOL(dateVal1 > dateVal2); +} + +Datum +date_ge(PG_FUNCTION_ARGS) +{ + DateADT dateVal1 = PG_GETARG_DATEADT(0); + DateADT dateVal2 = PG_GETARG_DATEADT(1); + + PG_RETURN_BOOL(dateVal1 >= dateVal2); +} + +Datum +date_cmp(PG_FUNCTION_ARGS) +{ + DateADT dateVal1 = PG_GETARG_DATEADT(0); + DateADT dateVal2 = PG_GETARG_DATEADT(1); + + if (dateVal1 < dateVal2) + PG_RETURN_INT32(-1); + else if (dateVal1 > dateVal2) + PG_RETURN_INT32(1); + PG_RETURN_INT32(0); +} + +Datum +date_sortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + + ssup->comparator = ssup_datum_int32_cmp; + PG_RETURN_VOID(); +} + +Datum +date_finite(PG_FUNCTION_ARGS) +{ + DateADT date = PG_GETARG_DATEADT(0); + + PG_RETURN_BOOL(!DATE_NOT_FINITE(date)); +} + +Datum +date_larger(PG_FUNCTION_ARGS) +{ + DateADT dateVal1 = PG_GETARG_DATEADT(0); + DateADT dateVal2 = PG_GETARG_DATEADT(1); + + PG_RETURN_DATEADT((dateVal1 > dateVal2) ? dateVal1 : dateVal2); +} + +Datum +date_smaller(PG_FUNCTION_ARGS) +{ + DateADT dateVal1 = PG_GETARG_DATEADT(0); + DateADT dateVal2 = PG_GETARG_DATEADT(1); + + PG_RETURN_DATEADT((dateVal1 < dateVal2) ? dateVal1 : dateVal2); +} + +/* Compute difference between two dates in days. + */ +Datum +date_mi(PG_FUNCTION_ARGS) +{ + DateADT dateVal1 = PG_GETARG_DATEADT(0); + DateADT dateVal2 = PG_GETARG_DATEADT(1); + + if (DATE_NOT_FINITE(dateVal1) || DATE_NOT_FINITE(dateVal2)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("cannot subtract infinite dates"))); + + PG_RETURN_INT32((int32) (dateVal1 - dateVal2)); +} + +/* Add a number of days to a date, giving a new date. 
+ * Must handle both positive and negative numbers of days. + */ +Datum +date_pli(PG_FUNCTION_ARGS) +{ + DateADT dateVal = PG_GETARG_DATEADT(0); + int32 days = PG_GETARG_INT32(1); + DateADT result; + + if (DATE_NOT_FINITE(dateVal)) + PG_RETURN_DATEADT(dateVal); /* can't change infinity */ + + result = dateVal + days; + + /* Check for integer overflow and out-of-allowed-range */ + if ((days >= 0 ? (result < dateVal) : (result > dateVal)) || + !IS_VALID_DATE(result)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("date out of range"))); + + PG_RETURN_DATEADT(result); +} + +/* Subtract a number of days from a date, giving a new date. + */ +Datum +date_mii(PG_FUNCTION_ARGS) +{ + DateADT dateVal = PG_GETARG_DATEADT(0); + int32 days = PG_GETARG_INT32(1); + DateADT result; + + if (DATE_NOT_FINITE(dateVal)) + PG_RETURN_DATEADT(dateVal); /* can't change infinity */ + + result = dateVal - days; + + /* Check for integer overflow and out-of-allowed-range */ + if ((days >= 0 ? (result > dateVal) : (result < dateVal)) || + !IS_VALID_DATE(result)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("date out of range"))); + + PG_RETURN_DATEADT(result); +} + + +/* + * Promote date to timestamp. + * + * On successful conversion, *overflow is set to zero if it's not NULL. + * + * If the date is finite but out of the valid range for timestamp, then: + * if overflow is NULL, we throw an out-of-range error. + * if overflow is not NULL, we store +1 or -1 there to indicate the sign + * of the overflow, and return the appropriate timestamp infinity. + * + * Note: *overflow = -1 is actually not possible currently, since both + * datatypes have the same lower bound, Julian day zero. + */ +Timestamp +date2timestamp_opt_overflow(DateADT dateVal, int *overflow) +{ + Timestamp result; + + if (overflow) + *overflow = 0; + + if (DATE_IS_NOBEGIN(dateVal)) + TIMESTAMP_NOBEGIN(result); + else if (DATE_IS_NOEND(dateVal)) + TIMESTAMP_NOEND(result); + else + { + /* + * Since dates have the same minimum values as timestamps, only upper + * boundary need be checked for overflow. + */ + if (dateVal >= (TIMESTAMP_END_JULIAN - POSTGRES_EPOCH_JDATE)) + { + if (overflow) + { + *overflow = 1; + TIMESTAMP_NOEND(result); + return result; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("date out of range for timestamp"))); + } + } + + /* date is days since 2000, timestamp is microseconds since same... */ + result = dateVal * USECS_PER_DAY; + } + + return result; +} + +/* + * Promote date to timestamp, throwing error for overflow. + */ +static TimestampTz +date2timestamp(DateADT dateVal) +{ + return date2timestamp_opt_overflow(dateVal, NULL); +} + +/* + * Promote date to timestamp with time zone. + * + * On successful conversion, *overflow is set to zero if it's not NULL. + * + * If the date is finite but out of the valid range for timestamptz, then: + * if overflow is NULL, we throw an out-of-range error. + * if overflow is not NULL, we store +1 or -1 there to indicate the sign + * of the overflow, and return the appropriate timestamptz infinity. 
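date_pli() and date_mii() above detect wraparound by checking whether the sum moved opposite to the sign of days before validating the range. A standalone sketch expressing the same guard with a compiler builtin — an alternative formulation for illustration, not what this file uses:

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

/* Add a day count to a date (both plain int day numbers here) and report
 * overflow, mirroring the intent of the check in date_pli(). */
static bool
add_days_checked(int base, int days, int *result)
{
    /* GCC/Clang builtin; returns true on signed overflow. */
    if (__builtin_add_overflow(base, days, result))
        return false;
    return true;
}

int main(void)
{
    int r;

    printf("ok: %d\n", add_days_checked(100, 7, &r) ? r : -1);    /* 107 */
    printf("overflows: %s\n",
           add_days_checked(INT_MAX, 1, &r) ? "no" : "yes");      /* yes */
    return 0;
}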
+ */ +TimestampTz +date2timestamptz_opt_overflow(DateADT dateVal, int *overflow) +{ + TimestampTz result; + struct pg_tm tt, + *tm = &tt; + int tz; + + if (overflow) + *overflow = 0; + + if (DATE_IS_NOBEGIN(dateVal)) + TIMESTAMP_NOBEGIN(result); + else if (DATE_IS_NOEND(dateVal)) + TIMESTAMP_NOEND(result); + else + { + /* + * Since dates have the same minimum values as timestamps, only upper + * boundary need be checked for overflow. + */ + if (dateVal >= (TIMESTAMP_END_JULIAN - POSTGRES_EPOCH_JDATE)) + { + if (overflow) + { + *overflow = 1; + TIMESTAMP_NOEND(result); + return result; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("date out of range for timestamp"))); + } + } + + j2date(dateVal + POSTGRES_EPOCH_JDATE, + &(tm->tm_year), &(tm->tm_mon), &(tm->tm_mday)); + tm->tm_hour = 0; + tm->tm_min = 0; + tm->tm_sec = 0; + tz = DetermineTimeZoneOffset(tm, session_timezone); + + result = dateVal * USECS_PER_DAY + tz * USECS_PER_SEC; + + /* + * Since it is possible to go beyond allowed timestamptz range because + * of time zone, check for allowed timestamp range after adding tz. + */ + if (!IS_VALID_TIMESTAMP(result)) + { + if (overflow) + { + if (result < MIN_TIMESTAMP) + { + *overflow = -1; + TIMESTAMP_NOBEGIN(result); + } + else + { + *overflow = 1; + TIMESTAMP_NOEND(result); + } + } + else + { + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("date out of range for timestamp"))); + } + } + } + + return result; +} + +/* + * Promote date to timestamptz, throwing error for overflow. + */ +static TimestampTz +date2timestamptz(DateADT dateVal) +{ + return date2timestamptz_opt_overflow(dateVal, NULL); +} + +/* + * date2timestamp_no_overflow + * + * This is chartered to produce a double value that is numerically + * equivalent to the corresponding Timestamp value, if the date is in the + * valid range of Timestamps, but in any case not throw an overflow error. + * We can do this since the numerical range of double is greater than + * that of non-erroneous timestamps. The results are currently only + * used for statistical estimation purposes. + */ +double +date2timestamp_no_overflow(DateADT dateVal) +{ + double result; + + if (DATE_IS_NOBEGIN(dateVal)) + result = -DBL_MAX; + else if (DATE_IS_NOEND(dateVal)) + result = DBL_MAX; + else + { + /* date is days since 2000, timestamp is microseconds since same... */ + result = dateVal * (double) USECS_PER_DAY; + } + + return result; +} + + +/* + * Crosstype comparison functions for dates + */ + +int32 +date_cmp_timestamp_internal(DateADT dateVal, Timestamp dt2) +{ + Timestamp dt1; + int overflow; + + dt1 = date2timestamp_opt_overflow(dateVal, &overflow); + if (overflow > 0) + { + /* dt1 is larger than any finite timestamp, but less than infinity */ + return TIMESTAMP_IS_NOEND(dt2) ? 
-1 : +1; + } + Assert(overflow == 0); /* -1 case cannot occur */ + + return timestamp_cmp_internal(dt1, dt2); +} + +Datum +date_eq_timestamp(PG_FUNCTION_ARGS) +{ + DateADT dateVal = PG_GETARG_DATEADT(0); + Timestamp dt2 = PG_GETARG_TIMESTAMP(1); + + PG_RETURN_BOOL(date_cmp_timestamp_internal(dateVal, dt2) == 0); +} + +Datum +date_ne_timestamp(PG_FUNCTION_ARGS) +{ + DateADT dateVal = PG_GETARG_DATEADT(0); + Timestamp dt2 = PG_GETARG_TIMESTAMP(1); + + PG_RETURN_BOOL(date_cmp_timestamp_internal(dateVal, dt2) != 0); +} + +Datum +date_lt_timestamp(PG_FUNCTION_ARGS) +{ + DateADT dateVal = PG_GETARG_DATEADT(0); + Timestamp dt2 = PG_GETARG_TIMESTAMP(1); + + PG_RETURN_BOOL(date_cmp_timestamp_internal(dateVal, dt2) < 0); +} + +Datum +date_gt_timestamp(PG_FUNCTION_ARGS) +{ + DateADT dateVal = PG_GETARG_DATEADT(0); + Timestamp dt2 = PG_GETARG_TIMESTAMP(1); + + PG_RETURN_BOOL(date_cmp_timestamp_internal(dateVal, dt2) > 0); +} + +Datum +date_le_timestamp(PG_FUNCTION_ARGS) +{ + DateADT dateVal = PG_GETARG_DATEADT(0); + Timestamp dt2 = PG_GETARG_TIMESTAMP(1); + + PG_RETURN_BOOL(date_cmp_timestamp_internal(dateVal, dt2) <= 0); +} + +Datum +date_ge_timestamp(PG_FUNCTION_ARGS) +{ + DateADT dateVal = PG_GETARG_DATEADT(0); + Timestamp dt2 = PG_GETARG_TIMESTAMP(1); + + PG_RETURN_BOOL(date_cmp_timestamp_internal(dateVal, dt2) >= 0); +} + +Datum +date_cmp_timestamp(PG_FUNCTION_ARGS) +{ + DateADT dateVal = PG_GETARG_DATEADT(0); + Timestamp dt2 = PG_GETARG_TIMESTAMP(1); + + PG_RETURN_INT32(date_cmp_timestamp_internal(dateVal, dt2)); +} + +int32 +date_cmp_timestamptz_internal(DateADT dateVal, TimestampTz dt2) +{ + TimestampTz dt1; + int overflow; + + dt1 = date2timestamptz_opt_overflow(dateVal, &overflow); + if (overflow > 0) + { + /* dt1 is larger than any finite timestamp, but less than infinity */ + return TIMESTAMP_IS_NOEND(dt2) ? -1 : +1; + } + if (overflow < 0) + { + /* dt1 is less than any finite timestamp, but more than -infinity */ + return TIMESTAMP_IS_NOBEGIN(dt2) ? 
+1 : -1; + } + + return timestamptz_cmp_internal(dt1, dt2); +} + +Datum +date_eq_timestamptz(PG_FUNCTION_ARGS) +{ + DateADT dateVal = PG_GETARG_DATEADT(0); + TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1); + + PG_RETURN_BOOL(date_cmp_timestamptz_internal(dateVal, dt2) == 0); +} + +Datum +date_ne_timestamptz(PG_FUNCTION_ARGS) +{ + DateADT dateVal = PG_GETARG_DATEADT(0); + TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1); + + PG_RETURN_BOOL(date_cmp_timestamptz_internal(dateVal, dt2) != 0); +} + +Datum +date_lt_timestamptz(PG_FUNCTION_ARGS) +{ + DateADT dateVal = PG_GETARG_DATEADT(0); + TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1); + + PG_RETURN_BOOL(date_cmp_timestamptz_internal(dateVal, dt2) < 0); +} + +Datum +date_gt_timestamptz(PG_FUNCTION_ARGS) +{ + DateADT dateVal = PG_GETARG_DATEADT(0); + TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1); + + PG_RETURN_BOOL(date_cmp_timestamptz_internal(dateVal, dt2) > 0); +} + +Datum +date_le_timestamptz(PG_FUNCTION_ARGS) +{ + DateADT dateVal = PG_GETARG_DATEADT(0); + TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1); + + PG_RETURN_BOOL(date_cmp_timestamptz_internal(dateVal, dt2) <= 0); +} + +Datum +date_ge_timestamptz(PG_FUNCTION_ARGS) +{ + DateADT dateVal = PG_GETARG_DATEADT(0); + TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1); + + PG_RETURN_BOOL(date_cmp_timestamptz_internal(dateVal, dt2) >= 0); +} + +Datum +date_cmp_timestamptz(PG_FUNCTION_ARGS) +{ + DateADT dateVal = PG_GETARG_DATEADT(0); + TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1); + + PG_RETURN_INT32(date_cmp_timestamptz_internal(dateVal, dt2)); +} + +Datum +timestamp_eq_date(PG_FUNCTION_ARGS) +{ + Timestamp dt1 = PG_GETARG_TIMESTAMP(0); + DateADT dateVal = PG_GETARG_DATEADT(1); + + PG_RETURN_BOOL(date_cmp_timestamp_internal(dateVal, dt1) == 0); +} + +Datum +timestamp_ne_date(PG_FUNCTION_ARGS) +{ + Timestamp dt1 = PG_GETARG_TIMESTAMP(0); + DateADT dateVal = PG_GETARG_DATEADT(1); + + PG_RETURN_BOOL(date_cmp_timestamp_internal(dateVal, dt1) != 0); +} + +Datum +timestamp_lt_date(PG_FUNCTION_ARGS) +{ + Timestamp dt1 = PG_GETARG_TIMESTAMP(0); + DateADT dateVal = PG_GETARG_DATEADT(1); + + PG_RETURN_BOOL(date_cmp_timestamp_internal(dateVal, dt1) > 0); +} + +Datum +timestamp_gt_date(PG_FUNCTION_ARGS) +{ + Timestamp dt1 = PG_GETARG_TIMESTAMP(0); + DateADT dateVal = PG_GETARG_DATEADT(1); + + PG_RETURN_BOOL(date_cmp_timestamp_internal(dateVal, dt1) < 0); +} + +Datum +timestamp_le_date(PG_FUNCTION_ARGS) +{ + Timestamp dt1 = PG_GETARG_TIMESTAMP(0); + DateADT dateVal = PG_GETARG_DATEADT(1); + + PG_RETURN_BOOL(date_cmp_timestamp_internal(dateVal, dt1) >= 0); +} + +Datum +timestamp_ge_date(PG_FUNCTION_ARGS) +{ + Timestamp dt1 = PG_GETARG_TIMESTAMP(0); + DateADT dateVal = PG_GETARG_DATEADT(1); + + PG_RETURN_BOOL(date_cmp_timestamp_internal(dateVal, dt1) <= 0); +} + +Datum +timestamp_cmp_date(PG_FUNCTION_ARGS) +{ + Timestamp dt1 = PG_GETARG_TIMESTAMP(0); + DateADT dateVal = PG_GETARG_DATEADT(1); + + PG_RETURN_INT32(-date_cmp_timestamp_internal(dateVal, dt1)); +} + +Datum +timestamptz_eq_date(PG_FUNCTION_ARGS) +{ + TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0); + DateADT dateVal = PG_GETARG_DATEADT(1); + + PG_RETURN_BOOL(date_cmp_timestamptz_internal(dateVal, dt1) == 0); +} + +Datum +timestamptz_ne_date(PG_FUNCTION_ARGS) +{ + TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0); + DateADT dateVal = PG_GETARG_DATEADT(1); + + PG_RETURN_BOOL(date_cmp_timestamptz_internal(dateVal, dt1) != 0); +} + +Datum +timestamptz_lt_date(PG_FUNCTION_ARGS) +{ + TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0); + DateADT dateVal = PG_GETARG_DATEADT(1); + + 
PG_RETURN_BOOL(date_cmp_timestamptz_internal(dateVal, dt1) > 0); +} + +Datum +timestamptz_gt_date(PG_FUNCTION_ARGS) +{ + TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0); + DateADT dateVal = PG_GETARG_DATEADT(1); + + PG_RETURN_BOOL(date_cmp_timestamptz_internal(dateVal, dt1) < 0); +} + +Datum +timestamptz_le_date(PG_FUNCTION_ARGS) +{ + TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0); + DateADT dateVal = PG_GETARG_DATEADT(1); + + PG_RETURN_BOOL(date_cmp_timestamptz_internal(dateVal, dt1) >= 0); +} + +Datum +timestamptz_ge_date(PG_FUNCTION_ARGS) +{ + TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0); + DateADT dateVal = PG_GETARG_DATEADT(1); + + PG_RETURN_BOOL(date_cmp_timestamptz_internal(dateVal, dt1) <= 0); +} + +Datum +timestamptz_cmp_date(PG_FUNCTION_ARGS) +{ + TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0); + DateADT dateVal = PG_GETARG_DATEADT(1); + + PG_RETURN_INT32(-date_cmp_timestamptz_internal(dateVal, dt1)); +} + +/* + * in_range support function for date. + * + * We implement this by promoting the dates to timestamp (without time zone) + * and then using the timestamp-and-interval in_range function. + */ +Datum +in_range_date_interval(PG_FUNCTION_ARGS) +{ + DateADT val = PG_GETARG_DATEADT(0); + DateADT base = PG_GETARG_DATEADT(1); + Interval *offset = PG_GETARG_INTERVAL_P(2); + bool sub = PG_GETARG_BOOL(3); + bool less = PG_GETARG_BOOL(4); + Timestamp valStamp; + Timestamp baseStamp; + + /* XXX we could support out-of-range cases here, perhaps */ + valStamp = date2timestamp(val); + baseStamp = date2timestamp(base); + + return DirectFunctionCall5(in_range_timestamp_interval, + TimestampGetDatum(valStamp), + TimestampGetDatum(baseStamp), + IntervalPGetDatum(offset), + BoolGetDatum(sub), + BoolGetDatum(less)); +} + + +/* extract_date() + * Extract specified field from date type. 
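The DTK_CENTURY / DTK_DECADE / DTK_MILLENNIUM branches of extract_date(), which follow just below, round positive years up to the containing unit and mirror the rule for BC years (stored internally with 1 BC as year 0, since there is no year zero). A small standalone check of that arithmetic:

#include <assert.h>
#include <stdio.h>

/* Same century rule extract_date() applies below. */
static int
century_of(int year)
{
    return (year > 0) ? (year + 99) / 100
                      : -((99 - (year - 1)) / 100);
}

int main(void)
{
    assert(century_of(2000) == 20);   /* 2000 is still the 20th century */
    assert(century_of(2001) == 21);
    assert(century_of(1) == 1);
    assert(century_of(0) == -1);      /* internal year 0 == 1 BC */
    printf("century checks passed\n");
    return 0;
}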
+ */ +Datum +extract_date(PG_FUNCTION_ARGS) +{ + text *units = PG_GETARG_TEXT_PP(0); + DateADT date = PG_GETARG_DATEADT(1); + int64 intresult; + int type, + val; + char *lowunits; + int year, + mon, + mday; + + lowunits = downcase_truncate_identifier(VARDATA_ANY(units), + VARSIZE_ANY_EXHDR(units), + false); + + type = DecodeUnits(0, lowunits, &val); + if (type == UNKNOWN_FIELD) + type = DecodeSpecial(0, lowunits, &val); + + if (DATE_NOT_FINITE(date) && (type == UNITS || type == RESERV)) + { + switch (val) + { + /* Oscillating units */ + case DTK_DAY: + case DTK_MONTH: + case DTK_QUARTER: + case DTK_WEEK: + case DTK_DOW: + case DTK_ISODOW: + case DTK_DOY: + PG_RETURN_NULL(); + break; + + /* Monotonically-increasing units */ + case DTK_YEAR: + case DTK_DECADE: + case DTK_CENTURY: + case DTK_MILLENNIUM: + case DTK_JULIAN: + case DTK_ISOYEAR: + case DTK_EPOCH: + if (DATE_IS_NOBEGIN(date)) + PG_RETURN_NUMERIC(DatumGetNumeric(DirectFunctionCall3(numeric_in, + CStringGetDatum("-Infinity"), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(-1)))); + else + PG_RETURN_NUMERIC(DatumGetNumeric(DirectFunctionCall3(numeric_in, + CStringGetDatum("Infinity"), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(-1)))); + default: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unit \"%s\" not supported for type %s", + lowunits, format_type_be(DATEOID)))); + } + } + else if (type == UNITS) + { + j2date(date + POSTGRES_EPOCH_JDATE, &year, &mon, &mday); + + switch (val) + { + case DTK_DAY: + intresult = mday; + break; + + case DTK_MONTH: + intresult = mon; + break; + + case DTK_QUARTER: + intresult = (mon - 1) / 3 + 1; + break; + + case DTK_WEEK: + intresult = date2isoweek(year, mon, mday); + break; + + case DTK_YEAR: + if (year > 0) + intresult = year; + else + /* there is no year 0, just 1 BC and 1 AD */ + intresult = year - 1; + break; + + case DTK_DECADE: + /* see comments in timestamp_part */ + if (year >= 0) + intresult = year / 10; + else + intresult = -((8 - (year - 1)) / 10); + break; + + case DTK_CENTURY: + /* see comments in timestamp_part */ + if (year > 0) + intresult = (year + 99) / 100; + else + intresult = -((99 - (year - 1)) / 100); + break; + + case DTK_MILLENNIUM: + /* see comments in timestamp_part */ + if (year > 0) + intresult = (year + 999) / 1000; + else + intresult = -((999 - (year - 1)) / 1000); + break; + + case DTK_JULIAN: + intresult = date + POSTGRES_EPOCH_JDATE; + break; + + case DTK_ISOYEAR: + intresult = date2isoyear(year, mon, mday); + /* Adjust BC years */ + if (intresult <= 0) + intresult -= 1; + break; + + case DTK_DOW: + case DTK_ISODOW: + intresult = j2day(date + POSTGRES_EPOCH_JDATE); + if (val == DTK_ISODOW && intresult == 0) + intresult = 7; + break; + + case DTK_DOY: + intresult = date2j(year, mon, mday) - date2j(year, 1, 1) + 1; + break; + + default: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unit \"%s\" not supported for type %s", + lowunits, format_type_be(DATEOID)))); + intresult = 0; + } + } + else if (type == RESERV) + { + switch (val) + { + case DTK_EPOCH: + intresult = ((int64) date + POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY; + break; + + default: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unit \"%s\" not supported for type %s", + lowunits, format_type_be(DATEOID)))); + intresult = 0; + } + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unit \"%s\" not recognized for type %s", + lowunits, format_type_be(DATEOID)))); + intresult = 0; + } + 
+ PG_RETURN_NUMERIC(int64_to_numeric(intresult)); +} + + +/* Add an interval to a date, giving a new date. + * Must handle both positive and negative intervals. + * + * We implement this by promoting the date to timestamp (without time zone) + * and then using the timestamp plus interval function. + */ +Datum +date_pl_interval(PG_FUNCTION_ARGS) +{ + DateADT dateVal = PG_GETARG_DATEADT(0); + Interval *span = PG_GETARG_INTERVAL_P(1); + Timestamp dateStamp; + + dateStamp = date2timestamp(dateVal); + + return DirectFunctionCall2(timestamp_pl_interval, + TimestampGetDatum(dateStamp), + PointerGetDatum(span)); +} + +/* Subtract an interval from a date, giving a new date. + * Must handle both positive and negative intervals. + * + * We implement this by promoting the date to timestamp (without time zone) + * and then using the timestamp minus interval function. + */ +Datum +date_mi_interval(PG_FUNCTION_ARGS) +{ + DateADT dateVal = PG_GETARG_DATEADT(0); + Interval *span = PG_GETARG_INTERVAL_P(1); + Timestamp dateStamp; + + dateStamp = date2timestamp(dateVal); + + return DirectFunctionCall2(timestamp_mi_interval, + TimestampGetDatum(dateStamp), + PointerGetDatum(span)); +} + +/* date_timestamp() + * Convert date to timestamp data type. + */ +Datum +date_timestamp(PG_FUNCTION_ARGS) +{ + DateADT dateVal = PG_GETARG_DATEADT(0); + Timestamp result; + + result = date2timestamp(dateVal); + + PG_RETURN_TIMESTAMP(result); +} + +/* timestamp_date() + * Convert timestamp to date data type. + */ +Datum +timestamp_date(PG_FUNCTION_ARGS) +{ + Timestamp timestamp = PG_GETARG_TIMESTAMP(0); + DateADT result; + struct pg_tm tt, + *tm = &tt; + fsec_t fsec; + + if (TIMESTAMP_IS_NOBEGIN(timestamp)) + DATE_NOBEGIN(result); + else if (TIMESTAMP_IS_NOEND(timestamp)) + DATE_NOEND(result); + else + { + if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + result = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - POSTGRES_EPOCH_JDATE; + } + + PG_RETURN_DATEADT(result); +} + + +/* date_timestamptz() + * Convert date to timestamp with time zone data type. + */ +Datum +date_timestamptz(PG_FUNCTION_ARGS) +{ + DateADT dateVal = PG_GETARG_DATEADT(0); + TimestampTz result; + + result = date2timestamptz(dateVal); + + PG_RETURN_TIMESTAMP(result); +} + + +/* timestamptz_date() + * Convert timestamp with time zone to date data type. 
+ */ +Datum +timestamptz_date(PG_FUNCTION_ARGS) +{ + TimestampTz timestamp = PG_GETARG_TIMESTAMP(0); + DateADT result; + struct pg_tm tt, + *tm = &tt; + fsec_t fsec; + int tz; + + if (TIMESTAMP_IS_NOBEGIN(timestamp)) + DATE_NOBEGIN(result); + else if (TIMESTAMP_IS_NOEND(timestamp)) + DATE_NOEND(result); + else + { + if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + result = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - POSTGRES_EPOCH_JDATE; + } + + PG_RETURN_DATEADT(result); +} + + +/***************************************************************************** + * Time ADT + *****************************************************************************/ + +Datum +time_in(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); +#ifdef NOT_USED + Oid typelem = PG_GETARG_OID(1); +#endif + int32 typmod = PG_GETARG_INT32(2); + Node *escontext = fcinfo->context; + TimeADT result; + fsec_t fsec; + struct pg_tm tt, + *tm = &tt; + int tz; + int nf; + int dterr; + char workbuf[MAXDATELEN + 1]; + char *field[MAXDATEFIELDS]; + int dtype; + int ftype[MAXDATEFIELDS]; + DateTimeErrorExtra extra; + + dterr = ParseDateTime(str, workbuf, sizeof(workbuf), + field, ftype, MAXDATEFIELDS, &nf); + if (dterr == 0) + dterr = DecodeTimeOnly(field, ftype, nf, + &dtype, tm, &fsec, &tz, &extra); + if (dterr != 0) + { + DateTimeParseError(dterr, &extra, str, "time", escontext); + PG_RETURN_NULL(); + } + + tm2time(tm, fsec, &result); + AdjustTimeForTypmod(&result, typmod); + + PG_RETURN_TIMEADT(result); +} + +/* tm2time() + * Convert a tm structure to a time data type. + */ +int +tm2time(struct pg_tm *tm, fsec_t fsec, TimeADT *result) +{ + *result = ((((tm->tm_hour * MINS_PER_HOUR + tm->tm_min) * SECS_PER_MINUTE) + tm->tm_sec) + * USECS_PER_SEC) + fsec; + return 0; +} + +/* time_overflows() + * Check to see if a broken-down time-of-day is out of range. + */ +bool +time_overflows(int hour, int min, int sec, fsec_t fsec) +{ + /* Range-check the fields individually. */ + if (hour < 0 || hour > HOURS_PER_DAY || + min < 0 || min >= MINS_PER_HOUR || + sec < 0 || sec > SECS_PER_MINUTE || + fsec < 0 || fsec > USECS_PER_SEC) + return true; + + /* + * Because we allow, eg, hour = 24 or sec = 60, we must check separately + * that the total time value doesn't exceed 24:00:00. + */ + if ((((((hour * MINS_PER_HOUR + min) * SECS_PER_MINUTE) + + sec) * USECS_PER_SEC) + fsec) > USECS_PER_DAY) + return true; + + return false; +} + +/* float_time_overflows() + * Same, when we have seconds + fractional seconds as one "double" value. + */ +bool +float_time_overflows(int hour, int min, double sec) +{ + /* Range-check the fields individually. */ + if (hour < 0 || hour > HOURS_PER_DAY || + min < 0 || min >= MINS_PER_HOUR) + return true; + + /* + * "sec", being double, requires extra care. Cope with NaN, and round off + * before applying the range check to avoid unexpected errors due to + * imprecise input. (We assume rint() behaves sanely with infinities.) + */ + if (isnan(sec)) + return true; + sec = rint(sec * USECS_PER_SEC); + if (sec < 0 || sec > SECS_PER_MINUTE * USECS_PER_SEC) + return true; + + /* + * Because we allow, eg, hour = 24 or sec = 60, we must check separately + * that the total time value doesn't exceed 24:00:00. This must match the + * way that callers will convert the fields to a time. 
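The same boundary cases in numbers: time_overflows() above accepts fields at their individual limits as long as the combined value does not pass 24:00:00. A self-contained restatement of the final check, with the constants copied locally for illustration:

#include <stdbool.h>
#include <stdio.h>

#define MINS_PER_HOUR   60
#define SECS_PER_MINUTE 60
#define USECS_PER_SEC   1000000LL
#define USECS_PER_DAY   86400000000LL

/* Individual fields may sit at their limits (hour 24, second 60), but the
 * total must not exceed one day. */
static bool
exceeds_day(int hour, int min, int sec, long long fsec)
{
    return (((hour * MINS_PER_HOUR + min) * SECS_PER_MINUTE + sec)
            * USECS_PER_SEC + fsec) > USECS_PER_DAY;
}

int main(void)
{
    printf("24:00:00        -> %s\n", exceeds_day(24, 0, 0, 0) ? "rejected" : "accepted");
    printf("23:59:60        -> %s\n", exceeds_day(23, 59, 60, 0) ? "rejected" : "accepted");
    printf("24:00:00.000001 -> %s\n", exceeds_day(24, 0, 0, 1) ? "rejected" : "accepted");
    return 0;
}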
+ */ + if (((((hour * MINS_PER_HOUR + min) * SECS_PER_MINUTE) + * USECS_PER_SEC) + (int64) sec) > USECS_PER_DAY) + return true; + + return false; +} + + +/* time2tm() + * Convert time data type to POSIX time structure. + * + * Note that only the hour/min/sec/fractional-sec fields are filled in. + */ +int +time2tm(TimeADT time, struct pg_tm *tm, fsec_t *fsec) +{ + tm->tm_hour = time / USECS_PER_HOUR; + time -= tm->tm_hour * USECS_PER_HOUR; + tm->tm_min = time / USECS_PER_MINUTE; + time -= tm->tm_min * USECS_PER_MINUTE; + tm->tm_sec = time / USECS_PER_SEC; + time -= tm->tm_sec * USECS_PER_SEC; + *fsec = time; + return 0; +} + +Datum +time_out(PG_FUNCTION_ARGS) +{ + TimeADT time = PG_GETARG_TIMEADT(0); + char *result; + struct pg_tm tt, + *tm = &tt; + fsec_t fsec; + char buf[MAXDATELEN + 1]; + + time2tm(time, tm, &fsec); + EncodeTimeOnly(tm, fsec, false, 0, DateStyle, buf); + + result = pstrdup(buf); + PG_RETURN_CSTRING(result); +} + +/* + * time_recv - converts external binary format to time + */ +Datum +time_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + +#ifdef NOT_USED + Oid typelem = PG_GETARG_OID(1); +#endif + int32 typmod = PG_GETARG_INT32(2); + TimeADT result; + + result = pq_getmsgint64(buf); + + if (result < INT64CONST(0) || result > USECS_PER_DAY) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("time out of range"))); + + AdjustTimeForTypmod(&result, typmod); + + PG_RETURN_TIMEADT(result); +} + +/* + * time_send - converts time to binary format + */ +Datum +time_send(PG_FUNCTION_ARGS) +{ + TimeADT time = PG_GETARG_TIMEADT(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint64(&buf, time); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +Datum +timetypmodin(PG_FUNCTION_ARGS) +{ + ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0); + + PG_RETURN_INT32(anytime_typmodin(false, ta)); +} + +Datum +timetypmodout(PG_FUNCTION_ARGS) +{ + int32 typmod = PG_GETARG_INT32(0); + + PG_RETURN_CSTRING(anytime_typmodout(false, typmod)); +} + +/* + * make_time - time constructor + */ +Datum +make_time(PG_FUNCTION_ARGS) +{ + int tm_hour = PG_GETARG_INT32(0); + int tm_min = PG_GETARG_INT32(1); + double sec = PG_GETARG_FLOAT8(2); + TimeADT time; + + /* Check for time overflow */ + if (float_time_overflows(tm_hour, tm_min, sec)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_FIELD_OVERFLOW), + errmsg("time field value out of range: %d:%02d:%02g", + tm_hour, tm_min, sec))); + + /* This should match tm2time */ + time = (((tm_hour * MINS_PER_HOUR + tm_min) * SECS_PER_MINUTE) + * USECS_PER_SEC) + (int64) rint(sec * USECS_PER_SEC); + + PG_RETURN_TIMEADT(time); +} + + +/* time_support() + * + * Planner support function for the time_scale() and timetz_scale() + * length coercion functions (we need not distinguish them here). + */ +Datum +time_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestSimplify)) + { + SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq; + + ret = TemporalSimplify(MAX_TIME_PRECISION, (Node *) req->fcall); + } + + PG_RETURN_POINTER(ret); +} + +/* time_scale() + * Adjust time type for specified scale factor. + * Used by PostgreSQL type system to stuff columns. 
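time2tm() above peels hours, minutes, seconds, and leftover microseconds off the int64 value by repeated division; a standalone walk-through with a concrete value:

#include <stdio.h>

#define USECS_PER_HOUR   3600000000LL
#define USECS_PER_MINUTE 60000000LL
#define USECS_PER_SEC    1000000LL

int main(void)
{
    /* A TimeADT is just microseconds since midnight; peel the fields off
     * top-down exactly as time2tm() does. */
    long long t = 15LL * USECS_PER_HOUR + 4 * USECS_PER_MINUTE
        + 5 * USECS_PER_SEC + 678901;     /* 15:04:05.678901 */
    int hour, min, sec;
    long long fsec;

    hour = t / USECS_PER_HOUR;   t -= (long long) hour * USECS_PER_HOUR;
    min  = t / USECS_PER_MINUTE; t -= (long long) min * USECS_PER_MINUTE;
    sec  = t / USECS_PER_SEC;    fsec = t - (long long) sec * USECS_PER_SEC;

    printf("%02d:%02d:%02d.%06lld\n", hour, min, sec, fsec);   /* 15:04:05.678901 */
    return 0;
}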
+ */ +Datum +time_scale(PG_FUNCTION_ARGS) +{ + TimeADT time = PG_GETARG_TIMEADT(0); + int32 typmod = PG_GETARG_INT32(1); + TimeADT result; + + result = time; + AdjustTimeForTypmod(&result, typmod); + + PG_RETURN_TIMEADT(result); +} + +/* AdjustTimeForTypmod() + * Force the precision of the time value to a specified value. + * Uses *exactly* the same code as in AdjustTimestampForTypmod() + * but we make a separate copy because those types do not + * have a fundamental tie together but rather a coincidence of + * implementation. - thomas + */ +void +AdjustTimeForTypmod(TimeADT *time, int32 typmod) +{ + static const int64 TimeScales[MAX_TIME_PRECISION + 1] = { + INT64CONST(1000000), + INT64CONST(100000), + INT64CONST(10000), + INT64CONST(1000), + INT64CONST(100), + INT64CONST(10), + INT64CONST(1) + }; + + static const int64 TimeOffsets[MAX_TIME_PRECISION + 1] = { + INT64CONST(500000), + INT64CONST(50000), + INT64CONST(5000), + INT64CONST(500), + INT64CONST(50), + INT64CONST(5), + INT64CONST(0) + }; + + if (typmod >= 0 && typmod <= MAX_TIME_PRECISION) + { + if (*time >= INT64CONST(0)) + *time = ((*time + TimeOffsets[typmod]) / TimeScales[typmod]) * + TimeScales[typmod]; + else + *time = -((((-*time) + TimeOffsets[typmod]) / TimeScales[typmod]) * + TimeScales[typmod]); + } +} + + +Datum +time_eq(PG_FUNCTION_ARGS) +{ + TimeADT time1 = PG_GETARG_TIMEADT(0); + TimeADT time2 = PG_GETARG_TIMEADT(1); + + PG_RETURN_BOOL(time1 == time2); +} + +Datum +time_ne(PG_FUNCTION_ARGS) +{ + TimeADT time1 = PG_GETARG_TIMEADT(0); + TimeADT time2 = PG_GETARG_TIMEADT(1); + + PG_RETURN_BOOL(time1 != time2); +} + +Datum +time_lt(PG_FUNCTION_ARGS) +{ + TimeADT time1 = PG_GETARG_TIMEADT(0); + TimeADT time2 = PG_GETARG_TIMEADT(1); + + PG_RETURN_BOOL(time1 < time2); +} + +Datum +time_le(PG_FUNCTION_ARGS) +{ + TimeADT time1 = PG_GETARG_TIMEADT(0); + TimeADT time2 = PG_GETARG_TIMEADT(1); + + PG_RETURN_BOOL(time1 <= time2); +} + +Datum +time_gt(PG_FUNCTION_ARGS) +{ + TimeADT time1 = PG_GETARG_TIMEADT(0); + TimeADT time2 = PG_GETARG_TIMEADT(1); + + PG_RETURN_BOOL(time1 > time2); +} + +Datum +time_ge(PG_FUNCTION_ARGS) +{ + TimeADT time1 = PG_GETARG_TIMEADT(0); + TimeADT time2 = PG_GETARG_TIMEADT(1); + + PG_RETURN_BOOL(time1 >= time2); +} + +Datum +time_cmp(PG_FUNCTION_ARGS) +{ + TimeADT time1 = PG_GETARG_TIMEADT(0); + TimeADT time2 = PG_GETARG_TIMEADT(1); + + if (time1 < time2) + PG_RETURN_INT32(-1); + if (time1 > time2) + PG_RETURN_INT32(1); + PG_RETURN_INT32(0); +} + +Datum +time_hash(PG_FUNCTION_ARGS) +{ + return hashint8(fcinfo); +} + +Datum +time_hash_extended(PG_FUNCTION_ARGS) +{ + return hashint8extended(fcinfo); +} + +Datum +time_larger(PG_FUNCTION_ARGS) +{ + TimeADT time1 = PG_GETARG_TIMEADT(0); + TimeADT time2 = PG_GETARG_TIMEADT(1); + + PG_RETURN_TIMEADT((time1 > time2) ? time1 : time2); +} + +Datum +time_smaller(PG_FUNCTION_ARGS) +{ + TimeADT time1 = PG_GETARG_TIMEADT(0); + TimeADT time2 = PG_GETARG_TIMEADT(1); + + PG_RETURN_TIMEADT((time1 < time2) ? time1 : time2); +} + +/* overlaps_time() --- implements the SQL OVERLAPS operator. + * + * Algorithm is per SQL spec. This is much harder than you'd think + * because the spec requires us to deliver a non-null answer in some cases + * where some of the inputs are null. + */ +Datum +overlaps_time(PG_FUNCTION_ARGS) +{ + /* + * The arguments are TimeADT, but we leave them as generic Datums to avoid + * dereferencing nulls (TimeADT is pass-by-reference!) 
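AdjustTimeForTypmod() above rounds by adding half of the unit being discarded and then truncating, via the TimeScales/TimeOffsets tables. A standalone sketch of the typmod-3 (millisecond) case, with the two table entries copied locally:

#include <stdio.h>

int main(void)
{
    long long time = 45296789512LL;   /* 12:34:56.789512 in microseconds */
    long long scale = 1000;           /* TimeScales[3] */
    long long offset = 500;           /* TimeOffsets[3]: half a millisecond */

    long long rounded = ((time + offset) / scale) * scale;

    printf("%lld -> %lld\n", time, rounded);   /* ...789512 -> ...790000 */
    return 0;
}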
+ */ + Datum ts1 = PG_GETARG_DATUM(0); + Datum te1 = PG_GETARG_DATUM(1); + Datum ts2 = PG_GETARG_DATUM(2); + Datum te2 = PG_GETARG_DATUM(3); + bool ts1IsNull = PG_ARGISNULL(0); + bool te1IsNull = PG_ARGISNULL(1); + bool ts2IsNull = PG_ARGISNULL(2); + bool te2IsNull = PG_ARGISNULL(3); + +#define TIMEADT_GT(t1,t2) \ + (DatumGetTimeADT(t1) > DatumGetTimeADT(t2)) +#define TIMEADT_LT(t1,t2) \ + (DatumGetTimeADT(t1) < DatumGetTimeADT(t2)) + + /* + * If both endpoints of interval 1 are null, the result is null (unknown). + * If just one endpoint is null, take ts1 as the non-null one. Otherwise, + * take ts1 as the lesser endpoint. + */ + if (ts1IsNull) + { + if (te1IsNull) + PG_RETURN_NULL(); + /* swap null for non-null */ + ts1 = te1; + te1IsNull = true; + } + else if (!te1IsNull) + { + if (TIMEADT_GT(ts1, te1)) + { + Datum tt = ts1; + + ts1 = te1; + te1 = tt; + } + } + + /* Likewise for interval 2. */ + if (ts2IsNull) + { + if (te2IsNull) + PG_RETURN_NULL(); + /* swap null for non-null */ + ts2 = te2; + te2IsNull = true; + } + else if (!te2IsNull) + { + if (TIMEADT_GT(ts2, te2)) + { + Datum tt = ts2; + + ts2 = te2; + te2 = tt; + } + } + + /* + * At this point neither ts1 nor ts2 is null, so we can consider three + * cases: ts1 > ts2, ts1 < ts2, ts1 = ts2 + */ + if (TIMEADT_GT(ts1, ts2)) + { + /* + * This case is ts1 < te2 OR te1 < te2, which may look redundant but + * in the presence of nulls it's not quite completely so. + */ + if (te2IsNull) + PG_RETURN_NULL(); + if (TIMEADT_LT(ts1, te2)) + PG_RETURN_BOOL(true); + if (te1IsNull) + PG_RETURN_NULL(); + + /* + * If te1 is not null then we had ts1 <= te1 above, and we just found + * ts1 >= te2, hence te1 >= te2. + */ + PG_RETURN_BOOL(false); + } + else if (TIMEADT_LT(ts1, ts2)) + { + /* This case is ts2 < te1 OR te2 < te1 */ + if (te1IsNull) + PG_RETURN_NULL(); + if (TIMEADT_LT(ts2, te1)) + PG_RETURN_BOOL(true); + if (te2IsNull) + PG_RETURN_NULL(); + + /* + * If te2 is not null then we had ts2 <= te2 above, and we just found + * ts2 >= te1, hence te2 >= te1. + */ + PG_RETURN_BOOL(false); + } + else + { + /* + * For ts1 = ts2 the spec says te1 <> te2 OR te1 = te2, which is a + * rather silly way of saying "true if both are nonnull, else null". + */ + if (te1IsNull || te2IsNull) + PG_RETURN_NULL(); + PG_RETURN_BOOL(true); + } + +#undef TIMEADT_GT +#undef TIMEADT_LT +} + +/* timestamp_time() + * Convert timestamp to time data type. + */ +Datum +timestamp_time(PG_FUNCTION_ARGS) +{ + Timestamp timestamp = PG_GETARG_TIMESTAMP(0); + TimeADT result; + struct pg_tm tt, + *tm = &tt; + fsec_t fsec; + + if (TIMESTAMP_NOT_FINITE(timestamp)) + PG_RETURN_NULL(); + + if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + /* + * Could also do this with time = (timestamp / USECS_PER_DAY * + * USECS_PER_DAY) - timestamp; + */ + result = ((((tm->tm_hour * MINS_PER_HOUR + tm->tm_min) * SECS_PER_MINUTE) + tm->tm_sec) * + USECS_PER_SEC) + fsec; + + PG_RETURN_TIMEADT(result); +} + +/* timestamptz_time() + * Convert timestamptz to time data type. 
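As the comment in timestamp_time() above notes, for values on or after the 2000-01-01 epoch the time-of-day is just the remainder after whole days; earlier timestamps are negative microsecond counts, which is why the function goes through timestamp2tm() instead. A standalone sketch of the remainder shortcut:

#include <stdio.h>

#define USECS_PER_DAY 86400000000LL

int main(void)
{
    /* Non-negative Timestamp (microseconds since 2000-01-01): the time of
     * day is the remainder after whole days. */
    long long timestamp = 3LL * USECS_PER_DAY + 45296789512LL;  /* 2000-01-04 12:34:56.789512 */
    long long timeofday = timestamp % USECS_PER_DAY;

    printf("time of day = %lld microseconds\n", timeofday);     /* 45296789512 */
    return 0;
}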
+ */ +Datum +timestamptz_time(PG_FUNCTION_ARGS) +{ + TimestampTz timestamp = PG_GETARG_TIMESTAMP(0); + TimeADT result; + struct pg_tm tt, + *tm = &tt; + int tz; + fsec_t fsec; + + if (TIMESTAMP_NOT_FINITE(timestamp)) + PG_RETURN_NULL(); + + if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + /* + * Could also do this with time = (timestamp / USECS_PER_DAY * + * USECS_PER_DAY) - timestamp; + */ + result = ((((tm->tm_hour * MINS_PER_HOUR + tm->tm_min) * SECS_PER_MINUTE) + tm->tm_sec) * + USECS_PER_SEC) + fsec; + + PG_RETURN_TIMEADT(result); +} + +/* datetime_timestamp() + * Convert date and time to timestamp data type. + */ +Datum +datetime_timestamp(PG_FUNCTION_ARGS) +{ + DateADT date = PG_GETARG_DATEADT(0); + TimeADT time = PG_GETARG_TIMEADT(1); + Timestamp result; + + result = date2timestamp(date); + if (!TIMESTAMP_NOT_FINITE(result)) + { + result += time; + if (!IS_VALID_TIMESTAMP(result)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + } + + PG_RETURN_TIMESTAMP(result); +} + +/* time_interval() + * Convert time to interval data type. + */ +Datum +time_interval(PG_FUNCTION_ARGS) +{ + TimeADT time = PG_GETARG_TIMEADT(0); + Interval *result; + + result = (Interval *) palloc(sizeof(Interval)); + + result->time = time; + result->day = 0; + result->month = 0; + + PG_RETURN_INTERVAL_P(result); +} + +/* interval_time() + * Convert interval to time data type. + * + * This is defined as producing the fractional-day portion of the interval. + * Therefore, we can just ignore the months field. It is not real clear + * what to do with negative intervals, but we choose to subtract the floor, + * so that, say, '-2 hours' becomes '22:00:00'. + */ +Datum +interval_time(PG_FUNCTION_ARGS) +{ + Interval *span = PG_GETARG_INTERVAL_P(0); + TimeADT result; + int64 days; + + result = span->time; + if (result >= USECS_PER_DAY) + { + days = result / USECS_PER_DAY; + result -= days * USECS_PER_DAY; + } + else if (result < 0) + { + days = (-result + USECS_PER_DAY - 1) / USECS_PER_DAY; + result += days * USECS_PER_DAY; + } + + PG_RETURN_TIMEADT(result); +} + +/* time_mi_time() + * Subtract two times to produce an interval. + */ +Datum +time_mi_time(PG_FUNCTION_ARGS) +{ + TimeADT time1 = PG_GETARG_TIMEADT(0); + TimeADT time2 = PG_GETARG_TIMEADT(1); + Interval *result; + + result = (Interval *) palloc(sizeof(Interval)); + + result->month = 0; + result->day = 0; + result->time = time1 - time2; + + PG_RETURN_INTERVAL_P(result); +} + +/* time_pl_interval() + * Add interval to time. + */ +Datum +time_pl_interval(PG_FUNCTION_ARGS) +{ + TimeADT time = PG_GETARG_TIMEADT(0); + Interval *span = PG_GETARG_INTERVAL_P(1); + TimeADT result; + + result = time + span->time; + result -= result / USECS_PER_DAY * USECS_PER_DAY; + if (result < INT64CONST(0)) + result += USECS_PER_DAY; + + PG_RETURN_TIMEADT(result); +} + +/* time_mi_interval() + * Subtract interval from time. + */ +Datum +time_mi_interval(PG_FUNCTION_ARGS) +{ + TimeADT time = PG_GETARG_TIMEADT(0); + Interval *span = PG_GETARG_INTERVAL_P(1); + TimeADT result; + + result = time - span->time; + result -= result / USECS_PER_DAY * USECS_PER_DAY; + if (result < INT64CONST(0)) + result += USECS_PER_DAY; + + PG_RETURN_TIMEADT(result); +} + +/* + * in_range support function for time. 
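time_pl_interval() and time_mi_interval() above wrap their result back into a single day rather than letting it run past midnight; a standalone sketch of that wraparound:

#include <stdio.h>

#define USECS_PER_DAY 86400000000LL

/* Wrap a time-of-day into [0, 24h), the way time_pl_interval() and
 * time_mi_interval() do after adding or subtracting span->time. */
static long long
wrap_time(long long t)
{
    t -= t / USECS_PER_DAY * USECS_PER_DAY;
    if (t < 0)
        t += USECS_PER_DAY;
    return t;
}

int main(void)
{
    long long half_past_23 = 84600000000LL;   /* 23:30:00 */
    long long one_hour = 3600000000LL;

    printf("23:30 + 1h -> %lld\n", wrap_time(half_past_23 + one_hour)); /* 1800000000 == 00:30 */
    printf("00:30 - 1h -> %lld\n", wrap_time(1800000000LL - one_hour)); /* 84600000000 == 23:30 */
    return 0;
}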
+ */ +Datum +in_range_time_interval(PG_FUNCTION_ARGS) +{ + TimeADT val = PG_GETARG_TIMEADT(0); + TimeADT base = PG_GETARG_TIMEADT(1); + Interval *offset = PG_GETARG_INTERVAL_P(2); + bool sub = PG_GETARG_BOOL(3); + bool less = PG_GETARG_BOOL(4); + TimeADT sum; + + /* + * Like time_pl_interval/time_mi_interval, we disregard the month and day + * fields of the offset. So our test for negative should too. + */ + if (offset->time < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PRECEDING_OR_FOLLOWING_SIZE), + errmsg("invalid preceding or following size in window function"))); + + /* + * We can't use time_pl_interval/time_mi_interval here, because their + * wraparound behavior would give wrong (or at least undesirable) answers. + * Fortunately the equivalent non-wrapping behavior is trivial, especially + * since we don't worry about integer overflow. + */ + if (sub) + sum = base - offset->time; + else + sum = base + offset->time; + + if (less) + PG_RETURN_BOOL(val <= sum); + else + PG_RETURN_BOOL(val >= sum); +} + + +/* time_part() and extract_time() + * Extract specified field from time type. + */ +static Datum +time_part_common(PG_FUNCTION_ARGS, bool retnumeric) +{ + text *units = PG_GETARG_TEXT_PP(0); + TimeADT time = PG_GETARG_TIMEADT(1); + int64 intresult; + int type, + val; + char *lowunits; + + lowunits = downcase_truncate_identifier(VARDATA_ANY(units), + VARSIZE_ANY_EXHDR(units), + false); + + type = DecodeUnits(0, lowunits, &val); + if (type == UNKNOWN_FIELD) + type = DecodeSpecial(0, lowunits, &val); + + if (type == UNITS) + { + fsec_t fsec; + struct pg_tm tt, + *tm = &tt; + + time2tm(time, tm, &fsec); + + switch (val) + { + case DTK_MICROSEC: + intresult = tm->tm_sec * INT64CONST(1000000) + fsec; + break; + + case DTK_MILLISEC: + if (retnumeric) + /*--- + * tm->tm_sec * 1000 + fsec / 1000 + * = (tm->tm_sec * 1'000'000 + fsec) / 1000 + */ + PG_RETURN_NUMERIC(int64_div_fast_to_numeric(tm->tm_sec * INT64CONST(1000000) + fsec, 3)); + else + PG_RETURN_FLOAT8(tm->tm_sec * 1000.0 + fsec / 1000.0); + break; + + case DTK_SECOND: + if (retnumeric) + /*--- + * tm->tm_sec + fsec / 1'000'000 + * = (tm->tm_sec * 1'000'000 + fsec) / 1'000'000 + */ + PG_RETURN_NUMERIC(int64_div_fast_to_numeric(tm->tm_sec * INT64CONST(1000000) + fsec, 6)); + else + PG_RETURN_FLOAT8(tm->tm_sec + fsec / 1000000.0); + break; + + case DTK_MINUTE: + intresult = tm->tm_min; + break; + + case DTK_HOUR: + intresult = tm->tm_hour; + break; + + case DTK_TZ: + case DTK_TZ_MINUTE: + case DTK_TZ_HOUR: + case DTK_DAY: + case DTK_MONTH: + case DTK_QUARTER: + case DTK_YEAR: + case DTK_DECADE: + case DTK_CENTURY: + case DTK_MILLENNIUM: + case DTK_ISOYEAR: + default: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unit \"%s\" not supported for type %s", + lowunits, format_type_be(TIMEOID)))); + intresult = 0; + } + } + else if (type == RESERV && val == DTK_EPOCH) + { + if (retnumeric) + PG_RETURN_NUMERIC(int64_div_fast_to_numeric(time, 6)); + else + PG_RETURN_FLOAT8(time / 1000000.0); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unit \"%s\" not recognized for type %s", + lowunits, format_type_be(TIMEOID)))); + intresult = 0; + } + + if (retnumeric) + PG_RETURN_NUMERIC(int64_to_numeric(intresult)); + else + PG_RETURN_FLOAT8(intresult); +} + +Datum +time_part(PG_FUNCTION_ARGS) +{ + return time_part_common(fcinfo, false); +} + +Datum +extract_time(PG_FUNCTION_ARGS) +{ + return time_part_common(fcinfo, true); +} + + 
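time_part_common() above computes MILLISECONDS and SECONDS by first folding whole seconds and fractional microseconds into a single integer and dividing once (int64_div_fast_to_numeric keeps the quotient exact as a numeric); a plain-integer sketch of the same arithmetic:

#include <stdio.h>

int main(void)
{
    /* EXTRACT for 5.25 seconds into the current minute. */
    long long tm_sec = 5;
    long long fsec = 250000;                         /* 0.25 s in microseconds */
    long long micros = tm_sec * 1000000LL + fsec;    /* 5250000 */

    printf("milliseconds = %lld.%03lld\n", micros / 1000, micros % 1000);        /* 5250.000 */
    printf("seconds      = %lld.%06lld\n", micros / 1000000, micros % 1000000);  /* 5.250000 */
    return 0;
}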
+/***************************************************************************** + * Time With Time Zone ADT + *****************************************************************************/ + +/* tm2timetz() + * Convert a tm structure to a time data type. + */ +int +tm2timetz(struct pg_tm *tm, fsec_t fsec, int tz, TimeTzADT *result) +{ + result->time = ((((tm->tm_hour * MINS_PER_HOUR + tm->tm_min) * SECS_PER_MINUTE) + tm->tm_sec) * + USECS_PER_SEC) + fsec; + result->zone = tz; + + return 0; +} + +Datum +timetz_in(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); +#ifdef NOT_USED + Oid typelem = PG_GETARG_OID(1); +#endif + int32 typmod = PG_GETARG_INT32(2); + Node *escontext = fcinfo->context; + TimeTzADT *result; + fsec_t fsec; + struct pg_tm tt, + *tm = &tt; + int tz; + int nf; + int dterr; + char workbuf[MAXDATELEN + 1]; + char *field[MAXDATEFIELDS]; + int dtype; + int ftype[MAXDATEFIELDS]; + DateTimeErrorExtra extra; + + dterr = ParseDateTime(str, workbuf, sizeof(workbuf), + field, ftype, MAXDATEFIELDS, &nf); + if (dterr == 0) + dterr = DecodeTimeOnly(field, ftype, nf, + &dtype, tm, &fsec, &tz, &extra); + if (dterr != 0) + { + DateTimeParseError(dterr, &extra, str, "time with time zone", + escontext); + PG_RETURN_NULL(); + } + + result = (TimeTzADT *) palloc(sizeof(TimeTzADT)); + tm2timetz(tm, fsec, tz, result); + AdjustTimeForTypmod(&(result->time), typmod); + + PG_RETURN_TIMETZADT_P(result); +} + +Datum +timetz_out(PG_FUNCTION_ARGS) +{ + TimeTzADT *time = PG_GETARG_TIMETZADT_P(0); + char *result; + struct pg_tm tt, + *tm = &tt; + fsec_t fsec; + int tz; + char buf[MAXDATELEN + 1]; + + timetz2tm(time, tm, &fsec, &tz); + EncodeTimeOnly(tm, fsec, true, tz, DateStyle, buf); + + result = pstrdup(buf); + PG_RETURN_CSTRING(result); +} + +/* + * timetz_recv - converts external binary format to timetz + */ +Datum +timetz_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + +#ifdef NOT_USED + Oid typelem = PG_GETARG_OID(1); +#endif + int32 typmod = PG_GETARG_INT32(2); + TimeTzADT *result; + + result = (TimeTzADT *) palloc(sizeof(TimeTzADT)); + + result->time = pq_getmsgint64(buf); + + if (result->time < INT64CONST(0) || result->time > USECS_PER_DAY) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("time out of range"))); + + result->zone = pq_getmsgint(buf, sizeof(result->zone)); + + /* Check for sane GMT displacement; see notes in datatype/timestamp.h */ + if (result->zone <= -TZDISP_LIMIT || result->zone >= TZDISP_LIMIT) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TIME_ZONE_DISPLACEMENT_VALUE), + errmsg("time zone displacement out of range"))); + + AdjustTimeForTypmod(&(result->time), typmod); + + PG_RETURN_TIMETZADT_P(result); +} + +/* + * timetz_send - converts timetz to binary format + */ +Datum +timetz_send(PG_FUNCTION_ARGS) +{ + TimeTzADT *time = PG_GETARG_TIMETZADT_P(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint64(&buf, time->time); + pq_sendint32(&buf, time->zone); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +Datum +timetztypmodin(PG_FUNCTION_ARGS) +{ + ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0); + + PG_RETURN_INT32(anytime_typmodin(true, ta)); +} + +Datum +timetztypmodout(PG_FUNCTION_ARGS) +{ + int32 typmod = PG_GETARG_INT32(0); + + PG_RETURN_CSTRING(anytime_typmodout(true, typmod)); +} + + +/* timetz2tm() + * Convert TIME WITH TIME ZONE data type to POSIX time structure. 
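timetz_recv() above validates the two wire fields independently: the time part must lie within one day and the zone displacement must stay inside the limit from datatype/timestamp.h. A self-contained sketch of those checks; the ±16-hour limit here is an assumption copied locally for illustration:

#include <stdbool.h>
#include <stdio.h>

#define USECS_PER_DAY 86400000000LL
#define TZDISP_LIMIT  (16 * 3600)      /* assumed value; see datatype/timestamp.h */

typedef struct
{
    long long time;    /* microseconds since midnight */
    int       zone;    /* displacement in seconds */
} TimeTz;

static bool
timetz_wire_ok(TimeTz v)
{
    if (v.time < 0 || v.time > USECS_PER_DAY)
        return false;
    if (v.zone <= -TZDISP_LIMIT || v.zone >= TZDISP_LIMIT)
        return false;
    return true;
}

int main(void)
{
    TimeTz ok  = {45296789512LL, -7200};      /* plausible values */
    TimeTz bad = {45296789512LL, 24 * 3600};  /* zone displacement too large */

    printf("%d %d\n", timetz_wire_ok(ok), timetz_wire_ok(bad));   /* 1 0 */
    return 0;
}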
+ */ +int +timetz2tm(TimeTzADT *time, struct pg_tm *tm, fsec_t *fsec, int *tzp) +{ + TimeOffset trem = time->time; + + tm->tm_hour = trem / USECS_PER_HOUR; + trem -= tm->tm_hour * USECS_PER_HOUR; + tm->tm_min = trem / USECS_PER_MINUTE; + trem -= tm->tm_min * USECS_PER_MINUTE; + tm->tm_sec = trem / USECS_PER_SEC; + *fsec = trem - tm->tm_sec * USECS_PER_SEC; + + if (tzp != NULL) + *tzp = time->zone; + + return 0; +} + +/* timetz_scale() + * Adjust time type for specified scale factor. + * Used by PostgreSQL type system to stuff columns. + */ +Datum +timetz_scale(PG_FUNCTION_ARGS) +{ + TimeTzADT *time = PG_GETARG_TIMETZADT_P(0); + int32 typmod = PG_GETARG_INT32(1); + TimeTzADT *result; + + result = (TimeTzADT *) palloc(sizeof(TimeTzADT)); + + result->time = time->time; + result->zone = time->zone; + + AdjustTimeForTypmod(&(result->time), typmod); + + PG_RETURN_TIMETZADT_P(result); +} + + +static int +timetz_cmp_internal(TimeTzADT *time1, TimeTzADT *time2) +{ + TimeOffset t1, + t2; + + /* Primary sort is by true (GMT-equivalent) time */ + t1 = time1->time + (time1->zone * USECS_PER_SEC); + t2 = time2->time + (time2->zone * USECS_PER_SEC); + + if (t1 > t2) + return 1; + if (t1 < t2) + return -1; + + /* + * If same GMT time, sort by timezone; we only want to say that two + * timetz's are equal if both the time and zone parts are equal. + */ + if (time1->zone > time2->zone) + return 1; + if (time1->zone < time2->zone) + return -1; + + return 0; +} + +Datum +timetz_eq(PG_FUNCTION_ARGS) +{ + TimeTzADT *time1 = PG_GETARG_TIMETZADT_P(0); + TimeTzADT *time2 = PG_GETARG_TIMETZADT_P(1); + + PG_RETURN_BOOL(timetz_cmp_internal(time1, time2) == 0); +} + +Datum +timetz_ne(PG_FUNCTION_ARGS) +{ + TimeTzADT *time1 = PG_GETARG_TIMETZADT_P(0); + TimeTzADT *time2 = PG_GETARG_TIMETZADT_P(1); + + PG_RETURN_BOOL(timetz_cmp_internal(time1, time2) != 0); +} + +Datum +timetz_lt(PG_FUNCTION_ARGS) +{ + TimeTzADT *time1 = PG_GETARG_TIMETZADT_P(0); + TimeTzADT *time2 = PG_GETARG_TIMETZADT_P(1); + + PG_RETURN_BOOL(timetz_cmp_internal(time1, time2) < 0); +} + +Datum +timetz_le(PG_FUNCTION_ARGS) +{ + TimeTzADT *time1 = PG_GETARG_TIMETZADT_P(0); + TimeTzADT *time2 = PG_GETARG_TIMETZADT_P(1); + + PG_RETURN_BOOL(timetz_cmp_internal(time1, time2) <= 0); +} + +Datum +timetz_gt(PG_FUNCTION_ARGS) +{ + TimeTzADT *time1 = PG_GETARG_TIMETZADT_P(0); + TimeTzADT *time2 = PG_GETARG_TIMETZADT_P(1); + + PG_RETURN_BOOL(timetz_cmp_internal(time1, time2) > 0); +} + +Datum +timetz_ge(PG_FUNCTION_ARGS) +{ + TimeTzADT *time1 = PG_GETARG_TIMETZADT_P(0); + TimeTzADT *time2 = PG_GETARG_TIMETZADT_P(1); + + PG_RETURN_BOOL(timetz_cmp_internal(time1, time2) >= 0); +} + +Datum +timetz_cmp(PG_FUNCTION_ARGS) +{ + TimeTzADT *time1 = PG_GETARG_TIMETZADT_P(0); + TimeTzADT *time2 = PG_GETARG_TIMETZADT_P(1); + + PG_RETURN_INT32(timetz_cmp_internal(time1, time2)); +} + +Datum +timetz_hash(PG_FUNCTION_ARGS) +{ + TimeTzADT *key = PG_GETARG_TIMETZADT_P(0); + uint32 thash; + + /* + * To avoid any problems with padding bytes in the struct, we figure the + * field hashes separately and XOR them. 
+ */ + thash = DatumGetUInt32(DirectFunctionCall1(hashint8, + Int64GetDatumFast(key->time))); + thash ^= DatumGetUInt32(hash_uint32(key->zone)); + PG_RETURN_UINT32(thash); +} + +Datum +timetz_hash_extended(PG_FUNCTION_ARGS) +{ + TimeTzADT *key = PG_GETARG_TIMETZADT_P(0); + Datum seed = PG_GETARG_DATUM(1); + uint64 thash; + + /* Same approach as timetz_hash */ + thash = DatumGetUInt64(DirectFunctionCall2(hashint8extended, + Int64GetDatumFast(key->time), + seed)); + thash ^= DatumGetUInt64(hash_uint32_extended(key->zone, + DatumGetInt64(seed))); + PG_RETURN_UINT64(thash); +} + +Datum +timetz_larger(PG_FUNCTION_ARGS) +{ + TimeTzADT *time1 = PG_GETARG_TIMETZADT_P(0); + TimeTzADT *time2 = PG_GETARG_TIMETZADT_P(1); + TimeTzADT *result; + + if (timetz_cmp_internal(time1, time2) > 0) + result = time1; + else + result = time2; + PG_RETURN_TIMETZADT_P(result); +} + +Datum +timetz_smaller(PG_FUNCTION_ARGS) +{ + TimeTzADT *time1 = PG_GETARG_TIMETZADT_P(0); + TimeTzADT *time2 = PG_GETARG_TIMETZADT_P(1); + TimeTzADT *result; + + if (timetz_cmp_internal(time1, time2) < 0) + result = time1; + else + result = time2; + PG_RETURN_TIMETZADT_P(result); +} + +/* timetz_pl_interval() + * Add interval to timetz. + */ +Datum +timetz_pl_interval(PG_FUNCTION_ARGS) +{ + TimeTzADT *time = PG_GETARG_TIMETZADT_P(0); + Interval *span = PG_GETARG_INTERVAL_P(1); + TimeTzADT *result; + + result = (TimeTzADT *) palloc(sizeof(TimeTzADT)); + + result->time = time->time + span->time; + result->time -= result->time / USECS_PER_DAY * USECS_PER_DAY; + if (result->time < INT64CONST(0)) + result->time += USECS_PER_DAY; + + result->zone = time->zone; + + PG_RETURN_TIMETZADT_P(result); +} + +/* timetz_mi_interval() + * Subtract interval from timetz. + */ +Datum +timetz_mi_interval(PG_FUNCTION_ARGS) +{ + TimeTzADT *time = PG_GETARG_TIMETZADT_P(0); + Interval *span = PG_GETARG_INTERVAL_P(1); + TimeTzADT *result; + + result = (TimeTzADT *) palloc(sizeof(TimeTzADT)); + + result->time = time->time - span->time; + result->time -= result->time / USECS_PER_DAY * USECS_PER_DAY; + if (result->time < INT64CONST(0)) + result->time += USECS_PER_DAY; + + result->zone = time->zone; + + PG_RETURN_TIMETZADT_P(result); +} + +/* + * in_range support function for timetz. + */ +Datum +in_range_timetz_interval(PG_FUNCTION_ARGS) +{ + TimeTzADT *val = PG_GETARG_TIMETZADT_P(0); + TimeTzADT *base = PG_GETARG_TIMETZADT_P(1); + Interval *offset = PG_GETARG_INTERVAL_P(2); + bool sub = PG_GETARG_BOOL(3); + bool less = PG_GETARG_BOOL(4); + TimeTzADT sum; + + /* + * Like timetz_pl_interval/timetz_mi_interval, we disregard the month and + * day fields of the offset. So our test for negative should too. + */ + if (offset->time < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PRECEDING_OR_FOLLOWING_SIZE), + errmsg("invalid preceding or following size in window function"))); + + /* + * We can't use timetz_pl_interval/timetz_mi_interval here, because their + * wraparound behavior would give wrong (or at least undesirable) answers. + * Fortunately the equivalent non-wrapping behavior is trivial, especially + * since we don't worry about integer overflow. + */ + if (sub) + sum.time = base->time - offset->time; + else + sum.time = base->time + offset->time; + sum.zone = base->zone; + + if (less) + PG_RETURN_BOOL(timetz_cmp_internal(val, &sum) <= 0); + else + PG_RETURN_BOOL(timetz_cmp_internal(val, &sum) >= 0); +} + +/* overlaps_timetz() --- implements the SQL OVERLAPS operator. + * + * Algorithm is per SQL spec. 
This is much harder than you'd think + * because the spec requires us to deliver a non-null answer in some cases + * where some of the inputs are null. + */ +Datum +overlaps_timetz(PG_FUNCTION_ARGS) +{ + /* + * The arguments are TimeTzADT *, but we leave them as generic Datums for + * convenience of notation --- and to avoid dereferencing nulls. + */ + Datum ts1 = PG_GETARG_DATUM(0); + Datum te1 = PG_GETARG_DATUM(1); + Datum ts2 = PG_GETARG_DATUM(2); + Datum te2 = PG_GETARG_DATUM(3); + bool ts1IsNull = PG_ARGISNULL(0); + bool te1IsNull = PG_ARGISNULL(1); + bool ts2IsNull = PG_ARGISNULL(2); + bool te2IsNull = PG_ARGISNULL(3); + +#define TIMETZ_GT(t1,t2) \ + DatumGetBool(DirectFunctionCall2(timetz_gt,t1,t2)) +#define TIMETZ_LT(t1,t2) \ + DatumGetBool(DirectFunctionCall2(timetz_lt,t1,t2)) + + /* + * If both endpoints of interval 1 are null, the result is null (unknown). + * If just one endpoint is null, take ts1 as the non-null one. Otherwise, + * take ts1 as the lesser endpoint. + */ + if (ts1IsNull) + { + if (te1IsNull) + PG_RETURN_NULL(); + /* swap null for non-null */ + ts1 = te1; + te1IsNull = true; + } + else if (!te1IsNull) + { + if (TIMETZ_GT(ts1, te1)) + { + Datum tt = ts1; + + ts1 = te1; + te1 = tt; + } + } + + /* Likewise for interval 2. */ + if (ts2IsNull) + { + if (te2IsNull) + PG_RETURN_NULL(); + /* swap null for non-null */ + ts2 = te2; + te2IsNull = true; + } + else if (!te2IsNull) + { + if (TIMETZ_GT(ts2, te2)) + { + Datum tt = ts2; + + ts2 = te2; + te2 = tt; + } + } + + /* + * At this point neither ts1 nor ts2 is null, so we can consider three + * cases: ts1 > ts2, ts1 < ts2, ts1 = ts2 + */ + if (TIMETZ_GT(ts1, ts2)) + { + /* + * This case is ts1 < te2 OR te1 < te2, which may look redundant but + * in the presence of nulls it's not quite completely so. + */ + if (te2IsNull) + PG_RETURN_NULL(); + if (TIMETZ_LT(ts1, te2)) + PG_RETURN_BOOL(true); + if (te1IsNull) + PG_RETURN_NULL(); + + /* + * If te1 is not null then we had ts1 <= te1 above, and we just found + * ts1 >= te2, hence te1 >= te2. + */ + PG_RETURN_BOOL(false); + } + else if (TIMETZ_LT(ts1, ts2)) + { + /* This case is ts2 < te1 OR te2 < te1 */ + if (te1IsNull) + PG_RETURN_NULL(); + if (TIMETZ_LT(ts2, te1)) + PG_RETURN_BOOL(true); + if (te2IsNull) + PG_RETURN_NULL(); + + /* + * If te2 is not null then we had ts2 <= te2 above, and we just found + * ts2 >= te1, hence te2 >= te1. + */ + PG_RETURN_BOOL(false); + } + else + { + /* + * For ts1 = ts2 the spec says te1 <> te2 OR te1 = te2, which is a + * rather silly way of saying "true if both are nonnull, else null". + */ + if (te1IsNull || te2IsNull) + PG_RETURN_NULL(); + PG_RETURN_BOOL(true); + } + +#undef TIMETZ_GT +#undef TIMETZ_LT +} + + +Datum +timetz_time(PG_FUNCTION_ARGS) +{ + TimeTzADT *timetz = PG_GETARG_TIMETZADT_P(0); + TimeADT result; + + /* swallow the time zone and just return the time */ + result = timetz->time; + + PG_RETURN_TIMEADT(result); +} + + +Datum +time_timetz(PG_FUNCTION_ARGS) +{ + TimeADT time = PG_GETARG_TIMEADT(0); + TimeTzADT *result; + struct pg_tm tt, + *tm = &tt; + fsec_t fsec; + int tz; + + GetCurrentDateTime(tm); + time2tm(time, tm, &fsec); + tz = DetermineTimeZoneOffset(tm, session_timezone); + + result = (TimeTzADT *) palloc(sizeof(TimeTzADT)); + + result->time = time; + result->zone = tz; + + PG_RETURN_TIMETZADT_P(result); +} + + +/* timestamptz_timetz() + * Convert timestamp to timetz data type. 
+ */ +Datum +timestamptz_timetz(PG_FUNCTION_ARGS) +{ + TimestampTz timestamp = PG_GETARG_TIMESTAMP(0); + TimeTzADT *result; + struct pg_tm tt, + *tm = &tt; + int tz; + fsec_t fsec; + + if (TIMESTAMP_NOT_FINITE(timestamp)) + PG_RETURN_NULL(); + + if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + result = (TimeTzADT *) palloc(sizeof(TimeTzADT)); + + tm2timetz(tm, fsec, tz, result); + + PG_RETURN_TIMETZADT_P(result); +} + + +/* datetimetz_timestamptz() + * Convert date and timetz to timestamp with time zone data type. + * Timestamp is stored in GMT, so add the time zone + * stored with the timetz to the result. + * - thomas 2000-03-10 + */ +Datum +datetimetz_timestamptz(PG_FUNCTION_ARGS) +{ + DateADT date = PG_GETARG_DATEADT(0); + TimeTzADT *time = PG_GETARG_TIMETZADT_P(1); + TimestampTz result; + + if (DATE_IS_NOBEGIN(date)) + TIMESTAMP_NOBEGIN(result); + else if (DATE_IS_NOEND(date)) + TIMESTAMP_NOEND(result); + else + { + /* + * Date's range is wider than timestamp's, so check for boundaries. + * Since dates have the same minimum values as timestamps, only upper + * boundary need be checked for overflow. + */ + if (date >= (TIMESTAMP_END_JULIAN - POSTGRES_EPOCH_JDATE)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("date out of range for timestamp"))); + result = date * USECS_PER_DAY + time->time + time->zone * USECS_PER_SEC; + + /* + * Since it is possible to go beyond allowed timestamptz range because + * of time zone, check for allowed timestamp range after adding tz. + */ + if (!IS_VALID_TIMESTAMP(result)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("date out of range for timestamp"))); + } + + PG_RETURN_TIMESTAMP(result); +} + + +/* timetz_part() and extract_timetz() + * Extract specified field from time type. 
+ */ +static Datum +timetz_part_common(PG_FUNCTION_ARGS, bool retnumeric) +{ + text *units = PG_GETARG_TEXT_PP(0); + TimeTzADT *time = PG_GETARG_TIMETZADT_P(1); + int64 intresult; + int type, + val; + char *lowunits; + + lowunits = downcase_truncate_identifier(VARDATA_ANY(units), + VARSIZE_ANY_EXHDR(units), + false); + + type = DecodeUnits(0, lowunits, &val); + if (type == UNKNOWN_FIELD) + type = DecodeSpecial(0, lowunits, &val); + + if (type == UNITS) + { + int tz; + fsec_t fsec; + struct pg_tm tt, + *tm = &tt; + + timetz2tm(time, tm, &fsec, &tz); + + switch (val) + { + case DTK_TZ: + intresult = -tz; + break; + + case DTK_TZ_MINUTE: + intresult = (-tz / SECS_PER_MINUTE) % MINS_PER_HOUR; + break; + + case DTK_TZ_HOUR: + intresult = -tz / SECS_PER_HOUR; + break; + + case DTK_MICROSEC: + intresult = tm->tm_sec * INT64CONST(1000000) + fsec; + break; + + case DTK_MILLISEC: + if (retnumeric) + /*--- + * tm->tm_sec * 1000 + fsec / 1000 + * = (tm->tm_sec * 1'000'000 + fsec) / 1000 + */ + PG_RETURN_NUMERIC(int64_div_fast_to_numeric(tm->tm_sec * INT64CONST(1000000) + fsec, 3)); + else + PG_RETURN_FLOAT8(tm->tm_sec * 1000.0 + fsec / 1000.0); + break; + + case DTK_SECOND: + if (retnumeric) + /*--- + * tm->tm_sec + fsec / 1'000'000 + * = (tm->tm_sec * 1'000'000 + fsec) / 1'000'000 + */ + PG_RETURN_NUMERIC(int64_div_fast_to_numeric(tm->tm_sec * INT64CONST(1000000) + fsec, 6)); + else + PG_RETURN_FLOAT8(tm->tm_sec + fsec / 1000000.0); + break; + + case DTK_MINUTE: + intresult = tm->tm_min; + break; + + case DTK_HOUR: + intresult = tm->tm_hour; + break; + + case DTK_DAY: + case DTK_MONTH: + case DTK_QUARTER: + case DTK_YEAR: + case DTK_DECADE: + case DTK_CENTURY: + case DTK_MILLENNIUM: + default: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unit \"%s\" not supported for type %s", + lowunits, format_type_be(TIMETZOID)))); + intresult = 0; + } + } + else if (type == RESERV && val == DTK_EPOCH) + { + if (retnumeric) + /*--- + * time->time / 1'000'000 + time->zone + * = (time->time + time->zone * 1'000'000) / 1'000'000 + */ + PG_RETURN_NUMERIC(int64_div_fast_to_numeric(time->time + time->zone * INT64CONST(1000000), 6)); + else + PG_RETURN_FLOAT8(time->time / 1000000.0 + time->zone); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unit \"%s\" not recognized for type %s", + lowunits, format_type_be(TIMETZOID)))); + intresult = 0; + } + + if (retnumeric) + PG_RETURN_NUMERIC(int64_to_numeric(intresult)); + else + PG_RETURN_FLOAT8(intresult); +} + + +Datum +timetz_part(PG_FUNCTION_ARGS) +{ + return timetz_part_common(fcinfo, false); +} + +Datum +extract_timetz(PG_FUNCTION_ARGS) +{ + return timetz_part_common(fcinfo, true); +} + +/* timetz_zone() + * Encode time with time zone type with specified time zone. + * Applies DST rules as of the transaction start time. + */ +Datum +timetz_zone(PG_FUNCTION_ARGS) +{ + text *zone = PG_GETARG_TEXT_PP(0); + TimeTzADT *t = PG_GETARG_TIMETZADT_P(1); + TimeTzADT *result; + int tz; + char tzname[TZ_STRLEN_MAX + 1]; + int type, + val; + pg_tz *tzp; + + /* + * Look up the requested timezone. 
+ */ + text_to_cstring_buffer(zone, tzname, sizeof(tzname)); + + type = DecodeTimezoneName(tzname, &val, &tzp); + + if (type == TZNAME_FIXED_OFFSET) + { + /* fixed-offset abbreviation */ + tz = -val; + } + else if (type == TZNAME_DYNTZ) + { + /* dynamic-offset abbreviation, resolve using transaction start time */ + TimestampTz now = GetCurrentTransactionStartTimestamp(); + int isdst; + + tz = DetermineTimeZoneAbbrevOffsetTS(now, tzname, tzp, &isdst); + } + else + { + /* Get the offset-from-GMT that is valid now for the zone name */ + TimestampTz now = GetCurrentTransactionStartTimestamp(); + struct pg_tm tm; + fsec_t fsec; + + if (timestamp2tm(now, &tz, &tm, &fsec, NULL, tzp) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + } + + result = (TimeTzADT *) palloc(sizeof(TimeTzADT)); + + result->time = t->time + (t->zone - tz) * USECS_PER_SEC; + /* C99 modulo has the wrong sign convention for negative input */ + while (result->time < INT64CONST(0)) + result->time += USECS_PER_DAY; + if (result->time >= USECS_PER_DAY) + result->time %= USECS_PER_DAY; + + result->zone = tz; + + PG_RETURN_TIMETZADT_P(result); +} + +/* timetz_izone() + * Encode time with time zone type with specified time interval as time zone. + */ +Datum +timetz_izone(PG_FUNCTION_ARGS) +{ + Interval *zone = PG_GETARG_INTERVAL_P(0); + TimeTzADT *time = PG_GETARG_TIMETZADT_P(1); + TimeTzADT *result; + int tz; + + if (zone->month != 0 || zone->day != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("interval time zone \"%s\" must not include months or days", + DatumGetCString(DirectFunctionCall1(interval_out, + PointerGetDatum(zone)))))); + + tz = -(zone->time / USECS_PER_SEC); + + result = (TimeTzADT *) palloc(sizeof(TimeTzADT)); + + result->time = time->time + (time->zone - tz) * USECS_PER_SEC; + /* C99 modulo has the wrong sign convention for negative input */ + while (result->time < INT64CONST(0)) + result->time += USECS_PER_DAY; + if (result->time >= USECS_PER_DAY) + result->time %= USECS_PER_DAY; + + result->zone = tz; + + PG_RETURN_TIMETZADT_P(result); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/datetime.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/datetime.c new file mode 100644 index 00000000000..8b0c8150eb7 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/datetime.c @@ -0,0 +1,5057 @@ +/*------------------------------------------------------------------------- + * + * datetime.c + * Support functions for date/time types. 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/datetime.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <ctype.h> +#include <limits.h> +#include <math.h> + +#include "access/htup_details.h" +#include "access/xact.h" +#include "catalog/pg_type.h" +#include "common/int.h" +#include "common/string.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "nodes/nodeFuncs.h" +#include "parser/scansup.h" +#include "utils/builtins.h" +#include "utils/date.h" +#include "utils/datetime.h" +#include "utils/guc.h" +#include "utils/memutils.h" +#include "utils/tzparser.h" + +static int DecodeNumber(int flen, char *str, bool haveTextMonth, + int fmask, int *tmask, + struct pg_tm *tm, fsec_t *fsec, bool *is2digits); +static int DecodeNumberField(int len, char *str, + int fmask, int *tmask, + struct pg_tm *tm, fsec_t *fsec, bool *is2digits); +static int DecodeTimeCommon(char *str, int fmask, int range, + int *tmask, struct pg_itm *itm); +static int DecodeTime(char *str, int fmask, int range, + int *tmask, struct pg_tm *tm, fsec_t *fsec); +static int DecodeTimeForInterval(char *str, int fmask, int range, + int *tmask, struct pg_itm_in *itm_in); +static const datetkn *datebsearch(const char *key, const datetkn *base, int nel); +static int DecodeDate(char *str, int fmask, int *tmask, bool *is2digits, + struct pg_tm *tm); +static char *AppendSeconds(char *cp, int sec, fsec_t fsec, + int precision, bool fillzeros); +static bool int64_multiply_add(int64 val, int64 multiplier, int64 *sum); +static bool AdjustFractMicroseconds(double frac, int64 scale, + struct pg_itm_in *itm_in); +static bool AdjustFractDays(double frac, int scale, + struct pg_itm_in *itm_in); +static bool AdjustFractYears(double frac, int scale, + struct pg_itm_in *itm_in); +static bool AdjustMicroseconds(int64 val, double fval, int64 scale, + struct pg_itm_in *itm_in); +static bool AdjustDays(int64 val, int scale, + struct pg_itm_in *itm_in); +static bool AdjustMonths(int64 val, struct pg_itm_in *itm_in); +static bool AdjustYears(int64 val, int scale, + struct pg_itm_in *itm_in); +static int DetermineTimeZoneOffsetInternal(struct pg_tm *tm, pg_tz *tzp, + pg_time_t *tp); +static bool DetermineTimeZoneAbbrevOffsetInternal(pg_time_t t, + const char *abbr, pg_tz *tzp, + int *offset, int *isdst); +static pg_tz *FetchDynamicTimeZone(TimeZoneAbbrevTable *tbl, const datetkn *tp, + DateTimeErrorExtra *extra); + + +const int day_tab[2][13] = +{ + {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 0}, + {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 0} +}; + +const char *const months[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", +"Jul", "Aug", "Sep", "Oct", "Nov", "Dec", NULL}; + +const char *const days[] = {"Sunday", "Monday", "Tuesday", "Wednesday", +"Thursday", "Friday", "Saturday", NULL}; + + +/***************************************************************************** + * PRIVATE ROUTINES * + *****************************************************************************/ + +/* + * datetktbl holds date/time keywords. + * + * Note that this table must be strictly alphabetically ordered to allow an + * O(ln(N)) search algorithm to be used. + * + * The token field must be NUL-terminated; we truncate entries to TOKMAXLEN + * characters to fit. 
+ * + * The static table contains no TZ, DTZ, or DYNTZ entries; rather those + * are loaded from configuration files and stored in zoneabbrevtbl, whose + * abbrevs[] field has the same format as the static datetktbl. + */ +static const datetkn datetktbl[] = { + /* token, type, value */ + {"+infinity", RESERV, DTK_LATE}, /* same as "infinity" */ + {EARLY, RESERV, DTK_EARLY}, /* "-infinity" reserved for "early time" */ + {DA_D, ADBC, AD}, /* "ad" for years > 0 */ + {"allballs", RESERV, DTK_ZULU}, /* 00:00:00 */ + {"am", AMPM, AM}, + {"apr", MONTH, 4}, + {"april", MONTH, 4}, + {"at", IGNORE_DTF, 0}, /* "at" (throwaway) */ + {"aug", MONTH, 8}, + {"august", MONTH, 8}, + {DB_C, ADBC, BC}, /* "bc" for years <= 0 */ + {"d", UNITS, DTK_DAY}, /* "day of month" for ISO input */ + {"dec", MONTH, 12}, + {"december", MONTH, 12}, + {"dow", UNITS, DTK_DOW}, /* day of week */ + {"doy", UNITS, DTK_DOY}, /* day of year */ + {"dst", DTZMOD, SECS_PER_HOUR}, + {EPOCH, RESERV, DTK_EPOCH}, /* "epoch" reserved for system epoch time */ + {"feb", MONTH, 2}, + {"february", MONTH, 2}, + {"fri", DOW, 5}, + {"friday", DOW, 5}, + {"h", UNITS, DTK_HOUR}, /* "hour" */ + {LATE, RESERV, DTK_LATE}, /* "infinity" reserved for "late time" */ + {"isodow", UNITS, DTK_ISODOW}, /* ISO day of week, Sunday == 7 */ + {"isoyear", UNITS, DTK_ISOYEAR}, /* year in terms of the ISO week date */ + {"j", UNITS, DTK_JULIAN}, + {"jan", MONTH, 1}, + {"january", MONTH, 1}, + {"jd", UNITS, DTK_JULIAN}, + {"jul", MONTH, 7}, + {"julian", UNITS, DTK_JULIAN}, + {"july", MONTH, 7}, + {"jun", MONTH, 6}, + {"june", MONTH, 6}, + {"m", UNITS, DTK_MONTH}, /* "month" for ISO input */ + {"mar", MONTH, 3}, + {"march", MONTH, 3}, + {"may", MONTH, 5}, + {"mm", UNITS, DTK_MINUTE}, /* "minute" for ISO input */ + {"mon", DOW, 1}, + {"monday", DOW, 1}, + {"nov", MONTH, 11}, + {"november", MONTH, 11}, + {NOW, RESERV, DTK_NOW}, /* current transaction time */ + {"oct", MONTH, 10}, + {"october", MONTH, 10}, + {"on", IGNORE_DTF, 0}, /* "on" (throwaway) */ + {"pm", AMPM, PM}, + {"s", UNITS, DTK_SECOND}, /* "seconds" for ISO input */ + {"sat", DOW, 6}, + {"saturday", DOW, 6}, + {"sep", MONTH, 9}, + {"sept", MONTH, 9}, + {"september", MONTH, 9}, + {"sun", DOW, 0}, + {"sunday", DOW, 0}, + {"t", ISOTIME, DTK_TIME}, /* Filler for ISO time fields */ + {"thu", DOW, 4}, + {"thur", DOW, 4}, + {"thurs", DOW, 4}, + {"thursday", DOW, 4}, + {TODAY, RESERV, DTK_TODAY}, /* midnight */ + {TOMORROW, RESERV, DTK_TOMORROW}, /* tomorrow midnight */ + {"tue", DOW, 2}, + {"tues", DOW, 2}, + {"tuesday", DOW, 2}, + {"wed", DOW, 3}, + {"wednesday", DOW, 3}, + {"weds", DOW, 3}, + {"y", UNITS, DTK_YEAR}, /* "year" for ISO input */ + {YESTERDAY, RESERV, DTK_YESTERDAY} /* yesterday midnight */ +}; + +static const int szdatetktbl = sizeof datetktbl / sizeof datetktbl[0]; + +/* + * deltatktbl: same format as datetktbl, but holds keywords used to represent + * time units (eg, for intervals, and for EXTRACT). 
+ */ +static const datetkn deltatktbl[] = { + /* token, type, value */ + {"@", IGNORE_DTF, 0}, /* postgres relative prefix */ + {DAGO, AGO, 0}, /* "ago" indicates negative time offset */ + {"c", UNITS, DTK_CENTURY}, /* "century" relative */ + {"cent", UNITS, DTK_CENTURY}, /* "century" relative */ + {"centuries", UNITS, DTK_CENTURY}, /* "centuries" relative */ + {DCENTURY, UNITS, DTK_CENTURY}, /* "century" relative */ + {"d", UNITS, DTK_DAY}, /* "day" relative */ + {DDAY, UNITS, DTK_DAY}, /* "day" relative */ + {"days", UNITS, DTK_DAY}, /* "days" relative */ + {"dec", UNITS, DTK_DECADE}, /* "decade" relative */ + {DDECADE, UNITS, DTK_DECADE}, /* "decade" relative */ + {"decades", UNITS, DTK_DECADE}, /* "decades" relative */ + {"decs", UNITS, DTK_DECADE}, /* "decades" relative */ + {"h", UNITS, DTK_HOUR}, /* "hour" relative */ + {DHOUR, UNITS, DTK_HOUR}, /* "hour" relative */ + {"hours", UNITS, DTK_HOUR}, /* "hours" relative */ + {"hr", UNITS, DTK_HOUR}, /* "hour" relative */ + {"hrs", UNITS, DTK_HOUR}, /* "hours" relative */ + {"m", UNITS, DTK_MINUTE}, /* "minute" relative */ + {"microsecon", UNITS, DTK_MICROSEC}, /* "microsecond" relative */ + {"mil", UNITS, DTK_MILLENNIUM}, /* "millennium" relative */ + {"millennia", UNITS, DTK_MILLENNIUM}, /* "millennia" relative */ + {DMILLENNIUM, UNITS, DTK_MILLENNIUM}, /* "millennium" relative */ + {"millisecon", UNITS, DTK_MILLISEC}, /* relative */ + {"mils", UNITS, DTK_MILLENNIUM}, /* "millennia" relative */ + {"min", UNITS, DTK_MINUTE}, /* "minute" relative */ + {"mins", UNITS, DTK_MINUTE}, /* "minutes" relative */ + {DMINUTE, UNITS, DTK_MINUTE}, /* "minute" relative */ + {"minutes", UNITS, DTK_MINUTE}, /* "minutes" relative */ + {"mon", UNITS, DTK_MONTH}, /* "months" relative */ + {"mons", UNITS, DTK_MONTH}, /* "months" relative */ + {DMONTH, UNITS, DTK_MONTH}, /* "month" relative */ + {"months", UNITS, DTK_MONTH}, + {"ms", UNITS, DTK_MILLISEC}, + {"msec", UNITS, DTK_MILLISEC}, + {DMILLISEC, UNITS, DTK_MILLISEC}, + {"mseconds", UNITS, DTK_MILLISEC}, + {"msecs", UNITS, DTK_MILLISEC}, + {"qtr", UNITS, DTK_QUARTER}, /* "quarter" relative */ + {DQUARTER, UNITS, DTK_QUARTER}, /* "quarter" relative */ + {"s", UNITS, DTK_SECOND}, + {"sec", UNITS, DTK_SECOND}, + {DSECOND, UNITS, DTK_SECOND}, + {"seconds", UNITS, DTK_SECOND}, + {"secs", UNITS, DTK_SECOND}, + {DTIMEZONE, UNITS, DTK_TZ}, /* "timezone" time offset */ + {"timezone_h", UNITS, DTK_TZ_HOUR}, /* timezone hour units */ + {"timezone_m", UNITS, DTK_TZ_MINUTE}, /* timezone minutes units */ + {"us", UNITS, DTK_MICROSEC}, /* "microsecond" relative */ + {"usec", UNITS, DTK_MICROSEC}, /* "microsecond" relative */ + {DMICROSEC, UNITS, DTK_MICROSEC}, /* "microsecond" relative */ + {"useconds", UNITS, DTK_MICROSEC}, /* "microseconds" relative */ + {"usecs", UNITS, DTK_MICROSEC}, /* "microseconds" relative */ + {"w", UNITS, DTK_WEEK}, /* "week" relative */ + {DWEEK, UNITS, DTK_WEEK}, /* "week" relative */ + {"weeks", UNITS, DTK_WEEK}, /* "weeks" relative */ + {"y", UNITS, DTK_YEAR}, /* "year" relative */ + {DYEAR, UNITS, DTK_YEAR}, /* "year" relative */ + {"years", UNITS, DTK_YEAR}, /* "years" relative */ + {"yr", UNITS, DTK_YEAR}, /* "year" relative */ + {"yrs", UNITS, DTK_YEAR} /* "years" relative */ +}; + +static const int szdeltatktbl = sizeof deltatktbl / sizeof deltatktbl[0]; + +static __thread TimeZoneAbbrevTable *zoneabbrevtbl = NULL; + +/* Caches of recent lookup results in the above tables */ + +static __thread const datetkn *datecache[MAXDATEFIELDS] = {NULL}; + +static __thread const datetkn 
*deltacache[MAXDATEFIELDS] = {NULL}; + +static __thread const datetkn *abbrevcache[MAXDATEFIELDS] = {NULL}; + + +/* + * Calendar time to Julian date conversions. + * Julian date is commonly used in astronomical applications, + * since it is numerically accurate and computationally simple. + * The algorithms here will accurately convert between Julian day + * and calendar date for all non-negative Julian days + * (i.e. from Nov 24, -4713 on). + * + * Rewritten to eliminate overflow problems. This now allows the + * routines to work correctly for all Julian day counts from + * 0 to 2147483647 (Nov 24, -4713 to Jun 3, 5874898) assuming + * a 32-bit integer. Longer types should also work to the limits + * of their precision. + * + * Actually, date2j() will work sanely, in the sense of producing + * valid negative Julian dates, significantly before Nov 24, -4713. + * We rely on it to do so back to Nov 1, -4713; see IS_VALID_JULIAN() + * and associated commentary in timestamp.h. + */ + +int +date2j(int year, int month, int day) +{ + int julian; + int century; + + if (month > 2) + { + month += 1; + year += 4800; + } + else + { + month += 13; + year += 4799; + } + + century = year / 100; + julian = year * 365 - 32167; + julian += year / 4 - century + century / 4; + julian += 7834 * month / 256 + day; + + return julian; +} /* date2j() */ + +void +j2date(int jd, int *year, int *month, int *day) +{ + unsigned int julian; + unsigned int quad; + unsigned int extra; + int y; + + julian = jd; + julian += 32044; + quad = julian / 146097; + extra = (julian - quad * 146097) * 4 + 3; + julian += 60 + quad * 3 + extra / 146097; + quad = julian / 1461; + julian -= quad * 1461; + y = julian * 4 / 1461; + julian = ((y != 0) ? ((julian + 305) % 365) : ((julian + 306) % 366)) + + 123; + y += quad * 4; + *year = y - 4800; + quad = julian * 2141 / 65536; + *day = julian - 7834 * quad / 256; + *month = (quad + 10) % MONTHS_PER_YEAR + 1; +} /* j2date() */ + + +/* + * j2day - convert Julian date to day-of-week (0..6 == Sun..Sat) + * + * Note: various places use the locution j2day(date - 1) to produce a + * result according to the convention 0..6 = Mon..Sun. This is a bit of + * a crock, but will work as long as the computation here is just a modulo. + */ +int +j2day(int date) +{ + date += 1; + date %= 7; + /* Cope if division truncates towards zero, as it probably does */ + if (date < 0) + date += 7; + + return date; +} /* j2day() */ + + +/* + * GetCurrentDateTime() + * + * Get the transaction start time ("now()") broken down as a struct pg_tm, + * converted according to the session timezone setting. + * + * This is just a convenience wrapper for GetCurrentTimeUsec, to cover the + * case where caller doesn't need either fractional seconds or tz offset. + */ +void +GetCurrentDateTime(struct pg_tm *tm) +{ + fsec_t fsec; + + GetCurrentTimeUsec(tm, &fsec, NULL); +} + +/* + * GetCurrentTimeUsec() + * + * Get the transaction start time ("now()") broken down as a struct pg_tm, + * including fractional seconds and timezone offset. The time is converted + * according to the session timezone setting. + * + * Callers may pass tzp = NULL if they don't need the offset, but this does + * not affect the conversion behavior (unlike timestamp2tm()). + * + * Internally, we cache the result, since this could be called many times + * in a transaction, within which now() doesn't change. 
+ */ +void +GetCurrentTimeUsec(struct pg_tm *tm, fsec_t *fsec, int *tzp) +{ + TimestampTz cur_ts = GetCurrentTransactionStartTimestamp(); + + /* + * The cache key must include both current time and current timezone. By + * representing the timezone by just a pointer, we're assuming that + * distinct timezone settings could never have the same pointer value. + * This is true by virtue of the hashtable used inside pg_tzset(); + * however, it might need another look if we ever allow entries in that + * hash to be recycled. + */ + static __thread TimestampTz cache_ts = 0; + static __thread pg_tz *cache_timezone = NULL; + static __thread struct pg_tm cache_tm; + static __thread fsec_t cache_fsec; + static __thread int cache_tz; + + if (cur_ts != cache_ts || session_timezone != cache_timezone) + { + /* + * Make sure cache is marked invalid in case of error after partial + * update within timestamp2tm. + */ + cache_timezone = NULL; + + /* + * Perform the computation, storing results into cache. We do not + * really expect any error here, since current time surely ought to be + * within range, but check just for sanity's sake. + */ + if (timestamp2tm(cur_ts, &cache_tz, &cache_tm, &cache_fsec, + NULL, session_timezone) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + /* OK, so mark the cache valid. */ + cache_ts = cur_ts; + cache_timezone = session_timezone; + } + + *tm = cache_tm; + *fsec = cache_fsec; + if (tzp != NULL) + *tzp = cache_tz; +} + + +/* + * Append seconds and fractional seconds (if any) at *cp. + * + * precision is the max number of fraction digits, fillzeros says to + * pad to two integral-seconds digits. + * + * Returns a pointer to the new end of string. No NUL terminator is put + * there; callers are responsible for NUL terminating str themselves. + * + * Note that any sign is stripped from the input sec and fsec values. + */ +static char * +AppendSeconds(char *cp, int sec, fsec_t fsec, int precision, bool fillzeros) +{ + Assert(precision >= 0); + + if (fillzeros) + cp = pg_ultostr_zeropad(cp, abs(sec), 2); + else + cp = pg_ultostr(cp, abs(sec)); + + /* fsec_t is just an int32 */ + if (fsec != 0) + { + int32 value = abs(fsec); + char *end = &cp[precision + 1]; + bool gotnonzero = false; + + *cp++ = '.'; + + /* + * Append the fractional seconds part. Note that we don't want any + * trailing zeros here, so since we're building the number in reverse + * we'll skip appending zeros until we've output a non-zero digit. + */ + while (precision--) + { + int32 oldval = value; + int32 remainder; + + value /= 10; + remainder = oldval - value * 10; + + /* check if we got a non-zero */ + if (remainder) + gotnonzero = true; + + if (gotnonzero) + cp[precision] = '0' + remainder; + else + end = &cp[precision]; + } + + /* + * If we still have a non-zero value then precision must have not been + * enough to print the number. We punt the problem to pg_ultostr(), + * which will generate a correct answer in the minimum valid width. + */ + if (value) + return pg_ultostr(cp, abs(fsec)); + + return end; + } + else + return cp; +} + + +/* + * Variant of above that's specialized to timestamp case. + * + * Returns a pointer to the new end of string. No NUL terminator is put + * there; callers are responsible for NUL terminating str themselves. 
+ */ +static char * +AppendTimestampSeconds(char *cp, struct pg_tm *tm, fsec_t fsec) +{ + return AppendSeconds(cp, tm->tm_sec, fsec, MAX_TIMESTAMP_PRECISION, true); +} + + +/* + * Add val * multiplier to *sum. + * Returns true if successful, false on overflow. + */ +static bool +int64_multiply_add(int64 val, int64 multiplier, int64 *sum) +{ + int64 product; + + if (pg_mul_s64_overflow(val, multiplier, &product) || + pg_add_s64_overflow(*sum, product, sum)) + return false; + return true; +} + +/* + * Multiply frac by scale (to produce microseconds) and add to itm_in->tm_usec. + * Returns true if successful, false if itm_in overflows. + */ +static bool +AdjustFractMicroseconds(double frac, int64 scale, + struct pg_itm_in *itm_in) +{ + int64 usec; + + /* Fast path for common case */ + if (frac == 0) + return true; + + /* + * We assume the input frac has abs value less than 1, so overflow of frac + * or usec is not an issue for interesting values of scale. + */ + frac *= scale; + usec = (int64) frac; + + /* Round off any fractional microsecond */ + frac -= usec; + if (frac > 0.5) + usec++; + else if (frac < -0.5) + usec--; + + return !pg_add_s64_overflow(itm_in->tm_usec, usec, &itm_in->tm_usec); +} + +/* + * Multiply frac by scale (to produce days). Add the integral part of the + * result to itm_in->tm_mday, the fractional part to itm_in->tm_usec. + * Returns true if successful, false if itm_in overflows. + */ +static bool +AdjustFractDays(double frac, int scale, + struct pg_itm_in *itm_in) +{ + int extra_days; + + /* Fast path for common case */ + if (frac == 0) + return true; + + /* + * We assume the input frac has abs value less than 1, so overflow of frac + * or extra_days is not an issue. + */ + frac *= scale; + extra_days = (int) frac; + + /* ... but this could overflow, if tm_mday is already nonzero */ + if (pg_add_s32_overflow(itm_in->tm_mday, extra_days, &itm_in->tm_mday)) + return false; + + /* Handle any fractional day */ + frac -= extra_days; + return AdjustFractMicroseconds(frac, USECS_PER_DAY, itm_in); +} + +/* + * Multiply frac by scale (to produce years), then further scale up to months. + * Add the integral part of the result to itm_in->tm_mon, discarding any + * fractional part. + * Returns true if successful, false if itm_in overflows. + */ +static bool +AdjustFractYears(double frac, int scale, + struct pg_itm_in *itm_in) +{ + /* + * As above, we assume abs(frac) < 1, so this can't overflow for any + * interesting value of scale. + */ + int extra_months = (int) rint(frac * scale * MONTHS_PER_YEAR); + + return !pg_add_s32_overflow(itm_in->tm_mon, extra_months, &itm_in->tm_mon); +} + +/* + * Add (val + fval) * scale to itm_in->tm_usec. + * Returns true if successful, false if itm_in overflows. + */ +static bool +AdjustMicroseconds(int64 val, double fval, int64 scale, + struct pg_itm_in *itm_in) +{ + /* Handle the integer part */ + if (!int64_multiply_add(val, scale, &itm_in->tm_usec)) + return false; + /* Handle the float part */ + return AdjustFractMicroseconds(fval, scale, itm_in); +} + +/* + * Multiply val by scale (to produce days) and add to itm_in->tm_mday. + * Returns true if successful, false if itm_in overflows. 
+ */ +static bool +AdjustDays(int64 val, int scale, struct pg_itm_in *itm_in) +{ + int days; + + if (val < INT_MIN || val > INT_MAX) + return false; + return !pg_mul_s32_overflow((int32) val, scale, &days) && + !pg_add_s32_overflow(itm_in->tm_mday, days, &itm_in->tm_mday); +} + +/* + * Add val to itm_in->tm_mon (no need for scale here, as val is always + * in months already). + * Returns true if successful, false if itm_in overflows. + */ +static bool +AdjustMonths(int64 val, struct pg_itm_in *itm_in) +{ + if (val < INT_MIN || val > INT_MAX) + return false; + return !pg_add_s32_overflow(itm_in->tm_mon, (int32) val, &itm_in->tm_mon); +} + +/* + * Multiply val by scale (to produce years) and add to itm_in->tm_year. + * Returns true if successful, false if itm_in overflows. + */ +static bool +AdjustYears(int64 val, int scale, + struct pg_itm_in *itm_in) +{ + int years; + + if (val < INT_MIN || val > INT_MAX) + return false; + return !pg_mul_s32_overflow((int32) val, scale, &years) && + !pg_add_s32_overflow(itm_in->tm_year, years, &itm_in->tm_year); +} + + +/* + * Parse the fractional part of a number (decimal point and optional digits, + * followed by end of string). Returns the fractional value into *frac. + * + * Returns 0 if successful, DTERR code if bogus input detected. + */ +static int +ParseFraction(char *cp, double *frac) +{ + /* Caller should always pass the start of the fraction part */ + Assert(*cp == '.'); + + /* + * We want to allow just "." with no digits, but some versions of strtod + * will report EINVAL for that, so special-case it. + */ + if (cp[1] == '\0') + { + *frac = 0; + } + else + { + errno = 0; + *frac = strtod(cp, &cp); + /* check for parse failure */ + if (*cp != '\0' || errno != 0) + return DTERR_BAD_FORMAT; + } + return 0; +} + +/* + * Fetch a fractional-second value with suitable error checking. + * Same as ParseFraction except we convert the result to integer microseconds. + */ +static int +ParseFractionalSecond(char *cp, fsec_t *fsec) +{ + double frac; + int dterr; + + dterr = ParseFraction(cp, &frac); + if (dterr) + return dterr; + *fsec = rint(frac * 1000000); + return 0; +} + + +/* ParseDateTime() + * Break string into tokens based on a date/time context. + * Returns 0 if successful, DTERR code if bogus input detected. + * + * timestr - the input string + * workbuf - workspace for field string storage. This must be + * larger than the largest legal input for this datetime type -- + * some additional space will be needed to NUL terminate fields. + * buflen - the size of workbuf + * field[] - pointers to field strings are returned in this array + * ftype[] - field type indicators are returned in this array + * maxfields - dimensions of the above two arrays + * *numfields - set to the actual number of fields detected + * + * The fields extracted from the input are stored as separate, + * null-terminated strings in the workspace at workbuf. Any text is + * converted to lower case. 
+ * + * Several field types are assigned: + * DTK_NUMBER - digits and (possibly) a decimal point + * DTK_DATE - digits and two delimiters, or digits and text + * DTK_TIME - digits, colon delimiters, and possibly a decimal point + * DTK_STRING - text (no digits or punctuation) + * DTK_SPECIAL - leading "+" or "-" followed by text + * DTK_TZ - leading "+" or "-" followed by digits (also eats ':', '.', '-') + * + * Note that some field types can hold unexpected items: + * DTK_NUMBER can hold date fields (yy.ddd) + * DTK_STRING can hold months (January) and time zones (PST) + * DTK_DATE can hold time zone names (America/New_York, GMT-8) + */ +int +ParseDateTime(const char *timestr, char *workbuf, size_t buflen, + char **field, int *ftype, int maxfields, int *numfields) +{ + int nf = 0; + const char *cp = timestr; + char *bufp = workbuf; + const char *bufend = workbuf + buflen; + + /* + * Set the character pointed-to by "bufptr" to "newchar", and increment + * "bufptr". "end" gives the end of the buffer -- we return an error if + * there is no space left to append a character to the buffer. Note that + * "bufptr" is evaluated twice. + */ +#define APPEND_CHAR(bufptr, end, newchar) \ + do \ + { \ + if (((bufptr) + 1) >= (end)) \ + return DTERR_BAD_FORMAT; \ + *(bufptr)++ = newchar; \ + } while (0) + + /* outer loop through fields */ + while (*cp != '\0') + { + /* Ignore spaces between fields */ + if (isspace((unsigned char) *cp)) + { + cp++; + continue; + } + + /* Record start of current field */ + if (nf >= maxfields) + return DTERR_BAD_FORMAT; + field[nf] = bufp; + + /* leading digit? then date or time */ + if (isdigit((unsigned char) *cp)) + { + APPEND_CHAR(bufp, bufend, *cp++); + while (isdigit((unsigned char) *cp)) + APPEND_CHAR(bufp, bufend, *cp++); + + /* time field? */ + if (*cp == ':') + { + ftype[nf] = DTK_TIME; + APPEND_CHAR(bufp, bufend, *cp++); + while (isdigit((unsigned char) *cp) || + (*cp == ':') || (*cp == '.')) + APPEND_CHAR(bufp, bufend, *cp++); + } + /* date field? allow embedded text month */ + else if (*cp == '-' || *cp == '/' || *cp == '.') + { + /* save delimiting character to use later */ + char delim = *cp; + + APPEND_CHAR(bufp, bufend, *cp++); + /* second field is all digits? then no embedded text month */ + if (isdigit((unsigned char) *cp)) + { + ftype[nf] = ((delim == '.') ? DTK_NUMBER : DTK_DATE); + while (isdigit((unsigned char) *cp)) + APPEND_CHAR(bufp, bufend, *cp++); + + /* + * insist that the delimiters match to get a three-field + * date. + */ + if (*cp == delim) + { + ftype[nf] = DTK_DATE; + APPEND_CHAR(bufp, bufend, *cp++); + while (isdigit((unsigned char) *cp) || *cp == delim) + APPEND_CHAR(bufp, bufend, *cp++); + } + } + else + { + ftype[nf] = DTK_DATE; + while (isalnum((unsigned char) *cp) || *cp == delim) + APPEND_CHAR(bufp, bufend, pg_tolower((unsigned char) *cp++)); + } + } + + /* + * otherwise, number only and will determine year, month, day, or + * concatenated fields later... + */ + else + ftype[nf] = DTK_NUMBER; + } + /* Leading decimal point? Then fractional seconds... */ + else if (*cp == '.') + { + APPEND_CHAR(bufp, bufend, *cp++); + while (isdigit((unsigned char) *cp)) + APPEND_CHAR(bufp, bufend, *cp++); + + ftype[nf] = DTK_NUMBER; + } + + /* + * text? 
then date string, month, day of week, special, or timezone + */ + else if (isalpha((unsigned char) *cp)) + { + bool is_date; + + ftype[nf] = DTK_STRING; + APPEND_CHAR(bufp, bufend, pg_tolower((unsigned char) *cp++)); + while (isalpha((unsigned char) *cp)) + APPEND_CHAR(bufp, bufend, pg_tolower((unsigned char) *cp++)); + + /* + * Dates can have embedded '-', '/', or '.' separators. It could + * also be a timezone name containing embedded '/', '+', '-', '_', + * or ':' (but '_' or ':' can't be the first punctuation). If the + * next character is a digit or '+', we need to check whether what + * we have so far is a recognized non-timezone keyword --- if so, + * don't believe that this is the start of a timezone. + */ + is_date = false; + if (*cp == '-' || *cp == '/' || *cp == '.') + is_date = true; + else if (*cp == '+' || isdigit((unsigned char) *cp)) + { + *bufp = '\0'; /* null-terminate current field value */ + /* we need search only the core token table, not TZ names */ + if (datebsearch(field[nf], datetktbl, szdatetktbl) == NULL) + is_date = true; + } + if (is_date) + { + ftype[nf] = DTK_DATE; + do + { + APPEND_CHAR(bufp, bufend, pg_tolower((unsigned char) *cp++)); + } while (*cp == '+' || *cp == '-' || + *cp == '/' || *cp == '_' || + *cp == '.' || *cp == ':' || + isalnum((unsigned char) *cp)); + } + } + /* sign? then special or numeric timezone */ + else if (*cp == '+' || *cp == '-') + { + APPEND_CHAR(bufp, bufend, *cp++); + /* soak up leading whitespace */ + while (isspace((unsigned char) *cp)) + cp++; + /* numeric timezone? */ + /* note that "DTK_TZ" could also be a signed float or yyyy-mm */ + if (isdigit((unsigned char) *cp)) + { + ftype[nf] = DTK_TZ; + APPEND_CHAR(bufp, bufend, *cp++); + while (isdigit((unsigned char) *cp) || + *cp == ':' || *cp == '.' || *cp == '-') + APPEND_CHAR(bufp, bufend, *cp++); + } + /* special? */ + else if (isalpha((unsigned char) *cp)) + { + ftype[nf] = DTK_SPECIAL; + APPEND_CHAR(bufp, bufend, pg_tolower((unsigned char) *cp++)); + while (isalpha((unsigned char) *cp)) + APPEND_CHAR(bufp, bufend, pg_tolower((unsigned char) *cp++)); + } + /* otherwise something wrong... */ + else + return DTERR_BAD_FORMAT; + } + /* ignore other punctuation but use as delimiter */ + else if (ispunct((unsigned char) *cp)) + { + cp++; + continue; + } + /* otherwise, something is not right... */ + else + return DTERR_BAD_FORMAT; + + /* force in a delimiter after each field */ + *bufp++ = '\0'; + nf++; + } + + *numfields = nf; + + return 0; +} + + +/* DecodeDateTime() + * Interpret previously parsed fields for general date and time. + * Return 0 if full date, 1 if only time, and negative DTERR code if problems. + * (Currently, all callers treat 1 as an error return too.) + * + * Inputs are field[] and ftype[] arrays, of length nf. + * Other arguments are outputs. + * + * External format(s): + * "<weekday> <month>-<day>-<year> <hour>:<minute>:<second>" + * "Fri Feb-7-1997 15:23:27" + * "Feb-7-1997 15:23:27" + * "2-7-1997 15:23:27" + * "1997-2-7 15:23:27" + * "1997.038 15:23:27" (day of year 1-366) + * Also supports input in compact time: + * "970207 152327" + * "97038 152327" + * "20011225T040506.789-07" + * + * Use the system-provided functions to get the current time zone + * if not specified in the input string. 
+ * + * If the date is outside the range of pg_time_t (in practice that could only + * happen if pg_time_t is just 32 bits), then assume UTC time zone - thomas + * 1997-05-27 + */ +int +DecodeDateTime(char **field, int *ftype, int nf, + int *dtype, struct pg_tm *tm, fsec_t *fsec, int *tzp, + DateTimeErrorExtra *extra) +{ + int fmask = 0, + tmask, + type; + int ptype = 0; /* "prefix type" for ISO and Julian formats */ + int i; + int val; + int dterr; + int mer = HR24; + bool haveTextMonth = false; + bool isjulian = false; + bool is2digits = false; + bool bc = false; + pg_tz *namedTz = NULL; + pg_tz *abbrevTz = NULL; + pg_tz *valtz; + char *abbrev = NULL; + struct pg_tm cur_tm; + + /* + * We'll insist on at least all of the date fields, but initialize the + * remaining fields in case they are not set later... + */ + *dtype = DTK_DATE; + tm->tm_hour = 0; + tm->tm_min = 0; + tm->tm_sec = 0; + *fsec = 0; + /* don't know daylight savings time status apriori */ + tm->tm_isdst = -1; + if (tzp != NULL) + *tzp = 0; + + for (i = 0; i < nf; i++) + { + switch (ftype[i]) + { + case DTK_DATE: + + /* + * Integral julian day with attached time zone? All other + * forms with JD will be separated into distinct fields, so we + * handle just this case here. + */ + if (ptype == DTK_JULIAN) + { + char *cp; + int jday; + + if (tzp == NULL) + return DTERR_BAD_FORMAT; + + errno = 0; + jday = strtoint(field[i], &cp, 10); + if (errno == ERANGE || jday < 0) + return DTERR_FIELD_OVERFLOW; + + j2date(jday, &tm->tm_year, &tm->tm_mon, &tm->tm_mday); + isjulian = true; + + /* Get the time zone from the end of the string */ + dterr = DecodeTimezone(cp, tzp); + if (dterr) + return dterr; + + tmask = DTK_DATE_M | DTK_TIME_M | DTK_M(TZ); + ptype = 0; + break; + } + + /* + * Already have a date? Then this might be a time zone name + * with embedded punctuation (e.g. "America/New_York") or a + * run-together time with trailing time zone (e.g. hhmmss-zz). + * - thomas 2001-12-25 + * + * We consider it a time zone if we already have month & day. + * This is to allow the form "mmm dd hhmmss tz year", which + * we've historically accepted. + */ + else if (ptype != 0 || + ((fmask & (DTK_M(MONTH) | DTK_M(DAY))) == + (DTK_M(MONTH) | DTK_M(DAY)))) + { + /* No time zone accepted? Then quit... */ + if (tzp == NULL) + return DTERR_BAD_FORMAT; + + if (isdigit((unsigned char) *field[i]) || ptype != 0) + { + char *cp; + + /* + * Allow a preceding "t" field, but no other units. + */ + if (ptype != 0) + { + /* Sanity check; should not fail this test */ + if (ptype != DTK_TIME) + return DTERR_BAD_FORMAT; + ptype = 0; + } + + /* + * Starts with a digit but we already have a time + * field? Then we are in trouble with a date and time + * already... 
+ */ + if ((fmask & DTK_TIME_M) == DTK_TIME_M) + return DTERR_BAD_FORMAT; + + if ((cp = strchr(field[i], '-')) == NULL) + return DTERR_BAD_FORMAT; + + /* Get the time zone from the end of the string */ + dterr = DecodeTimezone(cp, tzp); + if (dterr) + return dterr; + *cp = '\0'; + + /* + * Then read the rest of the field as a concatenated + * time + */ + dterr = DecodeNumberField(strlen(field[i]), field[i], + fmask, + &tmask, tm, + fsec, &is2digits); + if (dterr < 0) + return dterr; + + /* + * modify tmask after returning from + * DecodeNumberField() + */ + tmask |= DTK_M(TZ); + } + else + { + namedTz = pg_tzset(field[i]); + if (!namedTz) + { + extra->dtee_timezone = field[i]; + return DTERR_BAD_TIMEZONE; + } + /* we'll apply the zone setting below */ + tmask = DTK_M(TZ); + } + } + else + { + dterr = DecodeDate(field[i], fmask, + &tmask, &is2digits, tm); + if (dterr) + return dterr; + } + break; + + case DTK_TIME: + + /* + * This might be an ISO time following a "t" field. + */ + if (ptype != 0) + { + /* Sanity check; should not fail this test */ + if (ptype != DTK_TIME) + return DTERR_BAD_FORMAT; + ptype = 0; + } + dterr = DecodeTime(field[i], fmask, INTERVAL_FULL_RANGE, + &tmask, tm, fsec); + if (dterr) + return dterr; + + /* check for time overflow */ + if (time_overflows(tm->tm_hour, tm->tm_min, tm->tm_sec, + *fsec)) + return DTERR_FIELD_OVERFLOW; + break; + + case DTK_TZ: + { + int tz; + + if (tzp == NULL) + return DTERR_BAD_FORMAT; + + dterr = DecodeTimezone(field[i], &tz); + if (dterr) + return dterr; + *tzp = tz; + tmask = DTK_M(TZ); + } + break; + + case DTK_NUMBER: + + /* + * Deal with cases where previous field labeled this one + */ + if (ptype != 0) + { + char *cp; + int value; + + errno = 0; + value = strtoint(field[i], &cp, 10); + if (errno == ERANGE) + return DTERR_FIELD_OVERFLOW; + if (*cp != '.' && *cp != '\0') + return DTERR_BAD_FORMAT; + + switch (ptype) + { + case DTK_JULIAN: + /* previous field was a label for "julian date" */ + if (value < 0) + return DTERR_FIELD_OVERFLOW; + tmask = DTK_DATE_M; + j2date(value, &tm->tm_year, &tm->tm_mon, &tm->tm_mday); + isjulian = true; + + /* fractional Julian Day? */ + if (*cp == '.') + { + double time; + + dterr = ParseFraction(cp, &time); + if (dterr) + return dterr; + time *= USECS_PER_DAY; + dt2time(time, + &tm->tm_hour, &tm->tm_min, + &tm->tm_sec, fsec); + tmask |= DTK_TIME_M; + } + break; + + case DTK_TIME: + /* previous field was "t" for ISO time */ + dterr = DecodeNumberField(strlen(field[i]), field[i], + (fmask | DTK_DATE_M), + &tmask, tm, + fsec, &is2digits); + if (dterr < 0) + return dterr; + if (tmask != DTK_TIME_M) + return DTERR_BAD_FORMAT; + break; + + default: + return DTERR_BAD_FORMAT; + break; + } + + ptype = 0; + *dtype = DTK_DATE; + } + else + { + char *cp; + int flen; + + flen = strlen(field[i]); + cp = strchr(field[i], '.'); + + /* Embedded decimal and no date yet? */ + if (cp != NULL && !(fmask & DTK_DATE_M)) + { + dterr = DecodeDate(field[i], fmask, + &tmask, &is2digits, tm); + if (dterr) + return dterr; + } + /* embedded decimal and several digits before? */ + else if (cp != NULL && flen - strlen(cp) > 2) + { + /* + * Interpret as a concatenated date or time Set the + * type field to allow decoding other fields later. + * Example: 20011223 or 040506 + */ + dterr = DecodeNumberField(flen, field[i], fmask, + &tmask, tm, + fsec, &is2digits); + if (dterr < 0) + return dterr; + } + + /* + * Is this a YMD or HMS specification, or a year number? 
+ * YMD and HMS are required to be six digits or more, so + * if it is 5 digits, it is a year. If it is six or more + * digits, we assume it is YMD or HMS unless no date and + * no time values have been specified. This forces 6+ + * digit years to be at the end of the string, or to use + * the ISO date specification. + */ + else if (flen >= 6 && (!(fmask & DTK_DATE_M) || + !(fmask & DTK_TIME_M))) + { + dterr = DecodeNumberField(flen, field[i], fmask, + &tmask, tm, + fsec, &is2digits); + if (dterr < 0) + return dterr; + } + /* otherwise it is a single date/time field... */ + else + { + dterr = DecodeNumber(flen, field[i], + haveTextMonth, fmask, + &tmask, tm, + fsec, &is2digits); + if (dterr) + return dterr; + } + } + break; + + case DTK_STRING: + case DTK_SPECIAL: + /* timezone abbrevs take precedence over built-in tokens */ + dterr = DecodeTimezoneAbbrev(i, field[i], + &type, &val, &valtz, extra); + if (dterr) + return dterr; + if (type == UNKNOWN_FIELD) + type = DecodeSpecial(i, field[i], &val); + if (type == IGNORE_DTF) + continue; + + tmask = DTK_M(type); + switch (type) + { + case RESERV: + switch (val) + { + case DTK_NOW: + tmask = (DTK_DATE_M | DTK_TIME_M | DTK_M(TZ)); + *dtype = DTK_DATE; + GetCurrentTimeUsec(tm, fsec, tzp); + break; + + case DTK_YESTERDAY: + tmask = DTK_DATE_M; + *dtype = DTK_DATE; + GetCurrentDateTime(&cur_tm); + j2date(date2j(cur_tm.tm_year, cur_tm.tm_mon, cur_tm.tm_mday) - 1, + &tm->tm_year, &tm->tm_mon, &tm->tm_mday); + break; + + case DTK_TODAY: + tmask = DTK_DATE_M; + *dtype = DTK_DATE; + GetCurrentDateTime(&cur_tm); + tm->tm_year = cur_tm.tm_year; + tm->tm_mon = cur_tm.tm_mon; + tm->tm_mday = cur_tm.tm_mday; + break; + + case DTK_TOMORROW: + tmask = DTK_DATE_M; + *dtype = DTK_DATE; + GetCurrentDateTime(&cur_tm); + j2date(date2j(cur_tm.tm_year, cur_tm.tm_mon, cur_tm.tm_mday) + 1, + &tm->tm_year, &tm->tm_mon, &tm->tm_mday); + break; + + case DTK_ZULU: + tmask = (DTK_TIME_M | DTK_M(TZ)); + *dtype = DTK_DATE; + tm->tm_hour = 0; + tm->tm_min = 0; + tm->tm_sec = 0; + if (tzp != NULL) + *tzp = 0; + break; + + case DTK_EPOCH: + case DTK_LATE: + case DTK_EARLY: + tmask = (DTK_DATE_M | DTK_TIME_M | DTK_M(TZ)); + *dtype = val; + /* caller ignores tm for these dtype codes */ + break; + + default: + elog(ERROR, "unrecognized RESERV datetime token: %d", + val); + } + + break; + + case MONTH: + + /* + * already have a (numeric) month? then see if we can + * substitute... 
+ */ + if ((fmask & DTK_M(MONTH)) && !haveTextMonth && + !(fmask & DTK_M(DAY)) && tm->tm_mon >= 1 && + tm->tm_mon <= 31) + { + tm->tm_mday = tm->tm_mon; + tmask = DTK_M(DAY); + } + haveTextMonth = true; + tm->tm_mon = val; + break; + + case DTZMOD: + + /* + * daylight savings time modifier (solves "MET DST" + * syntax) + */ + tmask |= DTK_M(DTZ); + tm->tm_isdst = 1; + if (tzp == NULL) + return DTERR_BAD_FORMAT; + *tzp -= val; + break; + + case DTZ: + + /* + * set mask for TZ here _or_ check for DTZ later when + * getting default timezone + */ + tmask |= DTK_M(TZ); + tm->tm_isdst = 1; + if (tzp == NULL) + return DTERR_BAD_FORMAT; + *tzp = -val; + break; + + case TZ: + tm->tm_isdst = 0; + if (tzp == NULL) + return DTERR_BAD_FORMAT; + *tzp = -val; + break; + + case DYNTZ: + tmask |= DTK_M(TZ); + if (tzp == NULL) + return DTERR_BAD_FORMAT; + /* we'll determine the actual offset later */ + abbrevTz = valtz; + abbrev = field[i]; + break; + + case AMPM: + mer = val; + break; + + case ADBC: + bc = (val == BC); + break; + + case DOW: + tm->tm_wday = val; + break; + + case UNITS: + tmask = 0; + /* reject consecutive unhandled units */ + if (ptype != 0) + return DTERR_BAD_FORMAT; + ptype = val; + break; + + case ISOTIME: + + /* + * This is a filler field "t" indicating that the next + * field is time. Try to verify that this is sensible. + */ + tmask = 0; + + /* No preceding date? Then quit... */ + if ((fmask & DTK_DATE_M) != DTK_DATE_M) + return DTERR_BAD_FORMAT; + + /* reject consecutive unhandled units */ + if (ptype != 0) + return DTERR_BAD_FORMAT; + ptype = val; + break; + + case UNKNOWN_FIELD: + + /* + * Before giving up and declaring error, check to see + * if it is an all-alpha timezone name. + */ + namedTz = pg_tzset(field[i]); + if (!namedTz) + return DTERR_BAD_FORMAT; + /* we'll apply the zone setting below */ + tmask = DTK_M(TZ); + break; + + default: + return DTERR_BAD_FORMAT; + } + break; + + default: + return DTERR_BAD_FORMAT; + } + + if (tmask & fmask) + return DTERR_BAD_FORMAT; + fmask |= tmask; + } /* end loop over fields */ + + /* reject if prefix type appeared and was never handled */ + if (ptype != 0) + return DTERR_BAD_FORMAT; + + /* do additional checking for normal date specs (but not "infinity" etc) */ + if (*dtype == DTK_DATE) + { + /* do final checking/adjustment of Y/M/D fields */ + dterr = ValidateDate(fmask, isjulian, is2digits, bc, tm); + if (dterr) + return dterr; + + /* handle AM/PM */ + if (mer != HR24 && tm->tm_hour > HOURS_PER_DAY / 2) + return DTERR_FIELD_OVERFLOW; + if (mer == AM && tm->tm_hour == HOURS_PER_DAY / 2) + tm->tm_hour = 0; + else if (mer == PM && tm->tm_hour != HOURS_PER_DAY / 2) + tm->tm_hour += HOURS_PER_DAY / 2; + + /* check for incomplete input */ + if ((fmask & DTK_DATE_M) != DTK_DATE_M) + { + if ((fmask & DTK_TIME_M) == DTK_TIME_M) + return 1; + return DTERR_BAD_FORMAT; + } + + /* + * If we had a full timezone spec, compute the offset (we could not do + * it before, because we need the date to resolve DST status). + */ + if (namedTz != NULL) + { + /* daylight savings time modifier disallowed with full TZ */ + if (fmask & DTK_M(DTZMOD)) + return DTERR_BAD_FORMAT; + + *tzp = DetermineTimeZoneOffset(tm, namedTz); + } + + /* + * Likewise, if we had a dynamic timezone abbreviation, resolve it + * now. + */ + if (abbrevTz != NULL) + { + /* daylight savings time modifier disallowed with dynamic TZ */ + if (fmask & DTK_M(DTZMOD)) + return DTERR_BAD_FORMAT; + + *tzp = DetermineTimeZoneAbbrevOffset(tm, abbrev, abbrevTz); + } + + /* timezone not specified? 
then use session timezone */ + if (tzp != NULL && !(fmask & DTK_M(TZ))) + { + /* + * daylight savings time modifier but no standard timezone? then + * error + */ + if (fmask & DTK_M(DTZMOD)) + return DTERR_BAD_FORMAT; + + *tzp = DetermineTimeZoneOffset(tm, session_timezone); + } + } + + return 0; +} + + +/* DetermineTimeZoneOffset() + * + * Given a struct pg_tm in which tm_year, tm_mon, tm_mday, tm_hour, tm_min, + * and tm_sec fields are set, and a zic-style time zone definition, determine + * the applicable GMT offset and daylight-savings status at that time. + * Set the struct pg_tm's tm_isdst field accordingly, and return the GMT + * offset as the function result. + * + * Note: if the date is out of the range we can deal with, we return zero + * as the GMT offset and set tm_isdst = 0. We don't throw an error here, + * though probably some higher-level code will. + */ +int +DetermineTimeZoneOffset(struct pg_tm *tm, pg_tz *tzp) +{ + pg_time_t t; + + return DetermineTimeZoneOffsetInternal(tm, tzp, &t); +} + + +/* DetermineTimeZoneOffsetInternal() + * + * As above, but also return the actual UTC time imputed to the date/time + * into *tp. + * + * In event of an out-of-range date, we punt by returning zero into *tp. + * This is okay for the immediate callers but is a good reason for not + * exposing this worker function globally. + * + * Note: it might seem that we should use mktime() for this, but bitter + * experience teaches otherwise. This code is much faster than most versions + * of mktime(), anyway. + */ +static int +DetermineTimeZoneOffsetInternal(struct pg_tm *tm, pg_tz *tzp, pg_time_t *tp) +{ + int date, + sec; + pg_time_t day, + mytime, + prevtime, + boundary, + beforetime, + aftertime; + long int before_gmtoff, + after_gmtoff; + int before_isdst, + after_isdst; + int res; + + /* + * First, generate the pg_time_t value corresponding to the given + * y/m/d/h/m/s taken as GMT time. If this overflows, punt and decide the + * timezone is GMT. (For a valid Julian date, integer overflow should be + * impossible with 64-bit pg_time_t, but let's check for safety.) + */ + if (!IS_VALID_JULIAN(tm->tm_year, tm->tm_mon, tm->tm_mday)) + goto overflow; + date = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - UNIX_EPOCH_JDATE; + + day = ((pg_time_t) date) * SECS_PER_DAY; + if (day / SECS_PER_DAY != date) + goto overflow; + sec = tm->tm_sec + (tm->tm_min + tm->tm_hour * MINS_PER_HOUR) * SECS_PER_MINUTE; + mytime = day + sec; + /* since sec >= 0, overflow could only be from +day to -mytime */ + if (mytime < 0 && day > 0) + goto overflow; + + /* + * Find the DST time boundary just before or following the target time. We + * assume that all zones have GMT offsets less than 24 hours, and that DST + * boundaries can't be closer together than 48 hours, so backing up 24 + * hours and finding the "next" boundary will work. + */ + prevtime = mytime - SECS_PER_DAY; + if (mytime < 0 && prevtime > 0) + goto overflow; + + res = pg_next_dst_boundary(&prevtime, + &before_gmtoff, &before_isdst, + &boundary, + &after_gmtoff, &after_isdst, + tzp); + if (res < 0) + goto overflow; /* failure? 
*/ + + if (res == 0) + { + /* Non-DST zone, life is simple */ + tm->tm_isdst = before_isdst; + *tp = mytime - before_gmtoff; + return -(int) before_gmtoff; + } + + /* + * Form the candidate pg_time_t values with local-time adjustment + */ + beforetime = mytime - before_gmtoff; + if ((before_gmtoff > 0 && + mytime < 0 && beforetime > 0) || + (before_gmtoff <= 0 && + mytime > 0 && beforetime < 0)) + goto overflow; + aftertime = mytime - after_gmtoff; + if ((after_gmtoff > 0 && + mytime < 0 && aftertime > 0) || + (after_gmtoff <= 0 && + mytime > 0 && aftertime < 0)) + goto overflow; + + /* + * If both before or both after the boundary time, we know what to do. The + * boundary time itself is considered to be after the transition, which + * means we can accept aftertime == boundary in the second case. + */ + if (beforetime < boundary && aftertime < boundary) + { + tm->tm_isdst = before_isdst; + *tp = beforetime; + return -(int) before_gmtoff; + } + if (beforetime > boundary && aftertime >= boundary) + { + tm->tm_isdst = after_isdst; + *tp = aftertime; + return -(int) after_gmtoff; + } + + /* + * It's an invalid or ambiguous time due to timezone transition. In a + * spring-forward transition, prefer the "before" interpretation; in a + * fall-back transition, prefer "after". (We used to define and implement + * this test as "prefer the standard-time interpretation", but that rule + * does not help to resolve the behavior when both times are reported as + * standard time; which does happen, eg Europe/Moscow in Oct 2014. Also, + * in some zones such as Europe/Dublin, there is widespread confusion + * about which time offset is "standard" time, so it's fortunate that our + * behavior doesn't depend on that.) + */ + if (beforetime > aftertime) + { + tm->tm_isdst = before_isdst; + *tp = beforetime; + return -(int) before_gmtoff; + } + tm->tm_isdst = after_isdst; + *tp = aftertime; + return -(int) after_gmtoff; + +overflow: + /* Given date is out of range, so assume UTC */ + tm->tm_isdst = 0; + *tp = 0; + return 0; +} + + +/* DetermineTimeZoneAbbrevOffset() + * + * Determine the GMT offset and DST flag to be attributed to a dynamic + * time zone abbreviation, that is one whose meaning has changed over time. + * *tm contains the local time at which the meaning should be determined, + * and tm->tm_isdst receives the DST flag. + * + * This differs from the behavior of DetermineTimeZoneOffset() in that a + * standard-time or daylight-time abbreviation forces use of the corresponding + * GMT offset even when the zone was then in DS or standard time respectively. + * (However, that happens only if we can match the given abbreviation to some + * abbreviation that appears in the IANA timezone data. Otherwise, we fall + * back to doing DetermineTimeZoneOffset().) + */ +int +DetermineTimeZoneAbbrevOffset(struct pg_tm *tm, const char *abbr, pg_tz *tzp) +{ + pg_time_t t; + int zone_offset; + int abbr_offset; + int abbr_isdst; + + /* + * Compute the UTC time we want to probe at. (In event of overflow, we'll + * probe at the epoch, which is a bit random but probably doesn't matter.) + */ + zone_offset = DetermineTimeZoneOffsetInternal(tm, tzp, &t); + + /* + * Try to match the abbreviation to something in the zone definition. + */ + if (DetermineTimeZoneAbbrevOffsetInternal(t, abbr, tzp, + &abbr_offset, &abbr_isdst)) + { + /* Success, so use the abbrev-specific answers. 
*/ + tm->tm_isdst = abbr_isdst; + return abbr_offset; + } + + /* + * No match, so use the answers we already got from + * DetermineTimeZoneOffsetInternal. + */ + return zone_offset; +} + + +/* DetermineTimeZoneAbbrevOffsetTS() + * + * As above but the probe time is specified as a TimestampTz (hence, UTC time), + * and DST status is returned into *isdst rather than into tm->tm_isdst. + */ +int +DetermineTimeZoneAbbrevOffsetTS(TimestampTz ts, const char *abbr, + pg_tz *tzp, int *isdst) +{ + pg_time_t t = timestamptz_to_time_t(ts); + int zone_offset; + int abbr_offset; + int tz; + struct pg_tm tm; + fsec_t fsec; + + /* + * If the abbrev matches anything in the zone data, this is pretty easy. + */ + if (DetermineTimeZoneAbbrevOffsetInternal(t, abbr, tzp, + &abbr_offset, isdst)) + return abbr_offset; + + /* + * Else, break down the timestamp so we can use DetermineTimeZoneOffset. + */ + if (timestamp2tm(ts, &tz, &tm, &fsec, NULL, tzp) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + zone_offset = DetermineTimeZoneOffset(&tm, tzp); + *isdst = tm.tm_isdst; + return zone_offset; +} + + +/* DetermineTimeZoneAbbrevOffsetInternal() + * + * Workhorse for above two functions: work from a pg_time_t probe instant. + * On success, return GMT offset and DST status into *offset and *isdst. + */ +static bool +DetermineTimeZoneAbbrevOffsetInternal(pg_time_t t, const char *abbr, pg_tz *tzp, + int *offset, int *isdst) +{ + char upabbr[TZ_STRLEN_MAX + 1]; + unsigned char *p; + long int gmtoff; + + /* We need to force the abbrev to upper case */ + strlcpy(upabbr, abbr, sizeof(upabbr)); + for (p = (unsigned char *) upabbr; *p; p++) + *p = pg_toupper(*p); + + /* Look up the abbrev's meaning at this time in this zone */ + if (pg_interpret_timezone_abbrev(upabbr, + &t, + &gmtoff, + isdst, + tzp)) + { + /* Change sign to agree with DetermineTimeZoneOffset() */ + *offset = (int) -gmtoff; + return true; + } + return false; +} + + +/* DecodeTimeOnly() + * Interpret parsed string as time fields only. + * Returns 0 if successful, DTERR code if bogus input detected. + * + * Inputs are field[] and ftype[] arrays, of length nf. + * Other arguments are outputs. + * + * Note that support for time zone is here for + * SQL TIME WITH TIME ZONE, but it reveals + * bogosity with SQL date/time standards, since + * we must infer a time zone from current time. + * - thomas 2000-03-10 + * Allow specifying date to get a better time zone, + * if time zones are allowed. - thomas 2001-12-26 + */ +int +DecodeTimeOnly(char **field, int *ftype, int nf, + int *dtype, struct pg_tm *tm, fsec_t *fsec, int *tzp, + DateTimeErrorExtra *extra) +{ + int fmask = 0, + tmask, + type; + int ptype = 0; /* "prefix type" for ISO and Julian formats */ + int i; + int val; + int dterr; + bool isjulian = false; + bool is2digits = false; + bool bc = false; + int mer = HR24; + pg_tz *namedTz = NULL; + pg_tz *abbrevTz = NULL; + char *abbrev = NULL; + pg_tz *valtz; + + *dtype = DTK_TIME; + tm->tm_hour = 0; + tm->tm_min = 0; + tm->tm_sec = 0; + *fsec = 0; + /* don't know daylight savings time status apriori */ + tm->tm_isdst = -1; + + if (tzp != NULL) + *tzp = 0; + + for (i = 0; i < nf; i++) + { + switch (ftype[i]) + { + case DTK_DATE: + + /* + * Time zone not allowed? Then should not accept dates or time + * zones no matter what else! + */ + if (tzp == NULL) + return DTERR_BAD_FORMAT; + + /* Under limited circumstances, we will accept a date... 
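+ (e.g. so a leading date can be used to resolve the time zone, per the notes above)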
*/ + if (i == 0 && nf >= 2 && + (ftype[nf - 1] == DTK_DATE || ftype[1] == DTK_TIME)) + { + dterr = DecodeDate(field[i], fmask, + &tmask, &is2digits, tm); + if (dterr) + return dterr; + } + /* otherwise, this is a time and/or time zone */ + else + { + if (isdigit((unsigned char) *field[i])) + { + char *cp; + + /* + * Starts with a digit but we already have a time + * field? Then we are in trouble with time already... + */ + if ((fmask & DTK_TIME_M) == DTK_TIME_M) + return DTERR_BAD_FORMAT; + + /* + * Should not get here and fail. Sanity check only... + */ + if ((cp = strchr(field[i], '-')) == NULL) + return DTERR_BAD_FORMAT; + + /* Get the time zone from the end of the string */ + dterr = DecodeTimezone(cp, tzp); + if (dterr) + return dterr; + *cp = '\0'; + + /* + * Then read the rest of the field as a concatenated + * time + */ + dterr = DecodeNumberField(strlen(field[i]), field[i], + (fmask | DTK_DATE_M), + &tmask, tm, + fsec, &is2digits); + if (dterr < 0) + return dterr; + ftype[i] = dterr; + + tmask |= DTK_M(TZ); + } + else + { + namedTz = pg_tzset(field[i]); + if (!namedTz) + { + extra->dtee_timezone = field[i]; + return DTERR_BAD_TIMEZONE; + } + /* we'll apply the zone setting below */ + ftype[i] = DTK_TZ; + tmask = DTK_M(TZ); + } + } + break; + + case DTK_TIME: + dterr = DecodeTime(field[i], (fmask | DTK_DATE_M), + INTERVAL_FULL_RANGE, + &tmask, tm, fsec); + if (dterr) + return dterr; + break; + + case DTK_TZ: + { + int tz; + + if (tzp == NULL) + return DTERR_BAD_FORMAT; + + dterr = DecodeTimezone(field[i], &tz); + if (dterr) + return dterr; + *tzp = tz; + tmask = DTK_M(TZ); + } + break; + + case DTK_NUMBER: + + /* + * Deal with cases where previous field labeled this one + */ + if (ptype != 0) + { + char *cp; + int value; + + errno = 0; + value = strtoint(field[i], &cp, 10); + if (errno == ERANGE) + return DTERR_FIELD_OVERFLOW; + if (*cp != '.' && *cp != '\0') + return DTERR_BAD_FORMAT; + + switch (ptype) + { + case DTK_JULIAN: + /* previous field was a label for "julian date" */ + if (tzp == NULL) + return DTERR_BAD_FORMAT; + if (value < 0) + return DTERR_FIELD_OVERFLOW; + tmask = DTK_DATE_M; + j2date(value, &tm->tm_year, &tm->tm_mon, &tm->tm_mday); + isjulian = true; + + if (*cp == '.') + { + double time; + + dterr = ParseFraction(cp, &time); + if (dterr) + return dterr; + time *= USECS_PER_DAY; + dt2time(time, + &tm->tm_hour, &tm->tm_min, + &tm->tm_sec, fsec); + tmask |= DTK_TIME_M; + } + break; + + case DTK_TIME: + /* previous field was "t" for ISO time */ + dterr = DecodeNumberField(strlen(field[i]), field[i], + (fmask | DTK_DATE_M), + &tmask, tm, + fsec, &is2digits); + if (dterr < 0) + return dterr; + ftype[i] = dterr; + + if (tmask != DTK_TIME_M) + return DTERR_BAD_FORMAT; + break; + + default: + return DTERR_BAD_FORMAT; + break; + } + + ptype = 0; + *dtype = DTK_DATE; + } + else + { + char *cp; + int flen; + + flen = strlen(field[i]); + cp = strchr(field[i], '.'); + + /* Embedded decimal? */ + if (cp != NULL) + { + /* + * Under limited circumstances, we will accept a + * date... + */ + if (i == 0 && nf >= 2 && ftype[nf - 1] == DTK_DATE) + { + dterr = DecodeDate(field[i], fmask, + &tmask, &is2digits, tm); + if (dterr) + return dterr; + } + /* embedded decimal and several digits before? */ + else if (flen - strlen(cp) > 2) + { + /* + * Interpret as a concatenated date or time Set + * the type field to allow decoding other fields + * later. 
Example: 20011223 or 040506 + */ + dterr = DecodeNumberField(flen, field[i], + (fmask | DTK_DATE_M), + &tmask, tm, + fsec, &is2digits); + if (dterr < 0) + return dterr; + ftype[i] = dterr; + } + else + return DTERR_BAD_FORMAT; + } + else if (flen > 4) + { + dterr = DecodeNumberField(flen, field[i], + (fmask | DTK_DATE_M), + &tmask, tm, + fsec, &is2digits); + if (dterr < 0) + return dterr; + ftype[i] = dterr; + } + /* otherwise it is a single date/time field... */ + else + { + dterr = DecodeNumber(flen, field[i], + false, + (fmask | DTK_DATE_M), + &tmask, tm, + fsec, &is2digits); + if (dterr) + return dterr; + } + } + break; + + case DTK_STRING: + case DTK_SPECIAL: + /* timezone abbrevs take precedence over built-in tokens */ + dterr = DecodeTimezoneAbbrev(i, field[i], + &type, &val, &valtz, extra); + if (dterr) + return dterr; + if (type == UNKNOWN_FIELD) + type = DecodeSpecial(i, field[i], &val); + if (type == IGNORE_DTF) + continue; + + tmask = DTK_M(type); + switch (type) + { + case RESERV: + switch (val) + { + case DTK_NOW: + tmask = DTK_TIME_M; + *dtype = DTK_TIME; + GetCurrentTimeUsec(tm, fsec, NULL); + break; + + case DTK_ZULU: + tmask = (DTK_TIME_M | DTK_M(TZ)); + *dtype = DTK_TIME; + tm->tm_hour = 0; + tm->tm_min = 0; + tm->tm_sec = 0; + tm->tm_isdst = 0; + break; + + default: + return DTERR_BAD_FORMAT; + } + + break; + + case DTZMOD: + + /* + * daylight savings time modifier (solves "MET DST" + * syntax) + */ + tmask |= DTK_M(DTZ); + tm->tm_isdst = 1; + if (tzp == NULL) + return DTERR_BAD_FORMAT; + *tzp -= val; + break; + + case DTZ: + + /* + * set mask for TZ here _or_ check for DTZ later when + * getting default timezone + */ + tmask |= DTK_M(TZ); + tm->tm_isdst = 1; + if (tzp == NULL) + return DTERR_BAD_FORMAT; + *tzp = -val; + ftype[i] = DTK_TZ; + break; + + case TZ: + tm->tm_isdst = 0; + if (tzp == NULL) + return DTERR_BAD_FORMAT; + *tzp = -val; + ftype[i] = DTK_TZ; + break; + + case DYNTZ: + tmask |= DTK_M(TZ); + if (tzp == NULL) + return DTERR_BAD_FORMAT; + /* we'll determine the actual offset later */ + abbrevTz = valtz; + abbrev = field[i]; + ftype[i] = DTK_TZ; + break; + + case AMPM: + mer = val; + break; + + case ADBC: + bc = (val == BC); + break; + + case UNITS: + tmask = 0; + /* reject consecutive unhandled units */ + if (ptype != 0) + return DTERR_BAD_FORMAT; + ptype = val; + break; + + case ISOTIME: + tmask = 0; + /* reject consecutive unhandled units */ + if (ptype != 0) + return DTERR_BAD_FORMAT; + ptype = val; + break; + + case UNKNOWN_FIELD: + + /* + * Before giving up and declaring error, check to see + * if it is an all-alpha timezone name. 
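+ * (e.g. "UTC" or "Japan")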
+ */ + namedTz = pg_tzset(field[i]); + if (!namedTz) + return DTERR_BAD_FORMAT; + /* we'll apply the zone setting below */ + tmask = DTK_M(TZ); + break; + + default: + return DTERR_BAD_FORMAT; + } + break; + + default: + return DTERR_BAD_FORMAT; + } + + if (tmask & fmask) + return DTERR_BAD_FORMAT; + fmask |= tmask; + } /* end loop over fields */ + + /* reject if prefix type appeared and was never handled */ + if (ptype != 0) + return DTERR_BAD_FORMAT; + + /* do final checking/adjustment of Y/M/D fields */ + dterr = ValidateDate(fmask, isjulian, is2digits, bc, tm); + if (dterr) + return dterr; + + /* handle AM/PM */ + if (mer != HR24 && tm->tm_hour > HOURS_PER_DAY / 2) + return DTERR_FIELD_OVERFLOW; + if (mer == AM && tm->tm_hour == HOURS_PER_DAY / 2) + tm->tm_hour = 0; + else if (mer == PM && tm->tm_hour != HOURS_PER_DAY / 2) + tm->tm_hour += HOURS_PER_DAY / 2; + + /* check for time overflow */ + if (time_overflows(tm->tm_hour, tm->tm_min, tm->tm_sec, *fsec)) + return DTERR_FIELD_OVERFLOW; + + if ((fmask & DTK_TIME_M) != DTK_TIME_M) + return DTERR_BAD_FORMAT; + + /* + * If we had a full timezone spec, compute the offset (we could not do it + * before, because we may need the date to resolve DST status). + */ + if (namedTz != NULL) + { + long int gmtoff; + + /* daylight savings time modifier disallowed with full TZ */ + if (fmask & DTK_M(DTZMOD)) + return DTERR_BAD_FORMAT; + + /* if non-DST zone, we do not need to know the date */ + if (pg_get_timezone_offset(namedTz, &gmtoff)) + { + *tzp = -(int) gmtoff; + } + else + { + /* a date has to be specified */ + if ((fmask & DTK_DATE_M) != DTK_DATE_M) + return DTERR_BAD_FORMAT; + *tzp = DetermineTimeZoneOffset(tm, namedTz); + } + } + + /* + * Likewise, if we had a dynamic timezone abbreviation, resolve it now. + */ + if (abbrevTz != NULL) + { + struct pg_tm tt, + *tmp = &tt; + + /* + * daylight savings time modifier but no standard timezone? then error + */ + if (fmask & DTK_M(DTZMOD)) + return DTERR_BAD_FORMAT; + + if ((fmask & DTK_DATE_M) == 0) + GetCurrentDateTime(tmp); + else + { + /* a date has to be specified */ + if ((fmask & DTK_DATE_M) != DTK_DATE_M) + return DTERR_BAD_FORMAT; + tmp->tm_year = tm->tm_year; + tmp->tm_mon = tm->tm_mon; + tmp->tm_mday = tm->tm_mday; + } + tmp->tm_hour = tm->tm_hour; + tmp->tm_min = tm->tm_min; + tmp->tm_sec = tm->tm_sec; + *tzp = DetermineTimeZoneAbbrevOffset(tmp, abbrev, abbrevTz); + tm->tm_isdst = tmp->tm_isdst; + } + + /* timezone not specified? then use session timezone */ + if (tzp != NULL && !(fmask & DTK_M(TZ))) + { + struct pg_tm tt, + *tmp = &tt; + + /* + * daylight savings time modifier but no standard timezone? then error + */ + if (fmask & DTK_M(DTZMOD)) + return DTERR_BAD_FORMAT; + + if ((fmask & DTK_DATE_M) == 0) + GetCurrentDateTime(tmp); + else + { + /* a date has to be specified */ + if ((fmask & DTK_DATE_M) != DTK_DATE_M) + return DTERR_BAD_FORMAT; + tmp->tm_year = tm->tm_year; + tmp->tm_mon = tm->tm_mon; + tmp->tm_mday = tm->tm_mday; + } + tmp->tm_hour = tm->tm_hour; + tmp->tm_min = tm->tm_min; + tmp->tm_sec = tm->tm_sec; + *tzp = DetermineTimeZoneOffset(tmp, session_timezone); + tm->tm_isdst = tmp->tm_isdst; + } + + return 0; +} + +/* DecodeDate() + * Decode date string which includes delimiters. + * Return 0 if okay, a DTERR code if not. 
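+ * e.g. "2003-04-12" and "12-Apr-2003" both yield the same y/m/d here.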
+ * + * str: field to be parsed + * fmask: bitmask for field types already seen + * *tmask: receives bitmask for fields found here + * *is2digits: set to true if we find 2-digit year + * *tm: field values are stored into appropriate members of this struct + */ +static int +DecodeDate(char *str, int fmask, int *tmask, bool *is2digits, + struct pg_tm *tm) +{ + fsec_t fsec; + int nf = 0; + int i, + len; + int dterr; + bool haveTextMonth = false; + int type, + val, + dmask = 0; + char *field[MAXDATEFIELDS]; + + *tmask = 0; + + /* parse this string... */ + while (*str != '\0' && nf < MAXDATEFIELDS) + { + /* skip field separators */ + while (*str != '\0' && !isalnum((unsigned char) *str)) + str++; + + if (*str == '\0') + return DTERR_BAD_FORMAT; /* end of string after separator */ + + field[nf] = str; + if (isdigit((unsigned char) *str)) + { + while (isdigit((unsigned char) *str)) + str++; + } + else if (isalpha((unsigned char) *str)) + { + while (isalpha((unsigned char) *str)) + str++; + } + + /* Just get rid of any non-digit, non-alpha characters... */ + if (*str != '\0') + *str++ = '\0'; + nf++; + } + + /* look first for text fields, since that will be unambiguous month */ + for (i = 0; i < nf; i++) + { + if (isalpha((unsigned char) *field[i])) + { + type = DecodeSpecial(i, field[i], &val); + if (type == IGNORE_DTF) + continue; + + dmask = DTK_M(type); + switch (type) + { + case MONTH: + tm->tm_mon = val; + haveTextMonth = true; + break; + + default: + return DTERR_BAD_FORMAT; + } + if (fmask & dmask) + return DTERR_BAD_FORMAT; + + fmask |= dmask; + *tmask |= dmask; + + /* mark this field as being completed */ + field[i] = NULL; + } + } + + /* now pick up remaining numeric fields */ + for (i = 0; i < nf; i++) + { + if (field[i] == NULL) + continue; + + if ((len = strlen(field[i])) <= 0) + return DTERR_BAD_FORMAT; + + dterr = DecodeNumber(len, field[i], haveTextMonth, fmask, + &dmask, tm, + &fsec, is2digits); + if (dterr) + return dterr; + + if (fmask & dmask) + return DTERR_BAD_FORMAT; + + fmask |= dmask; + *tmask |= dmask; + } + + if ((fmask & ~(DTK_M(DOY) | DTK_M(TZ))) != DTK_DATE_M) + return DTERR_BAD_FORMAT; + + /* validation of the field values must wait until ValidateDate() */ + + return 0; +} + +/* ValidateDate() + * Check valid year/month/day values, handle BC and DOY cases + * Return 0 if okay, a DTERR code if not. 
+ */ +int +ValidateDate(int fmask, bool isjulian, bool is2digits, bool bc, + struct pg_tm *tm) +{ + if (fmask & DTK_M(YEAR)) + { + if (isjulian) + { + /* tm_year is correct and should not be touched */ + } + else if (bc) + { + /* there is no year zero in AD/BC notation */ + if (tm->tm_year <= 0) + return DTERR_FIELD_OVERFLOW; + /* internally, we represent 1 BC as year zero, 2 BC as -1, etc */ + tm->tm_year = -(tm->tm_year - 1); + } + else if (is2digits) + { + /* process 1 or 2-digit input as 1970-2069 AD, allow '0' and '00' */ + if (tm->tm_year < 0) /* just paranoia */ + return DTERR_FIELD_OVERFLOW; + if (tm->tm_year < 70) + tm->tm_year += 2000; + else if (tm->tm_year < 100) + tm->tm_year += 1900; + } + else + { + /* there is no year zero in AD/BC notation */ + if (tm->tm_year <= 0) + return DTERR_FIELD_OVERFLOW; + } + } + + /* now that we have correct year, decode DOY */ + if (fmask & DTK_M(DOY)) + { + j2date(date2j(tm->tm_year, 1, 1) + tm->tm_yday - 1, + &tm->tm_year, &tm->tm_mon, &tm->tm_mday); + } + + /* check for valid month */ + if (fmask & DTK_M(MONTH)) + { + if (tm->tm_mon < 1 || tm->tm_mon > MONTHS_PER_YEAR) + return DTERR_MD_FIELD_OVERFLOW; + } + + /* minimal check for valid day */ + if (fmask & DTK_M(DAY)) + { + if (tm->tm_mday < 1 || tm->tm_mday > 31) + return DTERR_MD_FIELD_OVERFLOW; + } + + if ((fmask & DTK_DATE_M) == DTK_DATE_M) + { + /* + * Check for valid day of month, now that we know for sure the month + * and year. Note we don't use MD_FIELD_OVERFLOW here, since it seems + * unlikely that "Feb 29" is a YMD-order error. + */ + if (tm->tm_mday > day_tab[isleap(tm->tm_year)][tm->tm_mon - 1]) + return DTERR_FIELD_OVERFLOW; + } + + return 0; +} + + +/* DecodeTimeCommon() + * Decode time string which includes delimiters. + * Return 0 if okay, a DTERR code if not. + * tmask and itm are output parameters. + * + * This code is shared between the timestamp and interval cases. + * We return a struct pg_itm (of which only the tm_usec, tm_sec, tm_min, + * and tm_hour fields are used) and let the wrapper functions below + * convert and range-check as necessary. 
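+ * e.g. "04:05:06.789" yields tm_hour 4, tm_min 5, tm_sec 6, tm_usec 789000.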
+ */ +static int +DecodeTimeCommon(char *str, int fmask, int range, + int *tmask, struct pg_itm *itm) +{ + char *cp; + int dterr; + fsec_t fsec = 0; + + *tmask = DTK_TIME_M; + + errno = 0; + itm->tm_hour = strtoi64(str, &cp, 10); + if (errno == ERANGE) + return DTERR_FIELD_OVERFLOW; + if (*cp != ':') + return DTERR_BAD_FORMAT; + errno = 0; + itm->tm_min = strtoint(cp + 1, &cp, 10); + if (errno == ERANGE) + return DTERR_FIELD_OVERFLOW; + if (*cp == '\0') + { + itm->tm_sec = 0; + /* If it's a MINUTE TO SECOND interval, take 2 fields as being mm:ss */ + if (range == (INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND))) + { + if (itm->tm_hour > INT_MAX || itm->tm_hour < INT_MIN) + return DTERR_FIELD_OVERFLOW; + itm->tm_sec = itm->tm_min; + itm->tm_min = (int) itm->tm_hour; + itm->tm_hour = 0; + } + } + else if (*cp == '.') + { + /* always assume mm:ss.sss is MINUTE TO SECOND */ + dterr = ParseFractionalSecond(cp, &fsec); + if (dterr) + return dterr; + if (itm->tm_hour > INT_MAX || itm->tm_hour < INT_MIN) + return DTERR_FIELD_OVERFLOW; + itm->tm_sec = itm->tm_min; + itm->tm_min = (int) itm->tm_hour; + itm->tm_hour = 0; + } + else if (*cp == ':') + { + errno = 0; + itm->tm_sec = strtoint(cp + 1, &cp, 10); + if (errno == ERANGE) + return DTERR_FIELD_OVERFLOW; + if (*cp == '.') + { + dterr = ParseFractionalSecond(cp, &fsec); + if (dterr) + return dterr; + } + else if (*cp != '\0') + return DTERR_BAD_FORMAT; + } + else + return DTERR_BAD_FORMAT; + + /* do a sanity check; but caller must check the range of tm_hour */ + if (itm->tm_hour < 0 || + itm->tm_min < 0 || itm->tm_min > MINS_PER_HOUR - 1 || + itm->tm_sec < 0 || itm->tm_sec > SECS_PER_MINUTE || + fsec < 0 || fsec > USECS_PER_SEC) + return DTERR_FIELD_OVERFLOW; + + itm->tm_usec = (int) fsec; + + return 0; +} + +/* DecodeTime() + * Decode time string which includes delimiters. + * Return 0 if okay, a DTERR code if not. + * + * This version is used for timestamps. The results are returned into + * the tm_hour/tm_min/tm_sec fields of *tm, and microseconds into *fsec. + */ +static int +DecodeTime(char *str, int fmask, int range, + int *tmask, struct pg_tm *tm, fsec_t *fsec) +{ + struct pg_itm itm; + int dterr; + + dterr = DecodeTimeCommon(str, fmask, range, + tmask, &itm); + if (dterr) + return dterr; + + if (itm.tm_hour > INT_MAX) + return DTERR_FIELD_OVERFLOW; + tm->tm_hour = (int) itm.tm_hour; + tm->tm_min = itm.tm_min; + tm->tm_sec = itm.tm_sec; + *fsec = itm.tm_usec; + + return 0; +} + +/* DecodeTimeForInterval() + * Decode time string which includes delimiters. + * Return 0 if okay, a DTERR code if not. + * + * This version is used for intervals. The results are returned into + * itm_in->tm_usec. + */ +static int +DecodeTimeForInterval(char *str, int fmask, int range, + int *tmask, struct pg_itm_in *itm_in) +{ + struct pg_itm itm; + int dterr; + + dterr = DecodeTimeCommon(str, fmask, range, + tmask, &itm); + if (dterr) + return dterr; + + itm_in->tm_usec = itm.tm_usec; + if (!int64_multiply_add(itm.tm_hour, USECS_PER_HOUR, &itm_in->tm_usec) || + !int64_multiply_add(itm.tm_min, USECS_PER_MINUTE, &itm_in->tm_usec) || + !int64_multiply_add(itm.tm_sec, USECS_PER_SEC, &itm_in->tm_usec)) + return DTERR_FIELD_OVERFLOW; + + return 0; +} + + +/* DecodeNumber() + * Interpret plain numeric field as a date value in context. + * Return 0 if okay, a DTERR code if not. 
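+ * (a bare "12" may be a year, month, or day, depending on DateOrder and on which fields were already seen)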
+ */ +static int +DecodeNumber(int flen, char *str, bool haveTextMonth, int fmask, + int *tmask, struct pg_tm *tm, fsec_t *fsec, bool *is2digits) +{ + int val; + char *cp; + int dterr; + + *tmask = 0; + + errno = 0; + val = strtoint(str, &cp, 10); + if (errno == ERANGE) + return DTERR_FIELD_OVERFLOW; + if (cp == str) + return DTERR_BAD_FORMAT; + + if (*cp == '.') + { + /* + * More than two digits before decimal point? Then could be a date or + * a run-together time: 2001.360 20011225 040506.789 + */ + if (cp - str > 2) + { + dterr = DecodeNumberField(flen, str, + (fmask | DTK_DATE_M), + tmask, tm, + fsec, is2digits); + if (dterr < 0) + return dterr; + return 0; + } + + dterr = ParseFractionalSecond(cp, fsec); + if (dterr) + return dterr; + } + else if (*cp != '\0') + return DTERR_BAD_FORMAT; + + /* Special case for day of year */ + if (flen == 3 && (fmask & DTK_DATE_M) == DTK_M(YEAR) && val >= 1 && + val <= 366) + { + *tmask = (DTK_M(DOY) | DTK_M(MONTH) | DTK_M(DAY)); + tm->tm_yday = val; + /* tm_mon and tm_mday can't actually be set yet ... */ + return 0; + } + + /* Switch based on what we have so far */ + switch (fmask & DTK_DATE_M) + { + case 0: + + /* + * Nothing so far; make a decision about what we think the input + * is. There used to be lots of heuristics here, but the + * consensus now is to be paranoid. It *must* be either + * YYYY-MM-DD (with a more-than-two-digit year field), or the + * field order defined by DateOrder. + */ + if (flen >= 3 || DateOrder == DATEORDER_YMD) + { + *tmask = DTK_M(YEAR); + tm->tm_year = val; + } + else if (DateOrder == DATEORDER_DMY) + { + *tmask = DTK_M(DAY); + tm->tm_mday = val; + } + else + { + *tmask = DTK_M(MONTH); + tm->tm_mon = val; + } + break; + + case (DTK_M(YEAR)): + /* Must be at second field of YY-MM-DD */ + *tmask = DTK_M(MONTH); + tm->tm_mon = val; + break; + + case (DTK_M(MONTH)): + if (haveTextMonth) + { + /* + * We are at the first numeric field of a date that included a + * textual month name. We want to support the variants + * MON-DD-YYYY, DD-MON-YYYY, and YYYY-MON-DD as unambiguous + * inputs. We will also accept MON-DD-YY or DD-MON-YY in + * either DMY or MDY modes, as well as YY-MON-DD in YMD mode. 
+ */ + if (flen >= 3 || DateOrder == DATEORDER_YMD) + { + *tmask = DTK_M(YEAR); + tm->tm_year = val; + } + else + { + *tmask = DTK_M(DAY); + tm->tm_mday = val; + } + } + else + { + /* Must be at second field of MM-DD-YY */ + *tmask = DTK_M(DAY); + tm->tm_mday = val; + } + break; + + case (DTK_M(YEAR) | DTK_M(MONTH)): + if (haveTextMonth) + { + /* Need to accept DD-MON-YYYY even in YMD mode */ + if (flen >= 3 && *is2digits) + { + /* Guess that first numeric field is day was wrong */ + *tmask = DTK_M(DAY); /* YEAR is already set */ + tm->tm_mday = tm->tm_year; + tm->tm_year = val; + *is2digits = false; + } + else + { + *tmask = DTK_M(DAY); + tm->tm_mday = val; + } + } + else + { + /* Must be at third field of YY-MM-DD */ + *tmask = DTK_M(DAY); + tm->tm_mday = val; + } + break; + + case (DTK_M(DAY)): + /* Must be at second field of DD-MM-YY */ + *tmask = DTK_M(MONTH); + tm->tm_mon = val; + break; + + case (DTK_M(MONTH) | DTK_M(DAY)): + /* Must be at third field of DD-MM-YY or MM-DD-YY */ + *tmask = DTK_M(YEAR); + tm->tm_year = val; + break; + + case (DTK_M(YEAR) | DTK_M(MONTH) | DTK_M(DAY)): + /* we have all the date, so it must be a time field */ + dterr = DecodeNumberField(flen, str, fmask, + tmask, tm, + fsec, is2digits); + if (dterr < 0) + return dterr; + return 0; + + default: + /* Anything else is bogus input */ + return DTERR_BAD_FORMAT; + } + + /* + * When processing a year field, mark it for adjustment if it's only one + * or two digits. + */ + if (*tmask == DTK_M(YEAR)) + *is2digits = (flen <= 2); + + return 0; +} + + +/* DecodeNumberField() + * Interpret numeric string as a concatenated date or time field. + * Return a DTK token (>= 0) if successful, a DTERR code (< 0) if not. + * + * Use the context of previously decoded fields to help with + * the interpretation. + */ +static int +DecodeNumberField(int len, char *str, int fmask, + int *tmask, struct pg_tm *tm, fsec_t *fsec, bool *is2digits) +{ + char *cp; + + /* + * Have a decimal point? Then this is a date or something with a seconds + * field... + */ + if ((cp = strchr(str, '.')) != NULL) + { + /* + * Can we use ParseFractionalSecond here? Not clear whether trailing + * junk should be rejected ... + */ + if (cp[1] == '\0') + { + /* avoid assuming that strtod will accept "." */ + *fsec = 0; + } + else + { + double frac; + + errno = 0; + frac = strtod(cp, NULL); + if (errno != 0) + return DTERR_BAD_FORMAT; + *fsec = rint(frac * 1000000); + } + /* Now truncate off the fraction for further processing */ + *cp = '\0'; + len = strlen(str); + } + /* No decimal point and no complete date yet? */ + else if ((fmask & DTK_DATE_M) != DTK_DATE_M) + { + if (len >= 6) + { + *tmask = DTK_DATE_M; + + /* + * Start from end and consider first 2 as Day, next 2 as Month, + * and the rest as Year. + */ + tm->tm_mday = atoi(str + (len - 2)); + *(str + (len - 2)) = '\0'; + tm->tm_mon = atoi(str + (len - 4)); + *(str + (len - 4)) = '\0'; + tm->tm_year = atoi(str); + if ((len - 4) == 2) + *is2digits = true; + + return DTK_DATE; + } + } + + /* not all time fields are specified? */ + if ((fmask & DTK_TIME_M) != DTK_TIME_M) + { + /* hhmmss */ + if (len == 6) + { + *tmask = DTK_TIME_M; + tm->tm_sec = atoi(str + 4); + *(str + 4) = '\0'; + tm->tm_min = atoi(str + 2); + *(str + 2) = '\0'; + tm->tm_hour = atoi(str); + + return DTK_TIME; + } + /* hhmm? 
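+ e.g. "0405" is read as 04:05:00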
*/ + else if (len == 4) + { + *tmask = DTK_TIME_M; + tm->tm_sec = 0; + tm->tm_min = atoi(str + 2); + *(str + 2) = '\0'; + tm->tm_hour = atoi(str); + + return DTK_TIME; + } + } + + return DTERR_BAD_FORMAT; +} + + +/* DecodeTimezone() + * Interpret string as a numeric timezone. + * + * Return 0 if okay (and set *tzp), a DTERR code if not okay. + */ +int +DecodeTimezone(const char *str, int *tzp) +{ + int tz; + int hr, + min, + sec = 0; + char *cp; + + /* leading character must be "+" or "-" */ + if (*str != '+' && *str != '-') + return DTERR_BAD_FORMAT; + + errno = 0; + hr = strtoint(str + 1, &cp, 10); + if (errno == ERANGE) + return DTERR_TZDISP_OVERFLOW; + + /* explicit delimiter? */ + if (*cp == ':') + { + errno = 0; + min = strtoint(cp + 1, &cp, 10); + if (errno == ERANGE) + return DTERR_TZDISP_OVERFLOW; + if (*cp == ':') + { + errno = 0; + sec = strtoint(cp + 1, &cp, 10); + if (errno == ERANGE) + return DTERR_TZDISP_OVERFLOW; + } + } + /* otherwise, might have run things together... */ + else if (*cp == '\0' && strlen(str) > 3) + { + min = hr % 100; + hr = hr / 100; + /* we could, but don't, support a run-together hhmmss format */ + } + else + min = 0; + + /* Range-check the values; see notes in datatype/timestamp.h */ + if (hr < 0 || hr > MAX_TZDISP_HOUR) + return DTERR_TZDISP_OVERFLOW; + if (min < 0 || min >= MINS_PER_HOUR) + return DTERR_TZDISP_OVERFLOW; + if (sec < 0 || sec >= SECS_PER_MINUTE) + return DTERR_TZDISP_OVERFLOW; + + tz = (hr * MINS_PER_HOUR + min) * SECS_PER_MINUTE + sec; + if (*str == '-') + tz = -tz; + + *tzp = -tz; + + if (*cp != '\0') + return DTERR_BAD_FORMAT; + + return 0; +} + + +/* DecodeTimezoneAbbrev() + * Interpret string as a timezone abbreviation, if possible. + * + * Sets *ftype to an abbreviation type (TZ, DTZ, or DYNTZ), or UNKNOWN_FIELD if + * string is not any known abbreviation. On success, set *offset and *tz to + * represent the UTC offset (for TZ or DTZ) or underlying zone (for DYNTZ). + * Note that full timezone names (such as America/New_York) are not handled + * here, mostly for historical reasons. + * + * The function result is 0 or a DTERR code; in the latter case, *extra + * is filled as needed. Note that unknown-abbreviation is not considered + * an error case. Also note that many callers assume that the DTERR code + * is one that DateTimeParseError does not require "str" or "datatype" + * strings for. + * + * Given string must be lowercased already. + * + * Implement a cache lookup since it is likely that dates + * will be related in format. + */ +int +DecodeTimezoneAbbrev(int field, const char *lowtoken, + int *ftype, int *offset, pg_tz **tz, + DateTimeErrorExtra *extra) +{ + const datetkn *tp; + + tp = abbrevcache[field]; + /* use strncmp so that we match truncated tokens */ + if (tp == NULL || strncmp(lowtoken, tp->token, TOKMAXLEN) != 0) + { + if (zoneabbrevtbl) + tp = datebsearch(lowtoken, zoneabbrevtbl->abbrevs, + zoneabbrevtbl->numabbrevs); + else + tp = NULL; + } + if (tp == NULL) + { + *ftype = UNKNOWN_FIELD; + *offset = 0; + *tz = NULL; + } + else + { + abbrevcache[field] = tp; + *ftype = tp->type; + if (tp->type == DYNTZ) + { + *offset = 0; + *tz = FetchDynamicTimeZone(zoneabbrevtbl, tp, extra); + if (*tz == NULL) + return DTERR_BAD_ZONE_ABBREV; + } + else + { + *offset = tp->value; + *tz = NULL; + } + } + + return 0; +} + + +/* DecodeSpecial() + * Decode text string using lookup table. + * + * Recognizes the keywords listed in datetktbl. 
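+ * (e.g. month and day names, "today", "epoch", "am"/"pm", "bc"/"ad")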
+ * Note: at one time this would also recognize timezone abbreviations, + * but no more; use DecodeTimezoneAbbrev for that. + * + * Given string must be lowercased already. + * + * Implement a cache lookup since it is likely that dates + * will be related in format. + */ +int +DecodeSpecial(int field, const char *lowtoken, int *val) +{ + int type; + const datetkn *tp; + + tp = datecache[field]; + /* use strncmp so that we match truncated tokens */ + if (tp == NULL || strncmp(lowtoken, tp->token, TOKMAXLEN) != 0) + { + tp = datebsearch(lowtoken, datetktbl, szdatetktbl); + } + if (tp == NULL) + { + type = UNKNOWN_FIELD; + *val = 0; + } + else + { + datecache[field] = tp; + type = tp->type; + *val = tp->value; + } + + return type; +} + + +/* DecodeTimezoneName() + * Interpret string as a timezone abbreviation or name. + * Throw error if the name is not recognized. + * + * The return value indicates what kind of zone identifier it is: + * TZNAME_FIXED_OFFSET: fixed offset from UTC + * TZNAME_DYNTZ: dynamic timezone abbreviation + * TZNAME_ZONE: full tzdb zone name + * + * For TZNAME_FIXED_OFFSET, *offset receives the UTC offset (in seconds, + * with ISO sign convention: positive is east of Greenwich). + * For the other two cases, *tz receives the timezone struct representing + * the zone name or the abbreviation's underlying zone. + */ +int +DecodeTimezoneName(const char *tzname, int *offset, pg_tz **tz) +{ + char *lowzone; + int dterr, + type; + DateTimeErrorExtra extra; + + /* + * First we look in the timezone abbreviation table (to handle cases like + * "EST"), and if that fails, we look in the timezone database (to handle + * cases like "America/New_York"). This matches the order in which + * timestamp input checks the cases; it's important because the timezone + * database unwisely uses a few zone names that are identical to offset + * abbreviations. + */ + + /* DecodeTimezoneAbbrev requires lowercase input */ + lowzone = downcase_truncate_identifier(tzname, + strlen(tzname), + false); + + dterr = DecodeTimezoneAbbrev(0, lowzone, &type, offset, tz, &extra); + if (dterr) + DateTimeParseError(dterr, &extra, NULL, NULL, NULL); + + if (type == TZ || type == DTZ) + { + /* fixed-offset abbreviation, return the offset */ + return TZNAME_FIXED_OFFSET; + } + else if (type == DYNTZ) + { + /* dynamic-offset abbreviation, return its referenced timezone */ + return TZNAME_DYNTZ; + } + else + { + /* try it as a full zone name */ + *tz = pg_tzset(tzname); + if (*tz == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("time zone \"%s\" not recognized", tzname))); + return TZNAME_ZONE; + } +} + +/* DecodeTimezoneNameToTz() + * Interpret string as a timezone abbreviation or name. + * Throw error if the name is not recognized. + * + * This is a simple wrapper for DecodeTimezoneName that produces a pg_tz * + * result in all cases. + */ +pg_tz * +DecodeTimezoneNameToTz(const char *tzname) +{ + pg_tz *result; + int offset; + + if (DecodeTimezoneName(tzname, &offset, &result) == TZNAME_FIXED_OFFSET) + { + /* fixed-offset abbreviation, get a pg_tz descriptor for that */ + result = pg_tzset_offset(-offset); /* flip to POSIX sign convention */ + } + return result; +} + + +/* ClearPgItmIn + * + * Zero out a pg_itm_in + */ +static inline void +ClearPgItmIn(struct pg_itm_in *itm_in) +{ + itm_in->tm_usec = 0; + itm_in->tm_mday = 0; + itm_in->tm_mon = 0; + itm_in->tm_year = 0; +} + + +/* DecodeInterval() + * Interpret previously parsed fields for general time interval. 
+ * Returns 0 if successful, DTERR code if bogus input detected. + * dtype and itm_in are output parameters. + * + * Allow "date" field DTK_DATE since this could be just + * an unsigned floating point number. - thomas 1997-11-16 + * + * Allow ISO-style time span, with implicit units on number of days + * preceding an hh:mm:ss field. - thomas 1998-04-30 + */ +int +DecodeInterval(char **field, int *ftype, int nf, int range, + int *dtype, struct pg_itm_in *itm_in) +{ + bool force_negative = false; + bool is_before = false; + char *cp; + int fmask = 0, + tmask, + type, + uval; + int i; + int dterr; + int64 val; + double fval; + + *dtype = DTK_DELTA; + type = IGNORE_DTF; + ClearPgItmIn(itm_in); + + /*---------- + * The SQL standard defines the interval literal + * '-1 1:00:00' + * to mean "negative 1 days and negative 1 hours", while Postgres + * traditionally treats this as meaning "negative 1 days and positive + * 1 hours". In SQL_STANDARD intervalstyle, we apply the leading sign + * to all fields if there are no other explicit signs. + * + * We leave the signs alone if there are additional explicit signs. + * This protects us against misinterpreting postgres-style dump output, + * since the postgres-style output code has always put an explicit sign on + * all fields following a negative field. But note that SQL-spec output + * is ambiguous and can be misinterpreted on load! (So it's best practice + * to dump in postgres style, not SQL style.) + *---------- + */ + if (IntervalStyle == INTSTYLE_SQL_STANDARD && nf > 0 && *field[0] == '-') + { + force_negative = true; + /* Check for additional explicit signs */ + for (i = 1; i < nf; i++) + { + if (*field[i] == '-' || *field[i] == '+') + { + force_negative = false; + break; + } + } + } + + /* read through list backwards to pick up units before values */ + for (i = nf - 1; i >= 0; i--) + { + switch (ftype[i]) + { + case DTK_TIME: + dterr = DecodeTimeForInterval(field[i], fmask, range, + &tmask, itm_in); + if (dterr) + return dterr; + if (force_negative && + itm_in->tm_usec > 0) + itm_in->tm_usec = -itm_in->tm_usec; + type = DTK_DAY; + break; + + case DTK_TZ: + + /* + * Timezone means a token with a leading sign character and at + * least one digit; there could be ':', '.', '-' embedded in + * it as well. + */ + Assert(*field[i] == '-' || *field[i] == '+'); + + /* + * Check for signed hh:mm or hh:mm:ss. If so, process exactly + * like DTK_TIME case above, plus handling the sign. + */ + if (strchr(field[i] + 1, ':') != NULL && + DecodeTimeForInterval(field[i] + 1, fmask, range, + &tmask, itm_in) == 0) + { + if (*field[i] == '-') + { + /* flip the sign on time field */ + if (itm_in->tm_usec == PG_INT64_MIN) + return DTERR_FIELD_OVERFLOW; + itm_in->tm_usec = -itm_in->tm_usec; + } + + if (force_negative && + itm_in->tm_usec > 0) + itm_in->tm_usec = -itm_in->tm_usec; + + /* + * Set the next type to be a day, if units are not + * specified. This handles the case of '1 +02:03' since we + * are reading right to left. + */ + type = DTK_DAY; + break; + } + + /* + * Otherwise, fall through to DTK_NUMBER case, which can + * handle signed float numbers and signed year-month values. 
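+ * (e.g. "-1-2", meaning minus 1 year 2 months)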
+ */ + + /* FALLTHROUGH */ + + case DTK_DATE: + case DTK_NUMBER: + if (type == IGNORE_DTF) + { + /* use typmod to decide what rightmost field is */ + switch (range) + { + case INTERVAL_MASK(YEAR): + type = DTK_YEAR; + break; + case INTERVAL_MASK(MONTH): + case INTERVAL_MASK(YEAR) | INTERVAL_MASK(MONTH): + type = DTK_MONTH; + break; + case INTERVAL_MASK(DAY): + type = DTK_DAY; + break; + case INTERVAL_MASK(HOUR): + case INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR): + type = DTK_HOUR; + break; + case INTERVAL_MASK(MINUTE): + case INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE): + case INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE): + type = DTK_MINUTE; + break; + case INTERVAL_MASK(SECOND): + case INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND): + case INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND): + case INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND): + type = DTK_SECOND; + break; + default: + type = DTK_SECOND; + break; + } + } + + errno = 0; + val = strtoi64(field[i], &cp, 10); + if (errno == ERANGE) + return DTERR_FIELD_OVERFLOW; + + if (*cp == '-') + { + /* SQL "years-months" syntax */ + int val2; + + val2 = strtoint(cp + 1, &cp, 10); + if (errno == ERANGE || val2 < 0 || val2 >= MONTHS_PER_YEAR) + return DTERR_FIELD_OVERFLOW; + if (*cp != '\0') + return DTERR_BAD_FORMAT; + type = DTK_MONTH; + if (*field[i] == '-') + val2 = -val2; + if (pg_mul_s64_overflow(val, MONTHS_PER_YEAR, &val)) + return DTERR_FIELD_OVERFLOW; + if (pg_add_s64_overflow(val, val2, &val)) + return DTERR_FIELD_OVERFLOW; + fval = 0; + } + else if (*cp == '.') + { + dterr = ParseFraction(cp, &fval); + if (dterr) + return dterr; + if (*field[i] == '-') + fval = -fval; + } + else if (*cp == '\0') + fval = 0; + else + return DTERR_BAD_FORMAT; + + tmask = 0; /* DTK_M(type); */ + + if (force_negative) + { + /* val and fval should be of same sign, but test anyway */ + if (val > 0) + val = -val; + if (fval > 0) + fval = -fval; + } + + switch (type) + { + case DTK_MICROSEC: + if (!AdjustMicroseconds(val, fval, 1, itm_in)) + return DTERR_FIELD_OVERFLOW; + tmask = DTK_M(MICROSECOND); + break; + + case DTK_MILLISEC: + if (!AdjustMicroseconds(val, fval, 1000, itm_in)) + return DTERR_FIELD_OVERFLOW; + tmask = DTK_M(MILLISECOND); + break; + + case DTK_SECOND: + if (!AdjustMicroseconds(val, fval, USECS_PER_SEC, itm_in)) + return DTERR_FIELD_OVERFLOW; + + /* + * If any subseconds were specified, consider this + * microsecond and millisecond input as well. 
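+ * (so e.g. "0.5 sec 300 msec" is then rejected as a conflict)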
+ */ + if (fval == 0) + tmask = DTK_M(SECOND); + else + tmask = DTK_ALL_SECS_M; + break; + + case DTK_MINUTE: + if (!AdjustMicroseconds(val, fval, USECS_PER_MINUTE, itm_in)) + return DTERR_FIELD_OVERFLOW; + tmask = DTK_M(MINUTE); + break; + + case DTK_HOUR: + if (!AdjustMicroseconds(val, fval, USECS_PER_HOUR, itm_in)) + return DTERR_FIELD_OVERFLOW; + tmask = DTK_M(HOUR); + type = DTK_DAY; /* set for next field */ + break; + + case DTK_DAY: + if (!AdjustDays(val, 1, itm_in) || + !AdjustFractMicroseconds(fval, USECS_PER_DAY, itm_in)) + return DTERR_FIELD_OVERFLOW; + tmask = DTK_M(DAY); + break; + + case DTK_WEEK: + if (!AdjustDays(val, 7, itm_in) || + !AdjustFractDays(fval, 7, itm_in)) + return DTERR_FIELD_OVERFLOW; + tmask = DTK_M(WEEK); + break; + + case DTK_MONTH: + if (!AdjustMonths(val, itm_in) || + !AdjustFractDays(fval, DAYS_PER_MONTH, itm_in)) + return DTERR_FIELD_OVERFLOW; + tmask = DTK_M(MONTH); + break; + + case DTK_YEAR: + if (!AdjustYears(val, 1, itm_in) || + !AdjustFractYears(fval, 1, itm_in)) + return DTERR_FIELD_OVERFLOW; + tmask = DTK_M(YEAR); + break; + + case DTK_DECADE: + if (!AdjustYears(val, 10, itm_in) || + !AdjustFractYears(fval, 10, itm_in)) + return DTERR_FIELD_OVERFLOW; + tmask = DTK_M(DECADE); + break; + + case DTK_CENTURY: + if (!AdjustYears(val, 100, itm_in) || + !AdjustFractYears(fval, 100, itm_in)) + return DTERR_FIELD_OVERFLOW; + tmask = DTK_M(CENTURY); + break; + + case DTK_MILLENNIUM: + if (!AdjustYears(val, 1000, itm_in) || + !AdjustFractYears(fval, 1000, itm_in)) + return DTERR_FIELD_OVERFLOW; + tmask = DTK_M(MILLENNIUM); + break; + + default: + return DTERR_BAD_FORMAT; + } + break; + + case DTK_STRING: + case DTK_SPECIAL: + type = DecodeUnits(i, field[i], &uval); + if (type == IGNORE_DTF) + continue; + + tmask = 0; /* DTK_M(type); */ + switch (type) + { + case UNITS: + type = uval; + break; + + case AGO: + is_before = true; + type = uval; + break; + + case RESERV: + tmask = (DTK_DATE_M | DTK_TIME_M); + *dtype = uval; + break; + + default: + return DTERR_BAD_FORMAT; + } + break; + + default: + return DTERR_BAD_FORMAT; + } + + if (tmask & fmask) + return DTERR_BAD_FORMAT; + fmask |= tmask; + } + + /* ensure that at least one time field has been found */ + if (fmask == 0) + return DTERR_BAD_FORMAT; + + /* finally, AGO negates everything */ + if (is_before) + { + if (itm_in->tm_usec == PG_INT64_MIN || + itm_in->tm_mday == INT_MIN || + itm_in->tm_mon == INT_MIN || + itm_in->tm_year == INT_MIN) + return DTERR_FIELD_OVERFLOW; + + itm_in->tm_usec = -itm_in->tm_usec; + itm_in->tm_mday = -itm_in->tm_mday; + itm_in->tm_mon = -itm_in->tm_mon; + itm_in->tm_year = -itm_in->tm_year; + } + + return 0; +} + + +/* + * Helper functions to avoid duplicated code in DecodeISO8601Interval. + * + * Parse a decimal value and break it into integer and fractional parts. + * Set *endptr to end+1 of the parsed substring. + * Returns 0 or DTERR code. + */ +static int +ParseISO8601Number(char *str, char **endptr, int64 *ipart, double *fpart) +{ + double val; + + /* + * Historically this has accepted anything that strtod() would take, + * notably including "e" notation, so continue doing that. This is + * slightly annoying because the precision of double is less than that of + * int64, so we would lose accuracy for inputs larger than 2^53 or so. + * However, historically we rejected inputs outside the int32 range, + * making that concern moot. 
What we do now is reject abs(val) above + * 1.0e15 (a round number a bit less than 2^50), so that any accepted + * value will have an exact integer part, and thereby a fraction part with + * abs(*fpart) less than 1. In the absence of field complaints it doesn't + * seem worth working harder. + */ + if (!(isdigit((unsigned char) *str) || *str == '-' || *str == '.')) + return DTERR_BAD_FORMAT; + errno = 0; + val = strtod(str, endptr); + /* did we not see anything that looks like a double? */ + if (*endptr == str || errno != 0) + return DTERR_BAD_FORMAT; + /* watch out for overflow, including infinities; reject NaN too */ + if (isnan(val) || val < -1.0e15 || val > 1.0e15) + return DTERR_FIELD_OVERFLOW; + /* be very sure we truncate towards zero (cf dtrunc()) */ + if (val >= 0) + *ipart = (int64) floor(val); + else + *ipart = (int64) -floor(-val); + *fpart = val - *ipart; + /* Callers expect this to hold */ + Assert(*fpart > -1.0 && *fpart < 1.0); + return 0; +} + +/* + * Determine number of integral digits in a valid ISO 8601 number field + * (we should ignore sign and any fraction part) + */ +static int +ISO8601IntegerWidth(char *fieldstart) +{ + /* We might have had a leading '-' */ + if (*fieldstart == '-') + fieldstart++; + return strspn(fieldstart, "0123456789"); +} + + +/* DecodeISO8601Interval() + * Decode an ISO 8601 time interval of the "format with designators" + * (section 4.4.3.2) or "alternative format" (section 4.4.3.3) + * Examples: P1D for 1 day + * PT1H for 1 hour + * P2Y6M7DT1H30M for 2 years, 6 months, 7 days 1 hour 30 min + * P0002-06-07T01:30:00 the same value in alternative format + * + * Returns 0 if successful, DTERR code if bogus input detected. + * Note: error code should be DTERR_BAD_FORMAT if input doesn't look like + * ISO8601, otherwise this could cause unexpected error messages. + * dtype and itm_in are output parameters. + * + * A couple exceptions from the spec: + * - a week field ('W') may coexist with other units + * - allows decimals in fields other than the least significant unit. + */ +int +DecodeISO8601Interval(char *str, + int *dtype, struct pg_itm_in *itm_in) +{ + bool datepart = true; + bool havefield = false; + + *dtype = DTK_DELTA; + ClearPgItmIn(itm_in); + + if (strlen(str) < 2 || str[0] != 'P') + return DTERR_BAD_FORMAT; + + str++; + while (*str) + { + char *fieldstart; + int64 val; + double fval; + char unit; + int dterr; + + if (*str == 'T') /* T indicates the beginning of the time part */ + { + datepart = false; + havefield = false; + str++; + continue; + } + + fieldstart = str; + dterr = ParseISO8601Number(str, &str, &val, &fval); + if (dterr) + return dterr; + + /* + * Note: we could step off the end of the string here. Code below + * *must* exit the loop if unit == '\0'. 
+ */ + unit = *str++; + + if (datepart) + { + switch (unit) /* before T: Y M W D */ + { + case 'Y': + if (!AdjustYears(val, 1, itm_in) || + !AdjustFractYears(fval, 1, itm_in)) + return DTERR_FIELD_OVERFLOW; + break; + case 'M': + if (!AdjustMonths(val, itm_in) || + !AdjustFractDays(fval, DAYS_PER_MONTH, itm_in)) + return DTERR_FIELD_OVERFLOW; + break; + case 'W': + if (!AdjustDays(val, 7, itm_in) || + !AdjustFractDays(fval, 7, itm_in)) + return DTERR_FIELD_OVERFLOW; + break; + case 'D': + if (!AdjustDays(val, 1, itm_in) || + !AdjustFractMicroseconds(fval, USECS_PER_DAY, itm_in)) + return DTERR_FIELD_OVERFLOW; + break; + case 'T': /* ISO 8601 4.4.3.3 Alternative Format / Basic */ + case '\0': + if (ISO8601IntegerWidth(fieldstart) == 8 && !havefield) + { + if (!AdjustYears(val / 10000, 1, itm_in) || + !AdjustMonths((val / 100) % 100, itm_in) || + !AdjustDays(val % 100, 1, itm_in) || + !AdjustFractMicroseconds(fval, USECS_PER_DAY, itm_in)) + return DTERR_FIELD_OVERFLOW; + if (unit == '\0') + return 0; + datepart = false; + havefield = false; + continue; + } + /* Else fall through to extended alternative format */ + /* FALLTHROUGH */ + case '-': /* ISO 8601 4.4.3.3 Alternative Format, + * Extended */ + if (havefield) + return DTERR_BAD_FORMAT; + + if (!AdjustYears(val, 1, itm_in) || + !AdjustFractYears(fval, 1, itm_in)) + return DTERR_FIELD_OVERFLOW; + if (unit == '\0') + return 0; + if (unit == 'T') + { + datepart = false; + havefield = false; + continue; + } + + dterr = ParseISO8601Number(str, &str, &val, &fval); + if (dterr) + return dterr; + if (!AdjustMonths(val, itm_in) || + !AdjustFractDays(fval, DAYS_PER_MONTH, itm_in)) + return DTERR_FIELD_OVERFLOW; + if (*str == '\0') + return 0; + if (*str == 'T') + { + datepart = false; + havefield = false; + continue; + } + if (*str != '-') + return DTERR_BAD_FORMAT; + str++; + + dterr = ParseISO8601Number(str, &str, &val, &fval); + if (dterr) + return dterr; + if (!AdjustDays(val, 1, itm_in) || + !AdjustFractMicroseconds(fval, USECS_PER_DAY, itm_in)) + return DTERR_FIELD_OVERFLOW; + if (*str == '\0') + return 0; + if (*str == 'T') + { + datepart = false; + havefield = false; + continue; + } + return DTERR_BAD_FORMAT; + default: + /* not a valid date unit suffix */ + return DTERR_BAD_FORMAT; + } + } + else + { + switch (unit) /* after T: H M S */ + { + case 'H': + if (!AdjustMicroseconds(val, fval, USECS_PER_HOUR, itm_in)) + return DTERR_FIELD_OVERFLOW; + break; + case 'M': + if (!AdjustMicroseconds(val, fval, USECS_PER_MINUTE, itm_in)) + return DTERR_FIELD_OVERFLOW; + break; + case 'S': + if (!AdjustMicroseconds(val, fval, USECS_PER_SEC, itm_in)) + return DTERR_FIELD_OVERFLOW; + break; + case '\0': /* ISO 8601 4.4.3.3 Alternative Format */ + if (ISO8601IntegerWidth(fieldstart) == 6 && !havefield) + { + if (!AdjustMicroseconds(val / 10000, 0, USECS_PER_HOUR, itm_in) || + !AdjustMicroseconds((val / 100) % 100, 0, USECS_PER_MINUTE, itm_in) || + !AdjustMicroseconds(val % 100, 0, USECS_PER_SEC, itm_in) || + !AdjustFractMicroseconds(fval, 1, itm_in)) + return DTERR_FIELD_OVERFLOW; + return 0; + } + /* Else fall through to extended alternative format */ + /* FALLTHROUGH */ + case ':': /* ISO 8601 4.4.3.3 Alternative Format, + * Extended */ + if (havefield) + return DTERR_BAD_FORMAT; + + if (!AdjustMicroseconds(val, fval, USECS_PER_HOUR, itm_in)) + return DTERR_FIELD_OVERFLOW; + if (unit == '\0') + return 0; + + dterr = ParseISO8601Number(str, &str, &val, &fval); + if (dterr) + return dterr; + if (!AdjustMicroseconds(val, fval, USECS_PER_MINUTE, itm_in)) 
+ return DTERR_FIELD_OVERFLOW; + if (*str == '\0') + return 0; + if (*str != ':') + return DTERR_BAD_FORMAT; + str++; + + dterr = ParseISO8601Number(str, &str, &val, &fval); + if (dterr) + return dterr; + if (!AdjustMicroseconds(val, fval, USECS_PER_SEC, itm_in)) + return DTERR_FIELD_OVERFLOW; + if (*str == '\0') + return 0; + return DTERR_BAD_FORMAT; + + default: + /* not a valid time unit suffix */ + return DTERR_BAD_FORMAT; + } + } + + havefield = true; + } + + return 0; +} + + +/* DecodeUnits() + * Decode text string using lookup table. + * + * This routine recognizes keywords associated with time interval units. + * + * Given string must be lowercased already. + * + * Implement a cache lookup since it is likely that dates + * will be related in format. + */ +int +DecodeUnits(int field, const char *lowtoken, int *val) +{ + int type; + const datetkn *tp; + + tp = deltacache[field]; + /* use strncmp so that we match truncated tokens */ + if (tp == NULL || strncmp(lowtoken, tp->token, TOKMAXLEN) != 0) + { + tp = datebsearch(lowtoken, deltatktbl, szdeltatktbl); + } + if (tp == NULL) + { + type = UNKNOWN_FIELD; + *val = 0; + } + else + { + deltacache[field] = tp; + type = tp->type; + *val = tp->value; + } + + return type; +} /* DecodeUnits() */ + +/* + * Report an error detected by one of the datetime input processing routines. + * + * dterr is the error code, and *extra contains any auxiliary info we need + * for the error report. extra can be NULL if not needed for the particular + * dterr value. + * + * str is the original input string, and datatype is the name of the datatype + * we were trying to accept. (For some DTERR codes, these are not used and + * can be NULL.) + * + * If escontext points to an ErrorSaveContext node, that is filled instead + * of throwing an error. + * + * Note: it might seem useless to distinguish DTERR_INTERVAL_OVERFLOW and + * DTERR_TZDISP_OVERFLOW from DTERR_FIELD_OVERFLOW, but SQL99 mandates three + * separate SQLSTATE codes, so ... 
+ */ +void +DateTimeParseError(int dterr, DateTimeErrorExtra *extra, + const char *str, const char *datatype, + Node *escontext) +{ + switch (dterr) + { + case DTERR_FIELD_OVERFLOW: + errsave(escontext, + (errcode(ERRCODE_DATETIME_FIELD_OVERFLOW), + errmsg("date/time field value out of range: \"%s\"", + str))); + break; + case DTERR_MD_FIELD_OVERFLOW: + /* <nanny>same as above, but add hint about DateStyle</nanny> */ + errsave(escontext, + (errcode(ERRCODE_DATETIME_FIELD_OVERFLOW), + errmsg("date/time field value out of range: \"%s\"", + str), + errhint("Perhaps you need a different \"datestyle\" setting."))); + break; + case DTERR_INTERVAL_OVERFLOW: + errsave(escontext, + (errcode(ERRCODE_INTERVAL_FIELD_OVERFLOW), + errmsg("interval field value out of range: \"%s\"", + str))); + break; + case DTERR_TZDISP_OVERFLOW: + errsave(escontext, + (errcode(ERRCODE_INVALID_TIME_ZONE_DISPLACEMENT_VALUE), + errmsg("time zone displacement out of range: \"%s\"", + str))); + break; + case DTERR_BAD_TIMEZONE: + errsave(escontext, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("time zone \"%s\" not recognized", + extra->dtee_timezone))); + break; + case DTERR_BAD_ZONE_ABBREV: + errsave(escontext, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("time zone \"%s\" not recognized", + extra->dtee_timezone), + errdetail("This time zone name appears in the configuration file for time zone abbreviation \"%s\".", + extra->dtee_abbrev))); + break; + case DTERR_BAD_FORMAT: + default: + errsave(escontext, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("invalid input syntax for type %s: \"%s\"", + datatype, str))); + break; + } +} + +/* datebsearch() + * Binary search -- from Knuth (6.2.1) Algorithm B. Special case like this + * is WAY faster than the generic bsearch(). + */ +static const datetkn * +datebsearch(const char *key, const datetkn *base, int nel) +{ + if (nel > 0) + { + const datetkn *last = base + nel - 1, + *position; + int result; + + while (last >= base) + { + position = base + ((last - base) >> 1); + /* precheck the first character for a bit of extra speed */ + result = (int) key[0] - (int) position->token[0]; + if (result == 0) + { + /* use strncmp so that we match truncated tokens */ + result = strncmp(key, position->token, TOKMAXLEN); + if (result == 0) + return position; + } + if (result < 0) + last = position - 1; + else + base = position + 1; + } + } + return NULL; +} + +/* EncodeTimezone() + * Copies representation of a numeric timezone offset to str. + * + * Returns a pointer to the new end of string. No NUL terminator is put + * there; callers are responsible for NUL terminating str themselves. + */ +static char * +EncodeTimezone(char *str, int tz, int style) +{ + int hour, + min, + sec; + + sec = abs(tz); + min = sec / SECS_PER_MINUTE; + sec -= min * SECS_PER_MINUTE; + hour = min / MINS_PER_HOUR; + min -= hour * MINS_PER_HOUR; + + /* TZ is negated compared to sign we wish to display ... */ + *str++ = (tz <= 0 ? '+' : '-'); + + if (sec != 0) + { + str = pg_ultostr_zeropad(str, hour, 2); + *str++ = ':'; + str = pg_ultostr_zeropad(str, min, 2); + *str++ = ':'; + str = pg_ultostr_zeropad(str, sec, 2); + } + else if (min != 0 || style == USE_XSD_DATES) + { + str = pg_ultostr_zeropad(str, hour, 2); + *str++ = ':'; + str = pg_ultostr_zeropad(str, min, 2); + } + else + str = pg_ultostr_zeropad(str, hour, 2); + return str; +} + +/* EncodeDateOnly() + * Encode date as local time. 
+ */ +void +EncodeDateOnly(struct pg_tm *tm, int style, char *str) +{ + Assert(tm->tm_mon >= 1 && tm->tm_mon <= MONTHS_PER_YEAR); + + switch (style) + { + case USE_ISO_DATES: + case USE_XSD_DATES: + /* compatible with ISO date formats */ + str = pg_ultostr_zeropad(str, + (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), 4); + *str++ = '-'; + str = pg_ultostr_zeropad(str, tm->tm_mon, 2); + *str++ = '-'; + str = pg_ultostr_zeropad(str, tm->tm_mday, 2); + break; + + case USE_SQL_DATES: + /* compatible with Oracle/Ingres date formats */ + if (DateOrder == DATEORDER_DMY) + { + str = pg_ultostr_zeropad(str, tm->tm_mday, 2); + *str++ = '/'; + str = pg_ultostr_zeropad(str, tm->tm_mon, 2); + } + else + { + str = pg_ultostr_zeropad(str, tm->tm_mon, 2); + *str++ = '/'; + str = pg_ultostr_zeropad(str, tm->tm_mday, 2); + } + *str++ = '/'; + str = pg_ultostr_zeropad(str, + (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), 4); + break; + + case USE_GERMAN_DATES: + /* German-style date format */ + str = pg_ultostr_zeropad(str, tm->tm_mday, 2); + *str++ = '.'; + str = pg_ultostr_zeropad(str, tm->tm_mon, 2); + *str++ = '.'; + str = pg_ultostr_zeropad(str, + (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), 4); + break; + + case USE_POSTGRES_DATES: + default: + /* traditional date-only style for Postgres */ + if (DateOrder == DATEORDER_DMY) + { + str = pg_ultostr_zeropad(str, tm->tm_mday, 2); + *str++ = '-'; + str = pg_ultostr_zeropad(str, tm->tm_mon, 2); + } + else + { + str = pg_ultostr_zeropad(str, tm->tm_mon, 2); + *str++ = '-'; + str = pg_ultostr_zeropad(str, tm->tm_mday, 2); + } + *str++ = '-'; + str = pg_ultostr_zeropad(str, + (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), 4); + break; + } + + if (tm->tm_year <= 0) + { + memcpy(str, " BC", 3); /* Don't copy NUL */ + str += 3; + } + *str = '\0'; +} + + +/* EncodeTimeOnly() + * Encode time fields only. + * + * tm and fsec are the value to encode, print_tz determines whether to include + * a time zone (the difference between time and timetz types), tz is the + * numeric time zone offset, style is the date style, str is where to write the + * output. + */ +void +EncodeTimeOnly(struct pg_tm *tm, fsec_t fsec, bool print_tz, int tz, int style, char *str) +{ + str = pg_ultostr_zeropad(str, tm->tm_hour, 2); + *str++ = ':'; + str = pg_ultostr_zeropad(str, tm->tm_min, 2); + *str++ = ':'; + str = AppendSeconds(str, tm->tm_sec, fsec, MAX_TIME_PRECISION, true); + if (print_tz) + str = EncodeTimezone(str, tz, style); + *str = '\0'; +} + + +/* EncodeDateTime() + * Encode date and time interpreted as local time. + * + * tm and fsec are the value to encode, print_tz determines whether to include + * a time zone (the difference between timestamp and timestamptz types), tz is + * the numeric time zone offset, tzn is the textual time zone, which if + * specified will be used instead of tz by some styles, style is the date + * style, str is where to write the output. + * + * Supported date styles: + * Postgres - day mon hh:mm:ss yyyy tz + * SQL - mm/dd/yyyy hh:mm:ss.ss tz + * ISO - yyyy-mm-dd hh:mm:ss+/-tz + * German - dd.mm.yyyy hh:mm:ss tz + * XSD - yyyy-mm-ddThh:mm:ss.ss+/-tz + */ +void +EncodeDateTime(struct pg_tm *tm, fsec_t fsec, bool print_tz, int tz, const char *tzn, int style, char *str) +{ + int day; + + Assert(tm->tm_mon >= 1 && tm->tm_mon <= MONTHS_PER_YEAR); + + /* + * Negative tm_isdst means we have no valid time zone translation. 
+ */ + if (tm->tm_isdst < 0) + print_tz = false; + + switch (style) + { + case USE_ISO_DATES: + case USE_XSD_DATES: + /* Compatible with ISO-8601 date formats */ + str = pg_ultostr_zeropad(str, + (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), 4); + *str++ = '-'; + str = pg_ultostr_zeropad(str, tm->tm_mon, 2); + *str++ = '-'; + str = pg_ultostr_zeropad(str, tm->tm_mday, 2); + *str++ = (style == USE_ISO_DATES) ? ' ' : 'T'; + str = pg_ultostr_zeropad(str, tm->tm_hour, 2); + *str++ = ':'; + str = pg_ultostr_zeropad(str, tm->tm_min, 2); + *str++ = ':'; + str = AppendTimestampSeconds(str, tm, fsec); + if (print_tz) + str = EncodeTimezone(str, tz, style); + break; + + case USE_SQL_DATES: + /* Compatible with Oracle/Ingres date formats */ + if (DateOrder == DATEORDER_DMY) + { + str = pg_ultostr_zeropad(str, tm->tm_mday, 2); + *str++ = '/'; + str = pg_ultostr_zeropad(str, tm->tm_mon, 2); + } + else + { + str = pg_ultostr_zeropad(str, tm->tm_mon, 2); + *str++ = '/'; + str = pg_ultostr_zeropad(str, tm->tm_mday, 2); + } + *str++ = '/'; + str = pg_ultostr_zeropad(str, + (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), 4); + *str++ = ' '; + str = pg_ultostr_zeropad(str, tm->tm_hour, 2); + *str++ = ':'; + str = pg_ultostr_zeropad(str, tm->tm_min, 2); + *str++ = ':'; + str = AppendTimestampSeconds(str, tm, fsec); + + /* + * Note: the uses of %.*s in this function would be risky if the + * timezone names ever contain non-ASCII characters, since we are + * not being careful to do encoding-aware clipping. However, all + * TZ abbreviations in the IANA database are plain ASCII. + */ + if (print_tz) + { + if (tzn) + { + sprintf(str, " %.*s", MAXTZLEN, tzn); + str += strlen(str); + } + else + str = EncodeTimezone(str, tz, style); + } + break; + + case USE_GERMAN_DATES: + /* German variant on European style */ + str = pg_ultostr_zeropad(str, tm->tm_mday, 2); + *str++ = '.'; + str = pg_ultostr_zeropad(str, tm->tm_mon, 2); + *str++ = '.'; + str = pg_ultostr_zeropad(str, + (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), 4); + *str++ = ' '; + str = pg_ultostr_zeropad(str, tm->tm_hour, 2); + *str++ = ':'; + str = pg_ultostr_zeropad(str, tm->tm_min, 2); + *str++ = ':'; + str = AppendTimestampSeconds(str, tm, fsec); + + if (print_tz) + { + if (tzn) + { + sprintf(str, " %.*s", MAXTZLEN, tzn); + str += strlen(str); + } + else + str = EncodeTimezone(str, tz, style); + } + break; + + case USE_POSTGRES_DATES: + default: + /* Backward-compatible with traditional Postgres abstime dates */ + day = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday); + tm->tm_wday = j2day(day); + memcpy(str, days[tm->tm_wday], 3); + str += 3; + *str++ = ' '; + if (DateOrder == DATEORDER_DMY) + { + str = pg_ultostr_zeropad(str, tm->tm_mday, 2); + *str++ = ' '; + memcpy(str, months[tm->tm_mon - 1], 3); + str += 3; + } + else + { + memcpy(str, months[tm->tm_mon - 1], 3); + str += 3; + *str++ = ' '; + str = pg_ultostr_zeropad(str, tm->tm_mday, 2); + } + *str++ = ' '; + str = pg_ultostr_zeropad(str, tm->tm_hour, 2); + *str++ = ':'; + str = pg_ultostr_zeropad(str, tm->tm_min, 2); + *str++ = ':'; + str = AppendTimestampSeconds(str, tm, fsec); + *str++ = ' '; + str = pg_ultostr_zeropad(str, + (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), 4); + + if (print_tz) + { + if (tzn) + { + sprintf(str, " %.*s", MAXTZLEN, tzn); + str += strlen(str); + } + else + { + /* + * We have a time zone, but no string version. 
Use the + * numeric form, but be sure to include a leading space to + * avoid formatting something which would be rejected by + * the date/time parser later. - thomas 2001-10-19 + */ + *str++ = ' '; + str = EncodeTimezone(str, tz, style); + } + } + break; + } + + if (tm->tm_year <= 0) + { + memcpy(str, " BC", 3); /* Don't copy NUL */ + str += 3; + } + *str = '\0'; +} + + +/* + * Helper functions to avoid duplicated code in EncodeInterval. + */ + +/* Append an ISO-8601-style interval field, but only if value isn't zero */ +static char * +AddISO8601IntPart(char *cp, int64 value, char units) +{ + if (value == 0) + return cp; + sprintf(cp, "%lld%c", (long long) value, units); + return cp + strlen(cp); +} + +/* Append a postgres-style interval field, but only if value isn't zero */ +static char * +AddPostgresIntPart(char *cp, int64 value, const char *units, + bool *is_zero, bool *is_before) +{ + if (value == 0) + return cp; + sprintf(cp, "%s%s%lld %s%s", + (!*is_zero) ? " " : "", + (*is_before && value > 0) ? "+" : "", + (long long) value, + units, + (value != 1) ? "s" : ""); + + /* + * Each nonzero field sets is_before for (only) the next one. This is a + * tad bizarre but it's how it worked before... + */ + *is_before = (value < 0); + *is_zero = false; + return cp + strlen(cp); +} + +/* Append a verbose-style interval field, but only if value isn't zero */ +static char * +AddVerboseIntPart(char *cp, int64 value, const char *units, + bool *is_zero, bool *is_before) +{ + if (value == 0) + return cp; + /* first nonzero value sets is_before */ + if (*is_zero) + { + *is_before = (value < 0); + value = i64abs(value); + } + else if (*is_before) + value = -value; + sprintf(cp, " %lld %s%s", (long long) value, units, (value == 1) ? "" : "s"); + *is_zero = false; + return cp + strlen(cp); +} + + +/* EncodeInterval() + * Interpret time structure as a delta time and convert to string. + * + * Support "traditional Postgres" and ISO-8601 styles. + * Actually, afaik ISO does not address time interval formatting, + * but this looks similar to the spec for absolute date/time. + * - thomas 1998-04-30 + * + * Actually, afaik, ISO 8601 does specify formats for "time + * intervals...[of the]...format with time-unit designators", which + * are pretty ugly. The format looks something like + * P1Y1M1DT1H1M1.12345S + * but useful for exchanging data with computers instead of humans. + * - ron 2003-07-14 + * + * And ISO's SQL 2008 standard specifies standards for + * "year-month literal"s (that look like '2-3') and + * "day-time literal"s (that look like ('4 5:6:7') + */ +void +EncodeInterval(struct pg_itm *itm, int style, char *str) +{ + char *cp = str; + int year = itm->tm_year; + int mon = itm->tm_mon; + int64 mday = itm->tm_mday; /* tm_mday could be INT_MIN */ + int64 hour = itm->tm_hour; + int min = itm->tm_min; + int sec = itm->tm_sec; + int fsec = itm->tm_usec; + bool is_before = false; + bool is_zero = true; + + /* + * The sign of year and month are guaranteed to match, since they are + * stored internally as "month". But we'll need to check for is_before and + * is_zero when determining the signs of day and hour/minute/seconds + * fields. 
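The EncodeInterval() comment above describes the ISO 8601 duration form P1Y1M1DT1H1M1.12345S, and AddISO8601IntPart() emits each "<value><unit>" piece only when the value is nonzero. A self-contained sketch of how such a string is assembled from interval components (the values are made up; fractional seconds are omitted, and the all-zero case is special-cased to PT0S just as the real code does):

#include <stdio.h>
#include <string.h>

/* append "<value><unit>" only if value is nonzero; return new end of string */
static char *
add_part(char *cp, long long value, char unit)
{
    if (value == 0)
        return cp;
    sprintf(cp, "%lld%c", value, unit);
    return cp + strlen(cp);
}

int
main(void)
{
    long long year = 1, mon = 2, mday = 3, hour = 4, min = 5, sec = 6;
    char buf[64];
    char *cp = buf;

    if (year == 0 && mon == 0 && mday == 0 &&
        hour == 0 && min == 0 && sec == 0)
        strcpy(buf, "PT0S");            /* never print an empty duration */
    else
    {
        *cp++ = 'P';
        cp = add_part(cp, year, 'Y');
        cp = add_part(cp, mon, 'M');
        cp = add_part(cp, mday, 'D');
        if (hour != 0 || min != 0 || sec != 0)
            *cp++ = 'T';                /* time designator only if needed */
        cp = add_part(cp, hour, 'H');
        cp = add_part(cp, min, 'M');
        cp = add_part(cp, sec, 'S');
        *cp = '\0';
    }
    printf("%s\n", buf);                /* prints P1Y2M3DT4H5M6S */
    return 0;
}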
+ */ + switch (style) + { + /* SQL Standard interval format */ + case INTSTYLE_SQL_STANDARD: + { + bool has_negative = year < 0 || mon < 0 || + mday < 0 || hour < 0 || + min < 0 || sec < 0 || fsec < 0; + bool has_positive = year > 0 || mon > 0 || + mday > 0 || hour > 0 || + min > 0 || sec > 0 || fsec > 0; + bool has_year_month = year != 0 || mon != 0; + bool has_day_time = mday != 0 || hour != 0 || + min != 0 || sec != 0 || fsec != 0; + bool has_day = mday != 0; + bool sql_standard_value = !(has_negative && has_positive) && + !(has_year_month && has_day_time); + + /* + * SQL Standard wants only 1 "<sign>" preceding the whole + * interval ... but can't do that if mixed signs. + */ + if (has_negative && sql_standard_value) + { + *cp++ = '-'; + year = -year; + mon = -mon; + mday = -mday; + hour = -hour; + min = -min; + sec = -sec; + fsec = -fsec; + } + + if (!has_negative && !has_positive) + { + sprintf(cp, "0"); + } + else if (!sql_standard_value) + { + /* + * For non sql-standard interval values, force outputting + * the signs to avoid ambiguities with intervals with + * mixed sign components. + */ + char year_sign = (year < 0 || mon < 0) ? '-' : '+'; + char day_sign = (mday < 0) ? '-' : '+'; + char sec_sign = (hour < 0 || min < 0 || + sec < 0 || fsec < 0) ? '-' : '+'; + + sprintf(cp, "%c%d-%d %c%lld %c%lld:%02d:", + year_sign, abs(year), abs(mon), + day_sign, (long long) i64abs(mday), + sec_sign, (long long) i64abs(hour), abs(min)); + cp += strlen(cp); + cp = AppendSeconds(cp, sec, fsec, MAX_INTERVAL_PRECISION, true); + *cp = '\0'; + } + else if (has_year_month) + { + sprintf(cp, "%d-%d", year, mon); + } + else if (has_day) + { + sprintf(cp, "%lld %lld:%02d:", + (long long) mday, (long long) hour, min); + cp += strlen(cp); + cp = AppendSeconds(cp, sec, fsec, MAX_INTERVAL_PRECISION, true); + *cp = '\0'; + } + else + { + sprintf(cp, "%lld:%02d:", (long long) hour, min); + cp += strlen(cp); + cp = AppendSeconds(cp, sec, fsec, MAX_INTERVAL_PRECISION, true); + *cp = '\0'; + } + } + break; + + /* ISO 8601 "time-intervals by duration only" */ + case INTSTYLE_ISO_8601: + /* special-case zero to avoid printing nothing */ + if (year == 0 && mon == 0 && mday == 0 && + hour == 0 && min == 0 && sec == 0 && fsec == 0) + { + sprintf(cp, "PT0S"); + break; + } + *cp++ = 'P'; + cp = AddISO8601IntPart(cp, year, 'Y'); + cp = AddISO8601IntPart(cp, mon, 'M'); + cp = AddISO8601IntPart(cp, mday, 'D'); + if (hour != 0 || min != 0 || sec != 0 || fsec != 0) + *cp++ = 'T'; + cp = AddISO8601IntPart(cp, hour, 'H'); + cp = AddISO8601IntPart(cp, min, 'M'); + if (sec != 0 || fsec != 0) + { + if (sec < 0 || fsec < 0) + *cp++ = '-'; + cp = AppendSeconds(cp, sec, fsec, MAX_INTERVAL_PRECISION, false); + *cp++ = 'S'; + *cp++ = '\0'; + } + break; + + /* Compatible with postgresql < 8.4 when DateStyle = 'iso' */ + case INTSTYLE_POSTGRES: + cp = AddPostgresIntPart(cp, year, "year", &is_zero, &is_before); + + /* + * Ideally we should spell out "month" like we do for "year" and + * "day". However, for backward compatibility, we can't easily + * fix this. bjm 2011-05-24 + */ + cp = AddPostgresIntPart(cp, mon, "mon", &is_zero, &is_before); + cp = AddPostgresIntPart(cp, mday, "day", &is_zero, &is_before); + if (is_zero || hour != 0 || min != 0 || sec != 0 || fsec != 0) + { + bool minus = (hour < 0 || min < 0 || sec < 0 || fsec < 0); + + sprintf(cp, "%s%s%02lld:%02d:", + is_zero ? "" : " ", + (minus ? "-" : (is_before ? 
"+" : "")), + (long long) i64abs(hour), abs(min)); + cp += strlen(cp); + cp = AppendSeconds(cp, sec, fsec, MAX_INTERVAL_PRECISION, true); + *cp = '\0'; + } + break; + + /* Compatible with postgresql < 8.4 when DateStyle != 'iso' */ + case INTSTYLE_POSTGRES_VERBOSE: + default: + strcpy(cp, "@"); + cp++; + cp = AddVerboseIntPart(cp, year, "year", &is_zero, &is_before); + cp = AddVerboseIntPart(cp, mon, "mon", &is_zero, &is_before); + cp = AddVerboseIntPart(cp, mday, "day", &is_zero, &is_before); + cp = AddVerboseIntPart(cp, hour, "hour", &is_zero, &is_before); + cp = AddVerboseIntPart(cp, min, "min", &is_zero, &is_before); + if (sec != 0 || fsec != 0) + { + *cp++ = ' '; + if (sec < 0 || (sec == 0 && fsec < 0)) + { + if (is_zero) + is_before = true; + else if (!is_before) + *cp++ = '-'; + } + else if (is_before) + *cp++ = '-'; + cp = AppendSeconds(cp, sec, fsec, MAX_INTERVAL_PRECISION, false); + /* We output "ago", not negatives, so use abs(). */ + sprintf(cp, " sec%s", + (abs(sec) != 1 || fsec != 0) ? "s" : ""); + is_zero = false; + } + /* identically zero? then put in a unitless zero... */ + if (is_zero) + strcat(cp, " 0"); + if (is_before) + strcat(cp, " ago"); + break; + } +} + + +/* + * We've been burnt by stupid errors in the ordering of the datetkn tables + * once too often. Arrange to check them during postmaster start. + */ +static bool +CheckDateTokenTable(const char *tablename, const datetkn *base, int nel) +{ + bool ok = true; + int i; + + for (i = 0; i < nel; i++) + { + /* check for token strings that don't fit */ + if (strlen(base[i].token) > TOKMAXLEN) + { + /* %.*s is safe since all our tokens are ASCII */ + elog(LOG, "token too long in %s table: \"%.*s\"", + tablename, + TOKMAXLEN + 1, base[i].token); + ok = false; + break; /* don't risk applying strcmp */ + } + /* check for out of order */ + if (i > 0 && + strcmp(base[i - 1].token, base[i].token) >= 0) + { + elog(LOG, "ordering error in %s table: \"%s\" >= \"%s\"", + tablename, + base[i - 1].token, + base[i].token); + ok = false; + } + } + return ok; +} + +bool +CheckDateTokenTables(void) +{ + bool ok = true; + + Assert(UNIX_EPOCH_JDATE == date2j(1970, 1, 1)); + Assert(POSTGRES_EPOCH_JDATE == date2j(2000, 1, 1)); + + ok &= CheckDateTokenTable("datetktbl", datetktbl, szdatetktbl); + ok &= CheckDateTokenTable("deltatktbl", deltatktbl, szdeltatktbl); + return ok; +} + +/* + * Common code for temporal prosupport functions: simplify, if possible, + * a call to a temporal type's length-coercion function. + * + * Types time, timetz, timestamp and timestamptz each have a range of allowed + * precisions. An unspecified precision is rigorously equivalent to the + * highest specifiable precision. We can replace the function call with a + * no-op RelabelType if it is coercing to the same or higher precision as the + * input is known to have. + * + * The input Node is always a FuncExpr, but to reduce the #include footprint + * of datetime.h, we declare it as Node *. + * + * Note: timestamp_scale throws an error when the typmod is out of range, but + * we can't get there from a cast: our typmodin will have caught it already. 
+ */ +Node * +TemporalSimplify(int32 max_precis, Node *node) +{ + FuncExpr *expr = castNode(FuncExpr, node); + Node *ret = NULL; + Node *typmod; + + Assert(list_length(expr->args) >= 2); + + typmod = (Node *) lsecond(expr->args); + + if (IsA(typmod, Const) && !((Const *) typmod)->constisnull) + { + Node *source = (Node *) linitial(expr->args); + int32 old_precis = exprTypmod(source); + int32 new_precis = DatumGetInt32(((Const *) typmod)->constvalue); + + if (new_precis < 0 || new_precis == max_precis || + (old_precis >= 0 && new_precis >= old_precis)) + ret = relabel_to_typmod(source, new_precis); + } + + return ret; +} + +/* + * This function gets called during timezone config file load or reload + * to create the final array of timezone tokens. The argument array + * is already sorted in name order. + * + * The result is a TimeZoneAbbrevTable (which must be a single guc_malloc'd + * chunk) or NULL on alloc failure. No other error conditions are defined. + */ +TimeZoneAbbrevTable * +ConvertTimeZoneAbbrevs(struct tzEntry *abbrevs, int n) +{ + TimeZoneAbbrevTable *tbl; + Size tbl_size; + int i; + + /* Space for fixed fields and datetkn array */ + tbl_size = offsetof(TimeZoneAbbrevTable, abbrevs) + + n * sizeof(datetkn); + tbl_size = MAXALIGN(tbl_size); + /* Count up space for dynamic abbreviations */ + for (i = 0; i < n; i++) + { + struct tzEntry *abbr = abbrevs + i; + + if (abbr->zone != NULL) + { + Size dsize; + + dsize = offsetof(DynamicZoneAbbrev, zone) + + strlen(abbr->zone) + 1; + tbl_size += MAXALIGN(dsize); + } + } + + /* Alloc the result ... */ + tbl = guc_malloc(LOG, tbl_size); + if (!tbl) + return NULL; + + /* ... and fill it in */ + tbl->tblsize = tbl_size; + tbl->numabbrevs = n; + /* in this loop, tbl_size reprises the space calculation above */ + tbl_size = offsetof(TimeZoneAbbrevTable, abbrevs) + + n * sizeof(datetkn); + tbl_size = MAXALIGN(tbl_size); + for (i = 0; i < n; i++) + { + struct tzEntry *abbr = abbrevs + i; + datetkn *dtoken = tbl->abbrevs + i; + + /* use strlcpy to truncate name if necessary */ + strlcpy(dtoken->token, abbr->abbrev, TOKMAXLEN + 1); + if (abbr->zone != NULL) + { + /* Allocate a DynamicZoneAbbrev for this abbreviation */ + DynamicZoneAbbrev *dtza; + Size dsize; + + dtza = (DynamicZoneAbbrev *) ((char *) tbl + tbl_size); + dtza->tz = NULL; + strcpy(dtza->zone, abbr->zone); + + dtoken->type = DYNTZ; + /* value is offset from table start to DynamicZoneAbbrev */ + dtoken->value = (int32) tbl_size; + + dsize = offsetof(DynamicZoneAbbrev, zone) + + strlen(abbr->zone) + 1; + tbl_size += MAXALIGN(dsize); + } + else + { + dtoken->type = abbr->is_dst ? DTZ : TZ; + dtoken->value = abbr->offset; + } + } + + /* Assert the two loops above agreed on size calculations */ + Assert(tbl->tblsize == tbl_size); + + /* Check the ordering, if testing */ + Assert(CheckDateTokenTable("timezone abbreviations", tbl->abbrevs, n)); + + return tbl; +} + +/* + * Install a TimeZoneAbbrevTable as the active table. + * + * Caller is responsible that the passed table doesn't go away while in use. + */ +void +InstallTimeZoneAbbrevs(TimeZoneAbbrevTable *tbl) +{ + zoneabbrevtbl = tbl; + /* reset abbrevcache, which may contain pointers into old table */ + memset(abbrevcache, 0, sizeof(abbrevcache)); +} + +/* + * Helper subroutine to locate pg_tz timezone for a dynamic abbreviation. + * + * On failure, returns NULL and fills *extra for a DTERR_BAD_ZONE_ABBREV error. 
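ConvertTimeZoneAbbrevs() above lays out a fixed array of datetkn entries followed by variable-length DynamicZoneAbbrev records inside a single allocation, storing each dynamic record's byte offset from the table start in the token's value field; FetchDynamicTimeZone() below turns that offset back into a pointer. A minimal standalone sketch of this single-allocation, offset-addressed layout; the struct and field names here are invented for illustration and ALIGNUP is a crude stand-in for MAXALIGN:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>

#define ALIGNUP(x) (((x) + 7) & ~(size_t) 7)      /* stand-in for MAXALIGN */

struct entry   { char name[8]; size_t payload_off; };   /* fixed-size part */
struct payload { int  extra;   char zone[]; };           /* variable part  */

int
main(void)
{
    const char *zones[] = {"Europe/Moscow", "Asia/Tokyo"};
    int         n = 2;
    size_t      size = ALIGNUP(n * sizeof(struct entry));
    char       *tbl;
    size_t      off;

    /* first pass: compute the total size, as ConvertTimeZoneAbbrevs does */
    for (int i = 0; i < n; i++)
        size += ALIGNUP(offsetof(struct payload, zone) + strlen(zones[i]) + 1);

    tbl = malloc(size);
    if (tbl == NULL)
        return 1;

    /* second pass: fill the fixed array and append payloads, saving offsets */
    off = ALIGNUP(n * sizeof(struct entry));
    for (int i = 0; i < n; i++)
    {
        struct entry   *e = (struct entry *) tbl + i;
        struct payload *p = (struct payload *) (tbl + off);

        snprintf(e->name, sizeof(e->name), "tz%d", i);
        e->payload_off = off;
        p->extra = i;
        strcpy(p->zone, zones[i]);
        off += ALIGNUP(offsetof(struct payload, zone) + strlen(zones[i]) + 1);
    }

    /* resolve an entry's payload via its stored offset, like FetchDynamicTimeZone */
    struct entry   *e = (struct entry *) tbl + 1;
    struct payload *p = (struct payload *) (tbl + e->payload_off);

    printf("%s -> %s\n", e->name, p->zone);       /* tz1 -> Asia/Tokyo */
    free(tbl);
    return 0;
}

Because every record lives inside the one chunk and is addressed by offset rather than pointer, the whole table can be handed around (or, as in the real code, installed with InstallTimeZoneAbbrevs) without any per-entry allocations to track.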
+ */ +static pg_tz * +FetchDynamicTimeZone(TimeZoneAbbrevTable *tbl, const datetkn *tp, + DateTimeErrorExtra *extra) +{ + DynamicZoneAbbrev *dtza; + + /* Just some sanity checks to prevent indexing off into nowhere */ + Assert(tp->type == DYNTZ); + Assert(tp->value > 0 && tp->value < tbl->tblsize); + + dtza = (DynamicZoneAbbrev *) ((char *) tbl + tp->value); + + /* Look up the underlying zone if we haven't already */ + if (dtza->tz == NULL) + { + dtza->tz = pg_tzset(dtza->zone); + if (dtza->tz == NULL) + { + /* Ooops, bogus zone name in config file entry */ + extra->dtee_timezone = dtza->zone; + extra->dtee_abbrev = tp->token; + } + } + return dtza->tz; +} + + +/* + * This set-returning function reads all the available time zone abbreviations + * and returns a set of (abbrev, utc_offset, is_dst). + */ +Datum +pg_timezone_abbrevs(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + int *pindex; + Datum result; + HeapTuple tuple; + Datum values[3]; + bool nulls[3] = {0}; + const datetkn *tp; + char buffer[TOKMAXLEN + 1]; + int gmtoffset; + bool is_dst; + unsigned char *p; + struct pg_itm_in itm_in; + Interval *resInterval; + + /* stuff done only on the first call of the function */ + if (SRF_IS_FIRSTCALL()) + { + TupleDesc tupdesc; + MemoryContext oldcontext; + + /* create a function context for cross-call persistence */ + funcctx = SRF_FIRSTCALL_INIT(); + + /* + * switch to memory context appropriate for multiple function calls + */ + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* allocate memory for user context */ + pindex = (int *) palloc(sizeof(int)); + *pindex = 0; + funcctx->user_fctx = (void *) pindex; + + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + funcctx->tuple_desc = tupdesc; + + MemoryContextSwitchTo(oldcontext); + } + + /* stuff done on every call of the function */ + funcctx = SRF_PERCALL_SETUP(); + pindex = (int *) funcctx->user_fctx; + + if (zoneabbrevtbl == NULL || + *pindex >= zoneabbrevtbl->numabbrevs) + SRF_RETURN_DONE(funcctx); + + tp = zoneabbrevtbl->abbrevs + *pindex; + + switch (tp->type) + { + case TZ: + gmtoffset = tp->value; + is_dst = false; + break; + case DTZ: + gmtoffset = tp->value; + is_dst = true; + break; + case DYNTZ: + { + /* Determine the current meaning of the abbrev */ + pg_tz *tzp; + DateTimeErrorExtra extra; + TimestampTz now; + int isdst; + + tzp = FetchDynamicTimeZone(zoneabbrevtbl, tp, &extra); + if (tzp == NULL) + DateTimeParseError(DTERR_BAD_ZONE_ABBREV, &extra, + NULL, NULL, NULL); + now = GetCurrentTransactionStartTimestamp(); + gmtoffset = -DetermineTimeZoneAbbrevOffsetTS(now, + tp->token, + tzp, + &isdst); + is_dst = (bool) isdst; + break; + } + default: + elog(ERROR, "unrecognized timezone type %d", (int) tp->type); + gmtoffset = 0; /* keep compiler quiet */ + is_dst = false; + break; + } + + /* + * Convert name to text, using upcasing conversion that is the inverse of + * what ParseDateTime() uses. 
+ */ + strlcpy(buffer, tp->token, sizeof(buffer)); + for (p = (unsigned char *) buffer; *p; p++) + *p = pg_toupper(*p); + + values[0] = CStringGetTextDatum(buffer); + + /* Convert offset (in seconds) to an interval; can't overflow */ + MemSet(&itm_in, 0, sizeof(struct pg_itm_in)); + itm_in.tm_usec = (int64) gmtoffset * USECS_PER_SEC; + resInterval = (Interval *) palloc(sizeof(Interval)); + (void) itmin2interval(&itm_in, resInterval); + values[1] = IntervalPGetDatum(resInterval); + + values[2] = BoolGetDatum(is_dst); + + (*pindex)++; + + tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); + result = HeapTupleGetDatum(tuple); + + SRF_RETURN_NEXT(funcctx, result); +} + +/* + * This set-returning function reads all the available full time zones + * and returns a set of (name, abbrev, utc_offset, is_dst). + */ +Datum +pg_timezone_names(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + pg_tzenum *tzenum; + pg_tz *tz; + Datum values[4]; + bool nulls[4] = {0}; + int tzoff; + struct pg_tm tm; + fsec_t fsec; + const char *tzn; + Interval *resInterval; + struct pg_itm_in itm_in; + + InitMaterializedSRF(fcinfo, 0); + + /* initialize timezone scanning code */ + tzenum = pg_tzenumerate_start(); + + /* search for another zone to display */ + for (;;) + { + tz = pg_tzenumerate_next(tzenum); + if (!tz) + break; + + /* Convert now() to local time in this zone */ + if (timestamp2tm(GetCurrentTransactionStartTimestamp(), + &tzoff, &tm, &fsec, &tzn, tz) != 0) + continue; /* ignore if conversion fails */ + + /* + * IANA's rather silly "Factory" time zone used to emit ridiculously + * long "abbreviations" such as "Local time zone must be set--see zic + * manual page" or "Local time zone must be set--use tzsetup". While + * modern versions of tzdb emit the much saner "-00", it seems some + * benighted packagers are hacking the IANA data so that it continues + * to produce these strings. To prevent producing a weirdly wide + * abbrev column, reject ridiculously long abbreviations. + */ + if (tzn && strlen(tzn) > 31) + continue; + + values[0] = CStringGetTextDatum(pg_get_timezone_name(tz)); + values[1] = CStringGetTextDatum(tzn ? tzn : ""); + + /* Convert tzoff to an interval; can't overflow */ + MemSet(&itm_in, 0, sizeof(struct pg_itm_in)); + itm_in.tm_usec = (int64) -tzoff * USECS_PER_SEC; + resInterval = (Interval *) palloc(sizeof(Interval)); + (void) itmin2interval(&itm_in, resInterval); + values[2] = IntervalPGetDatum(resInterval); + + values[3] = BoolGetDatum(tm.tm_isdst > 0); + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls); + } + + pg_tzenumerate_end(tzenum); + return (Datum) 0; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/datum.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/datum.c new file mode 100644 index 00000000000..251dd23ca81 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/datum.c @@ -0,0 +1,554 @@ +/*------------------------------------------------------------------------- + * + * datum.c + * POSTGRES Datum (abstract data type) manipulation routines. 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/datum.c + * + *------------------------------------------------------------------------- + */ + +/* + * In the implementation of these routines we assume the following: + * + * A) if a type is "byVal" then all the information is stored in the + * Datum itself (i.e. no pointers involved!). In this case the + * length of the type is always greater than zero and not more than + * "sizeof(Datum)" + * + * B) if a type is not "byVal" and it has a fixed length (typlen > 0), + * then the "Datum" always contains a pointer to a stream of bytes. + * The number of significant bytes are always equal to the typlen. + * + * C) if a type is not "byVal" and has typlen == -1, + * then the "Datum" always points to a "struct varlena". + * This varlena structure has information about the actual length of this + * particular instance of the type and about its value. + * + * D) if a type is not "byVal" and has typlen == -2, + * then the "Datum" always points to a null-terminated C string. + * + * Note that we do not treat "toasted" datums specially; therefore what + * will be copied or compared is the compressed data or toast reference. + * An exception is made for datumCopy() of an expanded object, however, + * because most callers expect to get a simple contiguous (and pfree'able) + * result from datumCopy(). See also datumTransfer(). + */ + +#include "postgres.h" + +#include "access/detoast.h" +#include "catalog/pg_type_d.h" +#include "common/hashfn.h" +#include "fmgr.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/expandeddatum.h" + + +/*------------------------------------------------------------------------- + * datumGetSize + * + * Find the "real" size of a datum, given the datum value, + * whether it is a "by value", and the declared type length. + * (For TOAST pointer datums, this is the size of the pointer datum.) + * + * This is essentially an out-of-line version of the att_addlength_datum() + * macro in access/tupmacs.h. We do a tad more error checking though. + *------------------------------------------------------------------------- + */ +Size +datumGetSize(Datum value, bool typByVal, int typLen) +{ + Size size; + + if (typByVal) + { + /* Pass-by-value types are always fixed-length */ + Assert(typLen > 0 && typLen <= sizeof(Datum)); + size = (Size) typLen; + } + else + { + if (typLen > 0) + { + /* Fixed-length pass-by-ref type */ + size = (Size) typLen; + } + else if (typLen == -1) + { + /* It is a varlena datatype */ + struct varlena *s = (struct varlena *) DatumGetPointer(value); + + if (!PointerIsValid(s)) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("invalid Datum pointer"))); + + size = (Size) VARSIZE_ANY(s); + } + else if (typLen == -2) + { + /* It is a cstring datatype */ + char *s = (char *) DatumGetPointer(value); + + if (!PointerIsValid(s)) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("invalid Datum pointer"))); + + size = (Size) (strlen(s) + 1); + } + else + { + elog(ERROR, "invalid typLen: %d", typLen); + size = 0; /* keep compiler quiet */ + } + } + + return size; +} + +/*------------------------------------------------------------------------- + * datumCopy + * + * Make a copy of a non-NULL datum. + * + * If the datatype is pass-by-reference, memory is obtained with palloc(). 
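Representation classes (C) and (D) from the header comment above differ mainly in how a value's size is discovered: a varlena value carries an explicit length header (so its payload may contain NUL bytes), while a cstring is measured by scanning for its terminator. A deliberately simplified standalone illustration; the struct below is a stand-in for this sketch only, not the real struct varlena, which is more elaborate:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

/* toy "varlena": a 4-byte total-length header followed by the payload */
struct fake_varlena
{
    uint32_t vl_len;            /* total size in bytes, header included */
    char     vl_dat[60];
};

int
main(void)
{
    struct fake_varlena v;
    const char *cstr = "hello";             /* class (D): NUL-terminated */

    memcpy(v.vl_dat, "hi\0there", 8);       /* payload may embed NULs */
    v.vl_len = 4 + 8;

    /* class (C): size comes from the header, embedded NULs are fine */
    printf("varlena size = %u\n", (unsigned) v.vl_len);
    /* class (D): size must be found by scanning for the terminator */
    printf("cstring size = %zu\n", strlen(cstr) + 1);
    return 0;
}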
+ * + * If the value is a reference to an expanded object, we flatten into memory + * obtained with palloc(). We need to copy because one of the main uses of + * this function is to copy a datum out of a transient memory context that's + * about to be destroyed, and the expanded object is probably in a child + * context that will also go away. Moreover, many callers assume that the + * result is a single pfree-able chunk. + *------------------------------------------------------------------------- + */ +Datum +datumCopy(Datum value, bool typByVal, int typLen) +{ + Datum res; + + if (typByVal) + res = value; + else if (typLen == -1) + { + /* It is a varlena datatype */ + struct varlena *vl = (struct varlena *) DatumGetPointer(value); + + if (VARATT_IS_EXTERNAL_EXPANDED(vl)) + { + /* Flatten into the caller's memory context */ + ExpandedObjectHeader *eoh = DatumGetEOHP(value); + Size resultsize; + char *resultptr; + + resultsize = EOH_get_flat_size(eoh); + resultptr = (char *) palloc(resultsize); + EOH_flatten_into(eoh, (void *) resultptr, resultsize); + res = PointerGetDatum(resultptr); + } + else + { + /* Otherwise, just copy the varlena datum verbatim */ + Size realSize; + char *resultptr; + + realSize = (Size) VARSIZE_ANY(vl); + resultptr = (char *) palloc(realSize); + memcpy(resultptr, vl, realSize); + res = PointerGetDatum(resultptr); + } + } + else + { + /* Pass by reference, but not varlena, so not toasted */ + Size realSize; + char *resultptr; + + realSize = datumGetSize(value, typByVal, typLen); + + resultptr = (char *) palloc(realSize); + memcpy(resultptr, DatumGetPointer(value), realSize); + res = PointerGetDatum(resultptr); + } + return res; +} + +/*------------------------------------------------------------------------- + * datumTransfer + * + * Transfer a non-NULL datum into the current memory context. + * + * This is equivalent to datumCopy() except when the datum is a read-write + * pointer to an expanded object. In that case we merely reparent the object + * into the current context, and return its standard R/W pointer (in case the + * given one is a transient pointer of shorter lifespan). + *------------------------------------------------------------------------- + */ +Datum +datumTransfer(Datum value, bool typByVal, int typLen) +{ + if (!typByVal && typLen == -1 && + VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(value))) + value = TransferExpandedObject(value, CurrentMemoryContext); + else + value = datumCopy(value, typByVal, typLen); + return value; +} + +/*------------------------------------------------------------------------- + * datumIsEqual + * + * Return true if two datums are equal, false otherwise + * + * NOTE: XXX! + * We just compare the bytes of the two values, one by one. + * This routine will return false if there are 2 different + * representations of the same value (something along the lines + * of say the representation of zero in one's complement arithmetic). + * Also, it will probably not give the answer you want if either + * datum has been "toasted". + * + * Do not try to make this any smarter than it currently is with respect + * to "toasted" datums, because some of the callers could be working in the + * context of an aborted transaction. + *------------------------------------------------------------------------- + */ +bool +datumIsEqual(Datum value1, Datum value2, bool typByVal, int typLen) +{ + bool res; + + if (typByVal) + { + /* + * just compare the two datums. 
NOTE: just comparing "len" bytes will + * not do the work, because we do not know how these bytes are aligned + * inside the "Datum". We assume instead that any given datatype is + * consistent about how it fills extraneous bits in the Datum. + */ + res = (value1 == value2); + } + else + { + Size size1, + size2; + char *s1, + *s2; + + /* + * Compare the bytes pointed by the pointers stored in the datums. + */ + size1 = datumGetSize(value1, typByVal, typLen); + size2 = datumGetSize(value2, typByVal, typLen); + if (size1 != size2) + return false; + s1 = (char *) DatumGetPointer(value1); + s2 = (char *) DatumGetPointer(value2); + res = (memcmp(s1, s2, size1) == 0); + } + return res; +} + +/*------------------------------------------------------------------------- + * datum_image_eq + * + * Compares two datums for identical contents, based on byte images. Return + * true if the two datums are equal, false otherwise. + *------------------------------------------------------------------------- + */ +bool +datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen) +{ + Size len1, + len2; + bool result = true; + + if (typByVal) + { + result = (value1 == value2); + } + else if (typLen > 0) + { + result = (memcmp(DatumGetPointer(value1), + DatumGetPointer(value2), + typLen) == 0); + } + else if (typLen == -1) + { + len1 = toast_raw_datum_size(value1); + len2 = toast_raw_datum_size(value2); + /* No need to de-toast if lengths don't match. */ + if (len1 != len2) + result = false; + else + { + struct varlena *arg1val; + struct varlena *arg2val; + + arg1val = PG_DETOAST_DATUM_PACKED(value1); + arg2val = PG_DETOAST_DATUM_PACKED(value2); + + result = (memcmp(VARDATA_ANY(arg1val), + VARDATA_ANY(arg2val), + len1 - VARHDRSZ) == 0); + + /* Only free memory if it's a copy made here. */ + if ((Pointer) arg1val != (Pointer) value1) + pfree(arg1val); + if ((Pointer) arg2val != (Pointer) value2) + pfree(arg2val); + } + } + else if (typLen == -2) + { + char *s1, + *s2; + + /* Compare cstring datums */ + s1 = DatumGetCString(value1); + s2 = DatumGetCString(value2); + len1 = strlen(s1) + 1; + len2 = strlen(s2) + 1; + if (len1 != len2) + return false; + result = (memcmp(s1, s2, len1) == 0); + } + else + elog(ERROR, "unexpected typLen: %d", typLen); + + return result; +} + +/*------------------------------------------------------------------------- + * datum_image_hash + * + * Generate a hash value based on the binary representation of 'value'. Most + * use cases will want to use the hash function specific to the Datum's type, + * however, some corner cases require generating a hash value based on the + * actual bits rather than the logical value. + *------------------------------------------------------------------------- + */ +uint32 +datum_image_hash(Datum value, bool typByVal, int typLen) +{ + Size len; + uint32 result; + + if (typByVal) + result = hash_bytes((unsigned char *) &value, sizeof(Datum)); + else if (typLen > 0) + result = hash_bytes((unsigned char *) DatumGetPointer(value), typLen); + else if (typLen == -1) + { + struct varlena *val; + + len = toast_raw_datum_size(value); + + val = PG_DETOAST_DATUM_PACKED(value); + + result = hash_bytes((unsigned char *) VARDATA_ANY(val), len - VARHDRSZ); + + /* Only free memory if it's a copy made here. 
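The datumIsEqual() note above warns that byte-wise comparison can report "not equal" for two different representations of the same logical value. A classic standalone illustration is IEEE 754 signed zero: +0.0 and -0.0 compare equal as doubles yet differ byte for byte, which is the same kind of mismatch that keeps some types from relying on image-based equality such as btequalimage() below:

#include <stdio.h>
#include <string.h>

int
main(void)
{
    double a = 0.0;
    double b = -0.0;

    printf("value equal: %d\n", a == b);                          /* 1 */
    printf("image equal: %d\n", memcmp(&a, &b, sizeof a) == 0);   /* 0 */
    return 0;
}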
*/ + if ((Pointer) val != (Pointer) value) + pfree(val); + } + else if (typLen == -2) + { + char *s; + + s = DatumGetCString(value); + len = strlen(s) + 1; + + result = hash_bytes((unsigned char *) s, len); + } + else + { + elog(ERROR, "unexpected typLen: %d", typLen); + result = 0; /* keep compiler quiet */ + } + + return result; +} + +/*------------------------------------------------------------------------- + * btequalimage + * + * Generic "equalimage" support function. + * + * B-Tree operator classes whose equality function could safely be replaced by + * datum_image_eq() in all cases can use this as their "equalimage" support + * function. + * + * Earlier minor releases erroneously associated this function with + * interval_ops. Detect that case to rescind deduplication support, without + * requiring initdb. + *------------------------------------------------------------------------- + */ +Datum +btequalimage(PG_FUNCTION_ARGS) +{ + Oid opcintype = PG_GETARG_OID(0); + + PG_RETURN_BOOL(opcintype != INTERVALOID); +} + +/*------------------------------------------------------------------------- + * datumEstimateSpace + * + * Compute the amount of space that datumSerialize will require for a + * particular Datum. + *------------------------------------------------------------------------- + */ +Size +datumEstimateSpace(Datum value, bool isnull, bool typByVal, int typLen) +{ + Size sz = sizeof(int); + + if (!isnull) + { + /* no need to use add_size, can't overflow */ + if (typByVal) + sz += sizeof(Datum); + else if (typLen == -1 && + VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(value))) + { + /* Expanded objects need to be flattened, see comment below */ + sz += EOH_get_flat_size(DatumGetEOHP(value)); + } + else + sz += datumGetSize(value, typByVal, typLen); + } + + return sz; +} + +/*------------------------------------------------------------------------- + * datumSerialize + * + * Serialize a possibly-NULL datum into caller-provided storage. + * + * Note: "expanded" objects are flattened so as to produce a self-contained + * representation, but other sorts of toast pointers are transferred as-is. + * This is because the intended use of this function is to pass the value + * to another process within the same database server. The other process + * could not access an "expanded" object within this process's memory, but + * we assume it can dereference the same TOAST pointers this one can. + * + * The format is as follows: first, we write a 4-byte header word, which + * is either the length of a pass-by-reference datum, -1 for a + * pass-by-value datum, or -2 for a NULL. If the value is NULL, nothing + * further is written. If it is pass-by-value, sizeof(Datum) bytes + * follow. Otherwise, the number of bytes indicated by the header word + * follow. The caller is responsible for ensuring that there is enough + * storage to store the number of bytes that will be written; use + * datumEstimateSpace() to find out how many will be needed. + * *start_address is updated to point to the byte immediately following + * those written. + *------------------------------------------------------------------------- + */ +void +datumSerialize(Datum value, bool isnull, bool typByVal, int typLen, + char **start_address) +{ + ExpandedObjectHeader *eoh = NULL; + int header; + + /* Write header word. 
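The framing described in the datumSerialize() comment above (a 4-byte header word that is -2 for NULL, -1 for a by-value datum followed by sizeof(Datum) bytes, or otherwise the payload length followed by that many bytes) can be shown with a small standalone toy. toy_datum and the helper below are inventions for this sketch, not backend code:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

typedef int64_t toy_datum;          /* stand-in for Datum in this sketch */

/* header word: -2 = NULL, -1 = by-value (sizeof(toy_datum) bytes follow),
 * otherwise the number of pass-by-reference payload bytes that follow */
static char *
toy_serialize(char *dst, toy_datum value, int is_null, int by_val,
              const void *ref_payload, int ref_len)
{
    int header = is_null ? -2 : (by_val ? -1 : ref_len);

    memcpy(dst, &header, sizeof(int));
    dst += sizeof(int);
    if (is_null)
        return dst;
    if (by_val)
    {
        memcpy(dst, &value, sizeof(toy_datum));
        return dst + sizeof(toy_datum);
    }
    memcpy(dst, ref_payload, ref_len);
    return dst + ref_len;
}

int
main(void)
{
    char  buf[64];
    char *p = buf;

    p = toy_serialize(p, 42, 0, 1, NULL, 0);       /* by value            */
    p = toy_serialize(p, 0, 0, 0, "hello", 6);     /* by reference        */
    p = toy_serialize(p, 0, 1, 0, NULL, 0);        /* NULL: header only   */

    /* layout: [-1][42 as 8 bytes][6]["hello\0"][-2] => 4+8+4+6+4 = 26 bytes */
    printf("%ld bytes\n", (long) (p - buf));
    return 0;
}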
*/ + if (isnull) + header = -2; + else if (typByVal) + header = -1; + else if (typLen == -1 && + VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(value))) + { + eoh = DatumGetEOHP(value); + header = EOH_get_flat_size(eoh); + } + else + header = datumGetSize(value, typByVal, typLen); + memcpy(*start_address, &header, sizeof(int)); + *start_address += sizeof(int); + + /* If not null, write payload bytes. */ + if (!isnull) + { + if (typByVal) + { + memcpy(*start_address, &value, sizeof(Datum)); + *start_address += sizeof(Datum); + } + else if (eoh) + { + char *tmp; + + /* + * EOH_flatten_into expects the target address to be maxaligned, + * so we can't store directly to *start_address. + */ + tmp = (char *) palloc(header); + EOH_flatten_into(eoh, (void *) tmp, header); + memcpy(*start_address, tmp, header); + *start_address += header; + + /* be tidy. */ + pfree(tmp); + } + else + { + memcpy(*start_address, DatumGetPointer(value), header); + *start_address += header; + } + } +} + +/*------------------------------------------------------------------------- + * datumRestore + * + * Restore a possibly-NULL datum previously serialized by datumSerialize. + * *start_address is updated according to the number of bytes consumed. + *------------------------------------------------------------------------- + */ +Datum +datumRestore(char **start_address, bool *isnull) +{ + int header; + void *d; + + /* Read header word. */ + memcpy(&header, *start_address, sizeof(int)); + *start_address += sizeof(int); + + /* If this datum is NULL, we can stop here. */ + if (header == -2) + { + *isnull = true; + return (Datum) 0; + } + + /* OK, datum is not null. */ + *isnull = false; + + /* If this datum is pass-by-value, sizeof(Datum) bytes follow. */ + if (header == -1) + { + Datum val; + + memcpy(&val, *start_address, sizeof(Datum)); + *start_address += sizeof(Datum); + return val; + } + + /* Pass-by-reference case; copy indicated number of bytes. */ + Assert(header > 0); + d = palloc(header); + memcpy(d, *start_address, header); + *start_address += header; + return PointerGetDatum(d); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/dbsize.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/dbsize.c new file mode 100644 index 00000000000..c22837e48fb --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/dbsize.c @@ -0,0 +1,1028 @@ +/* + * dbsize.c + * Database object size functions, and related inquiries + * + * Copyright (c) 2002-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/adt/dbsize.c + * + */ + +#include "postgres.h" + +#include <sys/stat.h> + +#include "access/htup_details.h" +#include "access/relation.h" +#include "catalog/catalog.h" +#include "catalog/namespace.h" +#include "catalog/pg_authid.h" +#include "catalog/pg_database.h" +#include "catalog/pg_tablespace.h" +#include "commands/dbcommands.h" +#include "commands/tablespace.h" +#include "miscadmin.h" +#include "storage/fd.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/numeric.h" +#include "utils/rel.h" +#include "utils/relfilenumbermap.h" +#include "utils/relmapper.h" +#include "utils/syscache.h" + +/* Divide by two and round away from zero */ +#define half_rounded(x) (((x) + ((x) < 0 ? -1 : 1)) / 2) + +/* Units used in pg_size_pretty functions. 
All units must be powers of 2 */ +struct size_pretty_unit +{ + const char *name; /* bytes, kB, MB, GB etc */ + uint32 limit; /* upper limit, prior to half rounding after + * converting to this unit. */ + bool round; /* do half rounding for this unit */ + uint8 unitbits; /* (1 << unitbits) bytes to make 1 of this + * unit */ +}; + +/* When adding units here also update the docs and the error message in pg_size_bytes */ +static const struct size_pretty_unit size_pretty_units[] = { + {"bytes", 10 * 1024, false, 0}, + {"kB", 20 * 1024 - 1, true, 10}, + {"MB", 20 * 1024 - 1, true, 20}, + {"GB", 20 * 1024 - 1, true, 30}, + {"TB", 20 * 1024 - 1, true, 40}, + {"PB", 20 * 1024 - 1, true, 50}, + {NULL, 0, false, 0} +}; + +/* Additional unit aliases accepted by pg_size_bytes */ +struct size_bytes_unit_alias +{ + const char *alias; + int unit_index; /* corresponding size_pretty_units element */ +}; + +/* When adding units here also update the docs and the error message in pg_size_bytes */ +static const struct size_bytes_unit_alias size_bytes_aliases[] = { + {"B", 0}, + {NULL} +}; + +/* Return physical size of directory contents, or 0 if dir doesn't exist */ +static int64 +db_dir_size(const char *path) +{ + int64 dirsize = 0; + struct dirent *direntry; + DIR *dirdesc; + char filename[MAXPGPATH * 2]; + + dirdesc = AllocateDir(path); + + if (!dirdesc) + return 0; + + while ((direntry = ReadDir(dirdesc, path)) != NULL) + { + struct stat fst; + + CHECK_FOR_INTERRUPTS(); + + if (strcmp(direntry->d_name, ".") == 0 || + strcmp(direntry->d_name, "..") == 0) + continue; + + snprintf(filename, sizeof(filename), "%s/%s", path, direntry->d_name); + + if (stat(filename, &fst) < 0) + { + if (errno == ENOENT) + continue; + else + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", filename))); + } + dirsize += fst.st_size; + } + + FreeDir(dirdesc); + return dirsize; +} + +/* + * calculate size of database in all tablespaces + */ +static int64 +calculate_database_size(Oid dbOid) +{ + int64 totalsize; + DIR *dirdesc; + struct dirent *direntry; + char dirpath[MAXPGPATH]; + char pathname[MAXPGPATH + 21 + sizeof(TABLESPACE_VERSION_DIRECTORY)]; + AclResult aclresult; + + /* + * User must have connect privilege for target database or have privileges + * of pg_read_all_stats + */ + aclresult = object_aclcheck(DatabaseRelationId, dbOid, GetUserId(), ACL_CONNECT); + if (aclresult != ACLCHECK_OK && + !has_privs_of_role(GetUserId(), ROLE_PG_READ_ALL_STATS)) + { + aclcheck_error(aclresult, OBJECT_DATABASE, + get_database_name(dbOid)); + } + + /* Shared storage in pg_global is not counted */ + + /* Include pg_default storage */ + snprintf(pathname, sizeof(pathname), "base/%u", dbOid); + totalsize = db_dir_size(pathname); + + /* Scan the non-default tablespaces */ + snprintf(dirpath, MAXPGPATH, "pg_tblspc"); + dirdesc = AllocateDir(dirpath); + + while ((direntry = ReadDir(dirdesc, dirpath)) != NULL) + { + CHECK_FOR_INTERRUPTS(); + + if (strcmp(direntry->d_name, ".") == 0 || + strcmp(direntry->d_name, "..") == 0) + continue; + + snprintf(pathname, sizeof(pathname), "pg_tblspc/%s/%s/%u", + direntry->d_name, TABLESPACE_VERSION_DIRECTORY, dbOid); + totalsize += db_dir_size(pathname); + } + + FreeDir(dirdesc); + + return totalsize; +} + +Datum +pg_database_size_oid(PG_FUNCTION_ARGS) +{ + Oid dbOid = PG_GETARG_OID(0); + int64 size; + + size = calculate_database_size(dbOid); + + if (size == 0) + PG_RETURN_NULL(); + + PG_RETURN_INT64(size); +} + +Datum +pg_database_size_name(PG_FUNCTION_ARGS) +{ + Name 
dbName = PG_GETARG_NAME(0); + Oid dbOid = get_database_oid(NameStr(*dbName), false); + int64 size; + + size = calculate_database_size(dbOid); + + if (size == 0) + PG_RETURN_NULL(); + + PG_RETURN_INT64(size); +} + + +/* + * Calculate total size of tablespace. Returns -1 if the tablespace directory + * cannot be found. + */ +static int64 +calculate_tablespace_size(Oid tblspcOid) +{ + char tblspcPath[MAXPGPATH]; + char pathname[MAXPGPATH * 2]; + int64 totalsize = 0; + DIR *dirdesc; + struct dirent *direntry; + AclResult aclresult; + + /* + * User must have privileges of pg_read_all_stats or have CREATE privilege + * for target tablespace, either explicitly granted or implicitly because + * it is default for current database. + */ + if (tblspcOid != MyDatabaseTableSpace && + !has_privs_of_role(GetUserId(), ROLE_PG_READ_ALL_STATS)) + { + aclresult = object_aclcheck(TableSpaceRelationId, tblspcOid, GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_TABLESPACE, + get_tablespace_name(tblspcOid)); + } + + if (tblspcOid == DEFAULTTABLESPACE_OID) + snprintf(tblspcPath, MAXPGPATH, "base"); + else if (tblspcOid == GLOBALTABLESPACE_OID) + snprintf(tblspcPath, MAXPGPATH, "global"); + else + snprintf(tblspcPath, MAXPGPATH, "pg_tblspc/%u/%s", tblspcOid, + TABLESPACE_VERSION_DIRECTORY); + + dirdesc = AllocateDir(tblspcPath); + + if (!dirdesc) + return -1; + + while ((direntry = ReadDir(dirdesc, tblspcPath)) != NULL) + { + struct stat fst; + + CHECK_FOR_INTERRUPTS(); + + if (strcmp(direntry->d_name, ".") == 0 || + strcmp(direntry->d_name, "..") == 0) + continue; + + snprintf(pathname, sizeof(pathname), "%s/%s", tblspcPath, direntry->d_name); + + if (stat(pathname, &fst) < 0) + { + if (errno == ENOENT) + continue; + else + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", pathname))); + } + + if (S_ISDIR(fst.st_mode)) + totalsize += db_dir_size(pathname); + + totalsize += fst.st_size; + } + + FreeDir(dirdesc); + + return totalsize; +} + +Datum +pg_tablespace_size_oid(PG_FUNCTION_ARGS) +{ + Oid tblspcOid = PG_GETARG_OID(0); + int64 size; + + size = calculate_tablespace_size(tblspcOid); + + if (size < 0) + PG_RETURN_NULL(); + + PG_RETURN_INT64(size); +} + +Datum +pg_tablespace_size_name(PG_FUNCTION_ARGS) +{ + Name tblspcName = PG_GETARG_NAME(0); + Oid tblspcOid = get_tablespace_oid(NameStr(*tblspcName), false); + int64 size; + + size = calculate_tablespace_size(tblspcOid); + + if (size < 0) + PG_RETURN_NULL(); + + PG_RETURN_INT64(size); +} + + +/* + * calculate size of (one fork of) a relation + * + * Note: we can safely apply this to temp tables of other sessions, so there + * is no check here or at the call sites for that. 
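calculate_relation_size(), which follows, totals the sizes of a relation's main file and its numbered segment files ("<path>", "<path>.1", "<path>.2", ...) and stops at the first segment whose stat() fails with ENOENT. A standalone sketch of the same loop; the example path is arbitrary, and real backend code reports stat() failures other than ENOENT with ereport() instead of continuing:

#include <stdio.h>
#include <errno.h>
#include <stdint.h>
#include <sys/stat.h>

/* Sum st_size over "path", "path.1", "path.2", ... until ENOENT. */
static int64_t
segmented_file_size(const char *path)
{
    int64_t total = 0;

    for (unsigned int seg = 0;; seg++)
    {
        char        name[1024];
        struct stat st;

        if (seg == 0)
            snprintf(name, sizeof(name), "%s", path);
        else
            snprintf(name, sizeof(name), "%s.%u", path, seg);

        if (stat(name, &st) < 0)
        {
            if (errno == ENOENT)
                break;              /* no more segments */
            perror(name);           /* simplified error handling */
            break;
        }
        total += st.st_size;
    }
    return total;
}

int
main(void)
{
    printf("%lld\n", (long long) segmented_file_size("/tmp/example_relation"));
    return 0;
}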
+ */ +static int64 +calculate_relation_size(RelFileLocator *rfn, BackendId backend, ForkNumber forknum) +{ + int64 totalsize = 0; + char *relationpath; + char pathname[MAXPGPATH]; + unsigned int segcount = 0; + + relationpath = relpathbackend(*rfn, backend, forknum); + + for (segcount = 0;; segcount++) + { + struct stat fst; + + CHECK_FOR_INTERRUPTS(); + + if (segcount == 0) + snprintf(pathname, MAXPGPATH, "%s", + relationpath); + else + snprintf(pathname, MAXPGPATH, "%s.%u", + relationpath, segcount); + + if (stat(pathname, &fst) < 0) + { + if (errno == ENOENT) + break; + else + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", pathname))); + } + totalsize += fst.st_size; + } + + return totalsize; +} + +Datum +pg_relation_size(PG_FUNCTION_ARGS) +{ + Oid relOid = PG_GETARG_OID(0); + text *forkName = PG_GETARG_TEXT_PP(1); + Relation rel; + int64 size; + + rel = try_relation_open(relOid, AccessShareLock); + + /* + * Before 9.2, we used to throw an error if the relation didn't exist, but + * that makes queries like "SELECT pg_relation_size(oid) FROM pg_class" + * less robust, because while we scan pg_class with an MVCC snapshot, + * someone else might drop the table. It's better to return NULL for + * already-dropped tables than throw an error and abort the whole query. + */ + if (rel == NULL) + PG_RETURN_NULL(); + + size = calculate_relation_size(&(rel->rd_locator), rel->rd_backend, + forkname_to_number(text_to_cstring(forkName))); + + relation_close(rel, AccessShareLock); + + PG_RETURN_INT64(size); +} + +/* + * Calculate total on-disk size of a TOAST relation, including its indexes. + * Must not be applied to non-TOAST relations. + */ +static int64 +calculate_toast_table_size(Oid toastrelid) +{ + int64 size = 0; + Relation toastRel; + ForkNumber forkNum; + ListCell *lc; + List *indexlist; + + toastRel = relation_open(toastrelid, AccessShareLock); + + /* toast heap size, including FSM and VM size */ + for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) + size += calculate_relation_size(&(toastRel->rd_locator), + toastRel->rd_backend, forkNum); + + /* toast index size, including FSM and VM size */ + indexlist = RelationGetIndexList(toastRel); + + /* Size is calculated using all the indexes available */ + foreach(lc, indexlist) + { + Relation toastIdxRel; + + toastIdxRel = relation_open(lfirst_oid(lc), + AccessShareLock); + for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) + size += calculate_relation_size(&(toastIdxRel->rd_locator), + toastIdxRel->rd_backend, forkNum); + + relation_close(toastIdxRel, AccessShareLock); + } + list_free(indexlist); + relation_close(toastRel, AccessShareLock); + + return size; +} + +/* + * Calculate total on-disk size of a given table, + * including FSM and VM, plus TOAST table if any. + * Indexes other than the TOAST table's index are not included. + * + * Note that this also behaves sanely if applied to an index or toast table; + * those won't have attached toast tables, but they can have multiple forks. 
+ */ +static int64 +calculate_table_size(Relation rel) +{ + int64 size = 0; + ForkNumber forkNum; + + /* + * heap size, including FSM and VM + */ + for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) + size += calculate_relation_size(&(rel->rd_locator), rel->rd_backend, + forkNum); + + /* + * Size of toast relation + */ + if (OidIsValid(rel->rd_rel->reltoastrelid)) + size += calculate_toast_table_size(rel->rd_rel->reltoastrelid); + + return size; +} + +/* + * Calculate total on-disk size of all indexes attached to the given table. + * + * Can be applied safely to an index, but you'll just get zero. + */ +static int64 +calculate_indexes_size(Relation rel) +{ + int64 size = 0; + + /* + * Aggregate all indexes on the given relation + */ + if (rel->rd_rel->relhasindex) + { + List *index_oids = RelationGetIndexList(rel); + ListCell *cell; + + foreach(cell, index_oids) + { + Oid idxOid = lfirst_oid(cell); + Relation idxRel; + ForkNumber forkNum; + + idxRel = relation_open(idxOid, AccessShareLock); + + for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) + size += calculate_relation_size(&(idxRel->rd_locator), + idxRel->rd_backend, + forkNum); + + relation_close(idxRel, AccessShareLock); + } + + list_free(index_oids); + } + + return size; +} + +Datum +pg_table_size(PG_FUNCTION_ARGS) +{ + Oid relOid = PG_GETARG_OID(0); + Relation rel; + int64 size; + + rel = try_relation_open(relOid, AccessShareLock); + + if (rel == NULL) + PG_RETURN_NULL(); + + size = calculate_table_size(rel); + + relation_close(rel, AccessShareLock); + + PG_RETURN_INT64(size); +} + +Datum +pg_indexes_size(PG_FUNCTION_ARGS) +{ + Oid relOid = PG_GETARG_OID(0); + Relation rel; + int64 size; + + rel = try_relation_open(relOid, AccessShareLock); + + if (rel == NULL) + PG_RETURN_NULL(); + + size = calculate_indexes_size(rel); + + relation_close(rel, AccessShareLock); + + PG_RETURN_INT64(size); +} + +/* + * Compute the on-disk size of all files for the relation, + * including heap data, index data, toast data, FSM, VM. + */ +static int64 +calculate_total_relation_size(Relation rel) +{ + int64 size; + + /* + * Aggregate the table size, this includes size of the heap, toast and + * toast index with free space and visibility map + */ + size = calculate_table_size(rel); + + /* + * Add size of all attached indexes as well + */ + size += calculate_indexes_size(rel); + + return size; +} + +Datum +pg_total_relation_size(PG_FUNCTION_ARGS) +{ + return (Datum)0; +} + +Datum +pg_total_relation_size_original(PG_FUNCTION_ARGS) +{ + Oid relOid = PG_GETARG_OID(0); + Relation rel; + int64 size; + + rel = try_relation_open(relOid, AccessShareLock); + + if (rel == NULL) + PG_RETURN_NULL(); + + size = calculate_total_relation_size(rel); + + relation_close(rel, AccessShareLock); + + PG_RETURN_INT64(size); +} + +/* + * formatting with size units + */ +Datum +pg_size_pretty(PG_FUNCTION_ARGS) +{ + int64 size = PG_GETARG_INT64(0); + char buf[64]; + const struct size_pretty_unit *unit; + + for (unit = size_pretty_units; unit->name != NULL; unit++) + { + uint8 bits; + + /* use this unit if there are no more units or we're below the limit */ + if (unit[1].name == NULL || i64abs(size) < unit->limit) + { + if (unit->round) + size = half_rounded(size); + + snprintf(buf, sizeof(buf), INT64_FORMAT " %s", size, unit->name); + break; + } + + /* + * Determine the number of bits to use to build the divisor. We may + * need to use 1 bit less than the difference between this and the + * next unit if the next unit uses half rounding. 
Or we may need to + * shift an extra bit if this unit uses half rounding and the next one + * does not. We use division rather than shifting right by this + * number of bits to ensure positive and negative values are rounded + * in the same way. + */ + bits = (unit[1].unitbits - unit->unitbits - (unit[1].round == true) + + (unit->round == true)); + size /= ((int64) 1) << bits; + } + + PG_RETURN_TEXT_P(cstring_to_text(buf)); +} + +static char * +numeric_to_cstring(Numeric n) +{ + Datum d = NumericGetDatum(n); + + return DatumGetCString(DirectFunctionCall1(numeric_out, d)); +} + +static bool +numeric_is_less(Numeric a, Numeric b) +{ + Datum da = NumericGetDatum(a); + Datum db = NumericGetDatum(b); + + return DatumGetBool(DirectFunctionCall2(numeric_lt, da, db)); +} + +static Numeric +numeric_absolute(Numeric n) +{ + Datum d = NumericGetDatum(n); + Datum result; + + result = DirectFunctionCall1(numeric_abs, d); + return DatumGetNumeric(result); +} + +static Numeric +numeric_half_rounded(Numeric n) +{ + Datum d = NumericGetDatum(n); + Datum zero; + Datum one; + Datum two; + Datum result; + + zero = NumericGetDatum(int64_to_numeric(0)); + one = NumericGetDatum(int64_to_numeric(1)); + two = NumericGetDatum(int64_to_numeric(2)); + + if (DatumGetBool(DirectFunctionCall2(numeric_ge, d, zero))) + d = DirectFunctionCall2(numeric_add, d, one); + else + d = DirectFunctionCall2(numeric_sub, d, one); + + result = DirectFunctionCall2(numeric_div_trunc, d, two); + return DatumGetNumeric(result); +} + +static Numeric +numeric_truncated_divide(Numeric n, int64 divisor) +{ + Datum d = NumericGetDatum(n); + Datum divisor_numeric; + Datum result; + + divisor_numeric = NumericGetDatum(int64_to_numeric(divisor)); + result = DirectFunctionCall2(numeric_div_trunc, d, divisor_numeric); + return DatumGetNumeric(result); +} + +Datum +pg_size_pretty_numeric(PG_FUNCTION_ARGS) +{ + Numeric size = PG_GETARG_NUMERIC(0); + char *result = NULL; + const struct size_pretty_unit *unit; + + for (unit = size_pretty_units; unit->name != NULL; unit++) + { + unsigned int shiftby; + + /* use this unit if there are no more units or we're below the limit */ + if (unit[1].name == NULL || + numeric_is_less(numeric_absolute(size), + int64_to_numeric(unit->limit))) + { + if (unit->round) + size = numeric_half_rounded(size); + + result = psprintf("%s %s", numeric_to_cstring(size), unit->name); + break; + } + + /* + * Determine the number of bits to use to build the divisor. We may + * need to use 1 bit less than the difference between this and the + * next unit if the next unit uses half rounding. Or we may need to + * shift an extra bit if this unit uses half rounding and the next one + * does not. 
+ */ + shiftby = (unit[1].unitbits - unit->unitbits - (unit[1].round == true) + + (unit->round == true)); + size = numeric_truncated_divide(size, ((int64) 1) << shiftby); + } + + PG_RETURN_TEXT_P(cstring_to_text(result)); +} + +/* + * Convert a human-readable size to a size in bytes + */ +Datum +pg_size_bytes(PG_FUNCTION_ARGS) +{ + text *arg = PG_GETARG_TEXT_PP(0); + char *str, + *strptr, + *endptr; + char saved_char; + Numeric num; + int64 result; + bool have_digits = false; + + str = text_to_cstring(arg); + + /* Skip leading whitespace */ + strptr = str; + while (isspace((unsigned char) *strptr)) + strptr++; + + /* Check that we have a valid number and determine where it ends */ + endptr = strptr; + + /* Part (1): sign */ + if (*endptr == '-' || *endptr == '+') + endptr++; + + /* Part (2): main digit string */ + if (isdigit((unsigned char) *endptr)) + { + have_digits = true; + do + endptr++; + while (isdigit((unsigned char) *endptr)); + } + + /* Part (3): optional decimal point and fractional digits */ + if (*endptr == '.') + { + endptr++; + if (isdigit((unsigned char) *endptr)) + { + have_digits = true; + do + endptr++; + while (isdigit((unsigned char) *endptr)); + } + } + + /* Complain if we don't have a valid number at this point */ + if (!have_digits) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid size: \"%s\"", str))); + + /* Part (4): optional exponent */ + if (*endptr == 'e' || *endptr == 'E') + { + long exponent; + char *cp; + + /* + * Note we might one day support EB units, so if what follows 'E' + * isn't a number, just treat it all as a unit to be parsed. + */ + exponent = strtol(endptr + 1, &cp, 10); + (void) exponent; /* Silence -Wunused-result warnings */ + if (cp > endptr + 1) + endptr = cp; + } + + /* + * Parse the number, saving the next character, which may be the first + * character of the unit string. 
+ */ + saved_char = *endptr; + *endptr = '\0'; + + num = DatumGetNumeric(DirectFunctionCall3(numeric_in, + CStringGetDatum(strptr), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(-1))); + + *endptr = saved_char; + + /* Skip whitespace between number and unit */ + strptr = endptr; + while (isspace((unsigned char) *strptr)) + strptr++; + + /* Handle possible unit */ + if (*strptr != '\0') + { + const struct size_pretty_unit *unit; + int64 multiplier = 0; + + /* Trim any trailing whitespace */ + endptr = str + VARSIZE_ANY_EXHDR(arg) - 1; + + while (isspace((unsigned char) *endptr)) + endptr--; + + endptr++; + *endptr = '\0'; + + for (unit = size_pretty_units; unit->name != NULL; unit++) + { + /* Parse the unit case-insensitively */ + if (pg_strcasecmp(strptr, unit->name) == 0) + break; + } + + /* If not found, look in table of aliases */ + if (unit->name == NULL) + { + for (const struct size_bytes_unit_alias *a = size_bytes_aliases; a->alias != NULL; a++) + { + if (pg_strcasecmp(strptr, a->alias) == 0) + { + unit = &size_pretty_units[a->unit_index]; + break; + } + } + } + + /* Verify we found a valid unit in the loop above */ + if (unit->name == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid size: \"%s\"", text_to_cstring(arg)), + errdetail("Invalid size unit: \"%s\".", strptr), + errhint("Valid units are \"bytes\", \"B\", \"kB\", \"MB\", \"GB\", \"TB\", and \"PB\"."))); + + multiplier = ((int64) 1) << unit->unitbits; + + if (multiplier > 1) + { + Numeric mul_num; + + mul_num = int64_to_numeric(multiplier); + + num = DatumGetNumeric(DirectFunctionCall2(numeric_mul, + NumericGetDatum(mul_num), + NumericGetDatum(num))); + } + } + + result = DatumGetInt64(DirectFunctionCall1(numeric_int8, + NumericGetDatum(num))); + + PG_RETURN_INT64(result); +} + +/* + * Get the filenode of a relation + * + * This is expected to be used in queries like + * SELECT pg_relation_filenode(oid) FROM pg_class; + * That leads to a couple of choices. We work from the pg_class row alone + * rather than actually opening each relation, for efficiency. We don't + * fail if we can't find the relation --- some rows might be visible in + * the query's MVCC snapshot even though the relations have been dropped. + * (Note: we could avoid using the catcache, but there's little point + * because the relation mapper also works "in the now".) We also don't + * fail if the relation doesn't have storage. In all these cases it + * seems better to quietly return NULL. + */ +Datum +pg_relation_filenode(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + RelFileNumber result; + HeapTuple tuple; + Form_pg_class relform; + + tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (!HeapTupleIsValid(tuple)) + PG_RETURN_NULL(); + relform = (Form_pg_class) GETSTRUCT(tuple); + + if (RELKIND_HAS_STORAGE(relform->relkind)) + { + if (relform->relfilenode) + result = relform->relfilenode; + else /* Consult the relation mapper */ + result = RelationMapOidToFilenumber(relid, + relform->relisshared); + } + else + { + /* no storage, return NULL */ + result = InvalidRelFileNumber; + } + + ReleaseSysCache(tuple); + + if (!RelFileNumberIsValid(result)) + PG_RETURN_NULL(); + + PG_RETURN_OID(result); +} + +/* + * Get the relation via (reltablespace, relfilenumber) + * + * This is expected to be used when somebody wants to match an individual file + * on the filesystem back to its table. 
That's not trivially possible via + * pg_class, because that doesn't contain the relfilenumbers of shared and nailed + * tables. + * + * We don't fail but return NULL if we cannot find a mapping. + * + * InvalidOid can be passed instead of the current database's default + * tablespace. + */ +Datum +pg_filenode_relation(PG_FUNCTION_ARGS) +{ + Oid reltablespace = PG_GETARG_OID(0); + RelFileNumber relfilenumber = PG_GETARG_OID(1); + Oid heaprel; + + /* test needed so RelidByRelfilenumber doesn't misbehave */ + if (!RelFileNumberIsValid(relfilenumber)) + PG_RETURN_NULL(); + + heaprel = RelidByRelfilenumber(reltablespace, relfilenumber); + + if (!OidIsValid(heaprel)) + PG_RETURN_NULL(); + else + PG_RETURN_OID(heaprel); +} + +/* + * Get the pathname (relative to $PGDATA) of a relation + * + * See comments for pg_relation_filenode. + */ +Datum +pg_relation_filepath(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + HeapTuple tuple; + Form_pg_class relform; + RelFileLocator rlocator; + BackendId backend; + char *path; + + tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (!HeapTupleIsValid(tuple)) + PG_RETURN_NULL(); + relform = (Form_pg_class) GETSTRUCT(tuple); + + if (RELKIND_HAS_STORAGE(relform->relkind)) + { + /* This logic should match RelationInitPhysicalAddr */ + if (relform->reltablespace) + rlocator.spcOid = relform->reltablespace; + else + rlocator.spcOid = MyDatabaseTableSpace; + if (rlocator.spcOid == GLOBALTABLESPACE_OID) + rlocator.dbOid = InvalidOid; + else + rlocator.dbOid = MyDatabaseId; + if (relform->relfilenode) + rlocator.relNumber = relform->relfilenode; + else /* Consult the relation mapper */ + rlocator.relNumber = RelationMapOidToFilenumber(relid, + relform->relisshared); + } + else + { + /* no storage, return NULL */ + rlocator.relNumber = InvalidRelFileNumber; + /* some compilers generate warnings without these next two lines */ + rlocator.dbOid = InvalidOid; + rlocator.spcOid = InvalidOid; + } + + if (!RelFileNumberIsValid(rlocator.relNumber)) + { + ReleaseSysCache(tuple); + PG_RETURN_NULL(); + } + + /* Determine owning backend. */ + switch (relform->relpersistence) + { + case RELPERSISTENCE_UNLOGGED: + case RELPERSISTENCE_PERMANENT: + backend = InvalidBackendId; + break; + case RELPERSISTENCE_TEMP: + if (isTempOrTempToastNamespace(relform->relnamespace)) + backend = BackendIdForTempRelations(); + else + { + /* Do it the hard way. */ + backend = GetTempNamespaceBackendId(relform->relnamespace); + Assert(backend != InvalidBackendId); + } + break; + default: + elog(ERROR, "invalid relpersistence: %c", relform->relpersistence); + backend = InvalidBackendId; /* placate compiler */ + break; + } + + ReleaseSysCache(tuple); + + path = relpathbackend(rlocator, backend, MAIN_FORKNUM); + + PG_RETURN_TEXT_P(cstring_to_text(path)); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/domains.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/domains.c new file mode 100644 index 00000000000..8d766f68e31 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/domains.c @@ -0,0 +1,406 @@ +/*------------------------------------------------------------------------- + * + * domains.c + * I/O functions for domain types. + * + * The output functions for a domain type are just the same ones provided + * by its underlying base type. The input functions, however, must be + * prepared to apply any constraints defined by the type. 
So, we create + * special input functions that invoke the base type's input function + * and then check the constraints. + * + * The overhead required for constraint checking can be high, since examining + * the catalogs to discover the constraints for a given domain is not cheap. + * We have three mechanisms for minimizing this cost: + * 1. We rely on the typcache to keep up-to-date copies of the constraints. + * 2. In a nest of domains, we flatten the checking of all the levels + * into just one operation (the typcache does this for us). + * 3. If there are CHECK constraints, we cache a standalone ExprContext + * to evaluate them in. + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/domains.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "catalog/pg_type.h" +#include "executor/executor.h" +#include "lib/stringinfo.h" +#include "utils/builtins.h" +#include "utils/expandeddatum.h" +#include "utils/lsyscache.h" +#include "utils/syscache.h" +#include "utils/typcache.h" + + +/* + * structure to cache state across multiple calls + */ +typedef struct DomainIOData +{ + Oid domain_type; + /* Data needed to call base type's input function */ + Oid typiofunc; + Oid typioparam; + int32 typtypmod; + FmgrInfo proc; + /* Reference to cached list of constraint items to check */ + DomainConstraintRef constraint_ref; + /* Context for evaluating CHECK constraints in */ + ExprContext *econtext; + /* Memory context this cache is in */ + MemoryContext mcxt; +} DomainIOData; + + +/* + * domain_state_setup - initialize the cache for a new domain type. + * + * Note: we can't re-use the same cache struct for a new domain type, + * since there's no provision for releasing the DomainConstraintRef. + * If a call site needs to deal with a new domain type, we just leak + * the old struct for the duration of the query. + */ +static DomainIOData * +domain_state_setup(Oid domainType, bool binary, MemoryContext mcxt) +{ + DomainIOData *my_extra; + TypeCacheEntry *typentry; + Oid baseType; + + my_extra = (DomainIOData *) MemoryContextAlloc(mcxt, sizeof(DomainIOData)); + + /* + * Verify that domainType represents a valid domain type. We need to be + * careful here because domain_in and domain_recv can be called from SQL, + * possibly with incorrect arguments. We use lookup_type_cache mainly + * because it will throw a clean user-facing error for a bad OID; but also + * it can cache the underlying base type info. 
+ */ + typentry = lookup_type_cache(domainType, TYPECACHE_DOMAIN_BASE_INFO); + if (typentry->typtype != TYPTYPE_DOMAIN) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("type %s is not a domain", + format_type_be(domainType)))); + + /* Find out the base type */ + baseType = typentry->domainBaseType; + my_extra->typtypmod = typentry->domainBaseTypmod; + + /* Look up underlying I/O function */ + if (binary) + getTypeBinaryInputInfo(baseType, + &my_extra->typiofunc, + &my_extra->typioparam); + else + getTypeInputInfo(baseType, + &my_extra->typiofunc, + &my_extra->typioparam); + fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc, mcxt); + + /* Look up constraints for domain */ + InitDomainConstraintRef(domainType, &my_extra->constraint_ref, mcxt, true); + + /* We don't make an ExprContext until needed */ + my_extra->econtext = NULL; + my_extra->mcxt = mcxt; + + /* Mark cache valid */ + my_extra->domain_type = domainType; + + return my_extra; +} + +/* + * domain_check_input - apply the cached checks. + * + * This is roughly similar to the handling of CoerceToDomain nodes in + * execExpr*.c, but we execute each constraint separately, rather than + * compiling them in-line within a larger expression. + * + * If escontext points to an ErrorSaveContext, any failures are reported + * there, otherwise they are ereport'ed. Note that we do not attempt to do + * soft reporting of errors raised during execution of CHECK constraints. + */ +static void +domain_check_input(Datum value, bool isnull, DomainIOData *my_extra, + Node *escontext) +{ + ExprContext *econtext = my_extra->econtext; + ListCell *l; + + /* Make sure we have up-to-date constraints */ + UpdateDomainConstraintRef(&my_extra->constraint_ref); + + foreach(l, my_extra->constraint_ref.constraints) + { + DomainConstraintState *con = (DomainConstraintState *) lfirst(l); + + switch (con->constrainttype) + { + case DOM_CONSTRAINT_NOTNULL: + if (isnull) + { + errsave(escontext, + (errcode(ERRCODE_NOT_NULL_VIOLATION), + errmsg("domain %s does not allow null values", + format_type_be(my_extra->domain_type)), + errdatatype(my_extra->domain_type))); + goto fail; + } + break; + case DOM_CONSTRAINT_CHECK: + { + /* Make the econtext if we didn't already */ + if (econtext == NULL) + { + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(my_extra->mcxt); + econtext = CreateStandaloneExprContext(); + MemoryContextSwitchTo(oldcontext); + my_extra->econtext = econtext; + } + + /* + * Set up value to be returned by CoerceToDomainValue + * nodes. Unlike in the generic expression case, this + * econtext couldn't be shared with anything else, so no + * need to save and restore fields. But we do need to + * protect the passed-in value against being changed by + * called functions. (It couldn't be a R/W expanded + * object for most uses, but that seems possible for + * domain_check().) 
+ */ + econtext->domainValue_datum = + MakeExpandedObjectReadOnly(value, isnull, + my_extra->constraint_ref.tcache->typlen); + econtext->domainValue_isNull = isnull; + + if (!ExecCheck(con->check_exprstate, econtext)) + { + errsave(escontext, + (errcode(ERRCODE_CHECK_VIOLATION), + errmsg("value for domain %s violates check constraint \"%s\"", + format_type_be(my_extra->domain_type), + con->name), + errdomainconstraint(my_extra->domain_type, + con->name))); + goto fail; + } + break; + } + default: + elog(ERROR, "unrecognized constraint type: %d", + (int) con->constrainttype); + break; + } + } + + /* + * Before exiting, call any shutdown callbacks and reset econtext's + * per-tuple memory. This avoids leaking non-memory resources, if + * anything in the expression(s) has any. + */ +fail: + if (econtext) + ReScanExprContext(econtext); +} + + +/* + * domain_in - input routine for any domain type. + */ +Datum +domain_in(PG_FUNCTION_ARGS) +{ + char *string; + Oid domainType; + Node *escontext = fcinfo->context; + DomainIOData *my_extra; + Datum value; + + /* + * Since domain_in is not strict, we have to check for null inputs. The + * typioparam argument should never be null in normal system usage, but it + * could be null in a manual invocation --- if so, just return null. + */ + if (PG_ARGISNULL(0)) + string = NULL; + else + string = PG_GETARG_CSTRING(0); + if (PG_ARGISNULL(1)) + PG_RETURN_NULL(); + domainType = PG_GETARG_OID(1); + + /* + * We arrange to look up the needed info just once per series of calls, + * assuming the domain type doesn't change underneath us (which really + * shouldn't happen, but cope if it does). + */ + my_extra = (DomainIOData *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL || my_extra->domain_type != domainType) + { + my_extra = domain_state_setup(domainType, false, + fcinfo->flinfo->fn_mcxt); + fcinfo->flinfo->fn_extra = (void *) my_extra; + } + + /* + * Invoke the base type's typinput procedure to convert the data. + */ + if (!InputFunctionCallSafe(&my_extra->proc, + string, + my_extra->typioparam, + my_extra->typtypmod, + escontext, + &value)) + PG_RETURN_NULL(); + + /* + * Do the necessary checks to ensure it's a valid domain value. + */ + domain_check_input(value, (string == NULL), my_extra, escontext); + + if (string == NULL) + PG_RETURN_NULL(); + else + PG_RETURN_DATUM(value); +} + +/* + * domain_recv - binary input routine for any domain type. + */ +Datum +domain_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf; + Oid domainType; + DomainIOData *my_extra; + Datum value; + + /* + * Since domain_recv is not strict, we have to check for null inputs. The + * typioparam argument should never be null in normal system usage, but it + * could be null in a manual invocation --- if so, just return null. + */ + if (PG_ARGISNULL(0)) + buf = NULL; + else + buf = (StringInfo) PG_GETARG_POINTER(0); + if (PG_ARGISNULL(1)) + PG_RETURN_NULL(); + domainType = PG_GETARG_OID(1); + + /* + * We arrange to look up the needed info just once per series of calls, + * assuming the domain type doesn't change underneath us (which really + * shouldn't happen, but cope if it does). + */ + my_extra = (DomainIOData *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL || my_extra->domain_type != domainType) + { + my_extra = domain_state_setup(domainType, true, + fcinfo->flinfo->fn_mcxt); + fcinfo->flinfo->fn_extra = (void *) my_extra; + } + + /* + * Invoke the base type's typreceive procedure to convert the data. 
+ */ + value = ReceiveFunctionCall(&my_extra->proc, + buf, + my_extra->typioparam, + my_extra->typtypmod); + + /* + * Do the necessary checks to ensure it's a valid domain value. + */ + domain_check_input(value, (buf == NULL), my_extra, NULL); + + if (buf == NULL) + PG_RETURN_NULL(); + else + PG_RETURN_DATUM(value); +} + +/* + * domain_check - check that a datum satisfies the constraints of a + * domain. extra and mcxt can be passed if they are available from, + * say, a FmgrInfo structure, or they can be NULL, in which case the + * setup is repeated for each call. + */ +void +domain_check(Datum value, bool isnull, Oid domainType, + void **extra, MemoryContext mcxt) +{ + DomainIOData *my_extra = NULL; + + if (mcxt == NULL) + mcxt = CurrentMemoryContext; + + /* + * We arrange to look up the needed info just once per series of calls, + * assuming the domain type doesn't change underneath us (which really + * shouldn't happen, but cope if it does). + */ + if (extra) + my_extra = (DomainIOData *) *extra; + if (my_extra == NULL || my_extra->domain_type != domainType) + { + my_extra = domain_state_setup(domainType, true, mcxt); + if (extra) + *extra = (void *) my_extra; + } + + /* + * Do the necessary checks to ensure it's a valid domain value. + */ + domain_check_input(value, isnull, my_extra, NULL); +} + +/* + * errdatatype --- stores schema_name and datatype_name of a datatype + * within the current errordata. + */ +int +errdatatype(Oid datatypeOid) +{ + HeapTuple tup; + Form_pg_type typtup; + + tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(datatypeOid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for type %u", datatypeOid); + typtup = (Form_pg_type) GETSTRUCT(tup); + + err_generic_string(PG_DIAG_SCHEMA_NAME, + get_namespace_name(typtup->typnamespace)); + err_generic_string(PG_DIAG_DATATYPE_NAME, NameStr(typtup->typname)); + + ReleaseSysCache(tup); + + return 0; /* return value does not matter */ +} + +/* + * errdomainconstraint --- stores schema_name, datatype_name and + * constraint_name of a domain-related constraint within the current errordata. + */ +int +errdomainconstraint(Oid datatypeOid, const char *conname) +{ + errdatatype(datatypeOid); + err_generic_string(PG_DIAG_CONSTRAINT_NAME, conname); + + return 0; /* return value does not matter */ +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/encode.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/encode.c new file mode 100644 index 00000000000..e5ac3ad23df --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/encode.c @@ -0,0 +1,612 @@ +/*------------------------------------------------------------------------- + * + * encode.c + * Various data encoding/decoding things. + * + * Copyright (c) 2001-2023, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/encode.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <ctype.h> + +#include "mb/pg_wchar.h" +#include "utils/builtins.h" +#include "utils/memutils.h" +#include "varatt.h" + + +/* + * Encoding conversion API. + * encode_len() and decode_len() compute the amount of space needed, while + * encode() and decode() perform the actual conversions. It is okay for + * the _len functions to return an overestimate, but not an underestimate. + * (Having said that, large overestimates could cause unnecessary errors, + * so it's better to get it right.) 
The conversion routines write to the + * buffer at *res and return the true length of their output. + */ +struct pg_encoding +{ + uint64 (*encode_len) (const char *data, size_t dlen); + uint64 (*decode_len) (const char *data, size_t dlen); + uint64 (*encode) (const char *data, size_t dlen, char *res); + uint64 (*decode) (const char *data, size_t dlen, char *res); +}; + +static const struct pg_encoding *pg_find_encoding(const char *name); + +/* + * SQL functions. + */ + +Datum +binary_encode(PG_FUNCTION_ARGS) +{ + bytea *data = PG_GETARG_BYTEA_PP(0); + Datum name = PG_GETARG_DATUM(1); + text *result; + char *namebuf; + char *dataptr; + size_t datalen; + uint64 resultlen; + uint64 res; + const struct pg_encoding *enc; + + namebuf = TextDatumGetCString(name); + + enc = pg_find_encoding(namebuf); + if (enc == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognized encoding: \"%s\"", namebuf))); + + dataptr = VARDATA_ANY(data); + datalen = VARSIZE_ANY_EXHDR(data); + + resultlen = enc->encode_len(dataptr, datalen); + + /* + * resultlen possibly overflows uint32, therefore on 32-bit machines it's + * unsafe to rely on palloc's internal check. + */ + if (resultlen > MaxAllocSize - VARHDRSZ) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("result of encoding conversion is too large"))); + + result = palloc(VARHDRSZ + resultlen); + + res = enc->encode(dataptr, datalen, VARDATA(result)); + + /* Make this FATAL 'cause we've trodden on memory ... */ + if (res > resultlen) + elog(FATAL, "overflow - encode estimate too small"); + + SET_VARSIZE(result, VARHDRSZ + res); + + PG_RETURN_TEXT_P(result); +} + +Datum +binary_decode(PG_FUNCTION_ARGS) +{ + text *data = PG_GETARG_TEXT_PP(0); + Datum name = PG_GETARG_DATUM(1); + bytea *result; + char *namebuf; + char *dataptr; + size_t datalen; + uint64 resultlen; + uint64 res; + const struct pg_encoding *enc; + + namebuf = TextDatumGetCString(name); + + enc = pg_find_encoding(namebuf); + if (enc == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognized encoding: \"%s\"", namebuf))); + + dataptr = VARDATA_ANY(data); + datalen = VARSIZE_ANY_EXHDR(data); + + resultlen = enc->decode_len(dataptr, datalen); + + /* + * resultlen possibly overflows uint32, therefore on 32-bit machines it's + * unsafe to rely on palloc's internal check. + */ + if (resultlen > MaxAllocSize - VARHDRSZ) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("result of decoding conversion is too large"))); + + result = palloc(VARHDRSZ + resultlen); + + res = enc->decode(dataptr, datalen, VARDATA(result)); + + /* Make this FATAL 'cause we've trodden on memory ... 
*/ + if (res > resultlen) + elog(FATAL, "overflow - decode estimate too small"); + + SET_VARSIZE(result, VARHDRSZ + res); + + PG_RETURN_BYTEA_P(result); +} + + +/* + * HEX + */ + +static const char hextbl[] = "0123456789abcdef"; + +static const int8 hexlookup[128] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + +uint64 +hex_encode(const char *src, size_t len, char *dst) +{ + const char *end = src + len; + + while (src < end) + { + *dst++ = hextbl[(*src >> 4) & 0xF]; + *dst++ = hextbl[*src & 0xF]; + src++; + } + return (uint64) len * 2; +} + +static inline bool +get_hex(const char *cp, char *out) +{ + unsigned char c = (unsigned char) *cp; + int res = -1; + + if (c < 127) + res = hexlookup[c]; + + *out = (char) res; + + return (res >= 0); +} + +uint64 +hex_decode(const char *src, size_t len, char *dst) +{ + return hex_decode_safe(src, len, dst, NULL); +} + +uint64 +hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext) +{ + const char *s, + *srcend; + char v1, + v2, + *p; + + srcend = src + len; + s = src; + p = dst; + while (s < srcend) + { + if (*s == ' ' || *s == '\n' || *s == '\t' || *s == '\r') + { + s++; + continue; + } + if (!get_hex(s, &v1)) + ereturn(escontext, 0, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid hexadecimal digit: \"%.*s\"", + pg_mblen(s), s))); + s++; + if (s >= srcend) + ereturn(escontext, 0, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid hexadecimal data: odd number of digits"))); + if (!get_hex(s, &v2)) + ereturn(escontext, 0, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid hexadecimal digit: \"%.*s\"", + pg_mblen(s), s))); + s++; + *p++ = (v1 << 4) | v2; + } + + return p - dst; +} + +static uint64 +hex_enc_len(const char *src, size_t srclen) +{ + return (uint64) srclen << 1; +} + +static uint64 +hex_dec_len(const char *src, size_t srclen) +{ + return (uint64) srclen >> 1; +} + +/* + * BASE64 + */ + +static const char _base64[] = +"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +static const int8 b64lookup[128] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, + -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, +}; + +static uint64 +pg_base64_encode(const char *src, size_t len, char *dst) +{ + char *p, + *lend = dst + 76; + const char *s, + *end = src + len; + int pos = 2; + uint32 buf = 0; + + s = src; + p = dst; + + while (s < end) + { + buf |= (unsigned char) *s << (pos << 3); + pos--; + s++; + + /* write it out */ + if (pos < 0) + { + *p++ = _base64[(buf >> 18) & 0x3f]; + *p++ = _base64[(buf >> 12) & 0x3f]; + *p++ = _base64[(buf >> 6) & 0x3f]; + *p++ = _base64[buf & 0x3f]; + + pos = 2; + buf 
= 0; + } + if (p >= lend) + { + *p++ = '\n'; + lend = p + 76; + } + } + if (pos != 2) + { + *p++ = _base64[(buf >> 18) & 0x3f]; + *p++ = _base64[(buf >> 12) & 0x3f]; + *p++ = (pos == 0) ? _base64[(buf >> 6) & 0x3f] : '='; + *p++ = '='; + } + + return p - dst; +} + +static uint64 +pg_base64_decode(const char *src, size_t len, char *dst) +{ + const char *srcend = src + len, + *s = src; + char *p = dst; + char c; + int b = 0; + uint32 buf = 0; + int pos = 0, + end = 0; + + while (s < srcend) + { + c = *s++; + + if (c == ' ' || c == '\t' || c == '\n' || c == '\r') + continue; + + if (c == '=') + { + /* end sequence */ + if (!end) + { + if (pos == 2) + end = 1; + else if (pos == 3) + end = 2; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unexpected \"=\" while decoding base64 sequence"))); + } + b = 0; + } + else + { + b = -1; + if (c > 0 && c < 127) + b = b64lookup[(unsigned char) c]; + if (b < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid symbol \"%.*s\" found while decoding base64 sequence", + pg_mblen(s - 1), s - 1))); + } + /* add it to buffer */ + buf = (buf << 6) + b; + pos++; + if (pos == 4) + { + *p++ = (buf >> 16) & 255; + if (end == 0 || end > 1) + *p++ = (buf >> 8) & 255; + if (end == 0 || end > 2) + *p++ = buf & 255; + buf = 0; + pos = 0; + } + } + + if (pos != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid base64 end sequence"), + errhint("Input data is missing padding, is truncated, or is otherwise corrupted."))); + + return p - dst; +} + + +static uint64 +pg_base64_enc_len(const char *src, size_t srclen) +{ + /* 3 bytes will be converted to 4, linefeed after 76 chars */ + return ((uint64) srclen + 2) / 3 * 4 + (uint64) srclen / (76 * 3 / 4); +} + +static uint64 +pg_base64_dec_len(const char *src, size_t srclen) +{ + return ((uint64) srclen * 3) >> 2; +} + +/* + * Escape + * Minimally escape bytea to text. + * De-escape text to bytea. + * + * We must escape zero bytes and high-bit-set bytes to avoid generating + * text that might be invalid in the current encoding, or that might + * change to something else if passed through an encoding conversion + * (leading to failing to de-escape to the original bytea value). + * Also of course backslash itself has to be escaped. 
+ * + * De-escaping processes \\ and any \### octal + */ + +#define VAL(CH) ((CH) - '0') +#define DIG(VAL) ((VAL) + '0') + +static uint64 +esc_encode(const char *src, size_t srclen, char *dst) +{ + const char *end = src + srclen; + char *rp = dst; + uint64 len = 0; + + while (src < end) + { + unsigned char c = (unsigned char) *src; + + if (c == '\0' || IS_HIGHBIT_SET(c)) + { + rp[0] = '\\'; + rp[1] = DIG(c >> 6); + rp[2] = DIG((c >> 3) & 7); + rp[3] = DIG(c & 7); + rp += 4; + len += 4; + } + else if (c == '\\') + { + rp[0] = '\\'; + rp[1] = '\\'; + rp += 2; + len += 2; + } + else + { + *rp++ = c; + len++; + } + + src++; + } + + return len; +} + +static uint64 +esc_decode(const char *src, size_t srclen, char *dst) +{ + const char *end = src + srclen; + char *rp = dst; + uint64 len = 0; + + while (src < end) + { + if (src[0] != '\\') + *rp++ = *src++; + else if (src + 3 < end && + (src[1] >= '0' && src[1] <= '3') && + (src[2] >= '0' && src[2] <= '7') && + (src[3] >= '0' && src[3] <= '7')) + { + int val; + + val = VAL(src[1]); + val <<= 3; + val += VAL(src[2]); + val <<= 3; + *rp++ = val + VAL(src[3]); + src += 4; + } + else if (src + 1 < end && + (src[1] == '\\')) + { + *rp++ = '\\'; + src += 2; + } + else + { + /* + * One backslash, not followed by ### valid octal. Should never + * get here, since esc_dec_len does same check. + */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "bytea"))); + } + + len++; + } + + return len; +} + +static uint64 +esc_enc_len(const char *src, size_t srclen) +{ + const char *end = src + srclen; + uint64 len = 0; + + while (src < end) + { + if (*src == '\0' || IS_HIGHBIT_SET(*src)) + len += 4; + else if (*src == '\\') + len += 2; + else + len++; + + src++; + } + + return len; +} + +static uint64 +esc_dec_len(const char *src, size_t srclen) +{ + const char *end = src + srclen; + uint64 len = 0; + + while (src < end) + { + if (src[0] != '\\') + src++; + else if (src + 3 < end && + (src[1] >= '0' && src[1] <= '3') && + (src[2] >= '0' && src[2] <= '7') && + (src[3] >= '0' && src[3] <= '7')) + { + /* + * backslash + valid octal + */ + src += 4; + } + else if (src + 1 < end && + (src[1] == '\\')) + { + /* + * two backslashes = backslash + */ + src += 2; + } + else + { + /* + * one backslash, not followed by ### valid octal + */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "bytea"))); + } + + len++; + } + return len; +} + +/* + * Common + */ + +static const struct +{ + const char *name; + struct pg_encoding enc; +} enclist[] = + +{ + { + "hex", + { + hex_enc_len, hex_dec_len, hex_encode, hex_decode + } + }, + { + "base64", + { + pg_base64_enc_len, pg_base64_dec_len, pg_base64_encode, pg_base64_decode + } + }, + { + "escape", + { + esc_enc_len, esc_dec_len, esc_encode, esc_decode + } + }, + { + NULL, + { + NULL, NULL, NULL, NULL + } + } +}; + +static const struct pg_encoding * +pg_find_encoding(const char *name) +{ + int i; + + for (i = 0; enclist[i].name; i++) + if (pg_strcasecmp(enclist[i].name, name) == 0) + return &enclist[i].enc; + + return NULL; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/enum.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/enum.c new file mode 100644 index 00000000000..fdfdf7d0d2c --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/enum.c @@ -0,0 +1,616 @@ 
+/*------------------------------------------------------------------------- + * + * enum.c + * I/O functions, operators, aggregates etc for enum types + * + * Copyright (c) 2006-2023, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/enum.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/htup_details.h" +#include "access/table.h" +#include "catalog/pg_enum.h" +#include "libpq/pqformat.h" +#include "storage/procarray.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" +#include "utils/typcache.h" + + +static Oid enum_endpoint(Oid enumtypoid, ScanDirection direction); +static ArrayType *enum_range_internal(Oid enumtypoid, Oid lower, Oid upper); + + +/* + * Disallow use of an uncommitted pg_enum tuple. + * + * We need to make sure that uncommitted enum values don't get into indexes. + * If they did, and if we then rolled back the pg_enum addition, we'd have + * broken the index because value comparisons will not work reliably without + * an underlying pg_enum entry. (Note that removal of the heap entry + * containing an enum value is not sufficient to ensure that it doesn't appear + * in upper levels of indexes.) To do this we prevent an uncommitted row from + * being used for any SQL-level purpose. This is stronger than necessary, + * since the value might not be getting inserted into a table or there might + * be no index on its column, but it's easy to enforce centrally. + * + * However, it's okay to allow use of uncommitted values belonging to enum + * types that were themselves created in the same transaction, because then + * any such index would also be new and would go away altogether on rollback. + * We don't implement that fully right now, but we do allow free use of enum + * values created during CREATE TYPE AS ENUM, which are surely of the same + * lifespan as the enum type. (This case is required by "pg_restore -1".) + * Values added by ALTER TYPE ADD VALUE are currently restricted, but could + * be allowed if the enum type could be proven to have been created earlier + * in the same transaction. (Note that comparing tuple xmins would not work + * for that, because the type tuple might have been updated in the current + * transaction. Subtransactions also create hazards to be accounted for.) + * + * This function needs to be called (directly or indirectly) in any of the + * functions below that could return an enum value to SQL operations. + */ +static void +check_safe_enum_use(HeapTuple enumval_tup) +{ + TransactionId xmin; + Form_pg_enum en = (Form_pg_enum) GETSTRUCT(enumval_tup); + + /* + * If the row is hinted as committed, it's surely safe. This provides a + * fast path for all normal use-cases. + */ + if (HeapTupleHeaderXminCommitted(enumval_tup->t_data)) + return; + + /* + * Usually, a row would get hinted as committed when it's read or loaded + * into syscache; but just in case not, let's check the xmin directly. + */ + xmin = HeapTupleHeaderGetXmin(enumval_tup->t_data); + if (!TransactionIdIsInProgress(xmin) && + TransactionIdDidCommit(xmin)) + return; + + /* + * Check if the enum value is uncommitted. If not, it's safe, because it + * was made during CREATE TYPE AS ENUM and can't be shorter-lived than its + * owning type. 
(This'd also be false for values made by other + * transactions; but the previous tests should have handled all of those.) + */ + if (!EnumUncommitted(en->oid)) + return; + + /* + * There might well be other tests we could do here to narrow down the + * unsafe conditions, but for now just raise an exception. + */ + ereport(ERROR, + (errcode(ERRCODE_UNSAFE_NEW_ENUM_VALUE_USAGE), + errmsg("unsafe use of new value \"%s\" of enum type %s", + NameStr(en->enumlabel), + format_type_be(en->enumtypid)), + errhint("New enum values must be committed before they can be used."))); +} + + +/* Basic I/O support */ + +Datum +enum_in(PG_FUNCTION_ARGS) +{ + char *name = PG_GETARG_CSTRING(0); + Oid enumtypoid = PG_GETARG_OID(1); + Node *escontext = fcinfo->context; + Oid enumoid; + HeapTuple tup; + + /* must check length to prevent Assert failure within SearchSysCache */ + if (strlen(name) >= NAMEDATALEN) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input value for enum %s: \"%s\"", + format_type_be(enumtypoid), + name))); + + tup = SearchSysCache2(ENUMTYPOIDNAME, + ObjectIdGetDatum(enumtypoid), + CStringGetDatum(name)); + if (!HeapTupleIsValid(tup)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input value for enum %s: \"%s\"", + format_type_be(enumtypoid), + name))); + + /* + * Check it's safe to use in SQL. Perhaps we should take the trouble to + * report "unsafe use" softly; but it's unclear that it's worth the + * trouble, or indeed that that is a legitimate bad-input case at all + * rather than an implementation shortcoming. + */ + check_safe_enum_use(tup); + + /* + * This comes from pg_enum.oid and stores system oids in user tables. This + * oid must be preserved by binary upgrades. 
+ */ + enumoid = ((Form_pg_enum) GETSTRUCT(tup))->oid; + + ReleaseSysCache(tup); + + PG_RETURN_OID(enumoid); +} + +Datum +enum_out(PG_FUNCTION_ARGS) +{ + Oid enumval = PG_GETARG_OID(0); + char *result; + HeapTuple tup; + Form_pg_enum en; + + tup = SearchSysCache1(ENUMOID, ObjectIdGetDatum(enumval)); + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("invalid internal value for enum: %u", + enumval))); + en = (Form_pg_enum) GETSTRUCT(tup); + + result = pstrdup(NameStr(en->enumlabel)); + + ReleaseSysCache(tup); + + PG_RETURN_CSTRING(result); +} + +/* Binary I/O support */ +Datum +enum_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + Oid enumtypoid = PG_GETARG_OID(1); + Oid enumoid; + HeapTuple tup; + char *name; + int nbytes; + + name = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); + + /* must check length to prevent Assert failure within SearchSysCache */ + if (strlen(name) >= NAMEDATALEN) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input value for enum %s: \"%s\"", + format_type_be(enumtypoid), + name))); + + tup = SearchSysCache2(ENUMTYPOIDNAME, + ObjectIdGetDatum(enumtypoid), + CStringGetDatum(name)); + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input value for enum %s: \"%s\"", + format_type_be(enumtypoid), + name))); + + /* check it's safe to use in SQL */ + check_safe_enum_use(tup); + + enumoid = ((Form_pg_enum) GETSTRUCT(tup))->oid; + + ReleaseSysCache(tup); + + pfree(name); + + PG_RETURN_OID(enumoid); +} + +Datum +enum_send(PG_FUNCTION_ARGS) +{ + Oid enumval = PG_GETARG_OID(0); + StringInfoData buf; + HeapTuple tup; + Form_pg_enum en; + + tup = SearchSysCache1(ENUMOID, ObjectIdGetDatum(enumval)); + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("invalid internal value for enum: %u", + enumval))); + en = (Form_pg_enum) GETSTRUCT(tup); + + pq_begintypsend(&buf); + pq_sendtext(&buf, NameStr(en->enumlabel), strlen(NameStr(en->enumlabel))); + + ReleaseSysCache(tup); + + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* Comparison functions and related */ + +/* + * enum_cmp_internal is the common engine for all the visible comparison + * functions, except for enum_eq and enum_ne which can just check for OID + * equality directly. + */ +static int +enum_cmp_internal(Oid arg1, Oid arg2, FunctionCallInfo fcinfo) +{ + TypeCacheEntry *tcache; + + /* + * We don't need the typcache except in the hopefully-uncommon case that + * one or both Oids are odd. This means that cursory testing of code that + * fails to pass flinfo to an enum comparison function might not disclose + * the oversight. To make such errors more obvious, Assert that we have a + * place to cache even when we take a fast-path exit. 
+ */ + Assert(fcinfo->flinfo != NULL); + + /* Equal OIDs are equal no matter what */ + if (arg1 == arg2) + return 0; + + /* Fast path: even-numbered Oids are known to compare correctly */ + if ((arg1 & 1) == 0 && (arg2 & 1) == 0) + { + if (arg1 < arg2) + return -1; + else + return 1; + } + + /* Locate the typcache entry for the enum type */ + tcache = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; + if (tcache == NULL) + { + HeapTuple enum_tup; + Form_pg_enum en; + Oid typeoid; + + /* Get the OID of the enum type containing arg1 */ + enum_tup = SearchSysCache1(ENUMOID, ObjectIdGetDatum(arg1)); + if (!HeapTupleIsValid(enum_tup)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("invalid internal value for enum: %u", + arg1))); + en = (Form_pg_enum) GETSTRUCT(enum_tup); + typeoid = en->enumtypid; + ReleaseSysCache(enum_tup); + /* Now locate and remember the typcache entry */ + tcache = lookup_type_cache(typeoid, 0); + fcinfo->flinfo->fn_extra = (void *) tcache; + } + + /* The remaining comparison logic is in typcache.c */ + return compare_values_of_enum(tcache, arg1, arg2); +} + +Datum +enum_lt(PG_FUNCTION_ARGS) +{ + Oid a = PG_GETARG_OID(0); + Oid b = PG_GETARG_OID(1); + + PG_RETURN_BOOL(enum_cmp_internal(a, b, fcinfo) < 0); +} + +Datum +enum_le(PG_FUNCTION_ARGS) +{ + Oid a = PG_GETARG_OID(0); + Oid b = PG_GETARG_OID(1); + + PG_RETURN_BOOL(enum_cmp_internal(a, b, fcinfo) <= 0); +} + +Datum +enum_eq(PG_FUNCTION_ARGS) +{ + Oid a = PG_GETARG_OID(0); + Oid b = PG_GETARG_OID(1); + + PG_RETURN_BOOL(a == b); +} + +Datum +enum_ne(PG_FUNCTION_ARGS) +{ + Oid a = PG_GETARG_OID(0); + Oid b = PG_GETARG_OID(1); + + PG_RETURN_BOOL(a != b); +} + +Datum +enum_ge(PG_FUNCTION_ARGS) +{ + Oid a = PG_GETARG_OID(0); + Oid b = PG_GETARG_OID(1); + + PG_RETURN_BOOL(enum_cmp_internal(a, b, fcinfo) >= 0); +} + +Datum +enum_gt(PG_FUNCTION_ARGS) +{ + Oid a = PG_GETARG_OID(0); + Oid b = PG_GETARG_OID(1); + + PG_RETURN_BOOL(enum_cmp_internal(a, b, fcinfo) > 0); +} + +Datum +enum_smaller(PG_FUNCTION_ARGS) +{ + Oid a = PG_GETARG_OID(0); + Oid b = PG_GETARG_OID(1); + + PG_RETURN_OID(enum_cmp_internal(a, b, fcinfo) < 0 ? a : b); +} + +Datum +enum_larger(PG_FUNCTION_ARGS) +{ + Oid a = PG_GETARG_OID(0); + Oid b = PG_GETARG_OID(1); + + PG_RETURN_OID(enum_cmp_internal(a, b, fcinfo) > 0 ? a : b); +} + +Datum +enum_cmp(PG_FUNCTION_ARGS) +{ + Oid a = PG_GETARG_OID(0); + Oid b = PG_GETARG_OID(1); + + PG_RETURN_INT32(enum_cmp_internal(a, b, fcinfo)); +} + +/* Enum programming support functions */ + +/* + * enum_endpoint: common code for enum_first/enum_last + */ +static Oid +enum_endpoint(Oid enumtypoid, ScanDirection direction) +{ + Relation enum_rel; + Relation enum_idx; + SysScanDesc enum_scan; + HeapTuple enum_tuple; + ScanKeyData skey; + Oid minmax; + + /* + * Find the first/last enum member using pg_enum_typid_sortorder_index. + * Note we must not use the syscache. See comments for RenumberEnumType + * in catalog/pg_enum.c for more info. 
+ */ + ScanKeyInit(&skey, + Anum_pg_enum_enumtypid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(enumtypoid)); + + enum_rel = table_open(EnumRelationId, AccessShareLock); + enum_idx = index_open(EnumTypIdSortOrderIndexId, AccessShareLock); + enum_scan = systable_beginscan_ordered(enum_rel, enum_idx, NULL, + 1, &skey); + + enum_tuple = systable_getnext_ordered(enum_scan, direction); + if (HeapTupleIsValid(enum_tuple)) + { + /* check it's safe to use in SQL */ + check_safe_enum_use(enum_tuple); + minmax = ((Form_pg_enum) GETSTRUCT(enum_tuple))->oid; + } + else + { + /* should only happen with an empty enum */ + minmax = InvalidOid; + } + + systable_endscan_ordered(enum_scan); + index_close(enum_idx, AccessShareLock); + table_close(enum_rel, AccessShareLock); + + return minmax; +} + +Datum +enum_first(PG_FUNCTION_ARGS) +{ + Oid enumtypoid; + Oid min; + + /* + * We rely on being able to get the specific enum type from the calling + * expression tree. Notice that the actual value of the argument isn't + * examined at all; in particular it might be NULL. + */ + enumtypoid = get_fn_expr_argtype(fcinfo->flinfo, 0); + if (enumtypoid == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not determine actual enum type"))); + + /* Get the OID using the index */ + min = enum_endpoint(enumtypoid, ForwardScanDirection); + + if (!OidIsValid(min)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("enum %s contains no values", + format_type_be(enumtypoid)))); + + PG_RETURN_OID(min); +} + +Datum +enum_last(PG_FUNCTION_ARGS) +{ + Oid enumtypoid; + Oid max; + + /* + * We rely on being able to get the specific enum type from the calling + * expression tree. Notice that the actual value of the argument isn't + * examined at all; in particular it might be NULL. + */ + enumtypoid = get_fn_expr_argtype(fcinfo->flinfo, 0); + if (enumtypoid == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not determine actual enum type"))); + + /* Get the OID using the index */ + max = enum_endpoint(enumtypoid, BackwardScanDirection); + + if (!OidIsValid(max)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("enum %s contains no values", + format_type_be(enumtypoid)))); + + PG_RETURN_OID(max); +} + +/* 2-argument variant of enum_range */ +Datum +enum_range_bounds(PG_FUNCTION_ARGS) +{ + Oid lower; + Oid upper; + Oid enumtypoid; + + if (PG_ARGISNULL(0)) + lower = InvalidOid; + else + lower = PG_GETARG_OID(0); + if (PG_ARGISNULL(1)) + upper = InvalidOid; + else + upper = PG_GETARG_OID(1); + + /* + * We rely on being able to get the specific enum type from the calling + * expression tree. The generic type mechanism should have ensured that + * both are of the same type. + */ + enumtypoid = get_fn_expr_argtype(fcinfo->flinfo, 0); + if (enumtypoid == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not determine actual enum type"))); + + PG_RETURN_ARRAYTYPE_P(enum_range_internal(enumtypoid, lower, upper)); +} + +/* 1-argument variant of enum_range */ +Datum +enum_range_all(PG_FUNCTION_ARGS) +{ + Oid enumtypoid; + + /* + * We rely on being able to get the specific enum type from the calling + * expression tree. Notice that the actual value of the argument isn't + * examined at all; in particular it might be NULL. 
+ */ + enumtypoid = get_fn_expr_argtype(fcinfo->flinfo, 0); + if (enumtypoid == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not determine actual enum type"))); + + PG_RETURN_ARRAYTYPE_P(enum_range_internal(enumtypoid, + InvalidOid, InvalidOid)); +} + +static ArrayType * +enum_range_internal(Oid enumtypoid, Oid lower, Oid upper) +{ + ArrayType *result; + Relation enum_rel; + Relation enum_idx; + SysScanDesc enum_scan; + HeapTuple enum_tuple; + ScanKeyData skey; + Datum *elems; + int max, + cnt; + bool left_found; + + /* + * Scan the enum members in order using pg_enum_typid_sortorder_index. + * Note we must not use the syscache. See comments for RenumberEnumType + * in catalog/pg_enum.c for more info. + */ + ScanKeyInit(&skey, + Anum_pg_enum_enumtypid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(enumtypoid)); + + enum_rel = table_open(EnumRelationId, AccessShareLock); + enum_idx = index_open(EnumTypIdSortOrderIndexId, AccessShareLock); + enum_scan = systable_beginscan_ordered(enum_rel, enum_idx, NULL, 1, &skey); + + max = 64; + elems = (Datum *) palloc(max * sizeof(Datum)); + cnt = 0; + left_found = !OidIsValid(lower); + + while (HeapTupleIsValid(enum_tuple = systable_getnext_ordered(enum_scan, ForwardScanDirection))) + { + Oid enum_oid = ((Form_pg_enum) GETSTRUCT(enum_tuple))->oid; + + if (!left_found && lower == enum_oid) + left_found = true; + + if (left_found) + { + /* check it's safe to use in SQL */ + check_safe_enum_use(enum_tuple); + + if (cnt >= max) + { + max *= 2; + elems = (Datum *) repalloc(elems, max * sizeof(Datum)); + } + + elems[cnt++] = ObjectIdGetDatum(enum_oid); + } + + if (OidIsValid(upper) && upper == enum_oid) + break; + } + + systable_endscan_ordered(enum_scan); + index_close(enum_idx, AccessShareLock); + table_close(enum_rel, AccessShareLock); + + /* and build the result array */ + /* note this hardwires some details about the representation of Oid */ + result = construct_array(elems, cnt, enumtypoid, + sizeof(Oid), true, TYPALIGN_INT); + + pfree(elems); + + return result; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/expandeddatum.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/expandeddatum.c new file mode 100644 index 00000000000..24dc9473b42 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/expandeddatum.c @@ -0,0 +1,145 @@ +/*------------------------------------------------------------------------- + * + * expandeddatum.c + * Support functions for "expanded" value representations. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/expandeddatum.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "utils/expandeddatum.h" +#include "utils/memutils.h" + +/* + * DatumGetEOHP + * + * Given a Datum that is an expanded-object reference, extract the pointer. + * + * This is a bit tedious since the pointer may not be properly aligned; + * compare VARATT_EXTERNAL_GET_POINTER(). 
+ */ +ExpandedObjectHeader * +DatumGetEOHP(Datum d) +{ + varattrib_1b_e *datum = (varattrib_1b_e *) DatumGetPointer(d); + varatt_expanded ptr; + + Assert(VARATT_IS_EXTERNAL_EXPANDED(datum)); + memcpy(&ptr, VARDATA_EXTERNAL(datum), sizeof(ptr)); + Assert(VARATT_IS_EXPANDED_HEADER(ptr.eohptr)); + return ptr.eohptr; +} + +/* + * EOH_init_header + * + * Initialize the common header of an expanded object. + * + * The main thing this encapsulates is initializing the TOAST pointers. + */ +void +EOH_init_header(ExpandedObjectHeader *eohptr, + const ExpandedObjectMethods *methods, + MemoryContext obj_context) +{ + varatt_expanded ptr; + + eohptr->vl_len_ = EOH_HEADER_MAGIC; + eohptr->eoh_methods = methods; + eohptr->eoh_context = obj_context; + + ptr.eohptr = eohptr; + + SET_VARTAG_EXTERNAL(eohptr->eoh_rw_ptr, VARTAG_EXPANDED_RW); + memcpy(VARDATA_EXTERNAL(eohptr->eoh_rw_ptr), &ptr, sizeof(ptr)); + + SET_VARTAG_EXTERNAL(eohptr->eoh_ro_ptr, VARTAG_EXPANDED_RO); + memcpy(VARDATA_EXTERNAL(eohptr->eoh_ro_ptr), &ptr, sizeof(ptr)); +} + +/* + * EOH_get_flat_size + * EOH_flatten_into + * + * Convenience functions for invoking the "methods" of an expanded object. + */ + +Size +EOH_get_flat_size(ExpandedObjectHeader *eohptr) +{ + return eohptr->eoh_methods->get_flat_size(eohptr); +} + +void +EOH_flatten_into(ExpandedObjectHeader *eohptr, + void *result, Size allocated_size) +{ + eohptr->eoh_methods->flatten_into(eohptr, result, allocated_size); +} + +/* + * If the Datum represents a R/W expanded object, change it to R/O. + * Otherwise return the original Datum. + * + * Caller must ensure that the datum is a non-null varlena value. Typically + * this is invoked via MakeExpandedObjectReadOnly(), which checks that. + */ +Datum +MakeExpandedObjectReadOnlyInternal(Datum d) +{ + ExpandedObjectHeader *eohptr; + + /* Nothing to do if not a read-write expanded-object pointer */ + if (!VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(d))) + return d; + + /* Now safe to extract the object pointer */ + eohptr = DatumGetEOHP(d); + + /* Return the built-in read-only pointer instead of given pointer */ + return EOHPGetRODatum(eohptr); +} + +/* + * Transfer ownership of an expanded object to a new parent memory context. + * The object must be referenced by a R/W pointer, and what we return is + * always its "standard" R/W pointer, which is certain to have the same + * lifespan as the object itself. (The passed-in pointer might not, and + * in any case wouldn't provide a unique identifier if it's not that one.) + */ +Datum +TransferExpandedObject(Datum d, MemoryContext new_parent) +{ + ExpandedObjectHeader *eohptr = DatumGetEOHP(d); + + /* Assert caller gave a R/W pointer */ + Assert(VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(d))); + + /* Transfer ownership */ + MemoryContextSetParent(eohptr->eoh_context, new_parent); + + /* Return the object's standard read-write pointer */ + return EOHPGetRWDatum(eohptr); +} + +/* + * Delete an expanded object (must be referenced by a R/W pointer). 
+ */ +void +DeleteExpandedObject(Datum d) +{ + ExpandedObjectHeader *eohptr = DatumGetEOHP(d); + + /* Assert caller gave a R/W pointer */ + Assert(VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(d))); + + /* Kill it */ + MemoryContextDelete(eohptr->eoh_context); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/expandedrecord.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/expandedrecord.c new file mode 100644 index 00000000000..c46e5aa36f2 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/expandedrecord.c @@ -0,0 +1,1633 @@ +/*------------------------------------------------------------------------- + * + * expandedrecord.c + * Functions for manipulating composite expanded objects. + * + * This module supports "expanded objects" (cf. expandeddatum.h) that can + * store values of named composite types, domains over named composite types, + * and record types (registered or anonymous). + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/expandedrecord.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/detoast.h" +#include "access/heaptoast.h" +#include "access/htup_details.h" +#include "catalog/heap.h" +#include "catalog/pg_type.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/expandedrecord.h" +#include "utils/memutils.h" +#include "utils/typcache.h" + + +/* "Methods" required for an expanded object */ +static Size ER_get_flat_size(ExpandedObjectHeader *eohptr); +static void ER_flatten_into(ExpandedObjectHeader *eohptr, + void *result, Size allocated_size); + +static const ExpandedObjectMethods ER_methods = +{ + ER_get_flat_size, + ER_flatten_into +}; + +/* Other local functions */ +static void ER_mc_callback(void *arg); +static MemoryContext get_short_term_cxt(ExpandedRecordHeader *erh); +static void build_dummy_expanded_header(ExpandedRecordHeader *main_erh); +static pg_noinline void check_domain_for_new_field(ExpandedRecordHeader *erh, + int fnumber, + Datum newValue, bool isnull); +static pg_noinline void check_domain_for_new_tuple(ExpandedRecordHeader *erh, + HeapTuple tuple); + + +/* + * Build an expanded record of the specified composite type + * + * type_id can be RECORDOID, but only if a positive typmod is given. + * + * The expanded record is initially "empty", having a state logically + * equivalent to a NULL composite value (not ROW(NULL, NULL, ...)). + * Note that this might not be a valid state for a domain type; + * if the caller needs to check that, call + * expanded_record_set_tuple(erh, NULL, false, false). + * + * The expanded object will be a child of parentcontext. + */ +ExpandedRecordHeader * +make_expanded_record_from_typeid(Oid type_id, int32 typmod, + MemoryContext parentcontext) +{ + ExpandedRecordHeader *erh; + int flags = 0; + TupleDesc tupdesc; + uint64 tupdesc_id; + MemoryContext objcxt; + char *chunk; + + if (type_id != RECORDOID) + { + /* + * Consult the typcache to see if it's a domain over composite, and in + * any case to get the tupdesc and tupdesc identifier. 
+ */ + TypeCacheEntry *typentry; + + typentry = lookup_type_cache(type_id, + TYPECACHE_TUPDESC | + TYPECACHE_DOMAIN_BASE_INFO); + if (typentry->typtype == TYPTYPE_DOMAIN) + { + flags |= ER_FLAG_IS_DOMAIN; + typentry = lookup_type_cache(typentry->domainBaseType, + TYPECACHE_TUPDESC); + } + if (typentry->tupDesc == NULL) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("type %s is not composite", + format_type_be(type_id)))); + tupdesc = typentry->tupDesc; + tupdesc_id = typentry->tupDesc_identifier; + } + else + { + /* + * For RECORD types, get the tupdesc and identifier from typcache. + */ + tupdesc = lookup_rowtype_tupdesc(type_id, typmod); + tupdesc_id = assign_record_type_identifier(type_id, typmod); + } + + /* + * Allocate private context for expanded object. We use a regular-size + * context, not a small one, to improve the odds that we can fit a tupdesc + * into it without needing an extra malloc block. (This code path doesn't + * ever need to copy a tupdesc into the expanded record, but let's be + * consistent with the other ways of making an expanded record.) + */ + objcxt = AllocSetContextCreate(parentcontext, + "expanded record", + ALLOCSET_DEFAULT_SIZES); + + /* + * Since we already know the number of fields in the tupdesc, we can + * allocate the dvalues/dnulls arrays along with the record header. This + * is useless if we never need those arrays, but it costs almost nothing, + * and it will save a palloc cycle if we do need them. + */ + erh = (ExpandedRecordHeader *) + MemoryContextAlloc(objcxt, MAXALIGN(sizeof(ExpandedRecordHeader)) + + tupdesc->natts * (sizeof(Datum) + sizeof(bool))); + + /* Ensure all header fields are initialized to 0/null */ + memset(erh, 0, sizeof(ExpandedRecordHeader)); + + EOH_init_header(&erh->hdr, &ER_methods, objcxt); + erh->er_magic = ER_MAGIC; + + /* Set up dvalues/dnulls, with no valid contents as yet */ + chunk = (char *) erh + MAXALIGN(sizeof(ExpandedRecordHeader)); + erh->dvalues = (Datum *) chunk; + erh->dnulls = (bool *) (chunk + tupdesc->natts * sizeof(Datum)); + erh->nfields = tupdesc->natts; + + /* Fill in composite-type identification info */ + erh->er_decltypeid = type_id; + erh->er_typeid = tupdesc->tdtypeid; + erh->er_typmod = tupdesc->tdtypmod; + erh->er_tupdesc_id = tupdesc_id; + + erh->flags = flags; + + /* + * If what we got from the typcache is a refcounted tupdesc, we need to + * acquire our own refcount on it. We manage the refcount with a memory + * context callback rather than assuming that the CurrentResourceOwner is + * longer-lived than this expanded object. + */ + if (tupdesc->tdrefcount >= 0) + { + /* Register callback to release the refcount */ + erh->er_mcb.func = ER_mc_callback; + erh->er_mcb.arg = (void *) erh; + MemoryContextRegisterResetCallback(erh->hdr.eoh_context, + &erh->er_mcb); + + /* And save the pointer */ + erh->er_tupdesc = tupdesc; + tupdesc->tdrefcount++; + + /* If we called lookup_rowtype_tupdesc, release the pin it took */ + if (type_id == RECORDOID) + ReleaseTupleDesc(tupdesc); + } + else + { + /* + * If it's not refcounted, just assume it will outlive the expanded + * object. (This can happen for shared record types, for instance.) + */ + erh->er_tupdesc = tupdesc; + } + + /* + * We don't set ER_FLAG_DVALUES_VALID or ER_FLAG_FVALUE_VALID, so the + * record remains logically empty. + */ + + return erh; +} + +/* + * Build an expanded record of the rowtype defined by the tupdesc + * + * The tupdesc is copied if necessary (i.e., if we can't just bump its + * reference count instead). 
+ * + * The expanded record is initially "empty", having a state logically + * equivalent to a NULL composite value (not ROW(NULL, NULL, ...)). + * + * The expanded object will be a child of parentcontext. + */ +ExpandedRecordHeader * +make_expanded_record_from_tupdesc(TupleDesc tupdesc, + MemoryContext parentcontext) +{ + ExpandedRecordHeader *erh; + uint64 tupdesc_id; + MemoryContext objcxt; + MemoryContext oldcxt; + char *chunk; + + if (tupdesc->tdtypeid != RECORDOID) + { + /* + * If it's a named composite type (not RECORD), we prefer to reference + * the typcache's copy of the tupdesc, which is guaranteed to be + * refcounted (the given tupdesc might not be). In any case, we need + * to consult the typcache to get the correct tupdesc identifier. + * + * Note that tdtypeid couldn't be a domain type, so we need not + * consider that case here. + */ + TypeCacheEntry *typentry; + + typentry = lookup_type_cache(tupdesc->tdtypeid, TYPECACHE_TUPDESC); + if (typentry->tupDesc == NULL) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("type %s is not composite", + format_type_be(tupdesc->tdtypeid)))); + tupdesc = typentry->tupDesc; + tupdesc_id = typentry->tupDesc_identifier; + } + else + { + /* + * For RECORD types, get the appropriate unique identifier (possibly + * freshly assigned). + */ + tupdesc_id = assign_record_type_identifier(tupdesc->tdtypeid, + tupdesc->tdtypmod); + } + + /* + * Allocate private context for expanded object. We use a regular-size + * context, not a small one, to improve the odds that we can fit a tupdesc + * into it without needing an extra malloc block. + */ + objcxt = AllocSetContextCreate(parentcontext, + "expanded record", + ALLOCSET_DEFAULT_SIZES); + + /* + * Since we already know the number of fields in the tupdesc, we can + * allocate the dvalues/dnulls arrays along with the record header. This + * is useless if we never need those arrays, but it costs almost nothing, + * and it will save a palloc cycle if we do need them. + */ + erh = (ExpandedRecordHeader *) + MemoryContextAlloc(objcxt, MAXALIGN(sizeof(ExpandedRecordHeader)) + + tupdesc->natts * (sizeof(Datum) + sizeof(bool))); + + /* Ensure all header fields are initialized to 0/null */ + memset(erh, 0, sizeof(ExpandedRecordHeader)); + + EOH_init_header(&erh->hdr, &ER_methods, objcxt); + erh->er_magic = ER_MAGIC; + + /* Set up dvalues/dnulls, with no valid contents as yet */ + chunk = (char *) erh + MAXALIGN(sizeof(ExpandedRecordHeader)); + erh->dvalues = (Datum *) chunk; + erh->dnulls = (bool *) (chunk + tupdesc->natts * sizeof(Datum)); + erh->nfields = tupdesc->natts; + + /* Fill in composite-type identification info */ + erh->er_decltypeid = erh->er_typeid = tupdesc->tdtypeid; + erh->er_typmod = tupdesc->tdtypmod; + erh->er_tupdesc_id = tupdesc_id; + + /* + * Copy tupdesc if needed, but we prefer to bump its refcount if possible. + * We manage the refcount with a memory context callback rather than + * assuming that the CurrentResourceOwner is longer-lived than this + * expanded object. 
+ */ + if (tupdesc->tdrefcount >= 0) + { + /* Register callback to release the refcount */ + erh->er_mcb.func = ER_mc_callback; + erh->er_mcb.arg = (void *) erh; + MemoryContextRegisterResetCallback(erh->hdr.eoh_context, + &erh->er_mcb); + + /* And save the pointer */ + erh->er_tupdesc = tupdesc; + tupdesc->tdrefcount++; + } + else + { + /* Just copy it */ + oldcxt = MemoryContextSwitchTo(objcxt); + erh->er_tupdesc = CreateTupleDescCopy(tupdesc); + erh->flags |= ER_FLAG_TUPDESC_ALLOCED; + MemoryContextSwitchTo(oldcxt); + } + + /* + * We don't set ER_FLAG_DVALUES_VALID or ER_FLAG_FVALUE_VALID, so the + * record remains logically empty. + */ + + return erh; +} + +/* + * Build an expanded record of the same rowtype as the given expanded record + * + * This is faster than either of the above routines because we can bypass + * typcache lookup(s). + * + * The expanded record is initially "empty" --- we do not copy whatever + * tuple might be in the source expanded record. + * + * The expanded object will be a child of parentcontext. + */ +ExpandedRecordHeader * +make_expanded_record_from_exprecord(ExpandedRecordHeader *olderh, + MemoryContext parentcontext) +{ + ExpandedRecordHeader *erh; + TupleDesc tupdesc = expanded_record_get_tupdesc(olderh); + MemoryContext objcxt; + MemoryContext oldcxt; + char *chunk; + + /* + * Allocate private context for expanded object. We use a regular-size + * context, not a small one, to improve the odds that we can fit a tupdesc + * into it without needing an extra malloc block. + */ + objcxt = AllocSetContextCreate(parentcontext, + "expanded record", + ALLOCSET_DEFAULT_SIZES); + + /* + * Since we already know the number of fields in the tupdesc, we can + * allocate the dvalues/dnulls arrays along with the record header. This + * is useless if we never need those arrays, but it costs almost nothing, + * and it will save a palloc cycle if we do need them. + */ + erh = (ExpandedRecordHeader *) + MemoryContextAlloc(objcxt, MAXALIGN(sizeof(ExpandedRecordHeader)) + + tupdesc->natts * (sizeof(Datum) + sizeof(bool))); + + /* Ensure all header fields are initialized to 0/null */ + memset(erh, 0, sizeof(ExpandedRecordHeader)); + + EOH_init_header(&erh->hdr, &ER_methods, objcxt); + erh->er_magic = ER_MAGIC; + + /* Set up dvalues/dnulls, with no valid contents as yet */ + chunk = (char *) erh + MAXALIGN(sizeof(ExpandedRecordHeader)); + erh->dvalues = (Datum *) chunk; + erh->dnulls = (bool *) (chunk + tupdesc->natts * sizeof(Datum)); + erh->nfields = tupdesc->natts; + + /* Fill in composite-type identification info */ + erh->er_decltypeid = olderh->er_decltypeid; + erh->er_typeid = olderh->er_typeid; + erh->er_typmod = olderh->er_typmod; + erh->er_tupdesc_id = olderh->er_tupdesc_id; + + /* The only flag bit that transfers over is IS_DOMAIN */ + erh->flags = olderh->flags & ER_FLAG_IS_DOMAIN; + + /* + * Copy tupdesc if needed, but we prefer to bump its refcount if possible. + * We manage the refcount with a memory context callback rather than + * assuming that the CurrentResourceOwner is longer-lived than this + * expanded object. 
+ */ + if (tupdesc->tdrefcount >= 0) + { + /* Register callback to release the refcount */ + erh->er_mcb.func = ER_mc_callback; + erh->er_mcb.arg = (void *) erh; + MemoryContextRegisterResetCallback(erh->hdr.eoh_context, + &erh->er_mcb); + + /* And save the pointer */ + erh->er_tupdesc = tupdesc; + tupdesc->tdrefcount++; + } + else if (olderh->flags & ER_FLAG_TUPDESC_ALLOCED) + { + /* We need to make our own copy of the tupdesc */ + oldcxt = MemoryContextSwitchTo(objcxt); + erh->er_tupdesc = CreateTupleDescCopy(tupdesc); + erh->flags |= ER_FLAG_TUPDESC_ALLOCED; + MemoryContextSwitchTo(oldcxt); + } + else + { + /* + * Assume the tupdesc will outlive this expanded object, just like + * we're assuming it will outlive the source object. + */ + erh->er_tupdesc = tupdesc; + } + + /* + * We don't set ER_FLAG_DVALUES_VALID or ER_FLAG_FVALUE_VALID, so the + * record remains logically empty. + */ + + return erh; +} + +/* + * Insert given tuple as the value of the expanded record + * + * It is caller's responsibility that the tuple matches the record's + * previously-assigned rowtype. (However domain constraints, if any, + * will be checked here.) + * + * The tuple is physically copied into the expanded record's local storage + * if "copy" is true, otherwise it's caller's responsibility that the tuple + * will live as long as the expanded record does. + * + * Out-of-line field values in the tuple are automatically inlined if + * "expand_external" is true, otherwise not. (The combination copy = false, + * expand_external = true is not sensible and not supported.) + * + * Alternatively, tuple can be NULL, in which case we just set the expanded + * record to be empty. + */ +void +expanded_record_set_tuple(ExpandedRecordHeader *erh, + HeapTuple tuple, + bool copy, + bool expand_external) +{ + int oldflags; + HeapTuple oldtuple; + char *oldfstartptr; + char *oldfendptr; + int newflags; + HeapTuple newtuple; + MemoryContext oldcxt; + + /* Shouldn't ever be trying to assign new data to a dummy header */ + Assert(!(erh->flags & ER_FLAG_IS_DUMMY)); + + /* + * Before performing the assignment, see if result will satisfy domain. + */ + if (erh->flags & ER_FLAG_IS_DOMAIN) + check_domain_for_new_tuple(erh, tuple); + + /* + * If we need to get rid of out-of-line field values, do so, using the + * short-term context to avoid leaking whatever cruft the toast fetch + * might generate. + */ + if (expand_external && tuple) + { + /* Assert caller didn't ask for unsupported case */ + Assert(copy); + if (HeapTupleHasExternal(tuple)) + { + oldcxt = MemoryContextSwitchTo(get_short_term_cxt(erh)); + tuple = toast_flatten_tuple(tuple, erh->er_tupdesc); + MemoryContextSwitchTo(oldcxt); + } + else + expand_external = false; /* need not clean up below */ + } + + /* + * Initialize new flags, keeping only non-data status bits. + */ + oldflags = erh->flags; + newflags = oldflags & ER_FLAGS_NON_DATA; + + /* + * Copy tuple into local storage if needed. We must be sure this succeeds + * before we start to modify the expanded record's state. + */ + if (copy && tuple) + { + oldcxt = MemoryContextSwitchTo(erh->hdr.eoh_context); + newtuple = heap_copytuple(tuple); + newflags |= ER_FLAG_FVALUE_ALLOCED; + MemoryContextSwitchTo(oldcxt); + + /* We can now flush anything that detoasting might have leaked. 
*/ + if (expand_external) + MemoryContextReset(erh->er_short_term_cxt); + } + else + newtuple = tuple; + + /* Make copies of fields we're about to overwrite */ + oldtuple = erh->fvalue; + oldfstartptr = erh->fstartptr; + oldfendptr = erh->fendptr; + + /* + * It's now safe to update the expanded record's state. + */ + if (newtuple) + { + /* Save flat representation */ + erh->fvalue = newtuple; + erh->fstartptr = (char *) newtuple->t_data; + erh->fendptr = ((char *) newtuple->t_data) + newtuple->t_len; + newflags |= ER_FLAG_FVALUE_VALID; + + /* Remember if we have any out-of-line field values */ + if (HeapTupleHasExternal(newtuple)) + newflags |= ER_FLAG_HAVE_EXTERNAL; + } + else + { + erh->fvalue = NULL; + erh->fstartptr = erh->fendptr = NULL; + } + + erh->flags = newflags; + + /* Reset flat-size info; we don't bother to make it valid now */ + erh->flat_size = 0; + + /* + * Now, release any storage belonging to old field values. It's safe to + * do this because ER_FLAG_DVALUES_VALID is no longer set in erh->flags; + * even if we fail partway through, the record is valid, and at worst + * we've failed to reclaim some space. + */ + if (oldflags & ER_FLAG_DVALUES_ALLOCED) + { + TupleDesc tupdesc = erh->er_tupdesc; + int i; + + for (i = 0; i < erh->nfields; i++) + { + if (!erh->dnulls[i] && + !(TupleDescAttr(tupdesc, i)->attbyval)) + { + char *oldValue = (char *) DatumGetPointer(erh->dvalues[i]); + + if (oldValue < oldfstartptr || oldValue >= oldfendptr) + pfree(oldValue); + } + } + } + + /* Likewise free the old tuple, if it was locally allocated */ + if (oldflags & ER_FLAG_FVALUE_ALLOCED) + heap_freetuple(oldtuple); + + /* We won't make a new deconstructed representation until/unless needed */ +} + +/* + * make_expanded_record_from_datum: build expanded record from composite Datum + * + * This combines the functions of make_expanded_record_from_typeid and + * expanded_record_set_tuple. However, we do not force a lookup of the + * tupdesc immediately, reasoning that it might never be needed. + * + * The expanded object will be a child of parentcontext. + * + * Note: a composite datum cannot self-identify as being of a domain type, + * so we need not consider domain cases here. + */ +Datum +make_expanded_record_from_datum(Datum recorddatum, MemoryContext parentcontext) +{ + ExpandedRecordHeader *erh; + HeapTupleHeader tuphdr; + HeapTupleData tmptup; + HeapTuple newtuple; + MemoryContext objcxt; + MemoryContext oldcxt; + + /* + * Allocate private context for expanded object. We use a regular-size + * context, not a small one, to improve the odds that we can fit a tupdesc + * into it without needing an extra malloc block. + */ + objcxt = AllocSetContextCreate(parentcontext, + "expanded record", + ALLOCSET_DEFAULT_SIZES); + + /* Set up expanded record header, initializing fields to 0/null */ + erh = (ExpandedRecordHeader *) + MemoryContextAllocZero(objcxt, sizeof(ExpandedRecordHeader)); + + EOH_init_header(&erh->hdr, &ER_methods, objcxt); + erh->er_magic = ER_MAGIC; + + /* + * Detoast and copy source record into private context, as a HeapTuple. + * (If we actually have to detoast the source, we'll leak some memory in + * the caller's context, but it doesn't seem worth worrying about.) 
+ */ + tuphdr = DatumGetHeapTupleHeader(recorddatum); + + tmptup.t_len = HeapTupleHeaderGetDatumLength(tuphdr); + ItemPointerSetInvalid(&(tmptup.t_self)); + tmptup.t_tableOid = InvalidOid; + tmptup.t_data = tuphdr; + + oldcxt = MemoryContextSwitchTo(objcxt); + newtuple = heap_copytuple(&tmptup); + erh->flags |= ER_FLAG_FVALUE_ALLOCED; + MemoryContextSwitchTo(oldcxt); + + /* Fill in composite-type identification info */ + erh->er_decltypeid = erh->er_typeid = HeapTupleHeaderGetTypeId(tuphdr); + erh->er_typmod = HeapTupleHeaderGetTypMod(tuphdr); + + /* remember we have a flat representation */ + erh->fvalue = newtuple; + erh->fstartptr = (char *) newtuple->t_data; + erh->fendptr = ((char *) newtuple->t_data) + newtuple->t_len; + erh->flags |= ER_FLAG_FVALUE_VALID; + + /* Shouldn't need to set ER_FLAG_HAVE_EXTERNAL */ + Assert(!HeapTupleHeaderHasExternal(tuphdr)); + + /* + * We won't look up the tupdesc till we have to, nor make a deconstructed + * representation. We don't have enough info to fill flat_size and + * friends, either. + */ + + /* return a R/W pointer to the expanded record */ + return EOHPGetRWDatum(&erh->hdr); +} + +/* + * get_flat_size method for expanded records + * + * Note: call this in a reasonably short-lived memory context, in case of + * memory leaks from activities such as detoasting. + */ +static Size +ER_get_flat_size(ExpandedObjectHeader *eohptr) +{ + ExpandedRecordHeader *erh = (ExpandedRecordHeader *) eohptr; + TupleDesc tupdesc; + Size len; + Size data_len; + int hoff; + bool hasnull; + int i; + + Assert(erh->er_magic == ER_MAGIC); + + /* + * The flat representation has to be a valid composite datum. Make sure + * that we have a registered, not anonymous, RECORD type. + */ + if (erh->er_typeid == RECORDOID && + erh->er_typmod < 0) + { + tupdesc = expanded_record_get_tupdesc(erh); + assign_record_type_typmod(tupdesc); + erh->er_typmod = tupdesc->tdtypmod; + } + + /* + * If we have a valid flattened value without out-of-line fields, we can + * just use it as-is. + */ + if (erh->flags & ER_FLAG_FVALUE_VALID && + !(erh->flags & ER_FLAG_HAVE_EXTERNAL)) + return erh->fvalue->t_len; + + /* If we have a cached size value, believe that */ + if (erh->flat_size) + return erh->flat_size; + + /* If we haven't yet deconstructed the tuple, do that */ + if (!(erh->flags & ER_FLAG_DVALUES_VALID)) + deconstruct_expanded_record(erh); + + /* Tuple descriptor must be valid by now */ + tupdesc = erh->er_tupdesc; + + /* + * Composite datums mustn't contain any out-of-line values. + */ + if (erh->flags & ER_FLAG_HAVE_EXTERNAL) + { + for (i = 0; i < erh->nfields; i++) + { + Form_pg_attribute attr = TupleDescAttr(tupdesc, i); + + if (!erh->dnulls[i] && + !attr->attbyval && attr->attlen == -1 && + VARATT_IS_EXTERNAL(DatumGetPointer(erh->dvalues[i]))) + { + /* + * expanded_record_set_field_internal can do the actual work + * of detoasting. It needn't recheck domain constraints. + */ + expanded_record_set_field_internal(erh, i + 1, + erh->dvalues[i], false, + true, + false); + } + } + + /* + * We have now removed all external field values, so we can clear the + * flag about them. This won't cause ER_flatten_into() to mistakenly + * take the fast path, since expanded_record_set_field() will have + * cleared ER_FLAG_FVALUE_VALID. 
+ */ + erh->flags &= ~ER_FLAG_HAVE_EXTERNAL; + } + + /* Test if we currently have any null values */ + hasnull = false; + for (i = 0; i < erh->nfields; i++) + { + if (erh->dnulls[i]) + { + hasnull = true; + break; + } + } + + /* Determine total space needed */ + len = offsetof(HeapTupleHeaderData, t_bits); + + if (hasnull) + len += BITMAPLEN(tupdesc->natts); + + hoff = len = MAXALIGN(len); /* align user data safely */ + + data_len = heap_compute_data_size(tupdesc, erh->dvalues, erh->dnulls); + + len += data_len; + + /* Cache for next time */ + erh->flat_size = len; + erh->data_len = data_len; + erh->hoff = hoff; + erh->hasnull = hasnull; + + return len; +} + +/* + * flatten_into method for expanded records + */ +static void +ER_flatten_into(ExpandedObjectHeader *eohptr, + void *result, Size allocated_size) +{ + ExpandedRecordHeader *erh = (ExpandedRecordHeader *) eohptr; + HeapTupleHeader tuphdr = (HeapTupleHeader) result; + TupleDesc tupdesc; + + Assert(erh->er_magic == ER_MAGIC); + + /* Easy if we have a valid flattened value without out-of-line fields */ + if (erh->flags & ER_FLAG_FVALUE_VALID && + !(erh->flags & ER_FLAG_HAVE_EXTERNAL)) + { + Assert(allocated_size == erh->fvalue->t_len); + memcpy(tuphdr, erh->fvalue->t_data, allocated_size); + /* The original flattened value might not have datum header fields */ + HeapTupleHeaderSetDatumLength(tuphdr, allocated_size); + HeapTupleHeaderSetTypeId(tuphdr, erh->er_typeid); + HeapTupleHeaderSetTypMod(tuphdr, erh->er_typmod); + return; + } + + /* Else allocation should match previous get_flat_size result */ + Assert(allocated_size == erh->flat_size); + + /* We'll need the tuple descriptor */ + tupdesc = expanded_record_get_tupdesc(erh); + + /* We must ensure that any pad space is zero-filled */ + memset(tuphdr, 0, allocated_size); + + /* Set up header fields of composite Datum */ + HeapTupleHeaderSetDatumLength(tuphdr, allocated_size); + HeapTupleHeaderSetTypeId(tuphdr, erh->er_typeid); + HeapTupleHeaderSetTypMod(tuphdr, erh->er_typmod); + /* We also make sure that t_ctid is invalid unless explicitly set */ + ItemPointerSetInvalid(&(tuphdr->t_ctid)); + + HeapTupleHeaderSetNatts(tuphdr, tupdesc->natts); + tuphdr->t_hoff = erh->hoff; + + /* And fill the data area from dvalues/dnulls */ + heap_fill_tuple(tupdesc, + erh->dvalues, + erh->dnulls, + (char *) tuphdr + erh->hoff, + erh->data_len, + &tuphdr->t_infomask, + (erh->hasnull ? tuphdr->t_bits : NULL)); +} + +/* + * Look up the tupdesc for the expanded record's actual type + * + * Note: code internal to this module is allowed to just fetch + * erh->er_tupdesc if ER_FLAG_DVALUES_VALID is set; otherwise it should call + * expanded_record_get_tupdesc. This function is the out-of-line portion + * of expanded_record_get_tupdesc. + */ +TupleDesc +expanded_record_fetch_tupdesc(ExpandedRecordHeader *erh) +{ + TupleDesc tupdesc; + + /* Easy if we already have it (but caller should have checked already) */ + if (erh->er_tupdesc) + return erh->er_tupdesc; + + /* Lookup the composite type's tupdesc using the typcache */ + tupdesc = lookup_rowtype_tupdesc(erh->er_typeid, erh->er_typmod); + + /* + * If it's a refcounted tupdesc rather than a statically allocated one, we + * want to manage the refcount with a memory context callback rather than + * assuming that the CurrentResourceOwner is longer-lived than this + * expanded object. 
+ */ + if (tupdesc->tdrefcount >= 0) + { + /* Register callback if we didn't already */ + if (erh->er_mcb.arg == NULL) + { + erh->er_mcb.func = ER_mc_callback; + erh->er_mcb.arg = (void *) erh; + MemoryContextRegisterResetCallback(erh->hdr.eoh_context, + &erh->er_mcb); + } + + /* Remember our own pointer */ + erh->er_tupdesc = tupdesc; + tupdesc->tdrefcount++; + + /* Release the pin lookup_rowtype_tupdesc acquired */ + ReleaseTupleDesc(tupdesc); + } + else + { + /* Just remember the pointer */ + erh->er_tupdesc = tupdesc; + } + + /* In either case, fetch the process-global ID for this tupdesc */ + erh->er_tupdesc_id = assign_record_type_identifier(tupdesc->tdtypeid, + tupdesc->tdtypmod); + + return tupdesc; +} + +/* + * Get a HeapTuple representing the current value of the expanded record + * + * If valid, the originally stored tuple is returned, so caller must not + * scribble on it. Otherwise, we return a HeapTuple created in the current + * memory context. In either case, no attempt has been made to inline + * out-of-line toasted values, so the tuple isn't usable as a composite + * datum. + * + * Returns NULL if expanded record is empty. + */ +HeapTuple +expanded_record_get_tuple(ExpandedRecordHeader *erh) +{ + /* Easy case if we still have original tuple */ + if (erh->flags & ER_FLAG_FVALUE_VALID) + return erh->fvalue; + + /* Else just build a tuple from datums */ + if (erh->flags & ER_FLAG_DVALUES_VALID) + return heap_form_tuple(erh->er_tupdesc, erh->dvalues, erh->dnulls); + + /* Expanded record is empty */ + return NULL; +} + +/* + * Memory context reset callback for cleaning up external resources + */ +static void +ER_mc_callback(void *arg) +{ + ExpandedRecordHeader *erh = (ExpandedRecordHeader *) arg; + TupleDesc tupdesc = erh->er_tupdesc; + + /* Release our privately-managed tupdesc refcount, if any */ + if (tupdesc) + { + erh->er_tupdesc = NULL; /* just for luck */ + if (tupdesc->tdrefcount > 0) + { + if (--tupdesc->tdrefcount == 0) + FreeTupleDesc(tupdesc); + } + } +} + +/* + * DatumGetExpandedRecord: get a writable expanded record from an input argument + * + * Caution: if the input is a read/write pointer, this returns the input + * argument; so callers must be sure that their changes are "safe", that is + * they cannot leave the record in a corrupt state. + */ +ExpandedRecordHeader * +DatumGetExpandedRecord(Datum d) +{ + /* If it's a writable expanded record already, just return it */ + if (VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(d))) + { + ExpandedRecordHeader *erh = (ExpandedRecordHeader *) DatumGetEOHP(d); + + Assert(erh->er_magic == ER_MAGIC); + return erh; + } + + /* Else expand the hard way */ + d = make_expanded_record_from_datum(d, CurrentMemoryContext); + return (ExpandedRecordHeader *) DatumGetEOHP(d); +} + +/* + * Create the Datum/isnull representation of an expanded record object + * if we didn't do so already. After calling this, it's OK to read the + * dvalues/dnulls arrays directly, rather than going through get_field. + * + * Note that if the object is currently empty ("null"), this will change + * it to represent a row of nulls. + */ +void +deconstruct_expanded_record(ExpandedRecordHeader *erh) +{ + TupleDesc tupdesc; + Datum *dvalues; + bool *dnulls; + int nfields; + + if (erh->flags & ER_FLAG_DVALUES_VALID) + return; /* already valid, nothing to do */ + + /* We'll need the tuple descriptor */ + tupdesc = expanded_record_get_tupdesc(erh); + + /* + * Allocate arrays in private context, if we don't have them already. 
We + * don't expect to see a change in nfields here, so while we cope if it + * happens, we don't bother avoiding a leak of the old arrays (which might + * not be separately palloc'd, anyway). + */ + nfields = tupdesc->natts; + if (erh->dvalues == NULL || erh->nfields != nfields) + { + char *chunk; + + /* + * To save a palloc cycle, we allocate both the Datum and isnull + * arrays in one palloc chunk. + */ + chunk = MemoryContextAlloc(erh->hdr.eoh_context, + nfields * (sizeof(Datum) + sizeof(bool))); + dvalues = (Datum *) chunk; + dnulls = (bool *) (chunk + nfields * sizeof(Datum)); + erh->dvalues = dvalues; + erh->dnulls = dnulls; + erh->nfields = nfields; + } + else + { + dvalues = erh->dvalues; + dnulls = erh->dnulls; + } + + if (erh->flags & ER_FLAG_FVALUE_VALID) + { + /* Deconstruct tuple */ + heap_deform_tuple(erh->fvalue, tupdesc, dvalues, dnulls); + } + else + { + /* If record was empty, instantiate it as a row of nulls */ + memset(dvalues, 0, nfields * sizeof(Datum)); + memset(dnulls, true, nfields * sizeof(bool)); + } + + /* Mark the dvalues as valid */ + erh->flags |= ER_FLAG_DVALUES_VALID; +} + +/* + * Look up a record field by name + * + * If there is a field named "fieldname", fill in the contents of finfo + * and return "true". Else return "false" without changing *finfo. + */ +bool +expanded_record_lookup_field(ExpandedRecordHeader *erh, const char *fieldname, + ExpandedRecordFieldInfo *finfo) +{ + TupleDesc tupdesc; + int fno; + Form_pg_attribute attr; + const FormData_pg_attribute *sysattr; + + tupdesc = expanded_record_get_tupdesc(erh); + + /* First, check user-defined attributes */ + for (fno = 0; fno < tupdesc->natts; fno++) + { + attr = TupleDescAttr(tupdesc, fno); + if (namestrcmp(&attr->attname, fieldname) == 0 && + !attr->attisdropped) + { + finfo->fnumber = attr->attnum; + finfo->ftypeid = attr->atttypid; + finfo->ftypmod = attr->atttypmod; + finfo->fcollation = attr->attcollation; + return true; + } + } + + /* How about system attributes? */ + sysattr = SystemAttributeByName(fieldname); + if (sysattr != NULL) + { + finfo->fnumber = sysattr->attnum; + finfo->ftypeid = sysattr->atttypid; + finfo->ftypmod = sysattr->atttypmod; + finfo->fcollation = sysattr->attcollation; + return true; + } + + return false; +} + +/* + * Fetch value of record field + * + * expanded_record_get_field is the frontend for this; it handles the + * easy inline-able cases. + */ +Datum +expanded_record_fetch_field(ExpandedRecordHeader *erh, int fnumber, + bool *isnull) +{ + if (fnumber > 0) + { + /* Empty record has null fields */ + if (ExpandedRecordIsEmpty(erh)) + { + *isnull = true; + return (Datum) 0; + } + /* Make sure we have deconstructed form */ + deconstruct_expanded_record(erh); + /* Out-of-range field number reads as null */ + if (unlikely(fnumber > erh->nfields)) + { + *isnull = true; + return (Datum) 0; + } + *isnull = erh->dnulls[fnumber - 1]; + return erh->dvalues[fnumber - 1]; + } + else + { + /* System columns read as null if we haven't got flat tuple */ + if (erh->fvalue == NULL) + { + *isnull = true; + return (Datum) 0; + } + /* heap_getsysattr doesn't actually use tupdesc, so just pass null */ + return heap_getsysattr(erh->fvalue, fnumber, NULL, isnull); + } +} + +/* + * Set value of record field + * + * If the expanded record is of domain type, the assignment will be rejected + * (without changing the record's state) if the domain's constraints would + * be violated. 
+ * + * If expand_external is true and newValue is an out-of-line value, we'll + * forcibly detoast it so that the record does not depend on external storage. + * + * Internal callers can pass check_constraints = false to skip application + * of domain constraints. External callers should never do that. + */ +void +expanded_record_set_field_internal(ExpandedRecordHeader *erh, int fnumber, + Datum newValue, bool isnull, + bool expand_external, + bool check_constraints) +{ + TupleDesc tupdesc; + Form_pg_attribute attr; + Datum *dvalues; + bool *dnulls; + char *oldValue; + + /* + * Shouldn't ever be trying to assign new data to a dummy header, except + * in the case of an internal call for field inlining. + */ + Assert(!(erh->flags & ER_FLAG_IS_DUMMY) || !check_constraints); + + /* Before performing the assignment, see if result will satisfy domain */ + if ((erh->flags & ER_FLAG_IS_DOMAIN) && check_constraints) + check_domain_for_new_field(erh, fnumber, newValue, isnull); + + /* If we haven't yet deconstructed the tuple, do that */ + if (!(erh->flags & ER_FLAG_DVALUES_VALID)) + deconstruct_expanded_record(erh); + + /* Tuple descriptor must be valid by now */ + tupdesc = erh->er_tupdesc; + Assert(erh->nfields == tupdesc->natts); + + /* Caller error if fnumber is system column or nonexistent column */ + if (unlikely(fnumber <= 0 || fnumber > erh->nfields)) + elog(ERROR, "cannot assign to field %d of expanded record", fnumber); + + /* + * Copy new field value into record's context, and deal with detoasting, + * if needed. + */ + attr = TupleDescAttr(tupdesc, fnumber - 1); + if (!isnull && !attr->attbyval) + { + MemoryContext oldcxt; + + /* If requested, detoast any external value */ + if (expand_external) + { + if (attr->attlen == -1 && + VARATT_IS_EXTERNAL(DatumGetPointer(newValue))) + { + /* Detoasting should be done in short-lived context. */ + oldcxt = MemoryContextSwitchTo(get_short_term_cxt(erh)); + newValue = PointerGetDatum(detoast_external_attr((struct varlena *) DatumGetPointer(newValue))); + MemoryContextSwitchTo(oldcxt); + } + else + expand_external = false; /* need not clean up below */ + } + + /* Copy value into record's context */ + oldcxt = MemoryContextSwitchTo(erh->hdr.eoh_context); + newValue = datumCopy(newValue, false, attr->attlen); + MemoryContextSwitchTo(oldcxt); + + /* We can now flush anything that detoasting might have leaked */ + if (expand_external) + MemoryContextReset(erh->er_short_term_cxt); + + /* Remember that we have field(s) that may need to be pfree'd */ + erh->flags |= ER_FLAG_DVALUES_ALLOCED; + + /* + * While we're here, note whether it's an external toasted value, + * because that could mean we need to inline it later. (Think not to + * merge this into the previous expand_external logic: datumCopy could + * by itself have made the value non-external.) + */ + if (attr->attlen == -1 && + VARATT_IS_EXTERNAL(DatumGetPointer(newValue))) + erh->flags |= ER_FLAG_HAVE_EXTERNAL; + } + + /* + * We're ready to make irreversible changes. + */ + dvalues = erh->dvalues; + dnulls = erh->dnulls; + + /* Flattened value will no longer represent record accurately */ + erh->flags &= ~ER_FLAG_FVALUE_VALID; + /* And we don't know the flattened size either */ + erh->flat_size = 0; + + /* Grab old field value for pfree'ing, if needed. */ + if (!attr->attbyval && !dnulls[fnumber - 1]) + oldValue = (char *) DatumGetPointer(dvalues[fnumber - 1]); + else + oldValue = NULL; + + /* And finally we can insert the new field. 
*/ + dvalues[fnumber - 1] = newValue; + dnulls[fnumber - 1] = isnull; + + /* + * Free old field if needed; this keeps repeated field replacements from + * bloating the record's storage. If the pfree somehow fails, it won't + * corrupt the record. + * + * If we're updating a dummy header, we can't risk pfree'ing the old + * value, because most likely the expanded record's main header still has + * a pointer to it. This won't result in any sustained memory leak, since + * whatever we just allocated here is in the short-lived domain check + * context. + */ + if (oldValue && !(erh->flags & ER_FLAG_IS_DUMMY)) + { + /* Don't try to pfree a part of the original flat record */ + if (oldValue < erh->fstartptr || oldValue >= erh->fendptr) + pfree(oldValue); + } +} + +/* + * Set all record field(s) + * + * Caller must ensure that the provided datums are of the right types + * to match the record's previously assigned rowtype. + * + * If expand_external is true, we'll forcibly detoast out-of-line field values + * so that the record does not depend on external storage. + * + * Unlike repeated application of expanded_record_set_field(), this does not + * guarantee to leave the expanded record in a non-corrupt state in event + * of an error. Typically it would only be used for initializing a new + * expanded record. Also, because we expect this to be applied at most once + * in the lifespan of an expanded record, we do not worry about any cruft + * that detoasting might leak. + */ +void +expanded_record_set_fields(ExpandedRecordHeader *erh, + const Datum *newValues, const bool *isnulls, + bool expand_external) +{ + TupleDesc tupdesc; + Datum *dvalues; + bool *dnulls; + int fnumber; + MemoryContext oldcxt; + + /* Shouldn't ever be trying to assign new data to a dummy header */ + Assert(!(erh->flags & ER_FLAG_IS_DUMMY)); + + /* If we haven't yet deconstructed the tuple, do that */ + if (!(erh->flags & ER_FLAG_DVALUES_VALID)) + deconstruct_expanded_record(erh); + + /* Tuple descriptor must be valid by now */ + tupdesc = erh->er_tupdesc; + Assert(erh->nfields == tupdesc->natts); + + /* Flattened value will no longer represent record accurately */ + erh->flags &= ~ER_FLAG_FVALUE_VALID; + /* And we don't know the flattened size either */ + erh->flat_size = 0; + + oldcxt = MemoryContextSwitchTo(erh->hdr.eoh_context); + + dvalues = erh->dvalues; + dnulls = erh->dnulls; + + for (fnumber = 0; fnumber < erh->nfields; fnumber++) + { + Form_pg_attribute attr = TupleDescAttr(tupdesc, fnumber); + Datum newValue; + bool isnull; + + /* Ignore dropped columns */ + if (attr->attisdropped) + continue; + + newValue = newValues[fnumber]; + isnull = isnulls[fnumber]; + + if (!attr->attbyval) + { + /* + * Copy new field value into record's context, and deal with + * detoasting, if needed. + */ + if (!isnull) + { + /* Is it an external toasted value? 
*/ + if (attr->attlen == -1 && + VARATT_IS_EXTERNAL(DatumGetPointer(newValue))) + { + if (expand_external) + { + /* Detoast as requested while copying the value */ + newValue = PointerGetDatum(detoast_external_attr((struct varlena *) DatumGetPointer(newValue))); + } + else + { + /* Just copy the value */ + newValue = datumCopy(newValue, false, -1); + /* If it's still external, remember that */ + if (VARATT_IS_EXTERNAL(DatumGetPointer(newValue))) + erh->flags |= ER_FLAG_HAVE_EXTERNAL; + } + } + else + { + /* Not an external value, just copy it */ + newValue = datumCopy(newValue, false, attr->attlen); + } + + /* Remember that we have field(s) that need to be pfree'd */ + erh->flags |= ER_FLAG_DVALUES_ALLOCED; + } + + /* + * Free old field value, if any (not likely, since really we ought + * to be inserting into an empty record). + */ + if (unlikely(!dnulls[fnumber])) + { + char *oldValue; + + oldValue = (char *) DatumGetPointer(dvalues[fnumber]); + /* Don't try to pfree a part of the original flat record */ + if (oldValue < erh->fstartptr || oldValue >= erh->fendptr) + pfree(oldValue); + } + } + + /* And finally we can insert the new field. */ + dvalues[fnumber] = newValue; + dnulls[fnumber] = isnull; + } + + /* + * Because we don't guarantee atomicity of set_fields(), we can just leave + * checking of domain constraints to occur as the final step; if it throws + * an error, too bad. + */ + if (erh->flags & ER_FLAG_IS_DOMAIN) + { + /* We run domain_check in a short-lived context to limit cruft */ + MemoryContextSwitchTo(get_short_term_cxt(erh)); + + domain_check(ExpandedRecordGetRODatum(erh), false, + erh->er_decltypeid, + &erh->er_domaininfo, + erh->hdr.eoh_context); + } + + MemoryContextSwitchTo(oldcxt); +} + +/* + * Construct (or reset) working memory context for short-term operations. + * + * This context is used for domain check evaluation and for detoasting. + * + * If we don't have a short-lived memory context, make one; if we have one, + * reset it to get rid of any leftover cruft. (It is a tad annoying to need a + * whole context for this, since it will often go unused --- but it's hard to + * avoid memory leaks otherwise. We can make the context small, at least.) + */ +static MemoryContext +get_short_term_cxt(ExpandedRecordHeader *erh) +{ + if (erh->er_short_term_cxt == NULL) + erh->er_short_term_cxt = + AllocSetContextCreate(erh->hdr.eoh_context, + "expanded record short-term context", + ALLOCSET_SMALL_SIZES); + else + MemoryContextReset(erh->er_short_term_cxt); + return erh->er_short_term_cxt; +} + +/* + * Construct "dummy header" for checking domain constraints. + * + * Since we don't want to modify the state of the expanded record until + * we've validated the constraints, our approach is to set up a dummy + * record header containing the new field value(s) and then pass that to + * domain_check. We retain the dummy header as part of the expanded + * record's state to save palloc cycles, but reinitialize (most of) + * its contents on each use. + */ +static void +build_dummy_expanded_header(ExpandedRecordHeader *main_erh) +{ + ExpandedRecordHeader *erh; + TupleDesc tupdesc = expanded_record_get_tupdesc(main_erh); + + /* Ensure we have a short-lived context */ + (void) get_short_term_cxt(main_erh); + + /* + * Allocate dummy header on first time through, or in the unlikely event + * that the number of fields changes (in which case we just leak the old + * one). Include space for its field values in the request. 
+ */ + erh = main_erh->er_dummy_header; + if (erh == NULL || erh->nfields != tupdesc->natts) + { + char *chunk; + + erh = (ExpandedRecordHeader *) + MemoryContextAlloc(main_erh->hdr.eoh_context, + MAXALIGN(sizeof(ExpandedRecordHeader)) + + tupdesc->natts * (sizeof(Datum) + sizeof(bool))); + + /* Ensure all header fields are initialized to 0/null */ + memset(erh, 0, sizeof(ExpandedRecordHeader)); + + /* + * We set up the dummy header with an indication that its memory + * context is the short-lived context. This is so that, if any + * detoasting of out-of-line values happens due to an attempt to + * extract a composite datum from the dummy header, the detoasted + * stuff will end up in the short-lived context and not cause a leak. + * This is cheating a bit on the expanded-object protocol; but since + * we never pass a R/W pointer to the dummy object to any other code, + * nothing else is authorized to delete or transfer ownership of the + * object's context, so it should be safe enough. + */ + EOH_init_header(&erh->hdr, &ER_methods, main_erh->er_short_term_cxt); + erh->er_magic = ER_MAGIC; + + /* Set up dvalues/dnulls, with no valid contents as yet */ + chunk = (char *) erh + MAXALIGN(sizeof(ExpandedRecordHeader)); + erh->dvalues = (Datum *) chunk; + erh->dnulls = (bool *) (chunk + tupdesc->natts * sizeof(Datum)); + erh->nfields = tupdesc->natts; + + /* + * The fields we just set are assumed to remain constant through + * multiple uses of the dummy header to check domain constraints. All + * other dummy header fields should be explicitly reset below, to + * ensure there's not accidental effects of one check on the next one. + */ + + main_erh->er_dummy_header = erh; + } + + /* + * If anything inquires about the dummy header's declared type, it should + * report the composite base type, not the domain type (since the VALUE in + * a domain check constraint is of the base type not the domain). Hence + * we do not transfer over the IS_DOMAIN flag, nor indeed any of the main + * header's flags, since the dummy header is empty of data at this point. + * But don't forget to mark header as dummy. + */ + erh->flags = ER_FLAG_IS_DUMMY; + + /* Copy composite-type identification info */ + erh->er_decltypeid = erh->er_typeid = main_erh->er_typeid; + erh->er_typmod = main_erh->er_typmod; + + /* Dummy header does not need its own tupdesc refcount */ + erh->er_tupdesc = tupdesc; + erh->er_tupdesc_id = main_erh->er_tupdesc_id; + + /* + * It's tempting to copy over whatever we know about the flat size, but + * there's no point since we're surely about to modify the dummy record's + * field(s). Instead just clear anything left over from a previous usage + * cycle. + */ + erh->flat_size = 0; + + /* Copy over fvalue if we have it, so that system columns are available */ + erh->fvalue = main_erh->fvalue; + erh->fstartptr = main_erh->fstartptr; + erh->fendptr = main_erh->fendptr; +} + +/* + * Precheck domain constraints for a set_field operation + */ +static pg_noinline void +check_domain_for_new_field(ExpandedRecordHeader *erh, int fnumber, + Datum newValue, bool isnull) +{ + ExpandedRecordHeader *dummy_erh; + MemoryContext oldcxt; + + /* Construct dummy header to contain proposed new field set */ + build_dummy_expanded_header(erh); + dummy_erh = erh->er_dummy_header; + + /* + * If record isn't empty, just deconstruct it (if needed) and copy over + * the existing field values. If it is empty, just fill fields with nulls + * manually --- don't call deconstruct_expanded_record prematurely. 
+ */ + if (!ExpandedRecordIsEmpty(erh)) + { + deconstruct_expanded_record(erh); + memcpy(dummy_erh->dvalues, erh->dvalues, + dummy_erh->nfields * sizeof(Datum)); + memcpy(dummy_erh->dnulls, erh->dnulls, + dummy_erh->nfields * sizeof(bool)); + /* There might be some external values in there... */ + dummy_erh->flags |= erh->flags & ER_FLAG_HAVE_EXTERNAL; + } + else + { + memset(dummy_erh->dvalues, 0, dummy_erh->nfields * sizeof(Datum)); + memset(dummy_erh->dnulls, true, dummy_erh->nfields * sizeof(bool)); + } + + /* Either way, we now have valid dvalues */ + dummy_erh->flags |= ER_FLAG_DVALUES_VALID; + + /* Caller error if fnumber is system column or nonexistent column */ + if (unlikely(fnumber <= 0 || fnumber > dummy_erh->nfields)) + elog(ERROR, "cannot assign to field %d of expanded record", fnumber); + + /* Insert proposed new value into dummy field array */ + dummy_erh->dvalues[fnumber - 1] = newValue; + dummy_erh->dnulls[fnumber - 1] = isnull; + + /* + * The proposed new value might be external, in which case we'd better set + * the flag for that in dummy_erh. (This matters in case something in the + * domain check expressions tries to extract a flat value from the dummy + * header.) + */ + if (!isnull) + { + Form_pg_attribute attr = TupleDescAttr(erh->er_tupdesc, fnumber - 1); + + if (!attr->attbyval && attr->attlen == -1 && + VARATT_IS_EXTERNAL(DatumGetPointer(newValue))) + dummy_erh->flags |= ER_FLAG_HAVE_EXTERNAL; + } + + /* + * We call domain_check in the short-lived context, so that any cruft + * leaked by expression evaluation can be reclaimed. + */ + oldcxt = MemoryContextSwitchTo(erh->er_short_term_cxt); + + /* + * And now we can apply the check. Note we use main header's domain cache + * space, so that caching carries across repeated uses. + */ + domain_check(ExpandedRecordGetRODatum(dummy_erh), false, + erh->er_decltypeid, + &erh->er_domaininfo, + erh->hdr.eoh_context); + + MemoryContextSwitchTo(oldcxt); + + /* We might as well clean up cruft immediately. */ + MemoryContextReset(erh->er_short_term_cxt); +} + +/* + * Precheck domain constraints for a set_tuple operation + */ +static pg_noinline void +check_domain_for_new_tuple(ExpandedRecordHeader *erh, HeapTuple tuple) +{ + ExpandedRecordHeader *dummy_erh; + MemoryContext oldcxt; + + /* If we're being told to set record to empty, just see if NULL is OK */ + if (tuple == NULL) + { + /* We run domain_check in a short-lived context to limit cruft */ + oldcxt = MemoryContextSwitchTo(get_short_term_cxt(erh)); + + domain_check((Datum) 0, true, + erh->er_decltypeid, + &erh->er_domaininfo, + erh->hdr.eoh_context); + + MemoryContextSwitchTo(oldcxt); + + /* We might as well clean up cruft immediately. */ + MemoryContextReset(erh->er_short_term_cxt); + + return; + } + + /* Construct dummy header to contain replacement tuple */ + build_dummy_expanded_header(erh); + dummy_erh = erh->er_dummy_header; + + /* Insert tuple, but don't bother to deconstruct its fields for now */ + dummy_erh->fvalue = tuple; + dummy_erh->fstartptr = (char *) tuple->t_data; + dummy_erh->fendptr = ((char *) tuple->t_data) + tuple->t_len; + dummy_erh->flags |= ER_FLAG_FVALUE_VALID; + + /* Remember if we have any out-of-line field values */ + if (HeapTupleHasExternal(tuple)) + dummy_erh->flags |= ER_FLAG_HAVE_EXTERNAL; + + /* + * We call domain_check in the short-lived context, so that any cruft + * leaked by expression evaluation can be reclaimed. + */ + oldcxt = MemoryContextSwitchTo(erh->er_short_term_cxt); + + /* + * And now we can apply the check. 
Note we use main header's domain cache + * space, so that caching carries across repeated uses. + */ + domain_check(ExpandedRecordGetRODatum(dummy_erh), false, + erh->er_decltypeid, + &erh->er_domaininfo, + erh->hdr.eoh_context); + + MemoryContextSwitchTo(oldcxt); + + /* We might as well clean up cruft immediately. */ + MemoryContextReset(erh->er_short_term_cxt); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/float.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/float.c new file mode 100644 index 00000000000..902d7912961 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/float.c @@ -0,0 +1,4177 @@ +/*------------------------------------------------------------------------- + * + * float.c + * Functions for the built-in floating-point types. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/float.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <ctype.h> +#include <float.h> +#include <math.h> +#include <limits.h> + +#include "catalog/pg_type.h" +#include "common/int.h" +#include "common/pg_prng.h" +#include "common/shortest_dec.h" +#include "libpq/pqformat.h" +#include "miscadmin.h" +#include "utils/array.h" +#include "utils/float.h" +#include "utils/fmgrprotos.h" +#include "utils/sortsupport.h" +#include "utils/timestamp.h" + + +/* + * Configurable GUC parameter + * + * If >0, use shortest-decimal format for output; this is both the default and + * allows for compatibility with clients that explicitly set a value here to + * get round-trip-accurate results. If 0 or less, then use the old, slow, + * decimal rounding method. + */ +__thread int extra_float_digits = 1; + +/* Cached constants for degree-based trig functions */ +static __thread bool degree_consts_set = false; +static __thread float8 sin_30 = 0; +static __thread float8 one_minus_cos_60 = 0; +static __thread float8 asin_0_5 = 0; +static __thread float8 acos_0_5 = 0; +static __thread float8 atan_1_0 = 0; +static __thread float8 tan_45 = 0; +static __thread float8 cot_45 = 0; + +/* + * These are intentionally not static; don't "fix" them. They will never + * be referenced by other files, much less changed; but we don't want the + * compiler to know that, else it might try to precompute expressions + * involving them. See comments for init_degree_constants(). + */ +__thread float8 degree_c_thirty = 30.0; +__thread float8 degree_c_forty_five = 45.0; +__thread float8 degree_c_sixty = 60.0; +__thread float8 degree_c_one_half = 0.5; +__thread float8 degree_c_one = 1.0; + +/* State for drandom() and setseed() */ +static __thread bool drandom_seed_set = false; +static __thread pg_prng_state drandom_seed; + +/* Local function prototypes */ +static double sind_q1(double x); +static double cosd_q1(double x); +static void init_degree_constants(void); + + +/* + * We use these out-of-line ereport() calls to report float overflow, + * underflow, and zero-divide, because following our usual practice of + * repeating them at each call site would lead to a lot of code bloat. + * + * This does mean that you don't get a useful error location indicator. 
+ */ +pg_noinline void +float_overflow_error(void) +{ + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value out of range: overflow"))); +} + +pg_noinline void +float_underflow_error(void) +{ + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value out of range: underflow"))); +} + +pg_noinline void +float_zero_divide_error(void) +{ + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); +} + + +/* + * Returns -1 if 'val' represents negative infinity, 1 if 'val' + * represents (positive) infinity, and 0 otherwise. On some platforms, + * this is equivalent to the isinf() macro, but not everywhere: C99 + * does not specify that isinf() needs to distinguish between positive + * and negative infinity. + */ +int +is_infinite(double val) +{ + int inf = isinf(val); + + if (inf == 0) + return 0; + else if (val > 0) + return 1; + else + return -1; +} + + +/* ========== USER I/O ROUTINES ========== */ + + +/* + * float4in - converts "num" to float4 + * + * Note that this code now uses strtof(), where it used to use strtod(). + * + * The motivation for using strtof() is to avoid a double-rounding problem: + * for certain decimal inputs, if you round the input correctly to a double, + * and then round the double to a float, the result is incorrect in that it + * does not match the result of rounding the decimal value to float directly. + * + * One of the best examples is 7.038531e-26: + * + * 0xAE43FDp-107 = 7.03853069185120912085...e-26 + * midpoint 7.03853100000000022281...e-26 + * 0xAE43FEp-107 = 7.03853130814879132477...e-26 + * + * making 0xAE43FDp-107 the correct float result, but if you do the conversion + * via a double, you get + * + * 0xAE43FD.7FFFFFF8p-107 = 7.03853099999999907487...e-26 + * midpoint 7.03853099999999964884...e-26 + * 0xAE43FD.80000000p-107 = 7.03853100000000022281...e-26 + * 0xAE43FD.80000008p-107 = 7.03853100000000137076...e-26 + * + * so the value rounds to the double exactly on the midpoint between the two + * nearest floats, and then rounding again to a float gives the incorrect + * result of 0xAE43FEp-107. + * + */ +Datum +float4in(PG_FUNCTION_ARGS) +{ + char *num = PG_GETARG_CSTRING(0); + + PG_RETURN_FLOAT4(float4in_internal(num, NULL, "real", num, + fcinfo->context)); +} + +/* + * float4in_internal - guts of float4in() + * + * This is exposed for use by functions that want a reasonably + * platform-independent way of inputting floats. The behavior is + * essentially like strtof + ereturn on error. + * + * Uses the same API as float8in_internal below, so most of its + * comments also apply here, except regarding use in geometric types. + */ +float4 +float4in_internal(char *num, char **endptr_p, + const char *type_name, const char *orig_string, + struct Node *escontext) +{ + float val; + char *endptr; + + /* + * endptr points to the first character _after_ the sequence we recognized + * as a valid floating point number. orig_string points to the original + * input string. + */ + + /* skip leading whitespace */ + while (*num != '\0' && isspace((unsigned char) *num)) + num++; + + /* + * Check for an empty-string input to begin with, to avoid the vagaries of + * strtod() on different platforms. + */ + if (*num == '\0') + ereturn(escontext, 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + type_name, orig_string))); + + errno = 0; + val = strtof(num, &endptr); + + /* did we not see anything that looks like a double? 
*/ + if (endptr == num || errno != 0) + { + int save_errno = errno; + + /* + * C99 requires that strtof() accept NaN, [+-]Infinity, and [+-]Inf, + * but not all platforms support all of these (and some accept them + * but set ERANGE anyway...) Therefore, we check for these inputs + * ourselves if strtof() fails. + * + * Note: C99 also requires hexadecimal input as well as some extended + * forms of NaN, but we consider these forms unportable and don't try + * to support them. You can use 'em if your strtof() takes 'em. + */ + if (pg_strncasecmp(num, "NaN", 3) == 0) + { + val = get_float4_nan(); + endptr = num + 3; + } + else if (pg_strncasecmp(num, "Infinity", 8) == 0) + { + val = get_float4_infinity(); + endptr = num + 8; + } + else if (pg_strncasecmp(num, "+Infinity", 9) == 0) + { + val = get_float4_infinity(); + endptr = num + 9; + } + else if (pg_strncasecmp(num, "-Infinity", 9) == 0) + { + val = -get_float4_infinity(); + endptr = num + 9; + } + else if (pg_strncasecmp(num, "inf", 3) == 0) + { + val = get_float4_infinity(); + endptr = num + 3; + } + else if (pg_strncasecmp(num, "+inf", 4) == 0) + { + val = get_float4_infinity(); + endptr = num + 4; + } + else if (pg_strncasecmp(num, "-inf", 4) == 0) + { + val = -get_float4_infinity(); + endptr = num + 4; + } + else if (save_errno == ERANGE) + { + /* + * Some platforms return ERANGE for denormalized numbers (those + * that are not zero, but are too close to zero to have full + * precision). We'd prefer not to throw error for that, so try to + * detect whether it's a "real" out-of-range condition by checking + * to see if the result is zero or huge. + */ + if (val == 0.0 || +#if !defined(HUGE_VALF) + isinf(val) +#else + (val >= HUGE_VALF || val <= -HUGE_VALF) +#endif + ) + { + /* see comments in float8in_internal for rationale */ + char *errnumber = pstrdup(num); + + errnumber[endptr - num] = '\0'; + + ereturn(escontext, 0, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("\"%s\" is out of range for type real", + errnumber))); + } + } + else + ereturn(escontext, 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + type_name, orig_string))); + } + + /* skip trailing whitespace */ + while (*endptr != '\0' && isspace((unsigned char) *endptr)) + endptr++; + + /* report stopping point if wanted, else complain if not end of string */ + if (endptr_p) + *endptr_p = endptr; + else if (*endptr != '\0') + ereturn(escontext, 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + type_name, orig_string))); + + return val; +} + +/* + * float4out - converts a float4 number to a string + * using a standard output format + */ +Datum +float4out(PG_FUNCTION_ARGS) +{ + float4 num = PG_GETARG_FLOAT4(0); + char *ascii = (char *) palloc(32); + int ndig = FLT_DIG + extra_float_digits; + + if (extra_float_digits > 0) + { + float_to_shortest_decimal_buf(num, ascii); + PG_RETURN_CSTRING(ascii); + } + + (void) pg_strfromd(ascii, 32, ndig, num); + PG_RETURN_CSTRING(ascii); +} + +/* + * float4recv - converts external binary format to float4 + */ +Datum +float4recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + + PG_RETURN_FLOAT4(pq_getmsgfloat4(buf)); +} + +/* + * float4send - converts float4 to binary format + */ +Datum +float4send(PG_FUNCTION_ARGS) +{ + float4 num = PG_GETARG_FLOAT4(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendfloat4(&buf, num); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * 
float8in - converts "num" to float8 + */ +Datum +float8in(PG_FUNCTION_ARGS) +{ + char *num = PG_GETARG_CSTRING(0); + + PG_RETURN_FLOAT8(float8in_internal(num, NULL, "double precision", num, + fcinfo->context)); +} + +/* + * float8in_internal - guts of float8in() + * + * This is exposed for use by functions that want a reasonably + * platform-independent way of inputting doubles. The behavior is + * essentially like strtod + ereturn on error, but note the following + * differences: + * 1. Both leading and trailing whitespace are skipped. + * 2. If endptr_p is NULL, we report error if there's trailing junk. + * Otherwise, it's up to the caller to complain about trailing junk. + * 3. In event of a syntax error, the report mentions the given type_name + * and prints orig_string as the input; this is meant to support use of + * this function with types such as "box" and "point", where what we are + * parsing here is just a substring of orig_string. + * + * If escontext points to an ErrorSaveContext node, that is filled instead + * of throwing an error; the caller must check SOFT_ERROR_OCCURRED() + * to detect errors. + * + * "num" could validly be declared "const char *", but that results in an + * unreasonable amount of extra casting both here and in callers, so we don't. + */ +float8 +float8in_internal(char *num, char **endptr_p, + const char *type_name, const char *orig_string, + struct Node *escontext) +{ + double val; + char *endptr; + + /* skip leading whitespace */ + while (*num != '\0' && isspace((unsigned char) *num)) + num++; + + /* + * Check for an empty-string input to begin with, to avoid the vagaries of + * strtod() on different platforms. + */ + if (*num == '\0') + ereturn(escontext, 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + type_name, orig_string))); + + errno = 0; + val = strtod(num, &endptr); + + /* did we not see anything that looks like a double? */ + if (endptr == num || errno != 0) + { + int save_errno = errno; + + /* + * C99 requires that strtod() accept NaN, [+-]Infinity, and [+-]Inf, + * but not all platforms support all of these (and some accept them + * but set ERANGE anyway...) Therefore, we check for these inputs + * ourselves if strtod() fails. + * + * Note: C99 also requires hexadecimal input as well as some extended + * forms of NaN, but we consider these forms unportable and don't try + * to support them. You can use 'em if your strtod() takes 'em. + */ + if (pg_strncasecmp(num, "NaN", 3) == 0) + { + val = get_float8_nan(); + endptr = num + 3; + } + else if (pg_strncasecmp(num, "Infinity", 8) == 0) + { + val = get_float8_infinity(); + endptr = num + 8; + } + else if (pg_strncasecmp(num, "+Infinity", 9) == 0) + { + val = get_float8_infinity(); + endptr = num + 9; + } + else if (pg_strncasecmp(num, "-Infinity", 9) == 0) + { + val = -get_float8_infinity(); + endptr = num + 9; + } + else if (pg_strncasecmp(num, "inf", 3) == 0) + { + val = get_float8_infinity(); + endptr = num + 3; + } + else if (pg_strncasecmp(num, "+inf", 4) == 0) + { + val = get_float8_infinity(); + endptr = num + 4; + } + else if (pg_strncasecmp(num, "-inf", 4) == 0) + { + val = -get_float8_infinity(); + endptr = num + 4; + } + else if (save_errno == ERANGE) + { + /* + * Some platforms return ERANGE for denormalized numbers (those + * that are not zero, but are too close to zero to have full + * precision). 
We'd prefer not to throw error for that, so try to + * detect whether it's a "real" out-of-range condition by checking + * to see if the result is zero or huge. + * + * On error, we intentionally complain about double precision not + * the given type name, and we print only the part of the string + * that is the current number. + */ + if (val == 0.0 || val >= HUGE_VAL || val <= -HUGE_VAL) + { + char *errnumber = pstrdup(num); + + errnumber[endptr - num] = '\0'; + ereturn(escontext, 0, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("\"%s\" is out of range for type double precision", + errnumber))); + } + } + else + ereturn(escontext, 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + type_name, orig_string))); + } + + /* skip trailing whitespace */ + while (*endptr != '\0' && isspace((unsigned char) *endptr)) + endptr++; + + /* report stopping point if wanted, else complain if not end of string */ + if (endptr_p) + *endptr_p = endptr; + else if (*endptr != '\0') + ereturn(escontext, 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + type_name, orig_string))); + + return val; +} + + +/* + * float8out - converts float8 number to a string + * using a standard output format + */ +Datum +float8out(PG_FUNCTION_ARGS) +{ + float8 num = PG_GETARG_FLOAT8(0); + + PG_RETURN_CSTRING(float8out_internal(num)); +} + +/* + * float8out_internal - guts of float8out() + * + * This is exposed for use by functions that want a reasonably + * platform-independent way of outputting doubles. + * The result is always palloc'd. + */ +char * +float8out_internal(double num) +{ + char *ascii = (char *) palloc(32); + int ndig = DBL_DIG + extra_float_digits; + + if (extra_float_digits > 0) + { + double_to_shortest_decimal_buf(num, ascii); + return ascii; + } + + (void) pg_strfromd(ascii, 32, ndig, num); + return ascii; +} + +/* + * float8recv - converts external binary format to float8 + */ +Datum +float8recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + + PG_RETURN_FLOAT8(pq_getmsgfloat8(buf)); +} + +/* + * float8send - converts float8 to binary format + */ +Datum +float8send(PG_FUNCTION_ARGS) +{ + float8 num = PG_GETARG_FLOAT8(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendfloat8(&buf, num); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + + +/* ========== PUBLIC ROUTINES ========== */ + + +/* + * ====================== + * FLOAT4 BASE OPERATIONS + * ====================== + */ + +/* + * float4abs - returns |arg1| (absolute value) + */ +Datum +float4abs(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + + PG_RETURN_FLOAT4(fabsf(arg1)); +} + +/* + * float4um - returns -arg1 (unary minus) + */ +Datum +float4um(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + float4 result; + + result = -arg1; + PG_RETURN_FLOAT4(result); +} + +Datum +float4up(PG_FUNCTION_ARGS) +{ + float4 arg = PG_GETARG_FLOAT4(0); + + PG_RETURN_FLOAT4(arg); +} + +Datum +float4larger(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + float4 arg2 = PG_GETARG_FLOAT4(1); + float4 result; + + if (float4_gt(arg1, arg2)) + result = arg1; + else + result = arg2; + PG_RETURN_FLOAT4(result); +} + +Datum +float4smaller(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + float4 arg2 = PG_GETARG_FLOAT4(1); + float4 result; + + if (float4_lt(arg1, arg2)) + result = arg1; + else + result = arg2; + PG_RETURN_FLOAT4(result); +} + +/* + * ====================== + * FLOAT8 
BASE OPERATIONS + * ====================== + */ + +/* + * float8abs - returns |arg1| (absolute value) + */ +Datum +float8abs(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + + PG_RETURN_FLOAT8(fabs(arg1)); +} + + +/* + * float8um - returns -arg1 (unary minus) + */ +Datum +float8um(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + result = -arg1; + PG_RETURN_FLOAT8(result); +} + +Datum +float8up(PG_FUNCTION_ARGS) +{ + float8 arg = PG_GETARG_FLOAT8(0); + + PG_RETURN_FLOAT8(arg); +} + +Datum +float8larger(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + float8 result; + + if (float8_gt(arg1, arg2)) + result = arg1; + else + result = arg2; + PG_RETURN_FLOAT8(result); +} + +Datum +float8smaller(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + float8 result; + + if (float8_lt(arg1, arg2)) + result = arg1; + else + result = arg2; + PG_RETURN_FLOAT8(result); +} + + +/* + * ==================== + * ARITHMETIC OPERATORS + * ==================== + */ + +/* + * float4pl - returns arg1 + arg2 + * float4mi - returns arg1 - arg2 + * float4mul - returns arg1 * arg2 + * float4div - returns arg1 / arg2 + */ +Datum +float4pl(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + float4 arg2 = PG_GETARG_FLOAT4(1); + + PG_RETURN_FLOAT4(float4_pl(arg1, arg2)); +} + +Datum +float4mi(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + float4 arg2 = PG_GETARG_FLOAT4(1); + + PG_RETURN_FLOAT4(float4_mi(arg1, arg2)); +} + +Datum +float4mul(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + float4 arg2 = PG_GETARG_FLOAT4(1); + + PG_RETURN_FLOAT4(float4_mul(arg1, arg2)); +} + +Datum +float4div(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + float4 arg2 = PG_GETARG_FLOAT4(1); + + PG_RETURN_FLOAT4(float4_div(arg1, arg2)); +} + +/* + * float8pl - returns arg1 + arg2 + * float8mi - returns arg1 - arg2 + * float8mul - returns arg1 * arg2 + * float8div - returns arg1 / arg2 + */ +Datum +float8pl(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + + PG_RETURN_FLOAT8(float8_pl(arg1, arg2)); +} + +Datum +float8mi(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + + PG_RETURN_FLOAT8(float8_mi(arg1, arg2)); +} + +Datum +float8mul(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + + PG_RETURN_FLOAT8(float8_mul(arg1, arg2)); +} + +Datum +float8div(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + + PG_RETURN_FLOAT8(float8_div(arg1, arg2)); +} + + +/* + * ==================== + * COMPARISON OPERATORS + * ==================== + */ + +/* + * float4{eq,ne,lt,le,gt,ge} - float4/float4 comparison operations + */ +int +float4_cmp_internal(float4 a, float4 b) +{ + if (float4_gt(a, b)) + return 1; + if (float4_lt(a, b)) + return -1; + return 0; +} + +Datum +float4eq(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + float4 arg2 = PG_GETARG_FLOAT4(1); + + PG_RETURN_BOOL(float4_eq(arg1, arg2)); +} + +Datum +float4ne(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + float4 arg2 = PG_GETARG_FLOAT4(1); + + PG_RETURN_BOOL(float4_ne(arg1, arg2)); +} + +Datum +float4lt(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + float4 arg2 = PG_GETARG_FLOAT4(1); + + PG_RETURN_BOOL(float4_lt(arg1, arg2)); +} + +Datum +float4le(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); 
+ float4 arg2 = PG_GETARG_FLOAT4(1); + + PG_RETURN_BOOL(float4_le(arg1, arg2)); +} + +Datum +float4gt(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + float4 arg2 = PG_GETARG_FLOAT4(1); + + PG_RETURN_BOOL(float4_gt(arg1, arg2)); +} + +Datum +float4ge(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + float4 arg2 = PG_GETARG_FLOAT4(1); + + PG_RETURN_BOOL(float4_ge(arg1, arg2)); +} + +Datum +btfloat4cmp(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + float4 arg2 = PG_GETARG_FLOAT4(1); + + PG_RETURN_INT32(float4_cmp_internal(arg1, arg2)); +} + +static int +btfloat4fastcmp(Datum x, Datum y, SortSupport ssup) +{ + float4 arg1 = DatumGetFloat4(x); + float4 arg2 = DatumGetFloat4(y); + + return float4_cmp_internal(arg1, arg2); +} + +Datum +btfloat4sortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + + ssup->comparator = btfloat4fastcmp; + PG_RETURN_VOID(); +} + +/* + * float8{eq,ne,lt,le,gt,ge} - float8/float8 comparison operations + */ +int +float8_cmp_internal(float8 a, float8 b) +{ + if (float8_gt(a, b)) + return 1; + if (float8_lt(a, b)) + return -1; + return 0; +} + +Datum +float8eq(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + + PG_RETURN_BOOL(float8_eq(arg1, arg2)); +} + +Datum +float8ne(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + + PG_RETURN_BOOL(float8_ne(arg1, arg2)); +} + +Datum +float8lt(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + + PG_RETURN_BOOL(float8_lt(arg1, arg2)); +} + +Datum +float8le(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + + PG_RETURN_BOOL(float8_le(arg1, arg2)); +} + +Datum +float8gt(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + + PG_RETURN_BOOL(float8_gt(arg1, arg2)); +} + +Datum +float8ge(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + + PG_RETURN_BOOL(float8_ge(arg1, arg2)); +} + +Datum +btfloat8cmp(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + + PG_RETURN_INT32(float8_cmp_internal(arg1, arg2)); +} + +static int +btfloat8fastcmp(Datum x, Datum y, SortSupport ssup) +{ + float8 arg1 = DatumGetFloat8(x); + float8 arg2 = DatumGetFloat8(y); + + return float8_cmp_internal(arg1, arg2); +} + +Datum +btfloat8sortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + + ssup->comparator = btfloat8fastcmp; + PG_RETURN_VOID(); +} + +Datum +btfloat48cmp(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + + /* widen float4 to float8 and then compare */ + PG_RETURN_INT32(float8_cmp_internal(arg1, arg2)); +} + +Datum +btfloat84cmp(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float4 arg2 = PG_GETARG_FLOAT4(1); + + /* widen float4 to float8 and then compare */ + PG_RETURN_INT32(float8_cmp_internal(arg1, arg2)); +} + +/* + * in_range support function for float8. + * + * Note: we needn't supply a float8_float4 variant, as implicit coercion + * of the offset value takes care of that scenario just as well. 
+ */ +Datum +in_range_float8_float8(PG_FUNCTION_ARGS) +{ + float8 val = PG_GETARG_FLOAT8(0); + float8 base = PG_GETARG_FLOAT8(1); + float8 offset = PG_GETARG_FLOAT8(2); + bool sub = PG_GETARG_BOOL(3); + bool less = PG_GETARG_BOOL(4); + float8 sum; + + /* + * Reject negative or NaN offset. Negative is per spec, and NaN is + * because appropriate semantics for that seem non-obvious. + */ + if (isnan(offset) || offset < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PRECEDING_OR_FOLLOWING_SIZE), + errmsg("invalid preceding or following size in window function"))); + + /* + * Deal with cases where val and/or base is NaN, following the rule that + * NaN sorts after non-NaN (cf float8_cmp_internal). The offset cannot + * affect the conclusion. + */ + if (isnan(val)) + { + if (isnan(base)) + PG_RETURN_BOOL(true); /* NAN = NAN */ + else + PG_RETURN_BOOL(!less); /* NAN > non-NAN */ + } + else if (isnan(base)) + { + PG_RETURN_BOOL(less); /* non-NAN < NAN */ + } + + /* + * Deal with cases where both base and offset are infinite, and computing + * base +/- offset would produce NaN. This corresponds to a window frame + * whose boundary infinitely precedes +inf or infinitely follows -inf, + * which is not well-defined. For consistency with other cases involving + * infinities, such as the fact that +inf infinitely follows +inf, we + * choose to assume that +inf infinitely precedes +inf and -inf infinitely + * follows -inf, and therefore that all finite and infinite values are in + * such a window frame. + * + * offset is known positive, so we need only check the sign of base in + * this test. + */ + if (isinf(offset) && isinf(base) && + (sub ? base > 0 : base < 0)) + PG_RETURN_BOOL(true); + + /* + * Otherwise it should be safe to compute base +/- offset. We trust the + * FPU to cope if an input is +/-inf or the true sum would overflow, and + * produce a suitably signed infinity, which will compare properly against + * val whether or not that's infinity. + */ + if (sub) + sum = base - offset; + else + sum = base + offset; + + if (less) + PG_RETURN_BOOL(val <= sum); + else + PG_RETURN_BOOL(val >= sum); +} + +/* + * in_range support function for float4. + * + * We would need a float4_float8 variant in any case, so we supply that and + * let implicit coercion take care of the float4_float4 case. + */ +Datum +in_range_float4_float8(PG_FUNCTION_ARGS) +{ + float4 val = PG_GETARG_FLOAT4(0); + float4 base = PG_GETARG_FLOAT4(1); + float8 offset = PG_GETARG_FLOAT8(2); + bool sub = PG_GETARG_BOOL(3); + bool less = PG_GETARG_BOOL(4); + float8 sum; + + /* + * Reject negative or NaN offset. Negative is per spec, and NaN is + * because appropriate semantics for that seem non-obvious. + */ + if (isnan(offset) || offset < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PRECEDING_OR_FOLLOWING_SIZE), + errmsg("invalid preceding or following size in window function"))); + + /* + * Deal with cases where val and/or base is NaN, following the rule that + * NaN sorts after non-NaN (cf float8_cmp_internal). The offset cannot + * affect the conclusion. + */ + if (isnan(val)) + { + if (isnan(base)) + PG_RETURN_BOOL(true); /* NAN = NAN */ + else + PG_RETURN_BOOL(!less); /* NAN > non-NAN */ + } + else if (isnan(base)) + { + PG_RETURN_BOOL(less); /* non-NAN < NAN */ + } + + /* + * Deal with cases where both base and offset are infinite, and computing + * base +/- offset would produce NaN. This corresponds to a window frame + * whose boundary infinitely precedes +inf or infinitely follows -inf, + * which is not well-defined. 
For consistency with other cases involving + * infinities, such as the fact that +inf infinitely follows +inf, we + * choose to assume that +inf infinitely precedes +inf and -inf infinitely + * follows -inf, and therefore that all finite and infinite values are in + * such a window frame. + * + * offset is known positive, so we need only check the sign of base in + * this test. + */ + if (isinf(offset) && isinf(base) && + (sub ? base > 0 : base < 0)) + PG_RETURN_BOOL(true); + + /* + * Otherwise it should be safe to compute base +/- offset. We trust the + * FPU to cope if an input is +/-inf or the true sum would overflow, and + * produce a suitably signed infinity, which will compare properly against + * val whether or not that's infinity. + */ + if (sub) + sum = base - offset; + else + sum = base + offset; + + if (less) + PG_RETURN_BOOL(val <= sum); + else + PG_RETURN_BOOL(val >= sum); +} + + +/* + * =================== + * CONVERSION ROUTINES + * =================== + */ + +/* + * ftod - converts a float4 number to a float8 number + */ +Datum +ftod(PG_FUNCTION_ARGS) +{ + float4 num = PG_GETARG_FLOAT4(0); + + PG_RETURN_FLOAT8((float8) num); +} + + +/* + * dtof - converts a float8 number to a float4 number + */ +Datum +dtof(PG_FUNCTION_ARGS) +{ + float8 num = PG_GETARG_FLOAT8(0); + float4 result; + + result = (float4) num; + if (unlikely(isinf(result)) && !isinf(num)) + float_overflow_error(); + if (unlikely(result == 0.0f) && num != 0.0) + float_underflow_error(); + + PG_RETURN_FLOAT4(result); +} + + +/* + * dtoi4 - converts a float8 number to an int4 number + */ +Datum +dtoi4(PG_FUNCTION_ARGS) +{ + float8 num = PG_GETARG_FLOAT8(0); + + /* + * Get rid of any fractional part in the input. This is so we don't fail + * on just-out-of-range values that would round into range. Note + * assumption that rint() will pass through a NaN or Inf unchanged. + */ + num = rint(num); + + /* Range check */ + if (unlikely(isnan(num) || !FLOAT8_FITS_IN_INT32(num))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + + PG_RETURN_INT32((int32) num); +} + + +/* + * dtoi2 - converts a float8 number to an int2 number + */ +Datum +dtoi2(PG_FUNCTION_ARGS) +{ + float8 num = PG_GETARG_FLOAT8(0); + + /* + * Get rid of any fractional part in the input. This is so we don't fail + * on just-out-of-range values that would round into range. Note + * assumption that rint() will pass through a NaN or Inf unchanged. + */ + num = rint(num); + + /* Range check */ + if (unlikely(isnan(num) || !FLOAT8_FITS_IN_INT16(num))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("smallint out of range"))); + + PG_RETURN_INT16((int16) num); +} + + +/* + * i4tod - converts an int4 number to a float8 number + */ +Datum +i4tod(PG_FUNCTION_ARGS) +{ + int32 num = PG_GETARG_INT32(0); + + PG_RETURN_FLOAT8((float8) num); +} + + +/* + * i2tod - converts an int2 number to a float8 number + */ +Datum +i2tod(PG_FUNCTION_ARGS) +{ + int16 num = PG_GETARG_INT16(0); + + PG_RETURN_FLOAT8((float8) num); +} + + +/* + * ftoi4 - converts a float4 number to an int4 number + */ +Datum +ftoi4(PG_FUNCTION_ARGS) +{ + float4 num = PG_GETARG_FLOAT4(0); + + /* + * Get rid of any fractional part in the input. This is so we don't fail + * on just-out-of-range values that would round into range. Note + * assumption that rint() will pass through a NaN or Inf unchanged. 
+ */ + num = rint(num); + + /* Range check */ + if (unlikely(isnan(num) || !FLOAT4_FITS_IN_INT32(num))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + + PG_RETURN_INT32((int32) num); +} + + +/* + * ftoi2 - converts a float4 number to an int2 number + */ +Datum +ftoi2(PG_FUNCTION_ARGS) +{ + float4 num = PG_GETARG_FLOAT4(0); + + /* + * Get rid of any fractional part in the input. This is so we don't fail + * on just-out-of-range values that would round into range. Note + * assumption that rint() will pass through a NaN or Inf unchanged. + */ + num = rint(num); + + /* Range check */ + if (unlikely(isnan(num) || !FLOAT4_FITS_IN_INT16(num))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("smallint out of range"))); + + PG_RETURN_INT16((int16) num); +} + + +/* + * i4tof - converts an int4 number to a float4 number + */ +Datum +i4tof(PG_FUNCTION_ARGS) +{ + int32 num = PG_GETARG_INT32(0); + + PG_RETURN_FLOAT4((float4) num); +} + + +/* + * i2tof - converts an int2 number to a float4 number + */ +Datum +i2tof(PG_FUNCTION_ARGS) +{ + int16 num = PG_GETARG_INT16(0); + + PG_RETURN_FLOAT4((float4) num); +} + + +/* + * ======================= + * RANDOM FLOAT8 OPERATORS + * ======================= + */ + +/* + * dround - returns ROUND(arg1) + */ +Datum +dround(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + + PG_RETURN_FLOAT8(rint(arg1)); +} + +/* + * dceil - returns the smallest integer greater than or + * equal to the specified float + */ +Datum +dceil(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + + PG_RETURN_FLOAT8(ceil(arg1)); +} + +/* + * dfloor - returns the largest integer lesser than or + * equal to the specified float + */ +Datum +dfloor(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + + PG_RETURN_FLOAT8(floor(arg1)); +} + +/* + * dsign - returns -1 if the argument is less than 0, 0 + * if the argument is equal to 0, and 1 if the + * argument is greater than zero. + */ +Datum +dsign(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + if (arg1 > 0) + result = 1.0; + else if (arg1 < 0) + result = -1.0; + else + result = 0.0; + + PG_RETURN_FLOAT8(result); +} + +/* + * dtrunc - returns truncation-towards-zero of arg1, + * arg1 >= 0 ... the greatest integer less + * than or equal to arg1 + * arg1 < 0 ... 
the least integer greater + * than or equal to arg1 + */ +Datum +dtrunc(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + if (arg1 >= 0) + result = floor(arg1); + else + result = -floor(-arg1); + + PG_RETURN_FLOAT8(result); +} + + +/* + * dsqrt - returns square root of arg1 + */ +Datum +dsqrt(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + if (arg1 < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_POWER_FUNCTION), + errmsg("cannot take square root of a negative number"))); + + result = sqrt(arg1); + if (unlikely(isinf(result)) && !isinf(arg1)) + float_overflow_error(); + if (unlikely(result == 0.0) && arg1 != 0.0) + float_underflow_error(); + + PG_RETURN_FLOAT8(result); +} + + +/* + * dcbrt - returns cube root of arg1 + */ +Datum +dcbrt(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + result = cbrt(arg1); + if (unlikely(isinf(result)) && !isinf(arg1)) + float_overflow_error(); + if (unlikely(result == 0.0) && arg1 != 0.0) + float_underflow_error(); + + PG_RETURN_FLOAT8(result); +} + + +/* + * dpow - returns pow(arg1,arg2) + */ +Datum +dpow(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + float8 result; + + /* + * The POSIX spec says that NaN ^ 0 = 1, and 1 ^ NaN = 1, while all other + * cases with NaN inputs yield NaN (with no error). Many older platforms + * get one or more of these cases wrong, so deal with them via explicit + * logic rather than trusting pow(3). + */ + if (isnan(arg1)) + { + if (isnan(arg2) || arg2 != 0.0) + PG_RETURN_FLOAT8(get_float8_nan()); + PG_RETURN_FLOAT8(1.0); + } + if (isnan(arg2)) + { + if (arg1 != 1.0) + PG_RETURN_FLOAT8(get_float8_nan()); + PG_RETURN_FLOAT8(1.0); + } + + /* + * The SQL spec requires that we emit a particular SQLSTATE error code for + * certain error conditions. Specifically, we don't return a + * divide-by-zero error code for 0 ^ -1. + */ + if (arg1 == 0 && arg2 < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_POWER_FUNCTION), + errmsg("zero raised to a negative power is undefined"))); + if (arg1 < 0 && floor(arg2) != arg2) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_POWER_FUNCTION), + errmsg("a negative number raised to a non-integer power yields a complex result"))); + + /* + * We don't trust the platform's pow() to handle infinity cases per POSIX + * spec either, so deal with those explicitly too. It's easier to handle + * infinite y first, so that it doesn't matter if x is also infinite. + */ + if (isinf(arg2)) + { + float8 absx = fabs(arg1); + + if (absx == 1.0) + result = 1.0; + else if (arg2 > 0.0) /* y = +Inf */ + { + if (absx > 1.0) + result = arg2; + else + result = 0.0; + } + else /* y = -Inf */ + { + if (absx > 1.0) + result = 0.0; + else + result = -arg2; + } + } + else if (isinf(arg1)) + { + if (arg2 == 0.0) + result = 1.0; + else if (arg1 > 0.0) /* x = +Inf */ + { + if (arg2 > 0.0) + result = arg1; + else + result = 0.0; + } + else /* x = -Inf */ + { + /* + * Per POSIX, the sign of the result depends on whether y is an + * odd integer. Since x < 0, we already know from the previous + * domain check that y is an integer. It is odd if y/2 is not + * also an integer. + */ + float8 halfy = arg2 / 2; /* should be computed exactly */ + bool yisoddinteger = (floor(halfy) != halfy); + + if (arg2 > 0.0) + result = yisoddinteger ? arg1 : -arg1; + else + result = yisoddinteger ? 
-0.0 : 0.0; + } + } + else + { + /* + * pow() sets errno on only some platforms, depending on whether it + * follows _IEEE_, _POSIX_, _XOPEN_, or _SVID_, so we must check both + * errno and invalid output values. (We can't rely on just the + * latter, either; some old platforms return a large-but-finite + * HUGE_VAL when reporting overflow.) + */ + errno = 0; + result = pow(arg1, arg2); + if (errno == EDOM || isnan(result)) + { + /* + * We handled all possible domain errors above, so this should be + * impossible. However, old glibc versions on x86 have a bug that + * causes them to fail this way for abs(y) greater than 2^63: + * + * https://sourceware.org/bugzilla/show_bug.cgi?id=3866 + * + * Hence, if we get here, assume y is finite but large (large + * enough to be certainly even). The result should be 0 if x == 0, + * 1.0 if abs(x) == 1.0, otherwise an overflow or underflow error. + */ + if (arg1 == 0.0) + result = 0.0; /* we already verified y is positive */ + else + { + float8 absx = fabs(arg1); + + if (absx == 1.0) + result = 1.0; + else if (arg2 >= 0.0 ? (absx > 1.0) : (absx < 1.0)) + float_overflow_error(); + else + float_underflow_error(); + } + } + else if (errno == ERANGE) + { + if (result != 0.0) + float_overflow_error(); + else + float_underflow_error(); + } + else + { + if (unlikely(isinf(result))) + float_overflow_error(); + if (unlikely(result == 0.0) && arg1 != 0.0) + float_underflow_error(); + } + } + + PG_RETURN_FLOAT8(result); +} + + +/* + * dexp - returns the exponential function of arg1 + */ +Datum +dexp(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + /* + * Handle NaN and Inf cases explicitly. This avoids needing to assume + * that the platform's exp() conforms to POSIX for these cases, and it + * removes some edge cases for the overflow checks below. + */ + if (isnan(arg1)) + result = arg1; + else if (isinf(arg1)) + { + /* Per POSIX, exp(-Inf) is 0 */ + result = (arg1 > 0.0) ? arg1 : 0; + } + else + { + /* + * On some platforms, exp() will not set errno but just return Inf or + * zero to report overflow/underflow; therefore, test both cases. + */ + errno = 0; + result = exp(arg1); + if (unlikely(errno == ERANGE)) + { + if (result != 0.0) + float_overflow_error(); + else + float_underflow_error(); + } + else if (unlikely(isinf(result))) + float_overflow_error(); + else if (unlikely(result == 0.0)) + float_underflow_error(); + } + + PG_RETURN_FLOAT8(result); +} + + +/* + * dlog1 - returns the natural logarithm of arg1 + */ +Datum +dlog1(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + /* + * Emit particular SQLSTATE error codes for ln(). This is required by the + * SQL standard. + */ + if (arg1 == 0.0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_LOG), + errmsg("cannot take logarithm of zero"))); + if (arg1 < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_LOG), + errmsg("cannot take logarithm of a negative number"))); + + result = log(arg1); + if (unlikely(isinf(result)) && !isinf(arg1)) + float_overflow_error(); + if (unlikely(result == 0.0) && arg1 != 1.0) + float_underflow_error(); + + PG_RETURN_FLOAT8(result); +} + + +/* + * dlog10 - returns the base 10 logarithm of arg1 + */ +Datum +dlog10(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + /* + * Emit particular SQLSTATE error codes for log(). The SQL spec doesn't + * define log(), but it does define ln(), so it makes sense to emit the + * same error code for an analogous error condition. 
+ */ + if (arg1 == 0.0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_LOG), + errmsg("cannot take logarithm of zero"))); + if (arg1 < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_LOG), + errmsg("cannot take logarithm of a negative number"))); + + result = log10(arg1); + if (unlikely(isinf(result)) && !isinf(arg1)) + float_overflow_error(); + if (unlikely(result == 0.0) && arg1 != 1.0) + float_underflow_error(); + + PG_RETURN_FLOAT8(result); +} + + +/* + * dacos - returns the arccos of arg1 (radians) + */ +Datum +dacos(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + /* Per the POSIX spec, return NaN if the input is NaN */ + if (isnan(arg1)) + PG_RETURN_FLOAT8(get_float8_nan()); + + /* + * The principal branch of the inverse cosine function maps values in the + * range [-1, 1] to values in the range [0, Pi], so we should reject any + * inputs outside that range and the result will always be finite. + */ + if (arg1 < -1.0 || arg1 > 1.0) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("input is out of range"))); + + result = acos(arg1); + if (unlikely(isinf(result))) + float_overflow_error(); + + PG_RETURN_FLOAT8(result); +} + + +/* + * dasin - returns the arcsin of arg1 (radians) + */ +Datum +dasin(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + /* Per the POSIX spec, return NaN if the input is NaN */ + if (isnan(arg1)) + PG_RETURN_FLOAT8(get_float8_nan()); + + /* + * The principal branch of the inverse sine function maps values in the + * range [-1, 1] to values in the range [-Pi/2, Pi/2], so we should reject + * any inputs outside that range and the result will always be finite. + */ + if (arg1 < -1.0 || arg1 > 1.0) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("input is out of range"))); + + result = asin(arg1); + if (unlikely(isinf(result))) + float_overflow_error(); + + PG_RETURN_FLOAT8(result); +} + + +/* + * datan - returns the arctan of arg1 (radians) + */ +Datum +datan(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + /* Per the POSIX spec, return NaN if the input is NaN */ + if (isnan(arg1)) + PG_RETURN_FLOAT8(get_float8_nan()); + + /* + * The principal branch of the inverse tangent function maps all inputs to + * values in the range [-Pi/2, Pi/2], so the result should always be + * finite, even if the input is infinite. + */ + result = atan(arg1); + if (unlikely(isinf(result))) + float_overflow_error(); + + PG_RETURN_FLOAT8(result); +} + + +/* + * atan2 - returns the arctan of arg1/arg2 (radians) + */ +Datum +datan2(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + float8 result; + + /* Per the POSIX spec, return NaN if either input is NaN */ + if (isnan(arg1) || isnan(arg2)) + PG_RETURN_FLOAT8(get_float8_nan()); + + /* + * atan2 maps all inputs to values in the range [-Pi, Pi], so the result + * should always be finite, even if the inputs are infinite. 
+ */ + result = atan2(arg1, arg2); + if (unlikely(isinf(result))) + float_overflow_error(); + + PG_RETURN_FLOAT8(result); +} + + +/* + * dcos - returns the cosine of arg1 (radians) + */ +Datum +dcos(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + /* Per the POSIX spec, return NaN if the input is NaN */ + if (isnan(arg1)) + PG_RETURN_FLOAT8(get_float8_nan()); + + /* + * cos() is periodic and so theoretically can work for all finite inputs, + * but some implementations may choose to throw error if the input is so + * large that there are no significant digits in the result. So we should + * check for errors. POSIX allows an error to be reported either via + * errno or via fetestexcept(), but currently we only support checking + * errno. (fetestexcept() is rumored to report underflow unreasonably + * early on some platforms, so it's not clear that believing it would be a + * net improvement anyway.) + * + * For infinite inputs, POSIX specifies that the trigonometric functions + * should return a domain error; but we won't notice that unless the + * platform reports via errno, so also explicitly test for infinite + * inputs. + */ + errno = 0; + result = cos(arg1); + if (errno != 0 || isinf(arg1)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("input is out of range"))); + if (unlikely(isinf(result))) + float_overflow_error(); + + PG_RETURN_FLOAT8(result); +} + + +/* + * dcot - returns the cotangent of arg1 (radians) + */ +Datum +dcot(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + /* Per the POSIX spec, return NaN if the input is NaN */ + if (isnan(arg1)) + PG_RETURN_FLOAT8(get_float8_nan()); + + /* Be sure to throw an error if the input is infinite --- see dcos() */ + errno = 0; + result = tan(arg1); + if (errno != 0 || isinf(arg1)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("input is out of range"))); + + result = 1.0 / result; + /* Not checking for overflow because cot(0) == Inf */ + + PG_RETURN_FLOAT8(result); +} + + +/* + * dsin - returns the sine of arg1 (radians) + */ +Datum +dsin(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + /* Per the POSIX spec, return NaN if the input is NaN */ + if (isnan(arg1)) + PG_RETURN_FLOAT8(get_float8_nan()); + + /* Be sure to throw an error if the input is infinite --- see dcos() */ + errno = 0; + result = sin(arg1); + if (errno != 0 || isinf(arg1)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("input is out of range"))); + if (unlikely(isinf(result))) + float_overflow_error(); + + PG_RETURN_FLOAT8(result); +} + + +/* + * dtan - returns the tangent of arg1 (radians) + */ +Datum +dtan(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + /* Per the POSIX spec, return NaN if the input is NaN */ + if (isnan(arg1)) + PG_RETURN_FLOAT8(get_float8_nan()); + + /* Be sure to throw an error if the input is infinite --- see dcos() */ + errno = 0; + result = tan(arg1); + if (errno != 0 || isinf(arg1)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("input is out of range"))); + /* Not checking for overflow because tan(pi/2) == Inf */ + + PG_RETURN_FLOAT8(result); +} + + +/* ========== DEGREE-BASED TRIGONOMETRIC FUNCTIONS ========== */ + + +/* + * Initialize the cached constants declared at the head of this file + * (sin_30 etc). 
The fact that we need those at all, let alone need this + * Rube-Goldberg-worthy method of initializing them, is because there are + * compilers out there that will precompute expressions such as sin(constant) + * using a sin() function different from what will be used at runtime. If we + * want exact results, we must ensure that none of the scaling constants used + * in the degree-based trig functions are computed that way. To do so, we + * compute them from the variables degree_c_thirty etc, which are also really + * constants, but the compiler cannot assume that. + * + * Other hazards we are trying to forestall with this kluge include the + * possibility that compilers will rearrange the expressions, or compute + * some intermediate results in registers wider than a standard double. + * + * In the places where we use these constants, the typical pattern is like + * volatile float8 sin_x = sin(x * RADIANS_PER_DEGREE); + * return (sin_x / sin_30); + * where we hope to get a value of exactly 1.0 from the division when x = 30. + * The volatile temporary variable is needed on machines with wide float + * registers, to ensure that the result of sin(x) is rounded to double width + * the same as the value of sin_30 has been. Experimentation with gcc shows + * that marking the temp variable volatile is necessary to make the store and + * reload actually happen; hopefully the same trick works for other compilers. + * (gcc's documentation suggests using the -ffloat-store compiler switch to + * ensure this, but that is compiler-specific and it also pessimizes code in + * many places where we don't care about this.) + */ +static void +init_degree_constants(void) +{ + sin_30 = sin(degree_c_thirty * RADIANS_PER_DEGREE); + one_minus_cos_60 = 1.0 - cos(degree_c_sixty * RADIANS_PER_DEGREE); + asin_0_5 = asin(degree_c_one_half); + acos_0_5 = acos(degree_c_one_half); + atan_1_0 = atan(degree_c_one); + tan_45 = sind_q1(degree_c_forty_five) / cosd_q1(degree_c_forty_five); + cot_45 = cosd_q1(degree_c_forty_five) / sind_q1(degree_c_forty_five); + degree_consts_set = true; +} + +#define INIT_DEGREE_CONSTANTS() \ +do { \ + if (!degree_consts_set) \ + init_degree_constants(); \ +} while(0) + + +/* + * asind_q1 - returns the inverse sine of x in degrees, for x in + * the range [0, 1]. The result is an angle in the + * first quadrant --- [0, 90] degrees. + * + * For the 3 special case inputs (0, 0.5 and 1), this + * function will return exact values (0, 30 and 90 + * degrees respectively). + */ +static double +asind_q1(double x) +{ + /* + * Stitch together inverse sine and cosine functions for the ranges [0, + * 0.5] and (0.5, 1]. Each expression below is guaranteed to return + * exactly 30 for x=0.5, so the result is a continuous monotonic function + * over the full range. + */ + if (x <= 0.5) + { + volatile float8 asin_x = asin(x); + + return (asin_x / asin_0_5) * 30.0; + } + else + { + volatile float8 acos_x = acos(x); + + return 90.0 - (acos_x / acos_0_5) * 60.0; + } +} + + +/* + * acosd_q1 - returns the inverse cosine of x in degrees, for x in + * the range [0, 1]. The result is an angle in the + * first quadrant --- [0, 90] degrees. + * + * For the 3 special case inputs (0, 0.5 and 1), this + * function will return exact values (0, 60 and 90 + * degrees respectively). + */ +static double +acosd_q1(double x) +{ + /* + * Stitch together inverse sine and cosine functions for the ranges [0, + * 0.5] and (0.5, 1]. 
Each expression below is guaranteed to return + * exactly 60 for x=0.5, so the result is a continuous monotonic function + * over the full range. + */ + if (x <= 0.5) + { + volatile float8 asin_x = asin(x); + + return 90.0 - (asin_x / asin_0_5) * 30.0; + } + else + { + volatile float8 acos_x = acos(x); + + return (acos_x / acos_0_5) * 60.0; + } +} + + +/* + * dacosd - returns the arccos of arg1 (degrees) + */ +Datum +dacosd(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + /* Per the POSIX spec, return NaN if the input is NaN */ + if (isnan(arg1)) + PG_RETURN_FLOAT8(get_float8_nan()); + + INIT_DEGREE_CONSTANTS(); + + /* + * The principal branch of the inverse cosine function maps values in the + * range [-1, 1] to values in the range [0, 180], so we should reject any + * inputs outside that range and the result will always be finite. + */ + if (arg1 < -1.0 || arg1 > 1.0) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("input is out of range"))); + + if (arg1 >= 0.0) + result = acosd_q1(arg1); + else + result = 90.0 + asind_q1(-arg1); + + if (unlikely(isinf(result))) + float_overflow_error(); + + PG_RETURN_FLOAT8(result); +} + + +/* + * dasind - returns the arcsin of arg1 (degrees) + */ +Datum +dasind(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + /* Per the POSIX spec, return NaN if the input is NaN */ + if (isnan(arg1)) + PG_RETURN_FLOAT8(get_float8_nan()); + + INIT_DEGREE_CONSTANTS(); + + /* + * The principal branch of the inverse sine function maps values in the + * range [-1, 1] to values in the range [-90, 90], so we should reject any + * inputs outside that range and the result will always be finite. + */ + if (arg1 < -1.0 || arg1 > 1.0) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("input is out of range"))); + + if (arg1 >= 0.0) + result = asind_q1(arg1); + else + result = -asind_q1(-arg1); + + if (unlikely(isinf(result))) + float_overflow_error(); + + PG_RETURN_FLOAT8(result); +} + + +/* + * datand - returns the arctan of arg1 (degrees) + */ +Datum +datand(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + volatile float8 atan_arg1; + + /* Per the POSIX spec, return NaN if the input is NaN */ + if (isnan(arg1)) + PG_RETURN_FLOAT8(get_float8_nan()); + + INIT_DEGREE_CONSTANTS(); + + /* + * The principal branch of the inverse tangent function maps all inputs to + * values in the range [-90, 90], so the result should always be finite, + * even if the input is infinite. Additionally, we take care to ensure + * than when arg1 is 1, the result is exactly 45. + */ + atan_arg1 = atan(arg1); + result = (atan_arg1 / atan_1_0) * 45.0; + + if (unlikely(isinf(result))) + float_overflow_error(); + + PG_RETURN_FLOAT8(result); +} + + +/* + * atan2d - returns the arctan of arg1/arg2 (degrees) + */ +Datum +datan2d(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + float8 result; + volatile float8 atan2_arg1_arg2; + + /* Per the POSIX spec, return NaN if either input is NaN */ + if (isnan(arg1) || isnan(arg2)) + PG_RETURN_FLOAT8(get_float8_nan()); + + INIT_DEGREE_CONSTANTS(); + + /* + * atan2d maps all inputs to values in the range [-180, 180], so the + * result should always be finite, even if the inputs are infinite. + * + * Note: this coding assumes that atan(1.0) is a suitable scaling constant + * to get an exact result from atan2(). 
This might well fail on us at + * some point, requiring us to decide exactly what inputs we think we're + * going to guarantee an exact result for. + */ + atan2_arg1_arg2 = atan2(arg1, arg2); + result = (atan2_arg1_arg2 / atan_1_0) * 45.0; + + if (unlikely(isinf(result))) + float_overflow_error(); + + PG_RETURN_FLOAT8(result); +} + + +/* + * sind_0_to_30 - returns the sine of an angle that lies between 0 and + * 30 degrees. This will return exactly 0 when x is 0, + * and exactly 0.5 when x is 30 degrees. + */ +static double +sind_0_to_30(double x) +{ + volatile float8 sin_x = sin(x * RADIANS_PER_DEGREE); + + return (sin_x / sin_30) / 2.0; +} + + +/* + * cosd_0_to_60 - returns the cosine of an angle that lies between 0 + * and 60 degrees. This will return exactly 1 when x + * is 0, and exactly 0.5 when x is 60 degrees. + */ +static double +cosd_0_to_60(double x) +{ + volatile float8 one_minus_cos_x = 1.0 - cos(x * RADIANS_PER_DEGREE); + + return 1.0 - (one_minus_cos_x / one_minus_cos_60) / 2.0; +} + + +/* + * sind_q1 - returns the sine of an angle in the first quadrant + * (0 to 90 degrees). + */ +static double +sind_q1(double x) +{ + /* + * Stitch together the sine and cosine functions for the ranges [0, 30] + * and (30, 90]. These guarantee to return exact answers at their + * endpoints, so the overall result is a continuous monotonic function + * that gives exact results when x = 0, 30 and 90 degrees. + */ + if (x <= 30.0) + return sind_0_to_30(x); + else + return cosd_0_to_60(90.0 - x); +} + + +/* + * cosd_q1 - returns the cosine of an angle in the first quadrant + * (0 to 90 degrees). + */ +static double +cosd_q1(double x) +{ + /* + * Stitch together the sine and cosine functions for the ranges [0, 60] + * and (60, 90]. These guarantee to return exact answers at their + * endpoints, so the overall result is a continuous monotonic function + * that gives exact results when x = 0, 60 and 90 degrees. + */ + if (x <= 60.0) + return cosd_0_to_60(x); + else + return sind_0_to_30(90.0 - x); +} + + +/* + * dcosd - returns the cosine of arg1 (degrees) + */ +Datum +dcosd(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + int sign = 1; + + /* + * Per the POSIX spec, return NaN if the input is NaN and throw an error + * if the input is infinite. + */ + if (isnan(arg1)) + PG_RETURN_FLOAT8(get_float8_nan()); + + if (isinf(arg1)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("input is out of range"))); + + INIT_DEGREE_CONSTANTS(); + + /* Reduce the range of the input to [0,90] degrees */ + arg1 = fmod(arg1, 360.0); + + if (arg1 < 0.0) + { + /* cosd(-x) = cosd(x) */ + arg1 = -arg1; + } + + if (arg1 > 180.0) + { + /* cosd(360-x) = cosd(x) */ + arg1 = 360.0 - arg1; + } + + if (arg1 > 90.0) + { + /* cosd(180-x) = -cosd(x) */ + arg1 = 180.0 - arg1; + sign = -sign; + } + + result = sign * cosd_q1(arg1); + + if (unlikely(isinf(result))) + float_overflow_error(); + + PG_RETURN_FLOAT8(result); +} + + +/* + * dcotd - returns the cotangent of arg1 (degrees) + */ +Datum +dcotd(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + volatile float8 cot_arg1; + int sign = 1; + + /* + * Per the POSIX spec, return NaN if the input is NaN and throw an error + * if the input is infinite. 
+ */ + if (isnan(arg1)) + PG_RETURN_FLOAT8(get_float8_nan()); + + if (isinf(arg1)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("input is out of range"))); + + INIT_DEGREE_CONSTANTS(); + + /* Reduce the range of the input to [0,90] degrees */ + arg1 = fmod(arg1, 360.0); + + if (arg1 < 0.0) + { + /* cotd(-x) = -cotd(x) */ + arg1 = -arg1; + sign = -sign; + } + + if (arg1 > 180.0) + { + /* cotd(360-x) = -cotd(x) */ + arg1 = 360.0 - arg1; + sign = -sign; + } + + if (arg1 > 90.0) + { + /* cotd(180-x) = -cotd(x) */ + arg1 = 180.0 - arg1; + sign = -sign; + } + + cot_arg1 = cosd_q1(arg1) / sind_q1(arg1); + result = sign * (cot_arg1 / cot_45); + + /* + * On some machines we get cotd(270) = minus zero, but this isn't always + * true. For portability, and because the user constituency for this + * function probably doesn't want minus zero, force it to plain zero. + */ + if (result == 0.0) + result = 0.0; + + /* Not checking for overflow because cotd(0) == Inf */ + + PG_RETURN_FLOAT8(result); +} + + +/* + * dsind - returns the sine of arg1 (degrees) + */ +Datum +dsind(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + int sign = 1; + + /* + * Per the POSIX spec, return NaN if the input is NaN and throw an error + * if the input is infinite. + */ + if (isnan(arg1)) + PG_RETURN_FLOAT8(get_float8_nan()); + + if (isinf(arg1)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("input is out of range"))); + + INIT_DEGREE_CONSTANTS(); + + /* Reduce the range of the input to [0,90] degrees */ + arg1 = fmod(arg1, 360.0); + + if (arg1 < 0.0) + { + /* sind(-x) = -sind(x) */ + arg1 = -arg1; + sign = -sign; + } + + if (arg1 > 180.0) + { + /* sind(360-x) = -sind(x) */ + arg1 = 360.0 - arg1; + sign = -sign; + } + + if (arg1 > 90.0) + { + /* sind(180-x) = sind(x) */ + arg1 = 180.0 - arg1; + } + + result = sign * sind_q1(arg1); + + if (unlikely(isinf(result))) + float_overflow_error(); + + PG_RETURN_FLOAT8(result); +} + + +/* + * dtand - returns the tangent of arg1 (degrees) + */ +Datum +dtand(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + volatile float8 tan_arg1; + int sign = 1; + + /* + * Per the POSIX spec, return NaN if the input is NaN and throw an error + * if the input is infinite. + */ + if (isnan(arg1)) + PG_RETURN_FLOAT8(get_float8_nan()); + + if (isinf(arg1)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("input is out of range"))); + + INIT_DEGREE_CONSTANTS(); + + /* Reduce the range of the input to [0,90] degrees */ + arg1 = fmod(arg1, 360.0); + + if (arg1 < 0.0) + { + /* tand(-x) = -tand(x) */ + arg1 = -arg1; + sign = -sign; + } + + if (arg1 > 180.0) + { + /* tand(360-x) = -tand(x) */ + arg1 = 360.0 - arg1; + sign = -sign; + } + + if (arg1 > 90.0) + { + /* tand(180-x) = -tand(x) */ + arg1 = 180.0 - arg1; + sign = -sign; + } + + tan_arg1 = sind_q1(arg1) / cosd_q1(arg1); + result = sign * (tan_arg1 / tan_45); + + /* + * On some machines we get tand(180) = minus zero, but this isn't always + * true. For portability, and because the user constituency for this + * function probably doesn't want minus zero, force it to plain zero. 
+ */ + if (result == 0.0) + result = 0.0; + + /* Not checking for overflow because tand(90) == Inf */ + + PG_RETURN_FLOAT8(result); +} + + +/* + * degrees - returns degrees converted from radians + */ +Datum +degrees(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + + PG_RETURN_FLOAT8(float8_div(arg1, RADIANS_PER_DEGREE)); +} + + +/* + * dpi - returns the constant PI + */ +Datum +dpi(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(M_PI); +} + + +/* + * radians - returns radians converted from degrees + */ +Datum +radians(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + + PG_RETURN_FLOAT8(float8_mul(arg1, RADIANS_PER_DEGREE)); +} + + +/* ========== HYPERBOLIC FUNCTIONS ========== */ + + +/* + * dsinh - returns the hyperbolic sine of arg1 + */ +Datum +dsinh(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + errno = 0; + result = sinh(arg1); + + /* + * if an ERANGE error occurs, it means there is an overflow. For sinh, + * the result should be either -infinity or infinity, depending on the + * sign of arg1. + */ + if (errno == ERANGE) + { + if (arg1 < 0) + result = -get_float8_infinity(); + else + result = get_float8_infinity(); + } + + PG_RETURN_FLOAT8(result); +} + + +/* + * dcosh - returns the hyperbolic cosine of arg1 + */ +Datum +dcosh(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + errno = 0; + result = cosh(arg1); + + /* + * if an ERANGE error occurs, it means there is an overflow. As cosh is + * always positive, it always means the result is positive infinity. + */ + if (errno == ERANGE) + result = get_float8_infinity(); + + if (unlikely(result == 0.0)) + float_underflow_error(); + + PG_RETURN_FLOAT8(result); +} + +/* + * dtanh - returns the hyperbolic tangent of arg1 + */ +Datum +dtanh(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + /* + * For tanh, we don't need an errno check because it never overflows. + */ + result = tanh(arg1); + + if (unlikely(isinf(result))) + float_overflow_error(); + + PG_RETURN_FLOAT8(result); +} + +/* + * dasinh - returns the inverse hyperbolic sine of arg1 + */ +Datum +dasinh(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + /* + * For asinh, we don't need an errno check because it never overflows. + */ + result = asinh(arg1); + + PG_RETURN_FLOAT8(result); +} + +/* + * dacosh - returns the inverse hyperbolic cosine of arg1 + */ +Datum +dacosh(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + /* + * acosh is only defined for inputs >= 1.0. By checking this ourselves, + * we need not worry about checking for an EDOM error, which is a good + * thing because some implementations will report that for NaN. Otherwise, + * no error is possible. + */ + if (arg1 < 1.0) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("input is out of range"))); + + result = acosh(arg1); + + PG_RETURN_FLOAT8(result); +} + +/* + * datanh - returns the inverse hyperbolic tangent of arg1 + */ +Datum +datanh(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + /* + * atanh is only defined for inputs between -1 and 1. By checking this + * ourselves, we need not worry about checking for an EDOM error, which is + * a good thing because some implementations will report that for NaN. 
+ */ + if (arg1 < -1.0 || arg1 > 1.0) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("input is out of range"))); + + /* + * Also handle the infinity cases ourselves; this is helpful because old + * glibc versions may produce the wrong errno for this. All other inputs + * cannot produce an error. + */ + if (arg1 == -1.0) + result = -get_float8_infinity(); + else if (arg1 == 1.0) + result = get_float8_infinity(); + else + result = atanh(arg1); + + PG_RETURN_FLOAT8(result); +} + + +/* ========== ERROR FUNCTIONS ========== */ + + +/* + * derf - returns the error function: erf(arg1) + */ +Datum +derf(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + /* + * For erf, we don't need an errno check because it never overflows. + */ + result = erf(arg1); + + if (unlikely(isinf(result))) + float_overflow_error(); + + PG_RETURN_FLOAT8(result); +} + +/* + * derfc - returns the complementary error function: 1 - erf(arg1) + */ +Datum +derfc(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float8 result; + + /* + * For erfc, we don't need an errno check because it never overflows. + */ + result = erfc(arg1); + + if (unlikely(isinf(result))) + float_overflow_error(); + + PG_RETURN_FLOAT8(result); +} + + +/* ========== RANDOM FUNCTIONS ========== */ + + +/* + * initialize_drandom_seed - initialize drandom_seed if not yet done + */ +static void +initialize_drandom_seed(void) +{ + /* Initialize random seed, if not done yet in this process */ + if (unlikely(!drandom_seed_set)) + { + /* + * If possible, initialize the seed using high-quality random bits. + * Should that fail for some reason, we fall back on a lower-quality + * seed based on current time and PID. + */ + if (unlikely(!pg_prng_strong_seed(&drandom_seed))) + { + TimestampTz now = GetCurrentTimestamp(); + uint64 iseed; + + /* Mix the PID with the most predictable bits of the timestamp */ + iseed = (uint64) now ^ ((uint64) MyProcPid << 32); + pg_prng_seed(&drandom_seed, iseed); + } + drandom_seed_set = true; + } +} + +/* + * drandom - returns a random number + */ +Datum +drandom(PG_FUNCTION_ARGS) +{ + float8 result; + + initialize_drandom_seed(); + + /* pg_prng_double produces desired result range [0.0 - 1.0) */ + result = pg_prng_double(&drandom_seed); + + PG_RETURN_FLOAT8(result); +} + +/* + * drandom_normal - returns a random number from a normal distribution + */ +Datum +drandom_normal(PG_FUNCTION_ARGS) +{ + float8 mean = PG_GETARG_FLOAT8(0); + float8 stddev = PG_GETARG_FLOAT8(1); + float8 result, + z; + + initialize_drandom_seed(); + + /* Get random value from standard normal(mean = 0.0, stddev = 1.0) */ + z = pg_prng_double_normal(&drandom_seed); + /* Transform the normal standard variable (z) */ + /* using the target normal distribution parameters */ + result = (stddev * z) + mean; + + PG_RETURN_FLOAT8(result); +} + +/* + * setseed - set seed for the random number generator + */ +Datum +setseed(PG_FUNCTION_ARGS) +{ + float8 seed = PG_GETARG_FLOAT8(0); + + if (seed < -1 || seed > 1 || isnan(seed)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("setseed parameter %g is out of allowed range [-1,1]", + seed))); + + pg_prng_fseed(&drandom_seed, seed); + drandom_seed_set = true; + + PG_RETURN_VOID(); +} + + + +/* + * ========================= + * FLOAT AGGREGATE OPERATORS + * ========================= + * + * float8_accum - accumulate for AVG(), variance aggregates, etc. 
+ * float4_accum - same, but input data is float4 + * float8_avg - produce final result for float AVG() + * float8_var_samp - produce final result for float VAR_SAMP() + * float8_var_pop - produce final result for float VAR_POP() + * float8_stddev_samp - produce final result for float STDDEV_SAMP() + * float8_stddev_pop - produce final result for float STDDEV_POP() + * + * The naive schoolbook implementation of these aggregates works by + * accumulating sum(X) and sum(X^2). However, this approach suffers from + * large rounding errors in the final computation of quantities like the + * population variance (N*sum(X^2) - sum(X)^2) / N^2, since each of the + * intermediate terms is potentially very large, while the difference is often + * quite small. + * + * Instead we use the Youngs-Cramer algorithm [1] which works by accumulating + * Sx=sum(X) and Sxx=sum((X-Sx/N)^2), using a numerically stable algorithm to + * incrementally update those quantities. The final computations of each of + * the aggregate values is then trivial and gives more accurate results (for + * example, the population variance is just Sxx/N). This algorithm is also + * fairly easy to generalize to allow parallel execution without loss of + * precision (see, for example, [2]). For more details, and a comparison of + * this with other algorithms, see [3]. + * + * The transition datatype for all these aggregates is a 3-element array + * of float8, holding the values N, Sx, Sxx in that order. + * + * Note that we represent N as a float to avoid having to build a special + * datatype. Given a reasonable floating-point implementation, there should + * be no accuracy loss unless N exceeds 2 ^ 52 or so (by which time the + * user will have doubtless lost interest anyway...) + * + * [1] Some Results Relevant to Choice of Sum and Sum-of-Product Algorithms, + * E. A. Youngs and E. M. Cramer, Technometrics Vol 13, No 3, August 1971. + * + * [2] Updating Formulae and a Pairwise Algorithm for Computing Sample + * Variances, T. F. Chan, G. H. Golub & R. J. LeVeque, COMPSTAT 1982. + * + * [3] Numerically Stable Parallel Computation of (Co-)Variance, Erich + * Schubert and Michael Gertz, Proceedings of the 30th International + * Conference on Scientific and Statistical Database Management, 2018. + */ + +static float8 * +check_float8_array(ArrayType *transarray, const char *caller, int n) +{ + /* + * We expect the input to be an N-element float array; verify that. We + * don't need to use deconstruct_array() since the array data is just + * going to look like a C array of N float8 values. + */ + if (ARR_NDIM(transarray) != 1 || + ARR_DIMS(transarray)[0] != n || + ARR_HASNULL(transarray) || + ARR_ELEMTYPE(transarray) != FLOAT8OID) + elog(ERROR, "%s: expected %d-element float8 array", caller, n); + return (float8 *) ARR_DATA_PTR(transarray); +} + +/* + * float8_combine + * + * An aggregate combine function used to combine two 3 fields + * aggregate transition data into a single transition data. + * This function is used only in two stage aggregation and + * shouldn't be called outside aggregate context. 
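 *
 * (Worked illustration with assumed inputs: combining the partial states
 * (N1=2, Sx1=3, Sxx1=0.5) and (N2=1, Sx2=4, Sxx2=0) gives N=3, Sx=7 and
 * Sxx = 0.5 + 0 + 2*1*(3/2 - 4/1)^2 / 3 = 14/3, the same result a single
 * pass of float8_accum over the values {1, 2, 4} would produce.)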
+ */ +Datum +float8_combine(PG_FUNCTION_ARGS) +{ + ArrayType *transarray1 = PG_GETARG_ARRAYTYPE_P(0); + ArrayType *transarray2 = PG_GETARG_ARRAYTYPE_P(1); + float8 *transvalues1; + float8 *transvalues2; + float8 N1, + Sx1, + Sxx1, + N2, + Sx2, + Sxx2, + tmp, + N, + Sx, + Sxx; + + transvalues1 = check_float8_array(transarray1, "float8_combine", 3); + transvalues2 = check_float8_array(transarray2, "float8_combine", 3); + + N1 = transvalues1[0]; + Sx1 = transvalues1[1]; + Sxx1 = transvalues1[2]; + + N2 = transvalues2[0]; + Sx2 = transvalues2[1]; + Sxx2 = transvalues2[2]; + + /*-------------------- + * The transition values combine using a generalization of the + * Youngs-Cramer algorithm as follows: + * + * N = N1 + N2 + * Sx = Sx1 + Sx2 + * Sxx = Sxx1 + Sxx2 + N1 * N2 * (Sx1/N1 - Sx2/N2)^2 / N; + * + * It's worth handling the special cases N1 = 0 and N2 = 0 separately + * since those cases are trivial, and we then don't need to worry about + * division-by-zero errors in the general case. + *-------------------- + */ + if (N1 == 0.0) + { + N = N2; + Sx = Sx2; + Sxx = Sxx2; + } + else if (N2 == 0.0) + { + N = N1; + Sx = Sx1; + Sxx = Sxx1; + } + else + { + N = N1 + N2; + Sx = float8_pl(Sx1, Sx2); + tmp = Sx1 / N1 - Sx2 / N2; + Sxx = Sxx1 + Sxx2 + N1 * N2 * tmp * tmp / N; + if (unlikely(isinf(Sxx)) && !isinf(Sxx1) && !isinf(Sxx2)) + float_overflow_error(); + } + + /* + * If we're invoked as an aggregate, we can cheat and modify our first + * parameter in-place to reduce palloc overhead. Otherwise we construct a + * new array with the updated transition data and return it. + */ + if (AggCheckCallContext(fcinfo, NULL)) + { + transvalues1[0] = N; + transvalues1[1] = Sx; + transvalues1[2] = Sxx; + + PG_RETURN_ARRAYTYPE_P(transarray1); + } + else + { + Datum transdatums[3]; + ArrayType *result; + + transdatums[0] = Float8GetDatumFast(N); + transdatums[1] = Float8GetDatumFast(Sx); + transdatums[2] = Float8GetDatumFast(Sxx); + + result = construct_array(transdatums, 3, + FLOAT8OID, + sizeof(float8), FLOAT8PASSBYVAL, TYPALIGN_DOUBLE); + + PG_RETURN_ARRAYTYPE_P(result); + } +} + +Datum +float8_accum(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + float8 newval = PG_GETARG_FLOAT8(1); + float8 *transvalues; + float8 N, + Sx, + Sxx, + tmp; + + transvalues = check_float8_array(transarray, "float8_accum", 3); + N = transvalues[0]; + Sx = transvalues[1]; + Sxx = transvalues[2]; + + /* + * Use the Youngs-Cramer algorithm to incorporate the new value into the + * transition values. + */ + N += 1.0; + Sx += newval; + if (transvalues[0] > 0.0) + { + tmp = newval * N - Sx; + Sxx += tmp * tmp / (N * transvalues[0]); + + /* + * Overflow check. We only report an overflow error when finite + * inputs lead to infinite results. Note also that Sxx should be NaN + * if any of the inputs are infinite, so we intentionally prevent Sxx + * from becoming infinite. + */ + if (isinf(Sx) || isinf(Sxx)) + { + if (!isinf(transvalues[1]) && !isinf(newval)) + float_overflow_error(); + + Sxx = get_float8_nan(); + } + } + else + { + /* + * At the first input, we normally can leave Sxx as 0. However, if + * the first input is Inf or NaN, we'd better force Sxx to NaN; + * otherwise we will falsely report variance zero when there are no + * more inputs. + */ + if (isnan(newval) || isinf(newval)) + Sxx = get_float8_nan(); + } + + /* + * If we're invoked as an aggregate, we can cheat and modify our first + * parameter in-place to reduce palloc overhead. 
Otherwise we construct a + * new array with the updated transition data and return it. + */ + if (AggCheckCallContext(fcinfo, NULL)) + { + transvalues[0] = N; + transvalues[1] = Sx; + transvalues[2] = Sxx; + + PG_RETURN_ARRAYTYPE_P(transarray); + } + else + { + Datum transdatums[3]; + ArrayType *result; + + transdatums[0] = Float8GetDatumFast(N); + transdatums[1] = Float8GetDatumFast(Sx); + transdatums[2] = Float8GetDatumFast(Sxx); + + result = construct_array(transdatums, 3, + FLOAT8OID, + sizeof(float8), FLOAT8PASSBYVAL, TYPALIGN_DOUBLE); + + PG_RETURN_ARRAYTYPE_P(result); + } +} + +Datum +float4_accum(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + + /* do computations as float8 */ + float8 newval = PG_GETARG_FLOAT4(1); + float8 *transvalues; + float8 N, + Sx, + Sxx, + tmp; + + transvalues = check_float8_array(transarray, "float4_accum", 3); + N = transvalues[0]; + Sx = transvalues[1]; + Sxx = transvalues[2]; + + /* + * Use the Youngs-Cramer algorithm to incorporate the new value into the + * transition values. + */ + N += 1.0; + Sx += newval; + if (transvalues[0] > 0.0) + { + tmp = newval * N - Sx; + Sxx += tmp * tmp / (N * transvalues[0]); + + /* + * Overflow check. We only report an overflow error when finite + * inputs lead to infinite results. Note also that Sxx should be NaN + * if any of the inputs are infinite, so we intentionally prevent Sxx + * from becoming infinite. + */ + if (isinf(Sx) || isinf(Sxx)) + { + if (!isinf(transvalues[1]) && !isinf(newval)) + float_overflow_error(); + + Sxx = get_float8_nan(); + } + } + else + { + /* + * At the first input, we normally can leave Sxx as 0. However, if + * the first input is Inf or NaN, we'd better force Sxx to NaN; + * otherwise we will falsely report variance zero when there are no + * more inputs. + */ + if (isnan(newval) || isinf(newval)) + Sxx = get_float8_nan(); + } + + /* + * If we're invoked as an aggregate, we can cheat and modify our first + * parameter in-place to reduce palloc overhead. Otherwise we construct a + * new array with the updated transition data and return it. 
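 *
 * (Context note, assuming the standard catalog wiring: this in-place path
 * is the one taken when the executor drives aggregates such as
 * avg(float4) or stddev_samp(float4), where AggCheckCallContext() reports
 * an aggregate call and modifying the transition array is safe.)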
+ */ + if (AggCheckCallContext(fcinfo, NULL)) + { + transvalues[0] = N; + transvalues[1] = Sx; + transvalues[2] = Sxx; + + PG_RETURN_ARRAYTYPE_P(transarray); + } + else + { + Datum transdatums[3]; + ArrayType *result; + + transdatums[0] = Float8GetDatumFast(N); + transdatums[1] = Float8GetDatumFast(Sx); + transdatums[2] = Float8GetDatumFast(Sxx); + + result = construct_array(transdatums, 3, + FLOAT8OID, + sizeof(float8), FLOAT8PASSBYVAL, TYPALIGN_DOUBLE); + + PG_RETURN_ARRAYTYPE_P(result); + } +} + +Datum +float8_avg(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + float8 *transvalues; + float8 N, + Sx; + + transvalues = check_float8_array(transarray, "float8_avg", 3); + N = transvalues[0]; + Sx = transvalues[1]; + /* ignore Sxx */ + + /* SQL defines AVG of no values to be NULL */ + if (N == 0.0) + PG_RETURN_NULL(); + + PG_RETURN_FLOAT8(Sx / N); +} + +Datum +float8_var_pop(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + float8 *transvalues; + float8 N, + Sxx; + + transvalues = check_float8_array(transarray, "float8_var_pop", 3); + N = transvalues[0]; + /* ignore Sx */ + Sxx = transvalues[2]; + + /* Population variance is undefined when N is 0, so return NULL */ + if (N == 0.0) + PG_RETURN_NULL(); + + /* Note that Sxx is guaranteed to be non-negative */ + + PG_RETURN_FLOAT8(Sxx / N); +} + +Datum +float8_var_samp(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + float8 *transvalues; + float8 N, + Sxx; + + transvalues = check_float8_array(transarray, "float8_var_samp", 3); + N = transvalues[0]; + /* ignore Sx */ + Sxx = transvalues[2]; + + /* Sample variance is undefined when N is 0 or 1, so return NULL */ + if (N <= 1.0) + PG_RETURN_NULL(); + + /* Note that Sxx is guaranteed to be non-negative */ + + PG_RETURN_FLOAT8(Sxx / (N - 1.0)); +} + +Datum +float8_stddev_pop(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + float8 *transvalues; + float8 N, + Sxx; + + transvalues = check_float8_array(transarray, "float8_stddev_pop", 3); + N = transvalues[0]; + /* ignore Sx */ + Sxx = transvalues[2]; + + /* Population stddev is undefined when N is 0, so return NULL */ + if (N == 0.0) + PG_RETURN_NULL(); + + /* Note that Sxx is guaranteed to be non-negative */ + + PG_RETURN_FLOAT8(sqrt(Sxx / N)); +} + +Datum +float8_stddev_samp(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + float8 *transvalues; + float8 N, + Sxx; + + transvalues = check_float8_array(transarray, "float8_stddev_samp", 3); + N = transvalues[0]; + /* ignore Sx */ + Sxx = transvalues[2]; + + /* Sample stddev is undefined when N is 0 or 1, so return NULL */ + if (N <= 1.0) + PG_RETURN_NULL(); + + /* Note that Sxx is guaranteed to be non-negative */ + + PG_RETURN_FLOAT8(sqrt(Sxx / (N - 1.0))); +} + +/* + * ========================= + * SQL2003 BINARY AGGREGATES + * ========================= + * + * As with the preceding aggregates, we use the Youngs-Cramer algorithm to + * reduce rounding errors in the aggregate final functions. + * + * The transition datatype for all these aggregates is a 6-element array of + * float8, holding the values N, Sx=sum(X), Sxx=sum((X-Sx/N)^2), Sy=sum(Y), + * Syy=sum((Y-Sy/N)^2), Sxy=sum((X-Sx/N)*(Y-Sy/N)) in that order. + * + * Note that Y is the first argument to all these aggregates! + * + * It might seem attractive to optimize this by having multiple accumulator + * functions that only calculate the sums actually needed. 
But on most + * modern machines, a couple of extra floating-point multiplies will be + * insignificant compared to the other per-tuple overhead, so I've chosen + * to minimize code space instead. + */ + +Datum +float8_regr_accum(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + float8 newvalY = PG_GETARG_FLOAT8(1); + float8 newvalX = PG_GETARG_FLOAT8(2); + float8 *transvalues; + float8 N, + Sx, + Sxx, + Sy, + Syy, + Sxy, + tmpX, + tmpY, + scale; + + transvalues = check_float8_array(transarray, "float8_regr_accum", 6); + N = transvalues[0]; + Sx = transvalues[1]; + Sxx = transvalues[2]; + Sy = transvalues[3]; + Syy = transvalues[4]; + Sxy = transvalues[5]; + + /* + * Use the Youngs-Cramer algorithm to incorporate the new values into the + * transition values. + */ + N += 1.0; + Sx += newvalX; + Sy += newvalY; + if (transvalues[0] > 0.0) + { + tmpX = newvalX * N - Sx; + tmpY = newvalY * N - Sy; + scale = 1.0 / (N * transvalues[0]); + Sxx += tmpX * tmpX * scale; + Syy += tmpY * tmpY * scale; + Sxy += tmpX * tmpY * scale; + + /* + * Overflow check. We only report an overflow error when finite + * inputs lead to infinite results. Note also that Sxx, Syy and Sxy + * should be NaN if any of the relevant inputs are infinite, so we + * intentionally prevent them from becoming infinite. + */ + if (isinf(Sx) || isinf(Sxx) || isinf(Sy) || isinf(Syy) || isinf(Sxy)) + { + if (((isinf(Sx) || isinf(Sxx)) && + !isinf(transvalues[1]) && !isinf(newvalX)) || + ((isinf(Sy) || isinf(Syy)) && + !isinf(transvalues[3]) && !isinf(newvalY)) || + (isinf(Sxy) && + !isinf(transvalues[1]) && !isinf(newvalX) && + !isinf(transvalues[3]) && !isinf(newvalY))) + float_overflow_error(); + + if (isinf(Sxx)) + Sxx = get_float8_nan(); + if (isinf(Syy)) + Syy = get_float8_nan(); + if (isinf(Sxy)) + Sxy = get_float8_nan(); + } + } + else + { + /* + * At the first input, we normally can leave Sxx et al as 0. However, + * if the first input is Inf or NaN, we'd better force the dependent + * sums to NaN; otherwise we will falsely report variance zero when + * there are no more inputs. + */ + if (isnan(newvalX) || isinf(newvalX)) + Sxx = Sxy = get_float8_nan(); + if (isnan(newvalY) || isinf(newvalY)) + Syy = Sxy = get_float8_nan(); + } + + /* + * If we're invoked as an aggregate, we can cheat and modify our first + * parameter in-place to reduce palloc overhead. Otherwise we construct a + * new array with the updated transition data and return it. + */ + if (AggCheckCallContext(fcinfo, NULL)) + { + transvalues[0] = N; + transvalues[1] = Sx; + transvalues[2] = Sxx; + transvalues[3] = Sy; + transvalues[4] = Syy; + transvalues[5] = Sxy; + + PG_RETURN_ARRAYTYPE_P(transarray); + } + else + { + Datum transdatums[6]; + ArrayType *result; + + transdatums[0] = Float8GetDatumFast(N); + transdatums[1] = Float8GetDatumFast(Sx); + transdatums[2] = Float8GetDatumFast(Sxx); + transdatums[3] = Float8GetDatumFast(Sy); + transdatums[4] = Float8GetDatumFast(Syy); + transdatums[5] = Float8GetDatumFast(Sxy); + + result = construct_array(transdatums, 6, + FLOAT8OID, + sizeof(float8), FLOAT8PASSBYVAL, TYPALIGN_DOUBLE); + + PG_RETURN_ARRAYTYPE_P(result); + } +} + +/* + * float8_regr_combine + * + * An aggregate combine function used to combine two 6 fields + * aggregate transition data into a single transition data. + * This function is used only in two stage aggregation and + * shouldn't be called outside aggregate context. 
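 *
 * (Context note, assuming the usual catalog wiring: this combine function
 * supports parallel aggregation for the two-argument statistics built on
 * float8_regr_accum, e.g. corr(), covar_pop(), covar_samp() and the
 * regr_*() family, whose final functions appear further down.)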
+ */ +Datum +float8_regr_combine(PG_FUNCTION_ARGS) +{ + ArrayType *transarray1 = PG_GETARG_ARRAYTYPE_P(0); + ArrayType *transarray2 = PG_GETARG_ARRAYTYPE_P(1); + float8 *transvalues1; + float8 *transvalues2; + float8 N1, + Sx1, + Sxx1, + Sy1, + Syy1, + Sxy1, + N2, + Sx2, + Sxx2, + Sy2, + Syy2, + Sxy2, + tmp1, + tmp2, + N, + Sx, + Sxx, + Sy, + Syy, + Sxy; + + transvalues1 = check_float8_array(transarray1, "float8_regr_combine", 6); + transvalues2 = check_float8_array(transarray2, "float8_regr_combine", 6); + + N1 = transvalues1[0]; + Sx1 = transvalues1[1]; + Sxx1 = transvalues1[2]; + Sy1 = transvalues1[3]; + Syy1 = transvalues1[4]; + Sxy1 = transvalues1[5]; + + N2 = transvalues2[0]; + Sx2 = transvalues2[1]; + Sxx2 = transvalues2[2]; + Sy2 = transvalues2[3]; + Syy2 = transvalues2[4]; + Sxy2 = transvalues2[5]; + + /*-------------------- + * The transition values combine using a generalization of the + * Youngs-Cramer algorithm as follows: + * + * N = N1 + N2 + * Sx = Sx1 + Sx2 + * Sxx = Sxx1 + Sxx2 + N1 * N2 * (Sx1/N1 - Sx2/N2)^2 / N + * Sy = Sy1 + Sy2 + * Syy = Syy1 + Syy2 + N1 * N2 * (Sy1/N1 - Sy2/N2)^2 / N + * Sxy = Sxy1 + Sxy2 + N1 * N2 * (Sx1/N1 - Sx2/N2) * (Sy1/N1 - Sy2/N2) / N + * + * It's worth handling the special cases N1 = 0 and N2 = 0 separately + * since those cases are trivial, and we then don't need to worry about + * division-by-zero errors in the general case. + *-------------------- + */ + if (N1 == 0.0) + { + N = N2; + Sx = Sx2; + Sxx = Sxx2; + Sy = Sy2; + Syy = Syy2; + Sxy = Sxy2; + } + else if (N2 == 0.0) + { + N = N1; + Sx = Sx1; + Sxx = Sxx1; + Sy = Sy1; + Syy = Syy1; + Sxy = Sxy1; + } + else + { + N = N1 + N2; + Sx = float8_pl(Sx1, Sx2); + tmp1 = Sx1 / N1 - Sx2 / N2; + Sxx = Sxx1 + Sxx2 + N1 * N2 * tmp1 * tmp1 / N; + if (unlikely(isinf(Sxx)) && !isinf(Sxx1) && !isinf(Sxx2)) + float_overflow_error(); + Sy = float8_pl(Sy1, Sy2); + tmp2 = Sy1 / N1 - Sy2 / N2; + Syy = Syy1 + Syy2 + N1 * N2 * tmp2 * tmp2 / N; + if (unlikely(isinf(Syy)) && !isinf(Syy1) && !isinf(Syy2)) + float_overflow_error(); + Sxy = Sxy1 + Sxy2 + N1 * N2 * tmp1 * tmp2 / N; + if (unlikely(isinf(Sxy)) && !isinf(Sxy1) && !isinf(Sxy2)) + float_overflow_error(); + } + + /* + * If we're invoked as an aggregate, we can cheat and modify our first + * parameter in-place to reduce palloc overhead. Otherwise we construct a + * new array with the updated transition data and return it. 
+ */ + if (AggCheckCallContext(fcinfo, NULL)) + { + transvalues1[0] = N; + transvalues1[1] = Sx; + transvalues1[2] = Sxx; + transvalues1[3] = Sy; + transvalues1[4] = Syy; + transvalues1[5] = Sxy; + + PG_RETURN_ARRAYTYPE_P(transarray1); + } + else + { + Datum transdatums[6]; + ArrayType *result; + + transdatums[0] = Float8GetDatumFast(N); + transdatums[1] = Float8GetDatumFast(Sx); + transdatums[2] = Float8GetDatumFast(Sxx); + transdatums[3] = Float8GetDatumFast(Sy); + transdatums[4] = Float8GetDatumFast(Syy); + transdatums[5] = Float8GetDatumFast(Sxy); + + result = construct_array(transdatums, 6, + FLOAT8OID, + sizeof(float8), FLOAT8PASSBYVAL, TYPALIGN_DOUBLE); + + PG_RETURN_ARRAYTYPE_P(result); + } +} + + +Datum +float8_regr_sxx(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + float8 *transvalues; + float8 N, + Sxx; + + transvalues = check_float8_array(transarray, "float8_regr_sxx", 6); + N = transvalues[0]; + Sxx = transvalues[2]; + + /* if N is 0 we should return NULL */ + if (N < 1.0) + PG_RETURN_NULL(); + + /* Note that Sxx is guaranteed to be non-negative */ + + PG_RETURN_FLOAT8(Sxx); +} + +Datum +float8_regr_syy(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + float8 *transvalues; + float8 N, + Syy; + + transvalues = check_float8_array(transarray, "float8_regr_syy", 6); + N = transvalues[0]; + Syy = transvalues[4]; + + /* if N is 0 we should return NULL */ + if (N < 1.0) + PG_RETURN_NULL(); + + /* Note that Syy is guaranteed to be non-negative */ + + PG_RETURN_FLOAT8(Syy); +} + +Datum +float8_regr_sxy(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + float8 *transvalues; + float8 N, + Sxy; + + transvalues = check_float8_array(transarray, "float8_regr_sxy", 6); + N = transvalues[0]; + Sxy = transvalues[5]; + + /* if N is 0 we should return NULL */ + if (N < 1.0) + PG_RETURN_NULL(); + + /* A negative result is valid here */ + + PG_RETURN_FLOAT8(Sxy); +} + +Datum +float8_regr_avgx(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + float8 *transvalues; + float8 N, + Sx; + + transvalues = check_float8_array(transarray, "float8_regr_avgx", 6); + N = transvalues[0]; + Sx = transvalues[1]; + + /* if N is 0 we should return NULL */ + if (N < 1.0) + PG_RETURN_NULL(); + + PG_RETURN_FLOAT8(Sx / N); +} + +Datum +float8_regr_avgy(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + float8 *transvalues; + float8 N, + Sy; + + transvalues = check_float8_array(transarray, "float8_regr_avgy", 6); + N = transvalues[0]; + Sy = transvalues[3]; + + /* if N is 0 we should return NULL */ + if (N < 1.0) + PG_RETURN_NULL(); + + PG_RETURN_FLOAT8(Sy / N); +} + +Datum +float8_covar_pop(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + float8 *transvalues; + float8 N, + Sxy; + + transvalues = check_float8_array(transarray, "float8_covar_pop", 6); + N = transvalues[0]; + Sxy = transvalues[5]; + + /* if N is 0 we should return NULL */ + if (N < 1.0) + PG_RETURN_NULL(); + + PG_RETURN_FLOAT8(Sxy / N); +} + +Datum +float8_covar_samp(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + float8 *transvalues; + float8 N, + Sxy; + + transvalues = check_float8_array(transarray, "float8_covar_samp", 6); + N = transvalues[0]; + Sxy = transvalues[5]; + + /* if N is <= 1 we should return NULL */ + if (N < 2.0) + PG_RETURN_NULL(); + + PG_RETURN_FLOAT8(Sxy / (N - 1.0)); +} + +Datum +float8_corr(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = 
PG_GETARG_ARRAYTYPE_P(0); + float8 *transvalues; + float8 N, + Sxx, + Syy, + Sxy; + + transvalues = check_float8_array(transarray, "float8_corr", 6); + N = transvalues[0]; + Sxx = transvalues[2]; + Syy = transvalues[4]; + Sxy = transvalues[5]; + + /* if N is 0 we should return NULL */ + if (N < 1.0) + PG_RETURN_NULL(); + + /* Note that Sxx and Syy are guaranteed to be non-negative */ + + /* per spec, return NULL for horizontal and vertical lines */ + if (Sxx == 0 || Syy == 0) + PG_RETURN_NULL(); + + PG_RETURN_FLOAT8(Sxy / sqrt(Sxx * Syy)); +} + +Datum +float8_regr_r2(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + float8 *transvalues; + float8 N, + Sxx, + Syy, + Sxy; + + transvalues = check_float8_array(transarray, "float8_regr_r2", 6); + N = transvalues[0]; + Sxx = transvalues[2]; + Syy = transvalues[4]; + Sxy = transvalues[5]; + + /* if N is 0 we should return NULL */ + if (N < 1.0) + PG_RETURN_NULL(); + + /* Note that Sxx and Syy are guaranteed to be non-negative */ + + /* per spec, return NULL for a vertical line */ + if (Sxx == 0) + PG_RETURN_NULL(); + + /* per spec, return 1.0 for a horizontal line */ + if (Syy == 0) + PG_RETURN_FLOAT8(1.0); + + PG_RETURN_FLOAT8((Sxy * Sxy) / (Sxx * Syy)); +} + +Datum +float8_regr_slope(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + float8 *transvalues; + float8 N, + Sxx, + Sxy; + + transvalues = check_float8_array(transarray, "float8_regr_slope", 6); + N = transvalues[0]; + Sxx = transvalues[2]; + Sxy = transvalues[5]; + + /* if N is 0 we should return NULL */ + if (N < 1.0) + PG_RETURN_NULL(); + + /* Note that Sxx is guaranteed to be non-negative */ + + /* per spec, return NULL for a vertical line */ + if (Sxx == 0) + PG_RETURN_NULL(); + + PG_RETURN_FLOAT8(Sxy / Sxx); +} + +Datum +float8_regr_intercept(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + float8 *transvalues; + float8 N, + Sx, + Sxx, + Sy, + Sxy; + + transvalues = check_float8_array(transarray, "float8_regr_intercept", 6); + N = transvalues[0]; + Sx = transvalues[1]; + Sxx = transvalues[2]; + Sy = transvalues[3]; + Sxy = transvalues[5]; + + /* if N is 0 we should return NULL */ + if (N < 1.0) + PG_RETURN_NULL(); + + /* Note that Sxx is guaranteed to be non-negative */ + + /* per spec, return NULL for a vertical line */ + if (Sxx == 0) + PG_RETURN_NULL(); + + PG_RETURN_FLOAT8((Sy - Sx * Sxy / Sxx) / N); +} + + +/* + * ==================================== + * MIXED-PRECISION ARITHMETIC OPERATORS + * ==================================== + */ + +/* + * float48pl - returns arg1 + arg2 + * float48mi - returns arg1 - arg2 + * float48mul - returns arg1 * arg2 + * float48div - returns arg1 / arg2 + */ +Datum +float48pl(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + + PG_RETURN_FLOAT8(float8_pl((float8) arg1, arg2)); +} + +Datum +float48mi(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + + PG_RETURN_FLOAT8(float8_mi((float8) arg1, arg2)); +} + +Datum +float48mul(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + + PG_RETURN_FLOAT8(float8_mul((float8) arg1, arg2)); +} + +Datum +float48div(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + + PG_RETURN_FLOAT8(float8_div((float8) arg1, arg2)); +} + +/* + * float84pl - returns arg1 + arg2 + * float84mi - returns arg1 - arg2 + * float84mul - returns arg1 * arg2 + * float84div - returns 
arg1 / arg2 + */ +Datum +float84pl(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float4 arg2 = PG_GETARG_FLOAT4(1); + + PG_RETURN_FLOAT8(float8_pl(arg1, (float8) arg2)); +} + +Datum +float84mi(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float4 arg2 = PG_GETARG_FLOAT4(1); + + PG_RETURN_FLOAT8(float8_mi(arg1, (float8) arg2)); +} + +Datum +float84mul(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float4 arg2 = PG_GETARG_FLOAT4(1); + + PG_RETURN_FLOAT8(float8_mul(arg1, (float8) arg2)); +} + +Datum +float84div(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float4 arg2 = PG_GETARG_FLOAT4(1); + + PG_RETURN_FLOAT8(float8_div(arg1, (float8) arg2)); +} + +/* + * ==================== + * COMPARISON OPERATORS + * ==================== + */ + +/* + * float48{eq,ne,lt,le,gt,ge} - float4/float8 comparison operations + */ +Datum +float48eq(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + + PG_RETURN_BOOL(float8_eq((float8) arg1, arg2)); +} + +Datum +float48ne(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + + PG_RETURN_BOOL(float8_ne((float8) arg1, arg2)); +} + +Datum +float48lt(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + + PG_RETURN_BOOL(float8_lt((float8) arg1, arg2)); +} + +Datum +float48le(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + + PG_RETURN_BOOL(float8_le((float8) arg1, arg2)); +} + +Datum +float48gt(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + + PG_RETURN_BOOL(float8_gt((float8) arg1, arg2)); +} + +Datum +float48ge(PG_FUNCTION_ARGS) +{ + float4 arg1 = PG_GETARG_FLOAT4(0); + float8 arg2 = PG_GETARG_FLOAT8(1); + + PG_RETURN_BOOL(float8_ge((float8) arg1, arg2)); +} + +/* + * float84{eq,ne,lt,le,gt,ge} - float8/float4 comparison operations + */ +Datum +float84eq(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float4 arg2 = PG_GETARG_FLOAT4(1); + + PG_RETURN_BOOL(float8_eq(arg1, (float8) arg2)); +} + +Datum +float84ne(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float4 arg2 = PG_GETARG_FLOAT4(1); + + PG_RETURN_BOOL(float8_ne(arg1, (float8) arg2)); +} + +Datum +float84lt(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float4 arg2 = PG_GETARG_FLOAT4(1); + + PG_RETURN_BOOL(float8_lt(arg1, (float8) arg2)); +} + +Datum +float84le(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float4 arg2 = PG_GETARG_FLOAT4(1); + + PG_RETURN_BOOL(float8_le(arg1, (float8) arg2)); +} + +Datum +float84gt(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float4 arg2 = PG_GETARG_FLOAT4(1); + + PG_RETURN_BOOL(float8_gt(arg1, (float8) arg2)); +} + +Datum +float84ge(PG_FUNCTION_ARGS) +{ + float8 arg1 = PG_GETARG_FLOAT8(0); + float4 arg2 = PG_GETARG_FLOAT4(1); + + PG_RETURN_BOOL(float8_ge(arg1, (float8) arg2)); +} + +/* + * Implements the float8 version of the width_bucket() function + * defined by SQL2003. See also width_bucket_numeric(). + * + * 'bound1' and 'bound2' are the lower and upper bounds of the + * histogram's range, respectively. 'count' is the number of buckets + * in the histogram. width_bucket() returns an integer indicating the + * bucket number that 'operand' belongs to in an equiwidth histogram + * with the specified characteristics. An operand smaller than the + * lower bound is assigned to bucket 0. 
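 * (As an illustrative call with assumed arguments: width_bucket(5.35,
 * 0.024, 10.06, 5) returns 3, i.e. the third of five equal-width buckets
 * spanning [0.024, 10.06).)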
An operand greater than the + * upper bound is assigned to an additional bucket (with number + * count+1). We don't allow "NaN" for any of the float8 inputs, and we + * don't allow either of the histogram bounds to be +/- infinity. + */ +Datum +width_bucket_float8(PG_FUNCTION_ARGS) +{ + float8 operand = PG_GETARG_FLOAT8(0); + float8 bound1 = PG_GETARG_FLOAT8(1); + float8 bound2 = PG_GETARG_FLOAT8(2); + int32 count = PG_GETARG_INT32(3); + int32 result; + + if (count <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), + errmsg("count must be greater than zero"))); + + if (isnan(operand) || isnan(bound1) || isnan(bound2)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), + errmsg("operand, lower bound, and upper bound cannot be NaN"))); + + /* Note that we allow "operand" to be infinite */ + if (isinf(bound1) || isinf(bound2)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), + errmsg("lower and upper bounds must be finite"))); + + if (bound1 < bound2) + { + if (operand < bound1) + result = 0; + else if (operand >= bound2) + { + if (pg_add_s32_overflow(count, 1, &result)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + } + else + { + if (!isinf(bound2 - bound1)) + { + /* The quotient is surely in [0,1], so this can't overflow */ + result = count * ((operand - bound1) / (bound2 - bound1)); + } + else + { + /* + * We get here if bound2 - bound1 overflows DBL_MAX. Since + * both bounds are finite, their difference can't exceed twice + * DBL_MAX; so we can perform the computation without overflow + * by dividing all the inputs by 2. That should be exact too, + * except in the case where a very small operand underflows to + * zero, which would have negligible impact on the result + * given such large bounds. + */ + result = count * ((operand / 2 - bound1 / 2) / (bound2 / 2 - bound1 / 2)); + } + /* The quotient could round to 1.0, which would be a lie */ + if (result >= count) + result = count - 1; + /* Having done that, we can add 1 without fear of overflow */ + result++; + } + } + else if (bound1 > bound2) + { + if (operand > bound1) + result = 0; + else if (operand <= bound2) + { + if (pg_add_s32_overflow(count, 1, &result)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + } + else + { + if (!isinf(bound1 - bound2)) + result = count * ((bound1 - operand) / (bound1 - bound2)); + else + result = count * ((bound1 / 2 - operand / 2) / (bound1 / 2 - bound2 / 2)); + if (result >= count) + result = count - 1; + result++; + } + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), + errmsg("lower bound cannot equal upper bound"))); + result = 0; /* keep the compiler quiet */ + } + + PG_RETURN_INT32(result); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/format_type.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/format_type.c new file mode 100644 index 00000000000..12402a06379 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/format_type.c @@ -0,0 +1,480 @@ +/*------------------------------------------------------------------------- + * + * format_type.c + * Display type names "nicely". 
+ * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/adt/format_type.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <ctype.h> + +#include "access/htup_details.h" +#include "catalog/namespace.h" +#include "catalog/pg_type.h" +#include "mb/pg_wchar.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/lsyscache.h" +#include "utils/numeric.h" +#include "utils/syscache.h" + +static char *printTypmod(const char *typname, int32 typmod, Oid typmodout); + + +/* + * SQL function: format_type(type_oid, typemod) + * + * `type_oid' is from pg_type.oid, `typemod' is from + * pg_attribute.atttypmod. This function will get the type name and + * format it and the modifier to canonical SQL format, if the type is + * a standard type. Otherwise you just get pg_type.typname back, + * double quoted if it contains funny characters or matches a keyword. + * + * If typemod is NULL then we are formatting a type name in a context where + * no typemod is available, eg a function argument or result type. This + * yields a slightly different result from specifying typemod = -1 in some + * cases. Given typemod = -1 we feel compelled to produce an output that + * the parser will interpret as having typemod -1, so that pg_dump will + * produce CREATE TABLE commands that recreate the original state. But + * given NULL typemod, we assume that the parser's interpretation of + * typemod doesn't matter, and so we are willing to output a slightly + * "prettier" representation of the same type. For example, type = bpchar + * and typemod = NULL gets you "character", whereas typemod = -1 gets you + * "bpchar" --- the former will be interpreted as character(1) by the + * parser, which does not yield typemod -1. + * + * XXX encoding a meaning in typemod = NULL is ugly; it'd have been + * cleaner to make two functions of one and two arguments respectively. + * Not worth changing it now, however. + */ +Datum +format_type(PG_FUNCTION_ARGS) +{ + Oid type_oid; + int32 typemod; + char *result; + bits16 flags = FORMAT_TYPE_ALLOW_INVALID; + + /* Since this function is not strict, we must test for null args */ + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + + type_oid = PG_GETARG_OID(0); + + if (PG_ARGISNULL(1)) + typemod = -1; + else + { + typemod = PG_GETARG_INT32(1); + flags |= FORMAT_TYPE_TYPEMOD_GIVEN; + } + + result = format_type_extended(type_oid, typemod, flags); + + PG_RETURN_TEXT_P(cstring_to_text(result)); +} + +/* + * format_type_extended + * Generate a possibly-qualified type name. + * + * The default behavior is to only qualify if the type is not in the search + * path, to ignore the given typmod, and to raise an error if a non-existent + * type_oid is given. + * + * The following bits in 'flags' modify the behavior: + * - FORMAT_TYPE_TYPEMOD_GIVEN + * include the typmod in the output (typmod could still be -1 though) + * - FORMAT_TYPE_ALLOW_INVALID + * if the type OID is invalid or unknown, return ??? or such instead + * of failing + * - FORMAT_TYPE_INVALID_AS_NULL + * if the type OID is invalid or unknown, return NULL instead of ??? + * or such + * - FORMAT_TYPE_FORCE_QUALIFY + * always schema-qualify type names, regardless of search_path + * + * Note that TYPEMOD_GIVEN is not interchangeable with "typemod == -1"; + * see the comments above for format_type(). 
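 *
 * (Illustrative example restating the format_type() note above: calling
 * this with BPCHAROID, typemod -1 and FORMAT_TYPE_TYPEMOD_GIVEN yields
 * "bpchar", while the same call without that flag yields "character".)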
+ * + * Returns a palloc'd string, or NULL. + */ +char * +format_type_extended(Oid type_oid, int32 typemod, bits16 flags) +{ + HeapTuple tuple; + Form_pg_type typeform; + Oid array_base_type; + bool is_array; + char *buf; + bool with_typemod; + + if (type_oid == InvalidOid) + { + if ((flags & FORMAT_TYPE_INVALID_AS_NULL) != 0) + return NULL; + else if ((flags & FORMAT_TYPE_ALLOW_INVALID) != 0) + return pstrdup("-"); + } + + tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(type_oid)); + if (!HeapTupleIsValid(tuple)) + { + if ((flags & FORMAT_TYPE_INVALID_AS_NULL) != 0) + return NULL; + else if ((flags & FORMAT_TYPE_ALLOW_INVALID) != 0) + return pstrdup("???"); + else + elog(ERROR, "cache lookup failed for type %u", type_oid); + } + typeform = (Form_pg_type) GETSTRUCT(tuple); + + /* + * Check if it's a "true" array type. Pseudo-array types such as "name" + * shouldn't get deconstructed. Also check the toast property, and don't + * deconstruct "plain storage" array types --- this is because we don't + * want to show oidvector as oid[]. + */ + array_base_type = typeform->typelem; + + if (IsTrueArrayType(typeform) && + typeform->typstorage != TYPSTORAGE_PLAIN) + { + /* Switch our attention to the array element type */ + ReleaseSysCache(tuple); + tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(array_base_type)); + if (!HeapTupleIsValid(tuple)) + { + if ((flags & FORMAT_TYPE_INVALID_AS_NULL) != 0) + return NULL; + else if ((flags & FORMAT_TYPE_ALLOW_INVALID) != 0) + return pstrdup("???[]"); + else + elog(ERROR, "cache lookup failed for type %u", type_oid); + } + typeform = (Form_pg_type) GETSTRUCT(tuple); + type_oid = array_base_type; + is_array = true; + } + else + is_array = false; + + with_typemod = (flags & FORMAT_TYPE_TYPEMOD_GIVEN) != 0 && (typemod >= 0); + + /* + * See if we want to special-case the output for certain built-in types. + * Note that these special cases should all correspond to special + * productions in gram.y, to ensure that the type name will be taken as a + * system type, not a user type of the same name. + * + * If we do not provide a special-case output here, the type name will be + * handled the same way as a user type name --- in particular, it will be + * double-quoted if it matches any lexer keyword. This behavior is + * essential for some cases, such as types "bit" and "char". + */ + buf = NULL; /* flag for no special case */ + + switch (type_oid) + { + case BITOID: + if (with_typemod) + buf = printTypmod("bit", typemod, typeform->typmodout); + else if ((flags & FORMAT_TYPE_TYPEMOD_GIVEN) != 0) + { + /* + * bit with typmod -1 is not the same as BIT, which means + * BIT(1) per SQL spec. Report it as the quoted typename so + * that parser will not assign a bogus typmod. + */ + } + else + buf = pstrdup("bit"); + break; + + case BOOLOID: + buf = pstrdup("boolean"); + break; + + case BPCHAROID: + if (with_typemod) + buf = printTypmod("character", typemod, typeform->typmodout); + else if ((flags & FORMAT_TYPE_TYPEMOD_GIVEN) != 0) + { + /* + * bpchar with typmod -1 is not the same as CHARACTER, which + * means CHARACTER(1) per SQL spec. Report it as bpchar so + * that parser will not assign a bogus typmod. 
+ */ + } + else + buf = pstrdup("character"); + break; + + case FLOAT4OID: + buf = pstrdup("real"); + break; + + case FLOAT8OID: + buf = pstrdup("double precision"); + break; + + case INT2OID: + buf = pstrdup("smallint"); + break; + + case INT4OID: + buf = pstrdup("integer"); + break; + + case INT8OID: + buf = pstrdup("bigint"); + break; + + case NUMERICOID: + if (with_typemod) + buf = printTypmod("numeric", typemod, typeform->typmodout); + else + buf = pstrdup("numeric"); + break; + + case INTERVALOID: + if (with_typemod) + buf = printTypmod("interval", typemod, typeform->typmodout); + else + buf = pstrdup("interval"); + break; + + case TIMEOID: + if (with_typemod) + buf = printTypmod("time", typemod, typeform->typmodout); + else + buf = pstrdup("time without time zone"); + break; + + case TIMETZOID: + if (with_typemod) + buf = printTypmod("time", typemod, typeform->typmodout); + else + buf = pstrdup("time with time zone"); + break; + + case TIMESTAMPOID: + if (with_typemod) + buf = printTypmod("timestamp", typemod, typeform->typmodout); + else + buf = pstrdup("timestamp without time zone"); + break; + + case TIMESTAMPTZOID: + if (with_typemod) + buf = printTypmod("timestamp", typemod, typeform->typmodout); + else + buf = pstrdup("timestamp with time zone"); + break; + + case VARBITOID: + if (with_typemod) + buf = printTypmod("bit varying", typemod, typeform->typmodout); + else + buf = pstrdup("bit varying"); + break; + + case VARCHAROID: + if (with_typemod) + buf = printTypmod("character varying", typemod, typeform->typmodout); + else + buf = pstrdup("character varying"); + break; + } + + if (buf == NULL) + { + /* + * Default handling: report the name as it appears in the catalog. + * Here, we must qualify the name if it is not visible in the search + * path or if caller requests it; and we must double-quote it if it's + * not a standard identifier or if it matches any keyword. + */ + char *nspname; + char *typname; + + if ((flags & FORMAT_TYPE_FORCE_QUALIFY) == 0 && + TypeIsVisible(type_oid)) + nspname = NULL; + else + nspname = get_namespace_name_or_temp(typeform->typnamespace); + + typname = NameStr(typeform->typname); + + buf = quote_qualified_identifier(nspname, typname); + + if (with_typemod) + buf = printTypmod(buf, typemod, typeform->typmodout); + } + + if (is_array) + buf = psprintf("%s[]", buf); + + ReleaseSysCache(tuple); + + return buf; +} + +/* + * This version is for use within the backend in error messages, etc. + * One difference is that it will fail for an invalid type. + * + * The result is always a palloc'd string. + */ +char * +format_type_be(Oid type_oid) +{ + return format_type_extended(type_oid, -1, 0); +} + +/* + * This version returns a name that is always qualified (unless it's one + * of the SQL-keyword type names, such as TIMESTAMP WITH TIME ZONE). + */ +char * +format_type_be_qualified(Oid type_oid) +{ + return format_type_extended(type_oid, -1, FORMAT_TYPE_FORCE_QUALIFY); +} + +/* + * This version allows a nondefault typemod to be specified. 
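 *
 * (Illustrative call with assumed numbers: for VARCHAROID and typemod 36
 * this renders "character varying(32)", because the stored typmod carries
 * a VARHDRSZ offset that the type's typmodout function strips off.)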
+ */ +char * +format_type_with_typemod(Oid type_oid, int32 typemod) +{ + return format_type_extended(type_oid, typemod, FORMAT_TYPE_TYPEMOD_GIVEN); +} + +/* + * Add typmod decoration to the basic type name + */ +static char * +printTypmod(const char *typname, int32 typmod, Oid typmodout) +{ + char *res; + + /* Shouldn't be called if typmod is -1 */ + Assert(typmod >= 0); + + if (typmodout == InvalidOid) + { + /* Default behavior: just print the integer typmod with parens */ + res = psprintf("%s(%d)", typname, (int) typmod); + } + else + { + /* Use the type-specific typmodout procedure */ + char *tmstr; + + tmstr = DatumGetCString(OidFunctionCall1(typmodout, + Int32GetDatum(typmod))); + res = psprintf("%s%s", typname, tmstr); + } + + return res; +} + + +/* + * type_maximum_size --- determine maximum width of a variable-width column + * + * If the max width is indeterminate, return -1. In particular, we return + * -1 for any type not known to this routine. We assume the caller has + * already determined that the type is a variable-width type, so it's not + * necessary to look up the type's pg_type tuple here. + * + * This may appear unrelated to format_type(), but in fact the two routines + * share knowledge of the encoding of typmod for different types, so it's + * convenient to keep them together. (XXX now that most of this knowledge + * has been pushed out of format_type into the typmodout functions, it's + * interesting to wonder if it's worth trying to factor this code too...) + */ +int32 +type_maximum_size(Oid type_oid, int32 typemod) +{ + if (typemod < 0) + return -1; + + switch (type_oid) + { + case BPCHAROID: + case VARCHAROID: + /* typemod includes varlena header */ + + /* typemod is in characters not bytes */ + return (typemod - VARHDRSZ) * + pg_encoding_max_length(GetDatabaseEncoding()) + + VARHDRSZ; + + case NUMERICOID: + return numeric_maximum_size(typemod); + + case VARBITOID: + case BITOID: + /* typemod is the (max) number of bits */ + return (typemod + (BITS_PER_BYTE - 1)) / BITS_PER_BYTE + + 2 * sizeof(int32); + } + + /* Unknown type, or unlimited-width type such as 'text' */ + return -1; +} + + +/* + * oidvectortypes - converts a vector of type OIDs to "typname" list + */ +Datum +oidvectortypes(PG_FUNCTION_ARGS) +{ + oidvector *oidArray = (oidvector *) PG_GETARG_POINTER(0); + char *result; + int numargs = oidArray->dim1; + int num; + size_t total; + size_t left; + + total = 20 * numargs + 1; + result = palloc(total); + result[0] = '\0'; + left = total - 1; + + for (num = 0; num < numargs; num++) + { + char *typename = format_type_extended(oidArray->values[num], -1, + FORMAT_TYPE_ALLOW_INVALID); + size_t slen = strlen(typename); + + if (left < (slen + 2)) + { + total += slen + 2; + result = repalloc(result, total); + left += slen + 2; + } + + if (num > 0) + { + strcat(result, ", "); + left -= 2; + } + strcat(result, typename); + left -= slen; + } + + PG_RETURN_TEXT_P(cstring_to_text(result)); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/formatting.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/formatting.c new file mode 100644 index 00000000000..7c6202ddbcc --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/formatting.c @@ -0,0 +1,6693 @@ +/* ----------------------------------------------------------------------- + * formatting.c + * + * src/backend/utils/adt/formatting.c + * + * + * Portions Copyright (c) 1999-2023, PostgreSQL Global Development Group + * + * + * TO_CHAR(); 
TO_TIMESTAMP(); TO_DATE(); TO_NUMBER(); + * + * The PostgreSQL routines for a timestamp/int/float/numeric formatting, + * inspired by the Oracle TO_CHAR() / TO_DATE() / TO_NUMBER() routines. + * + * + * Cache & Memory: + * Routines use (itself) internal cache for format pictures. + * + * The cache uses a static buffer and is persistent across transactions. If + * the format-picture is bigger than the cache buffer, the parser is called + * always. + * + * NOTE for Number version: + * All in this version is implemented as keywords ( => not used + * suffixes), because a format picture is for *one* item (number) + * only. It not is as a timestamp version, where each keyword (can) + * has suffix. + * + * NOTE for Timestamp routines: + * In this module the POSIX 'struct tm' type is *not* used, but rather + * PgSQL type, which has tm_mon based on one (*non* zero) and + * year *not* based on 1900, but is used full year number. + * Module supports AD / BC / AM / PM. + * + * Supported types for to_char(): + * + * Timestamp, Numeric, int4, int8, float4, float8 + * + * Supported types for reverse conversion: + * + * Timestamp - to_timestamp() + * Date - to_date() + * Numeric - to_number() + * + * + * Karel Zak + * + * TODO + * - better number building (formatting) / parsing, now it isn't + * ideal code + * - use Assert() + * - add support for roman number to standard number conversion + * - add support for number spelling + * - add support for string to string formatting (we must be better + * than Oracle :-), + * to_char('Hello', 'X X X X X') -> 'H e l l o' + * + * ----------------------------------------------------------------------- + */ + +#ifdef DEBUG_TO_FROM_CHAR +#define DEBUG_elog_output DEBUG3 +#endif + +#include "postgres.h" + +#include <ctype.h> +#include <unistd.h> +#include <math.h> +#include <float.h> +#include <limits.h> +#include <wctype.h> + +#ifdef USE_ICU +#include <unicode/ustring.h> +#endif + +#include "catalog/pg_collation.h" +#include "catalog/pg_type.h" +#include "mb/pg_wchar.h" +#include "nodes/miscnodes.h" +#include "parser/scansup.h" +#include "utils/builtins.h" +#include "utils/date.h" +#include "utils/datetime.h" +#include "utils/float.h" +#include "utils/formatting.h" +#include "utils/memutils.h" +#include "utils/numeric.h" +#include "utils/pg_locale.h" +#include "varatt.h" + + +/* ---------- + * Routines flags + * ---------- + */ +#define DCH_FLAG 0x1 /* DATE-TIME flag */ +#define NUM_FLAG 0x2 /* NUMBER flag */ +#define STD_FLAG 0x4 /* STANDARD flag */ + +/* ---------- + * KeyWord Index (ascii from position 32 (' ') to 126 (~)) + * ---------- + */ +#define KeyWord_INDEX_SIZE ('~' - ' ') +#define KeyWord_INDEX_FILTER(_c) ((_c) <= ' ' || (_c) >= '~' ? 0 : 1) + +/* ---------- + * Maximal length of one node + * ---------- + */ +#define DCH_MAX_ITEM_SIZ 12 /* max localized day name */ +#define NUM_MAX_ITEM_SIZ 8 /* roman number (RN has 15 chars) */ + + +/* ---------- + * Format parser structs + * ---------- + */ +typedef struct +{ + const char *name; /* suffix string */ + int len, /* suffix length */ + id, /* used in node->suffix */ + type; /* prefix / postfix */ +} KeySuffix; + +/* ---------- + * FromCharDateMode + * ---------- + * + * This value is used to nominate one of several distinct (and mutually + * exclusive) date conventions that a keyword can belong to. + */ +typedef enum +{ + FROM_CHAR_DATE_NONE = 0, /* Value does not affect date mode. 
*/ + FROM_CHAR_DATE_GREGORIAN, /* Gregorian (day, month, year) style date */ + FROM_CHAR_DATE_ISOWEEK /* ISO 8601 week date */ +} FromCharDateMode; + +typedef struct +{ + const char *name; + int len; + int id; + bool is_digit; + FromCharDateMode date_mode; +} KeyWord; + +typedef struct +{ + uint8 type; /* NODE_TYPE_XXX, see below */ + char character[MAX_MULTIBYTE_CHAR_LEN + 1]; /* if type is CHAR */ + uint8 suffix; /* keyword prefix/suffix code, if any */ + const KeyWord *key; /* if type is ACTION */ +} FormatNode; + +#define NODE_TYPE_END 1 +#define NODE_TYPE_ACTION 2 +#define NODE_TYPE_CHAR 3 +#define NODE_TYPE_SEPARATOR 4 +#define NODE_TYPE_SPACE 5 + +#define SUFFTYPE_PREFIX 1 +#define SUFFTYPE_POSTFIX 2 + +#define CLOCK_24_HOUR 0 +#define CLOCK_12_HOUR 1 + + +/* ---------- + * Full months + * ---------- + */ +static const char *const months_full[] = { + "January", "February", "March", "April", "May", "June", "July", + "August", "September", "October", "November", "December", NULL +}; + +static const char *const days_short[] = { + "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL +}; + +/* ---------- + * AD / BC + * ---------- + * There is no 0 AD. Years go from 1 BC to 1 AD, so we make it + * positive and map year == -1 to year zero, and shift all negative + * years up one. For interval years, we just return the year. + */ +#define ADJUST_YEAR(year, is_interval) ((is_interval) ? (year) : ((year) <= 0 ? -((year) - 1) : (year))) + +#define A_D_STR "A.D." +#define a_d_STR "a.d." +#define AD_STR "AD" +#define ad_STR "ad" + +#define B_C_STR "B.C." +#define b_c_STR "b.c." +#define BC_STR "BC" +#define bc_STR "bc" + +/* + * AD / BC strings for seq_search. + * + * These are given in two variants, a long form with periods and a standard + * form without. + * + * The array is laid out such that matches for AD have an even index, and + * matches for BC have an odd index. So the boolean value for BC is given by + * taking the array index of the match, modulo 2. + */ +static const char *const adbc_strings[] = {ad_STR, bc_STR, AD_STR, BC_STR, NULL}; +static const char *const adbc_strings_long[] = {a_d_STR, b_c_STR, A_D_STR, B_C_STR, NULL}; + +/* ---------- + * AM / PM + * ---------- + */ +#define A_M_STR "A.M." +#define a_m_STR "a.m." +#define AM_STR "AM" +#define am_STR "am" + +#define P_M_STR "P.M." +#define p_m_STR "p.m." +#define PM_STR "PM" +#define pm_STR "pm" + +/* + * AM / PM strings for seq_search. + * + * These are given in two variants, a long form with periods and a standard + * form without. + * + * The array is laid out such that matches for AM have an even index, and + * matches for PM have an odd index. So the boolean value for PM is given by + * taking the array index of the match, modulo 2. 
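 *
 * (Illustrative walk-through: "p.m." matches ampm_strings_long at index
 * 1, and 1 % 2 == 1 flags the value as PM, exactly as described above.)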
+ */ +static const char *const ampm_strings[] = {am_STR, pm_STR, AM_STR, PM_STR, NULL}; +static const char *const ampm_strings_long[] = {a_m_STR, p_m_STR, A_M_STR, P_M_STR, NULL}; + +/* ---------- + * Months in roman-numeral + * (Must be in reverse order for seq_search (in FROM_CHAR), because + * 'VIII' must have higher precedence than 'V') + * ---------- + */ +static const char *const rm_months_upper[] = +{"XII", "XI", "X", "IX", "VIII", "VII", "VI", "V", "IV", "III", "II", "I", NULL}; + +static const char *const rm_months_lower[] = +{"xii", "xi", "x", "ix", "viii", "vii", "vi", "v", "iv", "iii", "ii", "i", NULL}; + +/* ---------- + * Roman numbers + * ---------- + */ +static const char *const rm1[] = {"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", NULL}; +static const char *const rm10[] = {"X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC", NULL}; +static const char *const rm100[] = {"C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM", NULL}; + +/* ---------- + * Ordinal postfixes + * ---------- + */ +static const char *const numTH[] = {"ST", "ND", "RD", "TH", NULL}; +static const char *const numth[] = {"st", "nd", "rd", "th", NULL}; + +/* ---------- + * Flags & Options: + * ---------- + */ +#define TH_UPPER 1 +#define TH_LOWER 2 + +/* ---------- + * Number description struct + * ---------- + */ +typedef struct +{ + int pre, /* (count) numbers before decimal */ + post, /* (count) numbers after decimal */ + lsign, /* want locales sign */ + flag, /* number parameters */ + pre_lsign_num, /* tmp value for lsign */ + multi, /* multiplier for 'V' */ + zero_start, /* position of first zero */ + zero_end, /* position of last zero */ + need_locale; /* needs it locale */ +} NUMDesc; + +/* ---------- + * Flags for NUMBER version + * ---------- + */ +#define NUM_F_DECIMAL (1 << 1) +#define NUM_F_LDECIMAL (1 << 2) +#define NUM_F_ZERO (1 << 3) +#define NUM_F_BLANK (1 << 4) +#define NUM_F_FILLMODE (1 << 5) +#define NUM_F_LSIGN (1 << 6) +#define NUM_F_BRACKET (1 << 7) +#define NUM_F_MINUS (1 << 8) +#define NUM_F_PLUS (1 << 9) +#define NUM_F_ROMAN (1 << 10) +#define NUM_F_MULTI (1 << 11) +#define NUM_F_PLUS_POST (1 << 12) +#define NUM_F_MINUS_POST (1 << 13) +#define NUM_F_EEEE (1 << 14) + +#define NUM_LSIGN_PRE (-1) +#define NUM_LSIGN_POST 1 +#define NUM_LSIGN_NONE 0 + +/* ---------- + * Tests + * ---------- + */ +#define IS_DECIMAL(_f) ((_f)->flag & NUM_F_DECIMAL) +#define IS_LDECIMAL(_f) ((_f)->flag & NUM_F_LDECIMAL) +#define IS_ZERO(_f) ((_f)->flag & NUM_F_ZERO) +#define IS_BLANK(_f) ((_f)->flag & NUM_F_BLANK) +#define IS_FILLMODE(_f) ((_f)->flag & NUM_F_FILLMODE) +#define IS_BRACKET(_f) ((_f)->flag & NUM_F_BRACKET) +#define IS_MINUS(_f) ((_f)->flag & NUM_F_MINUS) +#define IS_LSIGN(_f) ((_f)->flag & NUM_F_LSIGN) +#define IS_PLUS(_f) ((_f)->flag & NUM_F_PLUS) +#define IS_ROMAN(_f) ((_f)->flag & NUM_F_ROMAN) +#define IS_MULTI(_f) ((_f)->flag & NUM_F_MULTI) +#define IS_EEEE(_f) ((_f)->flag & NUM_F_EEEE) + +/* ---------- + * Format picture cache + * + * We will cache datetime format pictures up to DCH_CACHE_SIZE bytes long; + * likewise number format pictures up to NUM_CACHE_SIZE bytes long. + * + * For simplicity, the cache entries are fixed-size, so they allow for the + * worst case of a FormatNode for each byte in the picture string. + * + * The CACHE_SIZE constants are computed to make sizeof(DCHCacheEntry) and + * sizeof(NUMCacheEntry) be powers of 2, or just less than that, so that + * we don't waste too much space by palloc'ing them individually. 
Be sure + * to adjust those macros if you add fields to those structs. + * + * The max number of entries in each cache is DCH_CACHE_ENTRIES + * resp. NUM_CACHE_ENTRIES. + * ---------- + */ +#define DCH_CACHE_OVERHEAD \ + MAXALIGN(sizeof(bool) + sizeof(int)) +#define NUM_CACHE_OVERHEAD \ + MAXALIGN(sizeof(bool) + sizeof(int) + sizeof(NUMDesc)) + +#define DCH_CACHE_SIZE \ + ((2048 - DCH_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1) +#define NUM_CACHE_SIZE \ + ((1024 - NUM_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1) + +#define DCH_CACHE_ENTRIES 20 +#define NUM_CACHE_ENTRIES 20 + +typedef struct +{ + FormatNode format[DCH_CACHE_SIZE + 1]; + char str[DCH_CACHE_SIZE + 1]; + bool std; + bool valid; + int age; +} DCHCacheEntry; + +typedef struct +{ + FormatNode format[NUM_CACHE_SIZE + 1]; + char str[NUM_CACHE_SIZE + 1]; + bool valid; + int age; + NUMDesc Num; +} NUMCacheEntry; + +/* global cache for date/time format pictures */ +static __thread DCHCacheEntry *DCHCache[DCH_CACHE_ENTRIES]; +static __thread int n_DCHCache = 0; /* current number of entries */ +static __thread int DCHCounter = 0; /* aging-event counter */ + +/* global cache for number format pictures */ +static __thread NUMCacheEntry *NUMCache[NUM_CACHE_ENTRIES]; +static __thread int n_NUMCache = 0; /* current number of entries */ +static __thread int NUMCounter = 0; /* aging-event counter */ + +/* ---------- + * For char->date/time conversion + * ---------- + */ +typedef struct +{ + FromCharDateMode mode; + int hh, + pm, + mi, + ss, + ssss, + d, /* stored as 1-7, Sunday = 1, 0 means missing */ + dd, + ddd, + mm, + ms, + year, + bc, + ww, + w, + cc, + j, + us, + yysz, /* is it YY or YYYY ? */ + clock, /* 12 or 24 hour clock? */ + tzsign, /* +1, -1 or 0 if timezone info is absent */ + tzh, + tzm, + ff; /* fractional precision */ +} TmFromChar; + +#define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar)) + +/* ---------- + * Debug + * ---------- + */ +#ifdef DEBUG_TO_FROM_CHAR +#define DEBUG_TMFC(_X) \ + elog(DEBUG_elog_output, "TMFC:\nmode %d\nhh %d\npm %d\nmi %d\nss %d\nssss %d\nd %d\ndd %d\nddd %d\nmm %d\nms: %d\nyear %d\nbc %d\nww %d\nw %d\ncc %d\nj %d\nus: %d\nyysz: %d\nclock: %d", \ + (_X)->mode, (_X)->hh, (_X)->pm, (_X)->mi, (_X)->ss, (_X)->ssss, \ + (_X)->d, (_X)->dd, (_X)->ddd, (_X)->mm, (_X)->ms, (_X)->year, \ + (_X)->bc, (_X)->ww, (_X)->w, (_X)->cc, (_X)->j, (_X)->us, \ + (_X)->yysz, (_X)->clock) +#define DEBUG_TM(_X) \ + elog(DEBUG_elog_output, "TM:\nsec %d\nyear %d\nmin %d\nwday %d\nhour %d\nyday %d\nmday %d\nnisdst %d\nmon %d\n",\ + (_X)->tm_sec, (_X)->tm_year,\ + (_X)->tm_min, (_X)->tm_wday, (_X)->tm_hour, (_X)->tm_yday,\ + (_X)->tm_mday, (_X)->tm_isdst, (_X)->tm_mon) +#else +#define DEBUG_TMFC(_X) +#define DEBUG_TM(_X) +#endif + +/* ---------- + * Datetime to char conversion + * + * To support intervals as well as timestamps, we use a custom "tm" struct + * that is almost like struct pg_tm, but has a 64-bit tm_hour field. + * We omit the tm_isdst and tm_zone fields, which are not used here. 
+ * ---------- + */ +struct fmt_tm +{ + int tm_sec; + int tm_min; + int64 tm_hour; + int tm_mday; + int tm_mon; + int tm_year; + int tm_wday; + int tm_yday; + long int tm_gmtoff; +}; + +typedef struct TmToChar +{ + struct fmt_tm tm; /* almost the classic 'tm' struct */ + fsec_t fsec; /* fractional seconds */ + const char *tzn; /* timezone */ +} TmToChar; + +#define tmtcTm(_X) (&(_X)->tm) +#define tmtcTzn(_X) ((_X)->tzn) +#define tmtcFsec(_X) ((_X)->fsec) + +/* Note: this is used to copy pg_tm to fmt_tm, so not quite a bitwise copy */ +#define COPY_tm(_DST, _SRC) \ +do { \ + (_DST)->tm_sec = (_SRC)->tm_sec; \ + (_DST)->tm_min = (_SRC)->tm_min; \ + (_DST)->tm_hour = (_SRC)->tm_hour; \ + (_DST)->tm_mday = (_SRC)->tm_mday; \ + (_DST)->tm_mon = (_SRC)->tm_mon; \ + (_DST)->tm_year = (_SRC)->tm_year; \ + (_DST)->tm_wday = (_SRC)->tm_wday; \ + (_DST)->tm_yday = (_SRC)->tm_yday; \ + (_DST)->tm_gmtoff = (_SRC)->tm_gmtoff; \ +} while(0) + +/* Caution: this is used to zero both pg_tm and fmt_tm structs */ +#define ZERO_tm(_X) \ +do { \ + memset(_X, 0, sizeof(*(_X))); \ + (_X)->tm_mday = (_X)->tm_mon = 1; \ +} while(0) + +#define ZERO_tmtc(_X) \ +do { \ + ZERO_tm( tmtcTm(_X) ); \ + tmtcFsec(_X) = 0; \ + tmtcTzn(_X) = NULL; \ +} while(0) + +/* + * to_char(time) appears to to_char() as an interval, so this check + * is really for interval and time data types. + */ +#define INVALID_FOR_INTERVAL \ +do { \ + if (is_interval) \ + ereport(ERROR, \ + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), \ + errmsg("invalid format specification for an interval value"), \ + errhint("Intervals are not tied to specific calendar dates."))); \ +} while(0) + +/***************************************************************************** + * KeyWord definitions + *****************************************************************************/ + +/* ---------- + * Suffixes (FormatNode.suffix is an OR of these codes) + * ---------- + */ +#define DCH_S_FM 0x01 +#define DCH_S_TH 0x02 +#define DCH_S_th 0x04 +#define DCH_S_SP 0x08 +#define DCH_S_TM 0x10 + +/* ---------- + * Suffix tests + * ---------- + */ +#define S_THth(_s) ((((_s) & DCH_S_TH) || ((_s) & DCH_S_th)) ? 1 : 0) +#define S_TH(_s) (((_s) & DCH_S_TH) ? 1 : 0) +#define S_th(_s) (((_s) & DCH_S_th) ? 1 : 0) +#define S_TH_TYPE(_s) (((_s) & DCH_S_TH) ? TH_UPPER : TH_LOWER) + +/* Oracle toggles FM behavior, we don't; see docs. */ +#define S_FM(_s) (((_s) & DCH_S_FM) ? 1 : 0) +#define S_SP(_s) (((_s) & DCH_S_SP) ? 1 : 0) +#define S_TM(_s) (((_s) & DCH_S_TM) ? 1 : 0) + +/* ---------- + * Suffixes definition for DATE-TIME TO/FROM CHAR + * ---------- + */ +#define TM_SUFFIX_LEN 2 + +static const KeySuffix DCH_suff[] = { + {"FM", 2, DCH_S_FM, SUFFTYPE_PREFIX}, + {"fm", 2, DCH_S_FM, SUFFTYPE_PREFIX}, + {"TM", TM_SUFFIX_LEN, DCH_S_TM, SUFFTYPE_PREFIX}, + {"tm", 2, DCH_S_TM, SUFFTYPE_PREFIX}, + {"TH", 2, DCH_S_TH, SUFFTYPE_POSTFIX}, + {"th", 2, DCH_S_th, SUFFTYPE_POSTFIX}, + {"SP", 2, DCH_S_SP, SUFFTYPE_POSTFIX}, + /* last */ + {NULL, 0, 0, 0} +}; + + +/* ---------- + * Format-pictures (KeyWord). + * + * The KeyWord field; alphabetic sorted, *BUT* strings alike is sorted + * complicated -to-> easy: + * + * (example: "DDD","DD","Day","D" ) + * + * (this specific sort needs the algorithm for sequential search for strings, + * which not has exact end; -> How keyword is in "HH12blabla" ? - "HH" + * or "HH12"? You must first try "HH12", because "HH" is in string, but + * it is not good. + * + * (!) + * - Position for the keyword is similar as position in the enum DCH/NUM_poz. + * (!) 
+ * + * For fast search is used the 'int index[]', index is ascii table from position + * 32 (' ') to 126 (~), in this index is DCH_ / NUM_ enums for each ASCII + * position or -1 if char is not used in the KeyWord. Search example for + * string "MM": + * 1) see in index to index['M' - 32], + * 2) take keywords position (enum DCH_MI) from index + * 3) run sequential search in keywords[] from this position + * + * ---------- + */ + +typedef enum +{ + DCH_A_D, + DCH_A_M, + DCH_AD, + DCH_AM, + DCH_B_C, + DCH_BC, + DCH_CC, + DCH_DAY, + DCH_DDD, + DCH_DD, + DCH_DY, + DCH_Day, + DCH_Dy, + DCH_D, + DCH_FF1, + DCH_FF2, + DCH_FF3, + DCH_FF4, + DCH_FF5, + DCH_FF6, + DCH_FX, /* global suffix */ + DCH_HH24, + DCH_HH12, + DCH_HH, + DCH_IDDD, + DCH_ID, + DCH_IW, + DCH_IYYY, + DCH_IYY, + DCH_IY, + DCH_I, + DCH_J, + DCH_MI, + DCH_MM, + DCH_MONTH, + DCH_MON, + DCH_MS, + DCH_Month, + DCH_Mon, + DCH_OF, + DCH_P_M, + DCH_PM, + DCH_Q, + DCH_RM, + DCH_SSSSS, + DCH_SSSS, + DCH_SS, + DCH_TZH, + DCH_TZM, + DCH_TZ, + DCH_US, + DCH_WW, + DCH_W, + DCH_Y_YYY, + DCH_YYYY, + DCH_YYY, + DCH_YY, + DCH_Y, + DCH_a_d, + DCH_a_m, + DCH_ad, + DCH_am, + DCH_b_c, + DCH_bc, + DCH_cc, + DCH_day, + DCH_ddd, + DCH_dd, + DCH_dy, + DCH_d, + DCH_ff1, + DCH_ff2, + DCH_ff3, + DCH_ff4, + DCH_ff5, + DCH_ff6, + DCH_fx, + DCH_hh24, + DCH_hh12, + DCH_hh, + DCH_iddd, + DCH_id, + DCH_iw, + DCH_iyyy, + DCH_iyy, + DCH_iy, + DCH_i, + DCH_j, + DCH_mi, + DCH_mm, + DCH_month, + DCH_mon, + DCH_ms, + DCH_of, + DCH_p_m, + DCH_pm, + DCH_q, + DCH_rm, + DCH_sssss, + DCH_ssss, + DCH_ss, + DCH_tzh, + DCH_tzm, + DCH_tz, + DCH_us, + DCH_ww, + DCH_w, + DCH_y_yyy, + DCH_yyyy, + DCH_yyy, + DCH_yy, + DCH_y, + + /* last */ + _DCH_last_ +} DCH_poz; + +typedef enum +{ + NUM_COMMA, + NUM_DEC, + NUM_0, + NUM_9, + NUM_B, + NUM_C, + NUM_D, + NUM_E, + NUM_FM, + NUM_G, + NUM_L, + NUM_MI, + NUM_PL, + NUM_PR, + NUM_RN, + NUM_SG, + NUM_SP, + NUM_S, + NUM_TH, + NUM_V, + NUM_b, + NUM_c, + NUM_d, + NUM_e, + NUM_fm, + NUM_g, + NUM_l, + NUM_mi, + NUM_pl, + NUM_pr, + NUM_rn, + NUM_sg, + NUM_sp, + NUM_s, + NUM_th, + NUM_v, + + /* last */ + _NUM_last_ +} NUM_poz; + +/* ---------- + * KeyWords for DATE-TIME version + * ---------- + */ +static const KeyWord DCH_keywords[] = { +/* name, len, id, is_digit, date_mode */ + {"A.D.", 4, DCH_A_D, false, FROM_CHAR_DATE_NONE}, /* A */ + {"A.M.", 4, DCH_A_M, false, FROM_CHAR_DATE_NONE}, + {"AD", 2, DCH_AD, false, FROM_CHAR_DATE_NONE}, + {"AM", 2, DCH_AM, false, FROM_CHAR_DATE_NONE}, + {"B.C.", 4, DCH_B_C, false, FROM_CHAR_DATE_NONE}, /* B */ + {"BC", 2, DCH_BC, false, FROM_CHAR_DATE_NONE}, + {"CC", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* C */ + {"DAY", 3, DCH_DAY, false, FROM_CHAR_DATE_NONE}, /* D */ + {"DDD", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN}, + {"DD", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN}, + {"DY", 2, DCH_DY, false, FROM_CHAR_DATE_NONE}, + {"Day", 3, DCH_Day, false, FROM_CHAR_DATE_NONE}, + {"Dy", 2, DCH_Dy, false, FROM_CHAR_DATE_NONE}, + {"D", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN}, + {"FF1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* F */ + {"FF2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE}, + {"FF3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE}, + {"FF4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE}, + {"FF5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE}, + {"FF6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE}, + {"FX", 2, DCH_FX, false, FROM_CHAR_DATE_NONE}, + {"HH24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* H */ + {"HH12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE}, + {"HH", 2, DCH_HH, true, FROM_CHAR_DATE_NONE}, + {"IDDD", 4, DCH_IDDD, 
true, FROM_CHAR_DATE_ISOWEEK}, /* I */ + {"ID", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK}, + {"IW", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK}, + {"IYYY", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK}, + {"IYY", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK}, + {"IY", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK}, + {"I", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK}, + {"J", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* J */ + {"MI", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* M */ + {"MM", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN}, + {"MONTH", 5, DCH_MONTH, false, FROM_CHAR_DATE_GREGORIAN}, + {"MON", 3, DCH_MON, false, FROM_CHAR_DATE_GREGORIAN}, + {"MS", 2, DCH_MS, true, FROM_CHAR_DATE_NONE}, + {"Month", 5, DCH_Month, false, FROM_CHAR_DATE_GREGORIAN}, + {"Mon", 3, DCH_Mon, false, FROM_CHAR_DATE_GREGORIAN}, + {"OF", 2, DCH_OF, false, FROM_CHAR_DATE_NONE}, /* O */ + {"P.M.", 4, DCH_P_M, false, FROM_CHAR_DATE_NONE}, /* P */ + {"PM", 2, DCH_PM, false, FROM_CHAR_DATE_NONE}, + {"Q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* Q */ + {"RM", 2, DCH_RM, false, FROM_CHAR_DATE_GREGORIAN}, /* R */ + {"SSSSS", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* S */ + {"SSSS", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, + {"SS", 2, DCH_SS, true, FROM_CHAR_DATE_NONE}, + {"TZH", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE}, /* T */ + {"TZM", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE}, + {"TZ", 2, DCH_TZ, false, FROM_CHAR_DATE_NONE}, + {"US", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* U */ + {"WW", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* W */ + {"W", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN}, + {"Y,YYY", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* Y */ + {"YYYY", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN}, + {"YYY", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN}, + {"YY", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN}, + {"Y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN}, + {"a.d.", 4, DCH_a_d, false, FROM_CHAR_DATE_NONE}, /* a */ + {"a.m.", 4, DCH_a_m, false, FROM_CHAR_DATE_NONE}, + {"ad", 2, DCH_ad, false, FROM_CHAR_DATE_NONE}, + {"am", 2, DCH_am, false, FROM_CHAR_DATE_NONE}, + {"b.c.", 4, DCH_b_c, false, FROM_CHAR_DATE_NONE}, /* b */ + {"bc", 2, DCH_bc, false, FROM_CHAR_DATE_NONE}, + {"cc", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* c */ + {"day", 3, DCH_day, false, FROM_CHAR_DATE_NONE}, /* d */ + {"ddd", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN}, + {"dd", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN}, + {"dy", 2, DCH_dy, false, FROM_CHAR_DATE_NONE}, + {"d", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN}, + {"ff1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* f */ + {"ff2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE}, + {"ff3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE}, + {"ff4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE}, + {"ff5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE}, + {"ff6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE}, + {"fx", 2, DCH_FX, false, FROM_CHAR_DATE_NONE}, + {"hh24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* h */ + {"hh12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE}, + {"hh", 2, DCH_HH, true, FROM_CHAR_DATE_NONE}, + {"iddd", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* i */ + {"id", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK}, + {"iw", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK}, + {"iyyy", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK}, + {"iyy", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK}, + {"iy", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK}, + {"i", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK}, + {"j", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* j */ + {"mi", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* m */ + {"mm", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN}, + 
{"month", 5, DCH_month, false, FROM_CHAR_DATE_GREGORIAN}, + {"mon", 3, DCH_mon, false, FROM_CHAR_DATE_GREGORIAN}, + {"ms", 2, DCH_MS, true, FROM_CHAR_DATE_NONE}, + {"of", 2, DCH_OF, false, FROM_CHAR_DATE_NONE}, /* o */ + {"p.m.", 4, DCH_p_m, false, FROM_CHAR_DATE_NONE}, /* p */ + {"pm", 2, DCH_pm, false, FROM_CHAR_DATE_NONE}, + {"q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* q */ + {"rm", 2, DCH_rm, false, FROM_CHAR_DATE_GREGORIAN}, /* r */ + {"sssss", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* s */ + {"ssss", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, + {"ss", 2, DCH_SS, true, FROM_CHAR_DATE_NONE}, + {"tzh", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE}, /* t */ + {"tzm", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE}, + {"tz", 2, DCH_tz, false, FROM_CHAR_DATE_NONE}, + {"us", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* u */ + {"ww", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* w */ + {"w", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN}, + {"y,yyy", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* y */ + {"yyyy", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN}, + {"yyy", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN}, + {"yy", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN}, + {"y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN}, + + /* last */ + {NULL, 0, 0, 0, 0} +}; + +/* ---------- + * KeyWords for NUMBER version + * + * The is_digit and date_mode fields are not relevant here. + * ---------- + */ +static const KeyWord NUM_keywords[] = { +/* name, len, id is in Index */ + {",", 1, NUM_COMMA}, /* , */ + {".", 1, NUM_DEC}, /* . */ + {"0", 1, NUM_0}, /* 0 */ + {"9", 1, NUM_9}, /* 9 */ + {"B", 1, NUM_B}, /* B */ + {"C", 1, NUM_C}, /* C */ + {"D", 1, NUM_D}, /* D */ + {"EEEE", 4, NUM_E}, /* E */ + {"FM", 2, NUM_FM}, /* F */ + {"G", 1, NUM_G}, /* G */ + {"L", 1, NUM_L}, /* L */ + {"MI", 2, NUM_MI}, /* M */ + {"PL", 2, NUM_PL}, /* P */ + {"PR", 2, NUM_PR}, + {"RN", 2, NUM_RN}, /* R */ + {"SG", 2, NUM_SG}, /* S */ + {"SP", 2, NUM_SP}, + {"S", 1, NUM_S}, + {"TH", 2, NUM_TH}, /* T */ + {"V", 1, NUM_V}, /* V */ + {"b", 1, NUM_B}, /* b */ + {"c", 1, NUM_C}, /* c */ + {"d", 1, NUM_D}, /* d */ + {"eeee", 4, NUM_E}, /* e */ + {"fm", 2, NUM_FM}, /* f */ + {"g", 1, NUM_G}, /* g */ + {"l", 1, NUM_L}, /* l */ + {"mi", 2, NUM_MI}, /* m */ + {"pl", 2, NUM_PL}, /* p */ + {"pr", 2, NUM_PR}, + {"rn", 2, NUM_rn}, /* r */ + {"sg", 2, NUM_SG}, /* s */ + {"sp", 2, NUM_SP}, + {"s", 1, NUM_S}, + {"th", 2, NUM_th}, /* t */ + {"v", 1, NUM_V}, /* v */ + + /* last */ + {NULL, 0, 0} +}; + + +/* ---------- + * KeyWords index for DATE-TIME version + * ---------- + */ +static const int DCH_index[KeyWord_INDEX_SIZE] = { +/* +0 1 2 3 4 5 6 7 8 9 +*/ + /*---- first 0..31 chars are skipped ----*/ + + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, DCH_A_D, DCH_B_C, DCH_CC, DCH_DAY, -1, + DCH_FF1, -1, DCH_HH24, DCH_IDDD, DCH_J, -1, -1, DCH_MI, -1, DCH_OF, + DCH_P_M, DCH_Q, DCH_RM, DCH_SSSSS, DCH_TZH, DCH_US, -1, DCH_WW, -1, DCH_Y_YYY, + -1, -1, -1, -1, -1, -1, -1, DCH_a_d, DCH_b_c, DCH_cc, + DCH_day, -1, DCH_ff1, -1, DCH_hh24, DCH_iddd, DCH_j, -1, -1, DCH_mi, + -1, DCH_of, DCH_p_m, DCH_q, DCH_rm, DCH_sssss, DCH_tzh, DCH_us, -1, DCH_ww, + -1, DCH_y_yyy, -1, -1, -1, -1 + + /*---- chars over 126 are skipped ----*/ +}; + +/* ---------- + * KeyWords index for NUMBER version + * ---------- + */ +static const int NUM_index[KeyWord_INDEX_SIZE] = { +/* +0 1 2 3 4 5 6 7 8 9 +*/ + /*---- first 0..31 chars are skipped ----*/ + + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 
NUM_COMMA, -1, NUM_DEC, -1, NUM_0, -1, + -1, -1, -1, -1, -1, -1, -1, NUM_9, -1, -1, + -1, -1, -1, -1, -1, -1, NUM_B, NUM_C, NUM_D, NUM_E, + NUM_FM, NUM_G, -1, -1, -1, -1, NUM_L, NUM_MI, -1, -1, + NUM_PL, -1, NUM_RN, NUM_SG, NUM_TH, -1, NUM_V, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, NUM_b, NUM_c, + NUM_d, NUM_e, NUM_fm, NUM_g, -1, -1, -1, -1, NUM_l, NUM_mi, + -1, -1, NUM_pl, -1, NUM_rn, NUM_sg, NUM_th, -1, NUM_v, -1, + -1, -1, -1, -1, -1, -1 + + /*---- chars over 126 are skipped ----*/ +}; + +/* ---------- + * Number processor struct + * ---------- + */ +typedef struct NUMProc +{ + bool is_to_char; + NUMDesc *Num; /* number description */ + + int sign, /* '-' or '+' */ + sign_wrote, /* was sign write */ + num_count, /* number of write digits */ + num_in, /* is inside number */ + num_curr, /* current position in number */ + out_pre_spaces, /* spaces before first digit */ + + read_dec, /* to_number - was read dec. point */ + read_post, /* to_number - number of dec. digit */ + read_pre; /* to_number - number non-dec. digit */ + + char *number, /* string with number */ + *number_p, /* pointer to current number position */ + *inout, /* in / out buffer */ + *inout_p, /* pointer to current inout position */ + *last_relevant, /* last relevant number after decimal point */ + + *L_negative_sign, /* Locale */ + *L_positive_sign, + *decimal, + *L_thousands_sep, + *L_currency_symbol; +} NUMProc; + +/* Return flags for DCH_from_char() */ +#define DCH_DATED 0x01 +#define DCH_TIMED 0x02 +#define DCH_ZONED 0x04 + +/* ---------- + * Functions + * ---------- + */ +static const KeyWord *index_seq_search(const char *str, const KeyWord *kw, + const int *index); +static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type); +static bool is_separator_char(const char *str); +static void NUMDesc_prepare(NUMDesc *num, FormatNode *n); +static void parse_format(FormatNode *node, const char *str, const KeyWord *kw, + const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num); + +static void DCH_to_char(FormatNode *node, bool is_interval, + TmToChar *in, char *out, Oid collid); +static void DCH_from_char(FormatNode *node, const char *in, TmFromChar *out, + Oid collid, bool std, Node *escontext); + +#ifdef DEBUG_TO_FROM_CHAR +static void dump_index(const KeyWord *k, const int *index); +static void dump_node(FormatNode *node, int max); +#endif + +static const char *get_th(char *num, int type); +static char *str_numth(char *dest, char *num, int type); +static int adjust_partial_year_to_2020(int year); +static int strspace_len(const char *str); +static bool from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode, + Node *escontext); +static bool from_char_set_int(int *dest, const int value, const FormatNode *node, + Node *escontext); +static int from_char_parse_int_len(int *dest, const char **src, const int len, + FormatNode *node, Node *escontext); +static int from_char_parse_int(int *dest, const char **src, FormatNode *node, + Node *escontext); +static int seq_search_ascii(const char *name, const char *const *array, int *len); +static int seq_search_localized(const char *name, char **array, int *len, + Oid collid); +static bool from_char_seq_search(int *dest, const char **src, + const char *const *array, + char **localized_array, Oid collid, + FormatNode *node, Node *escontext); +static bool do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std, + struct pg_tm *tm, fsec_t *fsec, int *fprec, + uint32 *flags, Node *escontext); +static char *fill_str(char *str, int c, 
int max); +static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree); +static char *int_to_roman(int number); +static void NUM_prepare_locale(NUMProc *Np); +static char *get_last_relevant_decnum(char *num); +static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len); +static void NUM_numpart_to_char(NUMProc *Np, int id); +static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, + char *number, int input_len, int to_char_out_pre_spaces, + int sign, bool is_to_char, Oid collid); +static DCHCacheEntry *DCH_cache_getnew(const char *str, bool std); +static DCHCacheEntry *DCH_cache_search(const char *str, bool std); +static DCHCacheEntry *DCH_cache_fetch(const char *str, bool std); +static NUMCacheEntry *NUM_cache_getnew(const char *str); +static NUMCacheEntry *NUM_cache_search(const char *str); +static NUMCacheEntry *NUM_cache_fetch(const char *str); + + +/* ---------- + * Fast sequential search, use index for data selection which + * go to seq. cycle (it is very fast for unwanted strings) + * (can't be used binary search in format parsing) + * ---------- + */ +static const KeyWord * +index_seq_search(const char *str, const KeyWord *kw, const int *index) +{ + int poz; + + if (!KeyWord_INDEX_FILTER(*str)) + return NULL; + + if ((poz = *(index + (*str - ' '))) > -1) + { + const KeyWord *k = kw + poz; + + do + { + if (strncmp(str, k->name, k->len) == 0) + return k; + k++; + if (!k->name) + return NULL; + } while (*str == *k->name); + } + return NULL; +} + +static const KeySuffix * +suff_search(const char *str, const KeySuffix *suf, int type) +{ + const KeySuffix *s; + + for (s = suf; s->name != NULL; s++) + { + if (s->type != type) + continue; + + if (strncmp(str, s->name, s->len) == 0) + return s; + } + return NULL; +} + +static bool +is_separator_char(const char *str) +{ + /* ASCII printable character, but not letter or digit */ + return (*str > 0x20 && *str < 0x7F && + !(*str >= 'A' && *str <= 'Z') && + !(*str >= 'a' && *str <= 'z') && + !(*str >= '0' && *str <= '9')); +} + +/* ---------- + * Prepare NUMDesc (number description struct) via FormatNode struct + * ---------- + */ +static void +NUMDesc_prepare(NUMDesc *num, FormatNode *n) +{ + if (n->type != NODE_TYPE_ACTION) + return; + + if (IS_EEEE(num) && n->key->id != NUM_E) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("\"EEEE\" must be the last pattern used"))); + + switch (n->key->id) + { + case NUM_9: + if (IS_BRACKET(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("\"9\" must be ahead of \"PR\""))); + if (IS_MULTI(num)) + { + ++num->multi; + break; + } + if (IS_DECIMAL(num)) + ++num->post; + else + ++num->pre; + break; + + case NUM_0: + if (IS_BRACKET(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("\"0\" must be ahead of \"PR\""))); + if (!IS_ZERO(num) && !IS_DECIMAL(num)) + { + num->flag |= NUM_F_ZERO; + num->zero_start = num->pre + 1; + } + if (!IS_DECIMAL(num)) + ++num->pre; + else + ++num->post; + + num->zero_end = num->pre + num->post; + break; + + case NUM_B: + if (num->pre == 0 && num->post == 0 && (!IS_ZERO(num))) + num->flag |= NUM_F_BLANK; + break; + + case NUM_D: + num->flag |= NUM_F_LDECIMAL; + num->need_locale = true; + /* FALLTHROUGH */ + case NUM_DEC: + if (IS_DECIMAL(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("multiple decimal points"))); + if (IS_MULTI(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use \"V\" and decimal point together"))); + num->flag |= NUM_F_DECIMAL; 
+ break; + + case NUM_FM: + num->flag |= NUM_F_FILLMODE; + break; + + case NUM_S: + if (IS_LSIGN(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use \"S\" twice"))); + if (IS_PLUS(num) || IS_MINUS(num) || IS_BRACKET(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together"))); + if (!IS_DECIMAL(num)) + { + num->lsign = NUM_LSIGN_PRE; + num->pre_lsign_num = num->pre; + num->need_locale = true; + num->flag |= NUM_F_LSIGN; + } + else if (num->lsign == NUM_LSIGN_NONE) + { + num->lsign = NUM_LSIGN_POST; + num->need_locale = true; + num->flag |= NUM_F_LSIGN; + } + break; + + case NUM_MI: + if (IS_LSIGN(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use \"S\" and \"MI\" together"))); + num->flag |= NUM_F_MINUS; + if (IS_DECIMAL(num)) + num->flag |= NUM_F_MINUS_POST; + break; + + case NUM_PL: + if (IS_LSIGN(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use \"S\" and \"PL\" together"))); + num->flag |= NUM_F_PLUS; + if (IS_DECIMAL(num)) + num->flag |= NUM_F_PLUS_POST; + break; + + case NUM_SG: + if (IS_LSIGN(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use \"S\" and \"SG\" together"))); + num->flag |= NUM_F_MINUS; + num->flag |= NUM_F_PLUS; + break; + + case NUM_PR: + if (IS_LSIGN(num) || IS_PLUS(num) || IS_MINUS(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together"))); + num->flag |= NUM_F_BRACKET; + break; + + case NUM_rn: + case NUM_RN: + num->flag |= NUM_F_ROMAN; + break; + + case NUM_L: + case NUM_G: + num->need_locale = true; + break; + + case NUM_V: + if (IS_DECIMAL(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use \"V\" and decimal point together"))); + num->flag |= NUM_F_MULTI; + break; + + case NUM_E: + if (IS_EEEE(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use \"EEEE\" twice"))); + if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) || + IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) || + IS_ROMAN(num) || IS_MULTI(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("\"EEEE\" is incompatible with other formats"), + errdetail("\"EEEE\" may only be used together with digit and decimal point patterns."))); + num->flag |= NUM_F_EEEE; + break; + } +} + +/* ---------- + * Format parser, search small keywords and keyword's suffixes, and make + * format-node tree. 
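+ *
+ * As a worked example (illustrative only, assuming the DCH keyword and
+ * suffix tables below): the picture "Mon DD, YYYY" is parsed into the node
+ * sequence
+ *   ACTION(Mon) SPACE ACTION(DD) SEPARATOR(",") SPACE ACTION(YYYY) END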
+ * + * for DATE-TIME & NUMBER version + * ---------- + */ +static void +parse_format(FormatNode *node, const char *str, const KeyWord *kw, + const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num) +{ + FormatNode *n; + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "to_char/number(): run parser"); +#endif + + n = node; + + while (*str) + { + int suffix = 0; + const KeySuffix *s; + + /* + * Prefix + */ + if ((flags & DCH_FLAG) && + (s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL) + { + suffix |= s->id; + if (s->len) + str += s->len; + } + + /* + * Keyword + */ + if (*str && (n->key = index_seq_search(str, kw, index)) != NULL) + { + n->type = NODE_TYPE_ACTION; + n->suffix = suffix; + if (n->key->len) + str += n->key->len; + + /* + * NUM version: Prepare global NUMDesc struct + */ + if (flags & NUM_FLAG) + NUMDesc_prepare(Num, n); + + /* + * Postfix + */ + if ((flags & DCH_FLAG) && *str && + (s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL) + { + n->suffix |= s->id; + if (s->len) + str += s->len; + } + + n++; + } + else if (*str) + { + int chlen; + + if ((flags & STD_FLAG) && *str != '"') + { + /* + * Standard mode, allow only following separators: "-./,':; ". + * However, we support double quotes even in standard mode + * (see below). This is our extension of standard mode. + */ + if (strchr("-./,':; ", *str) == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("invalid datetime format separator: \"%s\"", + pnstrdup(str, pg_mblen(str))))); + + if (*str == ' ') + n->type = NODE_TYPE_SPACE; + else + n->type = NODE_TYPE_SEPARATOR; + + n->character[0] = *str; + n->character[1] = '\0'; + n->key = NULL; + n->suffix = 0; + n++; + str++; + } + else if (*str == '"') + { + /* + * Process double-quoted literal string, if any + */ + str++; + while (*str) + { + if (*str == '"') + { + str++; + break; + } + /* backslash quotes the next character, if any */ + if (*str == '\\' && *(str + 1)) + str++; + chlen = pg_mblen(str); + n->type = NODE_TYPE_CHAR; + memcpy(n->character, str, chlen); + n->character[chlen] = '\0'; + n->key = NULL; + n->suffix = 0; + n++; + str += chlen; + } + } + else + { + /* + * Outside double-quoted strings, backslash is only special if + * it immediately precedes a double quote. + */ + if (*str == '\\' && *(str + 1) == '"') + str++; + chlen = pg_mblen(str); + + if ((flags & DCH_FLAG) && is_separator_char(str)) + n->type = NODE_TYPE_SEPARATOR; + else if (isspace((unsigned char) *str)) + n->type = NODE_TYPE_SPACE; + else + n->type = NODE_TYPE_CHAR; + + memcpy(n->character, str, chlen); + n->character[chlen] = '\0'; + n->key = NULL; + n->suffix = 0; + n++; + str += chlen; + } + } + } + + n->type = NODE_TYPE_END; + n->suffix = 0; +} + +/* ---------- + * DEBUG: Dump the FormatNode Tree (debug) + * ---------- + */ +#ifdef DEBUG_TO_FROM_CHAR + +#define DUMP_THth(_suf) (S_TH(_suf) ? "TH" : (S_th(_suf) ? "th" : " ")) +#define DUMP_FM(_suf) (S_FM(_suf) ? 
"FM" : " ") + +static void +dump_node(FormatNode *node, int max) +{ + FormatNode *n; + int a; + + elog(DEBUG_elog_output, "to_from-char(): DUMP FORMAT"); + + for (a = 0, n = node; a <= max; n++, a++) + { + if (n->type == NODE_TYPE_ACTION) + elog(DEBUG_elog_output, "%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)", + a, n->key->name, DUMP_THth(n->suffix), DUMP_FM(n->suffix)); + else if (n->type == NODE_TYPE_CHAR) + elog(DEBUG_elog_output, "%d:\t NODE_TYPE_CHAR '%s'", + a, n->character); + else if (n->type == NODE_TYPE_END) + { + elog(DEBUG_elog_output, "%d:\t NODE_TYPE_END", a); + return; + } + else + elog(DEBUG_elog_output, "%d:\t unknown NODE!", a); + } +} +#endif /* DEBUG */ + +/***************************************************************************** + * Private utils + *****************************************************************************/ + +/* ---------- + * Return ST/ND/RD/TH for simple (1..9) numbers + * type --> 0 upper, 1 lower + * ---------- + */ +static const char * +get_th(char *num, int type) +{ + int len = strlen(num), + last; + + last = *(num + (len - 1)); + if (!isdigit((unsigned char) last)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("\"%s\" is not a number", num))); + + /* + * All "teens" (<x>1[0-9]) get 'TH/th', while <x>[02-9][123] still get + * 'ST/st', 'ND/nd', 'RD/rd', respectively + */ + if ((len > 1) && (num[len - 2] == '1')) + last = 0; + + switch (last) + { + case '1': + if (type == TH_UPPER) + return numTH[0]; + return numth[0]; + case '2': + if (type == TH_UPPER) + return numTH[1]; + return numth[1]; + case '3': + if (type == TH_UPPER) + return numTH[2]; + return numth[2]; + default: + if (type == TH_UPPER) + return numTH[3]; + return numth[3]; + } +} + +/* ---------- + * Convert string-number to ordinal string-number + * type --> 0 upper, 1 lower + * ---------- + */ +static char * +str_numth(char *dest, char *num, int type) +{ + if (dest != num) + strcpy(dest, num); + strcat(dest, get_th(num, type)); + return dest; +} + +/***************************************************************************** + * upper/lower/initcap functions + *****************************************************************************/ + +#ifdef USE_ICU + +typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + const char *locale, + UErrorCode *pErrorCode); + +static int32_t +icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale, + UChar **buff_dest, UChar *buff_source, int32_t len_source) +{ + UErrorCode status; + int32_t len_dest; + + len_dest = len_source; /* try first with same length */ + *buff_dest = palloc(len_dest * sizeof(**buff_dest)); + status = U_ZERO_ERROR; + len_dest = func(*buff_dest, len_dest, buff_source, len_source, + mylocale->info.icu.locale, &status); + if (status == U_BUFFER_OVERFLOW_ERROR) + { + /* try again with adjusted length */ + pfree(*buff_dest); + *buff_dest = palloc(len_dest * sizeof(**buff_dest)); + status = U_ZERO_ERROR; + len_dest = func(*buff_dest, len_dest, buff_source, len_source, + mylocale->info.icu.locale, &status); + } + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("case conversion failed: %s", u_errorName(status)))); + return len_dest; +} + +static int32_t +u_strToTitle_default_BI(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + const char *locale, + UErrorCode *pErrorCode) +{ + return u_strToTitle(dest, destCapacity, src, srcLength, + NULL, locale, pErrorCode); +} + +#endif /* USE_ICU */ + +/* + * If the system 
provides the needed functions for wide-character manipulation + * (which are all standardized by C99), then we implement upper/lower/initcap + * using wide-character functions, if necessary. Otherwise we use the + * traditional <ctype.h> functions, which of course will not work as desired + * in multibyte character sets. Note that in either case we are effectively + * assuming that the database character encoding matches the encoding implied + * by LC_CTYPE. + * + * If the system provides locale_t and associated functions (which are + * standardized by Open Group's XBD), we can support collations that are + * neither default nor C. The code is written to handle both combinations + * of have-wide-characters and have-locale_t, though it's rather unlikely + * a platform would have the latter without the former. + */ + +/* + * collation-aware, wide-character-aware lower function + * + * We pass the number of bytes so we can pass varlena and char* + * to this function. The result is a palloc'd, null-terminated string. + */ +char * +str_tolower(const char *buff, size_t nbytes, Oid collid) +{ + char *result; + + if (!buff) + return NULL; + + if (!OidIsValid(collid)) + { + /* + * This typically means that the parser could not resolve a conflict + * of implicit collations, so report it that way. + */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for %s function", + "lower()"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } + + /* C/POSIX collations use this path regardless of database encoding */ + if (lc_ctype_is_c(collid)) + { + result = asc_tolower(buff, nbytes); + } + else + { + pg_locale_t mylocale; + + mylocale = pg_newlocale_from_collation(collid); + +#ifdef USE_ICU + if (mylocale && mylocale->provider == COLLPROVIDER_ICU) + { + int32_t len_uchar; + int32_t len_conv; + UChar *buff_uchar; + UChar *buff_conv; + + len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes); + len_conv = icu_convert_case(u_strToLower, mylocale, + &buff_conv, buff_uchar, len_uchar); + icu_from_uchar(&result, buff_conv, len_conv); + pfree(buff_uchar); + pfree(buff_conv); + } + else +#endif + { + if (pg_database_encoding_max_length() > 1) + { + wchar_t *workspace; + size_t curr_char; + size_t result_size; + + /* Overflow paranoia */ + if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + + /* Output workspace cannot have more codes than input bytes */ + workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); + + char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); + + for (curr_char = 0; workspace[curr_char] != 0; curr_char++) + { +#ifdef HAVE_LOCALE_T + if (mylocale) + workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); + else +#endif + workspace[curr_char] = towlower(workspace[curr_char]); + } + + /* + * Make result large enough; case change might change number + * of bytes + */ + result_size = curr_char * pg_database_encoding_max_length() + 1; + result = palloc(result_size); + + wchar2char(result, workspace, result_size, mylocale); + pfree(workspace); + } + else + { + char *p; + + result = pnstrdup(buff, nbytes); + + /* + * Note: we assume that tolower_l() will not be so broken as + * to need an isupper_l() guard test. 
When using the default + * collation, we apply the traditional Postgres behavior that + * forces ASCII-style treatment of I/i, but in non-default + * collations you get exactly what the collation says. + */ + for (p = result; *p; p++) + { +#ifdef HAVE_LOCALE_T + if (mylocale) + *p = tolower_l((unsigned char) *p, mylocale->info.lt); + else +#endif + *p = pg_tolower((unsigned char) *p); + } + } + } + } + + return result; +} + +/* + * collation-aware, wide-character-aware upper function + * + * We pass the number of bytes so we can pass varlena and char* + * to this function. The result is a palloc'd, null-terminated string. + */ +char * +str_toupper(const char *buff, size_t nbytes, Oid collid) +{ + char *result; + + if (!buff) + return NULL; + + if (!OidIsValid(collid)) + { + /* + * This typically means that the parser could not resolve a conflict + * of implicit collations, so report it that way. + */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for %s function", + "upper()"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } + + /* C/POSIX collations use this path regardless of database encoding */ + if (lc_ctype_is_c(collid)) + { + result = asc_toupper(buff, nbytes); + } + else + { + pg_locale_t mylocale; + + mylocale = pg_newlocale_from_collation(collid); + +#ifdef USE_ICU + if (mylocale && mylocale->provider == COLLPROVIDER_ICU) + { + int32_t len_uchar, + len_conv; + UChar *buff_uchar; + UChar *buff_conv; + + len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes); + len_conv = icu_convert_case(u_strToUpper, mylocale, + &buff_conv, buff_uchar, len_uchar); + icu_from_uchar(&result, buff_conv, len_conv); + pfree(buff_uchar); + pfree(buff_conv); + } + else +#endif + { + if (pg_database_encoding_max_length() > 1) + { + wchar_t *workspace; + size_t curr_char; + size_t result_size; + + /* Overflow paranoia */ + if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + + /* Output workspace cannot have more codes than input bytes */ + workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); + + char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); + + for (curr_char = 0; workspace[curr_char] != 0; curr_char++) + { +#ifdef HAVE_LOCALE_T + if (mylocale) + workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); + else +#endif + workspace[curr_char] = towupper(workspace[curr_char]); + } + + /* + * Make result large enough; case change might change number + * of bytes + */ + result_size = curr_char * pg_database_encoding_max_length() + 1; + result = palloc(result_size); + + wchar2char(result, workspace, result_size, mylocale); + pfree(workspace); + } + else + { + char *p; + + result = pnstrdup(buff, nbytes); + + /* + * Note: we assume that toupper_l() will not be so broken as + * to need an islower_l() guard test. When using the default + * collation, we apply the traditional Postgres behavior that + * forces ASCII-style treatment of I/i, but in non-default + * collations you get exactly what the collation says. + */ + for (p = result; *p; p++) + { +#ifdef HAVE_LOCALE_T + if (mylocale) + *p = toupper_l((unsigned char) *p, mylocale->info.lt); + else +#endif + *p = pg_toupper((unsigned char) *p); + } + } + } + } + + return result; +} + +/* + * collation-aware, wide-character-aware initcap function + * + * We pass the number of bytes so we can pass varlena and char* + * to this function. 
The result is a palloc'd, null-terminated string. + */ +char * +str_initcap(const char *buff, size_t nbytes, Oid collid) +{ + char *result; + int wasalnum = false; + + if (!buff) + return NULL; + + if (!OidIsValid(collid)) + { + /* + * This typically means that the parser could not resolve a conflict + * of implicit collations, so report it that way. + */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for %s function", + "initcap()"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } + + /* C/POSIX collations use this path regardless of database encoding */ + if (lc_ctype_is_c(collid)) + { + result = asc_initcap(buff, nbytes); + } + else + { + pg_locale_t mylocale; + + mylocale = pg_newlocale_from_collation(collid); + +#ifdef USE_ICU + if (mylocale && mylocale->provider == COLLPROVIDER_ICU) + { + int32_t len_uchar, + len_conv; + UChar *buff_uchar; + UChar *buff_conv; + + len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes); + len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale, + &buff_conv, buff_uchar, len_uchar); + icu_from_uchar(&result, buff_conv, len_conv); + pfree(buff_uchar); + pfree(buff_conv); + } + else +#endif + { + if (pg_database_encoding_max_length() > 1) + { + wchar_t *workspace; + size_t curr_char; + size_t result_size; + + /* Overflow paranoia */ + if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + + /* Output workspace cannot have more codes than input bytes */ + workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); + + char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); + + for (curr_char = 0; workspace[curr_char] != 0; curr_char++) + { +#ifdef HAVE_LOCALE_T + if (mylocale) + { + if (wasalnum) + workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); + else + workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); + wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt); + } + else +#endif + { + if (wasalnum) + workspace[curr_char] = towlower(workspace[curr_char]); + else + workspace[curr_char] = towupper(workspace[curr_char]); + wasalnum = iswalnum(workspace[curr_char]); + } + } + + /* + * Make result large enough; case change might change number + * of bytes + */ + result_size = curr_char * pg_database_encoding_max_length() + 1; + result = palloc(result_size); + + wchar2char(result, workspace, result_size, mylocale); + pfree(workspace); + } + else + { + char *p; + + result = pnstrdup(buff, nbytes); + + /* + * Note: we assume that toupper_l()/tolower_l() will not be so + * broken as to need guard tests. When using the default + * collation, we apply the traditional Postgres behavior that + * forces ASCII-style treatment of I/i, but in non-default + * collations you get exactly what the collation says. + */ + for (p = result; *p; p++) + { +#ifdef HAVE_LOCALE_T + if (mylocale) + { + if (wasalnum) + *p = tolower_l((unsigned char) *p, mylocale->info.lt); + else + *p = toupper_l((unsigned char) *p, mylocale->info.lt); + wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt); + } + else +#endif + { + if (wasalnum) + *p = pg_tolower((unsigned char) *p); + else + *p = pg_toupper((unsigned char) *p); + wasalnum = isalnum((unsigned char) *p); + } + } + } + } + } + + return result; +} + +/* + * ASCII-only lower function + * + * We pass the number of bytes so we can pass varlena and char* + * to this function. 
The result is a palloc'd, null-terminated string. + */ +char * +asc_tolower(const char *buff, size_t nbytes) +{ + char *result; + char *p; + + if (!buff) + return NULL; + + result = pnstrdup(buff, nbytes); + + for (p = result; *p; p++) + *p = pg_ascii_tolower((unsigned char) *p); + + return result; +} + +/* + * ASCII-only upper function + * + * We pass the number of bytes so we can pass varlena and char* + * to this function. The result is a palloc'd, null-terminated string. + */ +char * +asc_toupper(const char *buff, size_t nbytes) +{ + char *result; + char *p; + + if (!buff) + return NULL; + + result = pnstrdup(buff, nbytes); + + for (p = result; *p; p++) + *p = pg_ascii_toupper((unsigned char) *p); + + return result; +} + +/* + * ASCII-only initcap function + * + * We pass the number of bytes so we can pass varlena and char* + * to this function. The result is a palloc'd, null-terminated string. + */ +char * +asc_initcap(const char *buff, size_t nbytes) +{ + char *result; + char *p; + int wasalnum = false; + + if (!buff) + return NULL; + + result = pnstrdup(buff, nbytes); + + for (p = result; *p; p++) + { + char c; + + if (wasalnum) + *p = c = pg_ascii_tolower((unsigned char) *p); + else + *p = c = pg_ascii_toupper((unsigned char) *p); + /* we don't trust isalnum() here */ + wasalnum = ((c >= 'A' && c <= 'Z') || + (c >= 'a' && c <= 'z') || + (c >= '0' && c <= '9')); + } + + return result; +} + +/* convenience routines for when the input is null-terminated */ + +static char * +str_tolower_z(const char *buff, Oid collid) +{ + return str_tolower(buff, strlen(buff), collid); +} + +static char * +str_toupper_z(const char *buff, Oid collid) +{ + return str_toupper(buff, strlen(buff), collid); +} + +static char * +str_initcap_z(const char *buff, Oid collid) +{ + return str_initcap(buff, strlen(buff), collid); +} + +static char * +asc_tolower_z(const char *buff) +{ + return asc_tolower(buff, strlen(buff)); +} + +static char * +asc_toupper_z(const char *buff) +{ + return asc_toupper(buff, strlen(buff)); +} + +/* asc_initcap_z is not currently needed */ + + +/* ---------- + * Skip TM / th in FROM_CHAR + * + * If S_THth is on, skip two chars, assuming there are two available + * ---------- + */ +#define SKIP_THth(ptr, _suf) \ + do { \ + if (S_THth(_suf)) \ + { \ + if (*(ptr)) (ptr) += pg_mblen(ptr); \ + if (*(ptr)) (ptr) += pg_mblen(ptr); \ + } \ + } while (0) + + +#ifdef DEBUG_TO_FROM_CHAR +/* ----------- + * DEBUG: Call for debug and for index checking; (Show ASCII char + * and defined keyword for each used position + * ---------- + */ +static void +dump_index(const KeyWord *k, const int *index) +{ + int i, + count = 0, + free_i = 0; + + elog(DEBUG_elog_output, "TO-FROM_CHAR: Dump KeyWord Index:"); + + for (i = 0; i < KeyWord_INDEX_SIZE; i++) + { + if (index[i] != -1) + { + elog(DEBUG_elog_output, "\t%c: %s, ", i + 32, k[index[i]].name); + count++; + } + else + { + free_i++; + elog(DEBUG_elog_output, "\t(%d) %c %d", i, i + 32, index[i]); + } + } + elog(DEBUG_elog_output, "\n\t\tUsed positions: %d,\n\t\tFree positions: %d", + count, free_i); +} +#endif /* DEBUG */ + +/* ---------- + * Return true if next format picture is not digit value + * ---------- + */ +static bool +is_next_separator(FormatNode *n) +{ + if (n->type == NODE_TYPE_END) + return false; + + if (n->type == NODE_TYPE_ACTION && S_THth(n->suffix)) + return true; + + /* + * Next node + */ + n++; + + /* end of format string is treated like a non-digit separator */ + if (n->type == NODE_TYPE_END) + return true; + + if (n->type == 
NODE_TYPE_ACTION) + { + if (n->key->is_digit) + return false; + + return true; + } + else if (n->character[1] == '\0' && + isdigit((unsigned char) n->character[0])) + return false; + + return true; /* some non-digit input (separator) */ +} + + +static int +adjust_partial_year_to_2020(int year) +{ + /* + * Adjust all dates toward 2020; this is effectively what happens when we + * assume '70' is 1970 and '69' is 2069. + */ + /* Force 0-69 into the 2000's */ + if (year < 70) + return year + 2000; + /* Force 70-99 into the 1900's */ + else if (year < 100) + return year + 1900; + /* Force 100-519 into the 2000's */ + else if (year < 520) + return year + 2000; + /* Force 520-999 into the 1000's */ + else if (year < 1000) + return year + 1000; + else + return year; +} + + +static int +strspace_len(const char *str) +{ + int len = 0; + + while (*str && isspace((unsigned char) *str)) + { + str++; + len++; + } + return len; +} + +/* + * Set the date mode of a from-char conversion. + * + * Puke if the date mode has already been set, and the caller attempts to set + * it to a conflicting mode. + * + * Returns true on success, false on failure (if escontext points to an + * ErrorSaveContext; otherwise errors are thrown). + */ +static bool +from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode, + Node *escontext) +{ + if (mode != FROM_CHAR_DATE_NONE) + { + if (tmfc->mode == FROM_CHAR_DATE_NONE) + tmfc->mode = mode; + else if (tmfc->mode != mode) + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("invalid combination of date conventions"), + errhint("Do not mix Gregorian and ISO week date " + "conventions in a formatting template."))); + } + return true; +} + +/* + * Set the integer pointed to by 'dest' to the given value. + * + * Puke if the destination integer has previously been set to some other + * non-zero value. + * + * Returns true on success, false on failure (if escontext points to an + * ErrorSaveContext; otherwise errors are thrown). + */ +static bool +from_char_set_int(int *dest, const int value, const FormatNode *node, + Node *escontext) +{ + if (*dest != 0 && *dest != value) + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("conflicting values for \"%s\" field in formatting string", + node->key->name), + errdetail("This value contradicts a previous setting " + "for the same field type."))); + *dest = value; + return true; +} + +/* + * Read a single integer from the source string, into the int pointed to by + * 'dest'. If 'dest' is NULL, the result is discarded. + * + * In fixed-width mode (the node does not have the FM suffix), consume at most + * 'len' characters. However, any leading whitespace isn't counted in 'len'. + * + * We use strtol() to recover the integer value from the source string, in + * accordance with the given FormatNode. + * + * If the conversion completes successfully, src will have been advanced to + * point at the character immediately following the last character used in the + * conversion. + * + * Returns the number of characters consumed, or -1 on error (if escontext + * points to an ErrorSaveContext; otherwise errors are thrown). + * + * Note that from_char_parse_int() provides a more convenient wrapper where + * the length of the field is the same as the length of the format keyword (as + * with DD and MI). 
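+ *
+ * Illustrative example (not exhaustive): for a "YYYY" node (len = 4, no FM)
+ * and source "2023-06", the call consumes "2023", stores 2023 through 'dest'
+ * via from_char_set_int(), advances *src to point at the "-", and returns 4.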
+ */ +static int +from_char_parse_int_len(int *dest, const char **src, const int len, FormatNode *node, + Node *escontext) +{ + long result; + char copy[DCH_MAX_ITEM_SIZ + 1]; + const char *init = *src; + int used; + + /* + * Skip any whitespace before parsing the integer. + */ + *src += strspace_len(*src); + + Assert(len <= DCH_MAX_ITEM_SIZ); + used = (int) strlcpy(copy, *src, len + 1); + + if (S_FM(node->suffix) || is_next_separator(node)) + { + /* + * This node is in Fill Mode, or the next node is known to be a + * non-digit value, so we just slurp as many characters as we can get. + */ + char *endptr; + + errno = 0; + result = strtol(init, &endptr, 10); + *src = endptr; + } + else + { + /* + * We need to pull exactly the number of characters given in 'len' out + * of the string, and convert those. + */ + char *last; + + if (used < len) + ereturn(escontext, -1, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("source string too short for \"%s\" formatting field", + node->key->name), + errdetail("Field requires %d characters, but only %d remain.", + len, used), + errhint("If your source string is not fixed-width, " + "try using the \"FM\" modifier."))); + + errno = 0; + result = strtol(copy, &last, 10); + used = last - copy; + + if (used > 0 && used < len) + ereturn(escontext, -1, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("invalid value \"%s\" for \"%s\"", + copy, node->key->name), + errdetail("Field requires %d characters, but only %d could be parsed.", + len, used), + errhint("If your source string is not fixed-width, " + "try using the \"FM\" modifier."))); + + *src += used; + } + + if (*src == init) + ereturn(escontext, -1, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("invalid value \"%s\" for \"%s\"", + copy, node->key->name), + errdetail("Value must be an integer."))); + + if (errno == ERANGE || result < INT_MIN || result > INT_MAX) + ereturn(escontext, -1, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("value for \"%s\" in source string is out of range", + node->key->name), + errdetail("Value must be in the range %d to %d.", + INT_MIN, INT_MAX))); + + if (dest != NULL) + { + if (!from_char_set_int(dest, (int) result, node, escontext)) + return -1; + } + + return *src - init; +} + +/* + * Call from_char_parse_int_len(), using the length of the format keyword as + * the expected length of the field. + * + * Don't call this function if the field differs in length from the format + * keyword (as with HH24; the keyword length is 4, but the field length is 2). + * In such cases, call from_char_parse_int_len() instead to specify the + * required length explicitly. + */ +static int +from_char_parse_int(int *dest, const char **src, FormatNode *node, + Node *escontext) +{ + return from_char_parse_int_len(dest, src, node->key->len, node, escontext); +} + +/* + * Sequentially search null-terminated "array" for a case-insensitive match + * to the initial character(s) of "name". + * + * Returns array index of match, or -1 for no match. + * + * *len is set to the length of the match, or 0 for no match. + * + * Case-insensitivity is defined per pg_ascii_tolower, so this is only + * suitable for comparisons to ASCII strings. 
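+ *
+ * For example (illustration only): seq_search_ascii("MARCH 1", months_full,
+ * &len) returns 2 with *len = 5, since "MARCH" matches "March"
+ * case-insensitively; a string matching no entry returns -1 with *len = 0.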
+ */ +static int +seq_search_ascii(const char *name, const char *const *array, int *len) +{ + unsigned char firstc; + const char *const *a; + + *len = 0; + + /* empty string can't match anything */ + if (!*name) + return -1; + + /* we handle first char specially to gain some speed */ + firstc = pg_ascii_tolower((unsigned char) *name); + + for (a = array; *a != NULL; a++) + { + const char *p; + const char *n; + + /* compare first chars */ + if (pg_ascii_tolower((unsigned char) **a) != firstc) + continue; + + /* compare rest of string */ + for (p = *a + 1, n = name + 1;; p++, n++) + { + /* return success if we matched whole array entry */ + if (*p == '\0') + { + *len = n - name; + return a - array; + } + /* else, must have another character in "name" ... */ + if (*n == '\0') + break; + /* ... and it must match */ + if (pg_ascii_tolower((unsigned char) *p) != + pg_ascii_tolower((unsigned char) *n)) + break; + } + } + + return -1; +} + +/* + * Sequentially search an array of possibly non-English words for + * a case-insensitive match to the initial character(s) of "name". + * + * This has the same API as seq_search_ascii(), but we use a more general + * case-folding transformation to achieve case-insensitivity. Case folding + * is done per the rules of the collation identified by "collid". + * + * The array is treated as const, but we don't declare it that way because + * the arrays exported by pg_locale.c aren't const. + */ +static int +seq_search_localized(const char *name, char **array, int *len, Oid collid) +{ + char **a; + char *upper_name; + char *lower_name; + + *len = 0; + + /* empty string can't match anything */ + if (!*name) + return -1; + + /* + * The case-folding processing done below is fairly expensive, so before + * doing that, make a quick pass to see if there is an exact match. + */ + for (a = array; *a != NULL; a++) + { + int element_len = strlen(*a); + + if (strncmp(name, *a, element_len) == 0) + { + *len = element_len; + return a - array; + } + } + + /* + * Fold to upper case, then to lower case, so that we can match reliably + * even in languages in which case conversions are not injective. + */ + upper_name = str_toupper(unconstify(char *, name), strlen(name), collid); + lower_name = str_tolower(upper_name, strlen(upper_name), collid); + pfree(upper_name); + + for (a = array; *a != NULL; a++) + { + char *upper_element; + char *lower_element; + int element_len; + + /* Likewise upper/lower-case array element */ + upper_element = str_toupper(*a, strlen(*a), collid); + lower_element = str_tolower(upper_element, strlen(upper_element), + collid); + pfree(upper_element); + element_len = strlen(lower_element); + + /* Match? */ + if (strncmp(lower_name, lower_element, element_len) == 0) + { + *len = element_len; + pfree(lower_element); + pfree(lower_name); + return a - array; + } + pfree(lower_element); + } + + pfree(lower_name); + return -1; +} + +/* + * Perform a sequential search in 'array' (or 'localized_array', if that's + * not NULL) for an entry matching the first character(s) of the 'src' + * string case-insensitively. + * + * The 'array' is presumed to be English words (all-ASCII), but + * if 'localized_array' is supplied, that might be non-English + * so we need a more expensive case-folding transformation + * (which will follow the rules of the collation 'collid'). + * + * If a match is found, copy the array index of the match into the integer + * pointed to by 'dest' and advance 'src' to the end of the part of the string + * which matched. 
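+ *
+ * For instance (illustration only): with 'array' = adbc_strings and *src
+ * pointing at "bc 44", the search sets *dest to 1 (an odd index, i.e. BC
+ * under the array-layout convention described above) and advances *src by 2.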
+ * + * Returns true on match, false on failure (if escontext points to an + * ErrorSaveContext; otherwise errors are thrown). + * + * 'node' is used only for error reports: node->key->name identifies the + * field type we were searching for. + */ +static bool +from_char_seq_search(int *dest, const char **src, const char *const *array, + char **localized_array, Oid collid, + FormatNode *node, Node *escontext) +{ + int len; + + if (localized_array == NULL) + *dest = seq_search_ascii(*src, array, &len); + else + *dest = seq_search_localized(*src, localized_array, &len, collid); + + if (len <= 0) + { + /* + * In the error report, truncate the string at the next whitespace (if + * any) to avoid including irrelevant data. + */ + char *copy = pstrdup(*src); + char *c; + + for (c = copy; *c; c++) + { + if (scanner_isspace(*c)) + { + *c = '\0'; + break; + } + } + + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("invalid value \"%s\" for \"%s\"", + copy, node->key->name), + errdetail("The given value did not match any of " + "the allowed values for this field."))); + } + *src += len; + return true; +} + +/* ---------- + * Process a TmToChar struct as denoted by a list of FormatNodes. + * The formatted data is written to the string pointed to by 'out'. + * ---------- + */ +static void +DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid) +{ + FormatNode *n; + char *s; + struct fmt_tm *tm = &in->tm; + int i; + + /* cache localized days and months */ + cache_locale_time(); + + s = out; + for (n = node; n->type != NODE_TYPE_END; n++) + { + if (n->type != NODE_TYPE_ACTION) + { + strcpy(s, n->character); + s += strlen(s); + continue; + } + + switch (n->key->id) + { + case DCH_A_M: + case DCH_P_M: + strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2) + ? P_M_STR : A_M_STR); + s += strlen(s); + break; + case DCH_AM: + case DCH_PM: + strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2) + ? PM_STR : AM_STR); + s += strlen(s); + break; + case DCH_a_m: + case DCH_p_m: + strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2) + ? p_m_STR : a_m_STR); + s += strlen(s); + break; + case DCH_am: + case DCH_pm: + strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2) + ? pm_STR : am_STR); + s += strlen(s); + break; + case DCH_HH: + case DCH_HH12: + + /* + * display time as shown on a 12-hour clock, even for + * intervals + */ + sprintf(s, "%0*lld", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3, + tm->tm_hour % (HOURS_PER_DAY / 2) == 0 ? + (long long) (HOURS_PER_DAY / 2) : + (long long) (tm->tm_hour % (HOURS_PER_DAY / 2))); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_HH24: + sprintf(s, "%0*lld", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3, + (long long) tm->tm_hour); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_MI: + sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_min >= 0) ? 2 : 3, + tm->tm_min); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_SS: + sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_sec >= 0) ? 
2 : 3, + tm->tm_sec); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + +#define DCH_to_char_fsec(frac_fmt, frac_val) \ + sprintf(s, frac_fmt, (int) (frac_val)); \ + if (S_THth(n->suffix)) \ + str_numth(s, s, S_TH_TYPE(n->suffix)); \ + s += strlen(s) + + case DCH_FF1: /* tenth of second */ + DCH_to_char_fsec("%01d", in->fsec / 100000); + break; + case DCH_FF2: /* hundredth of second */ + DCH_to_char_fsec("%02d", in->fsec / 10000); + break; + case DCH_FF3: + case DCH_MS: /* millisecond */ + DCH_to_char_fsec("%03d", in->fsec / 1000); + break; + case DCH_FF4: /* tenth of a millisecond */ + DCH_to_char_fsec("%04d", in->fsec / 100); + break; + case DCH_FF5: /* hundredth of a millisecond */ + DCH_to_char_fsec("%05d", in->fsec / 10); + break; + case DCH_FF6: + case DCH_US: /* microsecond */ + DCH_to_char_fsec("%06d", in->fsec); + break; +#undef DCH_to_char_fsec + case DCH_SSSS: + sprintf(s, "%lld", + (long long) (tm->tm_hour * SECS_PER_HOUR + + tm->tm_min * SECS_PER_MINUTE + + tm->tm_sec)); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_tz: + INVALID_FOR_INTERVAL; + if (tmtcTzn(in)) + { + /* We assume here that timezone names aren't localized */ + char *p = asc_tolower_z(tmtcTzn(in)); + + strcpy(s, p); + pfree(p); + s += strlen(s); + } + break; + case DCH_TZ: + INVALID_FOR_INTERVAL; + if (tmtcTzn(in)) + { + strcpy(s, tmtcTzn(in)); + s += strlen(s); + } + break; + case DCH_TZH: + INVALID_FOR_INTERVAL; + sprintf(s, "%c%02d", + (tm->tm_gmtoff >= 0) ? '+' : '-', + abs((int) tm->tm_gmtoff) / SECS_PER_HOUR); + s += strlen(s); + break; + case DCH_TZM: + INVALID_FOR_INTERVAL; + sprintf(s, "%02d", + (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE); + s += strlen(s); + break; + case DCH_OF: + INVALID_FOR_INTERVAL; + sprintf(s, "%c%0*d", + (tm->tm_gmtoff >= 0) ? '+' : '-', + S_FM(n->suffix) ? 0 : 2, + abs((int) tm->tm_gmtoff) / SECS_PER_HOUR); + s += strlen(s); + if (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR != 0) + { + sprintf(s, ":%02d", + (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE); + s += strlen(s); + } + break; + case DCH_A_D: + case DCH_B_C: + INVALID_FOR_INTERVAL; + strcpy(s, (tm->tm_year <= 0 ? B_C_STR : A_D_STR)); + s += strlen(s); + break; + case DCH_AD: + case DCH_BC: + INVALID_FOR_INTERVAL; + strcpy(s, (tm->tm_year <= 0 ? BC_STR : AD_STR)); + s += strlen(s); + break; + case DCH_a_d: + case DCH_b_c: + INVALID_FOR_INTERVAL; + strcpy(s, (tm->tm_year <= 0 ? b_c_STR : a_d_STR)); + s += strlen(s); + break; + case DCH_ad: + case DCH_bc: + INVALID_FOR_INTERVAL; + strcpy(s, (tm->tm_year <= 0 ? bc_STR : ad_STR)); + s += strlen(s); + break; + case DCH_MONTH: + INVALID_FOR_INTERVAL; + if (!tm->tm_mon) + break; + if (S_TM(n->suffix)) + { + char *str = str_toupper_z(localized_full_months[tm->tm_mon - 1], collid); + + if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) + strcpy(s, str); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("localized string format value too long"))); + } + else + sprintf(s, "%*s", S_FM(n->suffix) ? 
0 : -9, + asc_toupper_z(months_full[tm->tm_mon - 1])); + s += strlen(s); + break; + case DCH_Month: + INVALID_FOR_INTERVAL; + if (!tm->tm_mon) + break; + if (S_TM(n->suffix)) + { + char *str = str_initcap_z(localized_full_months[tm->tm_mon - 1], collid); + + if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) + strcpy(s, str); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("localized string format value too long"))); + } + else + sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, + months_full[tm->tm_mon - 1]); + s += strlen(s); + break; + case DCH_month: + INVALID_FOR_INTERVAL; + if (!tm->tm_mon) + break; + if (S_TM(n->suffix)) + { + char *str = str_tolower_z(localized_full_months[tm->tm_mon - 1], collid); + + if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) + strcpy(s, str); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("localized string format value too long"))); + } + else + sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, + asc_tolower_z(months_full[tm->tm_mon - 1])); + s += strlen(s); + break; + case DCH_MON: + INVALID_FOR_INTERVAL; + if (!tm->tm_mon) + break; + if (S_TM(n->suffix)) + { + char *str = str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid); + + if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) + strcpy(s, str); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("localized string format value too long"))); + } + else + strcpy(s, asc_toupper_z(months[tm->tm_mon - 1])); + s += strlen(s); + break; + case DCH_Mon: + INVALID_FOR_INTERVAL; + if (!tm->tm_mon) + break; + if (S_TM(n->suffix)) + { + char *str = str_initcap_z(localized_abbrev_months[tm->tm_mon - 1], collid); + + if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) + strcpy(s, str); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("localized string format value too long"))); + } + else + strcpy(s, months[tm->tm_mon - 1]); + s += strlen(s); + break; + case DCH_mon: + INVALID_FOR_INTERVAL; + if (!tm->tm_mon) + break; + if (S_TM(n->suffix)) + { + char *str = str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid); + + if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) + strcpy(s, str); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("localized string format value too long"))); + } + else + strcpy(s, asc_tolower_z(months[tm->tm_mon - 1])); + s += strlen(s); + break; + case DCH_MM: + sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_mon >= 0) ? 2 : 3, + tm->tm_mon); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_DAY: + INVALID_FOR_INTERVAL; + if (S_TM(n->suffix)) + { + char *str = str_toupper_z(localized_full_days[tm->tm_wday], collid); + + if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) + strcpy(s, str); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("localized string format value too long"))); + } + else + sprintf(s, "%*s", S_FM(n->suffix) ? 
0 : -9, + asc_toupper_z(days[tm->tm_wday])); + s += strlen(s); + break; + case DCH_Day: + INVALID_FOR_INTERVAL; + if (S_TM(n->suffix)) + { + char *str = str_initcap_z(localized_full_days[tm->tm_wday], collid); + + if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) + strcpy(s, str); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("localized string format value too long"))); + } + else + sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, + days[tm->tm_wday]); + s += strlen(s); + break; + case DCH_day: + INVALID_FOR_INTERVAL; + if (S_TM(n->suffix)) + { + char *str = str_tolower_z(localized_full_days[tm->tm_wday], collid); + + if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) + strcpy(s, str); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("localized string format value too long"))); + } + else + sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, + asc_tolower_z(days[tm->tm_wday])); + s += strlen(s); + break; + case DCH_DY: + INVALID_FOR_INTERVAL; + if (S_TM(n->suffix)) + { + char *str = str_toupper_z(localized_abbrev_days[tm->tm_wday], collid); + + if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) + strcpy(s, str); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("localized string format value too long"))); + } + else + strcpy(s, asc_toupper_z(days_short[tm->tm_wday])); + s += strlen(s); + break; + case DCH_Dy: + INVALID_FOR_INTERVAL; + if (S_TM(n->suffix)) + { + char *str = str_initcap_z(localized_abbrev_days[tm->tm_wday], collid); + + if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) + strcpy(s, str); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("localized string format value too long"))); + } + else + strcpy(s, days_short[tm->tm_wday]); + s += strlen(s); + break; + case DCH_dy: + INVALID_FOR_INTERVAL; + if (S_TM(n->suffix)) + { + char *str = str_tolower_z(localized_abbrev_days[tm->tm_wday], collid); + + if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) + strcpy(s, str); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("localized string format value too long"))); + } + else + strcpy(s, asc_tolower_z(days_short[tm->tm_wday])); + s += strlen(s); + break; + case DCH_DDD: + case DCH_IDDD: + sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 3, + (n->key->id == DCH_DDD) ? + tm->tm_yday : + date2isoyearday(tm->tm_year, tm->tm_mon, tm->tm_mday)); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_DD: + sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, tm->tm_mday); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_D: + INVALID_FOR_INTERVAL; + sprintf(s, "%d", tm->tm_wday + 1); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_ID: + INVALID_FOR_INTERVAL; + sprintf(s, "%d", (tm->tm_wday == 0) ? 7 : tm->tm_wday); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_WW: + sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, + (tm->tm_yday - 1) / 7 + 1); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_IW: + sprintf(s, "%0*d", S_FM(n->suffix) ? 
0 : 2, + date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday)); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_Q: + if (!tm->tm_mon) + break; + sprintf(s, "%d", (tm->tm_mon - 1) / 3 + 1); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_CC: + if (is_interval) /* straight calculation */ + i = tm->tm_year / 100; + else + { + if (tm->tm_year > 0) + /* Century 20 == 1901 - 2000 */ + i = (tm->tm_year - 1) / 100 + 1; + else + /* Century 6BC == 600BC - 501BC */ + i = tm->tm_year / 100 - 1; + } + if (i <= 99 && i >= -99) + sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (i >= 0) ? 2 : 3, i); + else + sprintf(s, "%d", i); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_Y_YYY: + i = ADJUST_YEAR(tm->tm_year, is_interval) / 1000; + sprintf(s, "%d,%03d", i, + ADJUST_YEAR(tm->tm_year, is_interval) - (i * 1000)); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_YYYY: + case DCH_IYYY: + sprintf(s, "%0*d", + S_FM(n->suffix) ? 0 : + (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 4 : 5, + (n->key->id == DCH_YYYY ? + ADJUST_YEAR(tm->tm_year, is_interval) : + ADJUST_YEAR(date2isoyear(tm->tm_year, + tm->tm_mon, + tm->tm_mday), + is_interval))); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_YYY: + case DCH_IYY: + sprintf(s, "%0*d", + S_FM(n->suffix) ? 0 : + (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 3 : 4, + (n->key->id == DCH_YYY ? + ADJUST_YEAR(tm->tm_year, is_interval) : + ADJUST_YEAR(date2isoyear(tm->tm_year, + tm->tm_mon, + tm->tm_mday), + is_interval)) % 1000); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_YY: + case DCH_IY: + sprintf(s, "%0*d", + S_FM(n->suffix) ? 0 : + (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 2 : 3, + (n->key->id == DCH_YY ? + ADJUST_YEAR(tm->tm_year, is_interval) : + ADJUST_YEAR(date2isoyear(tm->tm_year, + tm->tm_mon, + tm->tm_mday), + is_interval)) % 100); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_Y: + case DCH_I: + sprintf(s, "%1d", + (n->key->id == DCH_Y ? + ADJUST_YEAR(tm->tm_year, is_interval) : + ADJUST_YEAR(date2isoyear(tm->tm_year, + tm->tm_mon, + tm->tm_mday), + is_interval)) % 10); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_RM: + /* FALLTHROUGH */ + case DCH_rm: + + /* + * For intervals, values like '12 month' will be reduced to 0 + * month and some years. These should be processed. + */ + if (!tm->tm_mon && !tm->tm_year) + break; + else + { + int mon = 0; + const char *const *months; + + if (n->key->id == DCH_RM) + months = rm_months_upper; + else + months = rm_months_lower; + + /* + * Compute the position in the roman-numeral array. Note + * that the contents of the array are reversed, December + * being first and January last. + */ + if (tm->tm_mon == 0) + { + /* + * This case is special, and tracks the case of full + * interval years. + */ + mon = tm->tm_year >= 0 ? 0 : MONTHS_PER_YEAR - 1; + } + else if (tm->tm_mon < 0) + { + /* + * Negative case. In this case, the calculation is + * reversed, where -1 means December, -2 November, + * etc. + */ + mon = -1 * (tm->tm_mon + 1); + } + else + { + /* + * Common case, with a strictly positive value. The + * position in the array matches with the value of + * tm_mon. 
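+ * For example, tm_mon = 3 (March) gives mon = 9, which selects
+ * "III" (or "iii") from the reversed twelve-entry array.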
+ */ + mon = MONTHS_PER_YEAR - tm->tm_mon; + } + + sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4, + months[mon]); + s += strlen(s); + } + break; + case DCH_W: + sprintf(s, "%d", (tm->tm_mday - 1) / 7 + 1); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_J: + sprintf(s, "%d", date2j(tm->tm_year, tm->tm_mon, tm->tm_mday)); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + } + } + + *s = '\0'; +} + +/* + * Process the string 'in' as denoted by the array of FormatNodes 'node[]'. + * The TmFromChar struct pointed to by 'out' is populated with the results. + * + * 'collid' identifies the collation to use, if needed. + * 'std' specifies standard parsing mode. + * + * If escontext points to an ErrorSaveContext, data errors will be reported + * by filling that struct; the caller must test SOFT_ERROR_OCCURRED() to see + * whether an error occurred. Otherwise, errors are thrown. + * + * Note: we currently don't have any to_interval() function, so there + * is no need here for INVALID_FOR_INTERVAL checks. + */ +static void +DCH_from_char(FormatNode *node, const char *in, TmFromChar *out, + Oid collid, bool std, Node *escontext) +{ + FormatNode *n; + const char *s; + int len, + value; + bool fx_mode = std; + + /* number of extra skipped characters (more than given in format string) */ + int extra_skip = 0; + + /* cache localized days and months */ + cache_locale_time(); + + for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++) + { + /* + * Ignore spaces at the beginning of the string and before fields when + * not in FX (fixed width) mode. + */ + if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) && + (n->type == NODE_TYPE_ACTION || n == node)) + { + while (*s != '\0' && isspace((unsigned char) *s)) + { + s++; + extra_skip++; + } + } + + if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR) + { + if (std) + { + /* + * Standard mode requires strict matching between format + * string separators/spaces and input string. + */ + Assert(n->character[0] && !n->character[1]); + + if (*s == n->character[0]) + s++; + else + ereturn(escontext,, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("unmatched format separator \"%c\"", + n->character[0]))); + } + else if (!fx_mode) + { + /* + * In non FX (fixed format) mode one format string space or + * separator match to one space or separator in input string. + * Or match nothing if there is no space or separator in the + * current position of input string. + */ + extra_skip--; + if (isspace((unsigned char) *s) || is_separator_char(s)) + { + s++; + extra_skip++; + } + } + else + { + /* + * In FX mode, on format string space or separator we consume + * exactly one character from input string. Notice we don't + * insist that the consumed character match the format's + * character. + */ + s += pg_mblen(s); + } + continue; + } + else if (n->type != NODE_TYPE_ACTION) + { + /* + * Text character, so consume one character from input string. + * Notice we don't insist that the consumed character match the + * format's character. + */ + if (!fx_mode) + { + /* + * In non FX mode we might have skipped some extra characters + * (more than specified in format string) before. In this + * case we don't skip input string character, because it might + * be part of field. 
+ */ + if (extra_skip > 0) + extra_skip--; + else + s += pg_mblen(s); + } + else + { + int chlen = pg_mblen(s); + + /* + * Standard mode requires strict match of format characters. + */ + if (std && n->type == NODE_TYPE_CHAR && + strncmp(s, n->character, chlen) != 0) + ereturn(escontext,, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("unmatched format character \"%s\"", + n->character))); + + s += chlen; + } + continue; + } + + if (!from_char_set_mode(out, n->key->date_mode, escontext)) + return; + + switch (n->key->id) + { + case DCH_FX: + fx_mode = true; + break; + case DCH_A_M: + case DCH_P_M: + case DCH_a_m: + case DCH_p_m: + if (!from_char_seq_search(&value, &s, ampm_strings_long, + NULL, InvalidOid, + n, escontext)) + return; + if (!from_char_set_int(&out->pm, value % 2, n, escontext)) + return; + out->clock = CLOCK_12_HOUR; + break; + case DCH_AM: + case DCH_PM: + case DCH_am: + case DCH_pm: + if (!from_char_seq_search(&value, &s, ampm_strings, + NULL, InvalidOid, + n, escontext)) + return; + if (!from_char_set_int(&out->pm, value % 2, n, escontext)) + return; + out->clock = CLOCK_12_HOUR; + break; + case DCH_HH: + case DCH_HH12: + if (from_char_parse_int_len(&out->hh, &s, 2, n, escontext) < 0) + return; + out->clock = CLOCK_12_HOUR; + SKIP_THth(s, n->suffix); + break; + case DCH_HH24: + if (from_char_parse_int_len(&out->hh, &s, 2, n, escontext) < 0) + return; + SKIP_THth(s, n->suffix); + break; + case DCH_MI: + if (from_char_parse_int(&out->mi, &s, n, escontext) < 0) + return; + SKIP_THth(s, n->suffix); + break; + case DCH_SS: + if (from_char_parse_int(&out->ss, &s, n, escontext) < 0) + return; + SKIP_THth(s, n->suffix); + break; + case DCH_MS: /* millisecond */ + len = from_char_parse_int_len(&out->ms, &s, 3, n, escontext); + if (len < 0) + return; + + /* + * 25 is 0.25 and 250 is 0.25 too; 025 is 0.025 and not 0.25 + */ + out->ms *= len == 1 ? 100 : + len == 2 ? 10 : 1; + + SKIP_THth(s, n->suffix); + break; + case DCH_FF1: + case DCH_FF2: + case DCH_FF3: + case DCH_FF4: + case DCH_FF5: + case DCH_FF6: + out->ff = n->key->id - DCH_FF1 + 1; + /* fall through */ + case DCH_US: /* microsecond */ + len = from_char_parse_int_len(&out->us, &s, + n->key->id == DCH_US ? 6 : + out->ff, n, escontext); + if (len < 0) + return; + + out->us *= len == 1 ? 100000 : + len == 2 ? 10000 : + len == 3 ? 1000 : + len == 4 ? 100 : + len == 5 ? 10 : 1; + + SKIP_THth(s, n->suffix); + break; + case DCH_SSSS: + if (from_char_parse_int(&out->ssss, &s, n, escontext) < 0) + return; + SKIP_THth(s, n->suffix); + break; + case DCH_tz: + case DCH_TZ: + case DCH_OF: + ereturn(escontext,, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("formatting field \"%s\" is only supported in to_char", + n->key->name))); + break; + case DCH_TZH: + + /* + * Value of TZH might be negative. And the issue is that we + * might swallow minus sign as the separator. So, if we have + * skipped more characters than specified in the format + * string, then we consider prepending last skipped minus to + * TZH. + */ + if (*s == '+' || *s == '-' || *s == ' ') + { + out->tzsign = *s == '-' ? 
-1 : +1; + s++; + } + else + { + if (extra_skip > 0 && *(s - 1) == '-') + out->tzsign = -1; + else + out->tzsign = +1; + } + + if (from_char_parse_int_len(&out->tzh, &s, 2, n, escontext) < 0) + return; + break; + case DCH_TZM: + /* assign positive timezone sign if TZH was not seen before */ + if (!out->tzsign) + out->tzsign = +1; + if (from_char_parse_int_len(&out->tzm, &s, 2, n, escontext) < 0) + return; + break; + case DCH_A_D: + case DCH_B_C: + case DCH_a_d: + case DCH_b_c: + if (!from_char_seq_search(&value, &s, adbc_strings_long, + NULL, InvalidOid, + n, escontext)) + return; + if (!from_char_set_int(&out->bc, value % 2, n, escontext)) + return; + break; + case DCH_AD: + case DCH_BC: + case DCH_ad: + case DCH_bc: + if (!from_char_seq_search(&value, &s, adbc_strings, + NULL, InvalidOid, + n, escontext)) + return; + if (!from_char_set_int(&out->bc, value % 2, n, escontext)) + return; + break; + case DCH_MONTH: + case DCH_Month: + case DCH_month: + if (!from_char_seq_search(&value, &s, months_full, + S_TM(n->suffix) ? localized_full_months : NULL, + collid, + n, escontext)) + return; + if (!from_char_set_int(&out->mm, value + 1, n, escontext)) + return; + break; + case DCH_MON: + case DCH_Mon: + case DCH_mon: + if (!from_char_seq_search(&value, &s, months, + S_TM(n->suffix) ? localized_abbrev_months : NULL, + collid, + n, escontext)) + return; + if (!from_char_set_int(&out->mm, value + 1, n, escontext)) + return; + break; + case DCH_MM: + if (from_char_parse_int(&out->mm, &s, n, escontext) < 0) + return; + SKIP_THth(s, n->suffix); + break; + case DCH_DAY: + case DCH_Day: + case DCH_day: + if (!from_char_seq_search(&value, &s, days, + S_TM(n->suffix) ? localized_full_days : NULL, + collid, + n, escontext)) + return; + if (!from_char_set_int(&out->d, value, n, escontext)) + return; + out->d++; + break; + case DCH_DY: + case DCH_Dy: + case DCH_dy: + if (!from_char_seq_search(&value, &s, days_short, + S_TM(n->suffix) ? localized_abbrev_days : NULL, + collid, + n, escontext)) + return; + if (!from_char_set_int(&out->d, value, n, escontext)) + return; + out->d++; + break; + case DCH_DDD: + if (from_char_parse_int(&out->ddd, &s, n, escontext) < 0) + return; + SKIP_THth(s, n->suffix); + break; + case DCH_IDDD: + if (from_char_parse_int_len(&out->ddd, &s, 3, n, escontext) < 0) + return; + SKIP_THth(s, n->suffix); + break; + case DCH_DD: + if (from_char_parse_int(&out->dd, &s, n, escontext) < 0) + return; + SKIP_THth(s, n->suffix); + break; + case DCH_D: + if (from_char_parse_int(&out->d, &s, n, escontext) < 0) + return; + SKIP_THth(s, n->suffix); + break; + case DCH_ID: + if (from_char_parse_int_len(&out->d, &s, 1, n, escontext) < 0) + return; + /* Shift numbering to match Gregorian where Sunday = 1 */ + if (++out->d > 7) + out->d = 1; + SKIP_THth(s, n->suffix); + break; + case DCH_WW: + case DCH_IW: + if (from_char_parse_int(&out->ww, &s, n, escontext) < 0) + return; + SKIP_THth(s, n->suffix); + break; + case DCH_Q: + + /* + * We ignore 'Q' when converting to date because it is unclear + * which date in the quarter to use, and some people specify + * both quarter and month, so if it was honored it might + * conflict with the supplied month. That is also why we don't + * throw an error. + * + * We still parse the source string for an integer, but it + * isn't stored anywhere in 'out'. 
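+ *
+ * So, for example, to_date('2020-2-03-15', 'YYYY-Q-MM-DD') simply
+ * yields 2020-03-15; the quarter digit is read and then discarded.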
+ */ + if (from_char_parse_int((int *) NULL, &s, n, escontext) < 0) + return; + SKIP_THth(s, n->suffix); + break; + case DCH_CC: + if (from_char_parse_int(&out->cc, &s, n, escontext) < 0) + return; + SKIP_THth(s, n->suffix); + break; + case DCH_Y_YYY: + { + int matched, + years, + millennia, + nch; + + matched = sscanf(s, "%d,%03d%n", &millennia, &years, &nch); + if (matched < 2) + ereturn(escontext,, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("invalid input string for \"Y,YYY\""))); + years += (millennia * 1000); + if (!from_char_set_int(&out->year, years, n, escontext)) + return; + out->yysz = 4; + s += nch; + SKIP_THth(s, n->suffix); + } + break; + case DCH_YYYY: + case DCH_IYYY: + if (from_char_parse_int(&out->year, &s, n, escontext) < 0) + return; + out->yysz = 4; + SKIP_THth(s, n->suffix); + break; + case DCH_YYY: + case DCH_IYY: + len = from_char_parse_int(&out->year, &s, n, escontext); + if (len < 0) + return; + if (len < 4) + out->year = adjust_partial_year_to_2020(out->year); + out->yysz = 3; + SKIP_THth(s, n->suffix); + break; + case DCH_YY: + case DCH_IY: + len = from_char_parse_int(&out->year, &s, n, escontext); + if (len < 0) + return; + if (len < 4) + out->year = adjust_partial_year_to_2020(out->year); + out->yysz = 2; + SKIP_THth(s, n->suffix); + break; + case DCH_Y: + case DCH_I: + len = from_char_parse_int(&out->year, &s, n, escontext); + if (len < 0) + return; + if (len < 4) + out->year = adjust_partial_year_to_2020(out->year); + out->yysz = 1; + SKIP_THth(s, n->suffix); + break; + case DCH_RM: + case DCH_rm: + if (!from_char_seq_search(&value, &s, rm_months_lower, + NULL, InvalidOid, + n, escontext)) + return; + if (!from_char_set_int(&out->mm, MONTHS_PER_YEAR - value, n, + escontext)) + return; + break; + case DCH_W: + if (from_char_parse_int(&out->w, &s, n, escontext) < 0) + return; + SKIP_THth(s, n->suffix); + break; + case DCH_J: + if (from_char_parse_int(&out->j, &s, n, escontext) < 0) + return; + SKIP_THth(s, n->suffix); + break; + } + + /* Ignore all spaces after fields */ + if (!fx_mode) + { + extra_skip = 0; + while (*s != '\0' && isspace((unsigned char) *s)) + { + s++; + extra_skip++; + } + } + } + + /* + * Standard parsing mode doesn't allow unmatched format patterns or + * trailing characters in the input string. + */ + if (std) + { + if (n->type != NODE_TYPE_END) + ereturn(escontext,, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("input string is too short for datetime format"))); + + while (*s != '\0' && isspace((unsigned char) *s)) + s++; + + if (*s != '\0') + ereturn(escontext,, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("trailing characters remain in input string after datetime format"))); + } +} + +/* + * The invariant for DCH cache entry management is that DCHCounter is equal + * to the maximum age value among the existing entries, and we increment it + * whenever an access occurs. If we approach overflow, deal with that by + * halving all the age values, so that we retain a fairly accurate idea of + * which entries are oldest. + */ +static inline void +DCH_prevent_counter_overflow(void) +{ + if (DCHCounter >= (INT_MAX - 1)) + { + for (int i = 0; i < n_DCHCache; i++) + DCHCache[i]->age >>= 1; + DCHCounter >>= 1; + } +} + +/* + * Get mask of date/time/zone components present in format nodes. 
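+ *
+ * For example, 'YYYY-MM-DD HH24:MI:SS' yields DCH_DATED | DCH_TIMED, while a
+ * format that also contains TZH:TZM sets DCH_ZONED as well.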
+ */ +static int +DCH_datetime_type(FormatNode *node) +{ + FormatNode *n; + int flags = 0; + + for (n = node; n->type != NODE_TYPE_END; n++) + { + if (n->type != NODE_TYPE_ACTION) + continue; + + switch (n->key->id) + { + case DCH_FX: + break; + case DCH_A_M: + case DCH_P_M: + case DCH_a_m: + case DCH_p_m: + case DCH_AM: + case DCH_PM: + case DCH_am: + case DCH_pm: + case DCH_HH: + case DCH_HH12: + case DCH_HH24: + case DCH_MI: + case DCH_SS: + case DCH_MS: /* millisecond */ + case DCH_US: /* microsecond */ + case DCH_FF1: + case DCH_FF2: + case DCH_FF3: + case DCH_FF4: + case DCH_FF5: + case DCH_FF6: + case DCH_SSSS: + flags |= DCH_TIMED; + break; + case DCH_tz: + case DCH_TZ: + case DCH_OF: + case DCH_TZH: + case DCH_TZM: + flags |= DCH_ZONED; + break; + case DCH_A_D: + case DCH_B_C: + case DCH_a_d: + case DCH_b_c: + case DCH_AD: + case DCH_BC: + case DCH_ad: + case DCH_bc: + case DCH_MONTH: + case DCH_Month: + case DCH_month: + case DCH_MON: + case DCH_Mon: + case DCH_mon: + case DCH_MM: + case DCH_DAY: + case DCH_Day: + case DCH_day: + case DCH_DY: + case DCH_Dy: + case DCH_dy: + case DCH_DDD: + case DCH_IDDD: + case DCH_DD: + case DCH_D: + case DCH_ID: + case DCH_WW: + case DCH_Q: + case DCH_CC: + case DCH_Y_YYY: + case DCH_YYYY: + case DCH_IYYY: + case DCH_YYY: + case DCH_IYY: + case DCH_YY: + case DCH_IY: + case DCH_Y: + case DCH_I: + case DCH_RM: + case DCH_rm: + case DCH_W: + case DCH_J: + flags |= DCH_DATED; + break; + } + } + + return flags; +} + +/* select a DCHCacheEntry to hold the given format picture */ +static DCHCacheEntry * +DCH_cache_getnew(const char *str, bool std) +{ + DCHCacheEntry *ent; + + /* Ensure we can advance DCHCounter below */ + DCH_prevent_counter_overflow(); + + /* + * If cache is full, remove oldest entry (or recycle first not-valid one) + */ + if (n_DCHCache >= DCH_CACHE_ENTRIES) + { + DCHCacheEntry *old = DCHCache[0]; + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "cache is full (%d)", n_DCHCache); +#endif + if (old->valid) + { + for (int i = 1; i < DCH_CACHE_ENTRIES; i++) + { + ent = DCHCache[i]; + if (!ent->valid) + { + old = ent; + break; + } + if (ent->age < old->age) + old = ent; + } + } +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "OLD: '%s' AGE: %d", old->str, old->age); +#endif + old->valid = false; + strlcpy(old->str, str, DCH_CACHE_SIZE + 1); + old->age = (++DCHCounter); + /* caller is expected to fill format, then set valid */ + return old; + } + else + { +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "NEW (%d)", n_DCHCache); +#endif + Assert(DCHCache[n_DCHCache] == NULL); + DCHCache[n_DCHCache] = ent = (DCHCacheEntry *) + MemoryContextAllocZero(TopMemoryContext, sizeof(DCHCacheEntry)); + ent->valid = false; + strlcpy(ent->str, str, DCH_CACHE_SIZE + 1); + ent->std = std; + ent->age = (++DCHCounter); + /* caller is expected to fill format, then set valid */ + ++n_DCHCache; + return ent; + } +} + +/* look for an existing DCHCacheEntry matching the given format picture */ +static DCHCacheEntry * +DCH_cache_search(const char *str, bool std) +{ + /* Ensure we can advance DCHCounter below */ + DCH_prevent_counter_overflow(); + + for (int i = 0; i < n_DCHCache; i++) + { + DCHCacheEntry *ent = DCHCache[i]; + + if (ent->valid && strcmp(ent->str, str) == 0 && ent->std == std) + { + ent->age = (++DCHCounter); + return ent; + } + } + + return NULL; +} + +/* Find or create a DCHCacheEntry for the given format picture */ +static DCHCacheEntry * +DCH_cache_fetch(const char *str, bool std) +{ + DCHCacheEntry *ent; + + if ((ent = 
DCH_cache_search(str, std)) == NULL) + { + /* + * Not in the cache, must run parser and save a new format-picture to + * the cache. Do not mark the cache entry valid until parsing + * succeeds. + */ + ent = DCH_cache_getnew(str, std); + + parse_format(ent->format, str, DCH_keywords, DCH_suff, DCH_index, + DCH_FLAG | (std ? STD_FLAG : 0), NULL); + + ent->valid = true; + } + return ent; +} + +/* + * Format a date/time or interval into a string according to fmt. + * We parse fmt into a list of FormatNodes. This is then passed to DCH_to_char + * for formatting. + */ +static text * +datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid) +{ + FormatNode *format; + char *fmt_str, + *result; + bool incache; + int fmt_len; + text *res; + + /* + * Convert fmt to C string + */ + fmt_str = text_to_cstring(fmt); + fmt_len = strlen(fmt_str); + + /* + * Allocate workspace for result as C string + */ + result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1); + *result = '\0'; + + if (fmt_len > DCH_CACHE_SIZE) + { + /* + * Allocate new memory if format picture is bigger than static cache + * and do not use cache (call parser always) + */ + incache = false; + + format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode)); + + parse_format(format, fmt_str, DCH_keywords, + DCH_suff, DCH_index, DCH_FLAG, NULL); + } + else + { + /* + * Use cache buffers + */ + DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false); + + incache = true; + format = ent->format; + } + + /* The real work is here */ + DCH_to_char(format, is_interval, tmtc, result, collid); + + if (!incache) + pfree(format); + + pfree(fmt_str); + + /* convert C-string result to TEXT format */ + res = cstring_to_text(result); + + pfree(result); + return res; +} + +/**************************************************************************** + * Public routines + ***************************************************************************/ + +/* ------------------- + * TIMESTAMP to_char() + * ------------------- + */ +Datum +timestamp_to_char(PG_FUNCTION_ARGS) +{ + Timestamp dt = PG_GETARG_TIMESTAMP(0); + text *fmt = PG_GETARG_TEXT_PP(1), + *res; + TmToChar tmtc; + struct pg_tm tt; + struct fmt_tm *tm; + int thisdate; + + if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt)) + PG_RETURN_NULL(); + + ZERO_tmtc(&tmtc); + tm = tmtcTm(&tmtc); + + if (timestamp2tm(dt, NULL, &tt, &tmtcFsec(&tmtc), NULL, NULL) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + /* calculate wday and yday, because timestamp2tm doesn't */ + thisdate = date2j(tt.tm_year, tt.tm_mon, tt.tm_mday); + tt.tm_wday = (thisdate + 1) % 7; + tt.tm_yday = thisdate - date2j(tt.tm_year, 1, 1) + 1; + + COPY_tm(tm, &tt); + + if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION()))) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(res); +} + +Datum +timestamptz_to_char(PG_FUNCTION_ARGS) +{ + TimestampTz dt = PG_GETARG_TIMESTAMP(0); + text *fmt = PG_GETARG_TEXT_PP(1), + *res; + TmToChar tmtc; + int tz; + struct pg_tm tt; + struct fmt_tm *tm; + int thisdate; + + if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt)) + PG_RETURN_NULL(); + + ZERO_tmtc(&tmtc); + tm = tmtcTm(&tmtc); + + if (timestamp2tm(dt, &tz, &tt, &tmtcFsec(&tmtc), &tmtcTzn(&tmtc), NULL) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + /* calculate wday and yday, because timestamp2tm doesn't */ + thisdate = date2j(tt.tm_year, tt.tm_mon, tt.tm_mday); + tt.tm_wday = 
(thisdate + 1) % 7; + tt.tm_yday = thisdate - date2j(tt.tm_year, 1, 1) + 1; + + COPY_tm(tm, &tt); + + if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION()))) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(res); +} + + +/* ------------------- + * INTERVAL to_char() + * ------------------- + */ +Datum +interval_to_char(PG_FUNCTION_ARGS) +{ + Interval *it = PG_GETARG_INTERVAL_P(0); + text *fmt = PG_GETARG_TEXT_PP(1), + *res; + TmToChar tmtc; + struct fmt_tm *tm; + struct pg_itm tt, + *itm = &tt; + + if (VARSIZE_ANY_EXHDR(fmt) <= 0) + PG_RETURN_NULL(); + + ZERO_tmtc(&tmtc); + tm = tmtcTm(&tmtc); + + interval2itm(*it, itm); + tmtc.fsec = itm->tm_usec; + tm->tm_sec = itm->tm_sec; + tm->tm_min = itm->tm_min; + tm->tm_hour = itm->tm_hour; + tm->tm_mday = itm->tm_mday; + tm->tm_mon = itm->tm_mon; + tm->tm_year = itm->tm_year; + + /* wday is meaningless, yday approximates the total span in days */ + tm->tm_yday = (tm->tm_year * MONTHS_PER_YEAR + tm->tm_mon) * DAYS_PER_MONTH + tm->tm_mday; + + if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION()))) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(res); +} + +/* --------------------- + * TO_TIMESTAMP() + * + * Make Timestamp from date_str which is formatted at argument 'fmt' + * ( to_timestamp is reverse to_char() ) + * --------------------- + */ +Datum +to_timestamp(PG_FUNCTION_ARGS) +{ + text *date_txt = PG_GETARG_TEXT_PP(0); + text *fmt = PG_GETARG_TEXT_PP(1); + Oid collid = PG_GET_COLLATION(); + Timestamp result; + int tz; + struct pg_tm tm; + fsec_t fsec; + int fprec; + + do_to_timestamp(date_txt, fmt, collid, false, + &tm, &fsec, &fprec, NULL, NULL); + + /* Use the specified time zone, if any. */ + if (tm.tm_zone) + { + DateTimeErrorExtra extra; + int dterr = DecodeTimezone(tm.tm_zone, &tz); + + if (dterr) + DateTimeParseError(dterr, &extra, text_to_cstring(date_txt), + "timestamptz", NULL); + } + else + tz = DetermineTimeZoneOffset(&tm, session_timezone); + + if (tm2timestamp(&tm, fsec, &tz, &result) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + /* Use the specified fractional precision, if any. */ + if (fprec) + AdjustTimestampForTypmod(&result, fprec, NULL); + + PG_RETURN_TIMESTAMP(result); +} + +/* ---------- + * TO_DATE + * Make Date from date_str which is formatted at argument 'fmt' + * ---------- + */ +Datum +to_date(PG_FUNCTION_ARGS) +{ + text *date_txt = PG_GETARG_TEXT_PP(0); + text *fmt = PG_GETARG_TEXT_PP(1); + Oid collid = PG_GET_COLLATION(); + DateADT result; + struct pg_tm tm; + fsec_t fsec; + + do_to_timestamp(date_txt, fmt, collid, false, + &tm, &fsec, NULL, NULL, NULL); + + /* Prevent overflow in Julian-day routines */ + if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("date out of range: \"%s\"", + text_to_cstring(date_txt)))); + + result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE; + + /* Now check for just-out-of-range dates */ + if (!IS_VALID_DATE(result)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("date out of range: \"%s\"", + text_to_cstring(date_txt)))); + + PG_RETURN_DATEADT(result); +} + +/* + * Convert the 'date_txt' input to a datetime type using argument 'fmt' + * as a format string. The collation 'collid' may be used for case-folding + * rules in some cases. 'strict' specifies standard parsing mode. 
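+ * In strict mode the input must be fully consumed by the format and vice
+ * versa: unmatched trailing format fields or leftover input characters are
+ * reported as errors rather than being ignored.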
+ * + * The actual data type (returned in 'typid', 'typmod') is determined by + * the presence of date/time/zone components in the format string. + * + * When a timezone component is present, the corresponding offset is + * returned in '*tz'. + * + * If escontext points to an ErrorSaveContext, data errors will be reported + * by filling that struct; the caller must test SOFT_ERROR_OCCURRED() to see + * whether an error occurred. Otherwise, errors are thrown. + */ +Datum +parse_datetime(text *date_txt, text *fmt, Oid collid, bool strict, + Oid *typid, int32 *typmod, int *tz, + Node *escontext) +{ + struct pg_tm tm; + fsec_t fsec; + int fprec; + uint32 flags; + + if (!do_to_timestamp(date_txt, fmt, collid, strict, + &tm, &fsec, &fprec, &flags, escontext)) + return (Datum) 0; + + *typmod = fprec ? fprec : -1; /* fractional part precision */ + + if (flags & DCH_DATED) + { + if (flags & DCH_TIMED) + { + if (flags & DCH_ZONED) + { + TimestampTz result; + + if (tm.tm_zone) + { + DateTimeErrorExtra extra; + int dterr = DecodeTimezone(tm.tm_zone, tz); + + if (dterr) + { + DateTimeParseError(dterr, &extra, + text_to_cstring(date_txt), + "timestamptz", escontext); + return (Datum) 0; + } + } + else + { + /* + * Time zone is present in format string, but not in input + * string. Assuming do_to_timestamp() triggers no error + * this should be possible only in non-strict case. + */ + Assert(!strict); + + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("missing time zone in input string for type timestamptz"))); + } + + if (tm2timestamp(&tm, fsec, tz, &result) != 0) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamptz out of range"))); + + AdjustTimestampForTypmod(&result, *typmod, escontext); + + *typid = TIMESTAMPTZOID; + return TimestampTzGetDatum(result); + } + else + { + Timestamp result; + + if (tm2timestamp(&tm, fsec, NULL, &result) != 0) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + AdjustTimestampForTypmod(&result, *typmod, escontext); + + *typid = TIMESTAMPOID; + return TimestampGetDatum(result); + } + } + else + { + if (flags & DCH_ZONED) + { + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("datetime format is zoned but not timed"))); + } + else + { + DateADT result; + + /* Prevent overflow in Julian-day routines */ + if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("date out of range: \"%s\"", + text_to_cstring(date_txt)))); + + result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - + POSTGRES_EPOCH_JDATE; + + /* Now check for just-out-of-range dates */ + if (!IS_VALID_DATE(result)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("date out of range: \"%s\"", + text_to_cstring(date_txt)))); + + *typid = DATEOID; + return DateADTGetDatum(result); + } + } + } + else if (flags & DCH_TIMED) + { + if (flags & DCH_ZONED) + { + TimeTzADT *result = palloc(sizeof(TimeTzADT)); + + if (tm.tm_zone) + { + DateTimeErrorExtra extra; + int dterr = DecodeTimezone(tm.tm_zone, tz); + + if (dterr) + { + DateTimeParseError(dterr, &extra, + text_to_cstring(date_txt), + "timetz", escontext); + return (Datum) 0; + } + } + else + { + /* + * Time zone is present in format string, but not in input + * string. 
Assuming do_to_timestamp() triggers no error this + * should be possible only in non-strict case. + */ + Assert(!strict); + + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("missing time zone in input string for type timetz"))); + } + + if (tm2timetz(&tm, fsec, *tz, result) != 0) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timetz out of range"))); + + AdjustTimeForTypmod(&result->time, *typmod); + + *typid = TIMETZOID; + return TimeTzADTPGetDatum(result); + } + else + { + TimeADT result; + + if (tm2time(&tm, fsec, &result) != 0) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("time out of range"))); + + AdjustTimeForTypmod(&result, *typmod); + + *typid = TIMEOID; + return TimeADTGetDatum(result); + } + } + else + { + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("datetime format is not dated and not timed"))); + } +} + +/* + * do_to_timestamp: shared code for to_timestamp and to_date + * + * Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm, + * fractional seconds, and fractional precision. + * + * 'collid' identifies the collation to use, if needed. + * 'std' specifies standard parsing mode. + * + * Bit mask of date/time/zone components found in 'fmt' is returned in 'flags', + * if that is not NULL. + * + * Returns true on success, false on failure (if escontext points to an + * ErrorSaveContext; otherwise errors are thrown). Note that currently, + * soft-error behavior is provided for bad data but not bad format. + * + * We parse 'fmt' into a list of FormatNodes, which is then passed to + * DCH_from_char to populate a TmFromChar with the parsed contents of + * 'date_txt'. + * + * The TmFromChar is then analysed and converted into the final results in + * struct 'tm', 'fsec', and 'fprec'. + */ +static bool +do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std, + struct pg_tm *tm, fsec_t *fsec, int *fprec, + uint32 *flags, Node *escontext) +{ + FormatNode *format = NULL; + TmFromChar tmfc; + int fmt_len; + char *date_str; + int fmask; + bool incache = false; + + Assert(tm != NULL); + Assert(fsec != NULL); + + date_str = text_to_cstring(date_txt); + + ZERO_tmfc(&tmfc); + ZERO_tm(tm); + *fsec = 0; + if (fprec) + *fprec = 0; + if (flags) + *flags = 0; + fmask = 0; /* bit mask for ValidateDate() */ + + fmt_len = VARSIZE_ANY_EXHDR(fmt); + + if (fmt_len) + { + char *fmt_str; + + fmt_str = text_to_cstring(fmt); + + if (fmt_len > DCH_CACHE_SIZE) + { + /* + * Allocate new memory if format picture is bigger than static + * cache and do not use cache (call parser always) + */ + format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode)); + + parse_format(format, fmt_str, DCH_keywords, DCH_suff, DCH_index, + DCH_FLAG | (std ? 
STD_FLAG : 0), NULL); + } + else + { + /* + * Use cache buffers + */ + DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, std); + + incache = true; + format = ent->format; + } + +#ifdef DEBUG_TO_FROM_CHAR + /* dump_node(format, fmt_len); */ + /* dump_index(DCH_keywords, DCH_index); */ +#endif + + DCH_from_char(format, date_str, &tmfc, collid, std, escontext); + pfree(fmt_str); + if (SOFT_ERROR_OCCURRED(escontext)) + goto fail; + + if (flags) + *flags = DCH_datetime_type(format); + + if (!incache) + { + pfree(format); + format = NULL; + } + } + + DEBUG_TMFC(&tmfc); + + /* + * Convert to_date/to_timestamp input fields to standard 'tm' + */ + if (tmfc.ssss) + { + int x = tmfc.ssss; + + tm->tm_hour = x / SECS_PER_HOUR; + x %= SECS_PER_HOUR; + tm->tm_min = x / SECS_PER_MINUTE; + x %= SECS_PER_MINUTE; + tm->tm_sec = x; + } + + if (tmfc.ss) + tm->tm_sec = tmfc.ss; + if (tmfc.mi) + tm->tm_min = tmfc.mi; + if (tmfc.hh) + tm->tm_hour = tmfc.hh; + + if (tmfc.clock == CLOCK_12_HOUR) + { + if (tm->tm_hour < 1 || tm->tm_hour > HOURS_PER_DAY / 2) + { + errsave(escontext, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("hour \"%d\" is invalid for the 12-hour clock", + tm->tm_hour), + errhint("Use the 24-hour clock, or give an hour between 1 and 12."))); + goto fail; + } + + if (tmfc.pm && tm->tm_hour < HOURS_PER_DAY / 2) + tm->tm_hour += HOURS_PER_DAY / 2; + else if (!tmfc.pm && tm->tm_hour == HOURS_PER_DAY / 2) + tm->tm_hour = 0; + } + + if (tmfc.year) + { + /* + * If CC and YY (or Y) are provided, use YY as 2 low-order digits for + * the year in the given century. Keep in mind that the 21st century + * AD runs from 2001-2100, not 2000-2099; 6th century BC runs from + * 600BC to 501BC. + */ + if (tmfc.cc && tmfc.yysz <= 2) + { + if (tmfc.bc) + tmfc.cc = -tmfc.cc; + tm->tm_year = tmfc.year % 100; + if (tm->tm_year) + { + if (tmfc.cc >= 0) + tm->tm_year += (tmfc.cc - 1) * 100; + else + tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1; + } + else + { + /* find century year for dates ending in "00" */ + tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1); + } + } + else + { + /* If a 4-digit year is provided, we use that and ignore CC. */ + tm->tm_year = tmfc.year; + if (tmfc.bc) + tm->tm_year = -tm->tm_year; + /* correct for our representation of BC years */ + if (tm->tm_year < 0) + tm->tm_year++; + } + fmask |= DTK_M(YEAR); + } + else if (tmfc.cc) + { + /* use first year of century */ + if (tmfc.bc) + tmfc.cc = -tmfc.cc; + if (tmfc.cc >= 0) + /* +1 because 21st century started in 2001 */ + tm->tm_year = (tmfc.cc - 1) * 100 + 1; + else + /* +1 because year == 599 is 600 BC */ + tm->tm_year = tmfc.cc * 100 + 1; + fmask |= DTK_M(YEAR); + } + + if (tmfc.j) + { + j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday); + fmask |= DTK_DATE_M; + } + + if (tmfc.ww) + { + if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK) + { + /* + * If tmfc.d is not set, then the date is left at the beginning of + * the ISO week (Monday). + */ + if (tmfc.d) + isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday); + else + isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday); + fmask |= DTK_DATE_M; + } + else + tmfc.ddd = (tmfc.ww - 1) * 7 + 1; + } + + if (tmfc.w) + tmfc.dd = (tmfc.w - 1) * 7 + 1; + if (tmfc.dd) + { + tm->tm_mday = tmfc.dd; + fmask |= DTK_M(DAY); + } + if (tmfc.mm) + { + tm->tm_mon = tmfc.mm; + fmask |= DTK_M(MONTH); + } + + if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1)) + { + /* + * The month and day field have not been set, so we use the + * day-of-year field to populate them. 
Depending on the date mode, + * this field may be interpreted as a Gregorian day-of-year, or an ISO + * week date day-of-year. + */ + + if (!tm->tm_year && !tmfc.bc) + { + errsave(escontext, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("cannot calculate day of year without year information"))); + goto fail; + } + + if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK) + { + int j0; /* zeroth day of the ISO year, in Julian */ + + j0 = isoweek2j(tm->tm_year, 1) - 1; + + j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday); + fmask |= DTK_DATE_M; + } + else + { + const int *y; + int i; + + static const int ysum[2][13] = { + {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}, + {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}}; + + y = ysum[isleap(tm->tm_year)]; + + for (i = 1; i <= MONTHS_PER_YEAR; i++) + { + if (tmfc.ddd <= y[i]) + break; + } + if (tm->tm_mon <= 1) + tm->tm_mon = i; + + if (tm->tm_mday <= 1) + tm->tm_mday = tmfc.ddd - y[i - 1]; + + fmask |= DTK_M(MONTH) | DTK_M(DAY); + } + } + + if (tmfc.ms) + *fsec += tmfc.ms * 1000; + if (tmfc.us) + *fsec += tmfc.us; + if (fprec) + *fprec = tmfc.ff; /* fractional precision, if specified */ + + /* Range-check date fields according to bit mask computed above */ + if (fmask != 0) + { + /* We already dealt with AD/BC, so pass isjulian = true */ + int dterr = ValidateDate(fmask, true, false, false, tm); + + if (dterr != 0) + { + /* + * Force the error to be DTERR_FIELD_OVERFLOW even if ValidateDate + * said DTERR_MD_FIELD_OVERFLOW, because we don't want to print an + * irrelevant hint about datestyle. + */ + DateTimeParseError(DTERR_FIELD_OVERFLOW, NULL, + date_str, "timestamp", escontext); + goto fail; + } + } + + /* Range-check time fields too */ + if (tm->tm_hour < 0 || tm->tm_hour >= HOURS_PER_DAY || + tm->tm_min < 0 || tm->tm_min >= MINS_PER_HOUR || + tm->tm_sec < 0 || tm->tm_sec >= SECS_PER_MINUTE || + *fsec < INT64CONST(0) || *fsec >= USECS_PER_SEC) + { + DateTimeParseError(DTERR_FIELD_OVERFLOW, NULL, + date_str, "timestamp", escontext); + goto fail; + } + + /* Save parsed time-zone into tm->tm_zone if it was specified */ + if (tmfc.tzsign) + { + char *tz; + + if (tmfc.tzh < 0 || tmfc.tzh > MAX_TZDISP_HOUR || + tmfc.tzm < 0 || tmfc.tzm >= MINS_PER_HOUR) + { + DateTimeParseError(DTERR_TZDISP_OVERFLOW, NULL, + date_str, "timestamp", escontext); + goto fail; + } + + tz = psprintf("%c%02d:%02d", + tmfc.tzsign > 0 ? 
'+' : '-', tmfc.tzh, tmfc.tzm); + + tm->tm_zone = tz; + } + + DEBUG_TM(tm); + + if (format && !incache) + pfree(format); + pfree(date_str); + + return true; + +fail: + if (format && !incache) + pfree(format); + pfree(date_str); + + return false; +} + + +/********************************************************************** + * the NUMBER version part + *********************************************************************/ + + +static char * +fill_str(char *str, int c, int max) +{ + memset(str, c, max); + *(str + max) = '\0'; + return str; +} + +#define zeroize_NUM(_n) \ +do { \ + (_n)->flag = 0; \ + (_n)->lsign = 0; \ + (_n)->pre = 0; \ + (_n)->post = 0; \ + (_n)->pre_lsign_num = 0; \ + (_n)->need_locale = 0; \ + (_n)->multi = 0; \ + (_n)->zero_start = 0; \ + (_n)->zero_end = 0; \ +} while(0) + +/* This works the same as DCH_prevent_counter_overflow */ +static inline void +NUM_prevent_counter_overflow(void) +{ + if (NUMCounter >= (INT_MAX - 1)) + { + for (int i = 0; i < n_NUMCache; i++) + NUMCache[i]->age >>= 1; + NUMCounter >>= 1; + } +} + +/* select a NUMCacheEntry to hold the given format picture */ +static NUMCacheEntry * +NUM_cache_getnew(const char *str) +{ + NUMCacheEntry *ent; + + /* Ensure we can advance NUMCounter below */ + NUM_prevent_counter_overflow(); + + /* + * If cache is full, remove oldest entry (or recycle first not-valid one) + */ + if (n_NUMCache >= NUM_CACHE_ENTRIES) + { + NUMCacheEntry *old = NUMCache[0]; + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "Cache is full (%d)", n_NUMCache); +#endif + if (old->valid) + { + for (int i = 1; i < NUM_CACHE_ENTRIES; i++) + { + ent = NUMCache[i]; + if (!ent->valid) + { + old = ent; + break; + } + if (ent->age < old->age) + old = ent; + } + } +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "OLD: \"%s\" AGE: %d", old->str, old->age); +#endif + old->valid = false; + strlcpy(old->str, str, NUM_CACHE_SIZE + 1); + old->age = (++NUMCounter); + /* caller is expected to fill format and Num, then set valid */ + return old; + } + else + { +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "NEW (%d)", n_NUMCache); +#endif + Assert(NUMCache[n_NUMCache] == NULL); + NUMCache[n_NUMCache] = ent = (NUMCacheEntry *) + MemoryContextAllocZero(TopMemoryContext, sizeof(NUMCacheEntry)); + ent->valid = false; + strlcpy(ent->str, str, NUM_CACHE_SIZE + 1); + ent->age = (++NUMCounter); + /* caller is expected to fill format and Num, then set valid */ + ++n_NUMCache; + return ent; + } +} + +/* look for an existing NUMCacheEntry matching the given format picture */ +static NUMCacheEntry * +NUM_cache_search(const char *str) +{ + /* Ensure we can advance NUMCounter below */ + NUM_prevent_counter_overflow(); + + for (int i = 0; i < n_NUMCache; i++) + { + NUMCacheEntry *ent = NUMCache[i]; + + if (ent->valid && strcmp(ent->str, str) == 0) + { + ent->age = (++NUMCounter); + return ent; + } + } + + return NULL; +} + +/* Find or create a NUMCacheEntry for the given format picture */ +static NUMCacheEntry * +NUM_cache_fetch(const char *str) +{ + NUMCacheEntry *ent; + + if ((ent = NUM_cache_search(str)) == NULL) + { + /* + * Not in the cache, must run parser and save a new format-picture to + * the cache. Do not mark the cache entry valid until parsing + * succeeds. 
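+ * (If parse_format() fails, the entry is left with valid = false, so a
+ * later NUM_cache_getnew() call will simply recycle it.)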
+ */ + ent = NUM_cache_getnew(str); + + zeroize_NUM(&ent->Num); + + parse_format(ent->format, str, NUM_keywords, + NULL, NUM_index, NUM_FLAG, &ent->Num); + + ent->valid = true; + } + return ent; +} + +/* ---------- + * Cache routine for NUM to_char version + * ---------- + */ +static FormatNode * +NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree) +{ + FormatNode *format = NULL; + char *str; + + str = text_to_cstring(pars_str); + + if (len > NUM_CACHE_SIZE) + { + /* + * Allocate new memory if format picture is bigger than static cache + * and do not use cache (call parser always) + */ + format = (FormatNode *) palloc((len + 1) * sizeof(FormatNode)); + + *shouldFree = true; + + zeroize_NUM(Num); + + parse_format(format, str, NUM_keywords, + NULL, NUM_index, NUM_FLAG, Num); + } + else + { + /* + * Use cache buffers + */ + NUMCacheEntry *ent = NUM_cache_fetch(str); + + *shouldFree = false; + + format = ent->format; + + /* + * Copy cache to used struct + */ + Num->flag = ent->Num.flag; + Num->lsign = ent->Num.lsign; + Num->pre = ent->Num.pre; + Num->post = ent->Num.post; + Num->pre_lsign_num = ent->Num.pre_lsign_num; + Num->need_locale = ent->Num.need_locale; + Num->multi = ent->Num.multi; + Num->zero_start = ent->Num.zero_start; + Num->zero_end = ent->Num.zero_end; + } + +#ifdef DEBUG_TO_FROM_CHAR + /* dump_node(format, len); */ + dump_index(NUM_keywords, NUM_index); +#endif + + pfree(str); + return format; +} + + +static char * +int_to_roman(int number) +{ + int len, + num; + char *p, + *result, + numstr[12]; + + result = (char *) palloc(16); + *result = '\0'; + + if (number > 3999 || number < 1) + { + fill_str(result, '#', 15); + return result; + } + len = snprintf(numstr, sizeof(numstr), "%d", number); + + for (p = numstr; *p != '\0'; p++, --len) + { + num = *p - ('0' + 1); + if (num < 0) + continue; + + if (len > 3) + { + while (num-- != -1) + strcat(result, "M"); + } + else + { + if (len == 3) + strcat(result, rm100[num]); + else if (len == 2) + strcat(result, rm10[num]); + else if (len == 1) + strcat(result, rm1[num]); + } + } + return result; +} + + + +/* ---------- + * Locale + * ---------- + */ +static void +NUM_prepare_locale(NUMProc *Np) +{ + if (Np->Num->need_locale) + { + struct lconv *lconv; + + /* + * Get locales + */ + lconv = PGLC_localeconv(); + + /* + * Positive / Negative number sign + */ + if (lconv->negative_sign && *lconv->negative_sign) + Np->L_negative_sign = lconv->negative_sign; + else + Np->L_negative_sign = "-"; + + if (lconv->positive_sign && *lconv->positive_sign) + Np->L_positive_sign = lconv->positive_sign; + else + Np->L_positive_sign = "+"; + + /* + * Number decimal point + */ + if (lconv->decimal_point && *lconv->decimal_point) + Np->decimal = lconv->decimal_point; + + else + Np->decimal = "."; + + if (!IS_LDECIMAL(Np->Num)) + Np->decimal = "."; + + /* + * Number thousands separator + * + * Some locales (e.g. broken glibc pt_BR), have a comma for decimal, + * but "" for thousands_sep, so we set the thousands_sep too. + * http://archives.postgresql.org/pgsql-hackers/2007-11/msg00772.php + */ + if (lconv->thousands_sep && *lconv->thousands_sep) + Np->L_thousands_sep = lconv->thousands_sep; + /* Make sure thousands separator doesn't match decimal point symbol. 
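+ * For example, a locale reporting "," for decimal_point but an empty
+ * thousands_sep ends up using "." as the group separator instead of ",".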
*/ + else if (strcmp(Np->decimal, ",") != 0) + Np->L_thousands_sep = ","; + else + Np->L_thousands_sep = "."; + + /* + * Currency symbol + */ + if (lconv->currency_symbol && *lconv->currency_symbol) + Np->L_currency_symbol = lconv->currency_symbol; + else + Np->L_currency_symbol = " "; + } + else + { + /* + * Default values + */ + Np->L_negative_sign = "-"; + Np->L_positive_sign = "+"; + Np->decimal = "."; + + Np->L_thousands_sep = ","; + Np->L_currency_symbol = " "; + } +} + +/* ---------- + * Return pointer of last relevant number after decimal point + * 12.0500 --> last relevant is '5' + * 12.0000 --> last relevant is '.' + * If there is no decimal point, return NULL (which will result in same + * behavior as if FM hadn't been specified). + * ---------- + */ +static char * +get_last_relevant_decnum(char *num) +{ + char *result, + *p = strchr(num, '.'); + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "get_last_relevant_decnum()"); +#endif + + if (!p) + return NULL; + + result = p; + + while (*(++p)) + { + if (*p != '0') + result = p; + } + + return result; +} + +/* + * These macros are used in NUM_processor() and its subsidiary routines. + * OVERLOAD_TEST: true if we've reached end of input string + * AMOUNT_TEST(s): true if at least s bytes remain in string + */ +#define OVERLOAD_TEST (Np->inout_p >= Np->inout + input_len) +#define AMOUNT_TEST(s) (Np->inout_p <= Np->inout + (input_len - (s))) + +/* ---------- + * Number extraction for TO_NUMBER() + * ---------- + */ +static void +NUM_numpart_from_char(NUMProc *Np, int id, int input_len) +{ + bool isread = false; + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, " --- scan start --- id=%s", + (id == NUM_0 || id == NUM_9) ? "NUM_0/9" : id == NUM_DEC ? "NUM_DEC" : "???"); +#endif + + if (OVERLOAD_TEST) + return; + + if (*Np->inout_p == ' ') + Np->inout_p++; + + if (OVERLOAD_TEST) + return; + + /* + * read sign before number + */ + if (*Np->number == ' ' && (id == NUM_0 || id == NUM_9) && + (Np->read_pre + Np->read_post) == 0) + { +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "Try read sign (%c), locale positive: %s, negative: %s", + *Np->inout_p, Np->L_positive_sign, Np->L_negative_sign); +#endif + + /* + * locale sign + */ + if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_PRE) + { + int x = 0; + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "Try read locale pre-sign (%c)", *Np->inout_p); +#endif + if ((x = strlen(Np->L_negative_sign)) && + AMOUNT_TEST(x) && + strncmp(Np->inout_p, Np->L_negative_sign, x) == 0) + { + Np->inout_p += x; + *Np->number = '-'; + } + else if ((x = strlen(Np->L_positive_sign)) && + AMOUNT_TEST(x) && + strncmp(Np->inout_p, Np->L_positive_sign, x) == 0) + { + Np->inout_p += x; + *Np->number = '+'; + } + } + else + { +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "Try read simple sign (%c)", *Np->inout_p); +#endif + + /* + * simple + - < > + */ + if (*Np->inout_p == '-' || (IS_BRACKET(Np->Num) && + *Np->inout_p == '<')) + { + *Np->number = '-'; /* set - */ + Np->inout_p++; + } + else if (*Np->inout_p == '+') + { + *Np->number = '+'; /* set + */ + Np->inout_p++; + } + } + } + + if (OVERLOAD_TEST) + return; + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "Scan for numbers (%c), current number: '%s'", *Np->inout_p, Np->number); +#endif + + /* + * read digit or decimal point + */ + if (isdigit((unsigned char) *Np->inout_p)) + { + if (Np->read_dec && Np->read_post == Np->Num->post) + return; + + *Np->number_p = *Np->inout_p; + Np->number_p++; + + if (Np->read_dec) + 
Np->read_post++; + else + Np->read_pre++; + + isread = true; + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "Read digit (%c)", *Np->inout_p); +#endif + } + else if (IS_DECIMAL(Np->Num) && Np->read_dec == false) + { + /* + * We need not test IS_LDECIMAL(Np->Num) explicitly here, because + * Np->decimal is always just "." if we don't have a D format token. + * So we just unconditionally match to Np->decimal. + */ + int x = strlen(Np->decimal); + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "Try read decimal point (%c)", + *Np->inout_p); +#endif + if (x && AMOUNT_TEST(x) && strncmp(Np->inout_p, Np->decimal, x) == 0) + { + Np->inout_p += x - 1; + *Np->number_p = '.'; + Np->number_p++; + Np->read_dec = true; + isread = true; + } + } + + if (OVERLOAD_TEST) + return; + + /* + * Read sign behind "last" number + * + * We need sign detection because determine exact position of post-sign is + * difficult: + * + * FM9999.9999999S -> 123.001- 9.9S -> .5- FM9.999999MI -> + * 5.01- + */ + if (*Np->number == ' ' && Np->read_pre + Np->read_post > 0) + { + /* + * locale sign (NUM_S) is always anchored behind a last number, if: - + * locale sign expected - last read char was NUM_0/9 or NUM_DEC - and + * next char is not digit + */ + if (IS_LSIGN(Np->Num) && isread && + (Np->inout_p + 1) < Np->inout + input_len && + !isdigit((unsigned char) *(Np->inout_p + 1))) + { + int x; + char *tmp = Np->inout_p++; + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "Try read locale post-sign (%c)", *Np->inout_p); +#endif + if ((x = strlen(Np->L_negative_sign)) && + AMOUNT_TEST(x) && + strncmp(Np->inout_p, Np->L_negative_sign, x) == 0) + { + Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */ + *Np->number = '-'; + } + else if ((x = strlen(Np->L_positive_sign)) && + AMOUNT_TEST(x) && + strncmp(Np->inout_p, Np->L_positive_sign, x) == 0) + { + Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */ + *Np->number = '+'; + } + if (*Np->number == ' ') + /* no sign read */ + Np->inout_p = tmp; + } + + /* + * try read non-locale sign, it's happen only if format is not exact + * and we cannot determine sign position of MI/PL/SG, an example: + * + * FM9.999999MI -> 5.01- + * + * if (.... && IS_LSIGN(Np->Num)==false) prevents read wrong formats + * like to_number('1 -', '9S') where sign is not anchored to last + * number. + */ + else if (isread == false && IS_LSIGN(Np->Num) == false && + (IS_PLUS(Np->Num) || IS_MINUS(Np->Num))) + { +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "Try read simple post-sign (%c)", *Np->inout_p); +#endif + + /* + * simple + - + */ + if (*Np->inout_p == '-' || *Np->inout_p == '+') + /* NUM_processor() do inout_p++ */ + *Np->number = *Np->inout_p; + } + } +} + +#define IS_PREDEC_SPACE(_n) \ + (IS_ZERO((_n)->Num)==false && \ + (_n)->number == (_n)->number_p && \ + *(_n)->number == '0' && \ + (_n)->Num->post != 0) + +/* ---------- + * Add digit or sign to number-string + * ---------- + */ +static void +NUM_numpart_to_char(NUMProc *Np, int id) +{ + int end; + + if (IS_ROMAN(Np->Num)) + return; + + /* Note: in this elog() output not set '\0' in 'inout' */ + +#ifdef DEBUG_TO_FROM_CHAR + + /* + * Np->num_curr is number of current item in format-picture, it is not + * current position in inout! 
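+ * (The current write position in inout is tracked separately by Np->inout_p.)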
+ */ + elog(DEBUG_elog_output, + "SIGN_WROTE: %d, CURRENT: %d, NUMBER_P: \"%s\", INOUT: \"%s\"", + Np->sign_wrote, + Np->num_curr, + Np->number_p, + Np->inout); +#endif + Np->num_in = false; + + /* + * Write sign if real number will write to output Note: IS_PREDEC_SPACE() + * handle "9.9" --> " .1" + */ + if (Np->sign_wrote == false && + (Np->num_curr >= Np->out_pre_spaces || (IS_ZERO(Np->Num) && Np->Num->zero_start == Np->num_curr)) && + (IS_PREDEC_SPACE(Np) == false || (Np->last_relevant && *Np->last_relevant == '.'))) + { + if (IS_LSIGN(Np->Num)) + { + if (Np->Num->lsign == NUM_LSIGN_PRE) + { + if (Np->sign == '-') + strcpy(Np->inout_p, Np->L_negative_sign); + else + strcpy(Np->inout_p, Np->L_positive_sign); + Np->inout_p += strlen(Np->inout_p); + Np->sign_wrote = true; + } + } + else if (IS_BRACKET(Np->Num)) + { + *Np->inout_p = Np->sign == '+' ? ' ' : '<'; + ++Np->inout_p; + Np->sign_wrote = true; + } + else if (Np->sign == '+') + { + if (!IS_FILLMODE(Np->Num)) + { + *Np->inout_p = ' '; /* Write + */ + ++Np->inout_p; + } + Np->sign_wrote = true; + } + else if (Np->sign == '-') + { /* Write - */ + *Np->inout_p = '-'; + ++Np->inout_p; + Np->sign_wrote = true; + } + } + + + /* + * digits / FM / Zero / Dec. point + */ + if (id == NUM_9 || id == NUM_0 || id == NUM_D || id == NUM_DEC) + { + if (Np->num_curr < Np->out_pre_spaces && + (Np->Num->zero_start > Np->num_curr || !IS_ZERO(Np->Num))) + { + /* + * Write blank space + */ + if (!IS_FILLMODE(Np->Num)) + { + *Np->inout_p = ' '; /* Write ' ' */ + ++Np->inout_p; + } + } + else if (IS_ZERO(Np->Num) && + Np->num_curr < Np->out_pre_spaces && + Np->Num->zero_start <= Np->num_curr) + { + /* + * Write ZERO + */ + *Np->inout_p = '0'; /* Write '0' */ + ++Np->inout_p; + Np->num_in = true; + } + else + { + /* + * Write Decimal point + */ + if (*Np->number_p == '.') + { + if (!Np->last_relevant || *Np->last_relevant != '.') + { + strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */ + Np->inout_p += strlen(Np->inout_p); + } + + /* + * Ora 'n' -- FM9.9 --> 'n.' + */ + else if (IS_FILLMODE(Np->Num) && + Np->last_relevant && *Np->last_relevant == '.') + { + strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */ + Np->inout_p += strlen(Np->inout_p); + } + } + else + { + /* + * Write Digits + */ + if (Np->last_relevant && Np->number_p > Np->last_relevant && + id != NUM_0) + ; + + /* + * '0.1' -- 9.9 --> ' .1' + */ + else if (IS_PREDEC_SPACE(Np)) + { + if (!IS_FILLMODE(Np->Num)) + { + *Np->inout_p = ' '; + ++Np->inout_p; + } + + /* + * '0' -- FM9.9 --> '0.' + */ + else if (Np->last_relevant && *Np->last_relevant == '.') + { + *Np->inout_p = '0'; + ++Np->inout_p; + } + } + else + { + *Np->inout_p = *Np->number_p; /* Write DIGIT */ + ++Np->inout_p; + Np->num_in = true; + } + } + /* do no exceed string length */ + if (*Np->number_p) + ++Np->number_p; + } + + end = Np->num_count + (Np->out_pre_spaces ? 1 : 0) + (IS_DECIMAL(Np->Num) ? 1 : 0); + + if (Np->last_relevant && Np->last_relevant == Np->number_p) + end = Np->num_curr; + + if (Np->num_curr + 1 == end) + { + if (Np->sign_wrote == true && IS_BRACKET(Np->Num)) + { + *Np->inout_p = Np->sign == '+' ? 
' ' : '>'; + ++Np->inout_p; + } + else if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_POST) + { + if (Np->sign == '-') + strcpy(Np->inout_p, Np->L_negative_sign); + else + strcpy(Np->inout_p, Np->L_positive_sign); + Np->inout_p += strlen(Np->inout_p); + } + } + } + + ++Np->num_curr; +} + +/* + * Skip over "n" input characters, but only if they aren't numeric data + */ +static void +NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len) +{ + while (n-- > 0) + { + if (OVERLOAD_TEST) + break; /* end of input */ + if (strchr("0123456789.,+-", *Np->inout_p) != NULL) + break; /* it's a data character */ + Np->inout_p += pg_mblen(Np->inout_p); + } +} + +static char * +NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, + char *number, int input_len, int to_char_out_pre_spaces, + int sign, bool is_to_char, Oid collid) +{ + FormatNode *n; + NUMProc _Np, + *Np = &_Np; + const char *pattern; + int pattern_len; + + MemSet(Np, 0, sizeof(NUMProc)); + + Np->Num = Num; + Np->is_to_char = is_to_char; + Np->number = number; + Np->inout = inout; + Np->last_relevant = NULL; + Np->read_post = 0; + Np->read_pre = 0; + Np->read_dec = false; + + if (Np->Num->zero_start) + --Np->Num->zero_start; + + if (IS_EEEE(Np->Num)) + { + if (!Np->is_to_char) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("\"EEEE\" not supported for input"))); + return strcpy(inout, number); + } + + /* + * Roman correction + */ + if (IS_ROMAN(Np->Num)) + { + if (!Np->is_to_char) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("\"RN\" not supported for input"))); + + Np->Num->lsign = Np->Num->pre_lsign_num = Np->Num->post = + Np->Num->pre = Np->out_pre_spaces = Np->sign = 0; + + if (IS_FILLMODE(Np->Num)) + { + Np->Num->flag = 0; + Np->Num->flag |= NUM_F_FILLMODE; + } + else + Np->Num->flag = 0; + Np->Num->flag |= NUM_F_ROMAN; + } + + /* + * Sign + */ + if (is_to_char) + { + Np->sign = sign; + + /* MI/PL/SG - write sign itself and not in number */ + if (IS_PLUS(Np->Num) || IS_MINUS(Np->Num)) + { + if (IS_PLUS(Np->Num) && IS_MINUS(Np->Num) == false) + Np->sign_wrote = false; /* need sign */ + else + Np->sign_wrote = true; /* needn't sign */ + } + else + { + if (Np->sign != '-') + { + if (IS_FILLMODE(Np->Num)) + Np->Num->flag &= ~NUM_F_BRACKET; + } + + if (Np->sign == '+' && IS_FILLMODE(Np->Num) && IS_LSIGN(Np->Num) == false) + Np->sign_wrote = true; /* needn't sign */ + else + Np->sign_wrote = false; /* need sign */ + + if (Np->Num->lsign == NUM_LSIGN_PRE && Np->Num->pre == Np->Num->pre_lsign_num) + Np->Num->lsign = NUM_LSIGN_POST; + } + } + else + Np->sign = false; + + /* + * Count + */ + Np->num_count = Np->Num->post + Np->Num->pre - 1; + + if (is_to_char) + { + Np->out_pre_spaces = to_char_out_pre_spaces; + + if (IS_FILLMODE(Np->Num) && IS_DECIMAL(Np->Num)) + { + Np->last_relevant = get_last_relevant_decnum(Np->number); + + /* + * If any '0' specifiers are present, make sure we don't strip + * those digits. But don't advance last_relevant beyond the last + * character of the Np->number string, which is a hazard if the + * number got shortened due to precision limitations. 
+ */ + if (Np->last_relevant && Np->Num->zero_end > Np->out_pre_spaces) + { + int last_zero_pos; + char *last_zero; + + /* note that Np->number cannot be zero-length here */ + last_zero_pos = strlen(Np->number) - 1; + last_zero_pos = Min(last_zero_pos, + Np->Num->zero_end - Np->out_pre_spaces); + last_zero = Np->number + last_zero_pos; + if (Np->last_relevant < last_zero) + Np->last_relevant = last_zero; + } + } + + if (Np->sign_wrote == false && Np->out_pre_spaces == 0) + ++Np->num_count; + } + else + { + Np->out_pre_spaces = 0; + *Np->number = ' '; /* sign space */ + *(Np->number + 1) = '\0'; + } + + Np->num_in = 0; + Np->num_curr = 0; + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, + "\n\tSIGN: '%c'\n\tNUM: '%s'\n\tPRE: %d\n\tPOST: %d\n\tNUM_COUNT: %d\n\tNUM_PRE: %d\n\tSIGN_WROTE: %s\n\tZERO: %s\n\tZERO_START: %d\n\tZERO_END: %d\n\tLAST_RELEVANT: %s\n\tBRACKET: %s\n\tPLUS: %s\n\tMINUS: %s\n\tFILLMODE: %s\n\tROMAN: %s\n\tEEEE: %s", + Np->sign, + Np->number, + Np->Num->pre, + Np->Num->post, + Np->num_count, + Np->out_pre_spaces, + Np->sign_wrote ? "Yes" : "No", + IS_ZERO(Np->Num) ? "Yes" : "No", + Np->Num->zero_start, + Np->Num->zero_end, + Np->last_relevant ? Np->last_relevant : "<not set>", + IS_BRACKET(Np->Num) ? "Yes" : "No", + IS_PLUS(Np->Num) ? "Yes" : "No", + IS_MINUS(Np->Num) ? "Yes" : "No", + IS_FILLMODE(Np->Num) ? "Yes" : "No", + IS_ROMAN(Np->Num) ? "Yes" : "No", + IS_EEEE(Np->Num) ? "Yes" : "No" + ); +#endif + + /* + * Locale + */ + NUM_prepare_locale(Np); + + /* + * Processor direct cycle + */ + if (Np->is_to_char) + Np->number_p = Np->number; + else + Np->number_p = Np->number + 1; /* first char is space for sign */ + + for (n = node, Np->inout_p = Np->inout; n->type != NODE_TYPE_END; n++) + { + if (!Np->is_to_char) + { + /* + * Check at least one byte remains to be scanned. (In actions + * below, must use AMOUNT_TEST if we want to read more bytes than + * that.) + */ + if (OVERLOAD_TEST) + break; + } + + /* + * Format pictures actions + */ + if (n->type == NODE_TYPE_ACTION) + { + /* + * Create/read digit/zero/blank/sign/special-case + * + * 'NUM_S' note: The locale sign is anchored to number and we + * read/write it when we work with first or last number + * (NUM_0/NUM_9). This is why NUM_S is missing in switch(). + * + * Notice the "Np->inout_p++" at the bottom of the loop. This is + * why most of the actions advance inout_p one less than you might + * expect. In cases where we don't want that increment to happen, + * a switch case ends with "continue" not "break". 
+ */ + switch (n->key->id) + { + case NUM_9: + case NUM_0: + case NUM_DEC: + case NUM_D: + if (Np->is_to_char) + { + NUM_numpart_to_char(Np, n->key->id); + continue; /* for() */ + } + else + { + NUM_numpart_from_char(Np, n->key->id, input_len); + break; /* switch() case: */ + } + + case NUM_COMMA: + if (Np->is_to_char) + { + if (!Np->num_in) + { + if (IS_FILLMODE(Np->Num)) + continue; + else + *Np->inout_p = ' '; + } + else + *Np->inout_p = ','; + } + else + { + if (!Np->num_in) + { + if (IS_FILLMODE(Np->Num)) + continue; + } + if (*Np->inout_p != ',') + continue; + } + break; + + case NUM_G: + pattern = Np->L_thousands_sep; + pattern_len = strlen(pattern); + if (Np->is_to_char) + { + if (!Np->num_in) + { + if (IS_FILLMODE(Np->Num)) + continue; + else + { + /* just in case there are MB chars */ + pattern_len = pg_mbstrlen(pattern); + memset(Np->inout_p, ' ', pattern_len); + Np->inout_p += pattern_len - 1; + } + } + else + { + strcpy(Np->inout_p, pattern); + Np->inout_p += pattern_len - 1; + } + } + else + { + if (!Np->num_in) + { + if (IS_FILLMODE(Np->Num)) + continue; + } + + /* + * Because L_thousands_sep typically contains data + * characters (either '.' or ','), we can't use + * NUM_eat_non_data_chars here. Instead skip only if + * the input matches L_thousands_sep. + */ + if (AMOUNT_TEST(pattern_len) && + strncmp(Np->inout_p, pattern, pattern_len) == 0) + Np->inout_p += pattern_len - 1; + else + continue; + } + break; + + case NUM_L: + pattern = Np->L_currency_symbol; + if (Np->is_to_char) + { + strcpy(Np->inout_p, pattern); + Np->inout_p += strlen(pattern) - 1; + } + else + { + NUM_eat_non_data_chars(Np, pg_mbstrlen(pattern), input_len); + continue; + } + break; + + case NUM_RN: + if (IS_FILLMODE(Np->Num)) + { + strcpy(Np->inout_p, Np->number_p); + Np->inout_p += strlen(Np->inout_p) - 1; + } + else + { + sprintf(Np->inout_p, "%15s", Np->number_p); + Np->inout_p += strlen(Np->inout_p) - 1; + } + break; + + case NUM_rn: + if (IS_FILLMODE(Np->Num)) + { + strcpy(Np->inout_p, asc_tolower_z(Np->number_p)); + Np->inout_p += strlen(Np->inout_p) - 1; + } + else + { + sprintf(Np->inout_p, "%15s", asc_tolower_z(Np->number_p)); + Np->inout_p += strlen(Np->inout_p) - 1; + } + break; + + case NUM_th: + if (IS_ROMAN(Np->Num) || *Np->number == '#' || + Np->sign == '-' || IS_DECIMAL(Np->Num)) + continue; + + if (Np->is_to_char) + { + strcpy(Np->inout_p, get_th(Np->number, TH_LOWER)); + Np->inout_p += 1; + } + else + { + /* All variants of 'th' occupy 2 characters */ + NUM_eat_non_data_chars(Np, 2, input_len); + continue; + } + break; + + case NUM_TH: + if (IS_ROMAN(Np->Num) || *Np->number == '#' || + Np->sign == '-' || IS_DECIMAL(Np->Num)) + continue; + + if (Np->is_to_char) + { + strcpy(Np->inout_p, get_th(Np->number, TH_UPPER)); + Np->inout_p += 1; + } + else + { + /* All variants of 'TH' occupy 2 characters */ + NUM_eat_non_data_chars(Np, 2, input_len); + continue; + } + break; + + case NUM_MI: + if (Np->is_to_char) + { + if (Np->sign == '-') + *Np->inout_p = '-'; + else if (IS_FILLMODE(Np->Num)) + continue; + else + *Np->inout_p = ' '; + } + else + { + if (*Np->inout_p == '-') + *Np->number = '-'; + else + { + NUM_eat_non_data_chars(Np, 1, input_len); + continue; + } + } + break; + + case NUM_PL: + if (Np->is_to_char) + { + if (Np->sign == '+') + *Np->inout_p = '+'; + else if (IS_FILLMODE(Np->Num)) + continue; + else + *Np->inout_p = ' '; + } + else + { + if (*Np->inout_p == '+') + *Np->number = '+'; + else + { + NUM_eat_non_data_chars(Np, 1, input_len); + continue; + } + } + break; + + case 
NUM_SG: + if (Np->is_to_char) + *Np->inout_p = Np->sign; + else + { + if (*Np->inout_p == '-') + *Np->number = '-'; + else if (*Np->inout_p == '+') + *Np->number = '+'; + else + { + NUM_eat_non_data_chars(Np, 1, input_len); + continue; + } + } + break; + + default: + continue; + break; + } + } + else + { + /* + * In TO_CHAR, non-pattern characters in the format are copied to + * the output. In TO_NUMBER, we skip one input character for each + * non-pattern format character, whether or not it matches the + * format character. + */ + if (Np->is_to_char) + { + strcpy(Np->inout_p, n->character); + Np->inout_p += strlen(Np->inout_p); + } + else + { + Np->inout_p += pg_mblen(Np->inout_p); + } + continue; + } + Np->inout_p++; + } + + if (Np->is_to_char) + { + *Np->inout_p = '\0'; + return Np->inout; + } + else + { + if (*(Np->number_p - 1) == '.') + *(Np->number_p - 1) = '\0'; + else + *Np->number_p = '\0'; + + /* + * Correction - precision of dec. number + */ + Np->Num->post = Np->read_post; + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "TO_NUMBER (number): '%s'", Np->number); +#endif + return Np->number; + } +} + +/* ---------- + * MACRO: Start part of NUM - for all NUM's to_char variants + * (sorry, but I hate copy same code - macro is better..) + * ---------- + */ +#define NUM_TOCHAR_prepare \ +do { \ + int len = VARSIZE_ANY_EXHDR(fmt); \ + if (len <= 0 || len >= (INT_MAX-VARHDRSZ)/NUM_MAX_ITEM_SIZ) \ + PG_RETURN_TEXT_P(cstring_to_text("")); \ + result = (text *) palloc0((len * NUM_MAX_ITEM_SIZ) + 1 + VARHDRSZ); \ + format = NUM_cache(len, &Num, fmt, &shouldFree); \ +} while (0) + +/* ---------- + * MACRO: Finish part of NUM + * ---------- + */ +#define NUM_TOCHAR_finish \ +do { \ + int len; \ + \ + NUM_processor(format, &Num, VARDATA(result), numstr, 0, out_pre_spaces, sign, true, PG_GET_COLLATION()); \ + \ + if (shouldFree) \ + pfree(format); \ + \ + /* \ + * Convert null-terminated representation of result to standard text. \ + * The result is usually much bigger than it needs to be, but there \ + * seems little point in realloc'ing it smaller. 
\ + */ \ + len = strlen(VARDATA(result)); \ + SET_VARSIZE(result, len + VARHDRSZ); \ +} while (0) + +/* ------------------- + * NUMERIC to_number() (convert string to numeric) + * ------------------- + */ +Datum +numeric_to_number(PG_FUNCTION_ARGS) +{ + text *value = PG_GETARG_TEXT_PP(0); + text *fmt = PG_GETARG_TEXT_PP(1); + NUMDesc Num; + Datum result; + FormatNode *format; + char *numstr; + bool shouldFree; + int len = 0; + int scale, + precision; + + len = VARSIZE_ANY_EXHDR(fmt); + + if (len <= 0 || len >= INT_MAX / NUM_MAX_ITEM_SIZ) + PG_RETURN_NULL(); + + format = NUM_cache(len, &Num, fmt, &shouldFree); + + numstr = (char *) palloc((len * NUM_MAX_ITEM_SIZ) + 1); + + NUM_processor(format, &Num, VARDATA_ANY(value), numstr, + VARSIZE_ANY_EXHDR(value), 0, 0, false, PG_GET_COLLATION()); + + scale = Num.post; + precision = Num.pre + Num.multi + scale; + + if (shouldFree) + pfree(format); + + result = DirectFunctionCall3(numeric_in, + CStringGetDatum(numstr), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(((precision << 16) | scale) + VARHDRSZ)); + + if (IS_MULTI(&Num)) + { + Numeric x; + Numeric a = int64_to_numeric(10); + Numeric b = int64_to_numeric(-Num.multi); + + x = DatumGetNumeric(DirectFunctionCall2(numeric_power, + NumericGetDatum(a), + NumericGetDatum(b))); + result = DirectFunctionCall2(numeric_mul, + result, + NumericGetDatum(x)); + } + + pfree(numstr); + return result; +} + +/* ------------------ + * NUMERIC to_char() + * ------------------ + */ +Datum +numeric_to_char(PG_FUNCTION_ARGS) +{ + Numeric value = PG_GETARG_NUMERIC(0); + text *fmt = PG_GETARG_TEXT_PP(1); + NUMDesc Num; + FormatNode *format; + text *result; + bool shouldFree; + int out_pre_spaces = 0, + sign = 0; + char *numstr, + *orgnum, + *p; + Numeric x; + + NUM_TOCHAR_prepare; + + /* + * On DateType depend part (numeric) + */ + if (IS_ROMAN(&Num)) + { + x = DatumGetNumeric(DirectFunctionCall2(numeric_round, + NumericGetDatum(value), + Int32GetDatum(0))); + numstr = + int_to_roman(DatumGetInt32(DirectFunctionCall1(numeric_int4, + NumericGetDatum(x)))); + } + else if (IS_EEEE(&Num)) + { + orgnum = numeric_out_sci(value, Num.post); + + /* + * numeric_out_sci() does not emit a sign for positive numbers. We + * need to add a space in this case so that positive and negative + * numbers are aligned. Also must check for NaN/infinity cases, which + * we handle the same way as in float8_to_char. + */ + if (strcmp(orgnum, "NaN") == 0 || + strcmp(orgnum, "Infinity") == 0 || + strcmp(orgnum, "-Infinity") == 0) + { + /* + * Allow 6 characters for the leading sign, the decimal point, + * "e", the exponent's sign and two exponent digits. 
+ */ + numstr = (char *) palloc(Num.pre + Num.post + 7); + fill_str(numstr, '#', Num.pre + Num.post + 6); + *numstr = ' '; + *(numstr + Num.pre + 1) = '.'; + } + else if (*orgnum != '-') + { + numstr = (char *) palloc(strlen(orgnum) + 2); + *numstr = ' '; + strcpy(numstr + 1, orgnum); + } + else + { + numstr = orgnum; + } + } + else + { + int numstr_pre_len; + Numeric val = value; + + if (IS_MULTI(&Num)) + { + Numeric a = int64_to_numeric(10); + Numeric b = int64_to_numeric(Num.multi); + + x = DatumGetNumeric(DirectFunctionCall2(numeric_power, + NumericGetDatum(a), + NumericGetDatum(b))); + val = DatumGetNumeric(DirectFunctionCall2(numeric_mul, + NumericGetDatum(value), + NumericGetDatum(x))); + Num.pre += Num.multi; + } + + x = DatumGetNumeric(DirectFunctionCall2(numeric_round, + NumericGetDatum(val), + Int32GetDatum(Num.post))); + orgnum = DatumGetCString(DirectFunctionCall1(numeric_out, + NumericGetDatum(x))); + + if (*orgnum == '-') + { + sign = '-'; + numstr = orgnum + 1; + } + else + { + sign = '+'; + numstr = orgnum; + } + + if ((p = strchr(numstr, '.'))) + numstr_pre_len = p - numstr; + else + numstr_pre_len = strlen(numstr); + + /* needs padding? */ + if (numstr_pre_len < Num.pre) + out_pre_spaces = Num.pre - numstr_pre_len; + /* overflowed prefix digit format? */ + else if (numstr_pre_len > Num.pre) + { + numstr = (char *) palloc(Num.pre + Num.post + 2); + fill_str(numstr, '#', Num.pre + Num.post + 1); + *(numstr + Num.pre) = '.'; + } + } + + NUM_TOCHAR_finish; + PG_RETURN_TEXT_P(result); +} + +/* --------------- + * INT4 to_char() + * --------------- + */ +Datum +int4_to_char(PG_FUNCTION_ARGS) +{ + int32 value = PG_GETARG_INT32(0); + text *fmt = PG_GETARG_TEXT_PP(1); + NUMDesc Num; + FormatNode *format; + text *result; + bool shouldFree; + int out_pre_spaces = 0, + sign = 0; + char *numstr, + *orgnum; + + NUM_TOCHAR_prepare; + + /* + * On DateType depend part (int32) + */ + if (IS_ROMAN(&Num)) + numstr = int_to_roman(value); + else if (IS_EEEE(&Num)) + { + /* we can do it easily because float8 won't lose any precision */ + float8 val = (float8) value; + + orgnum = (char *) psprintf("%+.*e", Num.post, val); + + /* + * Swap a leading positive sign for a space. + */ + if (*orgnum == '+') + *orgnum = ' '; + + numstr = orgnum; + } + else + { + int numstr_pre_len; + + if (IS_MULTI(&Num)) + { + orgnum = DatumGetCString(DirectFunctionCall1(int4out, + Int32GetDatum(value * ((int32) pow((double) 10, (double) Num.multi))))); + Num.pre += Num.multi; + } + else + { + orgnum = DatumGetCString(DirectFunctionCall1(int4out, + Int32GetDatum(value))); + } + + if (*orgnum == '-') + { + sign = '-'; + orgnum++; + } + else + sign = '+'; + + numstr_pre_len = strlen(orgnum); + + /* post-decimal digits? Pad out with zeros. */ + if (Num.post) + { + numstr = (char *) palloc(numstr_pre_len + Num.post + 2); + strcpy(numstr, orgnum); + *(numstr + numstr_pre_len) = '.'; + memset(numstr + numstr_pre_len + 1, '0', Num.post); + *(numstr + numstr_pre_len + Num.post + 1) = '\0'; + } + else + numstr = orgnum; + + /* needs padding? */ + if (numstr_pre_len < Num.pre) + out_pre_spaces = Num.pre - numstr_pre_len; + /* overflowed prefix digit format? 
*/ + else if (numstr_pre_len > Num.pre) + { + numstr = (char *) palloc(Num.pre + Num.post + 2); + fill_str(numstr, '#', Num.pre + Num.post + 1); + *(numstr + Num.pre) = '.'; + } + } + + NUM_TOCHAR_finish; + PG_RETURN_TEXT_P(result); +} + +/* --------------- + * INT8 to_char() + * --------------- + */ +Datum +int8_to_char(PG_FUNCTION_ARGS) +{ + int64 value = PG_GETARG_INT64(0); + text *fmt = PG_GETARG_TEXT_PP(1); + NUMDesc Num; + FormatNode *format; + text *result; + bool shouldFree; + int out_pre_spaces = 0, + sign = 0; + char *numstr, + *orgnum; + + NUM_TOCHAR_prepare; + + /* + * On DateType depend part (int32) + */ + if (IS_ROMAN(&Num)) + { + /* Currently don't support int8 conversion to roman... */ + numstr = int_to_roman(DatumGetInt32(DirectFunctionCall1(int84, Int64GetDatum(value)))); + } + else if (IS_EEEE(&Num)) + { + /* to avoid loss of precision, must go via numeric not float8 */ + orgnum = numeric_out_sci(int64_to_numeric(value), + Num.post); + + /* + * numeric_out_sci() does not emit a sign for positive numbers. We + * need to add a space in this case so that positive and negative + * numbers are aligned. We don't have to worry about NaN/inf here. + */ + if (*orgnum != '-') + { + numstr = (char *) palloc(strlen(orgnum) + 2); + *numstr = ' '; + strcpy(numstr + 1, orgnum); + } + else + { + numstr = orgnum; + } + } + else + { + int numstr_pre_len; + + if (IS_MULTI(&Num)) + { + double multi = pow((double) 10, (double) Num.multi); + + value = DatumGetInt64(DirectFunctionCall2(int8mul, + Int64GetDatum(value), + DirectFunctionCall1(dtoi8, + Float8GetDatum(multi)))); + Num.pre += Num.multi; + } + + orgnum = DatumGetCString(DirectFunctionCall1(int8out, + Int64GetDatum(value))); + + if (*orgnum == '-') + { + sign = '-'; + orgnum++; + } + else + sign = '+'; + + numstr_pre_len = strlen(orgnum); + + /* post-decimal digits? Pad out with zeros. */ + if (Num.post) + { + numstr = (char *) palloc(numstr_pre_len + Num.post + 2); + strcpy(numstr, orgnum); + *(numstr + numstr_pre_len) = '.'; + memset(numstr + numstr_pre_len + 1, '0', Num.post); + *(numstr + numstr_pre_len + Num.post + 1) = '\0'; + } + else + numstr = orgnum; + + /* needs padding? */ + if (numstr_pre_len < Num.pre) + out_pre_spaces = Num.pre - numstr_pre_len; + /* overflowed prefix digit format? */ + else if (numstr_pre_len > Num.pre) + { + numstr = (char *) palloc(Num.pre + Num.post + 2); + fill_str(numstr, '#', Num.pre + Num.post + 1); + *(numstr + Num.pre) = '.'; + } + } + + NUM_TOCHAR_finish; + PG_RETURN_TEXT_P(result); +} + +/* ----------------- + * FLOAT4 to_char() + * ----------------- + */ +Datum +float4_to_char(PG_FUNCTION_ARGS) +{ + float4 value = PG_GETARG_FLOAT4(0); + text *fmt = PG_GETARG_TEXT_PP(1); + NUMDesc Num; + FormatNode *format; + text *result; + bool shouldFree; + int out_pre_spaces = 0, + sign = 0; + char *numstr, + *p; + + NUM_TOCHAR_prepare; + + if (IS_ROMAN(&Num)) + numstr = int_to_roman((int) rint(value)); + else if (IS_EEEE(&Num)) + { + if (isnan(value) || isinf(value)) + { + /* + * Allow 6 characters for the leading sign, the decimal point, + * "e", the exponent's sign and two exponent digits. + */ + numstr = (char *) palloc(Num.pre + Num.post + 7); + fill_str(numstr, '#', Num.pre + Num.post + 6); + *numstr = ' '; + *(numstr + Num.pre + 1) = '.'; + } + else + { + numstr = psprintf("%+.*e", Num.post, value); + + /* + * Swap a leading positive sign for a space. 
+ */ + if (*numstr == '+') + *numstr = ' '; + } + } + else + { + float4 val = value; + char *orgnum; + int numstr_pre_len; + + if (IS_MULTI(&Num)) + { + float multi = pow((double) 10, (double) Num.multi); + + val = value * multi; + Num.pre += Num.multi; + } + + orgnum = psprintf("%.0f", fabs(val)); + numstr_pre_len = strlen(orgnum); + + /* adjust post digits to fit max float digits */ + if (numstr_pre_len >= FLT_DIG) + Num.post = 0; + else if (numstr_pre_len + Num.post > FLT_DIG) + Num.post = FLT_DIG - numstr_pre_len; + orgnum = psprintf("%.*f", Num.post, val); + + if (*orgnum == '-') + { /* < 0 */ + sign = '-'; + numstr = orgnum + 1; + } + else + { + sign = '+'; + numstr = orgnum; + } + + if ((p = strchr(numstr, '.'))) + numstr_pre_len = p - numstr; + else + numstr_pre_len = strlen(numstr); + + /* needs padding? */ + if (numstr_pre_len < Num.pre) + out_pre_spaces = Num.pre - numstr_pre_len; + /* overflowed prefix digit format? */ + else if (numstr_pre_len > Num.pre) + { + numstr = (char *) palloc(Num.pre + Num.post + 2); + fill_str(numstr, '#', Num.pre + Num.post + 1); + *(numstr + Num.pre) = '.'; + } + } + + NUM_TOCHAR_finish; + PG_RETURN_TEXT_P(result); +} + +/* ----------------- + * FLOAT8 to_char() + * ----------------- + */ +Datum +float8_to_char(PG_FUNCTION_ARGS) +{ + float8 value = PG_GETARG_FLOAT8(0); + text *fmt = PG_GETARG_TEXT_PP(1); + NUMDesc Num; + FormatNode *format; + text *result; + bool shouldFree; + int out_pre_spaces = 0, + sign = 0; + char *numstr, + *p; + + NUM_TOCHAR_prepare; + + if (IS_ROMAN(&Num)) + numstr = int_to_roman((int) rint(value)); + else if (IS_EEEE(&Num)) + { + if (isnan(value) || isinf(value)) + { + /* + * Allow 6 characters for the leading sign, the decimal point, + * "e", the exponent's sign and two exponent digits. + */ + numstr = (char *) palloc(Num.pre + Num.post + 7); + fill_str(numstr, '#', Num.pre + Num.post + 6); + *numstr = ' '; + *(numstr + Num.pre + 1) = '.'; + } + else + { + numstr = psprintf("%+.*e", Num.post, value); + + /* + * Swap a leading positive sign for a space. + */ + if (*numstr == '+') + *numstr = ' '; + } + } + else + { + float8 val = value; + char *orgnum; + int numstr_pre_len; + + if (IS_MULTI(&Num)) + { + double multi = pow((double) 10, (double) Num.multi); + + val = value * multi; + Num.pre += Num.multi; + } + + orgnum = psprintf("%.0f", fabs(val)); + numstr_pre_len = strlen(orgnum); + + /* adjust post digits to fit max double digits */ + if (numstr_pre_len >= DBL_DIG) + Num.post = 0; + else if (numstr_pre_len + Num.post > DBL_DIG) + Num.post = DBL_DIG - numstr_pre_len; + orgnum = psprintf("%.*f", Num.post, val); + + if (*orgnum == '-') + { /* < 0 */ + sign = '-'; + numstr = orgnum + 1; + } + else + { + sign = '+'; + numstr = orgnum; + } + + if ((p = strchr(numstr, '.'))) + numstr_pre_len = p - numstr; + else + numstr_pre_len = strlen(numstr); + + /* needs padding? */ + if (numstr_pre_len < Num.pre) + out_pre_spaces = Num.pre - numstr_pre_len; + /* overflowed prefix digit format? 
*/ + else if (numstr_pre_len > Num.pre) + { + numstr = (char *) palloc(Num.pre + Num.post + 2); + fill_str(numstr, '#', Num.pre + Num.post + 1); + *(numstr + Num.pre) = '.'; + } + } + + NUM_TOCHAR_finish; + PG_RETURN_TEXT_P(result); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/genfile.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/genfile.c new file mode 100644 index 00000000000..f281ce98068 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/genfile.c @@ -0,0 +1,779 @@ +/*------------------------------------------------------------------------- + * + * genfile.c + * Functions for direct access to files + * + * + * Copyright (c) 2004-2023, PostgreSQL Global Development Group + * + * Author: Andreas Pflug <pgadmin@pse-consulting.de> + * + * IDENTIFICATION + * src/backend/utils/adt/genfile.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <sys/file.h> +#include <sys/stat.h> +#include <unistd.h> +#include <dirent.h> + +#include "access/htup_details.h" +#include "access/xlog_internal.h" +#include "catalog/pg_authid.h" +#include "catalog/pg_tablespace_d.h" +#include "catalog/pg_type.h" +#include "funcapi.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "postmaster/syslogger.h" +#include "replication/slot.h" +#include "storage/fd.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/memutils.h" +#include "utils/syscache.h" +#include "utils/timestamp.h" + + +/* + * Convert a "text" filename argument to C string, and check it's allowable. + * + * Filename may be absolute or relative to the DataDir, but we only allow + * absolute paths that match DataDir or Log_directory. + * + * This does a privilege check against the 'pg_read_server_files' role, so + * this function is really only appropriate for callers who are only checking + * 'read' access. Do not use this function if you are looking for a check + * for 'write' or 'program' access without updating it to access the type + * of check as an argument and checking the appropriate role membership. + */ +static char * +convert_and_check_filename(text *arg) +{ + char *filename; + + filename = text_to_cstring(arg); + canonicalize_path(filename); /* filename can change length here */ + + /* + * Roles with privileges of the 'pg_read_server_files' role are allowed to + * access any files on the server as the PG user, so no need to do any + * further checks here. + */ + if (has_privs_of_role(GetUserId(), ROLE_PG_READ_SERVER_FILES)) + return filename; + + /* + * User isn't a member of the pg_read_server_files role, so check if it's + * allowable + */ + if (is_absolute_path(filename)) + { + /* + * Allow absolute paths if within DataDir or Log_directory, even + * though Log_directory might be outside DataDir. + */ + if (!path_is_prefix_of_path(DataDir, filename) && + (!is_absolute_path(Log_directory) || + !path_is_prefix_of_path(Log_directory, filename))) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("absolute path not allowed"))); + } + else if (!path_is_relative_and_below_cwd(filename)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("path must be in or below the data directory"))); + + return filename; +} + + +/* + * Read a section of a file, returning it as bytea + * + * Caller is responsible for all permissions checking. + * + * We read the whole of the file when bytes_to_read is negative. 
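+ * A negative seek_offset is interpreted as an offset from the end of the file; in that case we seek with SEEK_END.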
+ */ +static bytea * +read_binary_file(const char *filename, int64 seek_offset, int64 bytes_to_read, + bool missing_ok) +{ + bytea *buf; + size_t nbytes = 0; + FILE *file; + + /* clamp request size to what we can actually deliver */ + if (bytes_to_read > (int64) (MaxAllocSize - VARHDRSZ)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("requested length too large"))); + + if ((file = AllocateFile(filename, PG_BINARY_R)) == NULL) + { + if (missing_ok && errno == ENOENT) + return NULL; + else + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\" for reading: %m", + filename))); + } + + if (fseeko(file, (off_t) seek_offset, + (seek_offset >= 0) ? SEEK_SET : SEEK_END) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not seek in file \"%s\": %m", filename))); + + if (bytes_to_read >= 0) + { + /* If passed explicit read size just do it */ + buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ); + + nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file); + } + else + { + /* Negative read size, read rest of file */ + StringInfoData sbuf; + + initStringInfo(&sbuf); + /* Leave room in the buffer for the varlena length word */ + sbuf.len += VARHDRSZ; + Assert(sbuf.len < sbuf.maxlen); + + while (!(feof(file) || ferror(file))) + { + size_t rbytes; + + /* Minimum amount to read at a time */ +#define MIN_READ_SIZE 4096 + + /* + * If not at end of file, and sbuf.len is equal to MaxAllocSize - + * 1, then either the file is too large, or there is nothing left + * to read. Attempt to read one more byte to see if the end of + * file has been reached. If not, the file is too large; we'd + * rather give the error message for that ourselves. + */ + if (sbuf.len == MaxAllocSize - 1) + { + char rbuf[1]; + + if (fread(rbuf, 1, 1, file) != 0 || !feof(file)) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("file length too large"))); + else + break; + } + + /* OK, ensure that we can read at least MIN_READ_SIZE */ + enlargeStringInfo(&sbuf, MIN_READ_SIZE); + + /* + * stringinfo.c likes to allocate in powers of 2, so it's likely + * that much more space is available than we asked for. Use all + * of it, rather than making more fread calls than necessary. + */ + rbytes = fread(sbuf.data + sbuf.len, 1, + (size_t) (sbuf.maxlen - sbuf.len - 1), file); + sbuf.len += rbytes; + nbytes += rbytes; + } + + /* Now we can commandeer the stringinfo's buffer as the result */ + buf = (bytea *) sbuf.data; + } + + if (ferror(file)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", filename))); + + SET_VARSIZE(buf, nbytes + VARHDRSZ); + + FreeFile(file); + + return buf; +} + +/* + * Similar to read_binary_file, but we verify that the contents are valid + * in the database encoding. + */ +static text * +read_text_file(const char *filename, int64 seek_offset, int64 bytes_to_read, + bool missing_ok) +{ + bytea *buf; + + buf = read_binary_file(filename, seek_offset, bytes_to_read, missing_ok); + + if (buf != NULL) + { + /* Make sure the input is valid */ + pg_verifymbstr(VARDATA(buf), VARSIZE(buf) - VARHDRSZ, false); + + /* OK, we can cast it to text safely */ + return (text *) buf; + } + else + return NULL; +} + +/* + * Read a section of a file, returning it as text + * + * This function is kept to support adminpack 1.0. 
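+ * Unlike the GRANT-controlled variants below, it requires superuser.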
+ */ +Datum +pg_read_file(PG_FUNCTION_ARGS) +{ + text *filename_t = PG_GETARG_TEXT_PP(0); + int64 seek_offset = 0; + int64 bytes_to_read = -1; + bool missing_ok = false; + char *filename; + text *result; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to read files with adminpack 1.0"), + /* translator: %s is a SQL function name */ + errhint("Consider using %s, which is part of core, instead.", + "pg_read_file()"))); + + /* handle optional arguments */ + if (PG_NARGS() >= 3) + { + seek_offset = PG_GETARG_INT64(1); + bytes_to_read = PG_GETARG_INT64(2); + + if (bytes_to_read < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("requested length cannot be negative"))); + } + if (PG_NARGS() >= 4) + missing_ok = PG_GETARG_BOOL(3); + + filename = convert_and_check_filename(filename_t); + + result = read_text_file(filename, seek_offset, bytes_to_read, missing_ok); + if (result) + PG_RETURN_TEXT_P(result); + else + PG_RETURN_NULL(); +} + +/* + * Read a section of a file, returning it as text + * + * No superuser check done here- instead privileges are handled by the + * GRANT system. + * + * If read_to_eof is true, bytes_to_read must be -1, otherwise negative values + * are not allowed for bytes_to_read. + */ +static text * +pg_read_file_common(text *filename_t, int64 seek_offset, int64 bytes_to_read, + bool read_to_eof, bool missing_ok) +{ + if (read_to_eof) + Assert(bytes_to_read == -1); + else if (bytes_to_read < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("requested length cannot be negative"))); + + return read_text_file(convert_and_check_filename(filename_t), + seek_offset, bytes_to_read, missing_ok); +} + +/* + * Read a section of a file, returning it as bytea + * + * Parameters are interpreted the same as pg_read_file_common(). + */ +static bytea * +pg_read_binary_file_common(text *filename_t, + int64 seek_offset, int64 bytes_to_read, + bool read_to_eof, bool missing_ok) +{ + if (read_to_eof) + Assert(bytes_to_read == -1); + else if (bytes_to_read < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("requested length cannot be negative"))); + + return read_binary_file(convert_and_check_filename(filename_t), + seek_offset, bytes_to_read, missing_ok); +} + + +/* + * Wrapper functions for the variants of SQL functions pg_read_file() and + * pg_read_binary_file(). + * + * These are necessary to pass the sanity check in opr_sanity, which checks + * that all built-in functions that share the implementing C function take + * the same number of arguments. 
+ */ +Datum +pg_read_file_off_len(PG_FUNCTION_ARGS) +{ + text *filename_t = PG_GETARG_TEXT_PP(0); + int64 seek_offset = PG_GETARG_INT64(1); + int64 bytes_to_read = PG_GETARG_INT64(2); + text *ret; + + ret = pg_read_file_common(filename_t, seek_offset, bytes_to_read, + false, false); + if (!ret) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(ret); +} + +Datum +pg_read_file_off_len_missing(PG_FUNCTION_ARGS) +{ + text *filename_t = PG_GETARG_TEXT_PP(0); + int64 seek_offset = PG_GETARG_INT64(1); + int64 bytes_to_read = PG_GETARG_INT64(2); + bool missing_ok = PG_GETARG_BOOL(3); + text *ret; + + ret = pg_read_file_common(filename_t, seek_offset, bytes_to_read, + false, missing_ok); + + if (!ret) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(ret); +} + +Datum +pg_read_file_all(PG_FUNCTION_ARGS) +{ + text *filename_t = PG_GETARG_TEXT_PP(0); + text *ret; + + ret = pg_read_file_common(filename_t, 0, -1, true, false); + + if (!ret) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(ret); +} + +Datum +pg_read_file_all_missing(PG_FUNCTION_ARGS) +{ + text *filename_t = PG_GETARG_TEXT_PP(0); + bool missing_ok = PG_GETARG_BOOL(1); + text *ret; + + ret = pg_read_file_common(filename_t, 0, -1, true, missing_ok); + + if (!ret) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(ret); +} + +Datum +pg_read_binary_file_off_len(PG_FUNCTION_ARGS) +{ + text *filename_t = PG_GETARG_TEXT_PP(0); + int64 seek_offset = PG_GETARG_INT64(1); + int64 bytes_to_read = PG_GETARG_INT64(2); + text *ret; + + ret = pg_read_binary_file_common(filename_t, seek_offset, bytes_to_read, + false, false); + if (!ret) + PG_RETURN_NULL(); + + PG_RETURN_BYTEA_P(ret); +} + +Datum +pg_read_binary_file_off_len_missing(PG_FUNCTION_ARGS) +{ + text *filename_t = PG_GETARG_TEXT_PP(0); + int64 seek_offset = PG_GETARG_INT64(1); + int64 bytes_to_read = PG_GETARG_INT64(2); + bool missing_ok = PG_GETARG_BOOL(3); + text *ret; + + ret = pg_read_binary_file_common(filename_t, seek_offset, bytes_to_read, + false, missing_ok); + if (!ret) + PG_RETURN_NULL(); + + PG_RETURN_BYTEA_P(ret); +} + +Datum +pg_read_binary_file_all(PG_FUNCTION_ARGS) +{ + text *filename_t = PG_GETARG_TEXT_PP(0); + text *ret; + + ret = pg_read_binary_file_common(filename_t, 0, -1, true, false); + + if (!ret) + PG_RETURN_NULL(); + + PG_RETURN_BYTEA_P(ret); +} + +Datum +pg_read_binary_file_all_missing(PG_FUNCTION_ARGS) +{ + text *filename_t = PG_GETARG_TEXT_PP(0); + bool missing_ok = PG_GETARG_BOOL(1); + text *ret; + + ret = pg_read_binary_file_common(filename_t, 0, -1, true, missing_ok); + + if (!ret) + PG_RETURN_NULL(); + + PG_RETURN_BYTEA_P(ret); +} + +/* + * stat a file + */ +Datum +pg_stat_file(PG_FUNCTION_ARGS) +{ + text *filename_t = PG_GETARG_TEXT_PP(0); + char *filename; + struct stat fst; + Datum values[6]; + bool isnull[6]; + HeapTuple tuple; + TupleDesc tupdesc; + bool missing_ok = false; + + /* check the optional argument */ + if (PG_NARGS() == 2) + missing_ok = PG_GETARG_BOOL(1); + + filename = convert_and_check_filename(filename_t); + + if (stat(filename, &fst) < 0) + { + if (missing_ok && errno == ENOENT) + PG_RETURN_NULL(); + else + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", filename))); + } + + /* + * This record type had better match the output parameters declared for me + * in pg_proc.h. 
+ */ + tupdesc = CreateTemplateTupleDesc(6); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, + "size", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, + "access", TIMESTAMPTZOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, + "modification", TIMESTAMPTZOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 4, + "change", TIMESTAMPTZOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 5, + "creation", TIMESTAMPTZOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 6, + "isdir", BOOLOID, -1, 0); + BlessTupleDesc(tupdesc); + + memset(isnull, false, sizeof(isnull)); + + values[0] = Int64GetDatum((int64) fst.st_size); + values[1] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_atime)); + values[2] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_mtime)); + /* Unix has file status change time, while Win32 has creation time */ +#if !defined(WIN32) && !defined(__CYGWIN__) + values[3] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_ctime)); + isnull[4] = true; +#else + isnull[3] = true; + values[4] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_ctime)); +#endif + values[5] = BoolGetDatum(S_ISDIR(fst.st_mode)); + + tuple = heap_form_tuple(tupdesc, values, isnull); + + pfree(filename); + + PG_RETURN_DATUM(HeapTupleGetDatum(tuple)); +} + +/* + * stat a file (1 argument version) + * + * note: this wrapper is necessary to pass the sanity check in opr_sanity, + * which checks that all built-in functions that share the implementing C + * function take the same number of arguments + */ +Datum +pg_stat_file_1arg(PG_FUNCTION_ARGS) +{ + return pg_stat_file(fcinfo); +} + +/* + * List a directory (returns the filenames only) + */ +Datum +pg_ls_dir(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + char *location; + bool missing_ok = false; + bool include_dot_dirs = false; + DIR *dirdesc; + struct dirent *de; + + location = convert_and_check_filename(PG_GETARG_TEXT_PP(0)); + + /* check the optional arguments */ + if (PG_NARGS() == 3) + { + if (!PG_ARGISNULL(1)) + missing_ok = PG_GETARG_BOOL(1); + if (!PG_ARGISNULL(2)) + include_dot_dirs = PG_GETARG_BOOL(2); + } + + InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC); + + dirdesc = AllocateDir(location); + if (!dirdesc) + { + /* Return empty tuplestore if appropriate */ + if (missing_ok && errno == ENOENT) + return (Datum) 0; + /* Otherwise, we can let ReadDir() throw the error */ + } + + while ((de = ReadDir(dirdesc, location)) != NULL) + { + Datum values[1]; + bool nulls[1]; + + if (!include_dot_dirs && + (strcmp(de->d_name, ".") == 0 || + strcmp(de->d_name, "..") == 0)) + continue; + + values[0] = CStringGetTextDatum(de->d_name); + nulls[0] = false; + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, + values, nulls); + } + + FreeDir(dirdesc); + return (Datum) 0; +} + +/* + * List a directory (1 argument version) + * + * note: this wrapper is necessary to pass the sanity check in opr_sanity, + * which checks that all built-in functions that share the implementing C + * function take the same number of arguments. + */ +Datum +pg_ls_dir_1arg(PG_FUNCTION_ARGS) +{ + return pg_ls_dir(fcinfo); +} + +/* + * Generic function to return a directory listing of files. + * + * If the directory isn't there, silently return an empty set if missing_ok. + * Other unreadable-directory cases throw an error. 
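+ * Hidden files (names starting with '.') and anything that is not a regular file are skipped, as are files that vanish between the directory scan and the stat() call.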
+ */ +static Datum +pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + DIR *dirdesc; + struct dirent *de; + + InitMaterializedSRF(fcinfo, 0); + + /* + * Now walk the directory. Note that we must do this within a single SRF + * call, not leave the directory open across multiple calls, since we + * can't count on the SRF being run to completion. + */ + dirdesc = AllocateDir(dir); + if (!dirdesc) + { + /* Return empty tuplestore if appropriate */ + if (missing_ok && errno == ENOENT) + return (Datum) 0; + /* Otherwise, we can let ReadDir() throw the error */ + } + + while ((de = ReadDir(dirdesc, dir)) != NULL) + { + Datum values[3]; + bool nulls[3]; + char path[MAXPGPATH * 2]; + struct stat attrib; + + /* Skip hidden files */ + if (de->d_name[0] == '.') + continue; + + /* Get the file info */ + snprintf(path, sizeof(path), "%s/%s", dir, de->d_name); + if (stat(path, &attrib) < 0) + { + /* Ignore concurrently-deleted files, else complain */ + if (errno == ENOENT) + continue; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", path))); + } + + /* Ignore anything but regular files */ + if (!S_ISREG(attrib.st_mode)) + continue; + + values[0] = CStringGetTextDatum(de->d_name); + values[1] = Int64GetDatum((int64) attrib.st_size); + values[2] = TimestampTzGetDatum(time_t_to_timestamptz(attrib.st_mtime)); + memset(nulls, 0, sizeof(nulls)); + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls); + } + + FreeDir(dirdesc); + return (Datum) 0; +} + +/* Function to return the list of files in the log directory */ +Datum +pg_ls_logdir(PG_FUNCTION_ARGS) +{ + return pg_ls_dir_files(fcinfo, Log_directory, false); +} + +/* Function to return the list of files in the WAL directory */ +Datum +pg_ls_waldir(PG_FUNCTION_ARGS) +{ + return pg_ls_dir_files(fcinfo, XLOGDIR, false); +} + +/* + * Generic function to return the list of files in pgsql_tmp + */ +static Datum +pg_ls_tmpdir(FunctionCallInfo fcinfo, Oid tblspc) +{ + char path[MAXPGPATH]; + + if (!SearchSysCacheExists1(TABLESPACEOID, ObjectIdGetDatum(tblspc))) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("tablespace with OID %u does not exist", + tblspc))); + + TempTablespacePath(path, tblspc); + return pg_ls_dir_files(fcinfo, path, true); +} + +/* + * Function to return the list of temporary files in the pg_default tablespace's + * pgsql_tmp directory + */ +Datum +pg_ls_tmpdir_noargs(PG_FUNCTION_ARGS) +{ + return pg_ls_tmpdir(fcinfo, DEFAULTTABLESPACE_OID); +} + +/* + * Function to return the list of temporary files in the specified tablespace's + * pgsql_tmp directory + */ +Datum +pg_ls_tmpdir_1arg(PG_FUNCTION_ARGS) +{ + return pg_ls_tmpdir(fcinfo, PG_GETARG_OID(0)); +} + +/* + * Function to return the list of files in the WAL archive status directory. + */ +Datum +pg_ls_archive_statusdir(PG_FUNCTION_ARGS) +{ + return pg_ls_dir_files(fcinfo, XLOGDIR "/archive_status", true); +} + +/* + * Function to return the list of files in the pg_logical/snapshots directory. + */ +Datum +pg_ls_logicalsnapdir(PG_FUNCTION_ARGS) +{ + return pg_ls_dir_files(fcinfo, "pg_logical/snapshots", false); +} + +/* + * Function to return the list of files in the pg_logical/mappings directory. + */ +Datum +pg_ls_logicalmapdir(PG_FUNCTION_ARGS) +{ + return pg_ls_dir_files(fcinfo, "pg_logical/mappings", false); +} + +/* + * Function to return the list of files in the pg_replslot/<replication_slot> + * directory. 
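+ * An error is raised if the named replication slot does not exist.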
+ */ +Datum +pg_ls_replslotdir(PG_FUNCTION_ARGS) +{ + text *slotname_t; + char path[MAXPGPATH]; + char *slotname; + + slotname_t = PG_GETARG_TEXT_PP(0); + + slotname = text_to_cstring(slotname_t); + + if (!SearchNamedReplicationSlot(slotname, true)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("replication slot \"%s\" does not exist", + slotname))); + + snprintf(path, sizeof(path), "pg_replslot/%s", slotname); + return pg_ls_dir_files(fcinfo, path, false); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/geo_ops.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/geo_ops.c new file mode 100644 index 00000000000..53ee4b6f9cb --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/geo_ops.c @@ -0,0 +1,5562 @@ +/*------------------------------------------------------------------------- + * + * geo_ops.c + * 2D geometric operations + * + * This module implements the geometric functions and operators. The + * geometric types are (from simple to more complicated): + * + * - point + * - line + * - line segment + * - box + * - circle + * - polygon + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/geo_ops.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <math.h> +#include <limits.h> +#include <float.h> +#include <ctype.h> + +#include "libpq/pqformat.h" +#include "miscadmin.h" +#include "nodes/miscnodes.h" +#include "utils/float.h" +#include "utils/fmgrprotos.h" +#include "utils/geo_decls.h" +#include "varatt.h" + +/* + * * Type constructors have this form: + * void type_construct(Type *result, ...); + * + * * Operators commonly have signatures such as + * void type1_operator_type2(Type *result, Type1 *obj1, Type2 *obj2); + * + * Common operators are: + * * Intersection point: + * bool type1_interpt_type2(Point *result, Type1 *obj1, Type2 *obj2); + * Return whether the two objects intersect. If *result is not NULL, + * it is set to the intersection point. + * + * * Containment: + * bool type1_contain_type2(Type1 *obj1, Type2 *obj2); + * Return whether obj1 contains obj2. + * bool type1_contain_type2(Type1 *contains_obj, Type1 *contained_obj); + * Return whether obj1 contains obj2 (used when types are the same) + * + * * Distance of closest point in or on obj1 to obj2: + * float8 type1_closept_type2(Point *result, Type1 *obj1, Type2 *obj2); + * Returns the shortest distance between two objects. If *result is not + * NULL, it is set to the closest point in or on obj1 to obj2. + * + * These functions may be used to implement multiple SQL-level operators. For + * example, determining whether two lines are parallel is done by checking + * whether they don't intersect. 
+ */ + +/* + * Internal routines + */ + +enum path_delim +{ + PATH_NONE, PATH_OPEN, PATH_CLOSED +}; + +/* Routines for points */ +static inline void point_construct(Point *result, float8 x, float8 y); +static inline void point_add_point(Point *result, Point *pt1, Point *pt2); +static inline void point_sub_point(Point *result, Point *pt1, Point *pt2); +static inline void point_mul_point(Point *result, Point *pt1, Point *pt2); +static inline void point_div_point(Point *result, Point *pt1, Point *pt2); +static inline bool point_eq_point(Point *pt1, Point *pt2); +static inline float8 point_dt(Point *pt1, Point *pt2); +static inline float8 point_sl(Point *pt1, Point *pt2); +static int point_inside(Point *p, int npts, Point *plist); + +/* Routines for lines */ +static inline void line_construct(LINE *result, Point *pt, float8 m); +static inline float8 line_sl(LINE *line); +static inline float8 line_invsl(LINE *line); +static bool line_interpt_line(Point *result, LINE *l1, LINE *l2); +static bool line_contain_point(LINE *line, Point *point); +static float8 line_closept_point(Point *result, LINE *line, Point *point); + +/* Routines for line segments */ +static inline void statlseg_construct(LSEG *lseg, Point *pt1, Point *pt2); +static inline float8 lseg_sl(LSEG *lseg); +static inline float8 lseg_invsl(LSEG *lseg); +static bool lseg_interpt_line(Point *result, LSEG *lseg, LINE *line); +static bool lseg_interpt_lseg(Point *result, LSEG *l1, LSEG *l2); +static int lseg_crossing(float8 x, float8 y, float8 prev_x, float8 prev_y); +static bool lseg_contain_point(LSEG *lseg, Point *pt); +static float8 lseg_closept_point(Point *result, LSEG *lseg, Point *pt); +static float8 lseg_closept_line(Point *result, LSEG *lseg, LINE *line); +static float8 lseg_closept_lseg(Point *result, LSEG *on_lseg, LSEG *to_lseg); + +/* Routines for boxes */ +static inline void box_construct(BOX *result, Point *pt1, Point *pt2); +static void box_cn(Point *center, BOX *box); +static bool box_ov(BOX *box1, BOX *box2); +static float8 box_ar(BOX *box); +static float8 box_ht(BOX *box); +static float8 box_wd(BOX *box); +static bool box_contain_point(BOX *box, Point *point); +static bool box_contain_box(BOX *contains_box, BOX *contained_box); +static bool box_contain_lseg(BOX *box, LSEG *lseg); +static bool box_interpt_lseg(Point *result, BOX *box, LSEG *lseg); +static float8 box_closept_point(Point *result, BOX *box, Point *pt); +static float8 box_closept_lseg(Point *result, BOX *box, LSEG *lseg); + +/* Routines for circles */ +static float8 circle_ar(CIRCLE *circle); + +/* Routines for polygons */ +static void make_bound_box(POLYGON *poly); +static void poly_to_circle(CIRCLE *result, POLYGON *poly); +static bool lseg_inside_poly(Point *a, Point *b, POLYGON *poly, int start); +static bool poly_contain_poly(POLYGON *contains_poly, POLYGON *contained_poly); +static bool plist_same(int npts, Point *p1, Point *p2); +static float8 dist_ppoly_internal(Point *pt, POLYGON *poly); + +/* Routines for encoding and decoding */ +static bool single_decode(char *num, float8 *x, char **endptr_p, + const char *type_name, const char *orig_string, + Node *escontext); +static void single_encode(float8 x, StringInfo str); +static bool pair_decode(char *str, float8 *x, float8 *y, char **endptr_p, + const char *type_name, const char *orig_string, + Node *escontext); +static void pair_encode(float8 x, float8 y, StringInfo str); +static int pair_count(char *s, char delim); +static bool path_decode(char *str, bool opentype, int npts, Point *p, + bool *isopen, 
char **endptr_p, + const char *type_name, const char *orig_string, + Node *escontext); +static char *path_encode(enum path_delim path_delim, int npts, Point *pt); + + +/* + * Delimiters for input and output strings. + * LDELIM, RDELIM, and DELIM are left, right, and separator delimiters, respectively. + * LDELIM_EP, RDELIM_EP are left and right delimiters for paths with endpoints. + */ + +#define LDELIM '(' +#define RDELIM ')' +#define DELIM ',' +#define LDELIM_EP '[' +#define RDELIM_EP ']' +#define LDELIM_C '<' +#define RDELIM_C '>' +#define LDELIM_L '{' +#define RDELIM_L '}' + + +/* + * Geometric data types are composed of points. + * This code tries to support a common format throughout the data types, + * to allow for more predictable usage and data type conversion. + * The fundamental unit is the point. Other units are line segments, + * open paths, boxes, closed paths, and polygons (which should be considered + * non-intersecting closed paths). + * + * Data representation is as follows: + * point: (x,y) + * line segment: [(x1,y1),(x2,y2)] + * box: (x1,y1),(x2,y2) + * open path: [(x1,y1),...,(xn,yn)] + * closed path: ((x1,y1),...,(xn,yn)) + * polygon: ((x1,y1),...,(xn,yn)) + * + * For boxes, the points are opposite corners with the first point at the top right. + * For closed paths and polygons, the points should be reordered to allow + * fast and correct equality comparisons. + * + * XXX perhaps points in complex shapes should be reordered internally + * to allow faster internal operations, but should keep track of input order + * and restore that order for text output - tgl 97/01/16 + */ + +static bool +single_decode(char *num, float8 *x, char **endptr_p, + const char *type_name, const char *orig_string, + Node *escontext) +{ + *x = float8in_internal(num, endptr_p, type_name, orig_string, escontext); + return (!SOFT_ERROR_OCCURRED(escontext)); +} /* single_decode() */ + +static void +single_encode(float8 x, StringInfo str) +{ + char *xstr = float8out_internal(x); + + appendStringInfoString(str, xstr); + pfree(xstr); +} /* single_encode() */ + +static bool +pair_decode(char *str, float8 *x, float8 *y, char **endptr_p, + const char *type_name, const char *orig_string, + Node *escontext) +{ + bool has_delim; + + while (isspace((unsigned char) *str)) + str++; + if ((has_delim = (*str == LDELIM))) + str++; + + if (!single_decode(str, x, &str, type_name, orig_string, escontext)) + return false; + + if (*str++ != DELIM) + goto fail; + + if (!single_decode(str, y, &str, type_name, orig_string, escontext)) + return false; + + if (has_delim) + { + if (*str++ != RDELIM) + goto fail; + while (isspace((unsigned char) *str)) + str++; + } + + /* report stopping point if wanted, else complain if not end of string */ + if (endptr_p) + *endptr_p = str; + else if (*str != '\0') + goto fail; + return true; + +fail: + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + type_name, orig_string))); +} + +static void +pair_encode(float8 x, float8 y, StringInfo str) +{ + char *xstr = float8out_internal(x); + char *ystr = float8out_internal(y); + + appendStringInfo(str, "%s,%s", xstr, ystr); + pfree(xstr); + pfree(ystr); +} + +static bool +path_decode(char *str, bool opentype, int npts, Point *p, + bool *isopen, char **endptr_p, + const char *type_name, const char *orig_string, + Node *escontext) +{ + int depth = 0; + char *cp; + int i; + + while (isspace((unsigned char) *str)) + str++; + if ((*isopen = (*str == LDELIM_EP))) + { + 
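		/*
		 * Illustrative examples (editorial sketch, not part of the upstream
		 * source) of the text forms these decoders accept, matching the
		 * representation table earlier in this file:
		 *
		 *     point                  '(1,2)'  or  '1,2'
		 *     lseg                   '[(0,0),(1,1)]'
		 *     box                    '(2,2),(0,0)'
		 *     open path              '[(0,0),(1,1),(2,0)]'
		 *     closed path / polygon  '((0,0),(1,1),(2,0))'
		 *     line                   '{1,-1,0}'   i.e. A=1, B=-1, C=0, the line y = x
		 *
		 * The '[' consumed in this branch both marks the path as open and
		 * increments the nesting depth, so a matching ']' is required after
		 * the point list.
		 */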
/* no open delimiter allowed? */ + if (!opentype) + goto fail; + depth++; + str++; + } + else if (*str == LDELIM) + { + cp = (str + 1); + while (isspace((unsigned char) *cp)) + cp++; + if (*cp == LDELIM) + { + depth++; + str = cp; + } + else if (strrchr(str, LDELIM) == str) + { + depth++; + str = cp; + } + } + + for (i = 0; i < npts; i++) + { + if (!pair_decode(str, &(p->x), &(p->y), &str, type_name, orig_string, + escontext)) + return false; + if (*str == DELIM) + str++; + p++; + } + + while (depth > 0) + { + if (*str == RDELIM || (*str == RDELIM_EP && *isopen && depth == 1)) + { + depth--; + str++; + while (isspace((unsigned char) *str)) + str++; + } + else + goto fail; + } + + /* report stopping point if wanted, else complain if not end of string */ + if (endptr_p) + *endptr_p = str; + else if (*str != '\0') + goto fail; + return true; + +fail: + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + type_name, orig_string))); +} /* path_decode() */ + +static char * +path_encode(enum path_delim path_delim, int npts, Point *pt) +{ + StringInfoData str; + int i; + + initStringInfo(&str); + + switch (path_delim) + { + case PATH_CLOSED: + appendStringInfoChar(&str, LDELIM); + break; + case PATH_OPEN: + appendStringInfoChar(&str, LDELIM_EP); + break; + case PATH_NONE: + break; + } + + for (i = 0; i < npts; i++) + { + if (i > 0) + appendStringInfoChar(&str, DELIM); + appendStringInfoChar(&str, LDELIM); + pair_encode(pt->x, pt->y, &str); + appendStringInfoChar(&str, RDELIM); + pt++; + } + + switch (path_delim) + { + case PATH_CLOSED: + appendStringInfoChar(&str, RDELIM); + break; + case PATH_OPEN: + appendStringInfoChar(&str, RDELIM_EP); + break; + case PATH_NONE: + break; + } + + return str.data; +} /* path_encode() */ + +/*------------------------------------------------------------- + * pair_count - count the number of points + * allow the following notation: + * '((1,2),(3,4))' + * '(1,3,2,4)' + * require an odd number of delim characters in the string + *-------------------------------------------------------------*/ +static int +pair_count(char *s, char delim) +{ + int ndelim = 0; + + while ((s = strchr(s, delim)) != NULL) + { + ndelim++; + s++; + } + return (ndelim % 2) ? ((ndelim + 1) / 2) : -1; +} + + +/*********************************************************************** + ** + ** Routines for two-dimensional boxes. + ** + ***********************************************************************/ + +/*---------------------------------------------------------- + * Formatting and conversion routines. + *---------------------------------------------------------*/ + +/* box_in - convert a string to internal form. + * + * External format: (two corners of box) + * "(f8, f8), (f8, f8)" + * also supports the older style "(f8, f8, f8, f8)" + */ +Datum +box_in(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + BOX *box = (BOX *) palloc(sizeof(BOX)); + bool isopen; + float8 x, + y; + + if (!path_decode(str, false, 2, &(box->high), &isopen, NULL, "box", str, + escontext)) + PG_RETURN_NULL(); + + /* reorder corners if necessary... */ + if (float8_lt(box->high.x, box->low.x)) + { + x = box->high.x; + box->high.x = box->low.x; + box->low.x = x; + } + if (float8_lt(box->high.y, box->low.y)) + { + y = box->high.y; + box->high.y = box->low.y; + box->low.y = y; + } + + PG_RETURN_BOX_P(box); +} + +/* box_out - convert a box to external form. 
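 *
 * Worked example (illustrative, not from the upstream comment): box_in
 * normalizes the corners so that "high" really is the upper-right corner,
 * so the input '(0,2),(2,0)' is stored as high=(2,2), low=(0,0) and
 * box_out prints it back as '(2,2),(0,0)'.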
+ */ +Datum +box_out(PG_FUNCTION_ARGS) +{ + BOX *box = PG_GETARG_BOX_P(0); + + PG_RETURN_CSTRING(path_encode(PATH_NONE, 2, &(box->high))); +} + +/* + * box_recv - converts external binary format to box + */ +Datum +box_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + BOX *box; + float8 x, + y; + + box = (BOX *) palloc(sizeof(BOX)); + + box->high.x = pq_getmsgfloat8(buf); + box->high.y = pq_getmsgfloat8(buf); + box->low.x = pq_getmsgfloat8(buf); + box->low.y = pq_getmsgfloat8(buf); + + /* reorder corners if necessary... */ + if (float8_lt(box->high.x, box->low.x)) + { + x = box->high.x; + box->high.x = box->low.x; + box->low.x = x; + } + if (float8_lt(box->high.y, box->low.y)) + { + y = box->high.y; + box->high.y = box->low.y; + box->low.y = y; + } + + PG_RETURN_BOX_P(box); +} + +/* + * box_send - converts box to binary format + */ +Datum +box_send(PG_FUNCTION_ARGS) +{ + BOX *box = PG_GETARG_BOX_P(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendfloat8(&buf, box->high.x); + pq_sendfloat8(&buf, box->high.y); + pq_sendfloat8(&buf, box->low.x); + pq_sendfloat8(&buf, box->low.y); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + + +/* box_construct - fill in a new box. + */ +static inline void +box_construct(BOX *result, Point *pt1, Point *pt2) +{ + if (float8_gt(pt1->x, pt2->x)) + { + result->high.x = pt1->x; + result->low.x = pt2->x; + } + else + { + result->high.x = pt2->x; + result->low.x = pt1->x; + } + if (float8_gt(pt1->y, pt2->y)) + { + result->high.y = pt1->y; + result->low.y = pt2->y; + } + else + { + result->high.y = pt2->y; + result->low.y = pt1->y; + } +} + + +/*---------------------------------------------------------- + * Relational operators for BOXes. + * <, >, <=, >=, and == are based on box area. + *---------------------------------------------------------*/ + +/* box_same - are two boxes identical? + */ +Datum +box_same(PG_FUNCTION_ARGS) +{ + BOX *box1 = PG_GETARG_BOX_P(0); + BOX *box2 = PG_GETARG_BOX_P(1); + + PG_RETURN_BOOL(point_eq_point(&box1->high, &box2->high) && + point_eq_point(&box1->low, &box2->low)); +} + +/* box_overlap - does box1 overlap box2? + */ +Datum +box_overlap(PG_FUNCTION_ARGS) +{ + BOX *box1 = PG_GETARG_BOX_P(0); + BOX *box2 = PG_GETARG_BOX_P(1); + + PG_RETURN_BOOL(box_ov(box1, box2)); +} + +static bool +box_ov(BOX *box1, BOX *box2) +{ + return (FPle(box1->low.x, box2->high.x) && + FPle(box2->low.x, box1->high.x) && + FPle(box1->low.y, box2->high.y) && + FPle(box2->low.y, box1->high.y)); +} + +/* box_left - is box1 strictly left of box2? + */ +Datum +box_left(PG_FUNCTION_ARGS) +{ + BOX *box1 = PG_GETARG_BOX_P(0); + BOX *box2 = PG_GETARG_BOX_P(1); + + PG_RETURN_BOOL(FPlt(box1->high.x, box2->low.x)); +} + +/* box_overleft - is the right edge of box1 at or left of + * the right edge of box2? + * + * This is "less than or equal" for the end of a time range, + * when time ranges are stored as rectangles. + */ +Datum +box_overleft(PG_FUNCTION_ARGS) +{ + BOX *box1 = PG_GETARG_BOX_P(0); + BOX *box2 = PG_GETARG_BOX_P(1); + + PG_RETURN_BOOL(FPle(box1->high.x, box2->high.x)); +} + +/* box_right - is box1 strictly right of box2? + */ +Datum +box_right(PG_FUNCTION_ARGS) +{ + BOX *box1 = PG_GETARG_BOX_P(0); + BOX *box2 = PG_GETARG_BOX_P(1); + + PG_RETURN_BOOL(FPgt(box1->low.x, box2->high.x)); +} + +/* box_overright - is the left edge of box1 at or right of + * the left edge of box2? + * + * This is "greater than or equal" for time ranges, when time ranges + * are stored as rectangles. 
+ */ +Datum +box_overright(PG_FUNCTION_ARGS) +{ + BOX *box1 = PG_GETARG_BOX_P(0); + BOX *box2 = PG_GETARG_BOX_P(1); + + PG_RETURN_BOOL(FPge(box1->low.x, box2->low.x)); +} + +/* box_below - is box1 strictly below box2? + */ +Datum +box_below(PG_FUNCTION_ARGS) +{ + BOX *box1 = PG_GETARG_BOX_P(0); + BOX *box2 = PG_GETARG_BOX_P(1); + + PG_RETURN_BOOL(FPlt(box1->high.y, box2->low.y)); +} + +/* box_overbelow - is the upper edge of box1 at or below + * the upper edge of box2? + */ +Datum +box_overbelow(PG_FUNCTION_ARGS) +{ + BOX *box1 = PG_GETARG_BOX_P(0); + BOX *box2 = PG_GETARG_BOX_P(1); + + PG_RETURN_BOOL(FPle(box1->high.y, box2->high.y)); +} + +/* box_above - is box1 strictly above box2? + */ +Datum +box_above(PG_FUNCTION_ARGS) +{ + BOX *box1 = PG_GETARG_BOX_P(0); + BOX *box2 = PG_GETARG_BOX_P(1); + + PG_RETURN_BOOL(FPgt(box1->low.y, box2->high.y)); +} + +/* box_overabove - is the lower edge of box1 at or above + * the lower edge of box2? + */ +Datum +box_overabove(PG_FUNCTION_ARGS) +{ + BOX *box1 = PG_GETARG_BOX_P(0); + BOX *box2 = PG_GETARG_BOX_P(1); + + PG_RETURN_BOOL(FPge(box1->low.y, box2->low.y)); +} + +/* box_contained - is box1 contained by box2? + */ +Datum +box_contained(PG_FUNCTION_ARGS) +{ + BOX *box1 = PG_GETARG_BOX_P(0); + BOX *box2 = PG_GETARG_BOX_P(1); + + PG_RETURN_BOOL(box_contain_box(box2, box1)); +} + +/* box_contain - does box1 contain box2? + */ +Datum +box_contain(PG_FUNCTION_ARGS) +{ + BOX *box1 = PG_GETARG_BOX_P(0); + BOX *box2 = PG_GETARG_BOX_P(1); + + PG_RETURN_BOOL(box_contain_box(box1, box2)); +} + +/* + * Check whether the second box is in the first box or on its border + */ +static bool +box_contain_box(BOX *contains_box, BOX *contained_box) +{ + return FPge(contains_box->high.x, contained_box->high.x) && + FPle(contains_box->low.x, contained_box->low.x) && + FPge(contains_box->high.y, contained_box->high.y) && + FPle(contains_box->low.y, contained_box->low.y); +} + + +/* box_positionop - + * is box1 entirely {above,below} box2? + * + * box_below_eq and box_above_eq are obsolete versions that (probably + * erroneously) accept the equal-boundaries case. Since these are not + * in sync with the box_left and box_right code, they are deprecated and + * not supported in the PG 8.1 rtree operator class extension. + */ +Datum +box_below_eq(PG_FUNCTION_ARGS) +{ + BOX *box1 = PG_GETARG_BOX_P(0); + BOX *box2 = PG_GETARG_BOX_P(1); + + PG_RETURN_BOOL(FPle(box1->high.y, box2->low.y)); +} + +Datum +box_above_eq(PG_FUNCTION_ARGS) +{ + BOX *box1 = PG_GETARG_BOX_P(0); + BOX *box2 = PG_GETARG_BOX_P(1); + + PG_RETURN_BOOL(FPge(box1->low.y, box2->high.y)); +} + + +/* box_relop - is area(box1) relop area(box2), within + * our accuracy constraint? 
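 *
 * For instance (illustrative example, not from the upstream comment), a
 * 1-by-4 box and a 2-by-2 box both have area 4, so box_eq() reports them
 * equal even though box_same() would not; the ordering operators below
 * compare nothing but box_ar() results, to within the geometric EPSILON.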
+ */ +Datum +box_lt(PG_FUNCTION_ARGS) +{ + BOX *box1 = PG_GETARG_BOX_P(0); + BOX *box2 = PG_GETARG_BOX_P(1); + + PG_RETURN_BOOL(FPlt(box_ar(box1), box_ar(box2))); +} + +Datum +box_gt(PG_FUNCTION_ARGS) +{ + BOX *box1 = PG_GETARG_BOX_P(0); + BOX *box2 = PG_GETARG_BOX_P(1); + + PG_RETURN_BOOL(FPgt(box_ar(box1), box_ar(box2))); +} + +Datum +box_eq(PG_FUNCTION_ARGS) +{ + BOX *box1 = PG_GETARG_BOX_P(0); + BOX *box2 = PG_GETARG_BOX_P(1); + + PG_RETURN_BOOL(FPeq(box_ar(box1), box_ar(box2))); +} + +Datum +box_le(PG_FUNCTION_ARGS) +{ + BOX *box1 = PG_GETARG_BOX_P(0); + BOX *box2 = PG_GETARG_BOX_P(1); + + PG_RETURN_BOOL(FPle(box_ar(box1), box_ar(box2))); +} + +Datum +box_ge(PG_FUNCTION_ARGS) +{ + BOX *box1 = PG_GETARG_BOX_P(0); + BOX *box2 = PG_GETARG_BOX_P(1); + + PG_RETURN_BOOL(FPge(box_ar(box1), box_ar(box2))); +} + + +/*---------------------------------------------------------- + * "Arithmetic" operators on boxes. + *---------------------------------------------------------*/ + +/* box_area - returns the area of the box. + */ +Datum +box_area(PG_FUNCTION_ARGS) +{ + BOX *box = PG_GETARG_BOX_P(0); + + PG_RETURN_FLOAT8(box_ar(box)); +} + + +/* box_width - returns the width of the box + * (horizontal magnitude). + */ +Datum +box_width(PG_FUNCTION_ARGS) +{ + BOX *box = PG_GETARG_BOX_P(0); + + PG_RETURN_FLOAT8(box_wd(box)); +} + + +/* box_height - returns the height of the box + * (vertical magnitude). + */ +Datum +box_height(PG_FUNCTION_ARGS) +{ + BOX *box = PG_GETARG_BOX_P(0); + + PG_RETURN_FLOAT8(box_ht(box)); +} + + +/* box_distance - returns the distance between the + * center points of two boxes. + */ +Datum +box_distance(PG_FUNCTION_ARGS) +{ + BOX *box1 = PG_GETARG_BOX_P(0); + BOX *box2 = PG_GETARG_BOX_P(1); + Point a, + b; + + box_cn(&a, box1); + box_cn(&b, box2); + + PG_RETURN_FLOAT8(point_dt(&a, &b)); +} + + +/* box_center - returns the center point of the box. + */ +Datum +box_center(PG_FUNCTION_ARGS) +{ + BOX *box = PG_GETARG_BOX_P(0); + Point *result = (Point *) palloc(sizeof(Point)); + + box_cn(result, box); + + PG_RETURN_POINT_P(result); +} + + +/* box_ar - returns the area of the box. + */ +static float8 +box_ar(BOX *box) +{ + return float8_mul(box_wd(box), box_ht(box)); +} + + +/* box_cn - stores the centerpoint of the box into *center. + */ +static void +box_cn(Point *center, BOX *box) +{ + center->x = float8_div(float8_pl(box->high.x, box->low.x), 2.0); + center->y = float8_div(float8_pl(box->high.y, box->low.y), 2.0); +} + + +/* box_wd - returns the width (length) of the box + * (horizontal magnitude). + */ +static float8 +box_wd(BOX *box) +{ + return float8_mi(box->high.x, box->low.x); +} + + +/* box_ht - returns the height of the box + * (vertical magnitude). + */ +static float8 +box_ht(BOX *box) +{ + return float8_mi(box->high.y, box->low.y); +} + + +/*---------------------------------------------------------- + * Funky operations. + *---------------------------------------------------------*/ + +/* box_intersect - + * returns the overlapping portion of two boxes, + * or NULL if they do not intersect. 
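 *
 * The result is simply high = the componentwise minimum of the two highs
 * and low = the componentwise maximum of the two lows; e.g. (illustrative
 * example, not from the upstream comment) the boxes '(2,2),(0,0)' and
 * '(3,3),(1,1)' intersect in '(2,2),(1,1)'.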
+ */ +Datum +box_intersect(PG_FUNCTION_ARGS) +{ + BOX *box1 = PG_GETARG_BOX_P(0); + BOX *box2 = PG_GETARG_BOX_P(1); + BOX *result; + + if (!box_ov(box1, box2)) + PG_RETURN_NULL(); + + result = (BOX *) palloc(sizeof(BOX)); + + result->high.x = float8_min(box1->high.x, box2->high.x); + result->low.x = float8_max(box1->low.x, box2->low.x); + result->high.y = float8_min(box1->high.y, box2->high.y); + result->low.y = float8_max(box1->low.y, box2->low.y); + + PG_RETURN_BOX_P(result); +} + + +/* box_diagonal - + * returns a line segment which happens to be the + * positive-slope diagonal of "box". + */ +Datum +box_diagonal(PG_FUNCTION_ARGS) +{ + BOX *box = PG_GETARG_BOX_P(0); + LSEG *result = (LSEG *) palloc(sizeof(LSEG)); + + statlseg_construct(result, &box->high, &box->low); + + PG_RETURN_LSEG_P(result); +} + +/*********************************************************************** + ** + ** Routines for 2D lines. + ** + ***********************************************************************/ + +static bool +line_decode(char *s, const char *str, LINE *line, Node *escontext) +{ + /* s was already advanced over leading '{' */ + if (!single_decode(s, &line->A, &s, "line", str, escontext)) + return false; + if (*s++ != DELIM) + goto fail; + if (!single_decode(s, &line->B, &s, "line", str, escontext)) + return false; + if (*s++ != DELIM) + goto fail; + if (!single_decode(s, &line->C, &s, "line", str, escontext)) + return false; + if (*s++ != RDELIM_L) + goto fail; + while (isspace((unsigned char) *s)) + s++; + if (*s != '\0') + goto fail; + return true; + +fail: + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "line", str))); +} + +Datum +line_in(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + LINE *line = (LINE *) palloc(sizeof(LINE)); + LSEG lseg; + bool isopen; + char *s; + + s = str; + while (isspace((unsigned char) *s)) + s++; + if (*s == LDELIM_L) + { + if (!line_decode(s + 1, str, line, escontext)) + PG_RETURN_NULL(); + if (FPzero(line->A) && FPzero(line->B)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid line specification: A and B cannot both be zero"))); + } + else + { + if (!path_decode(s, true, 2, &lseg.p[0], &isopen, NULL, "line", str, + escontext)) + PG_RETURN_NULL(); + if (point_eq_point(&lseg.p[0], &lseg.p[1])) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid line specification: must be two distinct points"))); + + /* + * XXX lseg_sl() and line_construct() can throw overflow/underflow + * errors. Eventually we should allow those to be soft, but the + * notational pain seems to outweigh the value for now. 
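		 *
		 * Illustrative example (not from the upstream comment): the
		 * two-point spec '[(0,0),(2,2)]' has slope 1 and is stored as
		 * A=1, B=-1, C=0, i.e. x - y = 0; a vertical pair such as
		 * '[(3,0),(3,5)]' yields an infinite slope and is stored as
		 * A=-1, B=0, C=3, i.e. x = 3.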
+ */ + line_construct(line, &lseg.p[0], lseg_sl(&lseg)); + } + + PG_RETURN_LINE_P(line); +} + + +Datum +line_out(PG_FUNCTION_ARGS) +{ + LINE *line = PG_GETARG_LINE_P(0); + char *astr = float8out_internal(line->A); + char *bstr = float8out_internal(line->B); + char *cstr = float8out_internal(line->C); + + PG_RETURN_CSTRING(psprintf("%c%s%c%s%c%s%c", LDELIM_L, astr, DELIM, bstr, + DELIM, cstr, RDELIM_L)); +} + +/* + * line_recv - converts external binary format to line + */ +Datum +line_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + LINE *line; + + line = (LINE *) palloc(sizeof(LINE)); + + line->A = pq_getmsgfloat8(buf); + line->B = pq_getmsgfloat8(buf); + line->C = pq_getmsgfloat8(buf); + + if (FPzero(line->A) && FPzero(line->B)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("invalid line specification: A and B cannot both be zero"))); + + PG_RETURN_LINE_P(line); +} + +/* + * line_send - converts line to binary format + */ +Datum +line_send(PG_FUNCTION_ARGS) +{ + LINE *line = PG_GETARG_LINE_P(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendfloat8(&buf, line->A); + pq_sendfloat8(&buf, line->B); + pq_sendfloat8(&buf, line->C); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + + +/*---------------------------------------------------------- + * Conversion routines from one line formula to internal. + * Internal form: Ax+By+C=0 + *---------------------------------------------------------*/ + +/* + * Fill already-allocated LINE struct from the point and the slope + */ +static inline void +line_construct(LINE *result, Point *pt, float8 m) +{ + if (isinf(m)) + { + /* vertical - use "x = C" */ + result->A = -1.0; + result->B = 0.0; + result->C = pt->x; + } + else if (m == 0) + { + /* horizontal - use "y = C" */ + result->A = 0.0; + result->B = -1.0; + result->C = pt->y; + } + else + { + /* use "mx - y + yinter = 0" */ + result->A = m; + result->B = -1.0; + result->C = float8_mi(pt->y, float8_mul(m, pt->x)); + /* on some platforms, the preceding expression tends to produce -0 */ + if (result->C == 0.0) + result->C = 0.0; + } +} + +/* line_construct_pp() + * two points + */ +Datum +line_construct_pp(PG_FUNCTION_ARGS) +{ + Point *pt1 = PG_GETARG_POINT_P(0); + Point *pt2 = PG_GETARG_POINT_P(1); + LINE *result = (LINE *) palloc(sizeof(LINE)); + + if (point_eq_point(pt1, pt2)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid line specification: must be two distinct points"))); + + line_construct(result, pt1, point_sl(pt1, pt2)); + + PG_RETURN_LINE_P(result); +} + + +/*---------------------------------------------------------- + * Relative position routines. 
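 *
 * Note (illustrative, not from the upstream comment): with the Ax+By+C=0
 * representation, line_parallel() is simply "no intersection point found
 * by line_interpt_line()", and line_perp() checks that the slopes multiply
 * to -1 via (A1*A2)/(B1*B2) == -1; e.g. {1,-1,0} (y = x) and {1,1,0}
 * (y = -x) are perpendicular.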
+ *---------------------------------------------------------*/ + +Datum +line_intersect(PG_FUNCTION_ARGS) +{ + LINE *l1 = PG_GETARG_LINE_P(0); + LINE *l2 = PG_GETARG_LINE_P(1); + + PG_RETURN_BOOL(line_interpt_line(NULL, l1, l2)); +} + +Datum +line_parallel(PG_FUNCTION_ARGS) +{ + LINE *l1 = PG_GETARG_LINE_P(0); + LINE *l2 = PG_GETARG_LINE_P(1); + + PG_RETURN_BOOL(!line_interpt_line(NULL, l1, l2)); +} + +Datum +line_perp(PG_FUNCTION_ARGS) +{ + LINE *l1 = PG_GETARG_LINE_P(0); + LINE *l2 = PG_GETARG_LINE_P(1); + + if (FPzero(l1->A)) + PG_RETURN_BOOL(FPzero(l2->B)); + if (FPzero(l2->A)) + PG_RETURN_BOOL(FPzero(l1->B)); + if (FPzero(l1->B)) + PG_RETURN_BOOL(FPzero(l2->A)); + if (FPzero(l2->B)) + PG_RETURN_BOOL(FPzero(l1->A)); + + PG_RETURN_BOOL(FPeq(float8_div(float8_mul(l1->A, l2->A), + float8_mul(l1->B, l2->B)), -1.0)); +} + +Datum +line_vertical(PG_FUNCTION_ARGS) +{ + LINE *line = PG_GETARG_LINE_P(0); + + PG_RETURN_BOOL(FPzero(line->B)); +} + +Datum +line_horizontal(PG_FUNCTION_ARGS) +{ + LINE *line = PG_GETARG_LINE_P(0); + + PG_RETURN_BOOL(FPzero(line->A)); +} + + +/* + * Check whether the two lines are the same + */ +Datum +line_eq(PG_FUNCTION_ARGS) +{ + LINE *l1 = PG_GETARG_LINE_P(0); + LINE *l2 = PG_GETARG_LINE_P(1); + float8 ratio; + + /* If any NaNs are involved, insist on exact equality */ + if (unlikely(isnan(l1->A) || isnan(l1->B) || isnan(l1->C) || + isnan(l2->A) || isnan(l2->B) || isnan(l2->C))) + { + PG_RETURN_BOOL(float8_eq(l1->A, l2->A) && + float8_eq(l1->B, l2->B) && + float8_eq(l1->C, l2->C)); + } + + /* Otherwise, lines whose parameters are proportional are the same */ + if (!FPzero(l2->A)) + ratio = float8_div(l1->A, l2->A); + else if (!FPzero(l2->B)) + ratio = float8_div(l1->B, l2->B); + else if (!FPzero(l2->C)) + ratio = float8_div(l1->C, l2->C); + else + ratio = 1.0; + + PG_RETURN_BOOL(FPeq(l1->A, float8_mul(ratio, l2->A)) && + FPeq(l1->B, float8_mul(ratio, l2->B)) && + FPeq(l1->C, float8_mul(ratio, l2->C))); +} + + +/*---------------------------------------------------------- + * Line arithmetic routines. + *---------------------------------------------------------*/ + +/* + * Return slope of the line + */ +static inline float8 +line_sl(LINE *line) +{ + if (FPzero(line->A)) + return 0.0; + if (FPzero(line->B)) + return get_float8_infinity(); + return float8_div(line->A, -line->B); +} + + +/* + * Return inverse slope of the line + */ +static inline float8 +line_invsl(LINE *line) +{ + if (FPzero(line->A)) + return get_float8_infinity(); + if (FPzero(line->B)) + return 0.0; + return float8_div(line->B, line->A); +} + + +/* line_distance() + * Distance between two lines. + */ +Datum +line_distance(PG_FUNCTION_ARGS) +{ + LINE *l1 = PG_GETARG_LINE_P(0); + LINE *l2 = PG_GETARG_LINE_P(1); + float8 ratio; + + if (line_interpt_line(NULL, l1, l2)) /* intersecting? 
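 * (Illustrative note, not from the upstream comment.)  If they do
 * intersect, the distance is zero.  Otherwise the lines are parallel and
 * the code below returns |C1 - ratio*C2| / hypot(A1,B1), where "ratio"
 * rescales l2 onto l1's coefficients; e.g. {1,-1,0} (y = x) and
 * {1,-1,5} (y = x + 5) are 5/sqrt(2) apart.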
*/ + PG_RETURN_FLOAT8(0.0); + + if (!FPzero(l1->A) && !isnan(l1->A) && !FPzero(l2->A) && !isnan(l2->A)) + ratio = float8_div(l1->A, l2->A); + else if (!FPzero(l1->B) && !isnan(l1->B) && !FPzero(l2->B) && !isnan(l2->B)) + ratio = float8_div(l1->B, l2->B); + else + ratio = 1.0; + + PG_RETURN_FLOAT8(float8_div(fabs(float8_mi(l1->C, + float8_mul(ratio, l2->C))), + HYPOT(l1->A, l1->B))); +} + +/* line_interpt() + * Point where two lines l1, l2 intersect (if any) + */ +Datum +line_interpt(PG_FUNCTION_ARGS) +{ + LINE *l1 = PG_GETARG_LINE_P(0); + LINE *l2 = PG_GETARG_LINE_P(1); + Point *result; + + result = (Point *) palloc(sizeof(Point)); + + if (!line_interpt_line(result, l1, l2)) + PG_RETURN_NULL(); + PG_RETURN_POINT_P(result); +} + +/* + * Internal version of line_interpt + * + * Return whether two lines intersect. If *result is not NULL, it is set to + * the intersection point. + * + * NOTE: If the lines are identical then we will find they are parallel + * and report "no intersection". This is a little weird, but since + * there's no *unique* intersection, maybe it's appropriate behavior. + * + * If the lines have NaN constants, we will return true, and the intersection + * point would have NaN coordinates. We shouldn't return false in this case + * because that would mean the lines are parallel. + */ +static bool +line_interpt_line(Point *result, LINE *l1, LINE *l2) +{ + float8 x, + y; + + if (!FPzero(l1->B)) + { + if (FPeq(l2->A, float8_mul(l1->A, float8_div(l2->B, l1->B)))) + return false; + + x = float8_div(float8_mi(float8_mul(l1->B, l2->C), + float8_mul(l2->B, l1->C)), + float8_mi(float8_mul(l1->A, l2->B), + float8_mul(l2->A, l1->B))); + y = float8_div(-float8_pl(float8_mul(l1->A, x), l1->C), l1->B); + } + else if (!FPzero(l2->B)) + { + if (FPeq(l1->A, float8_mul(l2->A, float8_div(l1->B, l2->B)))) + return false; + + x = float8_div(float8_mi(float8_mul(l2->B, l1->C), + float8_mul(l1->B, l2->C)), + float8_mi(float8_mul(l2->A, l1->B), + float8_mul(l1->A, l2->B))); + y = float8_div(-float8_pl(float8_mul(l2->A, x), l2->C), l2->B); + } + else + return false; + + /* On some platforms, the preceding expressions tend to produce -0. */ + if (x == 0.0) + x = 0.0; + if (y == 0.0) + y = 0.0; + + if (result != NULL) + point_construct(result, x, y); + + return true; +} + + +/*********************************************************************** + ** + ** Routines for 2D paths (sequences of line segments, also + ** called `polylines'). + ** + ** This is not a general package for geometric paths, + ** which of course include polygons; the emphasis here + ** is on (for example) usefulness in wire layout. + ** + ***********************************************************************/ + +/*---------------------------------------------------------- + * String to path / path to string conversion. + * External format: + * "((xcoord, ycoord),... )" + * "[(xcoord, ycoord),... ]" + * "(xcoord, ycoord),... " + * "[xcoord, ycoord,... ]" + * Also support older format: + * "(closed, npts, xcoord, ycoord,... 
)" + *---------------------------------------------------------*/ + +Datum +path_area(PG_FUNCTION_ARGS) +{ + PATH *path = PG_GETARG_PATH_P(0); + float8 area = 0.0; + int i, + j; + + if (!path->closed) + PG_RETURN_NULL(); + + for (i = 0; i < path->npts; i++) + { + j = (i + 1) % path->npts; + area = float8_pl(area, float8_mul(path->p[i].x, path->p[j].y)); + area = float8_mi(area, float8_mul(path->p[i].y, path->p[j].x)); + } + + PG_RETURN_FLOAT8(float8_div(fabs(area), 2.0)); +} + + +Datum +path_in(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + PATH *path; + bool isopen; + char *s; + int npts; + int size; + int base_size; + int depth = 0; + + if ((npts = pair_count(str, ',')) <= 0) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "path", str))); + + s = str; + while (isspace((unsigned char) *s)) + s++; + + /* skip single leading paren */ + if ((*s == LDELIM) && (strrchr(s, LDELIM) == s)) + { + s++; + depth++; + } + + base_size = sizeof(path->p[0]) * npts; + size = offsetof(PATH, p) + base_size; + + /* Check for integer overflow */ + if (base_size / npts != sizeof(path->p[0]) || size <= base_size) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("too many points requested"))); + + path = (PATH *) palloc(size); + + SET_VARSIZE(path, size); + path->npts = npts; + + if (!path_decode(s, true, npts, &(path->p[0]), &isopen, &s, "path", str, + escontext)) + PG_RETURN_NULL(); + + if (depth >= 1) + { + if (*s++ != RDELIM) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "path", str))); + while (isspace((unsigned char) *s)) + s++; + } + if (*s != '\0') + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "path", str))); + + path->closed = (!isopen); + /* prevent instability in unused pad bytes */ + path->dummy = 0; + + PG_RETURN_PATH_P(path); +} + + +Datum +path_out(PG_FUNCTION_ARGS) +{ + PATH *path = PG_GETARG_PATH_P(0); + + PG_RETURN_CSTRING(path_encode(path->closed ? PATH_CLOSED : PATH_OPEN, path->npts, path->p)); +} + +/* + * path_recv - converts external binary format to path + * + * External representation is closed flag (a boolean byte), int32 number + * of points, and the points. + */ +Datum +path_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + PATH *path; + int closed; + int32 npts; + int32 i; + int size; + + closed = pq_getmsgbyte(buf); + npts = pq_getmsgint(buf, sizeof(int32)); + if (npts <= 0 || npts >= (int32) ((INT_MAX - offsetof(PATH, p)) / sizeof(Point))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("invalid number of points in external \"path\" value"))); + + size = offsetof(PATH, p) + sizeof(path->p[0]) * npts; + path = (PATH *) palloc(size); + + SET_VARSIZE(path, size); + path->npts = npts; + path->closed = (closed ? 1 : 0); + /* prevent instability in unused pad bytes */ + path->dummy = 0; + + for (i = 0; i < npts; i++) + { + path->p[i].x = pq_getmsgfloat8(buf); + path->p[i].y = pq_getmsgfloat8(buf); + } + + PG_RETURN_PATH_P(path); +} + +/* + * path_send - converts path to binary format + */ +Datum +path_send(PG_FUNCTION_ARGS) +{ + PATH *path = PG_GETARG_PATH_P(0); + StringInfoData buf; + int32 i; + + pq_begintypsend(&buf); + pq_sendbyte(&buf, path->closed ? 
1 : 0); + pq_sendint32(&buf, path->npts); + for (i = 0; i < path->npts; i++) + { + pq_sendfloat8(&buf, path->p[i].x); + pq_sendfloat8(&buf, path->p[i].y); + } + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + + +/*---------------------------------------------------------- + * Relational operators. + * These are based on the path cardinality, + * as stupid as that sounds. + * + * Better relops and access methods coming soon. + *---------------------------------------------------------*/ + +Datum +path_n_lt(PG_FUNCTION_ARGS) +{ + PATH *p1 = PG_GETARG_PATH_P(0); + PATH *p2 = PG_GETARG_PATH_P(1); + + PG_RETURN_BOOL(p1->npts < p2->npts); +} + +Datum +path_n_gt(PG_FUNCTION_ARGS) +{ + PATH *p1 = PG_GETARG_PATH_P(0); + PATH *p2 = PG_GETARG_PATH_P(1); + + PG_RETURN_BOOL(p1->npts > p2->npts); +} + +Datum +path_n_eq(PG_FUNCTION_ARGS) +{ + PATH *p1 = PG_GETARG_PATH_P(0); + PATH *p2 = PG_GETARG_PATH_P(1); + + PG_RETURN_BOOL(p1->npts == p2->npts); +} + +Datum +path_n_le(PG_FUNCTION_ARGS) +{ + PATH *p1 = PG_GETARG_PATH_P(0); + PATH *p2 = PG_GETARG_PATH_P(1); + + PG_RETURN_BOOL(p1->npts <= p2->npts); +} + +Datum +path_n_ge(PG_FUNCTION_ARGS) +{ + PATH *p1 = PG_GETARG_PATH_P(0); + PATH *p2 = PG_GETARG_PATH_P(1); + + PG_RETURN_BOOL(p1->npts >= p2->npts); +} + +/*---------------------------------------------------------- + * Conversion operators. + *---------------------------------------------------------*/ + +Datum +path_isclosed(PG_FUNCTION_ARGS) +{ + PATH *path = PG_GETARG_PATH_P(0); + + PG_RETURN_BOOL(path->closed); +} + +Datum +path_isopen(PG_FUNCTION_ARGS) +{ + PATH *path = PG_GETARG_PATH_P(0); + + PG_RETURN_BOOL(!path->closed); +} + +Datum +path_npoints(PG_FUNCTION_ARGS) +{ + PATH *path = PG_GETARG_PATH_P(0); + + PG_RETURN_INT32(path->npts); +} + + +Datum +path_close(PG_FUNCTION_ARGS) +{ + PATH *path = PG_GETARG_PATH_P_COPY(0); + + path->closed = true; + + PG_RETURN_PATH_P(path); +} + +Datum +path_open(PG_FUNCTION_ARGS) +{ + PATH *path = PG_GETARG_PATH_P_COPY(0); + + path->closed = false; + + PG_RETURN_PATH_P(path); +} + + +/* path_inter - + * Does p1 intersect p2 at any point? + * Use bounding boxes for a quick (O(n)) check, then do a + * O(n^2) iterative edge check. 
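 *
 * Illustrative note (not part of the upstream comment): the quick check
 * builds each path's bounding box from its points and bails out with
 * "false" unless those boxes overlap; only then are all O(n1*n2) segment
 * pairs tested, including the closing segment p[npts-1] -> p[0] when a
 * path is closed.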
+ */ +Datum +path_inter(PG_FUNCTION_ARGS) +{ + PATH *p1 = PG_GETARG_PATH_P(0); + PATH *p2 = PG_GETARG_PATH_P(1); + BOX b1, + b2; + int i, + j; + LSEG seg1, + seg2; + + Assert(p1->npts > 0 && p2->npts > 0); + + b1.high.x = b1.low.x = p1->p[0].x; + b1.high.y = b1.low.y = p1->p[0].y; + for (i = 1; i < p1->npts; i++) + { + b1.high.x = float8_max(p1->p[i].x, b1.high.x); + b1.high.y = float8_max(p1->p[i].y, b1.high.y); + b1.low.x = float8_min(p1->p[i].x, b1.low.x); + b1.low.y = float8_min(p1->p[i].y, b1.low.y); + } + b2.high.x = b2.low.x = p2->p[0].x; + b2.high.y = b2.low.y = p2->p[0].y; + for (i = 1; i < p2->npts; i++) + { + b2.high.x = float8_max(p2->p[i].x, b2.high.x); + b2.high.y = float8_max(p2->p[i].y, b2.high.y); + b2.low.x = float8_min(p2->p[i].x, b2.low.x); + b2.low.y = float8_min(p2->p[i].y, b2.low.y); + } + if (!box_ov(&b1, &b2)) + PG_RETURN_BOOL(false); + + /* pairwise check lseg intersections */ + for (i = 0; i < p1->npts; i++) + { + int iprev; + + if (i > 0) + iprev = i - 1; + else + { + if (!p1->closed) + continue; + iprev = p1->npts - 1; /* include the closure segment */ + } + + for (j = 0; j < p2->npts; j++) + { + int jprev; + + if (j > 0) + jprev = j - 1; + else + { + if (!p2->closed) + continue; + jprev = p2->npts - 1; /* include the closure segment */ + } + + statlseg_construct(&seg1, &p1->p[iprev], &p1->p[i]); + statlseg_construct(&seg2, &p2->p[jprev], &p2->p[j]); + if (lseg_interpt_lseg(NULL, &seg1, &seg2)) + PG_RETURN_BOOL(true); + } + } + + /* if we dropped through, no two segs intersected */ + PG_RETURN_BOOL(false); +} + +/* path_distance() + * This essentially does a cartesian product of the lsegs in the + * two paths, and finds the min distance between any two lsegs + */ +Datum +path_distance(PG_FUNCTION_ARGS) +{ + PATH *p1 = PG_GETARG_PATH_P(0); + PATH *p2 = PG_GETARG_PATH_P(1); + float8 min = 0.0; /* initialize to keep compiler quiet */ + bool have_min = false; + float8 tmp; + int i, + j; + LSEG seg1, + seg2; + + for (i = 0; i < p1->npts; i++) + { + int iprev; + + if (i > 0) + iprev = i - 1; + else + { + if (!p1->closed) + continue; + iprev = p1->npts - 1; /* include the closure segment */ + } + + for (j = 0; j < p2->npts; j++) + { + int jprev; + + if (j > 0) + jprev = j - 1; + else + { + if (!p2->closed) + continue; + jprev = p2->npts - 1; /* include the closure segment */ + } + + statlseg_construct(&seg1, &p1->p[iprev], &p1->p[i]); + statlseg_construct(&seg2, &p2->p[jprev], &p2->p[j]); + + tmp = lseg_closept_lseg(NULL, &seg1, &seg2); + if (!have_min || float8_lt(tmp, min)) + { + min = tmp; + have_min = true; + } + } + } + + if (!have_min) + PG_RETURN_NULL(); + + PG_RETURN_FLOAT8(min); +} + + +/*---------------------------------------------------------- + * "Arithmetic" operations. + *---------------------------------------------------------*/ + +Datum +path_length(PG_FUNCTION_ARGS) +{ + PATH *path = PG_GETARG_PATH_P(0); + float8 result = 0.0; + int i; + + for (i = 0; i < path->npts; i++) + { + int iprev; + + if (i > 0) + iprev = i - 1; + else + { + if (!path->closed) + continue; + iprev = path->npts - 1; /* include the closure segment */ + } + + result = float8_pl(result, point_dt(&path->p[iprev], &path->p[i])); + } + + PG_RETURN_FLOAT8(result); +} + +/*********************************************************************** + ** + ** Routines for 2D points. + ** + ***********************************************************************/ + +/*---------------------------------------------------------- + * String to point, point to string conversion. 
+ * External format: + * "(x,y)" + * "x,y" + *---------------------------------------------------------*/ + +Datum +point_in(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); + Point *point = (Point *) palloc(sizeof(Point)); + + /* Ignore failure from pair_decode, since our return value won't matter */ + pair_decode(str, &point->x, &point->y, NULL, "point", str, fcinfo->context); + PG_RETURN_POINT_P(point); +} + +Datum +point_out(PG_FUNCTION_ARGS) +{ + Point *pt = PG_GETARG_POINT_P(0); + + PG_RETURN_CSTRING(path_encode(PATH_NONE, 1, pt)); +} + +/* + * point_recv - converts external binary format to point + */ +Datum +point_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + Point *point; + + point = (Point *) palloc(sizeof(Point)); + point->x = pq_getmsgfloat8(buf); + point->y = pq_getmsgfloat8(buf); + PG_RETURN_POINT_P(point); +} + +/* + * point_send - converts point to binary format + */ +Datum +point_send(PG_FUNCTION_ARGS) +{ + Point *pt = PG_GETARG_POINT_P(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendfloat8(&buf, pt->x); + pq_sendfloat8(&buf, pt->y); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + + +/* + * Initialize a point + */ +static inline void +point_construct(Point *result, float8 x, float8 y) +{ + result->x = x; + result->y = y; +} + + +/*---------------------------------------------------------- + * Relational operators for Points. + * Since we do have a sense of coordinates being + * "equal" to a given accuracy (point_vert, point_horiz), + * the other ops must preserve that sense. This means + * that results may, strictly speaking, be a lie (unless + * EPSILON = 0.0). + *---------------------------------------------------------*/ + +Datum +point_left(PG_FUNCTION_ARGS) +{ + Point *pt1 = PG_GETARG_POINT_P(0); + Point *pt2 = PG_GETARG_POINT_P(1); + + PG_RETURN_BOOL(FPlt(pt1->x, pt2->x)); +} + +Datum +point_right(PG_FUNCTION_ARGS) +{ + Point *pt1 = PG_GETARG_POINT_P(0); + Point *pt2 = PG_GETARG_POINT_P(1); + + PG_RETURN_BOOL(FPgt(pt1->x, pt2->x)); +} + +Datum +point_above(PG_FUNCTION_ARGS) +{ + Point *pt1 = PG_GETARG_POINT_P(0); + Point *pt2 = PG_GETARG_POINT_P(1); + + PG_RETURN_BOOL(FPgt(pt1->y, pt2->y)); +} + +Datum +point_below(PG_FUNCTION_ARGS) +{ + Point *pt1 = PG_GETARG_POINT_P(0); + Point *pt2 = PG_GETARG_POINT_P(1); + + PG_RETURN_BOOL(FPlt(pt1->y, pt2->y)); +} + +Datum +point_vert(PG_FUNCTION_ARGS) +{ + Point *pt1 = PG_GETARG_POINT_P(0); + Point *pt2 = PG_GETARG_POINT_P(1); + + PG_RETURN_BOOL(FPeq(pt1->x, pt2->x)); +} + +Datum +point_horiz(PG_FUNCTION_ARGS) +{ + Point *pt1 = PG_GETARG_POINT_P(0); + Point *pt2 = PG_GETARG_POINT_P(1); + + PG_RETURN_BOOL(FPeq(pt1->y, pt2->y)); +} + +Datum +point_eq(PG_FUNCTION_ARGS) +{ + Point *pt1 = PG_GETARG_POINT_P(0); + Point *pt2 = PG_GETARG_POINT_P(1); + + PG_RETURN_BOOL(point_eq_point(pt1, pt2)); +} + +Datum +point_ne(PG_FUNCTION_ARGS) +{ + Point *pt1 = PG_GETARG_POINT_P(0); + Point *pt2 = PG_GETARG_POINT_P(1); + + PG_RETURN_BOOL(!point_eq_point(pt1, pt2)); +} + + +/* + * Check whether the two points are the same + */ +static inline bool +point_eq_point(Point *pt1, Point *pt2) +{ + /* If any NaNs are involved, insist on exact equality */ + if (unlikely(isnan(pt1->x) || isnan(pt1->y) || + isnan(pt2->x) || isnan(pt2->y))) + return (float8_eq(pt1->x, pt2->x) && float8_eq(pt1->y, pt2->y)); + + return (FPeq(pt1->x, pt2->x) && FPeq(pt1->y, pt2->y)); +} + + +/*---------------------------------------------------------- + * "Arithmetic" operators on points. 
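 *
 * Worked example (illustrative, not from the upstream comment): for
 * pt1 = (0,0) and pt2 = (3,4), point_dt() returns hypot(3,4) = 5 and
 * point_sl() returns 4/3; point_invsl() returns the negative reciprocal
 * used when constructing perpendiculars, here -3/4.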
+ *---------------------------------------------------------*/ + +Datum +point_distance(PG_FUNCTION_ARGS) +{ + Point *pt1 = PG_GETARG_POINT_P(0); + Point *pt2 = PG_GETARG_POINT_P(1); + + PG_RETURN_FLOAT8(point_dt(pt1, pt2)); +} + +static inline float8 +point_dt(Point *pt1, Point *pt2) +{ + return HYPOT(float8_mi(pt1->x, pt2->x), float8_mi(pt1->y, pt2->y)); +} + +Datum +point_slope(PG_FUNCTION_ARGS) +{ + Point *pt1 = PG_GETARG_POINT_P(0); + Point *pt2 = PG_GETARG_POINT_P(1); + + PG_RETURN_FLOAT8(point_sl(pt1, pt2)); +} + + +/* + * Return slope of two points + * + * Note that this function returns Inf when the points are the same. + */ +static inline float8 +point_sl(Point *pt1, Point *pt2) +{ + if (FPeq(pt1->x, pt2->x)) + return get_float8_infinity(); + if (FPeq(pt1->y, pt2->y)) + return 0.0; + return float8_div(float8_mi(pt1->y, pt2->y), float8_mi(pt1->x, pt2->x)); +} + + +/* + * Return inverse slope of two points + * + * Note that this function returns 0.0 when the points are the same. + */ +static inline float8 +point_invsl(Point *pt1, Point *pt2) +{ + if (FPeq(pt1->x, pt2->x)) + return 0.0; + if (FPeq(pt1->y, pt2->y)) + return get_float8_infinity(); + return float8_div(float8_mi(pt1->x, pt2->x), float8_mi(pt2->y, pt1->y)); +} + + +/*********************************************************************** + ** + ** Routines for 2D line segments. + ** + ***********************************************************************/ + +/*---------------------------------------------------------- + * String to lseg, lseg to string conversion. + * External forms: "[(x1, y1), (x2, y2)]" + * "(x1, y1), (x2, y2)" + * "x1, y1, x2, y2" + * closed form ok "((x1, y1), (x2, y2))" + * (old form) "(x1, y1, x2, y2)" + *---------------------------------------------------------*/ + +Datum +lseg_in(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + LSEG *lseg = (LSEG *) palloc(sizeof(LSEG)); + bool isopen; + + if (!path_decode(str, true, 2, &lseg->p[0], &isopen, NULL, "lseg", str, + escontext)) + PG_RETURN_NULL(); + + PG_RETURN_LSEG_P(lseg); +} + + +Datum +lseg_out(PG_FUNCTION_ARGS) +{ + LSEG *ls = PG_GETARG_LSEG_P(0); + + PG_RETURN_CSTRING(path_encode(PATH_OPEN, 2, &ls->p[0])); +} + +/* + * lseg_recv - converts external binary format to lseg + */ +Datum +lseg_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + LSEG *lseg; + + lseg = (LSEG *) palloc(sizeof(LSEG)); + + lseg->p[0].x = pq_getmsgfloat8(buf); + lseg->p[0].y = pq_getmsgfloat8(buf); + lseg->p[1].x = pq_getmsgfloat8(buf); + lseg->p[1].y = pq_getmsgfloat8(buf); + + PG_RETURN_LSEG_P(lseg); +} + +/* + * lseg_send - converts lseg to binary format + */ +Datum +lseg_send(PG_FUNCTION_ARGS) +{ + LSEG *ls = PG_GETARG_LSEG_P(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendfloat8(&buf, ls->p[0].x); + pq_sendfloat8(&buf, ls->p[0].y); + pq_sendfloat8(&buf, ls->p[1].x); + pq_sendfloat8(&buf, ls->p[1].y); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + + +/* lseg_construct - + * form a LSEG from two Points. 
+ */ +Datum +lseg_construct(PG_FUNCTION_ARGS) +{ + Point *pt1 = PG_GETARG_POINT_P(0); + Point *pt2 = PG_GETARG_POINT_P(1); + LSEG *result = (LSEG *) palloc(sizeof(LSEG)); + + statlseg_construct(result, pt1, pt2); + + PG_RETURN_LSEG_P(result); +} + +/* like lseg_construct, but assume space already allocated */ +static inline void +statlseg_construct(LSEG *lseg, Point *pt1, Point *pt2) +{ + lseg->p[0].x = pt1->x; + lseg->p[0].y = pt1->y; + lseg->p[1].x = pt2->x; + lseg->p[1].y = pt2->y; +} + + +/* + * Return slope of the line segment + */ +static inline float8 +lseg_sl(LSEG *lseg) +{ + return point_sl(&lseg->p[0], &lseg->p[1]); +} + + +/* + * Return inverse slope of the line segment + */ +static inline float8 +lseg_invsl(LSEG *lseg) +{ + return point_invsl(&lseg->p[0], &lseg->p[1]); +} + + +Datum +lseg_length(PG_FUNCTION_ARGS) +{ + LSEG *lseg = PG_GETARG_LSEG_P(0); + + PG_RETURN_FLOAT8(point_dt(&lseg->p[0], &lseg->p[1])); +} + +/*---------------------------------------------------------- + * Relative position routines. + *---------------------------------------------------------*/ + +/* + ** find intersection of the two lines, and see if it falls on + ** both segments. + */ +Datum +lseg_intersect(PG_FUNCTION_ARGS) +{ + LSEG *l1 = PG_GETARG_LSEG_P(0); + LSEG *l2 = PG_GETARG_LSEG_P(1); + + PG_RETURN_BOOL(lseg_interpt_lseg(NULL, l1, l2)); +} + + +Datum +lseg_parallel(PG_FUNCTION_ARGS) +{ + LSEG *l1 = PG_GETARG_LSEG_P(0); + LSEG *l2 = PG_GETARG_LSEG_P(1); + + PG_RETURN_BOOL(FPeq(lseg_sl(l1), lseg_sl(l2))); +} + +/* + * Determine if two line segments are perpendicular. + */ +Datum +lseg_perp(PG_FUNCTION_ARGS) +{ + LSEG *l1 = PG_GETARG_LSEG_P(0); + LSEG *l2 = PG_GETARG_LSEG_P(1); + + PG_RETURN_BOOL(FPeq(lseg_sl(l1), lseg_invsl(l2))); +} + +Datum +lseg_vertical(PG_FUNCTION_ARGS) +{ + LSEG *lseg = PG_GETARG_LSEG_P(0); + + PG_RETURN_BOOL(FPeq(lseg->p[0].x, lseg->p[1].x)); +} + +Datum +lseg_horizontal(PG_FUNCTION_ARGS) +{ + LSEG *lseg = PG_GETARG_LSEG_P(0); + + PG_RETURN_BOOL(FPeq(lseg->p[0].y, lseg->p[1].y)); +} + + +Datum +lseg_eq(PG_FUNCTION_ARGS) +{ + LSEG *l1 = PG_GETARG_LSEG_P(0); + LSEG *l2 = PG_GETARG_LSEG_P(1); + + PG_RETURN_BOOL(point_eq_point(&l1->p[0], &l2->p[0]) && + point_eq_point(&l1->p[1], &l2->p[1])); +} + +Datum +lseg_ne(PG_FUNCTION_ARGS) +{ + LSEG *l1 = PG_GETARG_LSEG_P(0); + LSEG *l2 = PG_GETARG_LSEG_P(1); + + PG_RETURN_BOOL(!point_eq_point(&l1->p[0], &l2->p[0]) || + !point_eq_point(&l1->p[1], &l2->p[1])); +} + +Datum +lseg_lt(PG_FUNCTION_ARGS) +{ + LSEG *l1 = PG_GETARG_LSEG_P(0); + LSEG *l2 = PG_GETARG_LSEG_P(1); + + PG_RETURN_BOOL(FPlt(point_dt(&l1->p[0], &l1->p[1]), + point_dt(&l2->p[0], &l2->p[1]))); +} + +Datum +lseg_le(PG_FUNCTION_ARGS) +{ + LSEG *l1 = PG_GETARG_LSEG_P(0); + LSEG *l2 = PG_GETARG_LSEG_P(1); + + PG_RETURN_BOOL(FPle(point_dt(&l1->p[0], &l1->p[1]), + point_dt(&l2->p[0], &l2->p[1]))); +} + +Datum +lseg_gt(PG_FUNCTION_ARGS) +{ + LSEG *l1 = PG_GETARG_LSEG_P(0); + LSEG *l2 = PG_GETARG_LSEG_P(1); + + PG_RETURN_BOOL(FPgt(point_dt(&l1->p[0], &l1->p[1]), + point_dt(&l2->p[0], &l2->p[1]))); +} + +Datum +lseg_ge(PG_FUNCTION_ARGS) +{ + LSEG *l1 = PG_GETARG_LSEG_P(0); + LSEG *l2 = PG_GETARG_LSEG_P(1); + + PG_RETURN_BOOL(FPge(point_dt(&l1->p[0], &l1->p[1]), + point_dt(&l2->p[0], &l2->p[1]))); +} + + +/*---------------------------------------------------------- + * Line arithmetic routines. 
+ *---------------------------------------------------------*/ + +/* lseg_distance - + * If two segments don't intersect, then the closest + * point will be from one of the endpoints to the other + * segment. + */ +Datum +lseg_distance(PG_FUNCTION_ARGS) +{ + LSEG *l1 = PG_GETARG_LSEG_P(0); + LSEG *l2 = PG_GETARG_LSEG_P(1); + + PG_RETURN_FLOAT8(lseg_closept_lseg(NULL, l1, l2)); +} + + +Datum +lseg_center(PG_FUNCTION_ARGS) +{ + LSEG *lseg = PG_GETARG_LSEG_P(0); + Point *result; + + result = (Point *) palloc(sizeof(Point)); + + result->x = float8_div(float8_pl(lseg->p[0].x, lseg->p[1].x), 2.0); + result->y = float8_div(float8_pl(lseg->p[0].y, lseg->p[1].y), 2.0); + + PG_RETURN_POINT_P(result); +} + + +/* + * Return whether the two segments intersect. If *result is not NULL, + * it is set to the intersection point. + * + * This function is almost perfectly symmetric, even though it doesn't look + * like it. See lseg_interpt_line() for the other half of it. + */ +static bool +lseg_interpt_lseg(Point *result, LSEG *l1, LSEG *l2) +{ + Point interpt; + LINE tmp; + + line_construct(&tmp, &l2->p[0], lseg_sl(l2)); + if (!lseg_interpt_line(&interpt, l1, &tmp)) + return false; + + /* + * If the line intersection point isn't within l2, there is no valid + * segment intersection point at all. + */ + if (!lseg_contain_point(l2, &interpt)) + return false; + + if (result != NULL) + *result = interpt; + + return true; +} + +Datum +lseg_interpt(PG_FUNCTION_ARGS) +{ + LSEG *l1 = PG_GETARG_LSEG_P(0); + LSEG *l2 = PG_GETARG_LSEG_P(1); + Point *result; + + result = (Point *) palloc(sizeof(Point)); + + if (!lseg_interpt_lseg(result, l1, l2)) + PG_RETURN_NULL(); + PG_RETURN_POINT_P(result); +} + +/*********************************************************************** + ** + ** Routines for position comparisons of differently-typed + ** 2D objects. + ** + ***********************************************************************/ + +/*--------------------------------------------------------------------- + * dist_ + * Minimum distance from one object to another. + *-------------------------------------------------------------------*/ + +/* + * Distance from a point to a line + */ +Datum +dist_pl(PG_FUNCTION_ARGS) +{ + Point *pt = PG_GETARG_POINT_P(0); + LINE *line = PG_GETARG_LINE_P(1); + + PG_RETURN_FLOAT8(line_closept_point(NULL, line, pt)); +} + +/* + * Distance from a line to a point + */ +Datum +dist_lp(PG_FUNCTION_ARGS) +{ + LINE *line = PG_GETARG_LINE_P(0); + Point *pt = PG_GETARG_POINT_P(1); + + PG_RETURN_FLOAT8(line_closept_point(NULL, line, pt)); +} + +/* + * Distance from a point to a lseg + */ +Datum +dist_ps(PG_FUNCTION_ARGS) +{ + Point *pt = PG_GETARG_POINT_P(0); + LSEG *lseg = PG_GETARG_LSEG_P(1); + + PG_RETURN_FLOAT8(lseg_closept_point(NULL, lseg, pt)); +} + +/* + * Distance from a lseg to a point + */ +Datum +dist_sp(PG_FUNCTION_ARGS) +{ + LSEG *lseg = PG_GETARG_LSEG_P(0); + Point *pt = PG_GETARG_POINT_P(1); + + PG_RETURN_FLOAT8(lseg_closept_point(NULL, lseg, pt)); +} + +static float8 +dist_ppath_internal(Point *pt, PATH *path) +{ + float8 result = 0.0; /* keep compiler quiet */ + bool have_min = false; + float8 tmp; + int i; + LSEG lseg; + + Assert(path->npts > 0); + + /* + * The distance from a point to a path is the smallest distance from the + * point to any of its constituent segments. 
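	 * For a closed path this includes the wrap-around segment from
	 * p[npts-1] back to p[0]; for an open path the i == 0 iteration is
	 * skipped instead, since there is no segment ending at the first point
	 * (illustrative clarification, not part of the upstream comment).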
+ */ + for (i = 0; i < path->npts; i++) + { + int iprev; + + if (i > 0) + iprev = i - 1; + else + { + if (!path->closed) + continue; + iprev = path->npts - 1; /* Include the closure segment */ + } + + statlseg_construct(&lseg, &path->p[iprev], &path->p[i]); + tmp = lseg_closept_point(NULL, &lseg, pt); + if (!have_min || float8_lt(tmp, result)) + { + result = tmp; + have_min = true; + } + } + + return result; +} + +/* + * Distance from a point to a path + */ +Datum +dist_ppath(PG_FUNCTION_ARGS) +{ + Point *pt = PG_GETARG_POINT_P(0); + PATH *path = PG_GETARG_PATH_P(1); + + PG_RETURN_FLOAT8(dist_ppath_internal(pt, path)); +} + +/* + * Distance from a path to a point + */ +Datum +dist_pathp(PG_FUNCTION_ARGS) +{ + PATH *path = PG_GETARG_PATH_P(0); + Point *pt = PG_GETARG_POINT_P(1); + + PG_RETURN_FLOAT8(dist_ppath_internal(pt, path)); +} + +/* + * Distance from a point to a box + */ +Datum +dist_pb(PG_FUNCTION_ARGS) +{ + Point *pt = PG_GETARG_POINT_P(0); + BOX *box = PG_GETARG_BOX_P(1); + + PG_RETURN_FLOAT8(box_closept_point(NULL, box, pt)); +} + +/* + * Distance from a box to a point + */ +Datum +dist_bp(PG_FUNCTION_ARGS) +{ + BOX *box = PG_GETARG_BOX_P(0); + Point *pt = PG_GETARG_POINT_P(1); + + PG_RETURN_FLOAT8(box_closept_point(NULL, box, pt)); +} + +/* + * Distance from a lseg to a line + */ +Datum +dist_sl(PG_FUNCTION_ARGS) +{ + LSEG *lseg = PG_GETARG_LSEG_P(0); + LINE *line = PG_GETARG_LINE_P(1); + + PG_RETURN_FLOAT8(lseg_closept_line(NULL, lseg, line)); +} + +/* + * Distance from a line to a lseg + */ +Datum +dist_ls(PG_FUNCTION_ARGS) +{ + LINE *line = PG_GETARG_LINE_P(0); + LSEG *lseg = PG_GETARG_LSEG_P(1); + + PG_RETURN_FLOAT8(lseg_closept_line(NULL, lseg, line)); +} + +/* + * Distance from a lseg to a box + */ +Datum +dist_sb(PG_FUNCTION_ARGS) +{ + LSEG *lseg = PG_GETARG_LSEG_P(0); + BOX *box = PG_GETARG_BOX_P(1); + + PG_RETURN_FLOAT8(box_closept_lseg(NULL, box, lseg)); +} + +/* + * Distance from a box to a lseg + */ +Datum +dist_bs(PG_FUNCTION_ARGS) +{ + BOX *box = PG_GETARG_BOX_P(0); + LSEG *lseg = PG_GETARG_LSEG_P(1); + + PG_RETURN_FLOAT8(box_closept_lseg(NULL, box, lseg)); +} + +static float8 +dist_cpoly_internal(CIRCLE *circle, POLYGON *poly) +{ + float8 result; + + /* calculate distance to center, and subtract radius */ + result = float8_mi(dist_ppoly_internal(&circle->center, poly), + circle->radius); + if (result < 0.0) + result = 0.0; + + return result; +} + +/* + * Distance from a circle to a polygon + */ +Datum +dist_cpoly(PG_FUNCTION_ARGS) +{ + CIRCLE *circle = PG_GETARG_CIRCLE_P(0); + POLYGON *poly = PG_GETARG_POLYGON_P(1); + + PG_RETURN_FLOAT8(dist_cpoly_internal(circle, poly)); +} + +/* + * Distance from a polygon to a circle + */ +Datum +dist_polyc(PG_FUNCTION_ARGS) +{ + POLYGON *poly = PG_GETARG_POLYGON_P(0); + CIRCLE *circle = PG_GETARG_CIRCLE_P(1); + + PG_RETURN_FLOAT8(dist_cpoly_internal(circle, poly)); +} + +/* + * Distance from a point to a polygon + */ +Datum +dist_ppoly(PG_FUNCTION_ARGS) +{ + Point *point = PG_GETARG_POINT_P(0); + POLYGON *poly = PG_GETARG_POLYGON_P(1); + + PG_RETURN_FLOAT8(dist_ppoly_internal(point, poly)); +} + +Datum +dist_polyp(PG_FUNCTION_ARGS) +{ + POLYGON *poly = PG_GETARG_POLYGON_P(0); + Point *point = PG_GETARG_POINT_P(1); + + PG_RETURN_FLOAT8(dist_ppoly_internal(point, poly)); +} + +static float8 +dist_ppoly_internal(Point *pt, POLYGON *poly) +{ + float8 result; + float8 d; + int i; + LSEG seg; + + if (point_inside(pt, poly->npts, poly->p) != 0) + return 0.0; + + /* initialize distance with segment between first and last points */ + 
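	/*
	 * (Illustrative clarification, not part of the upstream source.)  This
	 * first segment is the polygon's closing edge, from vertex 0 to vertex
	 * npts-1; the loop below then walks the remaining edges p[i] -> p[i+1].
	 * A point inside the polygon never reaches this code, because
	 * point_inside() already returned nonzero above and the distance is 0.
	 */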
seg.p[0].x = poly->p[0].x; + seg.p[0].y = poly->p[0].y; + seg.p[1].x = poly->p[poly->npts - 1].x; + seg.p[1].y = poly->p[poly->npts - 1].y; + result = lseg_closept_point(NULL, &seg, pt); + + /* check distances for other segments */ + for (i = 0; i < poly->npts - 1; i++) + { + seg.p[0].x = poly->p[i].x; + seg.p[0].y = poly->p[i].y; + seg.p[1].x = poly->p[i + 1].x; + seg.p[1].y = poly->p[i + 1].y; + d = lseg_closept_point(NULL, &seg, pt); + if (float8_lt(d, result)) + result = d; + } + + return result; +} + + +/*--------------------------------------------------------------------- + * interpt_ + * Intersection point of objects. + * We choose to ignore the "point" of intersection between + * lines and boxes, since there are typically two. + *-------------------------------------------------------------------*/ + +/* + * Return whether the line segment intersect with the line. If *result is not + * NULL, it is set to the intersection point. + */ +static bool +lseg_interpt_line(Point *result, LSEG *lseg, LINE *line) +{ + Point interpt; + LINE tmp; + + /* + * First, we promote the line segment to a line, because we know how to + * find the intersection point of two lines. If they don't have an + * intersection point, we are done. + */ + line_construct(&tmp, &lseg->p[0], lseg_sl(lseg)); + if (!line_interpt_line(&interpt, &tmp, line)) + return false; + + /* + * Then, we check whether the intersection point is actually on the line + * segment. + */ + if (!lseg_contain_point(lseg, &interpt)) + return false; + if (result != NULL) + { + /* + * If there is an intersection, then check explicitly for matching + * endpoints since there may be rounding effects with annoying LSB + * residue. + */ + if (point_eq_point(&lseg->p[0], &interpt)) + *result = lseg->p[0]; + else if (point_eq_point(&lseg->p[1], &interpt)) + *result = lseg->p[1]; + else + *result = interpt; + } + + return true; +} + +/*--------------------------------------------------------------------- + * close_ + * Point of closest proximity between objects. + *-------------------------------------------------------------------*/ + +/* + * If *result is not NULL, it is set to the intersection point of a + * perpendicular of the line through the point. Returns the distance + * of those two points. + */ +static float8 +line_closept_point(Point *result, LINE *line, Point *point) +{ + Point closept; + LINE tmp; + + /* + * We drop a perpendicular to find the intersection point. Ordinarily we + * should always find it, but that can fail in the presence of NaN + * coordinates, and perhaps even from simple roundoff issues. + */ + line_construct(&tmp, point, line_invsl(line)); + if (!line_interpt_line(&closept, &tmp, line)) + { + if (result != NULL) + *result = *point; + + return get_float8_nan(); + } + + if (result != NULL) + *result = closept; + + return point_dt(&closept, point); +} + +Datum +close_pl(PG_FUNCTION_ARGS) +{ + Point *pt = PG_GETARG_POINT_P(0); + LINE *line = PG_GETARG_LINE_P(1); + Point *result; + + result = (Point *) palloc(sizeof(Point)); + + if (isnan(line_closept_point(result, line, pt))) + PG_RETURN_NULL(); + + PG_RETURN_POINT_P(result); +} + + +/* + * Closest point on line segment to specified point. + * + * If *result is not NULL, set it to the closest point on the line segment + * to the point. Returns the distance of the two points. 
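 *
 * Illustrative example (not from the upstream comment): for the segment
 * (0,0)-(10,0) and the point (0,5), the perpendicular through the point
 * (built with point_invsl()) meets the segment at (0,0), so the result
 * is set to (0,0) and the returned distance is 5.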
+ */ +static float8 +lseg_closept_point(Point *result, LSEG *lseg, Point *pt) +{ + Point closept; + LINE tmp; + + /* + * To find the closest point, we draw a perpendicular line from the point + * to the line segment. + */ + line_construct(&tmp, pt, point_invsl(&lseg->p[0], &lseg->p[1])); + lseg_closept_line(&closept, lseg, &tmp); + + if (result != NULL) + *result = closept; + + return point_dt(&closept, pt); +} + +Datum +close_ps(PG_FUNCTION_ARGS) +{ + Point *pt = PG_GETARG_POINT_P(0); + LSEG *lseg = PG_GETARG_LSEG_P(1); + Point *result; + + result = (Point *) palloc(sizeof(Point)); + + if (isnan(lseg_closept_point(result, lseg, pt))) + PG_RETURN_NULL(); + + PG_RETURN_POINT_P(result); +} + + +/* + * Closest point on line segment to line segment + */ +static float8 +lseg_closept_lseg(Point *result, LSEG *on_lseg, LSEG *to_lseg) +{ + Point point; + float8 dist, + d; + + /* First, we handle the case when the line segments are intersecting. */ + if (lseg_interpt_lseg(result, on_lseg, to_lseg)) + return 0.0; + + /* + * Then, we find the closest points from the endpoints of the second line + * segment, and keep the closest one. + */ + dist = lseg_closept_point(result, on_lseg, &to_lseg->p[0]); + d = lseg_closept_point(&point, on_lseg, &to_lseg->p[1]); + if (float8_lt(d, dist)) + { + dist = d; + if (result != NULL) + *result = point; + } + + /* The closest point can still be one of the endpoints, so we test them. */ + d = lseg_closept_point(NULL, to_lseg, &on_lseg->p[0]); + if (float8_lt(d, dist)) + { + dist = d; + if (result != NULL) + *result = on_lseg->p[0]; + } + d = lseg_closept_point(NULL, to_lseg, &on_lseg->p[1]); + if (float8_lt(d, dist)) + { + dist = d; + if (result != NULL) + *result = on_lseg->p[1]; + } + + return dist; +} + +Datum +close_lseg(PG_FUNCTION_ARGS) +{ + LSEG *l1 = PG_GETARG_LSEG_P(0); + LSEG *l2 = PG_GETARG_LSEG_P(1); + Point *result; + + if (lseg_sl(l1) == lseg_sl(l2)) + PG_RETURN_NULL(); + + result = (Point *) palloc(sizeof(Point)); + + if (isnan(lseg_closept_lseg(result, l2, l1))) + PG_RETURN_NULL(); + + PG_RETURN_POINT_P(result); +} + + +/* + * Closest point on or in box to specified point. + * + * If *result is not NULL, set it to the closest point on the box to the + * given point, and return the distance of the two points. 
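+ * + * (Editorial example, not part of the upstream comment: for the box + * '(2,2),(0,0)' and the point (3,3) the closest point is the corner (2,2) + * and the distance is sqrt(2); a point inside the box is its own closest + * point, at distance 0.)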
+ */ +static float8 +box_closept_point(Point *result, BOX *box, Point *pt) +{ + float8 dist, + d; + Point point, + closept; + LSEG lseg; + + if (box_contain_point(box, pt)) + { + if (result != NULL) + *result = *pt; + + return 0.0; + } + + /* pairwise check lseg distances */ + point.x = box->low.x; + point.y = box->high.y; + statlseg_construct(&lseg, &box->low, &point); + dist = lseg_closept_point(result, &lseg, pt); + + statlseg_construct(&lseg, &box->high, &point); + d = lseg_closept_point(&closept, &lseg, pt); + if (float8_lt(d, dist)) + { + dist = d; + if (result != NULL) + *result = closept; + } + + point.x = box->high.x; + point.y = box->low.y; + statlseg_construct(&lseg, &box->low, &point); + d = lseg_closept_point(&closept, &lseg, pt); + if (float8_lt(d, dist)) + { + dist = d; + if (result != NULL) + *result = closept; + } + + statlseg_construct(&lseg, &box->high, &point); + d = lseg_closept_point(&closept, &lseg, pt); + if (float8_lt(d, dist)) + { + dist = d; + if (result != NULL) + *result = closept; + } + + return dist; +} + +Datum +close_pb(PG_FUNCTION_ARGS) +{ + Point *pt = PG_GETARG_POINT_P(0); + BOX *box = PG_GETARG_BOX_P(1); + Point *result; + + result = (Point *) palloc(sizeof(Point)); + + if (isnan(box_closept_point(result, box, pt))) + PG_RETURN_NULL(); + + PG_RETURN_POINT_P(result); +} + +/* + * Closest point on line segment to line. + * + * Return the distance between the line and the closest point of the line + * segment to the line. If *result is not NULL, set it to that point. + * + * NOTE: When the lines are parallel, endpoints of one of the line segment + * are FPeq(), in presence of NaN or Infinite coordinates, or perhaps = + * even because of simple roundoff issues, there may not be a single closest + * point. We are likely to set the result to the second endpoint in these + * cases. + */ +static float8 +lseg_closept_line(Point *result, LSEG *lseg, LINE *line) +{ + float8 dist1, + dist2; + + if (lseg_interpt_line(result, lseg, line)) + return 0.0; + + dist1 = line_closept_point(NULL, line, &lseg->p[0]); + dist2 = line_closept_point(NULL, line, &lseg->p[1]); + + if (dist1 < dist2) + { + if (result != NULL) + *result = lseg->p[0]; + + return dist1; + } + else + { + if (result != NULL) + *result = lseg->p[1]; + + return dist2; + } +} + +Datum +close_ls(PG_FUNCTION_ARGS) +{ + LINE *line = PG_GETARG_LINE_P(0); + LSEG *lseg = PG_GETARG_LSEG_P(1); + Point *result; + + if (lseg_sl(lseg) == line_sl(line)) + PG_RETURN_NULL(); + + result = (Point *) palloc(sizeof(Point)); + + if (isnan(lseg_closept_line(result, lseg, line))) + PG_RETURN_NULL(); + + PG_RETURN_POINT_P(result); +} + + +/* + * Closest point on or in box to line segment. + * + * Returns the distance between the closest point on or in the box to + * the line segment. If *result is not NULL, it is set to that point. 
+ */ +static float8 +box_closept_lseg(Point *result, BOX *box, LSEG *lseg) +{ + float8 dist, + d; + Point point, + closept; + LSEG bseg; + + if (box_interpt_lseg(result, box, lseg)) + return 0.0; + + /* pairwise check lseg distances */ + point.x = box->low.x; + point.y = box->high.y; + statlseg_construct(&bseg, &box->low, &point); + dist = lseg_closept_lseg(result, &bseg, lseg); + + statlseg_construct(&bseg, &box->high, &point); + d = lseg_closept_lseg(&closept, &bseg, lseg); + if (float8_lt(d, dist)) + { + dist = d; + if (result != NULL) + *result = closept; + } + + point.x = box->high.x; + point.y = box->low.y; + statlseg_construct(&bseg, &box->low, &point); + d = lseg_closept_lseg(&closept, &bseg, lseg); + if (float8_lt(d, dist)) + { + dist = d; + if (result != NULL) + *result = closept; + } + + statlseg_construct(&bseg, &box->high, &point); + d = lseg_closept_lseg(&closept, &bseg, lseg); + if (float8_lt(d, dist)) + { + dist = d; + if (result != NULL) + *result = closept; + } + + return dist; +} + +Datum +close_sb(PG_FUNCTION_ARGS) +{ + LSEG *lseg = PG_GETARG_LSEG_P(0); + BOX *box = PG_GETARG_BOX_P(1); + Point *result; + + result = (Point *) palloc(sizeof(Point)); + + if (isnan(box_closept_lseg(result, box, lseg))) + PG_RETURN_NULL(); + + PG_RETURN_POINT_P(result); +} + + +/*--------------------------------------------------------------------- + * on_ + * Whether one object lies completely within another. + *-------------------------------------------------------------------*/ + +/* + * Does the point satisfy the equation? + */ +static bool +line_contain_point(LINE *line, Point *point) +{ + return FPzero(float8_pl(float8_pl(float8_mul(line->A, point->x), + float8_mul(line->B, point->y)), + line->C)); +} + +Datum +on_pl(PG_FUNCTION_ARGS) +{ + Point *pt = PG_GETARG_POINT_P(0); + LINE *line = PG_GETARG_LINE_P(1); + + PG_RETURN_BOOL(line_contain_point(line, pt)); +} + + +/* + * Determine colinearity by detecting a triangle inequality. + * This algorithm seems to behave nicely even with lsb residues - tgl 1997-07-09 + */ +static bool +lseg_contain_point(LSEG *lseg, Point *pt) +{ + return FPeq(point_dt(pt, &lseg->p[0]) + + point_dt(pt, &lseg->p[1]), + point_dt(&lseg->p[0], &lseg->p[1])); +} + +Datum +on_ps(PG_FUNCTION_ARGS) +{ + Point *pt = PG_GETARG_POINT_P(0); + LSEG *lseg = PG_GETARG_LSEG_P(1); + + PG_RETURN_BOOL(lseg_contain_point(lseg, pt)); +} + + +/* + * Check whether the point is in the box or on its border + */ +static bool +box_contain_point(BOX *box, Point *point) +{ + return box->high.x >= point->x && box->low.x <= point->x && + box->high.y >= point->y && box->low.y <= point->y; +} + +Datum +on_pb(PG_FUNCTION_ARGS) +{ + Point *pt = PG_GETARG_POINT_P(0); + BOX *box = PG_GETARG_BOX_P(1); + + PG_RETURN_BOOL(box_contain_point(box, pt)); +} + +Datum +box_contain_pt(PG_FUNCTION_ARGS) +{ + BOX *box = PG_GETARG_BOX_P(0); + Point *pt = PG_GETARG_POINT_P(1); + + PG_RETURN_BOOL(box_contain_point(box, pt)); +} + +/* on_ppath - + * Whether a point lies within (on) a polyline. + * If open, we have to (groan) check each segment. + * (uses same algorithm as for point intersecting segment - tgl 1997-07-09) + * If closed, we use the old O(n) ray method for point-in-polygon. + * The ray is horizontal, from pt out to the right. + * Each segment that crosses the ray counts as an + * intersection; note that an endpoint or edge may touch + * but not cross. 
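+ *		(Editorial example: for the closed path '((0,0),(4,0),(4,4),(0,4))' + *		the point (2,2) is inside, so the result is true; for the open path + *		'[(0,0),(2,0)]' only points lying on some segment, such as (1,0), + *		qualify.)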
+ * (we can do p-in-p in lg(n), but it takes preprocessing) + */ +Datum +on_ppath(PG_FUNCTION_ARGS) +{ + Point *pt = PG_GETARG_POINT_P(0); + PATH *path = PG_GETARG_PATH_P(1); + int i, + n; + float8 a, + b; + + /*-- OPEN --*/ + if (!path->closed) + { + n = path->npts - 1; + a = point_dt(pt, &path->p[0]); + for (i = 0; i < n; i++) + { + b = point_dt(pt, &path->p[i + 1]); + if (FPeq(float8_pl(a, b), point_dt(&path->p[i], &path->p[i + 1]))) + PG_RETURN_BOOL(true); + a = b; + } + PG_RETURN_BOOL(false); + } + + /*-- CLOSED --*/ + PG_RETURN_BOOL(point_inside(pt, path->npts, path->p) != 0); +} + + +/* + * Check whether the line segment is on the line or close enough + * + * It is, if both of its points are on the line or close enough. + */ +Datum +on_sl(PG_FUNCTION_ARGS) +{ + LSEG *lseg = PG_GETARG_LSEG_P(0); + LINE *line = PG_GETARG_LINE_P(1); + + PG_RETURN_BOOL(line_contain_point(line, &lseg->p[0]) && + line_contain_point(line, &lseg->p[1])); +} + + +/* + * Check whether the line segment is in the box or on its border + * + * It is, if both of its points are in the box or on its border. + */ +static bool +box_contain_lseg(BOX *box, LSEG *lseg) +{ + return box_contain_point(box, &lseg->p[0]) && + box_contain_point(box, &lseg->p[1]); +} + +Datum +on_sb(PG_FUNCTION_ARGS) +{ + LSEG *lseg = PG_GETARG_LSEG_P(0); + BOX *box = PG_GETARG_BOX_P(1); + + PG_RETURN_BOOL(box_contain_lseg(box, lseg)); +} + +/*--------------------------------------------------------------------- + * inter_ + * Whether one object intersects another. + *-------------------------------------------------------------------*/ + +Datum +inter_sl(PG_FUNCTION_ARGS) +{ + LSEG *lseg = PG_GETARG_LSEG_P(0); + LINE *line = PG_GETARG_LINE_P(1); + + PG_RETURN_BOOL(lseg_interpt_line(NULL, lseg, line)); +} + + +/* + * Do line segment and box intersect? + * + * Segment completely inside box counts as intersection. + * If you want only segments crossing box boundaries, + * try converting box to path first. + * + * This function also sets the *result to the closest point on the line + * segment to the center of the box when they overlap and the result is + * not NULL. It is somewhat arbitrary, but maybe the best we can do as + * there are typically two points they intersect. + * + * Optimize for non-intersection by checking for box intersection first. + * - thomas 1998-01-30 + */ +static bool +box_interpt_lseg(Point *result, BOX *box, LSEG *lseg) +{ + BOX lbox; + LSEG bseg; + Point point; + + lbox.low.x = float8_min(lseg->p[0].x, lseg->p[1].x); + lbox.low.y = float8_min(lseg->p[0].y, lseg->p[1].y); + lbox.high.x = float8_max(lseg->p[0].x, lseg->p[1].x); + lbox.high.y = float8_max(lseg->p[0].y, lseg->p[1].y); + + /* nothing close to overlap? then not going to intersect */ + if (!box_ov(&lbox, box)) + return false; + + if (result != NULL) + { + box_cn(&point, box); + lseg_closept_point(result, lseg, &point); + } + + /* an endpoint of segment is inside box? 
then clearly intersects */ + if (box_contain_point(box, &lseg->p[0]) || + box_contain_point(box, &lseg->p[1])) + return true; + + /* pairwise check lseg intersections */ + point.x = box->low.x; + point.y = box->high.y; + statlseg_construct(&bseg, &box->low, &point); + if (lseg_interpt_lseg(NULL, &bseg, lseg)) + return true; + + statlseg_construct(&bseg, &box->high, &point); + if (lseg_interpt_lseg(NULL, &bseg, lseg)) + return true; + + point.x = box->high.x; + point.y = box->low.y; + statlseg_construct(&bseg, &box->low, &point); + if (lseg_interpt_lseg(NULL, &bseg, lseg)) + return true; + + statlseg_construct(&bseg, &box->high, &point); + if (lseg_interpt_lseg(NULL, &bseg, lseg)) + return true; + + /* if we dropped through, no two segs intersected */ + return false; +} + +Datum +inter_sb(PG_FUNCTION_ARGS) +{ + LSEG *lseg = PG_GETARG_LSEG_P(0); + BOX *box = PG_GETARG_BOX_P(1); + + PG_RETURN_BOOL(box_interpt_lseg(NULL, box, lseg)); +} + + +/* inter_lb() + * Do line and box intersect? + */ +Datum +inter_lb(PG_FUNCTION_ARGS) +{ + LINE *line = PG_GETARG_LINE_P(0); + BOX *box = PG_GETARG_BOX_P(1); + LSEG bseg; + Point p1, + p2; + + /* pairwise check lseg intersections */ + p1.x = box->low.x; + p1.y = box->low.y; + p2.x = box->low.x; + p2.y = box->high.y; + statlseg_construct(&bseg, &p1, &p2); + if (lseg_interpt_line(NULL, &bseg, line)) + PG_RETURN_BOOL(true); + p1.x = box->high.x; + p1.y = box->high.y; + statlseg_construct(&bseg, &p1, &p2); + if (lseg_interpt_line(NULL, &bseg, line)) + PG_RETURN_BOOL(true); + p2.x = box->high.x; + p2.y = box->low.y; + statlseg_construct(&bseg, &p1, &p2); + if (lseg_interpt_line(NULL, &bseg, line)) + PG_RETURN_BOOL(true); + p1.x = box->low.x; + p1.y = box->low.y; + statlseg_construct(&bseg, &p1, &p2); + if (lseg_interpt_line(NULL, &bseg, line)) + PG_RETURN_BOOL(true); + + /* if we dropped through, no intersection */ + PG_RETURN_BOOL(false); +} + +/*------------------------------------------------------------------ + * The following routines define a data type and operator class for + * POLYGONS .... Part of which (the polygon's bounding box) is built on + * top of the BOX data type. + * + * make_bound_box - create the bounding box for the input polygon + *------------------------------------------------------------------*/ + +/*--------------------------------------------------------------------- + * Make the smallest bounding box for the given polygon. 
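+ * (Editorial example: the bounding box of the polygon '((0,0),(2,1),(1,3))' + * runs from (0,0) to (2,3).)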
+ *---------------------------------------------------------------------*/ +static void +make_bound_box(POLYGON *poly) +{ + int i; + float8 x1, + y1, + x2, + y2; + + Assert(poly->npts > 0); + + x1 = x2 = poly->p[0].x; + y2 = y1 = poly->p[0].y; + for (i = 1; i < poly->npts; i++) + { + if (float8_lt(poly->p[i].x, x1)) + x1 = poly->p[i].x; + if (float8_gt(poly->p[i].x, x2)) + x2 = poly->p[i].x; + if (float8_lt(poly->p[i].y, y1)) + y1 = poly->p[i].y; + if (float8_gt(poly->p[i].y, y2)) + y2 = poly->p[i].y; + } + + poly->boundbox.low.x = x1; + poly->boundbox.high.x = x2; + poly->boundbox.low.y = y1; + poly->boundbox.high.y = y2; +} + +/*------------------------------------------------------------------ + * poly_in - read in the polygon from a string specification + * + * External format: + * "((x0,y0),...,(xn,yn))" + * "x0,y0,...,xn,yn" + * also supports the older style "(x1,...,xn,y1,...yn)" + *------------------------------------------------------------------*/ +Datum +poly_in(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + POLYGON *poly; + int npts; + int size; + int base_size; + bool isopen; + + if ((npts = pair_count(str, ',')) <= 0) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "polygon", str))); + + base_size = sizeof(poly->p[0]) * npts; + size = offsetof(POLYGON, p) + base_size; + + /* Check for integer overflow */ + if (base_size / npts != sizeof(poly->p[0]) || size <= base_size) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("too many points requested"))); + + poly = (POLYGON *) palloc0(size); /* zero any holes */ + + SET_VARSIZE(poly, size); + poly->npts = npts; + + if (!path_decode(str, false, npts, &(poly->p[0]), &isopen, NULL, "polygon", + str, escontext)) + PG_RETURN_NULL(); + + make_bound_box(poly); + + PG_RETURN_POLYGON_P(poly); +} + +/*--------------------------------------------------------------- + * poly_out - convert internal POLYGON representation to the + * character string format "((f8,f8),...,(f8,f8))" + *---------------------------------------------------------------*/ +Datum +poly_out(PG_FUNCTION_ARGS) +{ + POLYGON *poly = PG_GETARG_POLYGON_P(0); + + PG_RETURN_CSTRING(path_encode(PATH_CLOSED, poly->npts, poly->p)); +} + +/* + * poly_recv - converts external binary format to polygon + * + * External representation is int32 number of points, and the points. + * We recompute the bounding box on read, instead of trusting it to + * be valid. (Checking it would take just as long, so may as well + * omit it from external representation.) 
+ */ +Datum +poly_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + POLYGON *poly; + int32 npts; + int32 i; + int size; + + npts = pq_getmsgint(buf, sizeof(int32)); + if (npts <= 0 || npts >= (int32) ((INT_MAX - offsetof(POLYGON, p)) / sizeof(Point))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("invalid number of points in external \"polygon\" value"))); + + size = offsetof(POLYGON, p) + sizeof(poly->p[0]) * npts; + poly = (POLYGON *) palloc0(size); /* zero any holes */ + + SET_VARSIZE(poly, size); + poly->npts = npts; + + for (i = 0; i < npts; i++) + { + poly->p[i].x = pq_getmsgfloat8(buf); + poly->p[i].y = pq_getmsgfloat8(buf); + } + + make_bound_box(poly); + + PG_RETURN_POLYGON_P(poly); +} + +/* + * poly_send - converts polygon to binary format + */ +Datum +poly_send(PG_FUNCTION_ARGS) +{ + POLYGON *poly = PG_GETARG_POLYGON_P(0); + StringInfoData buf; + int32 i; + + pq_begintypsend(&buf); + pq_sendint32(&buf, poly->npts); + for (i = 0; i < poly->npts; i++) + { + pq_sendfloat8(&buf, poly->p[i].x); + pq_sendfloat8(&buf, poly->p[i].y); + } + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + + +/*------------------------------------------------------- + * Is polygon A strictly left of polygon B? i.e. is + * the right most point of A left of the left most point + * of B? + *-------------------------------------------------------*/ +Datum +poly_left(PG_FUNCTION_ARGS) +{ + POLYGON *polya = PG_GETARG_POLYGON_P(0); + POLYGON *polyb = PG_GETARG_POLYGON_P(1); + bool result; + + result = polya->boundbox.high.x < polyb->boundbox.low.x; + + /* + * Avoid leaking memory for toasted inputs ... needed for rtree indexes + */ + PG_FREE_IF_COPY(polya, 0); + PG_FREE_IF_COPY(polyb, 1); + + PG_RETURN_BOOL(result); +} + +/*------------------------------------------------------- + * Is polygon A overlapping or left of polygon B? i.e. is + * the right most point of A at or left of the right most point + * of B? + *-------------------------------------------------------*/ +Datum +poly_overleft(PG_FUNCTION_ARGS) +{ + POLYGON *polya = PG_GETARG_POLYGON_P(0); + POLYGON *polyb = PG_GETARG_POLYGON_P(1); + bool result; + + result = polya->boundbox.high.x <= polyb->boundbox.high.x; + + /* + * Avoid leaking memory for toasted inputs ... needed for rtree indexes + */ + PG_FREE_IF_COPY(polya, 0); + PG_FREE_IF_COPY(polyb, 1); + + PG_RETURN_BOOL(result); +} + +/*------------------------------------------------------- + * Is polygon A strictly right of polygon B? i.e. is + * the left most point of A right of the right most point + * of B? + *-------------------------------------------------------*/ +Datum +poly_right(PG_FUNCTION_ARGS) +{ + POLYGON *polya = PG_GETARG_POLYGON_P(0); + POLYGON *polyb = PG_GETARG_POLYGON_P(1); + bool result; + + result = polya->boundbox.low.x > polyb->boundbox.high.x; + + /* + * Avoid leaking memory for toasted inputs ... needed for rtree indexes + */ + PG_FREE_IF_COPY(polya, 0); + PG_FREE_IF_COPY(polyb, 1); + + PG_RETURN_BOOL(result); +} + +/*------------------------------------------------------- + * Is polygon A overlapping or right of polygon B? i.e. is + * the left most point of A at or right of the left most point + * of B? 
+ *-------------------------------------------------------*/ +Datum +poly_overright(PG_FUNCTION_ARGS) +{ + POLYGON *polya = PG_GETARG_POLYGON_P(0); + POLYGON *polyb = PG_GETARG_POLYGON_P(1); + bool result; + + result = polya->boundbox.low.x >= polyb->boundbox.low.x; + + /* + * Avoid leaking memory for toasted inputs ... needed for rtree indexes + */ + PG_FREE_IF_COPY(polya, 0); + PG_FREE_IF_COPY(polyb, 1); + + PG_RETURN_BOOL(result); +} + +/*------------------------------------------------------- + * Is polygon A strictly below polygon B? i.e. is + * the upper most point of A below the lower most point + * of B? + *-------------------------------------------------------*/ +Datum +poly_below(PG_FUNCTION_ARGS) +{ + POLYGON *polya = PG_GETARG_POLYGON_P(0); + POLYGON *polyb = PG_GETARG_POLYGON_P(1); + bool result; + + result = polya->boundbox.high.y < polyb->boundbox.low.y; + + /* + * Avoid leaking memory for toasted inputs ... needed for rtree indexes + */ + PG_FREE_IF_COPY(polya, 0); + PG_FREE_IF_COPY(polyb, 1); + + PG_RETURN_BOOL(result); +} + +/*------------------------------------------------------- + * Is polygon A overlapping or below polygon B? i.e. is + * the upper most point of A at or below the upper most point + * of B? + *-------------------------------------------------------*/ +Datum +poly_overbelow(PG_FUNCTION_ARGS) +{ + POLYGON *polya = PG_GETARG_POLYGON_P(0); + POLYGON *polyb = PG_GETARG_POLYGON_P(1); + bool result; + + result = polya->boundbox.high.y <= polyb->boundbox.high.y; + + /* + * Avoid leaking memory for toasted inputs ... needed for rtree indexes + */ + PG_FREE_IF_COPY(polya, 0); + PG_FREE_IF_COPY(polyb, 1); + + PG_RETURN_BOOL(result); +} + +/*------------------------------------------------------- + * Is polygon A strictly above polygon B? i.e. is + * the lower most point of A above the upper most point + * of B? + *-------------------------------------------------------*/ +Datum +poly_above(PG_FUNCTION_ARGS) +{ + POLYGON *polya = PG_GETARG_POLYGON_P(0); + POLYGON *polyb = PG_GETARG_POLYGON_P(1); + bool result; + + result = polya->boundbox.low.y > polyb->boundbox.high.y; + + /* + * Avoid leaking memory for toasted inputs ... needed for rtree indexes + */ + PG_FREE_IF_COPY(polya, 0); + PG_FREE_IF_COPY(polyb, 1); + + PG_RETURN_BOOL(result); +} + +/*------------------------------------------------------- + * Is polygon A overlapping or above polygon B? i.e. is + * the lower most point of A at or above the lower most point + * of B? + *-------------------------------------------------------*/ +Datum +poly_overabove(PG_FUNCTION_ARGS) +{ + POLYGON *polya = PG_GETARG_POLYGON_P(0); + POLYGON *polyb = PG_GETARG_POLYGON_P(1); + bool result; + + result = polya->boundbox.low.y >= polyb->boundbox.low.y; + + /* + * Avoid leaking memory for toasted inputs ... needed for rtree indexes + */ + PG_FREE_IF_COPY(polya, 0); + PG_FREE_IF_COPY(polyb, 1); + + PG_RETURN_BOOL(result); +} + + +/*------------------------------------------------------- + * Is polygon A the same as polygon B? i.e. are all the + * points the same? + * Check all points for matches in both forward and reverse + * direction since polygons are non-directional and are + * closed shapes. 
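+ * (Editorial example: '((0,0),(1,0),(1,1))' and '((1,0),(1,1),(0,0))' compare + * as the same polygon, as does a copy whose points are listed in reverse + * order.)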
+ *-------------------------------------------------------*/ +Datum +poly_same(PG_FUNCTION_ARGS) +{ + POLYGON *polya = PG_GETARG_POLYGON_P(0); + POLYGON *polyb = PG_GETARG_POLYGON_P(1); + bool result; + + if (polya->npts != polyb->npts) + result = false; + else + result = plist_same(polya->npts, polya->p, polyb->p); + + /* + * Avoid leaking memory for toasted inputs ... needed for rtree indexes + */ + PG_FREE_IF_COPY(polya, 0); + PG_FREE_IF_COPY(polyb, 1); + + PG_RETURN_BOOL(result); +} + +/*----------------------------------------------------------------- + * Determine if polygon A overlaps polygon B + *-----------------------------------------------------------------*/ +static bool +poly_overlap_internal(POLYGON *polya, POLYGON *polyb) +{ + bool result; + + Assert(polya->npts > 0 && polyb->npts > 0); + + /* Quick check by bounding box */ + result = box_ov(&polya->boundbox, &polyb->boundbox); + + /* + * Brute-force algorithm - try to find intersected edges, if so then + * polygons are overlapped else check is one polygon inside other or not + * by testing single point of them. + */ + if (result) + { + int ia, + ib; + LSEG sa, + sb; + + /* Init first of polya's edge with last point */ + sa.p[0] = polya->p[polya->npts - 1]; + result = false; + + for (ia = 0; ia < polya->npts && !result; ia++) + { + /* Second point of polya's edge is a current one */ + sa.p[1] = polya->p[ia]; + + /* Init first of polyb's edge with last point */ + sb.p[0] = polyb->p[polyb->npts - 1]; + + for (ib = 0; ib < polyb->npts && !result; ib++) + { + sb.p[1] = polyb->p[ib]; + result = lseg_interpt_lseg(NULL, &sa, &sb); + sb.p[0] = sb.p[1]; + } + + /* + * move current endpoint to the first point of next edge + */ + sa.p[0] = sa.p[1]; + } + + if (!result) + { + result = (point_inside(polya->p, polyb->npts, polyb->p) || + point_inside(polyb->p, polya->npts, polya->p)); + } + } + + return result; +} + +Datum +poly_overlap(PG_FUNCTION_ARGS) +{ + POLYGON *polya = PG_GETARG_POLYGON_P(0); + POLYGON *polyb = PG_GETARG_POLYGON_P(1); + bool result; + + result = poly_overlap_internal(polya, polyb); + + /* + * Avoid leaking memory for toasted inputs ... needed for rtree indexes + */ + PG_FREE_IF_COPY(polya, 0); + PG_FREE_IF_COPY(polyb, 1); + + PG_RETURN_BOOL(result); +} + +/* + * Tests special kind of segment for in/out of polygon. + * Special kind means: + * - point a should be on segment s + * - segment (a,b) should not be contained by s + * Returns true if: + * - segment (a,b) is collinear to s and (a,b) is in polygon + * - segment (a,b) s not collinear to s. Note: that doesn't + * mean that segment is in polygon! 
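+ * + * (Editorial note: in the non-collinear case the function is optimistic and + * returns true; the caller, lseg_inside_poly(), keeps checking the remaining + * edges and subsegments, per the "may be not true, but that will check later" + * remark in the code below.)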
+ */ + +static bool +touched_lseg_inside_poly(Point *a, Point *b, LSEG *s, POLYGON *poly, int start) +{ + /* point a is on s, b is not */ + LSEG t; + + t.p[0] = *a; + t.p[1] = *b; + + if (point_eq_point(a, s->p)) + { + if (lseg_contain_point(&t, s->p + 1)) + return lseg_inside_poly(b, s->p + 1, poly, start); + } + else if (point_eq_point(a, s->p + 1)) + { + if (lseg_contain_point(&t, s->p)) + return lseg_inside_poly(b, s->p, poly, start); + } + else if (lseg_contain_point(&t, s->p)) + { + return lseg_inside_poly(b, s->p, poly, start); + } + else if (lseg_contain_point(&t, s->p + 1)) + { + return lseg_inside_poly(b, s->p + 1, poly, start); + } + + return true; /* may be not true, but that will check later */ +} + +/* + * Returns true if segment (a,b) is in polygon, option + * start is used for optimization - function checks + * polygon's edges starting from start + */ +static bool +lseg_inside_poly(Point *a, Point *b, POLYGON *poly, int start) +{ + LSEG s, + t; + int i; + bool res = true, + intersection = false; + + /* since this function recurses, it could be driven to stack overflow */ + check_stack_depth(); + + t.p[0] = *a; + t.p[1] = *b; + s.p[0] = poly->p[(start == 0) ? (poly->npts - 1) : (start - 1)]; + + for (i = start; i < poly->npts && res; i++) + { + Point interpt; + + CHECK_FOR_INTERRUPTS(); + + s.p[1] = poly->p[i]; + + if (lseg_contain_point(&s, t.p)) + { + if (lseg_contain_point(&s, t.p + 1)) + return true; /* t is contained by s */ + + /* Y-cross */ + res = touched_lseg_inside_poly(t.p, t.p + 1, &s, poly, i + 1); + } + else if (lseg_contain_point(&s, t.p + 1)) + { + /* Y-cross */ + res = touched_lseg_inside_poly(t.p + 1, t.p, &s, poly, i + 1); + } + else if (lseg_interpt_lseg(&interpt, &t, &s)) + { + /* + * segments are X-crossing, go to check each subsegment + */ + + intersection = true; + res = lseg_inside_poly(t.p, &interpt, poly, i + 1); + if (res) + res = lseg_inside_poly(t.p + 1, &interpt, poly, i + 1); + } + + s.p[0] = s.p[1]; + } + + if (res && !intersection) + { + Point p; + + /* + * if X-intersection wasn't found, then check central point of tested + * segment. In opposite case we already check all subsegments + */ + p.x = float8_div(float8_pl(t.p[0].x, t.p[1].x), 2.0); + p.y = float8_div(float8_pl(t.p[0].y, t.p[1].y), 2.0); + + res = point_inside(&p, poly->npts, poly->p); + } + + return res; +} + +/* + * Check whether the first polygon contains the second + */ +static bool +poly_contain_poly(POLYGON *contains_poly, POLYGON *contained_poly) +{ + int i; + LSEG s; + + Assert(contains_poly->npts > 0 && contained_poly->npts > 0); + + /* + * Quick check to see if contained's bounding box is contained in + * contains' bb. + */ + if (!box_contain_box(&contains_poly->boundbox, &contained_poly->boundbox)) + return false; + + s.p[0] = contained_poly->p[contained_poly->npts - 1]; + + for (i = 0; i < contained_poly->npts; i++) + { + s.p[1] = contained_poly->p[i]; + if (!lseg_inside_poly(s.p, s.p + 1, contains_poly, 0)) + return false; + s.p[0] = s.p[1]; + } + + return true; +} + +Datum +poly_contain(PG_FUNCTION_ARGS) +{ + POLYGON *polya = PG_GETARG_POLYGON_P(0); + POLYGON *polyb = PG_GETARG_POLYGON_P(1); + bool result; + + result = poly_contain_poly(polya, polyb); + + /* + * Avoid leaking memory for toasted inputs ... 
needed for rtree indexes + */ + PG_FREE_IF_COPY(polya, 0); + PG_FREE_IF_COPY(polyb, 1); + + PG_RETURN_BOOL(result); +} + + +/*----------------------------------------------------------------- + * Determine if polygon A is contained by polygon B + *-----------------------------------------------------------------*/ +Datum +poly_contained(PG_FUNCTION_ARGS) +{ + POLYGON *polya = PG_GETARG_POLYGON_P(0); + POLYGON *polyb = PG_GETARG_POLYGON_P(1); + bool result; + + /* Just switch the arguments and pass it off to poly_contain */ + result = poly_contain_poly(polyb, polya); + + /* + * Avoid leaking memory for toasted inputs ... needed for rtree indexes + */ + PG_FREE_IF_COPY(polya, 0); + PG_FREE_IF_COPY(polyb, 1); + + PG_RETURN_BOOL(result); +} + + +Datum +poly_contain_pt(PG_FUNCTION_ARGS) +{ + POLYGON *poly = PG_GETARG_POLYGON_P(0); + Point *p = PG_GETARG_POINT_P(1); + + PG_RETURN_BOOL(point_inside(p, poly->npts, poly->p) != 0); +} + +Datum +pt_contained_poly(PG_FUNCTION_ARGS) +{ + Point *p = PG_GETARG_POINT_P(0); + POLYGON *poly = PG_GETARG_POLYGON_P(1); + + PG_RETURN_BOOL(point_inside(p, poly->npts, poly->p) != 0); +} + + +Datum +poly_distance(PG_FUNCTION_ARGS) +{ + POLYGON *polya = PG_GETARG_POLYGON_P(0); + POLYGON *polyb = PG_GETARG_POLYGON_P(1); + float8 min = 0.0; /* initialize to keep compiler quiet */ + bool have_min = false; + float8 tmp; + int i, + j; + LSEG seg1, + seg2; + + /* + * Distance is zero if polygons overlap. We must check this because the + * path distance will not give the right answer if one poly is entirely + * within the other. + */ + if (poly_overlap_internal(polya, polyb)) + PG_RETURN_FLOAT8(0.0); + + /* + * When they don't overlap, the distance calculation is identical to that + * for closed paths (i.e., we needn't care about the fact that polygons + * include their contained areas). See path_distance(). + */ + for (i = 0; i < polya->npts; i++) + { + int iprev; + + if (i > 0) + iprev = i - 1; + else + iprev = polya->npts - 1; + + for (j = 0; j < polyb->npts; j++) + { + int jprev; + + if (j > 0) + jprev = j - 1; + else + jprev = polyb->npts - 1; + + statlseg_construct(&seg1, &polya->p[iprev], &polya->p[i]); + statlseg_construct(&seg2, &polyb->p[jprev], &polyb->p[j]); + + tmp = lseg_closept_lseg(NULL, &seg1, &seg2); + if (!have_min || float8_lt(tmp, min)) + { + min = tmp; + have_min = true; + } + } + } + + if (!have_min) + PG_RETURN_NULL(); + + PG_RETURN_FLOAT8(min); +} + + +/*********************************************************************** + ** + ** Routines for 2D points. 
+ ** + ***********************************************************************/ + +Datum +construct_point(PG_FUNCTION_ARGS) +{ + float8 x = PG_GETARG_FLOAT8(0); + float8 y = PG_GETARG_FLOAT8(1); + Point *result; + + result = (Point *) palloc(sizeof(Point)); + + point_construct(result, x, y); + + PG_RETURN_POINT_P(result); +} + + +static inline void +point_add_point(Point *result, Point *pt1, Point *pt2) +{ + point_construct(result, + float8_pl(pt1->x, pt2->x), + float8_pl(pt1->y, pt2->y)); +} + +Datum +point_add(PG_FUNCTION_ARGS) +{ + Point *p1 = PG_GETARG_POINT_P(0); + Point *p2 = PG_GETARG_POINT_P(1); + Point *result; + + result = (Point *) palloc(sizeof(Point)); + + point_add_point(result, p1, p2); + + PG_RETURN_POINT_P(result); +} + + +static inline void +point_sub_point(Point *result, Point *pt1, Point *pt2) +{ + point_construct(result, + float8_mi(pt1->x, pt2->x), + float8_mi(pt1->y, pt2->y)); +} + +Datum +point_sub(PG_FUNCTION_ARGS) +{ + Point *p1 = PG_GETARG_POINT_P(0); + Point *p2 = PG_GETARG_POINT_P(1); + Point *result; + + result = (Point *) palloc(sizeof(Point)); + + point_sub_point(result, p1, p2); + + PG_RETURN_POINT_P(result); +} + + +static inline void +point_mul_point(Point *result, Point *pt1, Point *pt2) +{ + point_construct(result, + float8_mi(float8_mul(pt1->x, pt2->x), + float8_mul(pt1->y, pt2->y)), + float8_pl(float8_mul(pt1->x, pt2->y), + float8_mul(pt1->y, pt2->x))); +} + +Datum +point_mul(PG_FUNCTION_ARGS) +{ + Point *p1 = PG_GETARG_POINT_P(0); + Point *p2 = PG_GETARG_POINT_P(1); + Point *result; + + result = (Point *) palloc(sizeof(Point)); + + point_mul_point(result, p1, p2); + + PG_RETURN_POINT_P(result); +} + + +static inline void +point_div_point(Point *result, Point *pt1, Point *pt2) +{ + float8 div; + + div = float8_pl(float8_mul(pt2->x, pt2->x), float8_mul(pt2->y, pt2->y)); + + point_construct(result, + float8_div(float8_pl(float8_mul(pt1->x, pt2->x), + float8_mul(pt1->y, pt2->y)), div), + float8_div(float8_mi(float8_mul(pt1->y, pt2->x), + float8_mul(pt1->x, pt2->y)), div)); +} + +Datum +point_div(PG_FUNCTION_ARGS) +{ + Point *p1 = PG_GETARG_POINT_P(0); + Point *p2 = PG_GETARG_POINT_P(1); + Point *result; + + result = (Point *) palloc(sizeof(Point)); + + point_div_point(result, p1, p2); + + PG_RETURN_POINT_P(result); +} + + +/*********************************************************************** + ** + ** Routines for 2D boxes. 
+ ** + ***********************************************************************/ + +Datum +points_box(PG_FUNCTION_ARGS) +{ + Point *p1 = PG_GETARG_POINT_P(0); + Point *p2 = PG_GETARG_POINT_P(1); + BOX *result; + + result = (BOX *) palloc(sizeof(BOX)); + + box_construct(result, p1, p2); + + PG_RETURN_BOX_P(result); +} + +Datum +box_add(PG_FUNCTION_ARGS) +{ + BOX *box = PG_GETARG_BOX_P(0); + Point *p = PG_GETARG_POINT_P(1); + BOX *result; + + result = (BOX *) palloc(sizeof(BOX)); + + point_add_point(&result->high, &box->high, p); + point_add_point(&result->low, &box->low, p); + + PG_RETURN_BOX_P(result); +} + +Datum +box_sub(PG_FUNCTION_ARGS) +{ + BOX *box = PG_GETARG_BOX_P(0); + Point *p = PG_GETARG_POINT_P(1); + BOX *result; + + result = (BOX *) palloc(sizeof(BOX)); + + point_sub_point(&result->high, &box->high, p); + point_sub_point(&result->low, &box->low, p); + + PG_RETURN_BOX_P(result); +} + +Datum +box_mul(PG_FUNCTION_ARGS) +{ + BOX *box = PG_GETARG_BOX_P(0); + Point *p = PG_GETARG_POINT_P(1); + BOX *result; + Point high, + low; + + result = (BOX *) palloc(sizeof(BOX)); + + point_mul_point(&high, &box->high, p); + point_mul_point(&low, &box->low, p); + + box_construct(result, &high, &low); + + PG_RETURN_BOX_P(result); +} + +Datum +box_div(PG_FUNCTION_ARGS) +{ + BOX *box = PG_GETARG_BOX_P(0); + Point *p = PG_GETARG_POINT_P(1); + BOX *result; + Point high, + low; + + result = (BOX *) palloc(sizeof(BOX)); + + point_div_point(&high, &box->high, p); + point_div_point(&low, &box->low, p); + + box_construct(result, &high, &low); + + PG_RETURN_BOX_P(result); +} + +/* + * Convert point to empty box + */ +Datum +point_box(PG_FUNCTION_ARGS) +{ + Point *pt = PG_GETARG_POINT_P(0); + BOX *box; + + box = (BOX *) palloc(sizeof(BOX)); + + box->high.x = pt->x; + box->low.x = pt->x; + box->high.y = pt->y; + box->low.y = pt->y; + + PG_RETURN_BOX_P(box); +} + +/* + * Smallest bounding box that includes both of the given boxes + */ +Datum +boxes_bound_box(PG_FUNCTION_ARGS) +{ + BOX *box1 = PG_GETARG_BOX_P(0), + *box2 = PG_GETARG_BOX_P(1), + *container; + + container = (BOX *) palloc(sizeof(BOX)); + + container->high.x = float8_max(box1->high.x, box2->high.x); + container->low.x = float8_min(box1->low.x, box2->low.x); + container->high.y = float8_max(box1->high.y, box2->high.y); + container->low.y = float8_min(box1->low.y, box2->low.y); + + PG_RETURN_BOX_P(container); +} + + +/*********************************************************************** + ** + ** Routines for 2D paths. + ** + ***********************************************************************/ + +/* path_add() + * Concatenate two paths (only if they are both open). 
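+ * + * (Editorial example: '[(0,0),(1,1)]' + '[(2,2),(3,3)]' yields + * '[(0,0),(1,1),(2,2),(3,3)]'; if either input path is closed, the function + * returns SQL NULL rather than raising an error.)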
+ */ +Datum +path_add(PG_FUNCTION_ARGS) +{ + PATH *p1 = PG_GETARG_PATH_P(0); + PATH *p2 = PG_GETARG_PATH_P(1); + PATH *result; + int size, + base_size; + int i; + + if (p1->closed || p2->closed) + PG_RETURN_NULL(); + + base_size = sizeof(p1->p[0]) * (p1->npts + p2->npts); + size = offsetof(PATH, p) + base_size; + + /* Check for integer overflow */ + if (base_size / sizeof(p1->p[0]) != (p1->npts + p2->npts) || + size <= base_size) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("too many points requested"))); + + result = (PATH *) palloc(size); + + SET_VARSIZE(result, size); + result->npts = (p1->npts + p2->npts); + result->closed = p1->closed; + /* prevent instability in unused pad bytes */ + result->dummy = 0; + + for (i = 0; i < p1->npts; i++) + { + result->p[i].x = p1->p[i].x; + result->p[i].y = p1->p[i].y; + } + for (i = 0; i < p2->npts; i++) + { + result->p[i + p1->npts].x = p2->p[i].x; + result->p[i + p1->npts].y = p2->p[i].y; + } + + PG_RETURN_PATH_P(result); +} + +/* path_add_pt() + * Translation operators. + */ +Datum +path_add_pt(PG_FUNCTION_ARGS) +{ + PATH *path = PG_GETARG_PATH_P_COPY(0); + Point *point = PG_GETARG_POINT_P(1); + int i; + + for (i = 0; i < path->npts; i++) + point_add_point(&path->p[i], &path->p[i], point); + + PG_RETURN_PATH_P(path); +} + +Datum +path_sub_pt(PG_FUNCTION_ARGS) +{ + PATH *path = PG_GETARG_PATH_P_COPY(0); + Point *point = PG_GETARG_POINT_P(1); + int i; + + for (i = 0; i < path->npts; i++) + point_sub_point(&path->p[i], &path->p[i], point); + + PG_RETURN_PATH_P(path); +} + +/* path_mul_pt() + * Rotation and scaling operators. + */ +Datum +path_mul_pt(PG_FUNCTION_ARGS) +{ + PATH *path = PG_GETARG_PATH_P_COPY(0); + Point *point = PG_GETARG_POINT_P(1); + int i; + + for (i = 0; i < path->npts; i++) + point_mul_point(&path->p[i], &path->p[i], point); + + PG_RETURN_PATH_P(path); +} + +Datum +path_div_pt(PG_FUNCTION_ARGS) +{ + PATH *path = PG_GETARG_PATH_P_COPY(0); + Point *point = PG_GETARG_POINT_P(1); + int i; + + for (i = 0; i < path->npts; i++) + point_div_point(&path->p[i], &path->p[i], point); + + PG_RETURN_PATH_P(path); +} + + +Datum +path_poly(PG_FUNCTION_ARGS) +{ + PATH *path = PG_GETARG_PATH_P(0); + POLYGON *poly; + int size; + int i; + + /* This is not very consistent --- other similar cases return NULL ... */ + if (!path->closed) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("open path cannot be converted to polygon"))); + + /* + * Never overflows: the old size fit in MaxAllocSize, and the new size is + * just a small constant larger. + */ + size = offsetof(POLYGON, p) + sizeof(poly->p[0]) * path->npts; + poly = (POLYGON *) palloc(size); + + SET_VARSIZE(poly, size); + poly->npts = path->npts; + + for (i = 0; i < path->npts; i++) + { + poly->p[i].x = path->p[i].x; + poly->p[i].y = path->p[i].y; + } + + make_bound_box(poly); + + PG_RETURN_POLYGON_P(poly); +} + + +/*********************************************************************** + ** + ** Routines for 2D polygons. 
+ ** + ***********************************************************************/ + +Datum +poly_npoints(PG_FUNCTION_ARGS) +{ + POLYGON *poly = PG_GETARG_POLYGON_P(0); + + PG_RETURN_INT32(poly->npts); +} + + +Datum +poly_center(PG_FUNCTION_ARGS) +{ + POLYGON *poly = PG_GETARG_POLYGON_P(0); + Point *result; + CIRCLE circle; + + result = (Point *) palloc(sizeof(Point)); + + poly_to_circle(&circle, poly); + *result = circle.center; + + PG_RETURN_POINT_P(result); +} + + +Datum +poly_box(PG_FUNCTION_ARGS) +{ + POLYGON *poly = PG_GETARG_POLYGON_P(0); + BOX *box; + + box = (BOX *) palloc(sizeof(BOX)); + *box = poly->boundbox; + + PG_RETURN_BOX_P(box); +} + + +/* box_poly() + * Convert a box to a polygon. + */ +Datum +box_poly(PG_FUNCTION_ARGS) +{ + BOX *box = PG_GETARG_BOX_P(0); + POLYGON *poly; + int size; + + /* map four corners of the box to a polygon */ + size = offsetof(POLYGON, p) + sizeof(poly->p[0]) * 4; + poly = (POLYGON *) palloc(size); + + SET_VARSIZE(poly, size); + poly->npts = 4; + + poly->p[0].x = box->low.x; + poly->p[0].y = box->low.y; + poly->p[1].x = box->low.x; + poly->p[1].y = box->high.y; + poly->p[2].x = box->high.x; + poly->p[2].y = box->high.y; + poly->p[3].x = box->high.x; + poly->p[3].y = box->low.y; + + box_construct(&poly->boundbox, &box->high, &box->low); + + PG_RETURN_POLYGON_P(poly); +} + + +Datum +poly_path(PG_FUNCTION_ARGS) +{ + POLYGON *poly = PG_GETARG_POLYGON_P(0); + PATH *path; + int size; + int i; + + /* + * Never overflows: the old size fit in MaxAllocSize, and the new size is + * smaller by a small constant. + */ + size = offsetof(PATH, p) + sizeof(path->p[0]) * poly->npts; + path = (PATH *) palloc(size); + + SET_VARSIZE(path, size); + path->npts = poly->npts; + path->closed = true; + /* prevent instability in unused pad bytes */ + path->dummy = 0; + + for (i = 0; i < poly->npts; i++) + { + path->p[i].x = poly->p[i].x; + path->p[i].y = poly->p[i].y; + } + + PG_RETURN_PATH_P(path); +} + + +/*********************************************************************** + ** + ** Routines for circles. + ** + ***********************************************************************/ + +/*---------------------------------------------------------- + * Formatting and conversion routines. + *---------------------------------------------------------*/ + +/* circle_in - convert a string to internal form. + * + * External format: (center and radius of circle) + * "<(f8,f8),f8>" + * also supports quick entry style "f8,f8,f8" + */ +Datum +circle_in(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + CIRCLE *circle = (CIRCLE *) palloc(sizeof(CIRCLE)); + char *s, + *cp; + int depth = 0; + + s = str; + while (isspace((unsigned char) *s)) + s++; + if (*s == LDELIM_C) + depth++, s++; + else if (*s == LDELIM) + { + /* If there are two left parens, consume the first one */ + cp = (s + 1); + while (isspace((unsigned char) *cp)) + cp++; + if (*cp == LDELIM) + depth++, s = cp; + } + + /* pair_decode will consume parens around the pair, if any */ + if (!pair_decode(s, &circle->center.x, &circle->center.y, &s, "circle", str, + escontext)) + PG_RETURN_NULL(); + + if (*s == DELIM) + s++; + + if (!single_decode(s, &circle->radius, &s, "circle", str, escontext)) + PG_RETURN_NULL(); + + /* We have to accept NaN. 
*/ + if (circle->radius < 0.0) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "circle", str))); + + while (depth > 0) + { + if ((*s == RDELIM) || ((*s == RDELIM_C) && (depth == 1))) + { + depth--; + s++; + while (isspace((unsigned char) *s)) + s++; + } + else + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "circle", str))); + } + + if (*s != '\0') + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "circle", str))); + + PG_RETURN_CIRCLE_P(circle); +} + +/* circle_out - convert a circle to external form. + */ +Datum +circle_out(PG_FUNCTION_ARGS) +{ + CIRCLE *circle = PG_GETARG_CIRCLE_P(0); + StringInfoData str; + + initStringInfo(&str); + + appendStringInfoChar(&str, LDELIM_C); + appendStringInfoChar(&str, LDELIM); + pair_encode(circle->center.x, circle->center.y, &str); + appendStringInfoChar(&str, RDELIM); + appendStringInfoChar(&str, DELIM); + single_encode(circle->radius, &str); + appendStringInfoChar(&str, RDELIM_C); + + PG_RETURN_CSTRING(str.data); +} + +/* + * circle_recv - converts external binary format to circle + */ +Datum +circle_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + CIRCLE *circle; + + circle = (CIRCLE *) palloc(sizeof(CIRCLE)); + + circle->center.x = pq_getmsgfloat8(buf); + circle->center.y = pq_getmsgfloat8(buf); + circle->radius = pq_getmsgfloat8(buf); + + /* We have to accept NaN. */ + if (circle->radius < 0.0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("invalid radius in external \"circle\" value"))); + + PG_RETURN_CIRCLE_P(circle); +} + +/* + * circle_send - converts circle to binary format + */ +Datum +circle_send(PG_FUNCTION_ARGS) +{ + CIRCLE *circle = PG_GETARG_CIRCLE_P(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendfloat8(&buf, circle->center.x); + pq_sendfloat8(&buf, circle->center.y); + pq_sendfloat8(&buf, circle->radius); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + + +/*---------------------------------------------------------- + * Relational operators for CIRCLEs. + * <, >, <=, >=, and == are based on circle area. + *---------------------------------------------------------*/ + +/* circles identical? + * + * We consider NaNs values to be equal to each other to let those circles + * to be found. + */ +Datum +circle_same(PG_FUNCTION_ARGS) +{ + CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0); + CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1); + + PG_RETURN_BOOL(((isnan(circle1->radius) && isnan(circle2->radius)) || + FPeq(circle1->radius, circle2->radius)) && + point_eq_point(&circle1->center, &circle2->center)); +} + +/* circle_overlap - does circle1 overlap circle2? + */ +Datum +circle_overlap(PG_FUNCTION_ARGS) +{ + CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0); + CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1); + + PG_RETURN_BOOL(FPle(point_dt(&circle1->center, &circle2->center), + float8_pl(circle1->radius, circle2->radius))); +} + +/* circle_overleft - is the right edge of circle1 at or left of + * the right edge of circle2? + */ +Datum +circle_overleft(PG_FUNCTION_ARGS) +{ + CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0); + CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1); + + PG_RETURN_BOOL(FPle(float8_pl(circle1->center.x, circle1->radius), + float8_pl(circle2->center.x, circle2->radius))); +} + +/* circle_left - is circle1 strictly left of circle2? 
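+ * + * (Editorial example: '<(0,0),1>' is strictly left of '<(4,0),2>' because its + * right extent 0+1 lies left of the other circle's left extent 4-2.)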
+ */ +Datum +circle_left(PG_FUNCTION_ARGS) +{ + CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0); + CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1); + + PG_RETURN_BOOL(FPlt(float8_pl(circle1->center.x, circle1->radius), + float8_mi(circle2->center.x, circle2->radius))); +} + +/* circle_right - is circle1 strictly right of circle2? + */ +Datum +circle_right(PG_FUNCTION_ARGS) +{ + CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0); + CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1); + + PG_RETURN_BOOL(FPgt(float8_mi(circle1->center.x, circle1->radius), + float8_pl(circle2->center.x, circle2->radius))); +} + +/* circle_overright - is the left edge of circle1 at or right of + * the left edge of circle2? + */ +Datum +circle_overright(PG_FUNCTION_ARGS) +{ + CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0); + CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1); + + PG_RETURN_BOOL(FPge(float8_mi(circle1->center.x, circle1->radius), + float8_mi(circle2->center.x, circle2->radius))); +} + +/* circle_contained - is circle1 contained by circle2? + */ +Datum +circle_contained(PG_FUNCTION_ARGS) +{ + CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0); + CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1); + + PG_RETURN_BOOL(FPle(point_dt(&circle1->center, &circle2->center), + float8_mi(circle2->radius, circle1->radius))); +} + +/* circle_contain - does circle1 contain circle2? + */ +Datum +circle_contain(PG_FUNCTION_ARGS) +{ + CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0); + CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1); + + PG_RETURN_BOOL(FPle(point_dt(&circle1->center, &circle2->center), + float8_mi(circle1->radius, circle2->radius))); +} + + +/* circle_below - is circle1 strictly below circle2? + */ +Datum +circle_below(PG_FUNCTION_ARGS) +{ + CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0); + CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1); + + PG_RETURN_BOOL(FPlt(float8_pl(circle1->center.y, circle1->radius), + float8_mi(circle2->center.y, circle2->radius))); +} + +/* circle_above - is circle1 strictly above circle2? + */ +Datum +circle_above(PG_FUNCTION_ARGS) +{ + CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0); + CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1); + + PG_RETURN_BOOL(FPgt(float8_mi(circle1->center.y, circle1->radius), + float8_pl(circle2->center.y, circle2->radius))); +} + +/* circle_overbelow - is the upper edge of circle1 at or below + * the upper edge of circle2? + */ +Datum +circle_overbelow(PG_FUNCTION_ARGS) +{ + CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0); + CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1); + + PG_RETURN_BOOL(FPle(float8_pl(circle1->center.y, circle1->radius), + float8_pl(circle2->center.y, circle2->radius))); +} + +/* circle_overabove - is the lower edge of circle1 at or above + * the lower edge of circle2? + */ +Datum +circle_overabove(PG_FUNCTION_ARGS) +{ + CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0); + CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1); + + PG_RETURN_BOOL(FPge(float8_mi(circle1->center.y, circle1->radius), + float8_mi(circle2->center.y, circle2->radius))); +} + + +/* circle_relop - is area(circle1) relop area(circle2), within + * our accuracy constraint? 
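+ * + * (Editorial note: these comparison operators order circles by area alone, so + * e.g. '<(0,0),2>' compares greater than '<(10,10),1>' regardless of position.)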
+ */ +Datum +circle_eq(PG_FUNCTION_ARGS) +{ + CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0); + CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1); + + PG_RETURN_BOOL(FPeq(circle_ar(circle1), circle_ar(circle2))); +} + +Datum +circle_ne(PG_FUNCTION_ARGS) +{ + CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0); + CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1); + + PG_RETURN_BOOL(FPne(circle_ar(circle1), circle_ar(circle2))); +} + +Datum +circle_lt(PG_FUNCTION_ARGS) +{ + CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0); + CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1); + + PG_RETURN_BOOL(FPlt(circle_ar(circle1), circle_ar(circle2))); +} + +Datum +circle_gt(PG_FUNCTION_ARGS) +{ + CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0); + CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1); + + PG_RETURN_BOOL(FPgt(circle_ar(circle1), circle_ar(circle2))); +} + +Datum +circle_le(PG_FUNCTION_ARGS) +{ + CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0); + CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1); + + PG_RETURN_BOOL(FPle(circle_ar(circle1), circle_ar(circle2))); +} + +Datum +circle_ge(PG_FUNCTION_ARGS) +{ + CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0); + CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1); + + PG_RETURN_BOOL(FPge(circle_ar(circle1), circle_ar(circle2))); +} + + +/*---------------------------------------------------------- + * "Arithmetic" operators on circles. + *---------------------------------------------------------*/ + +/* circle_add_pt() + * Translation operator. + */ +Datum +circle_add_pt(PG_FUNCTION_ARGS) +{ + CIRCLE *circle = PG_GETARG_CIRCLE_P(0); + Point *point = PG_GETARG_POINT_P(1); + CIRCLE *result; + + result = (CIRCLE *) palloc(sizeof(CIRCLE)); + + point_add_point(&result->center, &circle->center, point); + result->radius = circle->radius; + + PG_RETURN_CIRCLE_P(result); +} + +Datum +circle_sub_pt(PG_FUNCTION_ARGS) +{ + CIRCLE *circle = PG_GETARG_CIRCLE_P(0); + Point *point = PG_GETARG_POINT_P(1); + CIRCLE *result; + + result = (CIRCLE *) palloc(sizeof(CIRCLE)); + + point_sub_point(&result->center, &circle->center, point); + result->radius = circle->radius; + + PG_RETURN_CIRCLE_P(result); +} + + +/* circle_mul_pt() + * Rotation and scaling operators. + */ +Datum +circle_mul_pt(PG_FUNCTION_ARGS) +{ + CIRCLE *circle = PG_GETARG_CIRCLE_P(0); + Point *point = PG_GETARG_POINT_P(1); + CIRCLE *result; + + result = (CIRCLE *) palloc(sizeof(CIRCLE)); + + point_mul_point(&result->center, &circle->center, point); + result->radius = float8_mul(circle->radius, HYPOT(point->x, point->y)); + + PG_RETURN_CIRCLE_P(result); +} + +Datum +circle_div_pt(PG_FUNCTION_ARGS) +{ + CIRCLE *circle = PG_GETARG_CIRCLE_P(0); + Point *point = PG_GETARG_POINT_P(1); + CIRCLE *result; + + result = (CIRCLE *) palloc(sizeof(CIRCLE)); + + point_div_point(&result->center, &circle->center, point); + result->radius = float8_div(circle->radius, HYPOT(point->x, point->y)); + + PG_RETURN_CIRCLE_P(result); +} + + +/* circle_area - returns the area of the circle. + */ +Datum +circle_area(PG_FUNCTION_ARGS) +{ + CIRCLE *circle = PG_GETARG_CIRCLE_P(0); + + PG_RETURN_FLOAT8(circle_ar(circle)); +} + + +/* circle_diameter - returns the diameter of the circle. + */ +Datum +circle_diameter(PG_FUNCTION_ARGS) +{ + CIRCLE *circle = PG_GETARG_CIRCLE_P(0); + + PG_RETURN_FLOAT8(float8_mul(circle->radius, 2.0)); +} + + +/* circle_radius - returns the radius of the circle. + */ +Datum +circle_radius(PG_FUNCTION_ARGS) +{ + CIRCLE *circle = PG_GETARG_CIRCLE_P(0); + + PG_RETURN_FLOAT8(circle->radius); +} + + +/* circle_distance - returns the distance between + * two circles. 
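+ * + * (Editorial note: the result is the gap between the two boundaries, clamped + * at zero when the circles touch or overlap.  A minimal standalone sketch of + * the same formula, assuming plain C doubles rather than float8 helpers: + * + *     #include <math.h> + *     double circle_gap(double x1, double y1, double r1, + *                       double x2, double y2, double r2) + *     { + *         double d = hypot(x2 - x1, y2 - y1) - (r1 + r2); + * + *         return (d < 0.0) ? 0.0 : d; + *     } + * + * e.g. '<(0,0),1>' and '<(5,0),2>' are 2 apart.)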
+ */ +Datum +circle_distance(PG_FUNCTION_ARGS) +{ + CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0); + CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1); + float8 result; + + result = float8_mi(point_dt(&circle1->center, &circle2->center), + float8_pl(circle1->radius, circle2->radius)); + if (result < 0.0) + result = 0.0; + + PG_RETURN_FLOAT8(result); +} + + +Datum +circle_contain_pt(PG_FUNCTION_ARGS) +{ + CIRCLE *circle = PG_GETARG_CIRCLE_P(0); + Point *point = PG_GETARG_POINT_P(1); + float8 d; + + d = point_dt(&circle->center, point); + PG_RETURN_BOOL(d <= circle->radius); +} + + +Datum +pt_contained_circle(PG_FUNCTION_ARGS) +{ + Point *point = PG_GETARG_POINT_P(0); + CIRCLE *circle = PG_GETARG_CIRCLE_P(1); + float8 d; + + d = point_dt(&circle->center, point); + PG_RETURN_BOOL(d <= circle->radius); +} + + +/* dist_pc - returns the distance between + * a point and a circle. + */ +Datum +dist_pc(PG_FUNCTION_ARGS) +{ + Point *point = PG_GETARG_POINT_P(0); + CIRCLE *circle = PG_GETARG_CIRCLE_P(1); + float8 result; + + result = float8_mi(point_dt(point, &circle->center), + circle->radius); + if (result < 0.0) + result = 0.0; + + PG_RETURN_FLOAT8(result); +} + +/* + * Distance from a circle to a point + */ +Datum +dist_cpoint(PG_FUNCTION_ARGS) +{ + CIRCLE *circle = PG_GETARG_CIRCLE_P(0); + Point *point = PG_GETARG_POINT_P(1); + float8 result; + + result = float8_mi(point_dt(point, &circle->center), circle->radius); + if (result < 0.0) + result = 0.0; + + PG_RETURN_FLOAT8(result); +} + +/* circle_center - returns the center point of the circle. + */ +Datum +circle_center(PG_FUNCTION_ARGS) +{ + CIRCLE *circle = PG_GETARG_CIRCLE_P(0); + Point *result; + + result = (Point *) palloc(sizeof(Point)); + result->x = circle->center.x; + result->y = circle->center.y; + + PG_RETURN_POINT_P(result); +} + + +/* circle_ar - returns the area of the circle. + */ +static float8 +circle_ar(CIRCLE *circle) +{ + return float8_mul(float8_mul(circle->radius, circle->radius), M_PI); +} + + +/*---------------------------------------------------------- + * Conversion operators. + *---------------------------------------------------------*/ + +Datum +cr_circle(PG_FUNCTION_ARGS) +{ + Point *center = PG_GETARG_POINT_P(0); + float8 radius = PG_GETARG_FLOAT8(1); + CIRCLE *result; + + result = (CIRCLE *) palloc(sizeof(CIRCLE)); + + result->center.x = center->x; + result->center.y = center->y; + result->radius = radius; + + PG_RETURN_CIRCLE_P(result); +} + +Datum +circle_box(PG_FUNCTION_ARGS) +{ + CIRCLE *circle = PG_GETARG_CIRCLE_P(0); + BOX *box; + float8 delta; + + box = (BOX *) palloc(sizeof(BOX)); + + delta = float8_div(circle->radius, sqrt(2.0)); + + box->high.x = float8_pl(circle->center.x, delta); + box->low.x = float8_mi(circle->center.x, delta); + box->high.y = float8_pl(circle->center.y, delta); + box->low.y = float8_mi(circle->center.y, delta); + + PG_RETURN_BOX_P(box); +} + +/* box_circle() + * Convert a box to a circle. 
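+ * + * (Editorial example: the box '(2,2),(0,0)' maps to the circle centered at + * (1,1) whose radius is the distance from that center to a corner, sqrt(2).)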
+ */ +Datum +box_circle(PG_FUNCTION_ARGS) +{ + BOX *box = PG_GETARG_BOX_P(0); + CIRCLE *circle; + + circle = (CIRCLE *) palloc(sizeof(CIRCLE)); + + circle->center.x = float8_div(float8_pl(box->high.x, box->low.x), 2.0); + circle->center.y = float8_div(float8_pl(box->high.y, box->low.y), 2.0); + + circle->radius = point_dt(&circle->center, &box->high); + + PG_RETURN_CIRCLE_P(circle); +} + + +Datum +circle_poly(PG_FUNCTION_ARGS) +{ + int32 npts = PG_GETARG_INT32(0); + CIRCLE *circle = PG_GETARG_CIRCLE_P(1); + POLYGON *poly; + int base_size, + size; + int i; + float8 angle; + float8 anglestep; + + if (FPzero(circle->radius)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot convert circle with radius zero to polygon"))); + + if (npts < 2) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("must request at least 2 points"))); + + base_size = sizeof(poly->p[0]) * npts; + size = offsetof(POLYGON, p) + base_size; + + /* Check for integer overflow */ + if (base_size / npts != sizeof(poly->p[0]) || size <= base_size) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("too many points requested"))); + + poly = (POLYGON *) palloc0(size); /* zero any holes */ + SET_VARSIZE(poly, size); + poly->npts = npts; + + anglestep = float8_div(2.0 * M_PI, npts); + + for (i = 0; i < npts; i++) + { + angle = float8_mul(anglestep, i); + + poly->p[i].x = float8_mi(circle->center.x, + float8_mul(circle->radius, cos(angle))); + poly->p[i].y = float8_pl(circle->center.y, + float8_mul(circle->radius, sin(angle))); + } + + make_bound_box(poly); + + PG_RETURN_POLYGON_P(poly); +} + +/* + * Convert polygon to circle + * + * The result must be preallocated. + * + * XXX This algorithm should use weighted means of line segments + * rather than straight average values of points - tgl 97/01/21. + */ +static void +poly_to_circle(CIRCLE *result, POLYGON *poly) +{ + int i; + + Assert(poly->npts > 0); + + result->center.x = 0; + result->center.y = 0; + result->radius = 0; + + for (i = 0; i < poly->npts; i++) + point_add_point(&result->center, &result->center, &poly->p[i]); + result->center.x = float8_div(result->center.x, poly->npts); + result->center.y = float8_div(result->center.y, poly->npts); + + for (i = 0; i < poly->npts; i++) + result->radius = float8_pl(result->radius, + point_dt(&poly->p[i], &result->center)); + result->radius = float8_div(result->radius, poly->npts); +} + +Datum +poly_circle(PG_FUNCTION_ARGS) +{ + POLYGON *poly = PG_GETARG_POLYGON_P(0); + CIRCLE *result; + + result = (CIRCLE *) palloc(sizeof(CIRCLE)); + + poly_to_circle(result, poly); + + PG_RETURN_CIRCLE_P(result); +} + + +/*********************************************************************** + ** + ** Private routines for multiple types. + ** + ***********************************************************************/ + +/* + * Test to see if the point is inside the polygon, returns 1/0, or 2 if + * the point is on the polygon. 
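+ * (Editorial note: the test casts a horizontal ray from the point toward +X + * and sums the signed crossings reported by lseg_crossing() below; a nonzero + * total means the point is inside, and POINT_ON_POLYGON short-circuits to the + * "on the polygon" result.)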
+ * Code adapted but not copied from integer-based routines in WN: A + * Server for the HTTP + * version 1.15.1, file wn/image.c + * http://hopf.math.northwestern.edu/index.html + * Description of algorithm: http://www.linuxjournal.com/article/2197 + * http://www.linuxjournal.com/article/2029 + */ + +#define POINT_ON_POLYGON INT_MAX + +static int +point_inside(Point *p, int npts, Point *plist) +{ + float8 x0, + y0; + float8 prev_x, + prev_y; + int i = 0; + float8 x, + y; + int cross, + total_cross = 0; + + Assert(npts > 0); + + /* compute first polygon point relative to single point */ + x0 = float8_mi(plist[0].x, p->x); + y0 = float8_mi(plist[0].y, p->y); + + prev_x = x0; + prev_y = y0; + /* loop over polygon points and aggregate total_cross */ + for (i = 1; i < npts; i++) + { + /* compute next polygon point relative to single point */ + x = float8_mi(plist[i].x, p->x); + y = float8_mi(plist[i].y, p->y); + + /* compute previous to current point crossing */ + if ((cross = lseg_crossing(x, y, prev_x, prev_y)) == POINT_ON_POLYGON) + return 2; + total_cross += cross; + + prev_x = x; + prev_y = y; + } + + /* now do the first point */ + if ((cross = lseg_crossing(x0, y0, prev_x, prev_y)) == POINT_ON_POLYGON) + return 2; + total_cross += cross; + + if (total_cross != 0) + return 1; + return 0; +} + + +/* lseg_crossing() + * Returns +/-2 if line segment crosses the positive X-axis in a +/- direction. + * Returns +/-1 if one point is on the positive X-axis. + * Returns 0 if both points are on the positive X-axis, or there is no crossing. + * Returns POINT_ON_POLYGON if the segment contains (0,0). + * Wow, that is one confusing API, but it is used above, and when summed, + * can tell is if a point is in a polygon. + */ + +static int +lseg_crossing(float8 x, float8 y, float8 prev_x, float8 prev_y) +{ + float8 z; + int y_sign; + + if (FPzero(y)) + { /* y == 0, on X axis */ + if (FPzero(x)) /* (x,y) is (0,0)? */ + return POINT_ON_POLYGON; + else if (FPgt(x, 0)) + { /* x > 0 */ + if (FPzero(prev_y)) /* y and prev_y are zero */ + /* prev_x > 0? */ + return FPgt(prev_x, 0.0) ? 0 : POINT_ON_POLYGON; + return FPlt(prev_y, 0.0) ? 1 : -1; + } + else + { /* x < 0, x not on positive X axis */ + if (FPzero(prev_y)) + /* prev_x < 0? */ + return FPlt(prev_x, 0.0) ? 0 : POINT_ON_POLYGON; + return 0; + } + } + else + { /* y != 0 */ + /* compute y crossing direction from previous point */ + y_sign = FPgt(y, 0.0) ? 1 : -1; + + if (FPzero(prev_y)) + /* previous point was on X axis, so new point is either off or on */ + return FPlt(prev_x, 0.0) ? 0 : y_sign; + else if ((y_sign < 0 && FPlt(prev_y, 0.0)) || + (y_sign > 0 && FPgt(prev_y, 0.0))) + /* both above or below X axis */ + return 0; /* same sign */ + else + { /* y and prev_y cross X-axis */ + if (FPge(x, 0.0) && FPgt(prev_x, 0.0)) + /* both non-negative so cross positive X-axis */ + return 2 * y_sign; + if (FPlt(x, 0.0) && FPle(prev_x, 0.0)) + /* both non-positive so do not cross positive X-axis */ + return 0; + + /* x and y cross axes, see URL above point_inside() */ + z = float8_mi(float8_mul(float8_mi(x, prev_x), y), + float8_mul(float8_mi(y, prev_y), x)); + if (FPzero(z)) + return POINT_ON_POLYGON; + if ((y_sign < 0 && FPlt(z, 0.0)) || + (y_sign > 0 && FPgt(z, 0.0))) + return 0; + return 2 * y_sign; + } + } +} + + +static bool +plist_same(int npts, Point *p1, Point *p2) +{ + int i, + ii, + j; + + /* find match for first point */ + for (i = 0; i < npts; i++) + { + if (point_eq_point(&p2[i], &p1[0])) + { + + /* match found? 
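/*
 * point_inside() above sums signed crossings of the positive X axis (via
 * lseg_crossing()) and reports boundary points specially.  As a standalone
 * cross-check, here is the textbook even-odd ray-casting test for the same
 * question; it makes no attempt to classify points lying exactly on an edge.
 */
#include <stdbool.h>
#include <stdio.h>

typedef struct { double x, y; } pt2;

static bool
point_in_polygon(pt2 p, int n, const pt2 *v)
{
	bool		inside = false;

	for (int i = 0, j = n - 1; i < n; j = i++)
	{
		/* does the edge (v[j], v[i]) straddle the horizontal line y == p.y? */
		if ((v[i].y > p.y) != (v[j].y > p.y))
		{
			/* x coordinate where the edge crosses that horizontal line */
			double		xcross = v[j].x + (p.y - v[j].y) *
				(v[i].x - v[j].x) / (v[i].y - v[j].y);

			if (p.x < xcross)
				inside = !inside;	/* the ray to +infinity crossed one more edge */
		}
	}
	return inside;
}

int
main(void)
{
	/* unit square; (0.5, 0.5) is inside, (1.5, 0.5) is outside */
	pt2			square[4] = {{0, 0}, {1, 0}, {1, 1}, {0, 1}};

	printf("%d %d\n",
		   point_in_polygon((pt2) {0.5, 0.5}, 4, square),
		   point_in_polygon((pt2) {1.5, 0.5}, 4, square));	/* prints 1 0 */
	return 0;
}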
then look forward through remaining points */ + for (ii = 1, j = i + 1; ii < npts; ii++, j++) + { + if (j >= npts) + j = 0; + if (!point_eq_point(&p2[j], &p1[ii])) + break; + } + if (ii == npts) + return true; + + /* match not found forwards? then look backwards */ + for (ii = 1, j = i - 1; ii < npts; ii++, j--) + { + if (j < 0) + j = (npts - 1); + if (!point_eq_point(&p2[j], &p1[ii])) + break; + } + if (ii == npts) + return true; + } + } + + return false; +} + + +/*------------------------------------------------------------------------- + * Determine the hypotenuse. + * + * If required, x and y are swapped to make x the larger number. The + * traditional formula of x^2+y^2 is rearranged to factor x outside the + * sqrt. This allows computation of the hypotenuse for significantly + * larger values, and with a higher precision than when using the naive + * formula. In particular, this cannot overflow unless the final result + * would be out-of-range. + * + * sqrt( x^2 + y^2 ) = sqrt( x^2( 1 + y^2/x^2) ) + * = x * sqrt( 1 + y^2/x^2 ) + * = x * sqrt( 1 + y/x * y/x ) + * + * It is expected that this routine will eventually be replaced with the + * C99 hypot() function. + * + * This implementation conforms to IEEE Std 1003.1 and GLIBC, in that the + * case of hypot(inf,nan) results in INF, and not NAN. + *----------------------------------------------------------------------- + */ +float8 +pg_hypot(float8 x, float8 y) +{ + float8 yx, + result; + + /* Handle INF and NaN properly */ + if (isinf(x) || isinf(y)) + return get_float8_infinity(); + + if (isnan(x) || isnan(y)) + return get_float8_nan(); + + /* Else, drop any minus signs */ + x = fabs(x); + y = fabs(y); + + /* Swap x and y if needed to make x the larger one */ + if (x < y) + { + float8 temp = x; + + x = y; + y = temp; + } + + /* + * If y is zero, the hypotenuse is x. This test saves a few cycles in + * such cases, but more importantly it also protects against + * divide-by-zero errors, since now x >= y. + */ + if (y == 0.0) + return x; + + /* Determine the hypotenuse */ + yx = y / x; + result = x * sqrt(1.0 + (yx * yx)); + + if (unlikely(isinf(result))) + float_overflow_error(); + if (unlikely(result == 0.0)) + float_underflow_error(); + + return result; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/geo_selfuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/geo_selfuncs.c new file mode 100644 index 00000000000..f9f40922e03 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/geo_selfuncs.c @@ -0,0 +1,96 @@ +/*------------------------------------------------------------------------- + * + * geo_selfuncs.c + * Selectivity routines registered in the operator catalog in the + * "oprrest" and "oprjoin" attributes. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/geo_selfuncs.c + * + * XXX These are totally bogus. Perhaps someone will make them do + * something reasonable, someday. + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "utils/builtins.h" +#include "utils/geo_decls.h" + + +/* + * Selectivity functions for geometric operators. These are bogus -- unless + * we know the actual key distribution in the index, we can't make a good + * prediction of the selectivity of these operators. 
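/*
 * A standalone check of the factored form used by pg_hypot() above: for
 * large inputs the naive sqrt(x*x + y*y) overflows to infinity, while
 * x * sqrt(1 + (y/x)^2) stays finite.
 */
#include <math.h>
#include <stdio.h>

static double
hypot_factored(double x, double y)
{
	double		yx;

	x = fabs(x);
	y = fabs(y);
	if (x < y)
	{
		double		tmp = x;

		x = y;
		y = tmp;
	}
	if (y == 0.0)
		return x;				/* also avoids dividing by zero below */
	yx = y / x;
	return x * sqrt(1.0 + yx * yx);
}

int
main(void)
{
	/* 3e200 squared already exceeds DBL_MAX, so the naive form prints inf */
	printf("naive:    %g\n", sqrt(3e200 * 3e200 + 4e200 * 4e200));
	/* the factored form prints 5e+200 */
	printf("factored: %g\n", hypot_factored(3e200, 4e200));
	return 0;
}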
+ * + * Note: the values used here may look unreasonably small. Perhaps they + * are. For now, we want to make sure that the optimizer will make use + * of a geometric index if one is available, so the selectivity had better + * be fairly small. + * + * In general, GiST needs to search multiple subtrees in order to guarantee + * that all occurrences of the same key have been found. Because of this, + * the estimated cost for scanning the index ought to be higher than the + * output selectivity would indicate. gistcostestimate(), over in selfuncs.c, + * ought to be adjusted accordingly --- but until we can generate somewhat + * realistic numbers here, it hardly matters... + */ + + +/* + * Selectivity for operators that depend on area, such as "overlap". + */ + +Datum +areasel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(0.005); +} + +Datum +areajoinsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(0.005); +} + +/* + * positionsel + * + * How likely is a box to be strictly left of (right of, above, below) + * a given box? + */ + +Datum +positionsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(0.1); +} + +Datum +positionjoinsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(0.1); +} + +/* + * contsel -- How likely is a box to contain (be contained by) a given box? + * + * This is a tighter constraint than "overlap", so produce a smaller + * estimate than areasel does. + */ + +Datum +contsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(0.001); +} + +Datum +contjoinsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(0.001); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/geo_spgist.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/geo_spgist.c new file mode 100644 index 00000000000..b708a805477 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/geo_spgist.c @@ -0,0 +1,885 @@ +/*------------------------------------------------------------------------- + * + * geo_spgist.c + * SP-GiST implementation of 4-dimensional quad tree over boxes + * + * This module provides SP-GiST implementation for boxes using quad tree + * analogy in 4-dimensional space. SP-GiST doesn't allow indexing of + * overlapping objects. We are making 2D objects never-overlapping in + * 4D space. This technique has some benefits compared to traditional + * R-Tree which is implemented as GiST. The performance tests reveal + * that this technique especially beneficial with too much overlapping + * objects, so called "spaghetti data". + * + * Unlike the original quad tree, we are splitting the tree into 16 + * quadrants in 4D space. It is easier to imagine it as splitting space + * two times into 4: + * + * | | + * | | + * | -----+----- + * | | + * | | + * -------------+------------- + * | + * | + * | + * | + * | + * + * We are using box datatype as the prefix, but we are treating them + * as points in 4-dimensional space, because 2D boxes are not enough + * to represent the quadrant boundaries in 4D space. They however are + * sufficient to point out the additional boundaries of the next + * quadrant. + * + * We are using traversal values provided by SP-GiST to calculate and + * to store the bounds of the quadrants, while traversing into the tree. + * Traversal value has all the boundaries in the 4D space, and is capable + * of transferring the required boundaries to the following traversal + * values. 
In conclusion, three things are necessary to calculate the + * next traversal value: + * + * (1) the traversal value of the parent + * (2) the quadrant of the current node + * (3) the prefix of the current node + * + * If we visualize them on our simplified drawing (see the drawing above); + * transferred boundaries of (1) would be the outer axis, relevant part + * of (2) would be the up right part of the other axis, and (3) would be + * the inner axis. + * + * For example, consider the case of overlapping. When recursion + * descends deeper and deeper down the tree, all quadrants in + * the current node will be checked for overlapping. The boundaries + * will be re-calculated for all quadrants. Overlap check answers + * the question: can any box from this quadrant overlap with the given + * box? If yes, then this quadrant will be walked. If no, then this + * quadrant will be skipped. + * + * This method provides restrictions for minimum and maximum values of + * every dimension of every corner of the box on every level of the tree + * except the root. For the root node, we are setting the boundaries + * that we don't yet have as infinity. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/adt/geo_spgist.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/spgist.h" +#include "access/spgist_private.h" +#include "access/stratnum.h" +#include "catalog/pg_type.h" +#include "utils/float.h" +#include "utils/fmgroids.h" +#include "utils/fmgrprotos.h" +#include "utils/geo_decls.h" + +/* + * Comparator for qsort + * + * We don't need to use the floating point macros in here, because this + * is only going to be used in a place to effect the performance + * of the index, not the correctness. + */ +static int +compareDoubles(const void *a, const void *b) +{ + float8 x = *(float8 *) a; + float8 y = *(float8 *) b; + + if (x == y) + return 0; + return (x > y) ? 1 : -1; +} + +typedef struct +{ + float8 low; + float8 high; +} Range; + +typedef struct +{ + Range left; + Range right; +} RangeBox; + +typedef struct +{ + RangeBox range_box_x; + RangeBox range_box_y; +} RectBox; + +/* + * Calculate the quadrant + * + * The quadrant is 8 bit unsigned integer with 4 least bits in use. + * This function accepts BOXes as input. They are not casted to + * RangeBoxes, yet. All 4 bits are set by comparing a corner of the box. + * This makes 16 quadrants in total. + */ +static uint8 +getQuadrant(BOX *centroid, BOX *inBox) +{ + uint8 quadrant = 0; + + if (inBox->low.x > centroid->low.x) + quadrant |= 0x8; + + if (inBox->high.x > centroid->high.x) + quadrant |= 0x4; + + if (inBox->low.y > centroid->low.y) + quadrant |= 0x2; + + if (inBox->high.y > centroid->high.y) + quadrant |= 0x1; + + return quadrant; +} + +/* + * Get RangeBox using BOX + * + * We are turning the BOX to our structures to emphasize their function + * of representing points in 4D space. It also is more convenient to + * access the values with this structure. 
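/*
 * A worked example of getQuadrant() above, with plain doubles: each of the
 * four corner comparisons against the centroid contributes one bit, giving
 * the sixteen possible child nodes.
 */
#include <stdio.h>

int
main(void)
{
	/* centroid box: low = (2,3), high = (5,7); inserted box: low = (4,1), high = (6,8) */
	double		c_low_x = 2, c_low_y = 3, c_high_x = 5, c_high_y = 7;
	double		b_low_x = 4, b_low_y = 1, b_high_x = 6, b_high_y = 8;
	unsigned	quadrant = 0;

	if (b_low_x > c_low_x)
		quadrant |= 0x8;		/* 4 > 2: set */
	if (b_high_x > c_high_x)
		quadrant |= 0x4;		/* 6 > 5: set */
	if (b_low_y > c_low_y)
		quadrant |= 0x2;		/* 1 > 3: not set */
	if (b_high_y > c_high_y)
		quadrant |= 0x1;		/* 8 > 7: set */

	printf("quadrant = %u\n", quadrant);	/* prints 13 (0x8 | 0x4 | 0x1) */
	return 0;
}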
+ */ +static RangeBox * +getRangeBox(BOX *box) +{ + RangeBox *range_box = (RangeBox *) palloc(sizeof(RangeBox)); + + range_box->left.low = box->low.x; + range_box->left.high = box->high.x; + + range_box->right.low = box->low.y; + range_box->right.high = box->high.y; + + return range_box; +} + +/* + * Initialize the traversal value + * + * In the beginning, we don't have any restrictions. We have to + * initialize the struct to cover the whole 4D space. + */ +static RectBox * +initRectBox(void) +{ + RectBox *rect_box = (RectBox *) palloc(sizeof(RectBox)); + float8 infinity = get_float8_infinity(); + + rect_box->range_box_x.left.low = -infinity; + rect_box->range_box_x.left.high = infinity; + + rect_box->range_box_x.right.low = -infinity; + rect_box->range_box_x.right.high = infinity; + + rect_box->range_box_y.left.low = -infinity; + rect_box->range_box_y.left.high = infinity; + + rect_box->range_box_y.right.low = -infinity; + rect_box->range_box_y.right.high = infinity; + + return rect_box; +} + +/* + * Calculate the next traversal value + * + * All centroids are bounded by RectBox, but SP-GiST only keeps + * boxes. When we are traversing the tree, we must calculate RectBox, + * using centroid and quadrant. + */ +static RectBox * +nextRectBox(RectBox *rect_box, RangeBox *centroid, uint8 quadrant) +{ + RectBox *next_rect_box = (RectBox *) palloc(sizeof(RectBox)); + + memcpy(next_rect_box, rect_box, sizeof(RectBox)); + + if (quadrant & 0x8) + next_rect_box->range_box_x.left.low = centroid->left.low; + else + next_rect_box->range_box_x.left.high = centroid->left.low; + + if (quadrant & 0x4) + next_rect_box->range_box_x.right.low = centroid->left.high; + else + next_rect_box->range_box_x.right.high = centroid->left.high; + + if (quadrant & 0x2) + next_rect_box->range_box_y.left.low = centroid->right.low; + else + next_rect_box->range_box_y.left.high = centroid->right.low; + + if (quadrant & 0x1) + next_rect_box->range_box_y.right.low = centroid->right.high; + else + next_rect_box->range_box_y.right.high = centroid->right.high; + + return next_rect_box; +} + +/* Can any range from range_box overlap with this argument? */ +static bool +overlap2D(RangeBox *range_box, Range *query) +{ + return FPge(range_box->right.high, query->low) && + FPle(range_box->left.low, query->high); +} + +/* Can any rectangle from rect_box overlap with this argument? */ +static bool +overlap4D(RectBox *rect_box, RangeBox *query) +{ + return overlap2D(&rect_box->range_box_x, &query->left) && + overlap2D(&rect_box->range_box_y, &query->right); +} + +/* Can any range from range_box contain this argument? */ +static bool +contain2D(RangeBox *range_box, Range *query) +{ + return FPge(range_box->right.high, query->high) && + FPle(range_box->left.low, query->low); +} + +/* Can any rectangle from rect_box contain this argument? */ +static bool +contain4D(RectBox *rect_box, RangeBox *query) +{ + return contain2D(&rect_box->range_box_x, &query->left) && + contain2D(&rect_box->range_box_y, &query->right); +} + +/* Can any range from range_box be contained by this argument? */ +static bool +contained2D(RangeBox *range_box, Range *query) +{ + return FPle(range_box->left.low, query->high) && + FPge(range_box->left.high, query->low) && + FPle(range_box->right.low, query->high) && + FPge(range_box->right.high, query->low); +} + +/* Can any rectangle from rect_box be contained by this argument? 
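/*
 * A worked example of nextRectBox() above, continuing the quadrant-13 case:
 * with a centroid box of low = (2,3), high = (5,7), the unbounded parent
 * RectBox narrows for quadrant 13 (0x8 | 0x4 | 0x1) to
 *
 *   low.x  in [2, +inf)     (bit 0x8 set:   range_box_x.left.low  = 2)
 *   high.x in [5, +inf)     (bit 0x4 set:   range_box_x.right.low = 5)
 *   low.y  in (-inf, 3]     (bit 0x2 clear: range_box_y.left.high = 3)
 *   high.y in [7, +inf)     (bit 0x1 set:   range_box_y.right.low = 7)
 *
 * These are the bounds that the overlap/containment checks below test
 * against when deciding whether the child can hold a matching box.
 */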
*/ +static bool +contained4D(RectBox *rect_box, RangeBox *query) +{ + return contained2D(&rect_box->range_box_x, &query->left) && + contained2D(&rect_box->range_box_y, &query->right); +} + +/* Can any range from range_box to be lower than this argument? */ +static bool +lower2D(RangeBox *range_box, Range *query) +{ + return FPlt(range_box->left.low, query->low) && + FPlt(range_box->right.low, query->low); +} + +/* Can any range from range_box not extend to the right side of the query? */ +static bool +overLower2D(RangeBox *range_box, Range *query) +{ + return FPle(range_box->left.low, query->high) && + FPle(range_box->right.low, query->high); +} + +/* Can any range from range_box to be higher than this argument? */ +static bool +higher2D(RangeBox *range_box, Range *query) +{ + return FPgt(range_box->left.high, query->high) && + FPgt(range_box->right.high, query->high); +} + +/* Can any range from range_box not extend to the left side of the query? */ +static bool +overHigher2D(RangeBox *range_box, Range *query) +{ + return FPge(range_box->left.high, query->low) && + FPge(range_box->right.high, query->low); +} + +/* Can any rectangle from rect_box be left of this argument? */ +static bool +left4D(RectBox *rect_box, RangeBox *query) +{ + return lower2D(&rect_box->range_box_x, &query->left); +} + +/* Can any rectangle from rect_box does not extend the right of this argument? */ +static bool +overLeft4D(RectBox *rect_box, RangeBox *query) +{ + return overLower2D(&rect_box->range_box_x, &query->left); +} + +/* Can any rectangle from rect_box be right of this argument? */ +static bool +right4D(RectBox *rect_box, RangeBox *query) +{ + return higher2D(&rect_box->range_box_x, &query->left); +} + +/* Can any rectangle from rect_box does not extend the left of this argument? */ +static bool +overRight4D(RectBox *rect_box, RangeBox *query) +{ + return overHigher2D(&rect_box->range_box_x, &query->left); +} + +/* Can any rectangle from rect_box be below of this argument? */ +static bool +below4D(RectBox *rect_box, RangeBox *query) +{ + return lower2D(&rect_box->range_box_y, &query->right); +} + +/* Can any rectangle from rect_box does not extend above this argument? */ +static bool +overBelow4D(RectBox *rect_box, RangeBox *query) +{ + return overLower2D(&rect_box->range_box_y, &query->right); +} + +/* Can any rectangle from rect_box be above of this argument? */ +static bool +above4D(RectBox *rect_box, RangeBox *query) +{ + return higher2D(&rect_box->range_box_y, &query->right); +} + +/* Can any rectangle from rect_box does not extend below of this argument? 
*/ +static bool +overAbove4D(RectBox *rect_box, RangeBox *query) +{ + return overHigher2D(&rect_box->range_box_y, &query->right); +} + +/* Lower bound for the distance between point and rect_box */ +static double +pointToRectBoxDistance(Point *point, RectBox *rect_box) +{ + double dx; + double dy; + + if (point->x < rect_box->range_box_x.left.low) + dx = rect_box->range_box_x.left.low - point->x; + else if (point->x > rect_box->range_box_x.right.high) + dx = point->x - rect_box->range_box_x.right.high; + else + dx = 0; + + if (point->y < rect_box->range_box_y.left.low) + dy = rect_box->range_box_y.left.low - point->y; + else if (point->y > rect_box->range_box_y.right.high) + dy = point->y - rect_box->range_box_y.right.high; + else + dy = 0; + + return HYPOT(dx, dy); +} + + +/* + * SP-GiST config function + */ +Datum +spg_box_quad_config(PG_FUNCTION_ARGS) +{ + spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1); + + cfg->prefixType = BOXOID; + cfg->labelType = VOIDOID; /* We don't need node labels. */ + cfg->canReturnData = true; + cfg->longValuesOK = false; + + PG_RETURN_VOID(); +} + +/* + * SP-GiST choose function + */ +Datum +spg_box_quad_choose(PG_FUNCTION_ARGS) +{ + spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0); + spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1); + BOX *centroid = DatumGetBoxP(in->prefixDatum), + *box = DatumGetBoxP(in->leafDatum); + + out->resultType = spgMatchNode; + out->result.matchNode.restDatum = BoxPGetDatum(box); + + /* nodeN will be set by core, when allTheSame. */ + if (!in->allTheSame) + out->result.matchNode.nodeN = getQuadrant(centroid, box); + + PG_RETURN_VOID(); +} + +/* + * SP-GiST pick-split function + * + * It splits a list of boxes into quadrants by choosing a central 4D + * point as the median of the coordinates of the boxes. + */ +Datum +spg_box_quad_picksplit(PG_FUNCTION_ARGS) +{ + spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0); + spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1); + BOX *centroid; + int median, + i; + float8 *lowXs = palloc(sizeof(float8) * in->nTuples); + float8 *highXs = palloc(sizeof(float8) * in->nTuples); + float8 *lowYs = palloc(sizeof(float8) * in->nTuples); + float8 *highYs = palloc(sizeof(float8) * in->nTuples); + + /* Calculate median of all 4D coordinates */ + for (i = 0; i < in->nTuples; i++) + { + BOX *box = DatumGetBoxP(in->datums[i]); + + lowXs[i] = box->low.x; + highXs[i] = box->high.x; + lowYs[i] = box->low.y; + highYs[i] = box->high.y; + } + + qsort(lowXs, in->nTuples, sizeof(float8), compareDoubles); + qsort(highXs, in->nTuples, sizeof(float8), compareDoubles); + qsort(lowYs, in->nTuples, sizeof(float8), compareDoubles); + qsort(highYs, in->nTuples, sizeof(float8), compareDoubles); + + median = in->nTuples / 2; + + centroid = palloc(sizeof(BOX)); + + centroid->low.x = lowXs[median]; + centroid->high.x = highXs[median]; + centroid->low.y = lowYs[median]; + centroid->high.y = highYs[median]; + + /* Fill the output */ + out->hasPrefix = true; + out->prefixDatum = BoxPGetDatum(centroid); + + out->nNodes = 16; + out->nodeLabels = NULL; /* We don't need node labels. 
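/*
 * A standalone sketch of the clamped distance in pointToRectBoxDistance()
 * above: per axis the distance is zero when the coordinate falls inside the
 * range (the upstream code clamps against the outermost bounds, left.low
 * and right.high), otherwise the distance to the nearer bound; the two axes
 * are combined with hypot().
 */
#include <math.h>
#include <stdio.h>

static double
axis_distance(double v, double low, double high)
{
	if (v < low)
		return low - v;
	if (v > high)
		return v - high;
	return 0.0;					/* inside the range */
}

static double
point_to_rect_distance(double px, double py,
					   double xlow, double xhigh,
					   double ylow, double yhigh)
{
	return hypot(axis_distance(px, xlow, xhigh),
				 axis_distance(py, ylow, yhigh));
}

int
main(void)
{
	/* point (0,0) against the rectangle x in [3,5], y in [1,2]: dx = 3, dy = 1 */
	printf("%g\n", point_to_rect_distance(0, 0, 3, 5, 1, 2));	/* sqrt(10) */
	return 0;
}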
*/ + + out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples); + out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples); + + /* + * Assign ranges to corresponding nodes according to quadrants relative to + * the "centroid" range + */ + for (i = 0; i < in->nTuples; i++) + { + BOX *box = DatumGetBoxP(in->datums[i]); + uint8 quadrant = getQuadrant(centroid, box); + + out->leafTupleDatums[i] = BoxPGetDatum(box); + out->mapTuplesToNodes[i] = quadrant; + } + + PG_RETURN_VOID(); +} + +/* + * Check if result of consistent method based on bounding box is exact. + */ +static bool +is_bounding_box_test_exact(StrategyNumber strategy) +{ + switch (strategy) + { + case RTLeftStrategyNumber: + case RTOverLeftStrategyNumber: + case RTOverRightStrategyNumber: + case RTRightStrategyNumber: + case RTOverBelowStrategyNumber: + case RTBelowStrategyNumber: + case RTAboveStrategyNumber: + case RTOverAboveStrategyNumber: + return true; + + default: + return false; + } +} + +/* + * Get bounding box for ScanKey. + */ +static BOX * +spg_box_quad_get_scankey_bbox(ScanKey sk, bool *recheck) +{ + switch (sk->sk_subtype) + { + case BOXOID: + return DatumGetBoxP(sk->sk_argument); + + case POLYGONOID: + if (recheck && !is_bounding_box_test_exact(sk->sk_strategy)) + *recheck = true; + return &DatumGetPolygonP(sk->sk_argument)->boundbox; + + default: + elog(ERROR, "unrecognized scankey subtype: %d", sk->sk_subtype); + return NULL; + } +} + +/* + * SP-GiST inner consistent function + */ +Datum +spg_box_quad_inner_consistent(PG_FUNCTION_ARGS) +{ + spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0); + spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1); + int i; + MemoryContext old_ctx; + RectBox *rect_box; + uint8 quadrant; + RangeBox *centroid, + **queries; + + /* + * We are saving the traversal value or initialize it an unbounded one, if + * we have just begun to walk the tree. + */ + if (in->traversalValue) + rect_box = in->traversalValue; + else + rect_box = initRectBox(); + + if (in->allTheSame) + { + /* Report that all nodes should be visited */ + out->nNodes = in->nNodes; + out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes); + for (i = 0; i < in->nNodes; i++) + out->nodeNumbers[i] = i; + + if (in->norderbys > 0 && in->nNodes > 0) + { + double *distances = palloc(sizeof(double) * in->norderbys); + int j; + + for (j = 0; j < in->norderbys; j++) + { + Point *pt = DatumGetPointP(in->orderbys[j].sk_argument); + + distances[j] = pointToRectBoxDistance(pt, rect_box); + } + + out->distances = (double **) palloc(sizeof(double *) * in->nNodes); + out->distances[0] = distances; + + for (i = 1; i < in->nNodes; i++) + { + out->distances[i] = palloc(sizeof(double) * in->norderbys); + memcpy(out->distances[i], distances, + sizeof(double) * in->norderbys); + } + } + + PG_RETURN_VOID(); + } + + /* + * We are casting the prefix and queries to RangeBoxes for ease of the + * following operations. 
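/*
 * A standalone sketch of the centroid choice in spg_box_quad_picksplit()
 * above: each of the four coordinate arrays is sorted independently and the
 * element at index nTuples / 2 becomes that coordinate of the centroid.
 */
#include <stdio.h>
#include <stdlib.h>

static int
cmp_double(const void *a, const void *b)
{
	double		x = *(const double *) a;
	double		y = *(const double *) b;

	if (x == y)
		return 0;
	return (x > y) ? 1 : -1;
}

int
main(void)
{
	double		low_xs[] = {5.0, 1.0, 9.0, 3.0};
	int			n = 4;

	qsort(low_xs, n, sizeof(double), cmp_double);
	/* sorted: 1 3 5 9; the centroid's low.x becomes low_xs[2] == 5 */
	printf("centroid low.x = %g\n", low_xs[n / 2]);
	return 0;
}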
+ */ + centroid = getRangeBox(DatumGetBoxP(in->prefixDatum)); + queries = (RangeBox **) palloc(in->nkeys * sizeof(RangeBox *)); + for (i = 0; i < in->nkeys; i++) + { + BOX *box = spg_box_quad_get_scankey_bbox(&in->scankeys[i], NULL); + + queries[i] = getRangeBox(box); + } + + /* Allocate enough memory for nodes */ + out->nNodes = 0; + out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes); + out->traversalValues = (void **) palloc(sizeof(void *) * in->nNodes); + if (in->norderbys > 0) + out->distances = (double **) palloc(sizeof(double *) * in->nNodes); + + /* + * We switch memory context, because we want to allocate memory for new + * traversal values (next_rect_box) and pass these pieces of memory to + * further call of this function. + */ + old_ctx = MemoryContextSwitchTo(in->traversalMemoryContext); + + for (quadrant = 0; quadrant < in->nNodes; quadrant++) + { + RectBox *next_rect_box = nextRectBox(rect_box, centroid, quadrant); + bool flag = true; + + for (i = 0; i < in->nkeys; i++) + { + StrategyNumber strategy = in->scankeys[i].sk_strategy; + + switch (strategy) + { + case RTOverlapStrategyNumber: + flag = overlap4D(next_rect_box, queries[i]); + break; + + case RTContainsStrategyNumber: + flag = contain4D(next_rect_box, queries[i]); + break; + + case RTSameStrategyNumber: + case RTContainedByStrategyNumber: + flag = contained4D(next_rect_box, queries[i]); + break; + + case RTLeftStrategyNumber: + flag = left4D(next_rect_box, queries[i]); + break; + + case RTOverLeftStrategyNumber: + flag = overLeft4D(next_rect_box, queries[i]); + break; + + case RTRightStrategyNumber: + flag = right4D(next_rect_box, queries[i]); + break; + + case RTOverRightStrategyNumber: + flag = overRight4D(next_rect_box, queries[i]); + break; + + case RTAboveStrategyNumber: + flag = above4D(next_rect_box, queries[i]); + break; + + case RTOverAboveStrategyNumber: + flag = overAbove4D(next_rect_box, queries[i]); + break; + + case RTBelowStrategyNumber: + flag = below4D(next_rect_box, queries[i]); + break; + + case RTOverBelowStrategyNumber: + flag = overBelow4D(next_rect_box, queries[i]); + break; + + default: + elog(ERROR, "unrecognized strategy: %d", strategy); + } + + /* If any check is failed, we have found our answer. */ + if (!flag) + break; + } + + if (flag) + { + out->traversalValues[out->nNodes] = next_rect_box; + out->nodeNumbers[out->nNodes] = quadrant; + + if (in->norderbys > 0) + { + double *distances = palloc(sizeof(double) * in->norderbys); + int j; + + out->distances[out->nNodes] = distances; + + for (j = 0; j < in->norderbys; j++) + { + Point *pt = DatumGetPointP(in->orderbys[j].sk_argument); + + distances[j] = pointToRectBoxDistance(pt, next_rect_box); + } + } + + out->nNodes++; + } + else + { + /* + * If this node is not selected, we don't need to keep the next + * traversal value in the memory context. + */ + pfree(next_rect_box); + } + } + + /* Switch back */ + MemoryContextSwitchTo(old_ctx); + + PG_RETURN_VOID(); +} + +/* + * SP-GiST inner consistent function + */ +Datum +spg_box_quad_leaf_consistent(PG_FUNCTION_ARGS) +{ + spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0); + spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1); + Datum leaf = in->leafDatum; + bool flag = true; + int i; + + /* All tests are exact. */ + out->recheck = false; + + /* + * Don't return leafValue unless told to; this is used for both box and + * polygon opclasses, and in the latter case the leaf datum is not even of + * the right type to return. 
+ */ + if (in->returnData) + out->leafValue = leaf; + + /* Perform the required comparison(s) */ + for (i = 0; i < in->nkeys; i++) + { + StrategyNumber strategy = in->scankeys[i].sk_strategy; + BOX *box = spg_box_quad_get_scankey_bbox(&in->scankeys[i], + &out->recheck); + Datum query = BoxPGetDatum(box); + + switch (strategy) + { + case RTOverlapStrategyNumber: + flag = DatumGetBool(DirectFunctionCall2(box_overlap, leaf, + query)); + break; + + case RTContainsStrategyNumber: + flag = DatumGetBool(DirectFunctionCall2(box_contain, leaf, + query)); + break; + + case RTContainedByStrategyNumber: + flag = DatumGetBool(DirectFunctionCall2(box_contained, leaf, + query)); + break; + + case RTSameStrategyNumber: + flag = DatumGetBool(DirectFunctionCall2(box_same, leaf, + query)); + break; + + case RTLeftStrategyNumber: + flag = DatumGetBool(DirectFunctionCall2(box_left, leaf, + query)); + break; + + case RTOverLeftStrategyNumber: + flag = DatumGetBool(DirectFunctionCall2(box_overleft, leaf, + query)); + break; + + case RTRightStrategyNumber: + flag = DatumGetBool(DirectFunctionCall2(box_right, leaf, + query)); + break; + + case RTOverRightStrategyNumber: + flag = DatumGetBool(DirectFunctionCall2(box_overright, leaf, + query)); + break; + + case RTAboveStrategyNumber: + flag = DatumGetBool(DirectFunctionCall2(box_above, leaf, + query)); + break; + + case RTOverAboveStrategyNumber: + flag = DatumGetBool(DirectFunctionCall2(box_overabove, leaf, + query)); + break; + + case RTBelowStrategyNumber: + flag = DatumGetBool(DirectFunctionCall2(box_below, leaf, + query)); + break; + + case RTOverBelowStrategyNumber: + flag = DatumGetBool(DirectFunctionCall2(box_overbelow, leaf, + query)); + break; + + default: + elog(ERROR, "unrecognized strategy: %d", strategy); + } + + /* If any check is failed, we have found our answer. */ + if (!flag) + break; + } + + if (flag && in->norderbys > 0) + { + Oid distfnoid = in->orderbys[0].sk_func.fn_oid; + + out->distances = spg_key_orderbys_distances(leaf, false, + in->orderbys, in->norderbys); + + /* Recheck is necessary when computing distance to polygon */ + out->recheckDistances = distfnoid == F_DIST_POLYP; + } + + PG_RETURN_BOOL(flag); +} + + +/* + * SP-GiST config function for 2-D types that are lossy represented by their + * bounding boxes + */ +Datum +spg_bbox_quad_config(PG_FUNCTION_ARGS) +{ + spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1); + + cfg->prefixType = BOXOID; /* A type represented by its bounding box */ + cfg->labelType = VOIDOID; /* We don't need node labels. */ + cfg->leafType = BOXOID; + cfg->canReturnData = false; + cfg->longValuesOK = false; + + PG_RETURN_VOID(); +} + +/* + * SP-GiST compress function for polygons + */ +Datum +spg_poly_quad_compress(PG_FUNCTION_ARGS) +{ + POLYGON *polygon = PG_GETARG_POLYGON_P(0); + BOX *box; + + box = (BOX *) palloc(sizeof(BOX)); + *box = polygon->boundbox; + + PG_RETURN_BOX_P(box); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/hbafuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/hbafuncs.c new file mode 100644 index 00000000000..73d3ad1dadc --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/hbafuncs.c @@ -0,0 +1,588 @@ +/*------------------------------------------------------------------------- + * + * hbafuncs.c + * Support functions for SQL views of authentication files. 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/hbafuncs.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "catalog/objectaddress.h" +#include "common/ip.h" +#include "funcapi.h" +#include "libpq/hba.h" +#include "miscadmin.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/guc.h" + + +static ArrayType *get_hba_options(HbaLine *hba); +static void fill_hba_line(Tuplestorestate *tuple_store, TupleDesc tupdesc, + int rule_number, char *filename, int lineno, + HbaLine *hba, const char *err_msg); +static void fill_hba_view(Tuplestorestate *tuple_store, TupleDesc tupdesc); +static void fill_ident_line(Tuplestorestate *tuple_store, TupleDesc tupdesc, + int map_number, char *filename, int lineno, + IdentLine *ident, const char *err_msg); +static void fill_ident_view(Tuplestorestate *tuple_store, TupleDesc tupdesc); + + +/* + * This macro specifies the maximum number of authentication options + * that are possible with any given authentication method that is supported. + * Currently LDAP supports 11, and there are 3 that are not dependent on + * the auth method here. It may not actually be possible to set all of them + * at the same time, but we'll set the macro value high enough to be + * conservative and avoid warnings from static analysis tools. + */ +#define MAX_HBA_OPTIONS 14 + +/* + * Create a text array listing the options specified in the HBA line. + * Return NULL if no options are specified. + */ +static ArrayType * +get_hba_options(HbaLine *hba) +{ + int noptions; + Datum options[MAX_HBA_OPTIONS]; + + noptions = 0; + + if (hba->auth_method == uaGSS || hba->auth_method == uaSSPI) + { + if (hba->include_realm) + options[noptions++] = + CStringGetTextDatum("include_realm=true"); + + if (hba->krb_realm) + options[noptions++] = + CStringGetTextDatum(psprintf("krb_realm=%s", hba->krb_realm)); + } + + if (hba->usermap) + options[noptions++] = + CStringGetTextDatum(psprintf("map=%s", hba->usermap)); + + if (hba->clientcert != clientCertOff) + options[noptions++] = + CStringGetTextDatum(psprintf("clientcert=%s", (hba->clientcert == clientCertCA) ? 
"verify-ca" : "verify-full")); + + if (hba->pamservice) + options[noptions++] = + CStringGetTextDatum(psprintf("pamservice=%s", hba->pamservice)); + + if (hba->auth_method == uaLDAP) + { + if (hba->ldapserver) + options[noptions++] = + CStringGetTextDatum(psprintf("ldapserver=%s", hba->ldapserver)); + + if (hba->ldapport) + options[noptions++] = + CStringGetTextDatum(psprintf("ldapport=%d", hba->ldapport)); + + if (hba->ldaptls) + options[noptions++] = + CStringGetTextDatum("ldaptls=true"); + + if (hba->ldapprefix) + options[noptions++] = + CStringGetTextDatum(psprintf("ldapprefix=%s", hba->ldapprefix)); + + if (hba->ldapsuffix) + options[noptions++] = + CStringGetTextDatum(psprintf("ldapsuffix=%s", hba->ldapsuffix)); + + if (hba->ldapbasedn) + options[noptions++] = + CStringGetTextDatum(psprintf("ldapbasedn=%s", hba->ldapbasedn)); + + if (hba->ldapbinddn) + options[noptions++] = + CStringGetTextDatum(psprintf("ldapbinddn=%s", hba->ldapbinddn)); + + if (hba->ldapbindpasswd) + options[noptions++] = + CStringGetTextDatum(psprintf("ldapbindpasswd=%s", + hba->ldapbindpasswd)); + + if (hba->ldapsearchattribute) + options[noptions++] = + CStringGetTextDatum(psprintf("ldapsearchattribute=%s", + hba->ldapsearchattribute)); + + if (hba->ldapsearchfilter) + options[noptions++] = + CStringGetTextDatum(psprintf("ldapsearchfilter=%s", + hba->ldapsearchfilter)); + + if (hba->ldapscope) + options[noptions++] = + CStringGetTextDatum(psprintf("ldapscope=%d", hba->ldapscope)); + } + + if (hba->auth_method == uaRADIUS) + { + if (hba->radiusservers_s) + options[noptions++] = + CStringGetTextDatum(psprintf("radiusservers=%s", hba->radiusservers_s)); + + if (hba->radiussecrets_s) + options[noptions++] = + CStringGetTextDatum(psprintf("radiussecrets=%s", hba->radiussecrets_s)); + + if (hba->radiusidentifiers_s) + options[noptions++] = + CStringGetTextDatum(psprintf("radiusidentifiers=%s", hba->radiusidentifiers_s)); + + if (hba->radiusports_s) + options[noptions++] = + CStringGetTextDatum(psprintf("radiusports=%s", hba->radiusports_s)); + } + + /* If you add more options, consider increasing MAX_HBA_OPTIONS. */ + Assert(noptions <= MAX_HBA_OPTIONS); + + if (noptions > 0) + return construct_array_builtin(options, noptions, TEXTOID); + else + return NULL; +} + +/* Number of columns in pg_hba_file_rules view */ +#define NUM_PG_HBA_FILE_RULES_ATTS 11 + +/* + * fill_hba_line + * Build one row of pg_hba_file_rules view, add it to tuplestore. + * + * tuple_store: where to store data + * tupdesc: tuple descriptor for the view + * rule_number: unique identifier among all valid rules + * filename: configuration file name (must always be valid) + * lineno: line number of configuration file (must always be valid) + * hba: parsed line data (can be NULL, in which case err_msg should be set) + * err_msg: error message (NULL if none) + * + * Note: leaks memory, but we don't care since this is run in a short-lived + * memory context. 
+ */ +static void +fill_hba_line(Tuplestorestate *tuple_store, TupleDesc tupdesc, + int rule_number, char *filename, int lineno, HbaLine *hba, + const char *err_msg) +{ + Datum values[NUM_PG_HBA_FILE_RULES_ATTS]; + bool nulls[NUM_PG_HBA_FILE_RULES_ATTS]; + char buffer[NI_MAXHOST]; + HeapTuple tuple; + int index; + ListCell *lc; + const char *typestr; + const char *addrstr; + const char *maskstr; + ArrayType *options; + + Assert(tupdesc->natts == NUM_PG_HBA_FILE_RULES_ATTS); + + memset(values, 0, sizeof(values)); + memset(nulls, 0, sizeof(nulls)); + index = 0; + + /* rule_number, nothing on error */ + if (err_msg) + nulls[index++] = true; + else + values[index++] = Int32GetDatum(rule_number); + + /* file_name */ + values[index++] = CStringGetTextDatum(filename); + + /* line_number */ + values[index++] = Int32GetDatum(lineno); + + if (hba != NULL) + { + /* type */ + /* Avoid a default: case so compiler will warn about missing cases */ + typestr = NULL; + switch (hba->conntype) + { + case ctLocal: + typestr = "local"; + break; + case ctHost: + typestr = "host"; + break; + case ctHostSSL: + typestr = "hostssl"; + break; + case ctHostNoSSL: + typestr = "hostnossl"; + break; + case ctHostGSS: + typestr = "hostgssenc"; + break; + case ctHostNoGSS: + typestr = "hostnogssenc"; + break; + } + if (typestr) + values[index++] = CStringGetTextDatum(typestr); + else + nulls[index++] = true; + + /* database */ + if (hba->databases) + { + /* + * Flatten AuthToken list to string list. It might seem that we + * should re-quote any quoted tokens, but that has been rejected + * on the grounds that it makes it harder to compare the array + * elements to other system catalogs. That makes entries like + * "all" or "samerole" formally ambiguous ... but users who name + * databases/roles that way are inflicting their own pain. + */ + List *names = NIL; + + foreach(lc, hba->databases) + { + AuthToken *tok = lfirst(lc); + + names = lappend(names, tok->string); + } + values[index++] = PointerGetDatum(strlist_to_textarray(names)); + } + else + nulls[index++] = true; + + /* user */ + if (hba->roles) + { + /* Flatten AuthToken list to string list; see comment above */ + List *roles = NIL; + + foreach(lc, hba->roles) + { + AuthToken *tok = lfirst(lc); + + roles = lappend(roles, tok->string); + } + values[index++] = PointerGetDatum(strlist_to_textarray(roles)); + } + else + nulls[index++] = true; + + /* address and netmask */ + /* Avoid a default: case so compiler will warn about missing cases */ + addrstr = maskstr = NULL; + switch (hba->ip_cmp_method) + { + case ipCmpMask: + if (hba->hostname) + { + addrstr = hba->hostname; + } + else + { + /* + * Note: if pg_getnameinfo_all fails, it'll set buffer to + * "???", which we want to return. 
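/*
 * A minimal standalone sketch of the row-building pattern in fill_hba_line():
 * parallel values[] / nulls[] arrays, one slot per view column, with the
 * trailing columns nulled wholesale for a line that failed to parse and the
 * final column reserved for the error message.  Plain strings stand in for
 * Datums here.
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define NATTS 11				/* mirrors NUM_PG_HBA_FILE_RULES_ATTS */

int
main(void)
{
	const char *values[NATTS] = {0};
	bool		nulls[NATTS] = {false};
	const char *err_msg = "invalid authentication method";	/* NULL if the line parsed */

	/* rule_number is NULL for broken lines; file name and line number are always set */
	if (err_msg)
		nulls[0] = true;
	else
		values[0] = "1";
	values[1] = "pg_hba.conf";
	values[2] = "17";

	if (err_msg)
	{
		/* columns 3 .. NATTS-2 become NULL; the last column carries the error */
		memset(&nulls[3], true, (NATTS - 4) * sizeof(bool));
		values[NATTS - 1] = err_msg;
	}
	else
		nulls[NATTS - 1] = true;

	for (int i = 0; i < NATTS; i++)
		printf("col %2d: %s\n", i, nulls[i] ? "NULL" : (values[i] ? values[i] : ""));
	return 0;
}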
+ */ + if (hba->addrlen > 0) + { + if (pg_getnameinfo_all(&hba->addr, hba->addrlen, + buffer, sizeof(buffer), + NULL, 0, + NI_NUMERICHOST) == 0) + clean_ipv6_addr(hba->addr.ss_family, buffer); + addrstr = pstrdup(buffer); + } + if (hba->masklen > 0) + { + if (pg_getnameinfo_all(&hba->mask, hba->masklen, + buffer, sizeof(buffer), + NULL, 0, + NI_NUMERICHOST) == 0) + clean_ipv6_addr(hba->mask.ss_family, buffer); + maskstr = pstrdup(buffer); + } + } + break; + case ipCmpAll: + addrstr = "all"; + break; + case ipCmpSameHost: + addrstr = "samehost"; + break; + case ipCmpSameNet: + addrstr = "samenet"; + break; + } + if (addrstr) + values[index++] = CStringGetTextDatum(addrstr); + else + nulls[index++] = true; + if (maskstr) + values[index++] = CStringGetTextDatum(maskstr); + else + nulls[index++] = true; + + /* auth_method */ + values[index++] = CStringGetTextDatum(hba_authname(hba->auth_method)); + + /* options */ + options = get_hba_options(hba); + if (options) + values[index++] = PointerGetDatum(options); + else + nulls[index++] = true; + } + else + { + /* no parsing result, so set relevant fields to nulls */ + memset(&nulls[3], true, (NUM_PG_HBA_FILE_RULES_ATTS - 4) * sizeof(bool)); + } + + /* error */ + if (err_msg) + values[NUM_PG_HBA_FILE_RULES_ATTS - 1] = CStringGetTextDatum(err_msg); + else + nulls[NUM_PG_HBA_FILE_RULES_ATTS - 1] = true; + + tuple = heap_form_tuple(tupdesc, values, nulls); + tuplestore_puttuple(tuple_store, tuple); +} + +/* + * fill_hba_view + * Read the pg_hba.conf file and fill the tuplestore with view records. + */ +static void +fill_hba_view(Tuplestorestate *tuple_store, TupleDesc tupdesc) +{ + FILE *file; + List *hba_lines = NIL; + ListCell *line; + int rule_number = 0; + MemoryContext hbacxt; + MemoryContext oldcxt; + + /* + * In the unlikely event that we can't open pg_hba.conf, we throw an + * error, rather than trying to report it via some sort of view entry. + * (Most other error conditions should result in a message in a view + * entry.) + */ + file = open_auth_file(HbaFileName, ERROR, 0, NULL); + + tokenize_auth_file(HbaFileName, file, &hba_lines, DEBUG3, 0); + + /* Now parse all the lines */ + hbacxt = AllocSetContextCreate(CurrentMemoryContext, + "hba parser context", + ALLOCSET_SMALL_SIZES); + oldcxt = MemoryContextSwitchTo(hbacxt); + foreach(line, hba_lines) + { + TokenizedAuthLine *tok_line = (TokenizedAuthLine *) lfirst(line); + HbaLine *hbaline = NULL; + + /* don't parse lines that already have errors */ + if (tok_line->err_msg == NULL) + hbaline = parse_hba_line(tok_line, DEBUG3); + + /* No error, set a new rule number */ + if (tok_line->err_msg == NULL) + rule_number++; + + fill_hba_line(tuple_store, tupdesc, rule_number, + tok_line->file_name, tok_line->line_num, hbaline, + tok_line->err_msg); + } + + /* Free tokenizer memory */ + free_auth_file(file, 0); + /* Free parse_hba_line memory */ + MemoryContextSwitchTo(oldcxt); + MemoryContextDelete(hbacxt); +} + +/* + * pg_hba_file_rules + * + * SQL-accessible set-returning function to return all the entries in the + * pg_hba.conf file. + */ +Datum +pg_hba_file_rules(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsi; + + /* + * Build tuplestore to hold the result rows. We must use the Materialize + * mode to be safe against HBA file changes while the cursor is open. It's + * also more efficient than having to look up our current position in the + * parsed list every time. 
+ */ + InitMaterializedSRF(fcinfo, 0); + + /* Fill the tuplestore */ + rsi = (ReturnSetInfo *) fcinfo->resultinfo; + fill_hba_view(rsi->setResult, rsi->setDesc); + + PG_RETURN_NULL(); +} + +/* Number of columns in pg_ident_file_mappings view */ +#define NUM_PG_IDENT_FILE_MAPPINGS_ATTS 7 + +/* + * fill_ident_line: build one row of pg_ident_file_mappings view, add it to + * tuplestore + * + * tuple_store: where to store data + * tupdesc: tuple descriptor for the view + * map_number: unique identifier among all valid maps + * filename: configuration file name (must always be valid) + * lineno: line number of configuration file (must always be valid) + * ident: parsed line data (can be NULL, in which case err_msg should be set) + * err_msg: error message (NULL if none) + * + * Note: leaks memory, but we don't care since this is run in a short-lived + * memory context. + */ +static void +fill_ident_line(Tuplestorestate *tuple_store, TupleDesc tupdesc, + int map_number, char *filename, int lineno, IdentLine *ident, + const char *err_msg) +{ + Datum values[NUM_PG_IDENT_FILE_MAPPINGS_ATTS]; + bool nulls[NUM_PG_IDENT_FILE_MAPPINGS_ATTS]; + HeapTuple tuple; + int index; + + Assert(tupdesc->natts == NUM_PG_IDENT_FILE_MAPPINGS_ATTS); + + memset(values, 0, sizeof(values)); + memset(nulls, 0, sizeof(nulls)); + index = 0; + + /* map_number, nothing on error */ + if (err_msg) + nulls[index++] = true; + else + values[index++] = Int32GetDatum(map_number); + + /* file_name */ + values[index++] = CStringGetTextDatum(filename); + + /* line_number */ + values[index++] = Int32GetDatum(lineno); + + if (ident != NULL) + { + values[index++] = CStringGetTextDatum(ident->usermap); + values[index++] = CStringGetTextDatum(ident->system_user->string); + values[index++] = CStringGetTextDatum(ident->pg_user->string); + } + else + { + /* no parsing result, so set relevant fields to nulls */ + memset(&nulls[3], true, (NUM_PG_IDENT_FILE_MAPPINGS_ATTS - 4) * sizeof(bool)); + } + + /* error */ + if (err_msg) + values[NUM_PG_IDENT_FILE_MAPPINGS_ATTS - 1] = CStringGetTextDatum(err_msg); + else + nulls[NUM_PG_IDENT_FILE_MAPPINGS_ATTS - 1] = true; + + tuple = heap_form_tuple(tupdesc, values, nulls); + tuplestore_puttuple(tuple_store, tuple); +} + +/* + * Read the pg_ident.conf file and fill the tuplestore with view records. + */ +static void +fill_ident_view(Tuplestorestate *tuple_store, TupleDesc tupdesc) +{ + FILE *file; + List *ident_lines = NIL; + ListCell *line; + int map_number = 0; + MemoryContext identcxt; + MemoryContext oldcxt; + + /* + * In the unlikely event that we can't open pg_ident.conf, we throw an + * error, rather than trying to report it via some sort of view entry. + * (Most other error conditions should result in a message in a view + * entry.) 
+ */ + file = open_auth_file(IdentFileName, ERROR, 0, NULL); + + tokenize_auth_file(IdentFileName, file, &ident_lines, DEBUG3, 0); + + /* Now parse all the lines */ + identcxt = AllocSetContextCreate(CurrentMemoryContext, + "ident parser context", + ALLOCSET_SMALL_SIZES); + oldcxt = MemoryContextSwitchTo(identcxt); + foreach(line, ident_lines) + { + TokenizedAuthLine *tok_line = (TokenizedAuthLine *) lfirst(line); + IdentLine *identline = NULL; + + /* don't parse lines that already have errors */ + if (tok_line->err_msg == NULL) + identline = parse_ident_line(tok_line, DEBUG3); + + /* no error, set a new mapping number */ + if (tok_line->err_msg == NULL) + map_number++; + + fill_ident_line(tuple_store, tupdesc, map_number, + tok_line->file_name, tok_line->line_num, + identline, tok_line->err_msg); + } + + /* Free tokenizer memory */ + free_auth_file(file, 0); + /* Free parse_ident_line memory */ + MemoryContextSwitchTo(oldcxt); + MemoryContextDelete(identcxt); +} + +/* + * SQL-accessible SRF to return all the entries in the pg_ident.conf file. + */ +Datum +pg_ident_file_mappings(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsi; + + /* + * Build tuplestore to hold the result rows. We must use the Materialize + * mode to be safe against HBA file changes while the cursor is open. It's + * also more efficient than having to look up our current position in the + * parsed list every time. + */ + InitMaterializedSRF(fcinfo, 0); + + /* Fill the tuplestore */ + rsi = (ReturnSetInfo *) fcinfo->resultinfo; + fill_ident_view(rsi->setResult, rsi->setDesc); + + PG_RETURN_NULL(); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/inet_cidr_ntop.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/inet_cidr_ntop.c new file mode 100644 index 00000000000..5f74c05a65d --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/inet_cidr_ntop.c @@ -0,0 +1,294 @@ +/* + * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC") + * Copyright (c) 1996,1999 by Internet Software Consortium. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ * + * src/backend/utils/adt/inet_cidr_ntop.c + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static const char rcsid[] = "Id: inet_net_ntop.c,v 1.1.2.2 2004/03/09 09:17:27 marka Exp $"; +#endif + +#include "postgres.h" + +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +#include "utils/builtins.h" +#include "utils/inet.h" + + +#ifdef SPRINTF_CHAR +#define SPRINTF(x) strlen(sprintf/**/x) +#else +#define SPRINTF(x) ((size_t)sprintf x) +#endif + +static char *inet_cidr_ntop_ipv4(const u_char *src, int bits, + char *dst, size_t size); +static char *inet_cidr_ntop_ipv6(const u_char *src, int bits, + char *dst, size_t size); + +/* + * char * + * pg_inet_cidr_ntop(af, src, bits, dst, size) + * convert network number from network to presentation format. + * generates CIDR style result always. + * return: + * pointer to dst, or NULL if an error occurred (check errno). + * author: + * Paul Vixie (ISC), July 1996 + */ +char * +pg_inet_cidr_ntop(int af, const void *src, int bits, char *dst, size_t size) +{ + switch (af) + { + case PGSQL_AF_INET: + return inet_cidr_ntop_ipv4(src, bits, dst, size); + case PGSQL_AF_INET6: + return inet_cidr_ntop_ipv6(src, bits, dst, size); + default: + errno = EAFNOSUPPORT; + return NULL; + } +} + + +/* + * static char * + * inet_cidr_ntop_ipv4(src, bits, dst, size) + * convert IPv4 network number from network to presentation format. + * generates CIDR style result always. + * return: + * pointer to dst, or NULL if an error occurred (check errno). + * note: + * network byte order assumed. this means 192.5.5.240/28 has + * 0b11110000 in its fourth octet. + * author: + * Paul Vixie (ISC), July 1996 + */ +static char * +inet_cidr_ntop_ipv4(const u_char *src, int bits, char *dst, size_t size) +{ + char *odst = dst; + char *t; + u_int m; + int b; + + if (bits < 0 || bits > 32) + { + errno = EINVAL; + return NULL; + } + + if (bits == 0) + { + if (size < sizeof "0") + goto emsgsize; + *dst++ = '0'; + size--; + *dst = '\0'; + } + + /* Format whole octets. */ + for (b = bits / 8; b > 0; b--) + { + if (size <= sizeof "255.") + goto emsgsize; + t = dst; + dst += SPRINTF((dst, "%u", *src++)); + if (b > 1) + { + *dst++ = '.'; + *dst = '\0'; + } + size -= (size_t) (dst - t); + } + + /* Format partial octet. */ + b = bits % 8; + if (b > 0) + { + if (size <= sizeof ".255") + goto emsgsize; + t = dst; + if (dst != odst) + *dst++ = '.'; + m = ((1 << b) - 1) << (8 - b); + dst += SPRINTF((dst, "%u", *src & m)); + size -= (size_t) (dst - t); + } + + /* Format CIDR /width. */ + if (size <= sizeof "/32") + goto emsgsize; + dst += SPRINTF((dst, "/%u", bits)); + return odst; + +emsgsize: + errno = EMSGSIZE; + return NULL; +} + +/* + * static char * + * inet_cidr_ntop_ipv6(src, bits, dst, size) + * convert IPv6 network number from network to presentation format. + * generates CIDR style result always. Picks the shortest representation + * unless the IP is really IPv4. + * always prints specified number of bits (bits). + * return: + * pointer to dst, or NULL if an error occurred (check errno). + * note: + * network byte order assumed. this means 192.5.5.240/28 has + * 0x11110000 in its fourth octet. 
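/*
 * A standalone sketch of the partial-octet handling in inet_cidr_ntop_ipv4()
 * above: after the whole octets, the remaining b = bits % 8 high-order bits
 * are kept with the mask ((1 << b) - 1) << (8 - b).  This is tailored to the
 * /28 example; whole-octet prefixes are handled slightly differently above.
 */
#include <stdio.h>

int
main(void)
{
	/* 192.5.5.240/28: three whole octets, then b = 4 leftover bits */
	unsigned char octets[4] = {192, 5, 5, 240};
	int			bits = 28;
	int			whole = bits / 8;
	int			b = bits % 8;
	char		buf[32];
	int			len = 0;

	for (int i = 0; i < whole; i++)
		len += sprintf(buf + len, "%u.", octets[i]);
	if (b > 0)
	{
		unsigned	m = ((1U << b) - 1) << (8 - b);	/* 0xF0 for b = 4 */

		len += sprintf(buf + len, "%u", octets[whole] & m);
	}
	sprintf(buf + len, "/%d", bits);

	printf("%s\n", buf);		/* prints 192.5.5.240/28 */
	return 0;
}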
+ * author: + * Vadim Kogan (UCB), June 2001 + * Original version (IPv4) by Paul Vixie (ISC), July 1996 + */ + +static char * +inet_cidr_ntop_ipv6(const u_char *src, int bits, char *dst, size_t size) +{ + u_int m; + int b; + int p; + int zero_s, + zero_l, + tmp_zero_s, + tmp_zero_l; + int i; + int is_ipv4 = 0; + unsigned char inbuf[16]; + char outbuf[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255/128")]; + char *cp; + int words; + u_char *s; + + if (bits < 0 || bits > 128) + { + errno = EINVAL; + return NULL; + } + + cp = outbuf; + + if (bits == 0) + { + *cp++ = ':'; + *cp++ = ':'; + *cp = '\0'; + } + else + { + /* Copy src to private buffer. Zero host part. */ + p = (bits + 7) / 8; + memcpy(inbuf, src, p); + memset(inbuf + p, 0, 16 - p); + b = bits % 8; + if (b != 0) + { + m = ((u_int) ~0) << (8 - b); + inbuf[p - 1] &= m; + } + + s = inbuf; + + /* how many words need to be displayed in output */ + words = (bits + 15) / 16; + if (words == 1) + words = 2; + + /* Find the longest substring of zero's */ + zero_s = zero_l = tmp_zero_s = tmp_zero_l = 0; + for (i = 0; i < (words * 2); i += 2) + { + if ((s[i] | s[i + 1]) == 0) + { + if (tmp_zero_l == 0) + tmp_zero_s = i / 2; + tmp_zero_l++; + } + else + { + if (tmp_zero_l && zero_l < tmp_zero_l) + { + zero_s = tmp_zero_s; + zero_l = tmp_zero_l; + tmp_zero_l = 0; + } + } + } + + if (tmp_zero_l && zero_l < tmp_zero_l) + { + zero_s = tmp_zero_s; + zero_l = tmp_zero_l; + } + + if (zero_l != words && zero_s == 0 && ((zero_l == 6) || + ((zero_l == 5 && s[10] == 0xff && s[11] == 0xff) || + ((zero_l == 7 && s[14] != 0 && s[15] != 1))))) + is_ipv4 = 1; + + /* Format whole words. */ + for (p = 0; p < words; p++) + { + if (zero_l != 0 && p >= zero_s && p < zero_s + zero_l) + { + /* Time to skip some zeros */ + if (p == zero_s) + *cp++ = ':'; + if (p == words - 1) + *cp++ = ':'; + s++; + s++; + continue; + } + + if (is_ipv4 && p > 5) + { + *cp++ = (p == 6) ? ':' : '.'; + cp += SPRINTF((cp, "%u", *s++)); + /* we can potentially drop the last octet */ + if (p != 7 || bits > 120) + { + *cp++ = '.'; + cp += SPRINTF((cp, "%u", *s++)); + } + } + else + { + if (cp != outbuf) + *cp++ = ':'; + cp += SPRINTF((cp, "%x", *s * 256 + s[1])); + s += 2; + } + } + } + /* Format CIDR /width. */ + (void) SPRINTF((cp, "/%u", bits)); + if (strlen(outbuf) + 1 > size) + goto emsgsize; + strcpy(dst, outbuf); + + return dst; + +emsgsize: + errno = EMSGSIZE; + return NULL; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/inet_net_pton.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/inet_net_pton.c new file mode 100644 index 00000000000..d3221a13139 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/inet_net_pton.c @@ -0,0 +1,564 @@ +/* + * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC") + * Copyright (c) 1996,1999 by Internet Software Consortium. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
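/*
 * A standalone sketch of the "::" placement logic in inet_cidr_ntop_ipv6()
 * above: the address is scanned as 16-bit words (the upstream code only
 * scans the words covered by the prefix) and the longest run of zero words
 * is the span that "::" will stand for.
 */
#include <stdio.h>

int
main(void)
{
	/* 2001:db8:0:0:0:0:0:1 -- the longest zero run is words 2..6 */
	unsigned short words[8] = {0x2001, 0x0db8, 0, 0, 0, 0, 0, 1};
	int			best_start = -1, best_len = 0;
	int			cur_start = -1, cur_len = 0;

	for (int i = 0; i < 8; i++)
	{
		if (words[i] == 0)
		{
			if (cur_len == 0)
				cur_start = i;
			cur_len++;
			if (cur_len > best_len)
			{
				best_len = cur_len;
				best_start = cur_start;
			}
		}
		else
			cur_len = 0;
	}

	/* prints "longest zero run: start=2 len=5" */
	printf("longest zero run: start=%d len=%d\n", best_start, best_len);
	return 0;
}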
IN NO EVENT SHALL ISC BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * src/backend/utils/adt/inet_net_pton.c + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static const char rcsid[] = "Id: inet_net_pton.c,v 1.4.2.3 2004/03/17 00:40:11 marka Exp $"; +#endif + +#include "postgres.h" + +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <assert.h> +#include <ctype.h> + +#include "utils/builtins.h" /* pgrminclude ignore */ /* needed on some + * platforms */ +#include "utils/inet.h" + + +static int inet_net_pton_ipv4(const char *src, u_char *dst); +static int inet_cidr_pton_ipv4(const char *src, u_char *dst, size_t size); +static int inet_net_pton_ipv6(const char *src, u_char *dst); +static int inet_cidr_pton_ipv6(const char *src, u_char *dst, size_t size); + + +/* + * int + * pg_inet_net_pton(af, src, dst, size) + * convert network number from presentation to network format. + * accepts hex octets, hex strings, decimal octets, and /CIDR. + * "size" is in bytes and describes "dst". + * return: + * number of bits, either imputed classfully or specified with /CIDR, + * or -1 if some failure occurred (check errno). ENOENT means it was + * not a valid network specification. + * author: + * Paul Vixie (ISC), June 1996 + * + * Changes: + * I added the inet_cidr_pton function (also from Paul) and changed + * the names to reflect their current use. + * + */ +int +pg_inet_net_pton(int af, const char *src, void *dst, size_t size) +{ + switch (af) + { + case PGSQL_AF_INET: + return size == -1 ? + inet_net_pton_ipv4(src, dst) : + inet_cidr_pton_ipv4(src, dst, size); + case PGSQL_AF_INET6: + return size == -1 ? + inet_net_pton_ipv6(src, dst) : + inet_cidr_pton_ipv6(src, dst, size); + default: + errno = EAFNOSUPPORT; + return -1; + } +} + +/* + * static int + * inet_cidr_pton_ipv4(src, dst, size) + * convert IPv4 network number from presentation to network format. + * accepts hex octets, hex strings, decimal octets, and /CIDR. + * "size" is in bytes and describes "dst". + * return: + * number of bits, either imputed classfully or specified with /CIDR, + * or -1 if some failure occurred (check errno). ENOENT means it was + * not an IPv4 network specification. + * note: + * network byte order assumed. this means 192.5.5.240/28 has + * 0b11110000 in its fourth octet. + * author: + * Paul Vixie (ISC), June 1996 + */ +static int +inet_cidr_pton_ipv4(const char *src, u_char *dst, size_t size) +{ + static const char xdigits[] = "0123456789abcdef"; + static const char digits[] = "0123456789"; + int n, + ch, + tmp = 0, + dirty, + bits; + const u_char *odst = dst; + + ch = *src++; + if (ch == '0' && (src[0] == 'x' || src[0] == 'X') + && isxdigit((unsigned char) src[1])) + { + /* Hexadecimal: Eat nybble string. */ + if (size <= 0U) + goto emsgsize; + dirty = 0; + src++; /* skip x or X. */ + while ((ch = *src++) != '\0' && isxdigit((unsigned char) ch)) + { + if (isupper((unsigned char) ch)) + ch = tolower((unsigned char) ch); + n = strchr(xdigits, ch) - xdigits; + assert(n >= 0 && n <= 15); + if (dirty == 0) + tmp = n; + else + tmp = (tmp << 4) | n; + if (++dirty == 2) + { + if (size-- <= 0U) + goto emsgsize; + *dst++ = (u_char) tmp; + dirty = 0; + } + } + if (dirty) + { /* Odd trailing nybble? 
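/*
 * A standalone sketch of the hexadecimal branch above: pairs of hex digits
 * are accumulated into bytes, and an odd trailing digit becomes the high
 * nybble of one final byte.
 */
#include <ctype.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	const char *src = "0xC0A8F";	/* 0xC0, 0xA8, then an odd trailing 'F' */
	static const char xdigits[] = "0123456789abcdef";
	unsigned char dst[4];
	int			ndst = 0;
	int			dirty = 0;
	int			tmp = 0;

	src += 2;					/* skip the "0x" */
	for (; *src; src++)
	{
		int			n = (int) (strchr(xdigits, tolower((unsigned char) *src)) - xdigits);

		tmp = dirty ? ((tmp << 4) | n) : n;
		if (++dirty == 2)
		{
			dst[ndst++] = (unsigned char) tmp;
			dirty = 0;
		}
	}
	if (dirty)					/* odd trailing nybble fills the high half */
		dst[ndst++] = (unsigned char) (tmp << 4);

	for (int i = 0; i < ndst; i++)
		printf("%02X ", dst[i]);	/* prints C0 A8 F0 */
	printf("\n");
	return 0;
}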
*/ + if (size-- <= 0U) + goto emsgsize; + *dst++ = (u_char) (tmp << 4); + } + } + else if (isdigit((unsigned char) ch)) + { + /* Decimal: eat dotted digit string. */ + for (;;) + { + tmp = 0; + do + { + n = strchr(digits, ch) - digits; + assert(n >= 0 && n <= 9); + tmp *= 10; + tmp += n; + if (tmp > 255) + goto enoent; + } while ((ch = *src++) != '\0' && + isdigit((unsigned char) ch)); + if (size-- <= 0U) + goto emsgsize; + *dst++ = (u_char) tmp; + if (ch == '\0' || ch == '/') + break; + if (ch != '.') + goto enoent; + ch = *src++; + if (!isdigit((unsigned char) ch)) + goto enoent; + } + } + else + goto enoent; + + bits = -1; + if (ch == '/' && isdigit((unsigned char) src[0]) && dst > odst) + { + /* CIDR width specifier. Nothing can follow it. */ + ch = *src++; /* Skip over the /. */ + bits = 0; + do + { + n = strchr(digits, ch) - digits; + assert(n >= 0 && n <= 9); + bits *= 10; + bits += n; + } while ((ch = *src++) != '\0' && isdigit((unsigned char) ch)); + if (ch != '\0') + goto enoent; + if (bits > 32) + goto emsgsize; + } + + /* Fiery death and destruction unless we prefetched EOS. */ + if (ch != '\0') + goto enoent; + + /* If nothing was written to the destination, we found no address. */ + if (dst == odst) + goto enoent; + /* If no CIDR spec was given, infer width from net class. */ + if (bits == -1) + { + if (*odst >= 240) /* Class E */ + bits = 32; + else if (*odst >= 224) /* Class D */ + bits = 8; + else if (*odst >= 192) /* Class C */ + bits = 24; + else if (*odst >= 128) /* Class B */ + bits = 16; + else + /* Class A */ + bits = 8; + /* If imputed mask is narrower than specified octets, widen. */ + if (bits < ((dst - odst) * 8)) + bits = (dst - odst) * 8; + + /* + * If there are no additional bits specified for a class D address + * adjust bits to 4. + */ + if (bits == 8 && *odst == 224) + bits = 4; + } + /* Extend network to cover the actual mask. */ + while (bits > ((dst - odst) * 8)) + { + if (size-- <= 0U) + goto emsgsize; + *dst++ = '\0'; + } + return bits; + +enoent: + errno = ENOENT; + return -1; + +emsgsize: + errno = EMSGSIZE; + return -1; +} + +/* + * int + * inet_net_pton_ipv4(af, src, dst, *bits) + * convert network address from presentation to network format. + * accepts inet_pton()'s input for this "af" plus trailing "/CIDR". + * "dst" is assumed large enough for its "af". "bits" is set to the + * /CIDR prefix length, which can have defaults (like /32 for IPv4). + * return: + * -1 if an error occurred (inspect errno; ENOENT means bad format). + * 0 if successful conversion occurred. + * note: + * 192.5.5.1/28 has a nonzero host part, which means it isn't a network + * as called for by inet_cidr_pton() but it can be a host address with + * an included netmask. + * author: + * Paul Vixie (ISC), October 1998 + */ +static int +inet_net_pton_ipv4(const char *src, u_char *dst) +{ + static const char digits[] = "0123456789"; + const u_char *odst = dst; + int n, + ch, + tmp, + bits; + size_t size = 4; + + /* Get the mantissa. */ + while (ch = *src++, isdigit((unsigned char) ch)) + { + tmp = 0; + do + { + n = strchr(digits, ch) - digits; + assert(n >= 0 && n <= 9); + tmp *= 10; + tmp += n; + if (tmp > 255) + goto enoent; + } while ((ch = *src++) != '\0' && isdigit((unsigned char) ch)); + if (size-- == 0) + goto emsgsize; + *dst++ = (u_char) tmp; + if (ch == '\0' || ch == '/') + break; + if (ch != '.') + goto enoent; + } + + /* Get the prefix length if any. */ + bits = -1; + if (ch == '/' && isdigit((unsigned char) src[0]) && dst > odst) + { + /* CIDR width specifier. 
Nothing can follow it. */ + ch = *src++; /* Skip over the /. */ + bits = 0; + do + { + n = strchr(digits, ch) - digits; + assert(n >= 0 && n <= 9); + bits *= 10; + bits += n; + } while ((ch = *src++) != '\0' && isdigit((unsigned char) ch)); + if (ch != '\0') + goto enoent; + if (bits > 32) + goto emsgsize; + } + + /* Fiery death and destruction unless we prefetched EOS. */ + if (ch != '\0') + goto enoent; + + /* Prefix length can default to /32 only if all four octets spec'd. */ + if (bits == -1) + { + if (dst - odst == 4) + bits = 32; + else + goto enoent; + } + + /* If nothing was written to the destination, we found no address. */ + if (dst == odst) + goto enoent; + + /* If prefix length overspecifies mantissa, life is bad. */ + if ((bits / 8) > (dst - odst)) + goto enoent; + + /* Extend address to four octets. */ + while (size-- > 0) + *dst++ = 0; + + return bits; + +enoent: + errno = ENOENT; + return -1; + +emsgsize: + errno = EMSGSIZE; + return -1; +} + +static int +getbits(const char *src, int *bitsp) +{ + static const char digits[] = "0123456789"; + int n; + int val; + char ch; + + val = 0; + n = 0; + while ((ch = *src++) != '\0') + { + const char *pch; + + pch = strchr(digits, ch); + if (pch != NULL) + { + if (n++ != 0 && val == 0) /* no leading zeros */ + return 0; + val *= 10; + val += (pch - digits); + if (val > 128) /* range */ + return 0; + continue; + } + return 0; + } + if (n == 0) + return 0; + *bitsp = val; + return 1; +} + +static int +getv4(const char *src, u_char *dst, int *bitsp) +{ + static const char digits[] = "0123456789"; + u_char *odst = dst; + int n; + u_int val; + char ch; + + val = 0; + n = 0; + while ((ch = *src++) != '\0') + { + const char *pch; + + pch = strchr(digits, ch); + if (pch != NULL) + { + if (n++ != 0 && val == 0) /* no leading zeros */ + return 0; + val *= 10; + val += (pch - digits); + if (val > 255) /* range */ + return 0; + continue; + } + if (ch == '.' || ch == '/') + { + if (dst - odst > 3) /* too many octets? */ + return 0; + *dst++ = val; + if (ch == '/') + return getbits(src, bitsp); + val = 0; + n = 0; + continue; + } + return 0; + } + if (n == 0) + return 0; + if (dst - odst > 3) /* too many octets? */ + return 0; + *dst++ = val; + return 1; +} + +static int +inet_net_pton_ipv6(const char *src, u_char *dst) +{ + return inet_cidr_pton_ipv6(src, dst, 16); +} + +#define NS_IN6ADDRSZ 16 +#define NS_INT16SZ 2 +#define NS_INADDRSZ 4 + +static int +inet_cidr_pton_ipv6(const char *src, u_char *dst, size_t size) +{ + static const char xdigits_l[] = "0123456789abcdef", + xdigits_u[] = "0123456789ABCDEF"; + u_char tmp[NS_IN6ADDRSZ], + *tp, + *endp, + *colonp; + const char *xdigits, + *curtok; + int ch, + saw_xdigit; + u_int val; + int digits; + int bits; + + if (size < NS_IN6ADDRSZ) + goto emsgsize; + + memset((tp = tmp), '\0', NS_IN6ADDRSZ); + endp = tp + NS_IN6ADDRSZ; + colonp = NULL; + /* Leading :: requires some special handling. 
*/ + if (*src == ':') + if (*++src != ':') + goto enoent; + curtok = src; + saw_xdigit = 0; + val = 0; + digits = 0; + bits = -1; + while ((ch = *src++) != '\0') + { + const char *pch; + + if ((pch = strchr((xdigits = xdigits_l), ch)) == NULL) + pch = strchr((xdigits = xdigits_u), ch); + if (pch != NULL) + { + val <<= 4; + val |= (pch - xdigits); + if (++digits > 4) + goto enoent; + saw_xdigit = 1; + continue; + } + if (ch == ':') + { + curtok = src; + if (!saw_xdigit) + { + if (colonp) + goto enoent; + colonp = tp; + continue; + } + else if (*src == '\0') + goto enoent; + if (tp + NS_INT16SZ > endp) + goto enoent; + *tp++ = (u_char) (val >> 8) & 0xff; + *tp++ = (u_char) val & 0xff; + saw_xdigit = 0; + digits = 0; + val = 0; + continue; + } + if (ch == '.' && ((tp + NS_INADDRSZ) <= endp) && + getv4(curtok, tp, &bits) > 0) + { + tp += NS_INADDRSZ; + saw_xdigit = 0; + break; /* '\0' was seen by inet_pton4(). */ + } + if (ch == '/' && getbits(src, &bits) > 0) + break; + goto enoent; + } + if (saw_xdigit) + { + if (tp + NS_INT16SZ > endp) + goto enoent; + *tp++ = (u_char) (val >> 8) & 0xff; + *tp++ = (u_char) val & 0xff; + } + if (bits == -1) + bits = 128; + + endp = tmp + 16; + + if (colonp != NULL) + { + /* + * Since some memmove()'s erroneously fail to handle overlapping + * regions, we'll do the shift by hand. + */ + const int n = tp - colonp; + int i; + + if (tp == endp) + goto enoent; + for (i = 1; i <= n; i++) + { + endp[-i] = colonp[n - i]; + colonp[n - i] = 0; + } + tp = endp; + } + if (tp != endp) + goto enoent; + + /* + * Copy out the result. + */ + memcpy(dst, tmp, NS_IN6ADDRSZ); + + return bits; + +enoent: + errno = ENOENT; + return -1; + +emsgsize: + errno = EMSGSIZE; + return -1; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/int.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/int.c new file mode 100644 index 00000000000..44d1c7ad0c4 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/int.c @@ -0,0 +1,1649 @@ +/*------------------------------------------------------------------------- + * + * int.c + * Functions for the built-in integer types (except int8). 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/int.c + * + *------------------------------------------------------------------------- + */ +/* + * OLD COMMENTS + * I/O routines: + * int2in, int2out, int2recv, int2send + * int4in, int4out, int4recv, int4send + * int2vectorin, int2vectorout, int2vectorrecv, int2vectorsend + * Boolean operators: + * inteq, intne, intlt, intle, intgt, intge + * Arithmetic operators: + * intpl, intmi, int4mul, intdiv + * + * Arithmetic operators: + * intmod + */ +#include "postgres.h" + +#include <ctype.h> +#include <limits.h> +#include <math.h> + +#include "catalog/pg_type.h" +#include "common/int.h" +#include "funcapi.h" +#include "libpq/pqformat.h" +#include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" +#include "optimizer/optimizer.h" +#include "utils/array.h" +#include "utils/builtins.h" + +#define Int2VectorSize(n) (offsetof(int2vector, values) + (n) * sizeof(int16)) + +typedef struct +{ + int32 current; + int32 finish; + int32 step; +} generate_series_fctx; + + +/***************************************************************************** + * USER I/O ROUTINES * + *****************************************************************************/ + +/* + * int2in - converts "num" to short + */ +Datum +int2in(PG_FUNCTION_ARGS) +{ + char *num = PG_GETARG_CSTRING(0); + + PG_RETURN_INT16(pg_strtoint16_safe(num, fcinfo->context)); +} + +/* + * int2out - converts short to "num" + */ +Datum +int2out(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + char *result = (char *) palloc(7); /* sign, 5 digits, '\0' */ + + pg_itoa(arg1, result); + PG_RETURN_CSTRING(result); +} + +/* + * int2recv - converts external binary format to int2 + */ +Datum +int2recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + + PG_RETURN_INT16((int16) pq_getmsgint(buf, sizeof(int16))); +} + +/* + * int2send - converts int2 to binary format + */ +Datum +int2send(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint16(&buf, arg1); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * construct int2vector given a raw array of int2s + * + * If int2s is NULL then caller must fill values[] afterward + */ +int2vector * +buildint2vector(const int16 *int2s, int n) +{ + int2vector *result; + + result = (int2vector *) palloc0(Int2VectorSize(n)); + + if (n > 0 && int2s) + memcpy(result->values, int2s, n * sizeof(int16)); + + /* + * Attach standard array header. For historical reasons, we set the index + * lower bound to 0 not 1. + */ + SET_VARSIZE(result, Int2VectorSize(n)); + result->ndim = 1; + result->dataoffset = 0; /* never any nulls */ + result->elemtype = INT2OID; + result->dim1 = n; + result->lbound1 = 0; + + return result; +} + +/* + * int2vectorin - converts "num num ..." 
to internal form + */ +Datum +int2vectorin(PG_FUNCTION_ARGS) +{ + char *intString = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + int2vector *result; + int nalloc; + int n; + + nalloc = 32; /* arbitrary initial size guess */ + result = (int2vector *) palloc0(Int2VectorSize(nalloc)); + + for (n = 0;; n++) + { + long l; + char *endp; + + while (*intString && isspace((unsigned char) *intString)) + intString++; + if (*intString == '\0') + break; + + if (n >= nalloc) + { + nalloc *= 2; + result = (int2vector *) repalloc(result, Int2VectorSize(nalloc)); + } + + errno = 0; + l = strtol(intString, &endp, 10); + + if (intString == endp) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "smallint", intString))); + + if (errno == ERANGE || l < SHRT_MIN || l > SHRT_MAX) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value \"%s\" is out of range for type %s", intString, + "smallint"))); + + if (*endp && *endp != ' ') + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "smallint", intString))); + + result->values[n] = l; + intString = endp; + } + + SET_VARSIZE(result, Int2VectorSize(n)); + result->ndim = 1; + result->dataoffset = 0; /* never any nulls */ + result->elemtype = INT2OID; + result->dim1 = n; + result->lbound1 = 0; + + PG_RETURN_POINTER(result); +} + +/* + * int2vectorout - converts internal form to "num num ..." + */ +Datum +int2vectorout(PG_FUNCTION_ARGS) +{ + int2vector *int2Array = (int2vector *) PG_GETARG_POINTER(0); + int num, + nnums = int2Array->dim1; + char *rp; + char *result; + + /* assumes sign, 5 digits, ' ' */ + rp = result = (char *) palloc(nnums * 7 + 1); + for (num = 0; num < nnums; num++) + { + if (num != 0) + *rp++ = ' '; + rp += pg_itoa(int2Array->values[num], rp); + } + *rp = '\0'; + PG_RETURN_CSTRING(result); +} + +/* + * int2vectorrecv - converts external binary format to int2vector + */ +Datum +int2vectorrecv(PG_FUNCTION_ARGS) +{ + LOCAL_FCINFO(locfcinfo, 3); + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + int2vector *result; + + /* + * Normally one would call array_recv() using DirectFunctionCall3, but + * that does not work since array_recv wants to cache some data using + * fcinfo->flinfo->fn_extra. So we need to pass it our own flinfo + * parameter. 
+ */ + InitFunctionCallInfoData(*locfcinfo, fcinfo->flinfo, 3, + InvalidOid, NULL, NULL); + + locfcinfo->args[0].value = PointerGetDatum(buf); + locfcinfo->args[0].isnull = false; + locfcinfo->args[1].value = ObjectIdGetDatum(INT2OID); + locfcinfo->args[1].isnull = false; + locfcinfo->args[2].value = Int32GetDatum(-1); + locfcinfo->args[2].isnull = false; + + result = (int2vector *) DatumGetPointer(array_recv(locfcinfo)); + + Assert(!locfcinfo->isnull); + + /* sanity checks: int2vector must be 1-D, 0-based, no nulls */ + if (ARR_NDIM(result) != 1 || + ARR_HASNULL(result) || + ARR_ELEMTYPE(result) != INT2OID || + ARR_LBOUND(result)[0] != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("invalid int2vector data"))); + + PG_RETURN_POINTER(result); +} + +/* + * int2vectorsend - converts int2vector to binary format + */ +Datum +int2vectorsend(PG_FUNCTION_ARGS) +{ + return array_send(fcinfo); +} + + +/***************************************************************************** + * PUBLIC ROUTINES * + *****************************************************************************/ + +/* + * int4in - converts "num" to int4 + */ +Datum +int4in(PG_FUNCTION_ARGS) +{ + char *num = PG_GETARG_CSTRING(0); + + PG_RETURN_INT32(pg_strtoint32_safe(num, fcinfo->context)); +} + +/* + * int4out - converts int4 to "num" + */ +Datum +int4out(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + char *result = (char *) palloc(12); /* sign, 10 digits, '\0' */ + + pg_ltoa(arg1, result); + PG_RETURN_CSTRING(result); +} + +/* + * int4recv - converts external binary format to int4 + */ +Datum +int4recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + + PG_RETURN_INT32((int32) pq_getmsgint(buf, sizeof(int32))); +} + +/* + * int4send - converts int4 to binary format + */ +Datum +int4send(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint32(&buf, arg1); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + + +/* + * =================== + * CONVERSION ROUTINES + * =================== + */ + +Datum +i2toi4(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + + PG_RETURN_INT32((int32) arg1); +} + +Datum +i4toi2(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + + if (unlikely(arg1 < SHRT_MIN) || unlikely(arg1 > SHRT_MAX)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("smallint out of range"))); + + PG_RETURN_INT16((int16) arg1); +} + +/* Cast int4 -> bool */ +Datum +int4_bool(PG_FUNCTION_ARGS) +{ + if (PG_GETARG_INT32(0) == 0) + PG_RETURN_BOOL(false); + else + PG_RETURN_BOOL(true); +} + +/* Cast bool -> int4 */ +Datum +bool_int4(PG_FUNCTION_ARGS) +{ + if (PG_GETARG_BOOL(0) == false) + PG_RETURN_INT32(0); + else + PG_RETURN_INT32(1); +} + +/* + * ============================ + * COMPARISON OPERATOR ROUTINES + * ============================ + */ + +/* + * inteq - returns 1 iff arg1 == arg2 + * intne - returns 1 iff arg1 != arg2 + * intlt - returns 1 iff arg1 < arg2 + * intle - returns 1 iff arg1 <= arg2 + * intgt - returns 1 iff arg1 > arg2 + * intge - returns 1 iff arg1 >= arg2 + */ + +Datum +int4eq(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int32 arg2 = PG_GETARG_INT32(1); + + PG_RETURN_BOOL(arg1 == arg2); +} + +Datum +int4ne(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int32 arg2 = PG_GETARG_INT32(1); + + PG_RETURN_BOOL(arg1 != arg2); +} + +Datum +int4lt(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int32 arg2 = 
PG_GETARG_INT32(1); + + PG_RETURN_BOOL(arg1 < arg2); +} + +Datum +int4le(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int32 arg2 = PG_GETARG_INT32(1); + + PG_RETURN_BOOL(arg1 <= arg2); +} + +Datum +int4gt(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int32 arg2 = PG_GETARG_INT32(1); + + PG_RETURN_BOOL(arg1 > arg2); +} + +Datum +int4ge(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int32 arg2 = PG_GETARG_INT32(1); + + PG_RETURN_BOOL(arg1 >= arg2); +} + +Datum +int2eq(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int16 arg2 = PG_GETARG_INT16(1); + + PG_RETURN_BOOL(arg1 == arg2); +} + +Datum +int2ne(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int16 arg2 = PG_GETARG_INT16(1); + + PG_RETURN_BOOL(arg1 != arg2); +} + +Datum +int2lt(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int16 arg2 = PG_GETARG_INT16(1); + + PG_RETURN_BOOL(arg1 < arg2); +} + +Datum +int2le(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int16 arg2 = PG_GETARG_INT16(1); + + PG_RETURN_BOOL(arg1 <= arg2); +} + +Datum +int2gt(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int16 arg2 = PG_GETARG_INT16(1); + + PG_RETURN_BOOL(arg1 > arg2); +} + +Datum +int2ge(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int16 arg2 = PG_GETARG_INT16(1); + + PG_RETURN_BOOL(arg1 >= arg2); +} + +Datum +int24eq(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int32 arg2 = PG_GETARG_INT32(1); + + PG_RETURN_BOOL(arg1 == arg2); +} + +Datum +int24ne(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int32 arg2 = PG_GETARG_INT32(1); + + PG_RETURN_BOOL(arg1 != arg2); +} + +Datum +int24lt(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int32 arg2 = PG_GETARG_INT32(1); + + PG_RETURN_BOOL(arg1 < arg2); +} + +Datum +int24le(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int32 arg2 = PG_GETARG_INT32(1); + + PG_RETURN_BOOL(arg1 <= arg2); +} + +Datum +int24gt(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int32 arg2 = PG_GETARG_INT32(1); + + PG_RETURN_BOOL(arg1 > arg2); +} + +Datum +int24ge(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int32 arg2 = PG_GETARG_INT32(1); + + PG_RETURN_BOOL(arg1 >= arg2); +} + +Datum +int42eq(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int16 arg2 = PG_GETARG_INT16(1); + + PG_RETURN_BOOL(arg1 == arg2); +} + +Datum +int42ne(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int16 arg2 = PG_GETARG_INT16(1); + + PG_RETURN_BOOL(arg1 != arg2); +} + +Datum +int42lt(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int16 arg2 = PG_GETARG_INT16(1); + + PG_RETURN_BOOL(arg1 < arg2); +} + +Datum +int42le(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int16 arg2 = PG_GETARG_INT16(1); + + PG_RETURN_BOOL(arg1 <= arg2); +} + +Datum +int42gt(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int16 arg2 = PG_GETARG_INT16(1); + + PG_RETURN_BOOL(arg1 > arg2); +} + +Datum +int42ge(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int16 arg2 = PG_GETARG_INT16(1); + + PG_RETURN_BOOL(arg1 >= arg2); +} + + +/*---------------------------------------------------------- + * in_range functions for int4 and int2, + * including cross-data-type comparisons. + * + * Note: we provide separate intN_int8 functions for performance + * reasons. This forces also providing intN_int2, else cases with a + * smallint offset value would fail to resolve which function to use. + * But that's an unlikely situation, so don't duplicate code for it. 
+ *---------------------------------------------------------*/ + +Datum +in_range_int4_int4(PG_FUNCTION_ARGS) +{ + int32 val = PG_GETARG_INT32(0); + int32 base = PG_GETARG_INT32(1); + int32 offset = PG_GETARG_INT32(2); + bool sub = PG_GETARG_BOOL(3); + bool less = PG_GETARG_BOOL(4); + int32 sum; + + if (offset < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PRECEDING_OR_FOLLOWING_SIZE), + errmsg("invalid preceding or following size in window function"))); + + if (sub) + offset = -offset; /* cannot overflow */ + + if (unlikely(pg_add_s32_overflow(base, offset, &sum))) + { + /* + * If sub is false, the true sum is surely more than val, so correct + * answer is the same as "less". If sub is true, the true sum is + * surely less than val, so the answer is "!less". + */ + PG_RETURN_BOOL(sub ? !less : less); + } + + if (less) + PG_RETURN_BOOL(val <= sum); + else + PG_RETURN_BOOL(val >= sum); +} + +Datum +in_range_int4_int2(PG_FUNCTION_ARGS) +{ + /* Doesn't seem worth duplicating code for, so just invoke int4_int4 */ + return DirectFunctionCall5(in_range_int4_int4, + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1), + Int32GetDatum((int32) PG_GETARG_INT16(2)), + PG_GETARG_DATUM(3), + PG_GETARG_DATUM(4)); +} + +Datum +in_range_int4_int8(PG_FUNCTION_ARGS) +{ + /* We must do all the math in int64 */ + int64 val = (int64) PG_GETARG_INT32(0); + int64 base = (int64) PG_GETARG_INT32(1); + int64 offset = PG_GETARG_INT64(2); + bool sub = PG_GETARG_BOOL(3); + bool less = PG_GETARG_BOOL(4); + int64 sum; + + if (offset < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PRECEDING_OR_FOLLOWING_SIZE), + errmsg("invalid preceding or following size in window function"))); + + if (sub) + offset = -offset; /* cannot overflow */ + + if (unlikely(pg_add_s64_overflow(base, offset, &sum))) + { + /* + * If sub is false, the true sum is surely more than val, so correct + * answer is the same as "less". If sub is true, the true sum is + * surely less than val, so the answer is "!less". + */ + PG_RETURN_BOOL(sub ? !less : less); + } + + if (less) + PG_RETURN_BOOL(val <= sum); + else + PG_RETURN_BOOL(val >= sum); +} + +Datum +in_range_int2_int4(PG_FUNCTION_ARGS) +{ + /* We must do all the math in int32 */ + int32 val = (int32) PG_GETARG_INT16(0); + int32 base = (int32) PG_GETARG_INT16(1); + int32 offset = PG_GETARG_INT32(2); + bool sub = PG_GETARG_BOOL(3); + bool less = PG_GETARG_BOOL(4); + int32 sum; + + if (offset < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PRECEDING_OR_FOLLOWING_SIZE), + errmsg("invalid preceding or following size in window function"))); + + if (sub) + offset = -offset; /* cannot overflow */ + + if (unlikely(pg_add_s32_overflow(base, offset, &sum))) + { + /* + * If sub is false, the true sum is surely more than val, so correct + * answer is the same as "less". If sub is true, the true sum is + * surely less than val, so the answer is "!less". + */ + PG_RETURN_BOOL(sub ? 
!less : less); + } + + if (less) + PG_RETURN_BOOL(val <= sum); + else + PG_RETURN_BOOL(val >= sum); +} + +Datum +in_range_int2_int2(PG_FUNCTION_ARGS) +{ + /* Doesn't seem worth duplicating code for, so just invoke int2_int4 */ + return DirectFunctionCall5(in_range_int2_int4, + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1), + Int32GetDatum((int32) PG_GETARG_INT16(2)), + PG_GETARG_DATUM(3), + PG_GETARG_DATUM(4)); +} + +Datum +in_range_int2_int8(PG_FUNCTION_ARGS) +{ + /* Doesn't seem worth duplicating code for, so just invoke int4_int8 */ + return DirectFunctionCall5(in_range_int4_int8, + Int32GetDatum((int32) PG_GETARG_INT16(0)), + Int32GetDatum((int32) PG_GETARG_INT16(1)), + PG_GETARG_DATUM(2), + PG_GETARG_DATUM(3), + PG_GETARG_DATUM(4)); +} + + +/* + * int[24]pl - returns arg1 + arg2 + * int[24]mi - returns arg1 - arg2 + * int[24]mul - returns arg1 * arg2 + * int[24]div - returns arg1 / arg2 + */ + +Datum +int4um(PG_FUNCTION_ARGS) +{ + int32 arg = PG_GETARG_INT32(0); + + if (unlikely(arg == PG_INT32_MIN)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + PG_RETURN_INT32(-arg); +} + +Datum +int4up(PG_FUNCTION_ARGS) +{ + int32 arg = PG_GETARG_INT32(0); + + PG_RETURN_INT32(arg); +} + +Datum +int4pl(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int32 arg2 = PG_GETARG_INT32(1); + int32 result; + + if (unlikely(pg_add_s32_overflow(arg1, arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + PG_RETURN_INT32(result); +} + +Datum +int4mi(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int32 arg2 = PG_GETARG_INT32(1); + int32 result; + + if (unlikely(pg_sub_s32_overflow(arg1, arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + PG_RETURN_INT32(result); +} + +Datum +int4mul(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int32 arg2 = PG_GETARG_INT32(1); + int32 result; + + if (unlikely(pg_mul_s32_overflow(arg1, arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + PG_RETURN_INT32(result); +} + +Datum +int4div(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int32 arg2 = PG_GETARG_INT32(1); + int32 result; + + if (arg2 == 0) + { + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + /* ensure compiler realizes we mustn't reach the division (gcc bug) */ + PG_RETURN_NULL(); + } + + /* + * INT_MIN / -1 is problematic, since the result can't be represented on a + * two's-complement machine. Some machines produce INT_MIN, some produce + * zero, some throw an exception. We can dodge the problem by recognizing + * that division by -1 is the same as negation. 
+ */ + if (arg2 == -1) + { + if (unlikely(arg1 == PG_INT32_MIN)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + result = -arg1; + PG_RETURN_INT32(result); + } + + /* No overflow is possible */ + + result = arg1 / arg2; + + PG_RETURN_INT32(result); +} + +Datum +int4inc(PG_FUNCTION_ARGS) +{ + int32 arg = PG_GETARG_INT32(0); + int32 result; + + if (unlikely(pg_add_s32_overflow(arg, 1, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + + PG_RETURN_INT32(result); +} + +Datum +int2um(PG_FUNCTION_ARGS) +{ + int16 arg = PG_GETARG_INT16(0); + + if (unlikely(arg == PG_INT16_MIN)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("smallint out of range"))); + PG_RETURN_INT16(-arg); +} + +Datum +int2up(PG_FUNCTION_ARGS) +{ + int16 arg = PG_GETARG_INT16(0); + + PG_RETURN_INT16(arg); +} + +Datum +int2pl(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int16 arg2 = PG_GETARG_INT16(1); + int16 result; + + if (unlikely(pg_add_s16_overflow(arg1, arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("smallint out of range"))); + PG_RETURN_INT16(result); +} + +Datum +int2mi(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int16 arg2 = PG_GETARG_INT16(1); + int16 result; + + if (unlikely(pg_sub_s16_overflow(arg1, arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("smallint out of range"))); + PG_RETURN_INT16(result); +} + +Datum +int2mul(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int16 arg2 = PG_GETARG_INT16(1); + int16 result; + + if (unlikely(pg_mul_s16_overflow(arg1, arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("smallint out of range"))); + + PG_RETURN_INT16(result); +} + +Datum +int2div(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int16 arg2 = PG_GETARG_INT16(1); + int16 result; + + if (arg2 == 0) + { + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + /* ensure compiler realizes we mustn't reach the division (gcc bug) */ + PG_RETURN_NULL(); + } + + /* + * SHRT_MIN / -1 is problematic, since the result can't be represented on + * a two's-complement machine. Some machines produce SHRT_MIN, some + * produce zero, some throw an exception. We can dodge the problem by + * recognizing that division by -1 is the same as negation. 
+ */ + if (arg2 == -1) + { + if (unlikely(arg1 == PG_INT16_MIN)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("smallint out of range"))); + result = -arg1; + PG_RETURN_INT16(result); + } + + /* No overflow is possible */ + + result = arg1 / arg2; + + PG_RETURN_INT16(result); +} + +Datum +int24pl(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int32 arg2 = PG_GETARG_INT32(1); + int32 result; + + if (unlikely(pg_add_s32_overflow((int32) arg1, arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + PG_RETURN_INT32(result); +} + +Datum +int24mi(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int32 arg2 = PG_GETARG_INT32(1); + int32 result; + + if (unlikely(pg_sub_s32_overflow((int32) arg1, arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + PG_RETURN_INT32(result); +} + +Datum +int24mul(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int32 arg2 = PG_GETARG_INT32(1); + int32 result; + + if (unlikely(pg_mul_s32_overflow((int32) arg1, arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + PG_RETURN_INT32(result); +} + +Datum +int24div(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int32 arg2 = PG_GETARG_INT32(1); + + if (unlikely(arg2 == 0)) + { + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + /* ensure compiler realizes we mustn't reach the division (gcc bug) */ + PG_RETURN_NULL(); + } + + /* No overflow is possible */ + PG_RETURN_INT32((int32) arg1 / arg2); +} + +Datum +int42pl(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int16 arg2 = PG_GETARG_INT16(1); + int32 result; + + if (unlikely(pg_add_s32_overflow(arg1, (int32) arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + PG_RETURN_INT32(result); +} + +Datum +int42mi(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int16 arg2 = PG_GETARG_INT16(1); + int32 result; + + if (unlikely(pg_sub_s32_overflow(arg1, (int32) arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + PG_RETURN_INT32(result); +} + +Datum +int42mul(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int16 arg2 = PG_GETARG_INT16(1); + int32 result; + + if (unlikely(pg_mul_s32_overflow(arg1, (int32) arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + PG_RETURN_INT32(result); +} + +Datum +int42div(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int16 arg2 = PG_GETARG_INT16(1); + int32 result; + + if (unlikely(arg2 == 0)) + { + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + /* ensure compiler realizes we mustn't reach the division (gcc bug) */ + PG_RETURN_NULL(); + } + + /* + * INT_MIN / -1 is problematic, since the result can't be represented on a + * two's-complement machine. Some machines produce INT_MIN, some produce + * zero, some throw an exception. We can dodge the problem by recognizing + * that division by -1 is the same as negation. 
+ */ + if (arg2 == -1) + { + if (unlikely(arg1 == PG_INT32_MIN)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + result = -arg1; + PG_RETURN_INT32(result); + } + + /* No overflow is possible */ + + result = arg1 / arg2; + + PG_RETURN_INT32(result); +} + +Datum +int4mod(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int32 arg2 = PG_GETARG_INT32(1); + + if (unlikely(arg2 == 0)) + { + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + /* ensure compiler realizes we mustn't reach the division (gcc bug) */ + PG_RETURN_NULL(); + } + + /* + * Some machines throw a floating-point exception for INT_MIN % -1, which + * is a bit silly since the correct answer is perfectly well-defined, + * namely zero. + */ + if (arg2 == -1) + PG_RETURN_INT32(0); + + /* No overflow is possible */ + + PG_RETURN_INT32(arg1 % arg2); +} + +Datum +int2mod(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int16 arg2 = PG_GETARG_INT16(1); + + if (unlikely(arg2 == 0)) + { + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + /* ensure compiler realizes we mustn't reach the division (gcc bug) */ + PG_RETURN_NULL(); + } + + /* + * Some machines throw a floating-point exception for INT_MIN % -1, which + * is a bit silly since the correct answer is perfectly well-defined, + * namely zero. (It's not clear this ever happens when dealing with + * int16, but we might as well have the test for safety.) + */ + if (arg2 == -1) + PG_RETURN_INT16(0); + + /* No overflow is possible */ + + PG_RETURN_INT16(arg1 % arg2); +} + + +/* int[24]abs() + * Absolute value + */ +Datum +int4abs(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int32 result; + + if (unlikely(arg1 == PG_INT32_MIN)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + result = (arg1 < 0) ? -arg1 : arg1; + PG_RETURN_INT32(result); +} + +Datum +int2abs(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int16 result; + + if (unlikely(arg1 == PG_INT16_MIN)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("smallint out of range"))); + result = (arg1 < 0) ? -arg1 : arg1; + PG_RETURN_INT16(result); +} + +/* + * Greatest Common Divisor + * + * Returns the largest positive integer that exactly divides both inputs. + * Special cases: + * - gcd(x, 0) = gcd(0, x) = abs(x) + * because 0 is divisible by anything + * - gcd(0, 0) = 0 + * complies with the previous definition and is a common convention + * + * Special care must be taken if either input is INT_MIN --- gcd(0, INT_MIN), + * gcd(INT_MIN, 0) and gcd(INT_MIN, INT_MIN) are all equal to abs(INT_MIN), + * which cannot be represented as a 32-bit signed integer. + */ +static int32 +int4gcd_internal(int32 arg1, int32 arg2) +{ + int32 swap; + int32 a1, + a2; + + /* + * Put the greater absolute value in arg1. + * + * This would happen automatically in the loop below, but avoids an + * expensive modulo operation, and simplifies the special-case handling + * for INT_MIN below. + * + * We do this in negative space in order to handle INT_MIN. + */ + a1 = (arg1 < 0) ? arg1 : -arg1; + a2 = (arg2 < 0) ? arg2 : -arg2; + if (a1 > a2) + { + swap = arg1; + arg1 = arg2; + arg2 = swap; + } + + /* Special care needs to be taken with INT_MIN. See comments above. 
*/ + if (arg1 == PG_INT32_MIN) + { + if (arg2 == 0 || arg2 == PG_INT32_MIN) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + + /* + * Some machines throw a floating-point exception for INT_MIN % -1, + * which is a bit silly since the correct answer is perfectly + * well-defined, namely zero. Guard against this and just return the + * result, gcd(INT_MIN, -1) = 1. + */ + if (arg2 == -1) + return 1; + } + + /* Use the Euclidean algorithm to find the GCD */ + while (arg2 != 0) + { + swap = arg2; + arg2 = arg1 % arg2; + arg1 = swap; + } + + /* + * Make sure the result is positive. (We know we don't have INT_MIN + * anymore). + */ + if (arg1 < 0) + arg1 = -arg1; + + return arg1; +} + +Datum +int4gcd(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int32 arg2 = PG_GETARG_INT32(1); + int32 result; + + result = int4gcd_internal(arg1, arg2); + + PG_RETURN_INT32(result); +} + +/* + * Least Common Multiple + */ +Datum +int4lcm(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int32 arg2 = PG_GETARG_INT32(1); + int32 gcd; + int32 result; + + /* + * Handle lcm(x, 0) = lcm(0, x) = 0 as a special case. This prevents a + * division-by-zero error below when x is zero, and an overflow error from + * the GCD computation when x = INT_MIN. + */ + if (arg1 == 0 || arg2 == 0) + PG_RETURN_INT32(0); + + /* lcm(x, y) = abs(x / gcd(x, y) * y) */ + gcd = int4gcd_internal(arg1, arg2); + arg1 = arg1 / gcd; + + if (unlikely(pg_mul_s32_overflow(arg1, arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + + /* If the result is INT_MIN, it cannot be represented. */ + if (unlikely(result == PG_INT32_MIN)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + + if (result < 0) + result = -result; + + PG_RETURN_INT32(result); +} + +Datum +int2larger(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int16 arg2 = PG_GETARG_INT16(1); + + PG_RETURN_INT16((arg1 > arg2) ? arg1 : arg2); +} + +Datum +int2smaller(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int16 arg2 = PG_GETARG_INT16(1); + + PG_RETURN_INT16((arg1 < arg2) ? arg1 : arg2); +} + +Datum +int4larger(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int32 arg2 = PG_GETARG_INT32(1); + + PG_RETURN_INT32((arg1 > arg2) ? arg1 : arg2); +} + +Datum +int4smaller(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int32 arg2 = PG_GETARG_INT32(1); + + PG_RETURN_INT32((arg1 < arg2) ? 
arg1 : arg2); +} + +/* + * Bit-pushing operators + * + * int[24]and - returns arg1 & arg2 + * int[24]or - returns arg1 | arg2 + * int[24]xor - returns arg1 # arg2 + * int[24]not - returns ~arg1 + * int[24]shl - returns arg1 << arg2 + * int[24]shr - returns arg1 >> arg2 + */ + +Datum +int4and(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int32 arg2 = PG_GETARG_INT32(1); + + PG_RETURN_INT32(arg1 & arg2); +} + +Datum +int4or(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int32 arg2 = PG_GETARG_INT32(1); + + PG_RETURN_INT32(arg1 | arg2); +} + +Datum +int4xor(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int32 arg2 = PG_GETARG_INT32(1); + + PG_RETURN_INT32(arg1 ^ arg2); +} + +Datum +int4shl(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int32 arg2 = PG_GETARG_INT32(1); + + PG_RETURN_INT32(arg1 << arg2); +} + +Datum +int4shr(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int32 arg2 = PG_GETARG_INT32(1); + + PG_RETURN_INT32(arg1 >> arg2); +} + +Datum +int4not(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + + PG_RETURN_INT32(~arg1); +} + +Datum +int2and(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int16 arg2 = PG_GETARG_INT16(1); + + PG_RETURN_INT16(arg1 & arg2); +} + +Datum +int2or(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int16 arg2 = PG_GETARG_INT16(1); + + PG_RETURN_INT16(arg1 | arg2); +} + +Datum +int2xor(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int16 arg2 = PG_GETARG_INT16(1); + + PG_RETURN_INT16(arg1 ^ arg2); +} + +Datum +int2not(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + + PG_RETURN_INT16(~arg1); +} + + +Datum +int2shl(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int32 arg2 = PG_GETARG_INT32(1); + + PG_RETURN_INT16(arg1 << arg2); +} + +Datum +int2shr(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int32 arg2 = PG_GETARG_INT32(1); + + PG_RETURN_INT16(arg1 >> arg2); +} + +/* + * non-persistent numeric series generator + */ +Datum +generate_series_int4(PG_FUNCTION_ARGS) +{ + return generate_series_step_int4(fcinfo); +} + +Datum +generate_series_step_int4(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + generate_series_fctx *fctx; + int32 result; + MemoryContext oldcontext; + + /* stuff done only on the first call of the function */ + if (SRF_IS_FIRSTCALL()) + { + int32 start = PG_GETARG_INT32(0); + int32 finish = PG_GETARG_INT32(1); + int32 step = 1; + + /* see if we were given an explicit step size */ + if (PG_NARGS() == 3) + step = PG_GETARG_INT32(2); + if (step == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("step size cannot equal zero"))); + + /* create a function context for cross-call persistence */ + funcctx = SRF_FIRSTCALL_INIT(); + + /* + * switch to memory context appropriate for multiple function calls + */ + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* allocate memory for user context */ + fctx = (generate_series_fctx *) palloc(sizeof(generate_series_fctx)); + + /* + * Use fctx to keep state from call to call. 
Seed current with the + * original start value + */ + fctx->current = start; + fctx->finish = finish; + fctx->step = step; + + funcctx->user_fctx = fctx; + MemoryContextSwitchTo(oldcontext); + } + + /* stuff done on every call of the function */ + funcctx = SRF_PERCALL_SETUP(); + + /* + * get the saved state and use current as the result for this iteration + */ + fctx = funcctx->user_fctx; + result = fctx->current; + + if ((fctx->step > 0 && fctx->current <= fctx->finish) || + (fctx->step < 0 && fctx->current >= fctx->finish)) + { + /* + * Increment current in preparation for next iteration. If next-value + * computation overflows, this is the final result. + */ + if (pg_add_s32_overflow(fctx->current, fctx->step, &fctx->current)) + fctx->step = 0; + + /* do when there is more left to send */ + SRF_RETURN_NEXT(funcctx, Int32GetDatum(result)); + } + else + /* do when there is no more left */ + SRF_RETURN_DONE(funcctx); +} + +/* + * Planner support function for generate_series(int4, int4 [, int4]) + */ +Datum +generate_series_int4_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestRows)) + { + /* Try to estimate the number of rows returned */ + SupportRequestRows *req = (SupportRequestRows *) rawreq; + + if (is_funcclause(req->node)) /* be paranoid */ + { + List *args = ((FuncExpr *) req->node)->args; + Node *arg1, + *arg2, + *arg3; + + /* We can use estimated argument values here */ + arg1 = estimate_expression_value(req->root, linitial(args)); + arg2 = estimate_expression_value(req->root, lsecond(args)); + if (list_length(args) >= 3) + arg3 = estimate_expression_value(req->root, lthird(args)); + else + arg3 = NULL; + + /* + * If any argument is constant NULL, we can safely assume that + * zero rows are returned. Otherwise, if they're all non-NULL + * constants, we can calculate the number of rows that will be + * returned. Use double arithmetic to avoid overflow hazards. + */ + if ((IsA(arg1, Const) && + ((Const *) arg1)->constisnull) || + (IsA(arg2, Const) && + ((Const *) arg2)->constisnull) || + (arg3 != NULL && IsA(arg3, Const) && + ((Const *) arg3)->constisnull)) + { + req->rows = 0; + ret = (Node *) req; + } + else if (IsA(arg1, Const) && + IsA(arg2, Const) && + (arg3 == NULL || IsA(arg3, Const))) + { + double start, + finish, + step; + + start = DatumGetInt32(((Const *) arg1)->constvalue); + finish = DatumGetInt32(((Const *) arg2)->constvalue); + step = arg3 ? 
DatumGetInt32(((Const *) arg3)->constvalue) : 1; + + /* This equation works for either sign of step */ + if (step != 0) + { + req->rows = floor((finish - start + step) / step); + ret = (Node *) req; + } + } + } + } + + PG_RETURN_POINTER(ret); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/int8.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/int8.c new file mode 100644 index 00000000000..41fbeec8fd7 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/int8.c @@ -0,0 +1,1539 @@ +/*------------------------------------------------------------------------- + * + * int8.c + * Internal 64-bit integer operations + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/adt/int8.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <ctype.h> +#include <limits.h> +#include <math.h> + +#include "common/int.h" +#include "funcapi.h" +#include "libpq/pqformat.h" +#include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" +#include "optimizer/optimizer.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" + + +typedef struct +{ + int64 current; + int64 finish; + int64 step; +} generate_series_fctx; + + +/*********************************************************************** + ** + ** Routines for 64-bit integers. + ** + ***********************************************************************/ + +/*---------------------------------------------------------- + * Formatting and conversion routines. + *---------------------------------------------------------*/ + +/* int8in() + */ +Datum +int8in(PG_FUNCTION_ARGS) +{ + char *num = PG_GETARG_CSTRING(0); + + PG_RETURN_INT64(pg_strtoint64_safe(num, fcinfo->context)); +} + + +/* int8out() + */ +Datum +int8out(PG_FUNCTION_ARGS) +{ + int64 val = PG_GETARG_INT64(0); + char buf[MAXINT8LEN + 1]; + char *result; + int len; + + len = pg_lltoa(val, buf) + 1; + + /* + * Since the length is already known, we do a manual palloc() and memcpy() + * to avoid the strlen() call that would otherwise be done in pstrdup(). + */ + result = palloc(len); + memcpy(result, buf, len); + PG_RETURN_CSTRING(result); +} + +/* + * int8recv - converts external binary format to int8 + */ +Datum +int8recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + + PG_RETURN_INT64(pq_getmsgint64(buf)); +} + +/* + * int8send - converts int8 to binary format + */ +Datum +int8send(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint64(&buf, arg1); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + + +/*---------------------------------------------------------- + * Relational operators for int8s, including cross-data-type comparisons. + *---------------------------------------------------------*/ + +/* int8relop() + * Is val1 relop val2? 
+ */ +Datum +int8eq(PG_FUNCTION_ARGS) +{ + int64 val1 = PG_GETARG_INT64(0); + int64 val2 = PG_GETARG_INT64(1); + + PG_RETURN_BOOL(val1 == val2); +} + +Datum +int8ne(PG_FUNCTION_ARGS) +{ + int64 val1 = PG_GETARG_INT64(0); + int64 val2 = PG_GETARG_INT64(1); + + PG_RETURN_BOOL(val1 != val2); +} + +Datum +int8lt(PG_FUNCTION_ARGS) +{ + int64 val1 = PG_GETARG_INT64(0); + int64 val2 = PG_GETARG_INT64(1); + + PG_RETURN_BOOL(val1 < val2); +} + +Datum +int8gt(PG_FUNCTION_ARGS) +{ + int64 val1 = PG_GETARG_INT64(0); + int64 val2 = PG_GETARG_INT64(1); + + PG_RETURN_BOOL(val1 > val2); +} + +Datum +int8le(PG_FUNCTION_ARGS) +{ + int64 val1 = PG_GETARG_INT64(0); + int64 val2 = PG_GETARG_INT64(1); + + PG_RETURN_BOOL(val1 <= val2); +} + +Datum +int8ge(PG_FUNCTION_ARGS) +{ + int64 val1 = PG_GETARG_INT64(0); + int64 val2 = PG_GETARG_INT64(1); + + PG_RETURN_BOOL(val1 >= val2); +} + +/* int84relop() + * Is 64-bit val1 relop 32-bit val2? + */ +Datum +int84eq(PG_FUNCTION_ARGS) +{ + int64 val1 = PG_GETARG_INT64(0); + int32 val2 = PG_GETARG_INT32(1); + + PG_RETURN_BOOL(val1 == val2); +} + +Datum +int84ne(PG_FUNCTION_ARGS) +{ + int64 val1 = PG_GETARG_INT64(0); + int32 val2 = PG_GETARG_INT32(1); + + PG_RETURN_BOOL(val1 != val2); +} + +Datum +int84lt(PG_FUNCTION_ARGS) +{ + int64 val1 = PG_GETARG_INT64(0); + int32 val2 = PG_GETARG_INT32(1); + + PG_RETURN_BOOL(val1 < val2); +} + +Datum +int84gt(PG_FUNCTION_ARGS) +{ + int64 val1 = PG_GETARG_INT64(0); + int32 val2 = PG_GETARG_INT32(1); + + PG_RETURN_BOOL(val1 > val2); +} + +Datum +int84le(PG_FUNCTION_ARGS) +{ + int64 val1 = PG_GETARG_INT64(0); + int32 val2 = PG_GETARG_INT32(1); + + PG_RETURN_BOOL(val1 <= val2); +} + +Datum +int84ge(PG_FUNCTION_ARGS) +{ + int64 val1 = PG_GETARG_INT64(0); + int32 val2 = PG_GETARG_INT32(1); + + PG_RETURN_BOOL(val1 >= val2); +} + +/* int48relop() + * Is 32-bit val1 relop 64-bit val2? + */ +Datum +int48eq(PG_FUNCTION_ARGS) +{ + int32 val1 = PG_GETARG_INT32(0); + int64 val2 = PG_GETARG_INT64(1); + + PG_RETURN_BOOL(val1 == val2); +} + +Datum +int48ne(PG_FUNCTION_ARGS) +{ + int32 val1 = PG_GETARG_INT32(0); + int64 val2 = PG_GETARG_INT64(1); + + PG_RETURN_BOOL(val1 != val2); +} + +Datum +int48lt(PG_FUNCTION_ARGS) +{ + int32 val1 = PG_GETARG_INT32(0); + int64 val2 = PG_GETARG_INT64(1); + + PG_RETURN_BOOL(val1 < val2); +} + +Datum +int48gt(PG_FUNCTION_ARGS) +{ + int32 val1 = PG_GETARG_INT32(0); + int64 val2 = PG_GETARG_INT64(1); + + PG_RETURN_BOOL(val1 > val2); +} + +Datum +int48le(PG_FUNCTION_ARGS) +{ + int32 val1 = PG_GETARG_INT32(0); + int64 val2 = PG_GETARG_INT64(1); + + PG_RETURN_BOOL(val1 <= val2); +} + +Datum +int48ge(PG_FUNCTION_ARGS) +{ + int32 val1 = PG_GETARG_INT32(0); + int64 val2 = PG_GETARG_INT64(1); + + PG_RETURN_BOOL(val1 >= val2); +} + +/* int82relop() + * Is 64-bit val1 relop 16-bit val2? 
+ */ +Datum +int82eq(PG_FUNCTION_ARGS) +{ + int64 val1 = PG_GETARG_INT64(0); + int16 val2 = PG_GETARG_INT16(1); + + PG_RETURN_BOOL(val1 == val2); +} + +Datum +int82ne(PG_FUNCTION_ARGS) +{ + int64 val1 = PG_GETARG_INT64(0); + int16 val2 = PG_GETARG_INT16(1); + + PG_RETURN_BOOL(val1 != val2); +} + +Datum +int82lt(PG_FUNCTION_ARGS) +{ + int64 val1 = PG_GETARG_INT64(0); + int16 val2 = PG_GETARG_INT16(1); + + PG_RETURN_BOOL(val1 < val2); +} + +Datum +int82gt(PG_FUNCTION_ARGS) +{ + int64 val1 = PG_GETARG_INT64(0); + int16 val2 = PG_GETARG_INT16(1); + + PG_RETURN_BOOL(val1 > val2); +} + +Datum +int82le(PG_FUNCTION_ARGS) +{ + int64 val1 = PG_GETARG_INT64(0); + int16 val2 = PG_GETARG_INT16(1); + + PG_RETURN_BOOL(val1 <= val2); +} + +Datum +int82ge(PG_FUNCTION_ARGS) +{ + int64 val1 = PG_GETARG_INT64(0); + int16 val2 = PG_GETARG_INT16(1); + + PG_RETURN_BOOL(val1 >= val2); +} + +/* int28relop() + * Is 16-bit val1 relop 64-bit val2? + */ +Datum +int28eq(PG_FUNCTION_ARGS) +{ + int16 val1 = PG_GETARG_INT16(0); + int64 val2 = PG_GETARG_INT64(1); + + PG_RETURN_BOOL(val1 == val2); +} + +Datum +int28ne(PG_FUNCTION_ARGS) +{ + int16 val1 = PG_GETARG_INT16(0); + int64 val2 = PG_GETARG_INT64(1); + + PG_RETURN_BOOL(val1 != val2); +} + +Datum +int28lt(PG_FUNCTION_ARGS) +{ + int16 val1 = PG_GETARG_INT16(0); + int64 val2 = PG_GETARG_INT64(1); + + PG_RETURN_BOOL(val1 < val2); +} + +Datum +int28gt(PG_FUNCTION_ARGS) +{ + int16 val1 = PG_GETARG_INT16(0); + int64 val2 = PG_GETARG_INT64(1); + + PG_RETURN_BOOL(val1 > val2); +} + +Datum +int28le(PG_FUNCTION_ARGS) +{ + int16 val1 = PG_GETARG_INT16(0); + int64 val2 = PG_GETARG_INT64(1); + + PG_RETURN_BOOL(val1 <= val2); +} + +Datum +int28ge(PG_FUNCTION_ARGS) +{ + int16 val1 = PG_GETARG_INT16(0); + int64 val2 = PG_GETARG_INT64(1); + + PG_RETURN_BOOL(val1 >= val2); +} + +/* + * in_range support function for int8. + * + * Note: we needn't supply int8_int4 or int8_int2 variants, as implicit + * coercion of the offset value takes care of those scenarios just as well. + */ +Datum +in_range_int8_int8(PG_FUNCTION_ARGS) +{ + int64 val = PG_GETARG_INT64(0); + int64 base = PG_GETARG_INT64(1); + int64 offset = PG_GETARG_INT64(2); + bool sub = PG_GETARG_BOOL(3); + bool less = PG_GETARG_BOOL(4); + int64 sum; + + if (offset < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PRECEDING_OR_FOLLOWING_SIZE), + errmsg("invalid preceding or following size in window function"))); + + if (sub) + offset = -offset; /* cannot overflow */ + + if (unlikely(pg_add_s64_overflow(base, offset, &sum))) + { + /* + * If sub is false, the true sum is surely more than val, so correct + * answer is the same as "less". If sub is true, the true sum is + * surely less than val, so the answer is "!less". + */ + PG_RETURN_BOOL(sub ? !less : less); + } + + if (less) + PG_RETURN_BOOL(val <= sum); + else + PG_RETURN_BOOL(val >= sum); +} + + +/*---------------------------------------------------------- + * Arithmetic operators on 64-bit integers. 
+ *---------------------------------------------------------*/ + +Datum +int8um(PG_FUNCTION_ARGS) +{ + int64 arg = PG_GETARG_INT64(0); + int64 result; + + if (unlikely(arg == PG_INT64_MIN)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + result = -arg; + PG_RETURN_INT64(result); +} + +Datum +int8up(PG_FUNCTION_ARGS) +{ + int64 arg = PG_GETARG_INT64(0); + + PG_RETURN_INT64(arg); +} + +Datum +int8pl(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + int64 arg2 = PG_GETARG_INT64(1); + int64 result; + + if (unlikely(pg_add_s64_overflow(arg1, arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + PG_RETURN_INT64(result); +} + +Datum +int8mi(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + int64 arg2 = PG_GETARG_INT64(1); + int64 result; + + if (unlikely(pg_sub_s64_overflow(arg1, arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + PG_RETURN_INT64(result); +} + +Datum +int8mul(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + int64 arg2 = PG_GETARG_INT64(1); + int64 result; + + if (unlikely(pg_mul_s64_overflow(arg1, arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + PG_RETURN_INT64(result); +} + +Datum +int8div(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + int64 arg2 = PG_GETARG_INT64(1); + int64 result; + + if (arg2 == 0) + { + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + /* ensure compiler realizes we mustn't reach the division (gcc bug) */ + PG_RETURN_NULL(); + } + + /* + * INT64_MIN / -1 is problematic, since the result can't be represented on + * a two's-complement machine. Some machines produce INT64_MIN, some + * produce zero, some throw an exception. We can dodge the problem by + * recognizing that division by -1 is the same as negation. + */ + if (arg2 == -1) + { + if (unlikely(arg1 == PG_INT64_MIN)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + result = -arg1; + PG_RETURN_INT64(result); + } + + /* No overflow is possible */ + + result = arg1 / arg2; + + PG_RETURN_INT64(result); +} + +/* int8abs() + * Absolute value + */ +Datum +int8abs(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + int64 result; + + if (unlikely(arg1 == PG_INT64_MIN)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + result = (arg1 < 0) ? -arg1 : arg1; + PG_RETURN_INT64(result); +} + +/* int8mod() + * Modulo operation. + */ +Datum +int8mod(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + int64 arg2 = PG_GETARG_INT64(1); + + if (unlikely(arg2 == 0)) + { + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + /* ensure compiler realizes we mustn't reach the division (gcc bug) */ + PG_RETURN_NULL(); + } + + /* + * Some machines throw a floating-point exception for INT64_MIN % -1, + * which is a bit silly since the correct answer is perfectly + * well-defined, namely zero. + */ + if (arg2 == -1) + PG_RETURN_INT64(0); + + /* No overflow is possible */ + + PG_RETURN_INT64(arg1 % arg2); +} + +/* + * Greatest Common Divisor + * + * Returns the largest positive integer that exactly divides both inputs. 
+ * Special cases: + * - gcd(x, 0) = gcd(0, x) = abs(x) + * because 0 is divisible by anything + * - gcd(0, 0) = 0 + * complies with the previous definition and is a common convention + * + * Special care must be taken if either input is INT64_MIN --- + * gcd(0, INT64_MIN), gcd(INT64_MIN, 0) and gcd(INT64_MIN, INT64_MIN) are + * all equal to abs(INT64_MIN), which cannot be represented as a 64-bit signed + * integer. + */ +static int64 +int8gcd_internal(int64 arg1, int64 arg2) +{ + int64 swap; + int64 a1, + a2; + + /* + * Put the greater absolute value in arg1. + * + * This would happen automatically in the loop below, but avoids an + * expensive modulo operation, and simplifies the special-case handling + * for INT64_MIN below. + * + * We do this in negative space in order to handle INT64_MIN. + */ + a1 = (arg1 < 0) ? arg1 : -arg1; + a2 = (arg2 < 0) ? arg2 : -arg2; + if (a1 > a2) + { + swap = arg1; + arg1 = arg2; + arg2 = swap; + } + + /* Special care needs to be taken with INT64_MIN. See comments above. */ + if (arg1 == PG_INT64_MIN) + { + if (arg2 == 0 || arg2 == PG_INT64_MIN) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + + /* + * Some machines throw a floating-point exception for INT64_MIN % -1, + * which is a bit silly since the correct answer is perfectly + * well-defined, namely zero. Guard against this and just return the + * result, gcd(INT64_MIN, -1) = 1. + */ + if (arg2 == -1) + return 1; + } + + /* Use the Euclidean algorithm to find the GCD */ + while (arg2 != 0) + { + swap = arg2; + arg2 = arg1 % arg2; + arg1 = swap; + } + + /* + * Make sure the result is positive. (We know we don't have INT64_MIN + * anymore). + */ + if (arg1 < 0) + arg1 = -arg1; + + return arg1; +} + +Datum +int8gcd(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + int64 arg2 = PG_GETARG_INT64(1); + int64 result; + + result = int8gcd_internal(arg1, arg2); + + PG_RETURN_INT64(result); +} + +/* + * Least Common Multiple + */ +Datum +int8lcm(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + int64 arg2 = PG_GETARG_INT64(1); + int64 gcd; + int64 result; + + /* + * Handle lcm(x, 0) = lcm(0, x) = 0 as a special case. This prevents a + * division-by-zero error below when x is zero, and an overflow error from + * the GCD computation when x = INT64_MIN. + */ + if (arg1 == 0 || arg2 == 0) + PG_RETURN_INT64(0); + + /* lcm(x, y) = abs(x / gcd(x, y) * y) */ + gcd = int8gcd_internal(arg1, arg2); + arg1 = arg1 / gcd; + + if (unlikely(pg_mul_s64_overflow(arg1, arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + + /* If the result is INT64_MIN, it cannot be represented. */ + if (unlikely(result == PG_INT64_MIN)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + + if (result < 0) + result = -result; + + PG_RETURN_INT64(result); +} + +Datum +int8inc(PG_FUNCTION_ARGS) +{ + /* + * When int8 is pass-by-reference, we provide this special case to avoid + * palloc overhead for COUNT(): when called as an aggregate, we know that + * the argument is modifiable local storage, so just update it in-place. + * (If int8 is pass-by-value, then of course this is useless as well as + * incorrect, so just ifdef it out.) 
+ */ +#ifndef USE_FLOAT8_BYVAL /* controls int8 too */ + if (AggCheckCallContext(fcinfo, NULL)) + { + int64 *arg = (int64 *) PG_GETARG_POINTER(0); + + if (unlikely(pg_add_s64_overflow(*arg, 1, arg))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + + PG_RETURN_POINTER(arg); + } + else +#endif + { + /* Not called as an aggregate, so just do it the dumb way */ + int64 arg = PG_GETARG_INT64(0); + int64 result; + + if (unlikely(pg_add_s64_overflow(arg, 1, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + + PG_RETURN_INT64(result); + } +} + +Datum +int8dec(PG_FUNCTION_ARGS) +{ + /* + * When int8 is pass-by-reference, we provide this special case to avoid + * palloc overhead for COUNT(): when called as an aggregate, we know that + * the argument is modifiable local storage, so just update it in-place. + * (If int8 is pass-by-value, then of course this is useless as well as + * incorrect, so just ifdef it out.) + */ +#ifndef USE_FLOAT8_BYVAL /* controls int8 too */ + if (AggCheckCallContext(fcinfo, NULL)) + { + int64 *arg = (int64 *) PG_GETARG_POINTER(0); + + if (unlikely(pg_sub_s64_overflow(*arg, 1, arg))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + PG_RETURN_POINTER(arg); + } + else +#endif + { + /* Not called as an aggregate, so just do it the dumb way */ + int64 arg = PG_GETARG_INT64(0); + int64 result; + + if (unlikely(pg_sub_s64_overflow(arg, 1, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + + PG_RETURN_INT64(result); + } +} + + +/* + * These functions are exactly like int8inc/int8dec but are used for + * aggregates that count only non-null values. Since the functions are + * declared strict, the null checks happen before we ever get here, and all we + * need do is increment the state value. We could actually make these pg_proc + * entries point right at int8inc/int8dec, but then the opr_sanity regression + * test would complain about mismatched entries for a built-in function. + */ + +Datum +int8inc_any(PG_FUNCTION_ARGS) +{ + return int8inc(fcinfo); +} + +Datum +int8inc_float8_float8(PG_FUNCTION_ARGS) +{ + return int8inc(fcinfo); +} + +Datum +int8dec_any(PG_FUNCTION_ARGS) +{ + return int8dec(fcinfo); +} + +/* + * int8inc_support + * prosupport function for int8inc() and int8inc_any() + */ +Datum +int8inc_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + if (IsA(rawreq, SupportRequestWFuncMonotonic)) + { + SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq; + MonotonicFunction monotonic = MONOTONICFUNC_NONE; + int frameOptions = req->window_clause->frameOptions; + WindowFunc *wfunc = req->window_func; + + if (list_length(wfunc->args) == 1) + { + Node *expr = eval_const_expressions(NULL, linitial(wfunc->args)); + + /* + * Due to the Node representation of WindowClause runConditions in + * version prior to v17, we need to insist that the count arg is + * Const to allow safe application of the runCondition + * optimization. + */ + if (!IsA(expr, Const)) + PG_RETURN_POINTER(NULL); + } + + /* No ORDER BY clause then all rows are peers */ + if (req->window_clause->orderClause == NIL) + monotonic = MONOTONICFUNC_BOTH; + else + { + /* + * Otherwise take into account the frame options. 
When the frame + * bound is the start of the window then the resulting value can + * never decrease, therefore is monotonically increasing + */ + if (frameOptions & FRAMEOPTION_START_UNBOUNDED_PRECEDING) + monotonic |= MONOTONICFUNC_INCREASING; + + /* + * Likewise, if the frame bound is the end of the window then the + * resulting value can never increase, therefore is monotonically + * decreasing. + */ + if (frameOptions & FRAMEOPTION_END_UNBOUNDED_FOLLOWING) + monotonic |= MONOTONICFUNC_DECREASING; + } + + req->monotonic = monotonic; + PG_RETURN_POINTER(req); + } + + PG_RETURN_POINTER(NULL); +} + + +Datum +int8larger(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + int64 arg2 = PG_GETARG_INT64(1); + int64 result; + + result = ((arg1 > arg2) ? arg1 : arg2); + + PG_RETURN_INT64(result); +} + +Datum +int8smaller(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + int64 arg2 = PG_GETARG_INT64(1); + int64 result; + + result = ((arg1 < arg2) ? arg1 : arg2); + + PG_RETURN_INT64(result); +} + +Datum +int84pl(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + int32 arg2 = PG_GETARG_INT32(1); + int64 result; + + if (unlikely(pg_add_s64_overflow(arg1, (int64) arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + PG_RETURN_INT64(result); +} + +Datum +int84mi(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + int32 arg2 = PG_GETARG_INT32(1); + int64 result; + + if (unlikely(pg_sub_s64_overflow(arg1, (int64) arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + PG_RETURN_INT64(result); +} + +Datum +int84mul(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + int32 arg2 = PG_GETARG_INT32(1); + int64 result; + + if (unlikely(pg_mul_s64_overflow(arg1, (int64) arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + PG_RETURN_INT64(result); +} + +Datum +int84div(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + int32 arg2 = PG_GETARG_INT32(1); + int64 result; + + if (arg2 == 0) + { + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + /* ensure compiler realizes we mustn't reach the division (gcc bug) */ + PG_RETURN_NULL(); + } + + /* + * INT64_MIN / -1 is problematic, since the result can't be represented on + * a two's-complement machine. Some machines produce INT64_MIN, some + * produce zero, some throw an exception. We can dodge the problem by + * recognizing that division by -1 is the same as negation.
+ */ + if (arg2 == -1) + { + if (unlikely(arg1 == PG_INT64_MIN)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + result = -arg1; + PG_RETURN_INT64(result); + } + + /* No overflow is possible */ + + result = arg1 / arg2; + + PG_RETURN_INT64(result); +} + +Datum +int48pl(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int64 arg2 = PG_GETARG_INT64(1); + int64 result; + + if (unlikely(pg_add_s64_overflow((int64) arg1, arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + PG_RETURN_INT64(result); +} + +Datum +int48mi(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int64 arg2 = PG_GETARG_INT64(1); + int64 result; + + if (unlikely(pg_sub_s64_overflow((int64) arg1, arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + PG_RETURN_INT64(result); +} + +Datum +int48mul(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int64 arg2 = PG_GETARG_INT64(1); + int64 result; + + if (unlikely(pg_mul_s64_overflow((int64) arg1, arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + PG_RETURN_INT64(result); +} + +Datum +int48div(PG_FUNCTION_ARGS) +{ + int32 arg1 = PG_GETARG_INT32(0); + int64 arg2 = PG_GETARG_INT64(1); + + if (unlikely(arg2 == 0)) + { + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + /* ensure compiler realizes we mustn't reach the division (gcc bug) */ + PG_RETURN_NULL(); + } + + /* No overflow is possible */ + PG_RETURN_INT64((int64) arg1 / arg2); +} + +Datum +int82pl(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + int16 arg2 = PG_GETARG_INT16(1); + int64 result; + + if (unlikely(pg_add_s64_overflow(arg1, (int64) arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + PG_RETURN_INT64(result); +} + +Datum +int82mi(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + int16 arg2 = PG_GETARG_INT16(1); + int64 result; + + if (unlikely(pg_sub_s64_overflow(arg1, (int64) arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + PG_RETURN_INT64(result); +} + +Datum +int82mul(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + int16 arg2 = PG_GETARG_INT16(1); + int64 result; + + if (unlikely(pg_mul_s64_overflow(arg1, (int64) arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + PG_RETURN_INT64(result); +} + +Datum +int82div(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + int16 arg2 = PG_GETARG_INT16(1); + int64 result; + + if (unlikely(arg2 == 0)) + { + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + /* ensure compiler realizes we mustn't reach the division (gcc bug) */ + PG_RETURN_NULL(); + } + + /* + * INT64_MIN / -1 is problematic, since the result can't be represented on + * a two's-complement machine. Some machines produce INT64_MIN, some + * produce zero, some throw an exception. We can dodge the problem by + * recognizing that division by -1 is the same as negation. 
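+ * (In int82div the dividend is still a full int64, so the PG_INT64_MIN check + * below applies even though the divisor is only an int16.)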
+ */ + if (arg2 == -1) + { + if (unlikely(arg1 == PG_INT64_MIN)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + result = -arg1; + PG_RETURN_INT64(result); + } + + /* No overflow is possible */ + + result = arg1 / arg2; + + PG_RETURN_INT64(result); +} + +Datum +int28pl(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int64 arg2 = PG_GETARG_INT64(1); + int64 result; + + if (unlikely(pg_add_s64_overflow((int64) arg1, arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + PG_RETURN_INT64(result); +} + +Datum +int28mi(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int64 arg2 = PG_GETARG_INT64(1); + int64 result; + + if (unlikely(pg_sub_s64_overflow((int64) arg1, arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + PG_RETURN_INT64(result); +} + +Datum +int28mul(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int64 arg2 = PG_GETARG_INT64(1); + int64 result; + + if (unlikely(pg_mul_s64_overflow((int64) arg1, arg2, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + PG_RETURN_INT64(result); +} + +Datum +int28div(PG_FUNCTION_ARGS) +{ + int16 arg1 = PG_GETARG_INT16(0); + int64 arg2 = PG_GETARG_INT64(1); + + if (unlikely(arg2 == 0)) + { + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + /* ensure compiler realizes we mustn't reach the division (gcc bug) */ + PG_RETURN_NULL(); + } + + /* No overflow is possible */ + PG_RETURN_INT64((int64) arg1 / arg2); +} + +/* Binary arithmetics + * + * int8and - returns arg1 & arg2 + * int8or - returns arg1 | arg2 + * int8xor - returns arg1 # arg2 + * int8not - returns ~arg1 + * int8shl - returns arg1 << arg2 + * int8shr - returns arg1 >> arg2 + */ + +Datum +int8and(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + int64 arg2 = PG_GETARG_INT64(1); + + PG_RETURN_INT64(arg1 & arg2); +} + +Datum +int8or(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + int64 arg2 = PG_GETARG_INT64(1); + + PG_RETURN_INT64(arg1 | arg2); +} + +Datum +int8xor(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + int64 arg2 = PG_GETARG_INT64(1); + + PG_RETURN_INT64(arg1 ^ arg2); +} + +Datum +int8not(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + + PG_RETURN_INT64(~arg1); +} + +Datum +int8shl(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + int32 arg2 = PG_GETARG_INT32(1); + + PG_RETURN_INT64(arg1 << arg2); +} + +Datum +int8shr(PG_FUNCTION_ARGS) +{ + int64 arg1 = PG_GETARG_INT64(0); + int32 arg2 = PG_GETARG_INT32(1); + + PG_RETURN_INT64(arg1 >> arg2); +} + +/*---------------------------------------------------------- + * Conversion operators. 
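+ * (casts between int2/int4/int8, float4/float8 and oid; the narrowing + * directions below raise out-of-range errors rather than silently truncating)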
+ *---------------------------------------------------------*/ + +Datum +int48(PG_FUNCTION_ARGS) +{ + int32 arg = PG_GETARG_INT32(0); + + PG_RETURN_INT64((int64) arg); +} + +Datum +int84(PG_FUNCTION_ARGS) +{ + int64 arg = PG_GETARG_INT64(0); + + if (unlikely(arg < PG_INT32_MIN) || unlikely(arg > PG_INT32_MAX)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + + PG_RETURN_INT32((int32) arg); +} + +Datum +int28(PG_FUNCTION_ARGS) +{ + int16 arg = PG_GETARG_INT16(0); + + PG_RETURN_INT64((int64) arg); +} + +Datum +int82(PG_FUNCTION_ARGS) +{ + int64 arg = PG_GETARG_INT64(0); + + if (unlikely(arg < PG_INT16_MIN) || unlikely(arg > PG_INT16_MAX)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("smallint out of range"))); + + PG_RETURN_INT16((int16) arg); +} + +Datum +i8tod(PG_FUNCTION_ARGS) +{ + int64 arg = PG_GETARG_INT64(0); + float8 result; + + result = arg; + + PG_RETURN_FLOAT8(result); +} + +/* dtoi8() + * Convert float8 to 8-byte integer. + */ +Datum +dtoi8(PG_FUNCTION_ARGS) +{ + float8 num = PG_GETARG_FLOAT8(0); + + /* + * Get rid of any fractional part in the input. This is so we don't fail + * on just-out-of-range values that would round into range. Note + * assumption that rint() will pass through a NaN or Inf unchanged. + */ + num = rint(num); + + /* Range check */ + if (unlikely(isnan(num) || !FLOAT8_FITS_IN_INT64(num))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + + PG_RETURN_INT64((int64) num); +} + +Datum +i8tof(PG_FUNCTION_ARGS) +{ + int64 arg = PG_GETARG_INT64(0); + float4 result; + + result = arg; + + PG_RETURN_FLOAT4(result); +} + +/* ftoi8() + * Convert float4 to 8-byte integer. + */ +Datum +ftoi8(PG_FUNCTION_ARGS) +{ + float4 num = PG_GETARG_FLOAT4(0); + + /* + * Get rid of any fractional part in the input. This is so we don't fail + * on just-out-of-range values that would round into range. Note + * assumption that rint() will pass through a NaN or Inf unchanged. 
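+ * For instance, a float4 input of 9.3e18 exceeds PG_INT64_MAX and draws the + * "bigint out of range" error, while 9e18 rounds to a representable int64.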
+ */ + num = rint(num); + + /* Range check */ + if (unlikely(isnan(num) || !FLOAT4_FITS_IN_INT64(num))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + + PG_RETURN_INT64((int64) num); +} + +Datum +i8tooid(PG_FUNCTION_ARGS) +{ + int64 arg = PG_GETARG_INT64(0); + + if (unlikely(arg < 0) || unlikely(arg > PG_UINT32_MAX)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("OID out of range"))); + + PG_RETURN_OID((Oid) arg); +} + +Datum +oidtoi8(PG_FUNCTION_ARGS) +{ + Oid arg = PG_GETARG_OID(0); + + PG_RETURN_INT64((int64) arg); +} + +/* + * non-persistent numeric series generator + */ +Datum +generate_series_int8(PG_FUNCTION_ARGS) +{ + return generate_series_step_int8(fcinfo); +} + +Datum +generate_series_step_int8(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + generate_series_fctx *fctx; + int64 result; + MemoryContext oldcontext; + + /* stuff done only on the first call of the function */ + if (SRF_IS_FIRSTCALL()) + { + int64 start = PG_GETARG_INT64(0); + int64 finish = PG_GETARG_INT64(1); + int64 step = 1; + + /* see if we were given an explicit step size */ + if (PG_NARGS() == 3) + step = PG_GETARG_INT64(2); + if (step == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("step size cannot equal zero"))); + + /* create a function context for cross-call persistence */ + funcctx = SRF_FIRSTCALL_INIT(); + + /* + * switch to memory context appropriate for multiple function calls + */ + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* allocate memory for user context */ + fctx = (generate_series_fctx *) palloc(sizeof(generate_series_fctx)); + + /* + * Use fctx to keep state from call to call. Seed current with the + * original start value + */ + fctx->current = start; + fctx->finish = finish; + fctx->step = step; + + funcctx->user_fctx = fctx; + MemoryContextSwitchTo(oldcontext); + } + + /* stuff done on every call of the function */ + funcctx = SRF_PERCALL_SETUP(); + + /* + * get the saved state and use current as the result for this iteration + */ + fctx = funcctx->user_fctx; + result = fctx->current; + + if ((fctx->step > 0 && fctx->current <= fctx->finish) || + (fctx->step < 0 && fctx->current >= fctx->finish)) + { + /* + * Increment current in preparation for next iteration. If next-value + * computation overflows, this is the final result. 
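+ * (For example, generate_series(9223372036854775806, 9223372036854775807) + * must still return both rows; zeroing the step makes the following call + * fall through to SRF_RETURN_DONE.)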
+ */ + if (pg_add_s64_overflow(fctx->current, fctx->step, &fctx->current)) + fctx->step = 0; + + /* do when there is more left to send */ + SRF_RETURN_NEXT(funcctx, Int64GetDatum(result)); + } + else + /* do when there is no more left */ + SRF_RETURN_DONE(funcctx); +} + +/* + * Planner support function for generate_series(int8, int8 [, int8]) + */ +Datum +generate_series_int8_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestRows)) + { + /* Try to estimate the number of rows returned */ + SupportRequestRows *req = (SupportRequestRows *) rawreq; + + if (is_funcclause(req->node)) /* be paranoid */ + { + List *args = ((FuncExpr *) req->node)->args; + Node *arg1, + *arg2, + *arg3; + + /* We can use estimated argument values here */ + arg1 = estimate_expression_value(req->root, linitial(args)); + arg2 = estimate_expression_value(req->root, lsecond(args)); + if (list_length(args) >= 3) + arg3 = estimate_expression_value(req->root, lthird(args)); + else + arg3 = NULL; + + /* + * If any argument is constant NULL, we can safely assume that + * zero rows are returned. Otherwise, if they're all non-NULL + * constants, we can calculate the number of rows that will be + * returned. Use double arithmetic to avoid overflow hazards. + */ + if ((IsA(arg1, Const) && + ((Const *) arg1)->constisnull) || + (IsA(arg2, Const) && + ((Const *) arg2)->constisnull) || + (arg3 != NULL && IsA(arg3, Const) && + ((Const *) arg3)->constisnull)) + { + req->rows = 0; + ret = (Node *) req; + } + else if (IsA(arg1, Const) && + IsA(arg2, Const) && + (arg3 == NULL || IsA(arg3, Const))) + { + double start, + finish, + step; + + start = DatumGetInt64(((Const *) arg1)->constvalue); + finish = DatumGetInt64(((Const *) arg2)->constvalue); + step = arg3 ? DatumGetInt64(((Const *) arg3)->constvalue) : 1; + + /* This equation works for either sign of step */ + if (step != 0) + { + req->rows = floor((finish - start + step) / step); + ret = (Node *) req; + } + } + } + } + + PG_RETURN_POINTER(ret); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/json.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/json.c new file mode 100644 index 00000000000..7205f4adca8 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/json.c @@ -0,0 +1,1825 @@ +/*------------------------------------------------------------------------- + * + * json.c + * JSON data type support. 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/adt/json.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "catalog/pg_proc.h" +#include "catalog/pg_type.h" +#include "common/hashfn.h" +#include "funcapi.h" +#include "libpq/pqformat.h" +#include "miscadmin.h" +#include "parser/parse_coerce.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/date.h" +#include "utils/datetime.h" +#include "utils/json.h" +#include "utils/jsonfuncs.h" +#include "utils/lsyscache.h" +#include "utils/typcache.h" + +typedef enum /* type categories for datum_to_json */ +{ + JSONTYPE_NULL, /* null, so we didn't bother to identify */ + JSONTYPE_BOOL, /* boolean (built-in types only) */ + JSONTYPE_NUMERIC, /* numeric (ditto) */ + JSONTYPE_DATE, /* we use special formatting for datetimes */ + JSONTYPE_TIMESTAMP, + JSONTYPE_TIMESTAMPTZ, + JSONTYPE_JSON, /* JSON itself (and JSONB) */ + JSONTYPE_ARRAY, /* array */ + JSONTYPE_COMPOSITE, /* composite */ + JSONTYPE_CAST, /* something with an explicit cast to JSON */ + JSONTYPE_OTHER /* all else */ +} JsonTypeCategory; + + +/* + * Support for fast key uniqueness checking. + * + * We maintain a hash table of used keys in JSON objects for fast detection + * of duplicates. + */ +/* Common context for key uniqueness check */ +typedef struct HTAB *JsonUniqueCheckState; /* hash table for key names */ + +/* Hash entry for JsonUniqueCheckState */ +typedef struct JsonUniqueHashEntry +{ + const char *key; + int key_len; + int object_id; +} JsonUniqueHashEntry; + +/* Stack element for key uniqueness check during JSON parsing */ +typedef struct JsonUniqueStackEntry +{ + struct JsonUniqueStackEntry *parent; + int object_id; +} JsonUniqueStackEntry; + +/* Context struct for key uniqueness check during JSON parsing */ +typedef struct JsonUniqueParsingState +{ + JsonLexContext *lex; + JsonUniqueCheckState check; + JsonUniqueStackEntry *stack; + int id_counter; + bool unique; +} JsonUniqueParsingState; + +/* Context struct for key uniqueness check during JSON building */ +typedef struct JsonUniqueBuilderState +{ + JsonUniqueCheckState check; /* unique check */ + StringInfoData skipped_keys; /* skipped keys with NULL values */ + MemoryContext mcxt; /* context for saving skipped keys */ +} JsonUniqueBuilderState; + + +/* State struct for JSON aggregation */ +typedef struct JsonAggState +{ + StringInfo str; + JsonTypeCategory key_category; + Oid key_output_func; + JsonTypeCategory val_category; + Oid val_output_func; + JsonUniqueBuilderState unique_check; +} JsonAggState; + +static void composite_to_json(Datum composite, StringInfo result, + bool use_line_feeds); +static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, + Datum *vals, bool *nulls, int *valcount, + JsonTypeCategory tcategory, Oid outfuncoid, + bool use_line_feeds); +static void array_to_json_internal(Datum array, StringInfo result, + bool use_line_feeds); +static void json_categorize_type(Oid typoid, + JsonTypeCategory *tcategory, + Oid *outfuncoid); +static void datum_to_json(Datum val, bool is_null, StringInfo result, + JsonTypeCategory tcategory, Oid outfuncoid, + bool key_scalar); +static void add_json(Datum val, bool is_null, StringInfo result, + Oid val_type, bool key_scalar); +static text *catenate_stringinfo_string(StringInfo buffer, const char *addon); + +/* 
+ * Input. + */ +Datum +json_in(PG_FUNCTION_ARGS) +{ + char *json = PG_GETARG_CSTRING(0); + text *result = cstring_to_text(json); + JsonLexContext *lex; + + /* validate it */ + lex = makeJsonLexContext(result, false); + if (!pg_parse_json_or_errsave(lex, &nullSemAction, fcinfo->context)) + PG_RETURN_NULL(); + + /* Internal representation is the same as text */ + PG_RETURN_TEXT_P(result); +} + +/* + * Output. + */ +Datum +json_out(PG_FUNCTION_ARGS) +{ + /* we needn't detoast because text_to_cstring will handle that */ + Datum txt = PG_GETARG_DATUM(0); + + PG_RETURN_CSTRING(TextDatumGetCString(txt)); +} + +/* + * Binary send. + */ +Datum +json_send(PG_FUNCTION_ARGS) +{ + text *t = PG_GETARG_TEXT_PP(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t)); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * Binary receive. + */ +Datum +json_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + char *str; + int nbytes; + JsonLexContext *lex; + + str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); + + /* Validate it. */ + lex = makeJsonLexContextCstringLen(str, nbytes, GetDatabaseEncoding(), false); + pg_parse_json_or_ereport(lex, &nullSemAction); + + PG_RETURN_TEXT_P(cstring_to_text_with_len(str, nbytes)); +} + +/* + * Determine how we want to print values of a given type in datum_to_json. + * + * Given the datatype OID, return its JsonTypeCategory, as well as the type's + * output function OID. If the returned category is JSONTYPE_CAST, we + * return the OID of the type->JSON cast function instead. + */ +static void +json_categorize_type(Oid typoid, + JsonTypeCategory *tcategory, + Oid *outfuncoid) +{ + bool typisvarlena; + + /* Look through any domain */ + typoid = getBaseType(typoid); + + *outfuncoid = InvalidOid; + + /* + * We need to get the output function for everything except date and + * timestamp types, array and composite types, booleans, and non-builtin + * types where there's a cast to json. + */ + + switch (typoid) + { + case BOOLOID: + *tcategory = JSONTYPE_BOOL; + break; + + case INT2OID: + case INT4OID: + case INT8OID: + case FLOAT4OID: + case FLOAT8OID: + case NUMERICOID: + getTypeOutputInfo(typoid, outfuncoid, &typisvarlena); + *tcategory = JSONTYPE_NUMERIC; + break; + + case DATEOID: + *tcategory = JSONTYPE_DATE; + break; + + case TIMESTAMPOID: + *tcategory = JSONTYPE_TIMESTAMP; + break; + + case TIMESTAMPTZOID: + *tcategory = JSONTYPE_TIMESTAMPTZ; + break; + + case JSONOID: + case JSONBOID: + getTypeOutputInfo(typoid, outfuncoid, &typisvarlena); + *tcategory = JSONTYPE_JSON; + break; + + default: + /* Check for arrays and composites */ + if (OidIsValid(get_element_type(typoid)) || typoid == ANYARRAYOID + || typoid == ANYCOMPATIBLEARRAYOID || typoid == RECORDARRAYOID) + *tcategory = JSONTYPE_ARRAY; + else if (type_is_rowtype(typoid)) /* includes RECORDOID */ + *tcategory = JSONTYPE_COMPOSITE; + else + { + /* It's probably the general case ... 
*/ + *tcategory = JSONTYPE_OTHER; + /* but let's look for a cast to json, if it's not built-in */ + if (typoid >= FirstNormalObjectId) + { + Oid castfunc; + CoercionPathType ctype; + + ctype = find_coercion_pathway(JSONOID, typoid, + COERCION_EXPLICIT, + &castfunc); + if (ctype == COERCION_PATH_FUNC && OidIsValid(castfunc)) + { + *tcategory = JSONTYPE_CAST; + *outfuncoid = castfunc; + } + else + { + /* non builtin type with no cast */ + getTypeOutputInfo(typoid, outfuncoid, &typisvarlena); + } + } + else + { + /* any other builtin type */ + getTypeOutputInfo(typoid, outfuncoid, &typisvarlena); + } + } + break; + } +} + +/* + * Turn a Datum into JSON text, appending the string to "result". + * + * tcategory and outfuncoid are from a previous call to json_categorize_type, + * except that if is_null is true then they can be invalid. + * + * If key_scalar is true, the value is being printed as a key, so insist + * it's of an acceptable type, and force it to be quoted. + */ +static void +datum_to_json(Datum val, bool is_null, StringInfo result, + JsonTypeCategory tcategory, Oid outfuncoid, + bool key_scalar) +{ + char *outputstr; + text *jsontext; + + check_stack_depth(); + + /* callers are expected to ensure that null keys are not passed in */ + Assert(!(key_scalar && is_null)); + + if (is_null) + { + appendStringInfoString(result, "null"); + return; + } + + if (key_scalar && + (tcategory == JSONTYPE_ARRAY || + tcategory == JSONTYPE_COMPOSITE || + tcategory == JSONTYPE_JSON || + tcategory == JSONTYPE_CAST)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("key value must be scalar, not array, composite, or json"))); + + switch (tcategory) + { + case JSONTYPE_ARRAY: + array_to_json_internal(val, result, false); + break; + case JSONTYPE_COMPOSITE: + composite_to_json(val, result, false); + break; + case JSONTYPE_BOOL: + outputstr = DatumGetBool(val) ? "true" : "false"; + if (key_scalar) + escape_json(result, outputstr); + else + appendStringInfoString(result, outputstr); + break; + case JSONTYPE_NUMERIC: + outputstr = OidOutputFunctionCall(outfuncoid, val); + + /* + * Don't call escape_json for a non-key if it's a valid JSON + * number. 
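+ * (numeric NaN and Infinity, for instance, are not valid JSON numbers, so + * they are emitted quoted, as "NaN" and "Infinity".)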
+ */ + if (!key_scalar && IsValidJsonNumber(outputstr, strlen(outputstr))) + appendStringInfoString(result, outputstr); + else + escape_json(result, outputstr); + pfree(outputstr); + break; + case JSONTYPE_DATE: + { + char buf[MAXDATELEN + 1]; + + JsonEncodeDateTime(buf, val, DATEOID, NULL); + appendStringInfo(result, "\"%s\"", buf); + } + break; + case JSONTYPE_TIMESTAMP: + { + char buf[MAXDATELEN + 1]; + + JsonEncodeDateTime(buf, val, TIMESTAMPOID, NULL); + appendStringInfo(result, "\"%s\"", buf); + } + break; + case JSONTYPE_TIMESTAMPTZ: + { + char buf[MAXDATELEN + 1]; + + JsonEncodeDateTime(buf, val, TIMESTAMPTZOID, NULL); + appendStringInfo(result, "\"%s\"", buf); + } + break; + case JSONTYPE_JSON: + /* JSON and JSONB output will already be escaped */ + outputstr = OidOutputFunctionCall(outfuncoid, val); + appendStringInfoString(result, outputstr); + pfree(outputstr); + break; + case JSONTYPE_CAST: + /* outfuncoid refers to a cast function, not an output function */ + jsontext = DatumGetTextPP(OidFunctionCall1(outfuncoid, val)); + outputstr = text_to_cstring(jsontext); + appendStringInfoString(result, outputstr); + pfree(outputstr); + pfree(jsontext); + break; + default: + outputstr = OidOutputFunctionCall(outfuncoid, val); + escape_json(result, outputstr); + pfree(outputstr); + break; + } +} + +/* + * Encode 'value' of datetime type 'typid' into JSON string in ISO format using + * optionally preallocated buffer 'buf'. Optional 'tzp' determines time-zone + * offset (in seconds) in which we want to show timestamptz. + */ +char * +JsonEncodeDateTime(char *buf, Datum value, Oid typid, const int *tzp) +{ + if (!buf) + buf = palloc(MAXDATELEN + 1); + + switch (typid) + { + case DATEOID: + { + DateADT date; + struct pg_tm tm; + + date = DatumGetDateADT(value); + + /* Same as date_out(), but forcing DateStyle */ + if (DATE_NOT_FINITE(date)) + EncodeSpecialDate(date, buf); + else + { + j2date(date + POSTGRES_EPOCH_JDATE, + &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday)); + EncodeDateOnly(&tm, USE_XSD_DATES, buf); + } + } + break; + case TIMEOID: + { + TimeADT time = DatumGetTimeADT(value); + struct pg_tm tt, + *tm = &tt; + fsec_t fsec; + + /* Same as time_out(), but forcing DateStyle */ + time2tm(time, tm, &fsec); + EncodeTimeOnly(tm, fsec, false, 0, USE_XSD_DATES, buf); + } + break; + case TIMETZOID: + { + TimeTzADT *time = DatumGetTimeTzADTP(value); + struct pg_tm tt, + *tm = &tt; + fsec_t fsec; + int tz; + + /* Same as timetz_out(), but forcing DateStyle */ + timetz2tm(time, tm, &fsec, &tz); + EncodeTimeOnly(tm, fsec, true, tz, USE_XSD_DATES, buf); + } + break; + case TIMESTAMPOID: + { + Timestamp timestamp; + struct pg_tm tm; + fsec_t fsec; + + timestamp = DatumGetTimestamp(value); + /* Same as timestamp_out(), but forcing DateStyle */ + if (TIMESTAMP_NOT_FINITE(timestamp)) + EncodeSpecialTimestamp(timestamp, buf); + else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0) + EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + } + break; + case TIMESTAMPTZOID: + { + TimestampTz timestamp; + struct pg_tm tm; + int tz; + fsec_t fsec; + const char *tzn = NULL; + + timestamp = DatumGetTimestampTz(value); + + /* + * If a time zone is specified, we apply the time-zone shift, + * convert timestamptz to pg_tm as if it were without a time + * zone, and then use the specified time zone for converting + * the timestamp into a string. 
+ */ + if (tzp) + { + tz = *tzp; + timestamp -= (TimestampTz) tz * USECS_PER_SEC; + } + + /* Same as timestamptz_out(), but forcing DateStyle */ + if (TIMESTAMP_NOT_FINITE(timestamp)) + EncodeSpecialTimestamp(timestamp, buf); + else if (timestamp2tm(timestamp, tzp ? NULL : &tz, &tm, &fsec, + tzp ? NULL : &tzn, NULL) == 0) + { + if (tzp) + tm.tm_isdst = 1; /* set time-zone presence flag */ + + EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf); + } + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + } + break; + default: + elog(ERROR, "unknown jsonb value datetime type oid %u", typid); + return NULL; + } + + return buf; +} + +/* + * Process a single dimension of an array. + * If it's the innermost dimension, output the values, otherwise call + * ourselves recursively to process the next dimension. + */ +static void +array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, Datum *vals, + bool *nulls, int *valcount, JsonTypeCategory tcategory, + Oid outfuncoid, bool use_line_feeds) +{ + int i; + const char *sep; + + Assert(dim < ndims); + + sep = use_line_feeds ? ",\n " : ","; + + appendStringInfoChar(result, '['); + + for (i = 1; i <= dims[dim]; i++) + { + if (i > 1) + appendStringInfoString(result, sep); + + if (dim + 1 == ndims) + { + datum_to_json(vals[*valcount], nulls[*valcount], result, tcategory, + outfuncoid, false); + (*valcount)++; + } + else + { + /* + * Do we want line feeds on inner dimensions of arrays? For now + * we'll say no. + */ + array_dim_to_json(result, dim + 1, ndims, dims, vals, nulls, + valcount, tcategory, outfuncoid, false); + } + } + + appendStringInfoChar(result, ']'); +} + +/* + * Turn an array into JSON. + */ +static void +array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds) +{ + ArrayType *v = DatumGetArrayTypeP(array); + Oid element_type = ARR_ELEMTYPE(v); + int *dim; + int ndim; + int nitems; + int count = 0; + Datum *elements; + bool *nulls; + int16 typlen; + bool typbyval; + char typalign; + JsonTypeCategory tcategory; + Oid outfuncoid; + + ndim = ARR_NDIM(v); + dim = ARR_DIMS(v); + nitems = ArrayGetNItems(ndim, dim); + + if (nitems <= 0) + { + appendStringInfoString(result, "[]"); + return; + } + + get_typlenbyvalalign(element_type, + &typlen, &typbyval, &typalign); + + json_categorize_type(element_type, + &tcategory, &outfuncoid); + + deconstruct_array(v, element_type, typlen, typbyval, + typalign, &elements, &nulls, + &nitems); + + array_dim_to_json(result, 0, ndim, dim, elements, nulls, &count, tcategory, + outfuncoid, use_line_feeds); + + pfree(elements); + pfree(nulls); +} + +/* + * Turn a composite / record into JSON. + */ +static void +composite_to_json(Datum composite, StringInfo result, bool use_line_feeds) +{ + HeapTupleHeader td; + Oid tupType; + int32 tupTypmod; + TupleDesc tupdesc; + HeapTupleData tmptup, + *tuple; + int i; + bool needsep = false; + const char *sep; + + sep = use_line_feeds ? 
",\n " : ","; + + td = DatumGetHeapTupleHeader(composite); + + /* Extract rowtype info and find a tupdesc */ + tupType = HeapTupleHeaderGetTypeId(td); + tupTypmod = HeapTupleHeaderGetTypMod(td); + tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); + + /* Build a temporary HeapTuple control structure */ + tmptup.t_len = HeapTupleHeaderGetDatumLength(td); + tmptup.t_data = td; + tuple = &tmptup; + + appendStringInfoChar(result, '{'); + + for (i = 0; i < tupdesc->natts; i++) + { + Datum val; + bool isnull; + char *attname; + JsonTypeCategory tcategory; + Oid outfuncoid; + Form_pg_attribute att = TupleDescAttr(tupdesc, i); + + if (att->attisdropped) + continue; + + if (needsep) + appendStringInfoString(result, sep); + needsep = true; + + attname = NameStr(att->attname); + escape_json(result, attname); + appendStringInfoChar(result, ':'); + + val = heap_getattr(tuple, i + 1, tupdesc, &isnull); + + if (isnull) + { + tcategory = JSONTYPE_NULL; + outfuncoid = InvalidOid; + } + else + json_categorize_type(att->atttypid, &tcategory, &outfuncoid); + + datum_to_json(val, isnull, result, tcategory, outfuncoid, false); + } + + appendStringInfoChar(result, '}'); + ReleaseTupleDesc(tupdesc); +} + +/* + * Append JSON text for "val" to "result". + * + * This is just a thin wrapper around datum_to_json. If the same type will be + * printed many times, avoid using this; better to do the json_categorize_type + * lookups only once. + */ +static void +add_json(Datum val, bool is_null, StringInfo result, + Oid val_type, bool key_scalar) +{ + JsonTypeCategory tcategory; + Oid outfuncoid; + + if (val_type == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not determine input data type"))); + + if (is_null) + { + tcategory = JSONTYPE_NULL; + outfuncoid = InvalidOid; + } + else + json_categorize_type(val_type, + &tcategory, &outfuncoid); + + datum_to_json(val, is_null, result, tcategory, outfuncoid, key_scalar); +} + +/* + * SQL function array_to_json(row) + */ +Datum +array_to_json(PG_FUNCTION_ARGS) +{ + Datum array = PG_GETARG_DATUM(0); + StringInfo result; + + result = makeStringInfo(); + + array_to_json_internal(array, result, false); + + PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len)); +} + +/* + * SQL function array_to_json(row, prettybool) + */ +Datum +array_to_json_pretty(PG_FUNCTION_ARGS) +{ + Datum array = PG_GETARG_DATUM(0); + bool use_line_feeds = PG_GETARG_BOOL(1); + StringInfo result; + + result = makeStringInfo(); + + array_to_json_internal(array, result, use_line_feeds); + + PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len)); +} + +/* + * SQL function row_to_json(row) + */ +Datum +row_to_json(PG_FUNCTION_ARGS) +{ + Datum array = PG_GETARG_DATUM(0); + StringInfo result; + + result = makeStringInfo(); + + composite_to_json(array, result, false); + + PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len)); +} + +/* + * SQL function row_to_json(row, prettybool) + */ +Datum +row_to_json_pretty(PG_FUNCTION_ARGS) +{ + Datum array = PG_GETARG_DATUM(0); + bool use_line_feeds = PG_GETARG_BOOL(1); + StringInfo result; + + result = makeStringInfo(); + + composite_to_json(array, result, use_line_feeds); + + PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len)); +} + +/* + * Is the given type immutable when coming out of a JSON context? + * + * At present, datetimes are all considered mutable, because they + * depend on timezone. XXX we should also drill down into objects + * and arrays, but do not. 
+ */ +bool +to_json_is_immutable(Oid typoid) +{ + JsonTypeCategory tcategory; + Oid outfuncoid; + + json_categorize_type(typoid, &tcategory, &outfuncoid); + + switch (tcategory) + { + case JSONTYPE_BOOL: + case JSONTYPE_JSON: + case JSONTYPE_NULL: + return true; + + case JSONTYPE_DATE: + case JSONTYPE_TIMESTAMP: + case JSONTYPE_TIMESTAMPTZ: + return false; + + case JSONTYPE_ARRAY: + return false; /* TODO recurse into elements */ + + case JSONTYPE_COMPOSITE: + return false; /* TODO recurse into fields */ + + case JSONTYPE_NUMERIC: + case JSONTYPE_CAST: + case JSONTYPE_OTHER: + return func_volatile(outfuncoid) == PROVOLATILE_IMMUTABLE; + } + + return false; /* not reached */ +} + +/* + * SQL function to_json(anyvalue) + */ +Datum +to_json(PG_FUNCTION_ARGS) +{ + Datum val = PG_GETARG_DATUM(0); + Oid val_type = get_fn_expr_argtype(fcinfo->flinfo, 0); + StringInfo result; + JsonTypeCategory tcategory; + Oid outfuncoid; + + if (val_type == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not determine input data type"))); + + json_categorize_type(val_type, + &tcategory, &outfuncoid); + + result = makeStringInfo(); + + datum_to_json(val, false, result, tcategory, outfuncoid, false); + + PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len)); +} + +/* + * json_agg transition function + * + * aggregate input column as a json array value. + */ +static Datum +json_agg_transfn_worker(FunctionCallInfo fcinfo, bool absent_on_null) +{ + MemoryContext aggcontext, + oldcontext; + JsonAggState *state; + Datum val; + + if (!AggCheckCallContext(fcinfo, &aggcontext)) + { + /* cannot be called directly because of internal-type argument */ + elog(ERROR, "json_agg_transfn called in non-aggregate context"); + } + + if (PG_ARGISNULL(0)) + { + Oid arg_type = get_fn_expr_argtype(fcinfo->flinfo, 1); + + if (arg_type == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not determine input data type"))); + + /* + * Make this state object in a context where it will persist for the + * duration of the aggregate call. MemoryContextSwitchTo is only + * needed the first time, as the StringInfo routines make sure they + * use the right context to enlarge the object if necessary. + */ + oldcontext = MemoryContextSwitchTo(aggcontext); + state = (JsonAggState *) palloc(sizeof(JsonAggState)); + state->str = makeStringInfo(); + MemoryContextSwitchTo(oldcontext); + + appendStringInfoChar(state->str, '['); + json_categorize_type(arg_type, &state->val_category, + &state->val_output_func); + } + else + { + state = (JsonAggState *) PG_GETARG_POINTER(0); + } + + if (absent_on_null && PG_ARGISNULL(1)) + PG_RETURN_POINTER(state); + + if (state->str->len > 1) + appendStringInfoString(state->str, ", "); + + /* fast path for NULLs */ + if (PG_ARGISNULL(1)) + { + datum_to_json((Datum) 0, true, state->str, JSONTYPE_NULL, + InvalidOid, false); + PG_RETURN_POINTER(state); + } + + val = PG_GETARG_DATUM(1); + + /* add some whitespace if structured type and not first item */ + if (!PG_ARGISNULL(0) && state->str->len > 1 && + (state->val_category == JSONTYPE_ARRAY || + state->val_category == JSONTYPE_COMPOSITE)) + { + appendStringInfoString(state->str, "\n "); + } + + datum_to_json(val, false, state->str, state->val_category, + state->val_output_func, false); + + /* + * The transition type for json_agg() is declared to be "internal", which + * is a pass-by-value type the same size as a pointer. 
So we can safely + * pass the JsonAggState pointer through nodeAgg.c's machinations. + */ + PG_RETURN_POINTER(state); +} + + +/* + * json_agg aggregate function + */ +Datum +json_agg_transfn(PG_FUNCTION_ARGS) +{ + return json_agg_transfn_worker(fcinfo, false); +} + +/* + * json_agg_strict aggregate function + */ +Datum +json_agg_strict_transfn(PG_FUNCTION_ARGS) +{ + return json_agg_transfn_worker(fcinfo, true); +} + +/* + * json_agg final function + */ +Datum +json_agg_finalfn(PG_FUNCTION_ARGS) +{ + JsonAggState *state; + + /* cannot be called directly because of internal-type argument */ + Assert(AggCheckCallContext(fcinfo, NULL)); + + state = PG_ARGISNULL(0) ? + NULL : + (JsonAggState *) PG_GETARG_POINTER(0); + + /* NULL result for no rows in, as is standard with aggregates */ + if (state == NULL) + PG_RETURN_NULL(); + + /* Else return state with appropriate array terminator added */ + PG_RETURN_TEXT_P(catenate_stringinfo_string(state->str, "]")); +} + +/* Functions implementing hash table for key uniqueness check */ +static uint32 +json_unique_hash(const void *key, Size keysize) +{ + const JsonUniqueHashEntry *entry = (JsonUniqueHashEntry *) key; + uint32 hash = hash_bytes_uint32(entry->object_id); + + hash ^= hash_bytes((const unsigned char *) entry->key, entry->key_len); + + return DatumGetUInt32(hash); +} + +static int +json_unique_hash_match(const void *key1, const void *key2, Size keysize) +{ + const JsonUniqueHashEntry *entry1 = (const JsonUniqueHashEntry *) key1; + const JsonUniqueHashEntry *entry2 = (const JsonUniqueHashEntry *) key2; + + if (entry1->object_id != entry2->object_id) + return entry1->object_id > entry2->object_id ? 1 : -1; + + if (entry1->key_len != entry2->key_len) + return entry1->key_len > entry2->key_len ? 1 : -1; + + return strncmp(entry1->key, entry2->key, entry1->key_len); +} + +/* + * Uniqueness detection support. + * + * In order to detect uniqueness during building or parsing of a JSON + * object, we maintain a hash table of key names already seen. + */ +static void +json_unique_check_init(JsonUniqueCheckState *cxt) +{ + HASHCTL ctl; + + memset(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(JsonUniqueHashEntry); + ctl.entrysize = sizeof(JsonUniqueHashEntry); + ctl.hcxt = CurrentMemoryContext; + ctl.hash = json_unique_hash; + ctl.match = json_unique_hash_match; + + *cxt = hash_create("json object hashtable", + 32, + &ctl, + HASH_ELEM | HASH_CONTEXT | HASH_FUNCTION | HASH_COMPARE); +} + +static void +json_unique_builder_init(JsonUniqueBuilderState *cxt) +{ + json_unique_check_init(&cxt->check); + cxt->mcxt = CurrentMemoryContext; + cxt->skipped_keys.data = NULL; +} + +static bool +json_unique_check_key(JsonUniqueCheckState *cxt, const char *key, int object_id) +{ + JsonUniqueHashEntry entry; + bool found; + + entry.key = key; + entry.key_len = strlen(key); + entry.object_id = object_id; + + (void) hash_search(*cxt, &entry, HASH_ENTER, &found); + + return !found; +} + +/* + * On-demand initialization of a throwaway StringInfo. This is used to + * read a key name that we don't need to store in the output object, for + * duplicate key detection when the value is NULL. 
+ */ +static StringInfo +json_unique_builder_get_throwawaybuf(JsonUniqueBuilderState *cxt) +{ + StringInfo out = &cxt->skipped_keys; + + if (!out->data) + { + MemoryContext oldcxt = MemoryContextSwitchTo(cxt->mcxt); + + initStringInfo(out); + MemoryContextSwitchTo(oldcxt); + } + else + /* Just reset the string to empty */ + out->len = 0; + + return out; +} + +/* + * json_object_agg transition function. + * + * aggregate two input columns as a single json object value. + */ +static Datum +json_object_agg_transfn_worker(FunctionCallInfo fcinfo, + bool absent_on_null, bool unique_keys) +{ + MemoryContext aggcontext, + oldcontext; + JsonAggState *state; + StringInfo out; + Datum arg; + bool skip; + int key_offset; + + if (!AggCheckCallContext(fcinfo, &aggcontext)) + { + /* cannot be called directly because of internal-type argument */ + elog(ERROR, "json_object_agg_transfn called in non-aggregate context"); + } + + if (PG_ARGISNULL(0)) + { + Oid arg_type; + + /* + * Make the StringInfo in a context where it will persist for the + * duration of the aggregate call. Switching context is only needed + * for this initial step, as the StringInfo and dynahash routines make + * sure they use the right context to enlarge the object if necessary. + */ + oldcontext = MemoryContextSwitchTo(aggcontext); + state = (JsonAggState *) palloc(sizeof(JsonAggState)); + state->str = makeStringInfo(); + if (unique_keys) + json_unique_builder_init(&state->unique_check); + else + memset(&state->unique_check, 0, sizeof(state->unique_check)); + MemoryContextSwitchTo(oldcontext); + + arg_type = get_fn_expr_argtype(fcinfo->flinfo, 1); + + if (arg_type == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not determine data type for argument %d", 1))); + + json_categorize_type(arg_type, &state->key_category, + &state->key_output_func); + + arg_type = get_fn_expr_argtype(fcinfo->flinfo, 2); + + if (arg_type == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not determine data type for argument %d", 2))); + + json_categorize_type(arg_type, &state->val_category, + &state->val_output_func); + + appendStringInfoString(state->str, "{ "); + } + else + { + state = (JsonAggState *) PG_GETARG_POINTER(0); + } + + /* + * Note: since json_object_agg() is declared as taking type "any", the + * parser will not do any type conversion on unknown-type literals (that + * is, undecorated strings or NULLs). Such values will arrive here as + * type UNKNOWN, which fortunately does not matter to us, since + * unknownout() works fine. + */ + + if (PG_ARGISNULL(1)) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("null value not allowed for object key"))); + + /* Skip null values if absent_on_null */ + skip = absent_on_null && PG_ARGISNULL(2); + + if (skip) + { + /* + * We got a NULL value and we're not storing those; if we're not + * testing key uniqueness, we're done. If we are, use the throwaway + * buffer to store the key name so that we can check it. + */ + if (!unique_keys) + PG_RETURN_POINTER(state); + + out = json_unique_builder_get_throwawaybuf(&state->unique_check); + } + else + { + out = state->str; + + /* + * Append comma delimiter only if we have already output some fields + * after the initial string "{ ". 
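+ * The opening "{ " is two characters, hence the len > 2 test below.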
+ */ + if (out->len > 2) + appendStringInfoString(out, ", "); + } + + arg = PG_GETARG_DATUM(1); + + key_offset = out->len; + + datum_to_json(arg, false, out, state->key_category, + state->key_output_func, true); + + if (unique_keys) + { + const char *key = &out->data[key_offset]; + + if (!json_unique_check_key(&state->unique_check.check, key, 0)) + ereport(ERROR, + errcode(ERRCODE_DUPLICATE_JSON_OBJECT_KEY_VALUE), + errmsg("duplicate JSON object key value: %s", key)); + + if (skip) + PG_RETURN_POINTER(state); + } + + appendStringInfoString(state->str, " : "); + + if (PG_ARGISNULL(2)) + arg = (Datum) 0; + else + arg = PG_GETARG_DATUM(2); + + datum_to_json(arg, PG_ARGISNULL(2), state->str, state->val_category, + state->val_output_func, false); + + PG_RETURN_POINTER(state); +} + +/* + * json_object_agg aggregate function + */ +Datum +json_object_agg_transfn(PG_FUNCTION_ARGS) +{ + return json_object_agg_transfn_worker(fcinfo, false, false); +} + +/* + * json_object_agg_strict aggregate function + */ +Datum +json_object_agg_strict_transfn(PG_FUNCTION_ARGS) +{ + return json_object_agg_transfn_worker(fcinfo, true, false); +} + +/* + * json_object_agg_unique aggregate function + */ +Datum +json_object_agg_unique_transfn(PG_FUNCTION_ARGS) +{ + return json_object_agg_transfn_worker(fcinfo, false, true); +} + +/* + * json_object_agg_unique_strict aggregate function + */ +Datum +json_object_agg_unique_strict_transfn(PG_FUNCTION_ARGS) +{ + return json_object_agg_transfn_worker(fcinfo, true, true); +} + +/* + * json_object_agg final function. + */ +Datum +json_object_agg_finalfn(PG_FUNCTION_ARGS) +{ + JsonAggState *state; + + /* cannot be called directly because of internal-type argument */ + Assert(AggCheckCallContext(fcinfo, NULL)); + + state = PG_ARGISNULL(0) ? NULL : (JsonAggState *) PG_GETARG_POINTER(0); + + /* NULL result for no rows in, as is standard with aggregates */ + if (state == NULL) + PG_RETURN_NULL(); + + /* Else return state with appropriate object terminator added */ + PG_RETURN_TEXT_P(catenate_stringinfo_string(state->str, " }")); +} + +/* + * Helper function for aggregates: return given StringInfo's contents plus + * specified trailing string, as a text datum. We need this because aggregate + * final functions are not allowed to modify the aggregate state. 
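+ * (If we appended the terminator to state->str in place, a window aggregate + * that keeps using the same transition state for later rows would see the + * stray "]" or " }".)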
+ */ +static text * +catenate_stringinfo_string(StringInfo buffer, const char *addon) +{ + /* custom version of cstring_to_text_with_len */ + int buflen = buffer->len; + int addlen = strlen(addon); + text *result = (text *) palloc(buflen + addlen + VARHDRSZ); + + SET_VARSIZE(result, buflen + addlen + VARHDRSZ); + memcpy(VARDATA(result), buffer->data, buflen); + memcpy(VARDATA(result) + buflen, addon, addlen); + + return result; +} + +Datum +json_build_object_worker(int nargs, Datum *args, bool *nulls, Oid *types, + bool absent_on_null, bool unique_keys) +{ + int i; + const char *sep = ""; + StringInfo result; + JsonUniqueBuilderState unique_check; + + if (nargs % 2 != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("argument list must have even number of elements"), + /* translator: %s is a SQL function name */ + errhint("The arguments of %s must consist of alternating keys and values.", + "json_build_object()"))); + + result = makeStringInfo(); + + appendStringInfoChar(result, '{'); + + if (unique_keys) + json_unique_builder_init(&unique_check); + + for (i = 0; i < nargs; i += 2) + { + StringInfo out; + bool skip; + int key_offset; + + /* Skip null values if absent_on_null */ + skip = absent_on_null && nulls[i + 1]; + + if (skip) + { + /* If key uniqueness check is needed we must save skipped keys */ + if (!unique_keys) + continue; + + out = json_unique_builder_get_throwawaybuf(&unique_check); + } + else + { + appendStringInfoString(result, sep); + sep = ", "; + out = result; + } + + /* process key */ + if (nulls[i]) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("null value not allowed for object key"))); + + /* save key offset before appending it */ + key_offset = out->len; + + add_json(args[i], false, out, types[i], true); + + if (unique_keys) + { + /* check key uniqueness after key appending */ + const char *key = &out->data[key_offset]; + + if (!json_unique_check_key(&unique_check.check, key, 0)) + ereport(ERROR, + errcode(ERRCODE_DUPLICATE_JSON_OBJECT_KEY_VALUE), + errmsg("duplicate JSON object key value: %s", key)); + + if (skip) + continue; + } + + appendStringInfoString(result, " : "); + + /* process value */ + add_json(args[i + 1], nulls[i + 1], result, types[i + 1], false); + } + + appendStringInfoChar(result, '}'); + + return PointerGetDatum(cstring_to_text_with_len(result->data, result->len)); +} + +/* + * SQL function json_build_object(variadic "any") + */ +Datum +json_build_object(PG_FUNCTION_ARGS) +{ + Datum *args; + bool *nulls; + Oid *types; + + /* build argument values to build the object */ + int nargs = extract_variadic_args(fcinfo, 0, true, + &args, &types, &nulls); + + if (nargs < 0) + PG_RETURN_NULL(); + + PG_RETURN_DATUM(json_build_object_worker(nargs, args, nulls, types, false, false)); +} + +/* + * degenerate case of json_build_object where it gets 0 arguments. 
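+ * (i.e. json_build_object() with an empty argument list simply yields {})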
+ */ +Datum +json_build_object_noargs(PG_FUNCTION_ARGS) +{ + PG_RETURN_TEXT_P(cstring_to_text_with_len("{}", 2)); +} + +Datum +json_build_array_worker(int nargs, Datum *args, bool *nulls, Oid *types, + bool absent_on_null) +{ + int i; + const char *sep = ""; + StringInfo result; + + result = makeStringInfo(); + + appendStringInfoChar(result, '['); + + for (i = 0; i < nargs; i++) + { + if (absent_on_null && nulls[i]) + continue; + + appendStringInfoString(result, sep); + sep = ", "; + add_json(args[i], nulls[i], result, types[i], false); + } + + appendStringInfoChar(result, ']'); + + return PointerGetDatum(cstring_to_text_with_len(result->data, result->len)); +} + +/* + * SQL function json_build_array(variadic "any") + */ +Datum +json_build_array(PG_FUNCTION_ARGS) +{ + Datum *args; + bool *nulls; + Oid *types; + + /* build argument values to build the object */ + int nargs = extract_variadic_args(fcinfo, 0, true, + &args, &types, &nulls); + + if (nargs < 0) + PG_RETURN_NULL(); + + PG_RETURN_DATUM(json_build_array_worker(nargs, args, nulls, types, false)); +} + +/* + * degenerate case of json_build_array where it gets 0 arguments. + */ +Datum +json_build_array_noargs(PG_FUNCTION_ARGS) +{ + PG_RETURN_TEXT_P(cstring_to_text_with_len("[]", 2)); +} + +/* + * SQL function json_object(text[]) + * + * take a one or two dimensional array of text as key/value pairs + * for a json object. + */ +Datum +json_object(PG_FUNCTION_ARGS) +{ + ArrayType *in_array = PG_GETARG_ARRAYTYPE_P(0); + int ndims = ARR_NDIM(in_array); + StringInfoData result; + Datum *in_datums; + bool *in_nulls; + int in_count, + count, + i; + text *rval; + char *v; + + switch (ndims) + { + case 0: + PG_RETURN_DATUM(CStringGetTextDatum("{}")); + break; + + case 1: + if ((ARR_DIMS(in_array)[0]) % 2) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("array must have even number of elements"))); + break; + + case 2: + if ((ARR_DIMS(in_array)[1]) != 2) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("array must have two columns"))); + break; + + default: + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("wrong number of array subscripts"))); + } + + deconstruct_array_builtin(in_array, TEXTOID, &in_datums, &in_nulls, &in_count); + + count = in_count / 2; + + initStringInfo(&result); + + appendStringInfoChar(&result, '{'); + + for (i = 0; i < count; ++i) + { + if (in_nulls[i * 2]) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("null value not allowed for object key"))); + + v = TextDatumGetCString(in_datums[i * 2]); + if (i > 0) + appendStringInfoString(&result, ", "); + escape_json(&result, v); + appendStringInfoString(&result, " : "); + pfree(v); + if (in_nulls[i * 2 + 1]) + appendStringInfoString(&result, "null"); + else + { + v = TextDatumGetCString(in_datums[i * 2 + 1]); + escape_json(&result, v); + pfree(v); + } + } + + appendStringInfoChar(&result, '}'); + + pfree(in_datums); + pfree(in_nulls); + + rval = cstring_to_text_with_len(result.data, result.len); + pfree(result.data); + + PG_RETURN_TEXT_P(rval); +} + +/* + * SQL function json_object(text[], text[]) + * + * take separate key and value arrays of text to construct a json object + * pairwise. 
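+ * For example, json_object('{a,b}', '{1,2}') produces {"a" : "1", "b" : "2"}.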
+ */ +Datum +json_object_two_arg(PG_FUNCTION_ARGS) +{ + ArrayType *key_array = PG_GETARG_ARRAYTYPE_P(0); + ArrayType *val_array = PG_GETARG_ARRAYTYPE_P(1); + int nkdims = ARR_NDIM(key_array); + int nvdims = ARR_NDIM(val_array); + StringInfoData result; + Datum *key_datums, + *val_datums; + bool *key_nulls, + *val_nulls; + int key_count, + val_count, + i; + text *rval; + char *v; + + if (nkdims > 1 || nkdims != nvdims) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("wrong number of array subscripts"))); + + if (nkdims == 0) + PG_RETURN_DATUM(CStringGetTextDatum("{}")); + + deconstruct_array_builtin(key_array, TEXTOID, &key_datums, &key_nulls, &key_count); + deconstruct_array_builtin(val_array, TEXTOID, &val_datums, &val_nulls, &val_count); + + if (key_count != val_count) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("mismatched array dimensions"))); + + initStringInfo(&result); + + appendStringInfoChar(&result, '{'); + + for (i = 0; i < key_count; ++i) + { + if (key_nulls[i]) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("null value not allowed for object key"))); + + v = TextDatumGetCString(key_datums[i]); + if (i > 0) + appendStringInfoString(&result, ", "); + escape_json(&result, v); + appendStringInfoString(&result, " : "); + pfree(v); + if (val_nulls[i]) + appendStringInfoString(&result, "null"); + else + { + v = TextDatumGetCString(val_datums[i]); + escape_json(&result, v); + pfree(v); + } + } + + appendStringInfoChar(&result, '}'); + + pfree(key_datums); + pfree(key_nulls); + pfree(val_datums); + pfree(val_nulls); + + rval = cstring_to_text_with_len(result.data, result.len); + pfree(result.data); + + PG_RETURN_TEXT_P(rval); +} + + +/* + * Produce a JSON string literal, properly escaping characters in the text. 
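+ * For example, the input say "hi" followed by a newline is emitted as + * "say \"hi\"\n", with the surrounding double quotes included in the output.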
+ */ +void +escape_json(StringInfo buf, const char *str) +{ + const char *p; + + appendStringInfoCharMacro(buf, '"'); + for (p = str; *p; p++) + { + switch (*p) + { + case '\b': + appendStringInfoString(buf, "\\b"); + break; + case '\f': + appendStringInfoString(buf, "\\f"); + break; + case '\n': + appendStringInfoString(buf, "\\n"); + break; + case '\r': + appendStringInfoString(buf, "\\r"); + break; + case '\t': + appendStringInfoString(buf, "\\t"); + break; + case '"': + appendStringInfoString(buf, "\\\""); + break; + case '\\': + appendStringInfoString(buf, "\\\\"); + break; + default: + if ((unsigned char) *p < ' ') + appendStringInfo(buf, "\\u%04x", (int) *p); + else + appendStringInfoCharMacro(buf, *p); + break; + } + } + appendStringInfoCharMacro(buf, '"'); +} + +/* Semantic actions for key uniqueness check */ +static JsonParseErrorType +json_unique_object_start(void *_state) +{ + JsonUniqueParsingState *state = _state; + JsonUniqueStackEntry *entry; + + if (!state->unique) + return JSON_SUCCESS; + + /* push object entry to stack */ + entry = palloc(sizeof(*entry)); + entry->object_id = state->id_counter++; + entry->parent = state->stack; + state->stack = entry; + + return JSON_SUCCESS; +} + +static JsonParseErrorType +json_unique_object_end(void *_state) +{ + JsonUniqueParsingState *state = _state; + JsonUniqueStackEntry *entry; + + if (!state->unique) + return JSON_SUCCESS; + + entry = state->stack; + state->stack = entry->parent; /* pop object from stack */ + pfree(entry); + return JSON_SUCCESS; +} + +static JsonParseErrorType +json_unique_object_field_start(void *_state, char *field, bool isnull) +{ + JsonUniqueParsingState *state = _state; + JsonUniqueStackEntry *entry; + + if (!state->unique) + return JSON_SUCCESS; + + /* find key collision in the current object */ + if (json_unique_check_key(&state->check, field, state->stack->object_id)) + return JSON_SUCCESS; + + state->unique = false; + + /* pop all objects entries */ + while ((entry = state->stack)) + { + state->stack = entry->parent; + pfree(entry); + } + return JSON_SUCCESS; +} + +/* Validate JSON text and additionally check key uniqueness */ +bool +json_validate(text *json, bool check_unique_keys, bool throw_error) +{ + JsonLexContext *lex = makeJsonLexContext(json, check_unique_keys); + JsonSemAction uniqueSemAction = {0}; + JsonUniqueParsingState state; + JsonParseErrorType result; + + if (check_unique_keys) + { + state.lex = lex; + state.stack = NULL; + state.id_counter = 0; + state.unique = true; + json_unique_check_init(&state.check); + + uniqueSemAction.semstate = &state; + uniqueSemAction.object_start = json_unique_object_start; + uniqueSemAction.object_field_start = json_unique_object_field_start; + uniqueSemAction.object_end = json_unique_object_end; + } + + result = pg_parse_json(lex, check_unique_keys ? &uniqueSemAction : &nullSemAction); + + if (result != JSON_SUCCESS) + { + if (throw_error) + json_errsave_error(result, lex, NULL); + + return false; /* invalid json */ + } + + if (check_unique_keys && !state.unique) + { + if (throw_error) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_JSON_OBJECT_KEY_VALUE), + errmsg("duplicate JSON object key value"))); + + return false; /* not unique keys */ + } + + return true; /* ok */ +} + +/* + * SQL function json_typeof(json) -> text + * + * Returns the type of the outermost JSON value as TEXT. Possible types are + * "object", "array", "string", "number", "boolean", and "null". 
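+ * For example, json_typeof('-123.4') returns "number" and json_typeof('null')
+ * returns "null".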
+ * + * Performs a single call to json_lex() to get the first token of the supplied + * value. This initial token uniquely determines the value's type. As our + * input must already have been validated by json_in() or json_recv(), the + * initial token should never be JSON_TOKEN_OBJECT_END, JSON_TOKEN_ARRAY_END, + * JSON_TOKEN_COLON, JSON_TOKEN_COMMA, or JSON_TOKEN_END. + */ +Datum +json_typeof(PG_FUNCTION_ARGS) +{ + text *json = PG_GETARG_TEXT_PP(0); + JsonLexContext *lex = makeJsonLexContext(json, false); + char *type; + JsonTokenType tok; + JsonParseErrorType result; + + /* Lex exactly one token from the input and check its type. */ + result = json_lex(lex); + if (result != JSON_SUCCESS) + json_errsave_error(result, lex, NULL); + tok = lex->token_type; + + switch (tok) + { + case JSON_TOKEN_OBJECT_START: + type = "object"; + break; + case JSON_TOKEN_ARRAY_START: + type = "array"; + break; + case JSON_TOKEN_STRING: + type = "string"; + break; + case JSON_TOKEN_NUMBER: + type = "number"; + break; + case JSON_TOKEN_TRUE: + case JSON_TOKEN_FALSE: + type = "boolean"; + break; + case JSON_TOKEN_NULL: + type = "null"; + break; + default: + elog(ERROR, "unexpected json token: %d", tok); + } + + PG_RETURN_TEXT_P(cstring_to_text(type)); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonb.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonb.c new file mode 100644 index 00000000000..cf43c3f2ded --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonb.c @@ -0,0 +1,2259 @@ +/*------------------------------------------------------------------------- + * + * jsonb.c + * I/O routines for jsonb type + * + * Copyright (c) 2014-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/adt/jsonb.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/transam.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_type.h" +#include "funcapi.h" +#include "libpq/pqformat.h" +#include "miscadmin.h" +#include "parser/parse_coerce.h" +#include "utils/builtins.h" +#include "utils/date.h" +#include "utils/datetime.h" +#include "utils/json.h" +#include "utils/jsonb.h" +#include "utils/jsonfuncs.h" +#include "utils/lsyscache.h" +#include "utils/syscache.h" +#include "utils/typcache.h" + +typedef struct JsonbInState +{ + JsonbParseState *parseState; + JsonbValue *res; + Node *escontext; +} JsonbInState; + +/* unlike with json categories, we need to treat json and jsonb differently */ +typedef enum /* type categories for datum_to_jsonb */ +{ + JSONBTYPE_NULL, /* null, so we didn't bother to identify */ + JSONBTYPE_BOOL, /* boolean (built-in types only) */ + JSONBTYPE_NUMERIC, /* numeric (ditto) */ + JSONBTYPE_DATE, /* we use special formatting for datetimes */ + JSONBTYPE_TIMESTAMP, /* we use special formatting for timestamp */ + JSONBTYPE_TIMESTAMPTZ, /* ... 
and timestamptz */ + JSONBTYPE_JSON, /* JSON */ + JSONBTYPE_JSONB, /* JSONB */ + JSONBTYPE_ARRAY, /* array */ + JSONBTYPE_COMPOSITE, /* composite */ + JSONBTYPE_JSONCAST, /* something with an explicit cast to JSON */ + JSONBTYPE_OTHER /* all else */ +} JsonbTypeCategory; + +typedef struct JsonbAggState +{ + JsonbInState *res; + JsonbTypeCategory key_category; + Oid key_output_func; + JsonbTypeCategory val_category; + Oid val_output_func; +} JsonbAggState; + +static inline Datum jsonb_from_cstring(char *json, int len, Node *escontext); +static bool checkStringLen(size_t len, Node *escontext); +static JsonParseErrorType jsonb_in_object_start(void *pstate); +static JsonParseErrorType jsonb_in_object_end(void *pstate); +static JsonParseErrorType jsonb_in_array_start(void *pstate); +static JsonParseErrorType jsonb_in_array_end(void *pstate); +static JsonParseErrorType jsonb_in_object_field_start(void *pstate, char *fname, bool isnull); +static void jsonb_put_escaped_value(StringInfo out, JsonbValue *scalarVal); +static JsonParseErrorType jsonb_in_scalar(void *pstate, char *token, JsonTokenType tokentype); +static void jsonb_categorize_type(Oid typoid, + JsonbTypeCategory *tcategory, + Oid *outfuncoid); +static void composite_to_jsonb(Datum composite, JsonbInState *result); +static void array_dim_to_jsonb(JsonbInState *result, int dim, int ndims, int *dims, + Datum *vals, bool *nulls, int *valcount, + JsonbTypeCategory tcategory, Oid outfuncoid); +static void array_to_jsonb_internal(Datum array, JsonbInState *result); +static void jsonb_categorize_type(Oid typoid, + JsonbTypeCategory *tcategory, + Oid *outfuncoid); +static void datum_to_jsonb(Datum val, bool is_null, JsonbInState *result, + JsonbTypeCategory tcategory, Oid outfuncoid, + bool key_scalar); +static void add_jsonb(Datum val, bool is_null, JsonbInState *result, + Oid val_type, bool key_scalar); +static JsonbParseState *clone_parse_state(JsonbParseState *state); +static char *JsonbToCStringWorker(StringInfo out, JsonbContainer *in, int estimated_len, bool indent); +static void add_indent(StringInfo out, bool indent, int level); + +/* + * jsonb type input function + */ +Datum +jsonb_in(PG_FUNCTION_ARGS) +{ + char *json = PG_GETARG_CSTRING(0); + + return jsonb_from_cstring(json, strlen(json), fcinfo->context); +} + +/* + * jsonb type recv function + * + * The type is sent as text in binary mode, so this is almost the same + * as the input function, but it's prefixed with a version number so we + * can change the binary format sent in future if necessary. For now, + * only version 1 is supported. 
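+ * (Concretely, the wire format is a single version byte, currently 1, followed
+ * by the JSON payload as text; jsonb_send below produces the matching output.)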
+ */ +Datum +jsonb_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + int version = pq_getmsgint(buf, 1); + char *str; + int nbytes; + + if (version == 1) + str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); + else + elog(ERROR, "unsupported jsonb version number %d", version); + + return jsonb_from_cstring(str, nbytes, NULL); +} + +/* + * jsonb type output function + */ +Datum +jsonb_out(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + char *out; + + out = JsonbToCString(NULL, &jb->root, VARSIZE(jb)); + + PG_RETURN_CSTRING(out); +} + +/* + * jsonb type send function + * + * Just send jsonb as a version number, then a string of text + */ +Datum +jsonb_send(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + StringInfoData buf; + StringInfo jtext = makeStringInfo(); + int version = 1; + + (void) JsonbToCString(jtext, &jb->root, VARSIZE(jb)); + + pq_begintypsend(&buf); + pq_sendint8(&buf, version); + pq_sendtext(&buf, jtext->data, jtext->len); + pfree(jtext->data); + pfree(jtext); + + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * Get the type name of a jsonb container. + */ +static const char * +JsonbContainerTypeName(JsonbContainer *jbc) +{ + JsonbValue scalar; + + if (JsonbExtractScalar(jbc, &scalar)) + return JsonbTypeName(&scalar); + else if (JsonContainerIsArray(jbc)) + return "array"; + else if (JsonContainerIsObject(jbc)) + return "object"; + else + { + elog(ERROR, "invalid jsonb container type: 0x%08x", jbc->header); + return "unknown"; + } +} + +/* + * Get the type name of a jsonb value. + */ +const char * +JsonbTypeName(JsonbValue *val) +{ + switch (val->type) + { + case jbvBinary: + return JsonbContainerTypeName(val->val.binary.data); + case jbvObject: + return "object"; + case jbvArray: + return "array"; + case jbvNumeric: + return "number"; + case jbvString: + return "string"; + case jbvBool: + return "boolean"; + case jbvNull: + return "null"; + case jbvDatetime: + switch (val->val.datetime.typid) + { + case DATEOID: + return "date"; + case TIMEOID: + return "time without time zone"; + case TIMETZOID: + return "time with time zone"; + case TIMESTAMPOID: + return "timestamp without time zone"; + case TIMESTAMPTZOID: + return "timestamp with time zone"; + default: + elog(ERROR, "unrecognized jsonb value datetime type: %d", + val->val.datetime.typid); + } + return "unknown"; + default: + elog(ERROR, "unrecognized jsonb value type: %d", val->type); + return "unknown"; + } +} + +/* + * SQL function jsonb_typeof(jsonb) -> text + * + * This function is here because the analog json function is in json.c, since + * it uses the json parser internals not exposed elsewhere. + */ +Datum +jsonb_typeof(PG_FUNCTION_ARGS) +{ + Jsonb *in = PG_GETARG_JSONB_P(0); + const char *result = JsonbContainerTypeName(&in->root); + + PG_RETURN_TEXT_P(cstring_to_text(result)); +} + +/* + * jsonb_from_cstring + * + * Turns json string into a jsonb Datum. + * + * Uses the json parser (with hooks) to construct a jsonb. + * + * If escontext points to an ErrorSaveContext, errors are reported there + * instead of being thrown. 
+ */ +static inline Datum +jsonb_from_cstring(char *json, int len, Node *escontext) +{ + JsonLexContext *lex; + JsonbInState state; + JsonSemAction sem; + + memset(&state, 0, sizeof(state)); + memset(&sem, 0, sizeof(sem)); + lex = makeJsonLexContextCstringLen(json, len, GetDatabaseEncoding(), true); + + state.escontext = escontext; + sem.semstate = (void *) &state; + + sem.object_start = jsonb_in_object_start; + sem.array_start = jsonb_in_array_start; + sem.object_end = jsonb_in_object_end; + sem.array_end = jsonb_in_array_end; + sem.scalar = jsonb_in_scalar; + sem.object_field_start = jsonb_in_object_field_start; + + if (!pg_parse_json_or_errsave(lex, &sem, escontext)) + return (Datum) 0; + + /* after parsing, the item member has the composed jsonb structure */ + PG_RETURN_POINTER(JsonbValueToJsonb(state.res)); +} + +static bool +checkStringLen(size_t len, Node *escontext) +{ + if (len > JENTRY_OFFLENMASK) + ereturn(escontext, false, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("string too long to represent as jsonb string"), + errdetail("Due to an implementation restriction, jsonb strings cannot exceed %d bytes.", + JENTRY_OFFLENMASK))); + + return true; +} + +static JsonParseErrorType +jsonb_in_object_start(void *pstate) +{ + JsonbInState *_state = (JsonbInState *) pstate; + + _state->res = pushJsonbValue(&_state->parseState, WJB_BEGIN_OBJECT, NULL); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +jsonb_in_object_end(void *pstate) +{ + JsonbInState *_state = (JsonbInState *) pstate; + + _state->res = pushJsonbValue(&_state->parseState, WJB_END_OBJECT, NULL); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +jsonb_in_array_start(void *pstate) +{ + JsonbInState *_state = (JsonbInState *) pstate; + + _state->res = pushJsonbValue(&_state->parseState, WJB_BEGIN_ARRAY, NULL); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +jsonb_in_array_end(void *pstate) +{ + JsonbInState *_state = (JsonbInState *) pstate; + + _state->res = pushJsonbValue(&_state->parseState, WJB_END_ARRAY, NULL); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +jsonb_in_object_field_start(void *pstate, char *fname, bool isnull) +{ + JsonbInState *_state = (JsonbInState *) pstate; + JsonbValue v; + + Assert(fname != NULL); + v.type = jbvString; + v.val.string.len = strlen(fname); + if (!checkStringLen(v.val.string.len, _state->escontext)) + return JSON_SEM_ACTION_FAILED; + v.val.string.val = fname; + + _state->res = pushJsonbValue(&_state->parseState, WJB_KEY, &v); + + return JSON_SUCCESS; +} + +static void +jsonb_put_escaped_value(StringInfo out, JsonbValue *scalarVal) +{ + switch (scalarVal->type) + { + case jbvNull: + appendBinaryStringInfo(out, "null", 4); + break; + case jbvString: + escape_json(out, pnstrdup(scalarVal->val.string.val, scalarVal->val.string.len)); + break; + case jbvNumeric: + appendStringInfoString(out, + DatumGetCString(DirectFunctionCall1(numeric_out, + PointerGetDatum(scalarVal->val.numeric)))); + break; + case jbvBool: + if (scalarVal->val.boolean) + appendBinaryStringInfo(out, "true", 4); + else + appendBinaryStringInfo(out, "false", 5); + break; + default: + elog(ERROR, "unknown jsonb scalar type"); + } +} + +/* + * For jsonb we always want the de-escaped value - that's what's in token + */ +static JsonParseErrorType +jsonb_in_scalar(void *pstate, char *token, JsonTokenType tokentype) +{ + JsonbInState *_state = (JsonbInState *) pstate; + JsonbValue v; + Datum numd; + + switch (tokentype) + { + + case JSON_TOKEN_STRING: + Assert(token != NULL); + 
v.type = jbvString; + v.val.string.len = strlen(token); + if (!checkStringLen(v.val.string.len, _state->escontext)) + return JSON_SEM_ACTION_FAILED; + v.val.string.val = token; + break; + case JSON_TOKEN_NUMBER: + + /* + * No need to check size of numeric values, because maximum + * numeric size is well below the JsonbValue restriction + */ + Assert(token != NULL); + v.type = jbvNumeric; + if (!DirectInputFunctionCallSafe(numeric_in, token, + InvalidOid, -1, + _state->escontext, + &numd)) + return JSON_SEM_ACTION_FAILED; + v.val.numeric = DatumGetNumeric(numd); + break; + case JSON_TOKEN_TRUE: + v.type = jbvBool; + v.val.boolean = true; + break; + case JSON_TOKEN_FALSE: + v.type = jbvBool; + v.val.boolean = false; + break; + case JSON_TOKEN_NULL: + v.type = jbvNull; + break; + default: + /* should not be possible */ + elog(ERROR, "invalid json token type"); + break; + } + + if (_state->parseState == NULL) + { + /* single scalar */ + JsonbValue va; + + va.type = jbvArray; + va.val.array.rawScalar = true; + va.val.array.nElems = 1; + + _state->res = pushJsonbValue(&_state->parseState, WJB_BEGIN_ARRAY, &va); + _state->res = pushJsonbValue(&_state->parseState, WJB_ELEM, &v); + _state->res = pushJsonbValue(&_state->parseState, WJB_END_ARRAY, NULL); + } + else + { + JsonbValue *o = &_state->parseState->contVal; + + switch (o->type) + { + case jbvArray: + _state->res = pushJsonbValue(&_state->parseState, WJB_ELEM, &v); + break; + case jbvObject: + _state->res = pushJsonbValue(&_state->parseState, WJB_VALUE, &v); + break; + default: + elog(ERROR, "unexpected parent of nested structure"); + } + } + + return JSON_SUCCESS; +} + +/* + * JsonbToCString + * Converts jsonb value to a C-string. + * + * If 'out' argument is non-null, the resulting C-string is stored inside the + * StringBuffer. The resulting string is always returned. + * + * A typical case for passing the StringInfo in rather than NULL is where the + * caller wants access to the len attribute without having to call strlen, e.g. + * if they are converting it to a text* object. + */ +char * +JsonbToCString(StringInfo out, JsonbContainer *in, int estimated_len) +{ + return JsonbToCStringWorker(out, in, estimated_len, false); +} + +/* + * same thing but with indentation turned on + */ +char * +JsonbToCStringIndent(StringInfo out, JsonbContainer *in, int estimated_len) +{ + return JsonbToCStringWorker(out, in, estimated_len, true); +} + +/* + * common worker for above two functions + */ +static char * +JsonbToCStringWorker(StringInfo out, JsonbContainer *in, int estimated_len, bool indent) +{ + bool first = true; + JsonbIterator *it; + JsonbValue v; + JsonbIteratorToken type = WJB_DONE; + int level = 0; + bool redo_switch = false; + + /* If we are indenting, don't add a space after a comma */ + int ispaces = indent ? 1 : 2; + + /* + * Don't indent the very first item. This gets set to the indent flag at + * the bottom of the loop. + */ + bool use_indent = false; + bool raw_scalar = false; + bool last_was_key = false; + + if (out == NULL) + out = makeStringInfo(); + + enlargeStringInfo(out, (estimated_len >= 0) ? 
estimated_len : 64); + + it = JsonbIteratorInit(in); + + while (redo_switch || + ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)) + { + redo_switch = false; + switch (type) + { + case WJB_BEGIN_ARRAY: + if (!first) + appendBinaryStringInfo(out, ", ", ispaces); + + if (!v.val.array.rawScalar) + { + add_indent(out, use_indent && !last_was_key, level); + appendStringInfoCharMacro(out, '['); + } + else + raw_scalar = true; + + first = true; + level++; + break; + case WJB_BEGIN_OBJECT: + if (!first) + appendBinaryStringInfo(out, ", ", ispaces); + + add_indent(out, use_indent && !last_was_key, level); + appendStringInfoCharMacro(out, '{'); + + first = true; + level++; + break; + case WJB_KEY: + if (!first) + appendBinaryStringInfo(out, ", ", ispaces); + first = true; + + add_indent(out, use_indent, level); + + /* json rules guarantee this is a string */ + jsonb_put_escaped_value(out, &v); + appendBinaryStringInfo(out, ": ", 2); + + type = JsonbIteratorNext(&it, &v, false); + if (type == WJB_VALUE) + { + first = false; + jsonb_put_escaped_value(out, &v); + } + else + { + Assert(type == WJB_BEGIN_OBJECT || type == WJB_BEGIN_ARRAY); + + /* + * We need to rerun the current switch() since we need to + * output the object which we just got from the iterator + * before calling the iterator again. + */ + redo_switch = true; + } + break; + case WJB_ELEM: + if (!first) + appendBinaryStringInfo(out, ", ", ispaces); + first = false; + + if (!raw_scalar) + add_indent(out, use_indent, level); + jsonb_put_escaped_value(out, &v); + break; + case WJB_END_ARRAY: + level--; + if (!raw_scalar) + { + add_indent(out, use_indent, level); + appendStringInfoCharMacro(out, ']'); + } + first = false; + break; + case WJB_END_OBJECT: + level--; + add_indent(out, use_indent, level); + appendStringInfoCharMacro(out, '}'); + first = false; + break; + default: + elog(ERROR, "unknown jsonb iterator token type"); + } + use_indent = indent; + last_was_key = redo_switch; + } + + Assert(level == 0); + + return out->data; +} + +static void +add_indent(StringInfo out, bool indent, int level) +{ + if (indent) + { + appendStringInfoCharMacro(out, '\n'); + appendStringInfoSpaces(out, level * 4); + } +} + + +/* + * Determine how we want to render values of a given type in datum_to_jsonb. + * + * Given the datatype OID, return its JsonbTypeCategory, as well as the type's + * output function OID. If the returned category is JSONBTYPE_JSONCAST, + * we return the OID of the relevant cast function instead. + */ +static void +jsonb_categorize_type(Oid typoid, + JsonbTypeCategory *tcategory, + Oid *outfuncoid) +{ + bool typisvarlena; + + /* Look through any domain */ + typoid = getBaseType(typoid); + + *outfuncoid = InvalidOid; + + /* + * We need to get the output function for everything except date and + * timestamp types, booleans, array and composite types, json and jsonb, + * and non-builtin types where there's a cast to json. In this last case + * we return the oid of the cast function instead. 
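+ * (Domains are looked through first, so e.g. a domain over numeric is reported
+ * as JSONBTYPE_NUMERIC together with numeric's output function.)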
+ */ + + switch (typoid) + { + case BOOLOID: + *tcategory = JSONBTYPE_BOOL; + break; + + case INT2OID: + case INT4OID: + case INT8OID: + case FLOAT4OID: + case FLOAT8OID: + case NUMERICOID: + getTypeOutputInfo(typoid, outfuncoid, &typisvarlena); + *tcategory = JSONBTYPE_NUMERIC; + break; + + case DATEOID: + *tcategory = JSONBTYPE_DATE; + break; + + case TIMESTAMPOID: + *tcategory = JSONBTYPE_TIMESTAMP; + break; + + case TIMESTAMPTZOID: + *tcategory = JSONBTYPE_TIMESTAMPTZ; + break; + + case JSONBOID: + *tcategory = JSONBTYPE_JSONB; + break; + + case JSONOID: + *tcategory = JSONBTYPE_JSON; + break; + + default: + /* Check for arrays and composites */ + if (OidIsValid(get_element_type(typoid)) || typoid == ANYARRAYOID + || typoid == ANYCOMPATIBLEARRAYOID || typoid == RECORDARRAYOID) + *tcategory = JSONBTYPE_ARRAY; + else if (type_is_rowtype(typoid)) /* includes RECORDOID */ + *tcategory = JSONBTYPE_COMPOSITE; + else + { + /* It's probably the general case ... */ + *tcategory = JSONBTYPE_OTHER; + + /* + * but first let's look for a cast to json (note: not to + * jsonb) if it's not built-in. + */ + if (typoid >= FirstNormalObjectId) + { + Oid castfunc; + CoercionPathType ctype; + + ctype = find_coercion_pathway(JSONOID, typoid, + COERCION_EXPLICIT, &castfunc); + if (ctype == COERCION_PATH_FUNC && OidIsValid(castfunc)) + { + *tcategory = JSONBTYPE_JSONCAST; + *outfuncoid = castfunc; + } + else + { + /* not a cast type, so just get the usual output func */ + getTypeOutputInfo(typoid, outfuncoid, &typisvarlena); + } + } + else + { + /* any other builtin type */ + getTypeOutputInfo(typoid, outfuncoid, &typisvarlena); + } + break; + } + } +} + +/* + * Turn a Datum into jsonb, adding it to the result JsonbInState. + * + * tcategory and outfuncoid are from a previous call to json_categorize_type, + * except that if is_null is true then they can be invalid. + * + * If key_scalar is true, the value is stored as a key, so insist + * it's of an acceptable type, and force it to be a jbvString. + * + * Note: currently, we assume that result->escontext is NULL and errors + * will be thrown. + */ +static void +datum_to_jsonb(Datum val, bool is_null, JsonbInState *result, + JsonbTypeCategory tcategory, Oid outfuncoid, + bool key_scalar) +{ + char *outputstr; + bool numeric_error; + JsonbValue jb; + bool scalar_jsonb = false; + + check_stack_depth(); + + /* Convert val to a JsonbValue in jb (in most cases) */ + if (is_null) + { + Assert(!key_scalar); + jb.type = jbvNull; + } + else if (key_scalar && + (tcategory == JSONBTYPE_ARRAY || + tcategory == JSONBTYPE_COMPOSITE || + tcategory == JSONBTYPE_JSON || + tcategory == JSONBTYPE_JSONB || + tcategory == JSONBTYPE_JSONCAST)) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("key value must be scalar, not array, composite, or json"))); + } + else + { + if (tcategory == JSONBTYPE_JSONCAST) + val = OidFunctionCall1(outfuncoid, val); + + switch (tcategory) + { + case JSONBTYPE_ARRAY: + array_to_jsonb_internal(val, result); + break; + case JSONBTYPE_COMPOSITE: + composite_to_jsonb(val, result); + break; + case JSONBTYPE_BOOL: + if (key_scalar) + { + outputstr = DatumGetBool(val) ? 
"true" : "false"; + jb.type = jbvString; + jb.val.string.len = strlen(outputstr); + jb.val.string.val = outputstr; + } + else + { + jb.type = jbvBool; + jb.val.boolean = DatumGetBool(val); + } + break; + case JSONBTYPE_NUMERIC: + outputstr = OidOutputFunctionCall(outfuncoid, val); + if (key_scalar) + { + /* always quote keys */ + jb.type = jbvString; + jb.val.string.len = strlen(outputstr); + jb.val.string.val = outputstr; + } + else + { + /* + * Make it numeric if it's a valid JSON number, otherwise + * a string. Invalid numeric output will always have an + * 'N' or 'n' in it (I think). + */ + numeric_error = (strchr(outputstr, 'N') != NULL || + strchr(outputstr, 'n') != NULL); + if (!numeric_error) + { + Datum numd; + + jb.type = jbvNumeric; + numd = DirectFunctionCall3(numeric_in, + CStringGetDatum(outputstr), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(-1)); + jb.val.numeric = DatumGetNumeric(numd); + pfree(outputstr); + } + else + { + jb.type = jbvString; + jb.val.string.len = strlen(outputstr); + jb.val.string.val = outputstr; + } + } + break; + case JSONBTYPE_DATE: + jb.type = jbvString; + jb.val.string.val = JsonEncodeDateTime(NULL, val, + DATEOID, NULL); + jb.val.string.len = strlen(jb.val.string.val); + break; + case JSONBTYPE_TIMESTAMP: + jb.type = jbvString; + jb.val.string.val = JsonEncodeDateTime(NULL, val, + TIMESTAMPOID, NULL); + jb.val.string.len = strlen(jb.val.string.val); + break; + case JSONBTYPE_TIMESTAMPTZ: + jb.type = jbvString; + jb.val.string.val = JsonEncodeDateTime(NULL, val, + TIMESTAMPTZOID, NULL); + jb.val.string.len = strlen(jb.val.string.val); + break; + case JSONBTYPE_JSONCAST: + case JSONBTYPE_JSON: + { + /* parse the json right into the existing result object */ + JsonLexContext *lex; + JsonSemAction sem; + text *json = DatumGetTextPP(val); + + lex = makeJsonLexContext(json, true); + + memset(&sem, 0, sizeof(sem)); + + sem.semstate = (void *) result; + + sem.object_start = jsonb_in_object_start; + sem.array_start = jsonb_in_array_start; + sem.object_end = jsonb_in_object_end; + sem.array_end = jsonb_in_array_end; + sem.scalar = jsonb_in_scalar; + sem.object_field_start = jsonb_in_object_field_start; + + pg_parse_json_or_ereport(lex, &sem); + } + break; + case JSONBTYPE_JSONB: + { + Jsonb *jsonb = DatumGetJsonbP(val); + JsonbIterator *it; + + it = JsonbIteratorInit(&jsonb->root); + + if (JB_ROOT_IS_SCALAR(jsonb)) + { + (void) JsonbIteratorNext(&it, &jb, true); + Assert(jb.type == jbvArray); + (void) JsonbIteratorNext(&it, &jb, true); + scalar_jsonb = true; + } + else + { + JsonbIteratorToken type; + + while ((type = JsonbIteratorNext(&it, &jb, false)) + != WJB_DONE) + { + if (type == WJB_END_ARRAY || type == WJB_END_OBJECT || + type == WJB_BEGIN_ARRAY || type == WJB_BEGIN_OBJECT) + result->res = pushJsonbValue(&result->parseState, + type, NULL); + else + result->res = pushJsonbValue(&result->parseState, + type, &jb); + } + } + } + break; + default: + outputstr = OidOutputFunctionCall(outfuncoid, val); + jb.type = jbvString; + jb.val.string.len = strlen(outputstr); + (void) checkStringLen(jb.val.string.len, NULL); + jb.val.string.val = outputstr; + break; + } + } + + /* Now insert jb into result, unless we did it recursively */ + if (!is_null && !scalar_jsonb && + tcategory >= JSONBTYPE_JSON && tcategory <= JSONBTYPE_JSONCAST) + { + /* work has been done recursively */ + return; + } + else if (result->parseState == NULL) + { + /* single root scalar */ + JsonbValue va; + + va.type = jbvArray; + va.val.array.rawScalar = true; + va.val.array.nElems = 1; + 
+ result->res = pushJsonbValue(&result->parseState, WJB_BEGIN_ARRAY, &va); + result->res = pushJsonbValue(&result->parseState, WJB_ELEM, &jb); + result->res = pushJsonbValue(&result->parseState, WJB_END_ARRAY, NULL); + } + else + { + JsonbValue *o = &result->parseState->contVal; + + switch (o->type) + { + case jbvArray: + result->res = pushJsonbValue(&result->parseState, WJB_ELEM, &jb); + break; + case jbvObject: + result->res = pushJsonbValue(&result->parseState, + key_scalar ? WJB_KEY : WJB_VALUE, + &jb); + break; + default: + elog(ERROR, "unexpected parent of nested structure"); + } + } +} + +/* + * Process a single dimension of an array. + * If it's the innermost dimension, output the values, otherwise call + * ourselves recursively to process the next dimension. + */ +static void +array_dim_to_jsonb(JsonbInState *result, int dim, int ndims, int *dims, Datum *vals, + bool *nulls, int *valcount, JsonbTypeCategory tcategory, + Oid outfuncoid) +{ + int i; + + Assert(dim < ndims); + + result->res = pushJsonbValue(&result->parseState, WJB_BEGIN_ARRAY, NULL); + + for (i = 1; i <= dims[dim]; i++) + { + if (dim + 1 == ndims) + { + datum_to_jsonb(vals[*valcount], nulls[*valcount], result, tcategory, + outfuncoid, false); + (*valcount)++; + } + else + { + array_dim_to_jsonb(result, dim + 1, ndims, dims, vals, nulls, + valcount, tcategory, outfuncoid); + } + } + + result->res = pushJsonbValue(&result->parseState, WJB_END_ARRAY, NULL); +} + +/* + * Turn an array into JSON. + */ +static void +array_to_jsonb_internal(Datum array, JsonbInState *result) +{ + ArrayType *v = DatumGetArrayTypeP(array); + Oid element_type = ARR_ELEMTYPE(v); + int *dim; + int ndim; + int nitems; + int count = 0; + Datum *elements; + bool *nulls; + int16 typlen; + bool typbyval; + char typalign; + JsonbTypeCategory tcategory; + Oid outfuncoid; + + ndim = ARR_NDIM(v); + dim = ARR_DIMS(v); + nitems = ArrayGetNItems(ndim, dim); + + if (nitems <= 0) + { + result->res = pushJsonbValue(&result->parseState, WJB_BEGIN_ARRAY, NULL); + result->res = pushJsonbValue(&result->parseState, WJB_END_ARRAY, NULL); + return; + } + + get_typlenbyvalalign(element_type, + &typlen, &typbyval, &typalign); + + jsonb_categorize_type(element_type, + &tcategory, &outfuncoid); + + deconstruct_array(v, element_type, typlen, typbyval, + typalign, &elements, &nulls, + &nitems); + + array_dim_to_jsonb(result, 0, ndim, dim, elements, nulls, &count, tcategory, + outfuncoid); + + pfree(elements); + pfree(nulls); +} + +/* + * Turn a composite / record into JSON. 
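+ * For example, to_jsonb(row(1, 'foo')) produces {"f1": 1, "f2": "foo"}, using
+ * the record's column names (f1, f2, ... for an anonymous record) as keys.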
+ */ +static void +composite_to_jsonb(Datum composite, JsonbInState *result) +{ + HeapTupleHeader td; + Oid tupType; + int32 tupTypmod; + TupleDesc tupdesc; + HeapTupleData tmptup, + *tuple; + int i; + + td = DatumGetHeapTupleHeader(composite); + + /* Extract rowtype info and find a tupdesc */ + tupType = HeapTupleHeaderGetTypeId(td); + tupTypmod = HeapTupleHeaderGetTypMod(td); + tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); + + /* Build a temporary HeapTuple control structure */ + tmptup.t_len = HeapTupleHeaderGetDatumLength(td); + tmptup.t_data = td; + tuple = &tmptup; + + result->res = pushJsonbValue(&result->parseState, WJB_BEGIN_OBJECT, NULL); + + for (i = 0; i < tupdesc->natts; i++) + { + Datum val; + bool isnull; + char *attname; + JsonbTypeCategory tcategory; + Oid outfuncoid; + JsonbValue v; + Form_pg_attribute att = TupleDescAttr(tupdesc, i); + + if (att->attisdropped) + continue; + + attname = NameStr(att->attname); + + v.type = jbvString; + /* don't need checkStringLen here - can't exceed maximum name length */ + v.val.string.len = strlen(attname); + v.val.string.val = attname; + + result->res = pushJsonbValue(&result->parseState, WJB_KEY, &v); + + val = heap_getattr(tuple, i + 1, tupdesc, &isnull); + + if (isnull) + { + tcategory = JSONBTYPE_NULL; + outfuncoid = InvalidOid; + } + else + jsonb_categorize_type(att->atttypid, &tcategory, &outfuncoid); + + datum_to_jsonb(val, isnull, result, tcategory, outfuncoid, false); + } + + result->res = pushJsonbValue(&result->parseState, WJB_END_OBJECT, NULL); + ReleaseTupleDesc(tupdesc); +} + +/* + * Append JSON text for "val" to "result". + * + * This is just a thin wrapper around datum_to_jsonb. If the same type will be + * printed many times, avoid using this; better to do the jsonb_categorize_type + * lookups only once. + */ + +static void +add_jsonb(Datum val, bool is_null, JsonbInState *result, + Oid val_type, bool key_scalar) +{ + JsonbTypeCategory tcategory; + Oid outfuncoid; + + if (val_type == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not determine input data type"))); + + if (is_null) + { + tcategory = JSONBTYPE_NULL; + outfuncoid = InvalidOid; + } + else + jsonb_categorize_type(val_type, + &tcategory, &outfuncoid); + + datum_to_jsonb(val, is_null, result, tcategory, outfuncoid, key_scalar); +} + +/* + * Is the given type immutable when coming out of a JSONB context? + * + * At present, datetimes are all considered mutable, because they + * depend on timezone. XXX we should also drill down into objects and + * arrays, but do not. 
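+ * For example, a timestamptz value renders differently under different TimeZone
+ * settings, so it is reported as mutable; booleans and plain numerics are not.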
+ */ +bool +to_jsonb_is_immutable(Oid typoid) +{ + JsonbTypeCategory tcategory; + Oid outfuncoid; + + jsonb_categorize_type(typoid, &tcategory, &outfuncoid); + + switch (tcategory) + { + case JSONBTYPE_NULL: + case JSONBTYPE_BOOL: + case JSONBTYPE_JSON: + case JSONBTYPE_JSONB: + return true; + + case JSONBTYPE_DATE: + case JSONBTYPE_TIMESTAMP: + case JSONBTYPE_TIMESTAMPTZ: + return false; + + case JSONBTYPE_ARRAY: + return false; /* TODO recurse into elements */ + + case JSONBTYPE_COMPOSITE: + return false; /* TODO recurse into fields */ + + case JSONBTYPE_NUMERIC: + case JSONBTYPE_JSONCAST: + case JSONBTYPE_OTHER: + return func_volatile(outfuncoid) == PROVOLATILE_IMMUTABLE; + } + + return false; /* not reached */ +} + +/* + * SQL function to_jsonb(anyvalue) + */ +Datum +to_jsonb(PG_FUNCTION_ARGS) +{ + Datum val = PG_GETARG_DATUM(0); + Oid val_type = get_fn_expr_argtype(fcinfo->flinfo, 0); + JsonbInState result; + JsonbTypeCategory tcategory; + Oid outfuncoid; + + if (val_type == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not determine input data type"))); + + jsonb_categorize_type(val_type, + &tcategory, &outfuncoid); + + memset(&result, 0, sizeof(JsonbInState)); + + datum_to_jsonb(val, false, &result, tcategory, outfuncoid, false); + + PG_RETURN_POINTER(JsonbValueToJsonb(result.res)); +} + +Datum +jsonb_build_object_worker(int nargs, Datum *args, bool *nulls, Oid *types, + bool absent_on_null, bool unique_keys) +{ + int i; + JsonbInState result; + + if (nargs % 2 != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("argument list must have even number of elements"), + /* translator: %s is a SQL function name */ + errhint("The arguments of %s must consist of alternating keys and values.", + "jsonb_build_object()"))); + + memset(&result, 0, sizeof(JsonbInState)); + + result.res = pushJsonbValue(&result.parseState, WJB_BEGIN_OBJECT, NULL); + result.parseState->unique_keys = unique_keys; + result.parseState->skip_nulls = absent_on_null; + + for (i = 0; i < nargs; i += 2) + { + /* process key */ + bool skip; + + if (nulls[i]) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("argument %d: key must not be null", i + 1))); + + /* skip null values if absent_on_null */ + skip = absent_on_null && nulls[i + 1]; + + /* we need to save skipped keys for the key uniqueness check */ + if (skip && !unique_keys) + continue; + + add_jsonb(args[i], false, &result, types[i], true); + + /* process value */ + add_jsonb(args[i + 1], nulls[i + 1], &result, types[i + 1], false); + } + + result.res = pushJsonbValue(&result.parseState, WJB_END_OBJECT, NULL); + + return JsonbPGetDatum(JsonbValueToJsonb(result.res)); +} + +/* + * SQL function jsonb_build_object(variadic "any") + */ +Datum +jsonb_build_object(PG_FUNCTION_ARGS) +{ + Datum *args; + bool *nulls; + Oid *types; + + /* build argument values to build the object */ + int nargs = extract_variadic_args(fcinfo, 0, true, + &args, &types, &nulls); + + if (nargs < 0) + PG_RETURN_NULL(); + + PG_RETURN_DATUM(jsonb_build_object_worker(nargs, args, nulls, types, false, false)); +} + +/* + * degenerate case of jsonb_build_object where it gets 0 arguments. 
+ */ +Datum +jsonb_build_object_noargs(PG_FUNCTION_ARGS) +{ + JsonbInState result; + + memset(&result, 0, sizeof(JsonbInState)); + + (void) pushJsonbValue(&result.parseState, WJB_BEGIN_OBJECT, NULL); + result.res = pushJsonbValue(&result.parseState, WJB_END_OBJECT, NULL); + + PG_RETURN_POINTER(JsonbValueToJsonb(result.res)); +} + +Datum +jsonb_build_array_worker(int nargs, Datum *args, bool *nulls, Oid *types, + bool absent_on_null) +{ + int i; + JsonbInState result; + + memset(&result, 0, sizeof(JsonbInState)); + + result.res = pushJsonbValue(&result.parseState, WJB_BEGIN_ARRAY, NULL); + + for (i = 0; i < nargs; i++) + { + if (absent_on_null && nulls[i]) + continue; + + add_jsonb(args[i], nulls[i], &result, types[i], false); + } + + result.res = pushJsonbValue(&result.parseState, WJB_END_ARRAY, NULL); + + return JsonbPGetDatum(JsonbValueToJsonb(result.res)); +} + +/* + * SQL function jsonb_build_array(variadic "any") + */ +Datum +jsonb_build_array(PG_FUNCTION_ARGS) +{ + Datum *args; + bool *nulls; + Oid *types; + + /* build argument values to build the object */ + int nargs = extract_variadic_args(fcinfo, 0, true, + &args, &types, &nulls); + + if (nargs < 0) + PG_RETURN_NULL(); + + PG_RETURN_DATUM(jsonb_build_array_worker(nargs, args, nulls, types, false)); +} + + +/* + * degenerate case of jsonb_build_array where it gets 0 arguments. + */ +Datum +jsonb_build_array_noargs(PG_FUNCTION_ARGS) +{ + JsonbInState result; + + memset(&result, 0, sizeof(JsonbInState)); + + (void) pushJsonbValue(&result.parseState, WJB_BEGIN_ARRAY, NULL); + result.res = pushJsonbValue(&result.parseState, WJB_END_ARRAY, NULL); + + PG_RETURN_POINTER(JsonbValueToJsonb(result.res)); +} + + +/* + * SQL function jsonb_object(text[]) + * + * take a one or two dimensional array of text as name value pairs + * for a jsonb object. 
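+ * For example, jsonb_object('{a, 1, b, "def"}') and
+ * jsonb_object('{{a, 1}, {b, "def"}}') both produce {"a": "1", "b": "def"}.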
+ * + */ +Datum +jsonb_object(PG_FUNCTION_ARGS) +{ + ArrayType *in_array = PG_GETARG_ARRAYTYPE_P(0); + int ndims = ARR_NDIM(in_array); + Datum *in_datums; + bool *in_nulls; + int in_count, + count, + i; + JsonbInState result; + + memset(&result, 0, sizeof(JsonbInState)); + + (void) pushJsonbValue(&result.parseState, WJB_BEGIN_OBJECT, NULL); + + switch (ndims) + { + case 0: + goto close_object; + break; + + case 1: + if ((ARR_DIMS(in_array)[0]) % 2) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("array must have even number of elements"))); + break; + + case 2: + if ((ARR_DIMS(in_array)[1]) != 2) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("array must have two columns"))); + break; + + default: + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("wrong number of array subscripts"))); + } + + deconstruct_array_builtin(in_array, TEXTOID, &in_datums, &in_nulls, &in_count); + + count = in_count / 2; + + for (i = 0; i < count; ++i) + { + JsonbValue v; + char *str; + int len; + + if (in_nulls[i * 2]) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("null value not allowed for object key"))); + + str = TextDatumGetCString(in_datums[i * 2]); + len = strlen(str); + + v.type = jbvString; + + v.val.string.len = len; + v.val.string.val = str; + + (void) pushJsonbValue(&result.parseState, WJB_KEY, &v); + + if (in_nulls[i * 2 + 1]) + { + v.type = jbvNull; + } + else + { + str = TextDatumGetCString(in_datums[i * 2 + 1]); + len = strlen(str); + + v.type = jbvString; + + v.val.string.len = len; + v.val.string.val = str; + } + + (void) pushJsonbValue(&result.parseState, WJB_VALUE, &v); + } + + pfree(in_datums); + pfree(in_nulls); + +close_object: + result.res = pushJsonbValue(&result.parseState, WJB_END_OBJECT, NULL); + + PG_RETURN_POINTER(JsonbValueToJsonb(result.res)); +} + +/* + * SQL function jsonb_object(text[], text[]) + * + * take separate name and value arrays of text to construct a jsonb object + * pairwise. 
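+ * For example, jsonb_object('{a,b}', '{1,2}') produces {"a": "1", "b": "2"}.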
+ */ +Datum +jsonb_object_two_arg(PG_FUNCTION_ARGS) +{ + ArrayType *key_array = PG_GETARG_ARRAYTYPE_P(0); + ArrayType *val_array = PG_GETARG_ARRAYTYPE_P(1); + int nkdims = ARR_NDIM(key_array); + int nvdims = ARR_NDIM(val_array); + Datum *key_datums, + *val_datums; + bool *key_nulls, + *val_nulls; + int key_count, + val_count, + i; + JsonbInState result; + + memset(&result, 0, sizeof(JsonbInState)); + + (void) pushJsonbValue(&result.parseState, WJB_BEGIN_OBJECT, NULL); + + if (nkdims > 1 || nkdims != nvdims) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("wrong number of array subscripts"))); + + if (nkdims == 0) + goto close_object; + + deconstruct_array_builtin(key_array, TEXTOID, &key_datums, &key_nulls, &key_count); + deconstruct_array_builtin(val_array, TEXTOID, &val_datums, &val_nulls, &val_count); + + if (key_count != val_count) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("mismatched array dimensions"))); + + for (i = 0; i < key_count; ++i) + { + JsonbValue v; + char *str; + int len; + + if (key_nulls[i]) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("null value not allowed for object key"))); + + str = TextDatumGetCString(key_datums[i]); + len = strlen(str); + + v.type = jbvString; + + v.val.string.len = len; + v.val.string.val = str; + + (void) pushJsonbValue(&result.parseState, WJB_KEY, &v); + + if (val_nulls[i]) + { + v.type = jbvNull; + } + else + { + str = TextDatumGetCString(val_datums[i]); + len = strlen(str); + + v.type = jbvString; + + v.val.string.len = len; + v.val.string.val = str; + } + + (void) pushJsonbValue(&result.parseState, WJB_VALUE, &v); + } + + pfree(key_datums); + pfree(key_nulls); + pfree(val_datums); + pfree(val_nulls); + +close_object: + result.res = pushJsonbValue(&result.parseState, WJB_END_OBJECT, NULL); + + PG_RETURN_POINTER(JsonbValueToJsonb(result.res)); +} + + +/* + * shallow clone of a parse state, suitable for use in aggregate + * final functions that will only append to the values rather than + * change them. 
+ */ +static JsonbParseState * +clone_parse_state(JsonbParseState *state) +{ + JsonbParseState *result, + *icursor, + *ocursor; + + if (state == NULL) + return NULL; + + result = palloc(sizeof(JsonbParseState)); + icursor = state; + ocursor = result; + for (;;) + { + ocursor->contVal = icursor->contVal; + ocursor->size = icursor->size; + ocursor->unique_keys = icursor->unique_keys; + ocursor->skip_nulls = icursor->skip_nulls; + icursor = icursor->next; + if (icursor == NULL) + break; + ocursor->next = palloc(sizeof(JsonbParseState)); + ocursor = ocursor->next; + } + ocursor->next = NULL; + + return result; +} + +static Datum +jsonb_agg_transfn_worker(FunctionCallInfo fcinfo, bool absent_on_null) +{ + MemoryContext oldcontext, + aggcontext; + JsonbAggState *state; + JsonbInState elem; + Datum val; + JsonbInState *result; + bool single_scalar = false; + JsonbIterator *it; + Jsonb *jbelem; + JsonbValue v; + JsonbIteratorToken type; + + if (!AggCheckCallContext(fcinfo, &aggcontext)) + { + /* cannot be called directly because of internal-type argument */ + elog(ERROR, "jsonb_agg_transfn called in non-aggregate context"); + } + + /* set up the accumulator on the first go round */ + + if (PG_ARGISNULL(0)) + { + Oid arg_type = get_fn_expr_argtype(fcinfo->flinfo, 1); + + if (arg_type == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not determine input data type"))); + + oldcontext = MemoryContextSwitchTo(aggcontext); + state = palloc(sizeof(JsonbAggState)); + result = palloc0(sizeof(JsonbInState)); + state->res = result; + result->res = pushJsonbValue(&result->parseState, + WJB_BEGIN_ARRAY, NULL); + MemoryContextSwitchTo(oldcontext); + + jsonb_categorize_type(arg_type, &state->val_category, + &state->val_output_func); + } + else + { + state = (JsonbAggState *) PG_GETARG_POINTER(0); + result = state->res; + } + + if (absent_on_null && PG_ARGISNULL(1)) + PG_RETURN_POINTER(state); + + /* turn the argument into jsonb in the normal function context */ + + val = PG_ARGISNULL(1) ? 
(Datum) 0 : PG_GETARG_DATUM(1); + + memset(&elem, 0, sizeof(JsonbInState)); + + datum_to_jsonb(val, PG_ARGISNULL(1), &elem, state->val_category, + state->val_output_func, false); + + jbelem = JsonbValueToJsonb(elem.res); + + /* switch to the aggregate context for accumulation operations */ + + oldcontext = MemoryContextSwitchTo(aggcontext); + + it = JsonbIteratorInit(&jbelem->root); + + while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) + { + switch (type) + { + case WJB_BEGIN_ARRAY: + if (v.val.array.rawScalar) + single_scalar = true; + else + result->res = pushJsonbValue(&result->parseState, + type, NULL); + break; + case WJB_END_ARRAY: + if (!single_scalar) + result->res = pushJsonbValue(&result->parseState, + type, NULL); + break; + case WJB_BEGIN_OBJECT: + case WJB_END_OBJECT: + result->res = pushJsonbValue(&result->parseState, + type, NULL); + break; + case WJB_ELEM: + case WJB_KEY: + case WJB_VALUE: + if (v.type == jbvString) + { + /* copy string values in the aggregate context */ + char *buf = palloc(v.val.string.len + 1); + + snprintf(buf, v.val.string.len + 1, "%s", v.val.string.val); + v.val.string.val = buf; + } + else if (v.type == jbvNumeric) + { + /* same for numeric */ + v.val.numeric = + DatumGetNumeric(DirectFunctionCall1(numeric_uplus, + NumericGetDatum(v.val.numeric))); + } + result->res = pushJsonbValue(&result->parseState, + type, &v); + break; + default: + elog(ERROR, "unknown jsonb iterator token type"); + } + } + + MemoryContextSwitchTo(oldcontext); + + PG_RETURN_POINTER(state); +} + +/* + * jsonb_agg aggregate function + */ +Datum +jsonb_agg_transfn(PG_FUNCTION_ARGS) +{ + return jsonb_agg_transfn_worker(fcinfo, false); +} + +/* + * jsonb_agg_strict aggregate function + */ +Datum +jsonb_agg_strict_transfn(PG_FUNCTION_ARGS) +{ + return jsonb_agg_transfn_worker(fcinfo, true); +} + +Datum +jsonb_agg_finalfn(PG_FUNCTION_ARGS) +{ + JsonbAggState *arg; + JsonbInState result; + Jsonb *out; + + /* cannot be called directly because of internal-type argument */ + Assert(AggCheckCallContext(fcinfo, NULL)); + + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); /* returns null iff no input values */ + + arg = (JsonbAggState *) PG_GETARG_POINTER(0); + + /* + * We need to do a shallow clone of the argument in case the final + * function is called more than once, so we avoid changing the argument. A + * shallow clone is sufficient as we aren't going to change any of the + * values, just add the final array end marker. 
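+ * For example, jsonb_agg(v) over the values 1, NULL, 2 produces [1, null, 2];
+ * the strict variant skips the NULL and produces [1, 2].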
+ */ + memset(&result, 0, sizeof(JsonbInState)); + + result.parseState = clone_parse_state(arg->res->parseState); + + result.res = pushJsonbValue(&result.parseState, + WJB_END_ARRAY, NULL); + + out = JsonbValueToJsonb(result.res); + + PG_RETURN_POINTER(out); +} + +static Datum +jsonb_object_agg_transfn_worker(FunctionCallInfo fcinfo, + bool absent_on_null, bool unique_keys) +{ + MemoryContext oldcontext, + aggcontext; + JsonbInState elem; + JsonbAggState *state; + Datum val; + JsonbInState *result; + bool single_scalar; + JsonbIterator *it; + Jsonb *jbkey, + *jbval; + JsonbValue v; + JsonbIteratorToken type; + bool skip; + + if (!AggCheckCallContext(fcinfo, &aggcontext)) + { + /* cannot be called directly because of internal-type argument */ + elog(ERROR, "jsonb_object_agg_transfn called in non-aggregate context"); + } + + /* set up the accumulator on the first go round */ + + if (PG_ARGISNULL(0)) + { + Oid arg_type; + + oldcontext = MemoryContextSwitchTo(aggcontext); + state = palloc(sizeof(JsonbAggState)); + result = palloc0(sizeof(JsonbInState)); + state->res = result; + result->res = pushJsonbValue(&result->parseState, + WJB_BEGIN_OBJECT, NULL); + result->parseState->unique_keys = unique_keys; + result->parseState->skip_nulls = absent_on_null; + + MemoryContextSwitchTo(oldcontext); + + arg_type = get_fn_expr_argtype(fcinfo->flinfo, 1); + + if (arg_type == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not determine input data type"))); + + jsonb_categorize_type(arg_type, &state->key_category, + &state->key_output_func); + + arg_type = get_fn_expr_argtype(fcinfo->flinfo, 2); + + if (arg_type == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not determine input data type"))); + + jsonb_categorize_type(arg_type, &state->val_category, + &state->val_output_func); + } + else + { + state = (JsonbAggState *) PG_GETARG_POINTER(0); + result = state->res; + } + + /* turn the argument into jsonb in the normal function context */ + + if (PG_ARGISNULL(1)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("field name must not be null"))); + + /* + * Skip null values if absent_on_null unless key uniqueness check is + * needed (because we must save keys in this case). + */ + skip = absent_on_null && PG_ARGISNULL(2); + + if (skip && !unique_keys) + PG_RETURN_POINTER(state); + + val = PG_GETARG_DATUM(1); + + memset(&elem, 0, sizeof(JsonbInState)); + + datum_to_jsonb(val, false, &elem, state->key_category, + state->key_output_func, true); + + jbkey = JsonbValueToJsonb(elem.res); + + val = PG_ARGISNULL(2) ? (Datum) 0 : PG_GETARG_DATUM(2); + + memset(&elem, 0, sizeof(JsonbInState)); + + datum_to_jsonb(val, PG_ARGISNULL(2), &elem, state->val_category, + state->val_output_func, false); + + jbval = JsonbValueToJsonb(elem.res); + + it = JsonbIteratorInit(&jbkey->root); + + /* switch to the aggregate context for accumulation operations */ + + oldcontext = MemoryContextSwitchTo(aggcontext); + + /* + * keys should be scalar, and we should have already checked for that + * above when calling datum_to_jsonb, so we only need to look for these + * things. 
+ */ + + while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) + { + switch (type) + { + case WJB_BEGIN_ARRAY: + if (!v.val.array.rawScalar) + elog(ERROR, "unexpected structure for key"); + break; + case WJB_ELEM: + if (v.type == jbvString) + { + /* copy string values in the aggregate context */ + char *buf = palloc(v.val.string.len + 1); + + snprintf(buf, v.val.string.len + 1, "%s", v.val.string.val); + v.val.string.val = buf; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("object keys must be strings"))); + } + result->res = pushJsonbValue(&result->parseState, + WJB_KEY, &v); + + if (skip) + { + v.type = jbvNull; + result->res = pushJsonbValue(&result->parseState, + WJB_VALUE, &v); + MemoryContextSwitchTo(oldcontext); + PG_RETURN_POINTER(state); + } + + break; + case WJB_END_ARRAY: + break; + default: + elog(ERROR, "unexpected structure for key"); + break; + } + } + + it = JsonbIteratorInit(&jbval->root); + + single_scalar = false; + + /* + * values can be anything, including structured and null, so we treat them + * as in json_agg_transfn, except that single scalars are always pushed as + * WJB_VALUE items. + */ + + while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) + { + switch (type) + { + case WJB_BEGIN_ARRAY: + if (v.val.array.rawScalar) + single_scalar = true; + else + result->res = pushJsonbValue(&result->parseState, + type, NULL); + break; + case WJB_END_ARRAY: + if (!single_scalar) + result->res = pushJsonbValue(&result->parseState, + type, NULL); + break; + case WJB_BEGIN_OBJECT: + case WJB_END_OBJECT: + result->res = pushJsonbValue(&result->parseState, + type, NULL); + break; + case WJB_ELEM: + case WJB_KEY: + case WJB_VALUE: + if (v.type == jbvString) + { + /* copy string values in the aggregate context */ + char *buf = palloc(v.val.string.len + 1); + + snprintf(buf, v.val.string.len + 1, "%s", v.val.string.val); + v.val.string.val = buf; + } + else if (v.type == jbvNumeric) + { + /* same for numeric */ + v.val.numeric = + DatumGetNumeric(DirectFunctionCall1(numeric_uplus, + NumericGetDatum(v.val.numeric))); + } + result->res = pushJsonbValue(&result->parseState, + single_scalar ? 
WJB_VALUE : type, + &v); + break; + default: + elog(ERROR, "unknown jsonb iterator token type"); + } + } + + MemoryContextSwitchTo(oldcontext); + + PG_RETURN_POINTER(state); +} + +/* + * jsonb_object_agg aggregate function + */ +Datum +jsonb_object_agg_transfn(PG_FUNCTION_ARGS) +{ + return jsonb_object_agg_transfn_worker(fcinfo, false, false); +} + + +/* + * jsonb_object_agg_strict aggregate function + */ +Datum +jsonb_object_agg_strict_transfn(PG_FUNCTION_ARGS) +{ + return jsonb_object_agg_transfn_worker(fcinfo, true, false); +} + +/* + * jsonb_object_agg_unique aggregate function + */ +Datum +jsonb_object_agg_unique_transfn(PG_FUNCTION_ARGS) +{ + return jsonb_object_agg_transfn_worker(fcinfo, false, true); +} + +/* + * jsonb_object_agg_unique_strict aggregate function + */ +Datum +jsonb_object_agg_unique_strict_transfn(PG_FUNCTION_ARGS) +{ + return jsonb_object_agg_transfn_worker(fcinfo, true, true); +} + +Datum +jsonb_object_agg_finalfn(PG_FUNCTION_ARGS) +{ + JsonbAggState *arg; + JsonbInState result; + Jsonb *out; + + /* cannot be called directly because of internal-type argument */ + Assert(AggCheckCallContext(fcinfo, NULL)); + + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); /* returns null iff no input values */ + + arg = (JsonbAggState *) PG_GETARG_POINTER(0); + + /* + * We need to do a shallow clone of the argument's res field in case the + * final function is called more than once, so we avoid changing the + * aggregate state value. A shallow clone is sufficient as we aren't + * going to change any of the values, just add the final object end + * marker. + */ + memset(&result, 0, sizeof(JsonbInState)); + + result.parseState = clone_parse_state(arg->res->parseState); + + result.res = pushJsonbValue(&result.parseState, + WJB_END_OBJECT, NULL); + + out = JsonbValueToJsonb(result.res); + + PG_RETURN_POINTER(out); +} + + +/* + * Extract scalar value from raw-scalar pseudo-array jsonb. + */ +bool +JsonbExtractScalar(JsonbContainer *jbc, JsonbValue *res) +{ + JsonbIterator *it; + JsonbIteratorToken tok PG_USED_FOR_ASSERTS_ONLY; + JsonbValue tmp; + + if (!JsonContainerIsArray(jbc) || !JsonContainerIsScalar(jbc)) + { + /* inform caller about actual type of container */ + res->type = (JsonContainerIsArray(jbc)) ? jbvArray : jbvObject; + return false; + } + + /* + * A root scalar is stored as an array of one element, so we get the array + * and then its first (and only) member. 
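+ * (For instance, the jsonb scalar 5 is stored as the one-element pseudo-array
+ * [5] with the rawScalar flag set, which the assertions below rely on.)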
+ */ + it = JsonbIteratorInit(jbc); + + tok = JsonbIteratorNext(&it, &tmp, true); + Assert(tok == WJB_BEGIN_ARRAY); + Assert(tmp.val.array.nElems == 1 && tmp.val.array.rawScalar); + + tok = JsonbIteratorNext(&it, res, true); + Assert(tok == WJB_ELEM); + Assert(IsAJsonbScalar(res)); + + tok = JsonbIteratorNext(&it, &tmp, true); + Assert(tok == WJB_END_ARRAY); + + tok = JsonbIteratorNext(&it, &tmp, true); + Assert(tok == WJB_DONE); + + return true; +} + +/* + * Emit correct, translatable cast error message + */ +static void +cannotCastJsonbValue(enum jbvType type, const char *sqltype) +{ + static const struct + { + enum jbvType type; + const char *msg; + } + messages[] = + { + {jbvNull, gettext_noop("cannot cast jsonb null to type %s")}, + {jbvString, gettext_noop("cannot cast jsonb string to type %s")}, + {jbvNumeric, gettext_noop("cannot cast jsonb numeric to type %s")}, + {jbvBool, gettext_noop("cannot cast jsonb boolean to type %s")}, + {jbvArray, gettext_noop("cannot cast jsonb array to type %s")}, + {jbvObject, gettext_noop("cannot cast jsonb object to type %s")}, + {jbvBinary, gettext_noop("cannot cast jsonb array or object to type %s")} + }; + int i; + + for (i = 0; i < lengthof(messages); i++) + if (messages[i].type == type) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg(messages[i].msg, sqltype))); + + /* should be unreachable */ + elog(ERROR, "unknown jsonb type: %d", (int) type); +} + +Datum +jsonb_bool(PG_FUNCTION_ARGS) +{ + Jsonb *in = PG_GETARG_JSONB_P(0); + JsonbValue v; + + if (!JsonbExtractScalar(&in->root, &v) || v.type != jbvBool) + cannotCastJsonbValue(v.type, "boolean"); + + PG_FREE_IF_COPY(in, 0); + + PG_RETURN_BOOL(v.val.boolean); +} + +Datum +jsonb_numeric(PG_FUNCTION_ARGS) +{ + Jsonb *in = PG_GETARG_JSONB_P(0); + JsonbValue v; + Numeric retValue; + + if (!JsonbExtractScalar(&in->root, &v) || v.type != jbvNumeric) + cannotCastJsonbValue(v.type, "numeric"); + + /* + * v.val.numeric points into jsonb body, so we need to make a copy to + * return + */ + retValue = DatumGetNumericCopy(NumericGetDatum(v.val.numeric)); + + PG_FREE_IF_COPY(in, 0); + + PG_RETURN_NUMERIC(retValue); +} + +Datum +jsonb_int2(PG_FUNCTION_ARGS) +{ + Jsonb *in = PG_GETARG_JSONB_P(0); + JsonbValue v; + Datum retValue; + + if (!JsonbExtractScalar(&in->root, &v) || v.type != jbvNumeric) + cannotCastJsonbValue(v.type, "smallint"); + + retValue = DirectFunctionCall1(numeric_int2, + NumericGetDatum(v.val.numeric)); + + PG_FREE_IF_COPY(in, 0); + + PG_RETURN_DATUM(retValue); +} + +Datum +jsonb_int4(PG_FUNCTION_ARGS) +{ + Jsonb *in = PG_GETARG_JSONB_P(0); + JsonbValue v; + Datum retValue; + + if (!JsonbExtractScalar(&in->root, &v) || v.type != jbvNumeric) + cannotCastJsonbValue(v.type, "integer"); + + retValue = DirectFunctionCall1(numeric_int4, + NumericGetDatum(v.val.numeric)); + + PG_FREE_IF_COPY(in, 0); + + PG_RETURN_DATUM(retValue); +} + +Datum +jsonb_int8(PG_FUNCTION_ARGS) +{ + Jsonb *in = PG_GETARG_JSONB_P(0); + JsonbValue v; + Datum retValue; + + if (!JsonbExtractScalar(&in->root, &v) || v.type != jbvNumeric) + cannotCastJsonbValue(v.type, "bigint"); + + retValue = DirectFunctionCall1(numeric_int8, + NumericGetDatum(v.val.numeric)); + + PG_FREE_IF_COPY(in, 0); + + PG_RETURN_DATUM(retValue); +} + +Datum +jsonb_float4(PG_FUNCTION_ARGS) +{ + Jsonb *in = PG_GETARG_JSONB_P(0); + JsonbValue v; + Datum retValue; + + if (!JsonbExtractScalar(&in->root, &v) || v.type != jbvNumeric) + cannotCastJsonbValue(v.type, "real"); + + retValue = DirectFunctionCall1(numeric_float4, + 
NumericGetDatum(v.val.numeric)); + + PG_FREE_IF_COPY(in, 0); + + PG_RETURN_DATUM(retValue); +} + +Datum +jsonb_float8(PG_FUNCTION_ARGS) +{ + Jsonb *in = PG_GETARG_JSONB_P(0); + JsonbValue v; + Datum retValue; + + if (!JsonbExtractScalar(&in->root, &v) || v.type != jbvNumeric) + cannotCastJsonbValue(v.type, "double precision"); + + retValue = DirectFunctionCall1(numeric_float8, + NumericGetDatum(v.val.numeric)); + + PG_FREE_IF_COPY(in, 0); + + PG_RETURN_DATUM(retValue); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonb_gin.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonb_gin.c new file mode 100644 index 00000000000..e941439d749 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonb_gin.c @@ -0,0 +1,1409 @@ +/*------------------------------------------------------------------------- + * + * jsonb_gin.c + * GIN support functions for jsonb + * + * Copyright (c) 2014-2023, PostgreSQL Global Development Group + * + * We provide two opclasses for jsonb indexing: jsonb_ops and jsonb_path_ops. + * For their description see json.sgml and comments in jsonb.h. + * + * The operators support, among others, "jsonb @? jsonpath" and + * "jsonb @@ jsonpath". Expressions containing these operators are easily + * expressed through each other. + * + * jb @? 'path' <=> jb @@ 'EXISTS(path)' + * jb @@ 'expr' <=> jb @? '$ ? (expr)' + * + * Thus, we consider only the @@ operator below; for the @? operator the same + * reasoning applies via the equivalent jb @@ 'EXISTS(path)' form. + * + * The result of jsonpath query extraction is a tree whose leaf nodes are index + * entries and whose non-leaf nodes are AND/OR logical expressions. Basically we + * extract the following statements out of a jsonpath: + * + * 1) "accessors_chain = const", + * 2) "EXISTS(accessors_chain)". + * + * An accessors chain may consist of .key, [*] and [index] accessors. jsonb_ops + * additionally supports .* and .**. + * + * For now, both jsonb_ops and jsonb_path_ops support only statements of + * the 1st kind. jsonb_ops might also support statements of the 2nd kind, + * but given that we have no statistics, keys extracted from an accessors chain + * are likely non-selective. Therefore, we choose not to confuse the optimizer + * and skip statements of the 2nd kind altogether. In future versions that + * might be changed. + * + * In jsonb_ops a statement of the 1st kind is split into an expression of + * AND'ed keys and the const. Sometimes the const might be interpreted as either + * a value or a key in jsonb_ops. Then a statement of the 1st kind is decomposed + * into the expression below. + * + * key1 AND key2 AND ... AND keyN AND (const_as_value OR const_as_key) + * + * jsonb_path_ops transforms each statement of the 1st kind into the single hash + * entry below. + * + * HASH(key1, key2, ... , keyN, const) + * + * Although statements of the 2nd kind are not supported by either jsonb_ops or + * jsonb_path_ops, EXISTS(path) expressions might still be supported + * when statements of the 1st kind can be extracted out of their filters. 
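To make the decomposition above concrete, consider the statement $.a.b == "x". The sketch below is a hypothetical illustration only (ToyNode merely mimics the shape of the JsonPathGinNode structure defined later in this file): it prints the entry tree jsonb_ops would build for that statement and the single hash entry jsonb_path_ops would emit instead.

#include <stdio.h>

/* Toy mirror of JsonPathGinNode: AND/OR expressions over leaf entries. */
typedef struct ToyNode
{
	const char *op;				/* "AND"/"OR" for inner nodes, NULL for a leaf */
	const char *entry;			/* leaf text, e.g. "KEY a" */
	const struct ToyNode *args[3];
	int			nargs;
} ToyNode;

static void
print_node(const ToyNode *n)
{
	int			i;

	if (n->op == NULL)
	{
		printf("%s", n->entry);
		return;
	}
	printf("%s(", n->op);
	for (i = 0; i < n->nargs; i++)
	{
		if (i > 0)
			printf(", ");
		print_node(n->args[i]);
	}
	printf(")");
}

int
main(void)
{
	/* Entries a jsonb_ops extraction of  $.a.b == "x"  would produce */
	ToyNode		key_a = {NULL, "KEY a"};
	ToyNode		key_b = {NULL, "KEY b"};
	ToyNode		val_x = {NULL, "VALUE \"x\""};
	ToyNode		key_x = {NULL, "KEY \"x\""};
	ToyNode		or_x = {"OR", NULL, {&val_x, &key_x}, 2};
	ToyNode		and_all = {"AND", NULL, {&key_a, &key_b, &or_x}, 3};

	print_node(&and_all);		/* AND(KEY a, KEY b, OR(VALUE "x", KEY "x")) */
	printf("\n");

	/* jsonb_path_ops would instead emit the single entry below */
	printf("HASH(a, b, \"x\")\n");
	return 0;
}

The practical difference is that jsonb_ops reuses the same key entries across many queries, while jsonb_path_ops pays one more selective entry per statement.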
+ * + * IDENTIFICATION + * src/backend/utils/adt/jsonb_gin.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/gin.h" +#include "access/stratnum.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_type.h" +#include "common/hashfn.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/jsonb.h" +#include "utils/jsonpath.h" +#include "utils/varlena.h" + +typedef struct PathHashStack +{ + uint32 hash; + struct PathHashStack *parent; +} PathHashStack; + +/* Buffer for GIN entries */ +typedef struct GinEntries +{ + Datum *buf; + int count; + int allocated; +} GinEntries; + +typedef enum JsonPathGinNodeType +{ + JSP_GIN_OR, + JSP_GIN_AND, + JSP_GIN_ENTRY +} JsonPathGinNodeType; + +typedef struct JsonPathGinNode JsonPathGinNode; + +/* Node in jsonpath expression tree */ +struct JsonPathGinNode +{ + JsonPathGinNodeType type; + union + { + int nargs; /* valid for OR and AND nodes */ + int entryIndex; /* index in GinEntries array, valid for ENTRY + * nodes after entries output */ + Datum entryDatum; /* path hash or key name/scalar, valid for + * ENTRY nodes before entries output */ + } val; + JsonPathGinNode *args[FLEXIBLE_ARRAY_MEMBER]; /* valid for OR and AND + * nodes */ +}; + +/* + * jsonb_ops entry extracted from jsonpath item. Corresponding path item + * may be: '.key', '.*', '.**', '[index]' or '[*]'. + * Entry type is stored in 'type' field. + */ +typedef struct JsonPathGinPathItem +{ + struct JsonPathGinPathItem *parent; + Datum keyName; /* key name (for '.key' path item) or NULL */ + JsonPathItemType type; /* type of jsonpath item */ +} JsonPathGinPathItem; + +/* GIN representation of the extracted json path */ +typedef union JsonPathGinPath +{ + JsonPathGinPathItem *items; /* list of path items (jsonb_ops) */ + uint32 hash; /* hash of the path (jsonb_path_ops) */ +} JsonPathGinPath; + +typedef struct JsonPathGinContext JsonPathGinContext; + +/* Callback, which stores information about path item into JsonPathGinPath */ +typedef bool (*JsonPathGinAddPathItemFunc) (JsonPathGinPath *path, + JsonPathItem *jsp); + +/* + * Callback, which extracts set of nodes from statement of 1st kind + * (scalar != NULL) or statement of 2nd kind (scalar == NULL). + */ +typedef List *(*JsonPathGinExtractNodesFunc) (JsonPathGinContext *cxt, + JsonPathGinPath path, + JsonbValue *scalar, + List *nodes); + +/* Context for jsonpath entries extraction */ +struct JsonPathGinContext +{ + JsonPathGinAddPathItemFunc add_path_item; + JsonPathGinExtractNodesFunc extract_nodes; + bool lax; +}; + +static Datum make_text_key(char flag, const char *str, int len); +static Datum make_scalar_key(const JsonbValue *scalarVal, bool is_key); + +static JsonPathGinNode *extract_jsp_bool_expr(JsonPathGinContext *cxt, + JsonPathGinPath path, JsonPathItem *jsp, bool not); + + +/* Initialize GinEntries struct */ +static void +init_gin_entries(GinEntries *entries, int preallocated) +{ + entries->allocated = preallocated; + entries->buf = preallocated ? 
palloc(sizeof(Datum) * preallocated) : NULL; + entries->count = 0; +} + +/* Add new entry to GinEntries */ +static int +add_gin_entry(GinEntries *entries, Datum entry) +{ + int id = entries->count; + + if (entries->count >= entries->allocated) + { + if (entries->allocated) + { + entries->allocated *= 2; + entries->buf = repalloc(entries->buf, + sizeof(Datum) * entries->allocated); + } + else + { + entries->allocated = 8; + entries->buf = palloc(sizeof(Datum) * entries->allocated); + } + } + + entries->buf[entries->count++] = entry; + + return id; +} + +/* + * + * jsonb_ops GIN opclass support functions + * + */ + +Datum +gin_compare_jsonb(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + int32 result; + char *a1p, + *a2p; + int len1, + len2; + + a1p = VARDATA_ANY(arg1); + a2p = VARDATA_ANY(arg2); + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + /* Compare text as bttextcmp does, but always using C collation */ + result = varstr_cmp(a1p, len1, a2p, len2, C_COLLATION_OID); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_INT32(result); +} + +Datum +gin_extract_jsonb(PG_FUNCTION_ARGS) +{ + Jsonb *jb = (Jsonb *) PG_GETARG_JSONB_P(0); + int32 *nentries = (int32 *) PG_GETARG_POINTER(1); + int total = JB_ROOT_COUNT(jb); + JsonbIterator *it; + JsonbValue v; + JsonbIteratorToken r; + GinEntries entries; + + /* If the root level is empty, we certainly have no keys */ + if (total == 0) + { + *nentries = 0; + PG_RETURN_POINTER(NULL); + } + + /* Otherwise, use 2 * root count as initial estimate of result size */ + init_gin_entries(&entries, 2 * total); + + it = JsonbIteratorInit(&jb->root); + + while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) + { + switch (r) + { + case WJB_KEY: + add_gin_entry(&entries, make_scalar_key(&v, true)); + break; + case WJB_ELEM: + /* Pretend string array elements are keys, see jsonb.h */ + add_gin_entry(&entries, make_scalar_key(&v, v.type == jbvString)); + break; + case WJB_VALUE: + add_gin_entry(&entries, make_scalar_key(&v, false)); + break; + default: + /* we can ignore structural items */ + break; + } + } + + *nentries = entries.count; + + PG_RETURN_POINTER(entries.buf); +} + +/* Append JsonPathGinPathItem to JsonPathGinPath (jsonb_ops) */ +static bool +jsonb_ops__add_path_item(JsonPathGinPath *path, JsonPathItem *jsp) +{ + JsonPathGinPathItem *pentry; + Datum keyName; + + switch (jsp->type) + { + case jpiRoot: + path->items = NULL; /* reset path */ + return true; + + case jpiKey: + { + int len; + char *key = jspGetString(jsp, &len); + + keyName = make_text_key(JGINFLAG_KEY, key, len); + break; + } + + case jpiAny: + case jpiAnyKey: + case jpiAnyArray: + case jpiIndexArray: + keyName = PointerGetDatum(NULL); + break; + + default: + /* other path items like item methods are not supported */ + return false; + } + + pentry = palloc(sizeof(*pentry)); + + pentry->type = jsp->type; + pentry->keyName = keyName; + pentry->parent = path->items; + + path->items = pentry; + + return true; +} + +/* Combine existing path hash with next key hash (jsonb_path_ops) */ +static bool +jsonb_path_ops__add_path_item(JsonPathGinPath *path, JsonPathItem *jsp) +{ + switch (jsp->type) + { + case jpiRoot: + path->hash = 0; /* reset path hash */ + return true; + + case jpiKey: + { + JsonbValue jbv; + + jbv.type = jbvString; + jbv.val.string.val = jspGetString(jsp, &jbv.val.string.len); + + JsonbHashScalarValue(&jbv, &path->hash); + return true; + } + + case jpiIndexArray: + case jpiAnyArray: 
+ return true; /* path hash is unchanged */ + + default: + /* other items (wildcard paths, item methods) are not supported */ + return false; + } +} + +static JsonPathGinNode * +make_jsp_entry_node(Datum entry) +{ + JsonPathGinNode *node = palloc(offsetof(JsonPathGinNode, args)); + + node->type = JSP_GIN_ENTRY; + node->val.entryDatum = entry; + + return node; +} + +static JsonPathGinNode * +make_jsp_entry_node_scalar(JsonbValue *scalar, bool iskey) +{ + return make_jsp_entry_node(make_scalar_key(scalar, iskey)); +} + +static JsonPathGinNode * +make_jsp_expr_node(JsonPathGinNodeType type, int nargs) +{ + JsonPathGinNode *node = palloc(offsetof(JsonPathGinNode, args) + + sizeof(node->args[0]) * nargs); + + node->type = type; + node->val.nargs = nargs; + + return node; +} + +static JsonPathGinNode * +make_jsp_expr_node_args(JsonPathGinNodeType type, List *args) +{ + JsonPathGinNode *node = make_jsp_expr_node(type, list_length(args)); + ListCell *lc; + int i = 0; + + foreach(lc, args) + node->args[i++] = lfirst(lc); + + return node; +} + +static JsonPathGinNode * +make_jsp_expr_node_binary(JsonPathGinNodeType type, + JsonPathGinNode *arg1, JsonPathGinNode *arg2) +{ + JsonPathGinNode *node = make_jsp_expr_node(type, 2); + + node->args[0] = arg1; + node->args[1] = arg2; + + return node; +} + +/* Append a list of nodes from the jsonpath (jsonb_ops). */ +static List * +jsonb_ops__extract_nodes(JsonPathGinContext *cxt, JsonPathGinPath path, + JsonbValue *scalar, List *nodes) +{ + JsonPathGinPathItem *pentry; + + if (scalar) + { + JsonPathGinNode *node; + + /* + * Append path entry nodes only if scalar is provided. See header + * comment for details. + */ + for (pentry = path.items; pentry; pentry = pentry->parent) + { + if (pentry->type == jpiKey) /* only keys are indexed */ + nodes = lappend(nodes, make_jsp_entry_node(pentry->keyName)); + } + + /* Append scalar node for equality queries. */ + if (scalar->type == jbvString) + { + JsonPathGinPathItem *last = path.items; + GinTernaryValue key_entry; + + /* + * Assuming that jsonb_ops interprets string array elements as + * keys, we may extract key or non-key entry or even both. In the + * latter case we create OR-node. It is possible in lax mode + * where arrays are automatically unwrapped, or in strict mode for + * jpiAny items. + */ + + if (cxt->lax) + key_entry = GIN_MAYBE; + else if (!last) /* root ($) */ + key_entry = GIN_FALSE; + else if (last->type == jpiAnyArray || last->type == jpiIndexArray) + key_entry = GIN_TRUE; + else if (last->type == jpiAny) + key_entry = GIN_MAYBE; + else + key_entry = GIN_FALSE; + + if (key_entry == GIN_MAYBE) + { + JsonPathGinNode *n1 = make_jsp_entry_node_scalar(scalar, true); + JsonPathGinNode *n2 = make_jsp_entry_node_scalar(scalar, false); + + node = make_jsp_expr_node_binary(JSP_GIN_OR, n1, n2); + } + else + { + node = make_jsp_entry_node_scalar(scalar, + key_entry == GIN_TRUE); + } + } + else + { + node = make_jsp_entry_node_scalar(scalar, false); + } + + nodes = lappend(nodes, node); + } + + return nodes; +} + +/* Append a list of nodes from the jsonpath (jsonb_path_ops). 
*/ +static List * +jsonb_path_ops__extract_nodes(JsonPathGinContext *cxt, JsonPathGinPath path, + JsonbValue *scalar, List *nodes) +{ + if (scalar) + { + /* append path hash node for equality queries */ + uint32 hash = path.hash; + + JsonbHashScalarValue(scalar, &hash); + + return lappend(nodes, + make_jsp_entry_node(UInt32GetDatum(hash))); + } + else + { + /* jsonb_path_ops doesn't support EXISTS queries => nothing to append */ + return nodes; + } +} + +/* + * Extract a list of expression nodes that need to be AND-ed by the caller. + * Extracted expression is 'path == scalar' if 'scalar' is non-NULL, and + * 'EXISTS(path)' otherwise. + */ +static List * +extract_jsp_path_expr_nodes(JsonPathGinContext *cxt, JsonPathGinPath path, + JsonPathItem *jsp, JsonbValue *scalar) +{ + JsonPathItem next; + List *nodes = NIL; + + for (;;) + { + switch (jsp->type) + { + case jpiCurrent: + break; + + case jpiFilter: + { + JsonPathItem arg; + JsonPathGinNode *filter; + + jspGetArg(jsp, &arg); + + filter = extract_jsp_bool_expr(cxt, path, &arg, false); + + if (filter) + nodes = lappend(nodes, filter); + + break; + } + + default: + if (!cxt->add_path_item(&path, jsp)) + + /* + * Path is not supported by the index opclass, return only + * the extracted filter nodes. + */ + return nodes; + break; + } + + if (!jspGetNext(jsp, &next)) + break; + + jsp = &next; + } + + /* + * Append nodes from the path expression itself to the already extracted + * list of filter nodes. + */ + return cxt->extract_nodes(cxt, path, scalar, nodes); +} + +/* + * Extract an expression node from one of following jsonpath path expressions: + * EXISTS(jsp) (when 'scalar' is NULL) + * jsp == scalar (when 'scalar' is not NULL). + * + * The current path (@) is passed in 'path'. + */ +static JsonPathGinNode * +extract_jsp_path_expr(JsonPathGinContext *cxt, JsonPathGinPath path, + JsonPathItem *jsp, JsonbValue *scalar) +{ + /* extract a list of nodes to be AND-ed */ + List *nodes = extract_jsp_path_expr_nodes(cxt, path, jsp, scalar); + + if (nodes == NIL) + /* no nodes were extracted => full scan is needed for this path */ + return NULL; + + if (list_length(nodes) == 1) + return linitial(nodes); /* avoid extra AND-node */ + + /* construct AND-node for path with filters */ + return make_jsp_expr_node_args(JSP_GIN_AND, nodes); +} + +/* Recursively extract nodes from the boolean jsonpath expression. */ +static JsonPathGinNode * +extract_jsp_bool_expr(JsonPathGinContext *cxt, JsonPathGinPath path, + JsonPathItem *jsp, bool not) +{ + check_stack_depth(); + + switch (jsp->type) + { + case jpiAnd: /* expr && expr */ + case jpiOr: /* expr || expr */ + { + JsonPathItem arg; + JsonPathGinNode *larg; + JsonPathGinNode *rarg; + JsonPathGinNodeType type; + + jspGetLeftArg(jsp, &arg); + larg = extract_jsp_bool_expr(cxt, path, &arg, not); + + jspGetRightArg(jsp, &arg); + rarg = extract_jsp_bool_expr(cxt, path, &arg, not); + + if (!larg || !rarg) + { + if (jsp->type == jpiOr) + return NULL; + + return larg ? larg : rarg; + } + + type = not ^ (jsp->type == jpiAnd) ? 
JSP_GIN_AND : JSP_GIN_OR; + + return make_jsp_expr_node_binary(type, larg, rarg); + } + + case jpiNot: /* !expr */ + { + JsonPathItem arg; + + jspGetArg(jsp, &arg); + + /* extract child expression inverting 'not' flag */ + return extract_jsp_bool_expr(cxt, path, &arg, !not); + } + + case jpiExists: /* EXISTS(path) */ + { + JsonPathItem arg; + + if (not) + return NULL; /* NOT EXISTS is not supported */ + + jspGetArg(jsp, &arg); + + return extract_jsp_path_expr(cxt, path, &arg, NULL); + } + + case jpiNotEqual: + + /* + * 'not' == true case is not supported here because '!(path != + * scalar)' is not equivalent to 'path == scalar' in the general + * case because of sequence comparison semantics: 'path == scalar' + * === 'EXISTS (path, @ == scalar)', '!(path != scalar)' === + * 'FOR_ALL(path, @ == scalar)'. So, we should translate '!(path + * != scalar)' into GIN query 'path == scalar || EMPTY(path)', but + * 'EMPTY(path)' queries are not supported by the both jsonb + * opclasses. However in strict mode we could omit 'EMPTY(path)' + * part if the path can return exactly one item (it does not + * contain wildcard accessors or item methods like .keyvalue() + * etc.). + */ + return NULL; + + case jpiEqual: /* path == scalar */ + { + JsonPathItem left_item; + JsonPathItem right_item; + JsonPathItem *path_item; + JsonPathItem *scalar_item; + JsonbValue scalar; + + if (not) + return NULL; + + jspGetLeftArg(jsp, &left_item); + jspGetRightArg(jsp, &right_item); + + if (jspIsScalar(left_item.type)) + { + scalar_item = &left_item; + path_item = &right_item; + } + else if (jspIsScalar(right_item.type)) + { + scalar_item = &right_item; + path_item = &left_item; + } + else + return NULL; /* at least one operand should be a scalar */ + + switch (scalar_item->type) + { + case jpiNull: + scalar.type = jbvNull; + break; + case jpiBool: + scalar.type = jbvBool; + scalar.val.boolean = !!*scalar_item->content.value.data; + break; + case jpiNumeric: + scalar.type = jbvNumeric; + scalar.val.numeric = + (Numeric) scalar_item->content.value.data; + break; + case jpiString: + scalar.type = jbvString; + scalar.val.string.val = scalar_item->content.value.data; + scalar.val.string.len = + scalar_item->content.value.datalen; + break; + default: + elog(ERROR, "invalid scalar jsonpath item type: %d", + scalar_item->type); + return NULL; + } + + return extract_jsp_path_expr(cxt, path, path_item, &scalar); + } + + default: + return NULL; /* not a boolean expression */ + } +} + +/* Recursively emit all GIN entries found in the node tree */ +static void +emit_jsp_gin_entries(JsonPathGinNode *node, GinEntries *entries) +{ + check_stack_depth(); + + switch (node->type) + { + case JSP_GIN_ENTRY: + /* replace datum with its index in the array */ + node->val.entryIndex = add_gin_entry(entries, node->val.entryDatum); + break; + + case JSP_GIN_OR: + case JSP_GIN_AND: + { + int i; + + for (i = 0; i < node->val.nargs; i++) + emit_jsp_gin_entries(node->args[i], entries); + + break; + } + } +} + +/* + * Recursively extract GIN entries from jsonpath query. + * Root expression node is put into (*extra_data)[0]. 
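One subtle point in extract_jsp_bool_expr() above is the node-type selection "not ^ (jsp->type == jpiAnd)": under negation the logical connective flips, per De Morgan's laws, so !(a && b) is extracted as !a || !b and vice versa. A tiny standalone check of the four cases (hypothetical, not backend code):

#include <stdio.h>
#include <stdbool.h>

int
main(void)
{
	int			i;

	for (i = 0; i < 4; i++)
	{
		bool		neg = (i & 1) != 0;		/* the 'not' flag */
		bool		is_and = (i & 2) != 0;	/* jpiAnd vs. jpiOr */

		/* mirrors: type = not ^ (jsp->type == jpiAnd) ? JSP_GIN_AND : JSP_GIN_OR */
		printf("not=%d jpi%s -> JSP_GIN_%s\n",
			   neg, is_and ? "And" : "Or",
			   (neg ^ is_and) ? "AND" : "OR");
	}
	return 0;
}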
+ */ +static Datum * +extract_jsp_query(JsonPath *jp, StrategyNumber strat, bool pathOps, + int32 *nentries, Pointer **extra_data) +{ + JsonPathGinContext cxt; + JsonPathItem root; + JsonPathGinNode *node; + JsonPathGinPath path = {0}; + GinEntries entries = {0}; + + cxt.lax = (jp->header & JSONPATH_LAX) != 0; + + if (pathOps) + { + cxt.add_path_item = jsonb_path_ops__add_path_item; + cxt.extract_nodes = jsonb_path_ops__extract_nodes; + } + else + { + cxt.add_path_item = jsonb_ops__add_path_item; + cxt.extract_nodes = jsonb_ops__extract_nodes; + } + + jspInit(&root, jp); + + node = strat == JsonbJsonpathExistsStrategyNumber + ? extract_jsp_path_expr(&cxt, path, &root, NULL) + : extract_jsp_bool_expr(&cxt, path, &root, false); + + if (!node) + { + *nentries = 0; + return NULL; + } + + emit_jsp_gin_entries(node, &entries); + + *nentries = entries.count; + if (!*nentries) + return NULL; + + *extra_data = palloc0(sizeof(**extra_data) * entries.count); + **extra_data = (Pointer) node; + + return entries.buf; +} + +/* + * Recursively execute jsonpath expression. + * 'check' is a bool[] or a GinTernaryValue[] depending on 'ternary' flag. + */ +static GinTernaryValue +execute_jsp_gin_node(JsonPathGinNode *node, void *check, bool ternary) +{ + GinTernaryValue res; + GinTernaryValue v; + int i; + + switch (node->type) + { + case JSP_GIN_AND: + res = GIN_TRUE; + for (i = 0; i < node->val.nargs; i++) + { + v = execute_jsp_gin_node(node->args[i], check, ternary); + if (v == GIN_FALSE) + return GIN_FALSE; + else if (v == GIN_MAYBE) + res = GIN_MAYBE; + } + return res; + + case JSP_GIN_OR: + res = GIN_FALSE; + for (i = 0; i < node->val.nargs; i++) + { + v = execute_jsp_gin_node(node->args[i], check, ternary); + if (v == GIN_TRUE) + return GIN_TRUE; + else if (v == GIN_MAYBE) + res = GIN_MAYBE; + } + return res; + + case JSP_GIN_ENTRY: + { + int index = node->val.entryIndex; + + if (ternary) + return ((GinTernaryValue *) check)[index]; + else + return ((bool *) check)[index] ? GIN_TRUE : GIN_FALSE; + } + + default: + elog(ERROR, "invalid jsonpath gin node type: %d", node->type); + return GIN_FALSE; /* keep compiler quiet */ + } +} + +Datum +gin_extract_jsonb_query(PG_FUNCTION_ARGS) +{ + int32 *nentries = (int32 *) PG_GETARG_POINTER(1); + StrategyNumber strategy = PG_GETARG_UINT16(2); + int32 *searchMode = (int32 *) PG_GETARG_POINTER(6); + Datum *entries; + + if (strategy == JsonbContainsStrategyNumber) + { + /* Query is a jsonb, so just apply gin_extract_jsonb... 
*/ + entries = (Datum *) + DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb, + PG_GETARG_DATUM(0), + PointerGetDatum(nentries))); + /* ...although "contains {}" requires a full index scan */ + if (*nentries == 0) + *searchMode = GIN_SEARCH_MODE_ALL; + } + else if (strategy == JsonbExistsStrategyNumber) + { + /* Query is a text string, which we treat as a key */ + text *query = PG_GETARG_TEXT_PP(0); + + *nentries = 1; + entries = (Datum *) palloc(sizeof(Datum)); + entries[0] = make_text_key(JGINFLAG_KEY, + VARDATA_ANY(query), + VARSIZE_ANY_EXHDR(query)); + } + else if (strategy == JsonbExistsAnyStrategyNumber || + strategy == JsonbExistsAllStrategyNumber) + { + /* Query is a text array; each element is treated as a key */ + ArrayType *query = PG_GETARG_ARRAYTYPE_P(0); + Datum *key_datums; + bool *key_nulls; + int key_count; + int i, + j; + + deconstruct_array_builtin(query, TEXTOID, &key_datums, &key_nulls, &key_count); + + entries = (Datum *) palloc(sizeof(Datum) * key_count); + + for (i = 0, j = 0; i < key_count; i++) + { + /* Nulls in the array are ignored */ + if (key_nulls[i]) + continue; + /* We rely on the array elements not being toasted */ + entries[j++] = make_text_key(JGINFLAG_KEY, + VARDATA_ANY(key_datums[i]), + VARSIZE_ANY_EXHDR(key_datums[i])); + } + + *nentries = j; + /* ExistsAll with no keys should match everything */ + if (j == 0 && strategy == JsonbExistsAllStrategyNumber) + *searchMode = GIN_SEARCH_MODE_ALL; + } + else if (strategy == JsonbJsonpathPredicateStrategyNumber || + strategy == JsonbJsonpathExistsStrategyNumber) + { + JsonPath *jp = PG_GETARG_JSONPATH_P(0); + Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4); + + entries = extract_jsp_query(jp, strategy, false, nentries, extra_data); + + if (!entries) + *searchMode = GIN_SEARCH_MODE_ALL; + } + else + { + elog(ERROR, "unrecognized strategy number: %d", strategy); + entries = NULL; /* keep compiler quiet */ + } + + PG_RETURN_POINTER(entries); +} + +Datum +gin_consistent_jsonb(PG_FUNCTION_ARGS) +{ + bool *check = (bool *) PG_GETARG_POINTER(0); + StrategyNumber strategy = PG_GETARG_UINT16(1); + + /* Jsonb *query = PG_GETARG_JSONB_P(2); */ + int32 nkeys = PG_GETARG_INT32(3); + + Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); + bool *recheck = (bool *) PG_GETARG_POINTER(5); + bool res = true; + int32 i; + + if (strategy == JsonbContainsStrategyNumber) + { + /* + * We must always recheck, since we can't tell from the index whether + * the positions of the matched items match the structure of the query + * object. (Even if we could, we'd also have to worry about hashed + * keys and the index's failure to distinguish keys from string array + * elements.) However, the tuple certainly doesn't match unless it + * contains all the query keys. + */ + *recheck = true; + for (i = 0; i < nkeys; i++) + { + if (!check[i]) + { + res = false; + break; + } + } + } + else if (strategy == JsonbExistsStrategyNumber) + { + /* + * Although the key is certainly present in the index, we must recheck + * because (1) the key might be hashed, and (2) the index match might + * be for a key that's not at top level of the JSON object. For (1), + * we could look at the query key to see if it's hashed and not + * recheck if not, but the index lacks enough info to tell about (2). 
+ */ + *recheck = true; + res = true; + } + else if (strategy == JsonbExistsAnyStrategyNumber) + { + /* As for plain exists, we must recheck */ + *recheck = true; + res = true; + } + else if (strategy == JsonbExistsAllStrategyNumber) + { + /* As for plain exists, we must recheck */ + *recheck = true; + /* ... but unless all the keys are present, we can say "false" */ + for (i = 0; i < nkeys; i++) + { + if (!check[i]) + { + res = false; + break; + } + } + } + else if (strategy == JsonbJsonpathPredicateStrategyNumber || + strategy == JsonbJsonpathExistsStrategyNumber) + { + *recheck = true; + + if (nkeys > 0) + { + Assert(extra_data && extra_data[0]); + res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check, + false) != GIN_FALSE; + } + } + else + elog(ERROR, "unrecognized strategy number: %d", strategy); + + PG_RETURN_BOOL(res); +} + +Datum +gin_triconsistent_jsonb(PG_FUNCTION_ARGS) +{ + GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0); + StrategyNumber strategy = PG_GETARG_UINT16(1); + + /* Jsonb *query = PG_GETARG_JSONB_P(2); */ + int32 nkeys = PG_GETARG_INT32(3); + Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); + GinTernaryValue res = GIN_MAYBE; + int32 i; + + /* + * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; this + * corresponds to always forcing recheck in the regular consistent + * function, for the reasons listed there. + */ + if (strategy == JsonbContainsStrategyNumber || + strategy == JsonbExistsAllStrategyNumber) + { + /* All extracted keys must be present */ + for (i = 0; i < nkeys; i++) + { + if (check[i] == GIN_FALSE) + { + res = GIN_FALSE; + break; + } + } + } + else if (strategy == JsonbExistsStrategyNumber || + strategy == JsonbExistsAnyStrategyNumber) + { + /* At least one extracted key must be present */ + res = GIN_FALSE; + for (i = 0; i < nkeys; i++) + { + if (check[i] == GIN_TRUE || + check[i] == GIN_MAYBE) + { + res = GIN_MAYBE; + break; + } + } + } + else if (strategy == JsonbJsonpathPredicateStrategyNumber || + strategy == JsonbJsonpathExistsStrategyNumber) + { + if (nkeys > 0) + { + Assert(extra_data && extra_data[0]); + res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check, + true); + + /* Should always recheck the result */ + if (res == GIN_TRUE) + res = GIN_MAYBE; + } + } + else + elog(ERROR, "unrecognized strategy number: %d", strategy); + + PG_RETURN_GIN_TERNARY_VALUE(res); +} + +/* + * + * jsonb_path_ops GIN opclass support functions + * + * In a jsonb_path_ops index, the GIN keys are uint32 hashes, one per JSON + * value; but the JSON key(s) leading to each value are also included in its + * hash computation. This means we can only support containment queries, + * but the index can distinguish, for example, {"foo": 42} from {"bar": 42} + * since different hashes will be generated. 
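The standalone sketch below (hypothetical; mix_hash() is only a crude stand-in for JsonbHashScalarValue()) shows why that works: the keys on the way down to a value are folded into the running hash before the value itself, so {"foo": 42} and {"bar": 42} emit different GIN entries even though the value is the same.

#include <stdio.h>
#include <stdint.h>

/* Crude stand-in for JsonbHashScalarValue(): fold a string into the hash. */
static void
mix_hash(uint32_t *hash, const char *s)
{
	for (; *s; s++)
	{
		*hash = (*hash << 1) | (*hash >> 31);	/* rotate left by one bit */
		*hash ^= (uint32_t) (unsigned char) *s;
	}
}

/* One jsonb_path_ops entry for a single top-level key/value pair: the key
 * is mixed in first (as the WJB_KEY case does), then the value. */
static uint32_t
path_entry(const char *key, const char *value)
{
	uint32_t	hash = 0;		/* the root level starts from an empty hash */

	mix_hash(&hash, key);
	mix_hash(&hash, value);
	return hash;
}

int
main(void)
{
	printf("{\"foo\": 42} -> entry %08x\n", (unsigned) path_entry("foo", "42"));
	printf("{\"bar\": 42} -> entry %08x\n", (unsigned) path_entry("bar", "42"));
	return 0;
}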
+ * + */ + +Datum +gin_extract_jsonb_path(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + int32 *nentries = (int32 *) PG_GETARG_POINTER(1); + int total = JB_ROOT_COUNT(jb); + JsonbIterator *it; + JsonbValue v; + JsonbIteratorToken r; + PathHashStack tail; + PathHashStack *stack; + GinEntries entries; + + /* If the root level is empty, we certainly have no keys */ + if (total == 0) + { + *nentries = 0; + PG_RETURN_POINTER(NULL); + } + + /* Otherwise, use 2 * root count as initial estimate of result size */ + init_gin_entries(&entries, 2 * total); + + /* We keep a stack of partial hashes corresponding to parent key levels */ + tail.parent = NULL; + tail.hash = 0; + stack = &tail; + + it = JsonbIteratorInit(&jb->root); + + while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) + { + PathHashStack *parent; + + switch (r) + { + case WJB_BEGIN_ARRAY: + case WJB_BEGIN_OBJECT: + /* Push a stack level for this object */ + parent = stack; + stack = (PathHashStack *) palloc(sizeof(PathHashStack)); + + /* + * We pass forward hashes from outer nesting levels so that + * the hashes for nested values will include outer keys as + * well as their own keys. + * + * Nesting an array within another array will not alter + * innermost scalar element hash values, but that seems + * inconsequential. + */ + stack->hash = parent->hash; + stack->parent = parent; + break; + case WJB_KEY: + /* mix this key into the current outer hash */ + JsonbHashScalarValue(&v, &stack->hash); + /* hash is now ready to incorporate the value */ + break; + case WJB_ELEM: + case WJB_VALUE: + /* mix the element or value's hash into the prepared hash */ + JsonbHashScalarValue(&v, &stack->hash); + /* and emit an index entry */ + add_gin_entry(&entries, UInt32GetDatum(stack->hash)); + /* reset hash for next key, value, or sub-object */ + stack->hash = stack->parent->hash; + break; + case WJB_END_ARRAY: + case WJB_END_OBJECT: + /* Pop the stack */ + parent = stack->parent; + pfree(stack); + stack = parent; + /* reset hash for next key, value, or sub-object */ + if (stack->parent) + stack->hash = stack->parent->hash; + else + stack->hash = 0; + break; + default: + elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r); + } + } + + *nentries = entries.count; + + PG_RETURN_POINTER(entries.buf); +} + +Datum +gin_extract_jsonb_query_path(PG_FUNCTION_ARGS) +{ + int32 *nentries = (int32 *) PG_GETARG_POINTER(1); + StrategyNumber strategy = PG_GETARG_UINT16(2); + int32 *searchMode = (int32 *) PG_GETARG_POINTER(6); + Datum *entries; + + if (strategy == JsonbContainsStrategyNumber) + { + /* Query is a jsonb, so just apply gin_extract_jsonb_path ... */ + entries = (Datum *) + DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb_path, + PG_GETARG_DATUM(0), + PointerGetDatum(nentries))); + + /* ... 
although "contains {}" requires a full index scan */ + if (*nentries == 0) + *searchMode = GIN_SEARCH_MODE_ALL; + } + else if (strategy == JsonbJsonpathPredicateStrategyNumber || + strategy == JsonbJsonpathExistsStrategyNumber) + { + JsonPath *jp = PG_GETARG_JSONPATH_P(0); + Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4); + + entries = extract_jsp_query(jp, strategy, true, nentries, extra_data); + + if (!entries) + *searchMode = GIN_SEARCH_MODE_ALL; + } + else + { + elog(ERROR, "unrecognized strategy number: %d", strategy); + entries = NULL; + } + + PG_RETURN_POINTER(entries); +} + +Datum +gin_consistent_jsonb_path(PG_FUNCTION_ARGS) +{ + bool *check = (bool *) PG_GETARG_POINTER(0); + StrategyNumber strategy = PG_GETARG_UINT16(1); + + /* Jsonb *query = PG_GETARG_JSONB_P(2); */ + int32 nkeys = PG_GETARG_INT32(3); + Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); + bool *recheck = (bool *) PG_GETARG_POINTER(5); + bool res = true; + int32 i; + + if (strategy == JsonbContainsStrategyNumber) + { + /* + * jsonb_path_ops is necessarily lossy, not only because of hash + * collisions but also because it doesn't preserve complete + * information about the structure of the JSON object. Besides, there + * are some special rules around the containment of raw scalars in + * arrays that are not handled here. So we must always recheck a + * match. However, if not all of the keys are present, the tuple + * certainly doesn't match. + */ + *recheck = true; + for (i = 0; i < nkeys; i++) + { + if (!check[i]) + { + res = false; + break; + } + } + } + else if (strategy == JsonbJsonpathPredicateStrategyNumber || + strategy == JsonbJsonpathExistsStrategyNumber) + { + *recheck = true; + + if (nkeys > 0) + { + Assert(extra_data && extra_data[0]); + res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check, + false) != GIN_FALSE; + } + } + else + elog(ERROR, "unrecognized strategy number: %d", strategy); + + PG_RETURN_BOOL(res); +} + +Datum +gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS) +{ + GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0); + StrategyNumber strategy = PG_GETARG_UINT16(1); + + /* Jsonb *query = PG_GETARG_JSONB_P(2); */ + int32 nkeys = PG_GETARG_INT32(3); + Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); + GinTernaryValue res = GIN_MAYBE; + int32 i; + + if (strategy == JsonbContainsStrategyNumber) + { + /* + * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; + * this corresponds to always forcing recheck in the regular + * consistent function, for the reasons listed there. + */ + for (i = 0; i < nkeys; i++) + { + if (check[i] == GIN_FALSE) + { + res = GIN_FALSE; + break; + } + } + } + else if (strategy == JsonbJsonpathPredicateStrategyNumber || + strategy == JsonbJsonpathExistsStrategyNumber) + { + if (nkeys > 0) + { + Assert(extra_data && extra_data[0]); + res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check, + true); + + /* Should always recheck the result */ + if (res == GIN_TRUE) + res = GIN_MAYBE; + } + } + else + elog(ERROR, "unrecognized strategy number: %d", strategy); + + PG_RETURN_GIN_TERNARY_VALUE(res); +} + +/* + * Construct a jsonb_ops GIN key from a flag byte and a textual representation + * (which need not be null-terminated). This function is responsible + * for hashing overlength text representations; it will add the + * JGINFLAG_HASHED bit to the flag value if it does that. 
+ */ +static Datum +make_text_key(char flag, const char *str, int len) +{ + text *item; + char hashbuf[10]; + + if (len > JGIN_MAXLENGTH) + { + uint32 hashval; + + hashval = DatumGetUInt32(hash_any((const unsigned char *) str, len)); + snprintf(hashbuf, sizeof(hashbuf), "%08x", hashval); + str = hashbuf; + len = 8; + flag |= JGINFLAG_HASHED; + } + + /* + * Now build the text Datum. For simplicity we build a 4-byte-header + * varlena text Datum here, but we expect it will get converted to short + * header format when stored in the index. + */ + item = (text *) palloc(VARHDRSZ + len + 1); + SET_VARSIZE(item, VARHDRSZ + len + 1); + + *VARDATA(item) = flag; + + memcpy(VARDATA(item) + 1, str, len); + + return PointerGetDatum(item); +} + +/* + * Create a textual representation of a JsonbValue that will serve as a GIN + * key in a jsonb_ops index. is_key is true if the JsonbValue is a key, + * or if it is a string array element (since we pretend those are keys, + * see jsonb.h). + */ +static Datum +make_scalar_key(const JsonbValue *scalarVal, bool is_key) +{ + Datum item; + char *cstr; + + switch (scalarVal->type) + { + case jbvNull: + Assert(!is_key); + item = make_text_key(JGINFLAG_NULL, "", 0); + break; + case jbvBool: + Assert(!is_key); + item = make_text_key(JGINFLAG_BOOL, + scalarVal->val.boolean ? "t" : "f", 1); + break; + case jbvNumeric: + Assert(!is_key); + + /* + * A normalized textual representation, free of trailing zeroes, + * is required so that numerically equal values will produce equal + * strings. + * + * It isn't ideal that numerics are stored in a relatively bulky + * textual format. However, it's a notationally convenient way of + * storing a "union" type in the GIN B-Tree, and indexing Jsonb + * strings takes precedence. + */ + cstr = numeric_normalize(scalarVal->val.numeric); + item = make_text_key(JGINFLAG_NUM, cstr, strlen(cstr)); + pfree(cstr); + break; + case jbvString: + item = make_text_key(is_key ? JGINFLAG_KEY : JGINFLAG_STR, + scalarVal->val.string.val, + scalarVal->val.string.len); + break; + default: + elog(ERROR, "unrecognized jsonb scalar type: %d", scalarVal->type); + item = 0; /* keep compiler quiet */ + break; + } + + return item; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonb_op.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonb_op.c new file mode 100644 index 00000000000..054351f0a31 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonb_op.c @@ -0,0 +1,336 @@ +/*------------------------------------------------------------------------- + * + * jsonb_op.c + * Special operators for jsonb only, used by various index access methods + * + * Copyright (c) 2014-2023, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/jsonb_op.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "catalog/pg_type.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/jsonb.h" + +Datum +jsonb_exists(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + text *key = PG_GETARG_TEXT_PP(1); + JsonbValue kval; + JsonbValue *v = NULL; + + /* + * We only match Object keys (which are naturally always Strings), or + * string elements in arrays. In particular, we do not match non-string + * scalar elements. Existence of a key/element is only considered at the + * top level. No recursion occurs. 
+ */ + kval.type = jbvString; + kval.val.string.val = VARDATA_ANY(key); + kval.val.string.len = VARSIZE_ANY_EXHDR(key); + + v = findJsonbValueFromContainer(&jb->root, + JB_FOBJECT | JB_FARRAY, + &kval); + + PG_RETURN_BOOL(v != NULL); +} + +Datum +jsonb_exists_any(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + ArrayType *keys = PG_GETARG_ARRAYTYPE_P(1); + int i; + Datum *key_datums; + bool *key_nulls; + int elem_count; + + deconstruct_array_builtin(keys, TEXTOID, &key_datums, &key_nulls, &elem_count); + + for (i = 0; i < elem_count; i++) + { + JsonbValue strVal; + + if (key_nulls[i]) + continue; + + strVal.type = jbvString; + /* We rely on the array elements not being toasted */ + strVal.val.string.val = VARDATA_ANY(key_datums[i]); + strVal.val.string.len = VARSIZE_ANY_EXHDR(key_datums[i]); + + if (findJsonbValueFromContainer(&jb->root, + JB_FOBJECT | JB_FARRAY, + &strVal) != NULL) + PG_RETURN_BOOL(true); + } + + PG_RETURN_BOOL(false); +} + +Datum +jsonb_exists_all(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + ArrayType *keys = PG_GETARG_ARRAYTYPE_P(1); + int i; + Datum *key_datums; + bool *key_nulls; + int elem_count; + + deconstruct_array_builtin(keys, TEXTOID, &key_datums, &key_nulls, &elem_count); + + for (i = 0; i < elem_count; i++) + { + JsonbValue strVal; + + if (key_nulls[i]) + continue; + + strVal.type = jbvString; + /* We rely on the array elements not being toasted */ + strVal.val.string.val = VARDATA_ANY(key_datums[i]); + strVal.val.string.len = VARSIZE_ANY_EXHDR(key_datums[i]); + + if (findJsonbValueFromContainer(&jb->root, + JB_FOBJECT | JB_FARRAY, + &strVal) == NULL) + PG_RETURN_BOOL(false); + } + + PG_RETURN_BOOL(true); +} + +Datum +jsonb_contains(PG_FUNCTION_ARGS) +{ + Jsonb *val = PG_GETARG_JSONB_P(0); + Jsonb *tmpl = PG_GETARG_JSONB_P(1); + + JsonbIterator *it1, + *it2; + + if (JB_ROOT_IS_OBJECT(val) != JB_ROOT_IS_OBJECT(tmpl)) + PG_RETURN_BOOL(false); + + it1 = JsonbIteratorInit(&val->root); + it2 = JsonbIteratorInit(&tmpl->root); + + PG_RETURN_BOOL(JsonbDeepContains(&it1, &it2)); +} + +Datum +jsonb_contained(PG_FUNCTION_ARGS) +{ + /* Commutator of "contains" */ + Jsonb *tmpl = PG_GETARG_JSONB_P(0); + Jsonb *val = PG_GETARG_JSONB_P(1); + + JsonbIterator *it1, + *it2; + + if (JB_ROOT_IS_OBJECT(val) != JB_ROOT_IS_OBJECT(tmpl)) + PG_RETURN_BOOL(false); + + it1 = JsonbIteratorInit(&val->root); + it2 = JsonbIteratorInit(&tmpl->root); + + PG_RETURN_BOOL(JsonbDeepContains(&it1, &it2)); +} + +Datum +jsonb_ne(PG_FUNCTION_ARGS) +{ + Jsonb *jba = PG_GETARG_JSONB_P(0); + Jsonb *jbb = PG_GETARG_JSONB_P(1); + bool res; + + res = (compareJsonbContainers(&jba->root, &jbb->root) != 0); + + PG_FREE_IF_COPY(jba, 0); + PG_FREE_IF_COPY(jbb, 1); + PG_RETURN_BOOL(res); +} + +/* + * B-Tree operator class operators, support function + */ +Datum +jsonb_lt(PG_FUNCTION_ARGS) +{ + Jsonb *jba = PG_GETARG_JSONB_P(0); + Jsonb *jbb = PG_GETARG_JSONB_P(1); + bool res; + + res = (compareJsonbContainers(&jba->root, &jbb->root) < 0); + + PG_FREE_IF_COPY(jba, 0); + PG_FREE_IF_COPY(jbb, 1); + PG_RETURN_BOOL(res); +} + +Datum +jsonb_gt(PG_FUNCTION_ARGS) +{ + Jsonb *jba = PG_GETARG_JSONB_P(0); + Jsonb *jbb = PG_GETARG_JSONB_P(1); + bool res; + + res = (compareJsonbContainers(&jba->root, &jbb->root) > 0); + + PG_FREE_IF_COPY(jba, 0); + PG_FREE_IF_COPY(jbb, 1); + PG_RETURN_BOOL(res); +} + +Datum +jsonb_le(PG_FUNCTION_ARGS) +{ + Jsonb *jba = PG_GETARG_JSONB_P(0); + Jsonb *jbb = PG_GETARG_JSONB_P(1); + bool res; + + res = (compareJsonbContainers(&jba->root, &jbb->root) <= 0); + + 
PG_FREE_IF_COPY(jba, 0); + PG_FREE_IF_COPY(jbb, 1); + PG_RETURN_BOOL(res); +} + +Datum +jsonb_ge(PG_FUNCTION_ARGS) +{ + Jsonb *jba = PG_GETARG_JSONB_P(0); + Jsonb *jbb = PG_GETARG_JSONB_P(1); + bool res; + + res = (compareJsonbContainers(&jba->root, &jbb->root) >= 0); + + PG_FREE_IF_COPY(jba, 0); + PG_FREE_IF_COPY(jbb, 1); + PG_RETURN_BOOL(res); +} + +Datum +jsonb_eq(PG_FUNCTION_ARGS) +{ + Jsonb *jba = PG_GETARG_JSONB_P(0); + Jsonb *jbb = PG_GETARG_JSONB_P(1); + bool res; + + res = (compareJsonbContainers(&jba->root, &jbb->root) == 0); + + PG_FREE_IF_COPY(jba, 0); + PG_FREE_IF_COPY(jbb, 1); + PG_RETURN_BOOL(res); +} + +Datum +jsonb_cmp(PG_FUNCTION_ARGS) +{ + Jsonb *jba = PG_GETARG_JSONB_P(0); + Jsonb *jbb = PG_GETARG_JSONB_P(1); + int res; + + res = compareJsonbContainers(&jba->root, &jbb->root); + + PG_FREE_IF_COPY(jba, 0); + PG_FREE_IF_COPY(jbb, 1); + PG_RETURN_INT32(res); +} + +/* + * Hash operator class jsonb hashing function + */ +Datum +jsonb_hash(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + JsonbIterator *it; + JsonbValue v; + JsonbIteratorToken r; + uint32 hash = 0; + + if (JB_ROOT_COUNT(jb) == 0) + PG_RETURN_INT32(0); + + it = JsonbIteratorInit(&jb->root); + + while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) + { + switch (r) + { + /* Rotation is left to JsonbHashScalarValue() */ + case WJB_BEGIN_ARRAY: + hash ^= JB_FARRAY; + break; + case WJB_BEGIN_OBJECT: + hash ^= JB_FOBJECT; + break; + case WJB_KEY: + case WJB_VALUE: + case WJB_ELEM: + JsonbHashScalarValue(&v, &hash); + break; + case WJB_END_ARRAY: + case WJB_END_OBJECT: + break; + default: + elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r); + } + } + + PG_FREE_IF_COPY(jb, 0); + PG_RETURN_INT32(hash); +} + +Datum +jsonb_hash_extended(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + uint64 seed = PG_GETARG_INT64(1); + JsonbIterator *it; + JsonbValue v; + JsonbIteratorToken r; + uint64 hash = 0; + + if (JB_ROOT_COUNT(jb) == 0) + PG_RETURN_UINT64(seed); + + it = JsonbIteratorInit(&jb->root); + + while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) + { + switch (r) + { + /* Rotation is left to JsonbHashScalarValueExtended() */ + case WJB_BEGIN_ARRAY: + hash ^= ((uint64) JB_FARRAY) << 32 | JB_FARRAY; + break; + case WJB_BEGIN_OBJECT: + hash ^= ((uint64) JB_FOBJECT) << 32 | JB_FOBJECT; + break; + case WJB_KEY: + case WJB_VALUE: + case WJB_ELEM: + JsonbHashScalarValueExtended(&v, &hash, seed); + break; + case WJB_END_ARRAY: + case WJB_END_OBJECT: + break; + default: + elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r); + } + } + + PG_FREE_IF_COPY(jb, 0); + PG_RETURN_UINT64(hash); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonb_util.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonb_util.c new file mode 100644 index 00000000000..9cc95b773db --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonb_util.c @@ -0,0 +1,1994 @@ +/*------------------------------------------------------------------------- + * + * jsonb_util.c + * converting between Jsonb and JsonbValues, and iterating. 
+ * + * Copyright (c) 2014-2023, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/jsonb_util.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "catalog/pg_collation.h" +#include "catalog/pg_type.h" +#include "common/hashfn.h" +#include "common/jsonapi.h" +#include "miscadmin.h" +#include "port/pg_bitutils.h" +#include "utils/builtins.h" +#include "utils/datetime.h" +#include "utils/json.h" +#include "utils/jsonb.h" +#include "utils/memutils.h" +#include "utils/varlena.h" + +/* + * Maximum number of elements in an array (or key/value pairs in an object). + * This is limited by two things: the size of the JEntry array must fit + * in MaxAllocSize, and the number of elements (or pairs) must fit in the bits + * reserved for that in the JsonbContainer.header field. + * + * (The total size of an array's or object's elements is also limited by + * JENTRY_OFFLENMASK, but we're not concerned about that here.) + */ +#define JSONB_MAX_ELEMS (Min(MaxAllocSize / sizeof(JsonbValue), JB_CMASK)) +#define JSONB_MAX_PAIRS (Min(MaxAllocSize / sizeof(JsonbPair), JB_CMASK)) + +static void fillJsonbValue(JsonbContainer *container, int index, + char *base_addr, uint32 offset, + JsonbValue *result); +static bool equalsJsonbScalarValue(JsonbValue *a, JsonbValue *b); +static int compareJsonbScalarValue(JsonbValue *a, JsonbValue *b); +static Jsonb *convertToJsonb(JsonbValue *val); +static void convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level); +static void convertJsonbArray(StringInfo buffer, JEntry *header, JsonbValue *val, int level); +static void convertJsonbObject(StringInfo buffer, JEntry *header, JsonbValue *val, int level); +static void convertJsonbScalar(StringInfo buffer, JEntry *header, JsonbValue *scalarVal); + +static int reserveFromBuffer(StringInfo buffer, int len); +static void appendToBuffer(StringInfo buffer, const char *data, int len); +static void copyToBuffer(StringInfo buffer, int offset, const char *data, int len); +static short padBufferToInt(StringInfo buffer); + +static JsonbIterator *iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent); +static JsonbIterator *freeAndGetParent(JsonbIterator *it); +static JsonbParseState *pushState(JsonbParseState **pstate); +static void appendKey(JsonbParseState *pstate, JsonbValue *string); +static void appendValue(JsonbParseState *pstate, JsonbValue *scalarVal); +static void appendElement(JsonbParseState *pstate, JsonbValue *scalarVal); +static int lengthCompareJsonbStringValue(const void *a, const void *b); +static int lengthCompareJsonbString(const char *val1, int len1, + const char *val2, int len2); +static int lengthCompareJsonbPair(const void *a, const void *b, void *binequal); +static void uniqueifyJsonbObject(JsonbValue *object, bool unique_keys, + bool skip_nulls); +static JsonbValue *pushJsonbValueScalar(JsonbParseState **pstate, + JsonbIteratorToken seq, + JsonbValue *scalarVal); + +void +JsonbToJsonbValue(Jsonb *jsonb, JsonbValue *val) +{ + val->type = jbvBinary; + val->val.binary.data = &jsonb->root; + val->val.binary.len = VARSIZE(jsonb) - VARHDRSZ; +} + +/* + * Turn an in-memory JsonbValue into a Jsonb for on-disk storage. + * + * Generally we find it more convenient to directly iterate through the Jsonb + * representation and only really convert nested scalar values. 
+ * JsonbIteratorNext() does this, so that clients of the iteration code don't + * have to directly deal with the binary representation (JsonbDeepContains() is + * a notable exception, although all exceptions are internal to this module). + * In general, functions that accept a JsonbValue argument are concerned with + * the manipulation of scalar values, or simple containers of scalar values, + * where it would be inconvenient to deal with a great amount of other state. + */ +Jsonb * +JsonbValueToJsonb(JsonbValue *val) +{ + Jsonb *out; + + if (IsAJsonbScalar(val)) + { + /* Scalar value */ + JsonbParseState *pstate = NULL; + JsonbValue *res; + JsonbValue scalarArray; + + scalarArray.type = jbvArray; + scalarArray.val.array.rawScalar = true; + scalarArray.val.array.nElems = 1; + + pushJsonbValue(&pstate, WJB_BEGIN_ARRAY, &scalarArray); + pushJsonbValue(&pstate, WJB_ELEM, val); + res = pushJsonbValue(&pstate, WJB_END_ARRAY, NULL); + + out = convertToJsonb(res); + } + else if (val->type == jbvObject || val->type == jbvArray) + { + out = convertToJsonb(val); + } + else + { + Assert(val->type == jbvBinary); + out = palloc(VARHDRSZ + val->val.binary.len); + SET_VARSIZE(out, VARHDRSZ + val->val.binary.len); + memcpy(VARDATA(out), val->val.binary.data, val->val.binary.len); + } + + return out; +} + +/* + * Get the offset of the variable-length portion of a Jsonb node within + * the variable-length-data part of its container. The node is identified + * by index within the container's JEntry array. + */ +uint32 +getJsonbOffset(const JsonbContainer *jc, int index) +{ + uint32 offset = 0; + int i; + + /* + * Start offset of this entry is equal to the end offset of the previous + * entry. Walk backwards to the most recent entry stored as an end + * offset, returning that offset plus any lengths in between. + */ + for (i = index - 1; i >= 0; i--) + { + offset += JBE_OFFLENFLD(jc->children[i]); + if (JBE_HAS_OFF(jc->children[i])) + break; + } + + return offset; +} + +/* + * Get the length of the variable-length portion of a Jsonb node. + * The node is identified by index within the container's JEntry array. + */ +uint32 +getJsonbLength(const JsonbContainer *jc, int index) +{ + uint32 off; + uint32 len; + + /* + * If the length is stored directly in the JEntry, just return it. + * Otherwise, get the begin offset of the entry, and subtract that from + * the stored end+1 offset. + */ + if (JBE_HAS_OFF(jc->children[index])) + { + off = getJsonbOffset(jc, index); + len = JBE_OFFLENFLD(jc->children[index]) - off; + } + else + len = JBE_OFFLENFLD(jc->children[index]); + + return len; +} + +/* + * BT comparator worker function. Returns an integer less than, equal to, or + * greater than zero, indicating whether a is less than, equal to, or greater + * than b. Consistent with the requirements for a B-Tree operator class + * + * Strings are compared lexically, in contrast with other places where we use a + * much simpler comparator logic for searching through Strings. Since this is + * called from B-Tree support function 1, we're careful about not leaking + * memory here. 
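The offset/length scheme decoded by getJsonbOffset() and getJsonbLength() above can be illustrated with a standalone sketch (hypothetical; ToyEntry stands in for JEntry, and its has_off flag plays the role of JBE_HAS_OFF): most entries store only their length, while an occasional entry stores its end offset, which bounds how far the backwards walk has to go.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Toy stand-in for a JEntry: 'offlen' holds either this node's length or,
 * when 'has_off' is set, the end offset of this node's data. */
typedef struct
{
	bool		has_off;
	uint32_t	offlen;
} ToyEntry;

/* Mirrors getJsonbOffset(): walk backwards, summing lengths, until an
 * entry that stores an end offset is reached. */
static uint32_t
toy_offset(const ToyEntry *e, int index)
{
	uint32_t	offset = 0;
	int			i;

	for (i = index - 1; i >= 0; i--)
	{
		offset += e[i].offlen;
		if (e[i].has_off)
			break;
	}
	return offset;
}

/* Mirrors getJsonbLength(): either the stored length, or the stored end
 * offset minus the computed start offset. */
static uint32_t
toy_length(const ToyEntry *e, int index)
{
	if (e[index].has_off)
		return e[index].offlen - toy_offset(e, index);
	return e[index].offlen;
}

int
main(void)
{
	/* Five nodes with lengths 3, 5, 2, 7, 4; the entry at index 2 stores
	 * its end offset (3 + 5 + 2 = 10) instead of its length. */
	ToyEntry	e[] = {
		{false, 3}, {false, 5}, {true, 10}, {false, 7}, {false, 4}
	};
	int			i;

	for (i = 0; i < 5; i++)
		printf("node %d: offset %u, length %u\n",
			   i, (unsigned) toy_offset(e, i), (unsigned) toy_length(e, i));
	return 0;
}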
+ */ +int +compareJsonbContainers(JsonbContainer *a, JsonbContainer *b) +{ + JsonbIterator *ita, + *itb; + int res = 0; + + ita = JsonbIteratorInit(a); + itb = JsonbIteratorInit(b); + + do + { + JsonbValue va, + vb; + JsonbIteratorToken ra, + rb; + + ra = JsonbIteratorNext(&ita, &va, false); + rb = JsonbIteratorNext(&itb, &vb, false); + + if (ra == rb) + { + if (ra == WJB_DONE) + { + /* Decisively equal */ + break; + } + + if (ra == WJB_END_ARRAY || ra == WJB_END_OBJECT) + { + /* + * There is no array or object to compare at this stage of + * processing. jbvArray/jbvObject values are compared + * initially, at the WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT + * tokens. + */ + continue; + } + + if (va.type == vb.type) + { + switch (va.type) + { + case jbvString: + case jbvNull: + case jbvNumeric: + case jbvBool: + res = compareJsonbScalarValue(&va, &vb); + break; + case jbvArray: + + /* + * This could be a "raw scalar" pseudo array. That's + * a special case here though, since we still want the + * general type-based comparisons to apply, and as far + * as we're concerned a pseudo array is just a scalar. + */ + if (va.val.array.rawScalar != vb.val.array.rawScalar) + res = (va.val.array.rawScalar) ? -1 : 1; + if (va.val.array.nElems != vb.val.array.nElems) + res = (va.val.array.nElems > vb.val.array.nElems) ? 1 : -1; + break; + case jbvObject: + if (va.val.object.nPairs != vb.val.object.nPairs) + res = (va.val.object.nPairs > vb.val.object.nPairs) ? 1 : -1; + break; + case jbvBinary: + elog(ERROR, "unexpected jbvBinary value"); + break; + case jbvDatetime: + elog(ERROR, "unexpected jbvDatetime value"); + break; + } + } + else + { + /* Type-defined order */ + res = (va.type > vb.type) ? 1 : -1; + } + } + else + { + /* + * It's safe to assume that the types differed, and that the va + * and vb values passed were set. + * + * If the two values were of the same container type, then there'd + * have been a chance to observe the variation in the number of + * elements/pairs (when processing WJB_BEGIN_OBJECT, say). They're + * either two heterogeneously-typed containers, or a container and + * some scalar type. + * + * We don't have to consider the WJB_END_ARRAY and WJB_END_OBJECT + * cases here, because we would have seen the corresponding + * WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT tokens first, and + * concluded that they don't match. + */ + Assert(ra != WJB_END_ARRAY && ra != WJB_END_OBJECT); + Assert(rb != WJB_END_ARRAY && rb != WJB_END_OBJECT); + + Assert(va.type != vb.type); + Assert(va.type != jbvBinary); + Assert(vb.type != jbvBinary); + /* Type-defined order */ + res = (va.type > vb.type) ? 1 : -1; + } + } + while (res == 0); + + while (ita != NULL) + { + JsonbIterator *i = ita->parent; + + pfree(ita); + ita = i; + } + while (itb != NULL) + { + JsonbIterator *i = itb->parent; + + pfree(itb); + itb = i; + } + + return res; +} + +/* + * Find value in object (i.e. the "value" part of some key/value pair in an + * object), or find a matching element if we're looking through an array. Do + * so on the basis of equality of the object keys only, or alternatively + * element values only, with a caller-supplied value "key". The "flags" + * argument allows the caller to specify which container types are of interest. + * + * This exported utility function exists to facilitate various cases concerned + * with "containment". If asked to look through an object, the caller had + * better pass a Jsonb String, because their keys can only be strings. + * Otherwise, for an array, any type of JsonbValue will do. 
+ * + * In order to proceed with the search, it is necessary for callers to have + * both specified an interest in exactly one particular container type with an + * appropriate flag, as well as having the pointed-to Jsonb container be of + * one of those same container types at the top level. (Actually, we just do + * whichever makes sense to save callers the trouble of figuring it out - at + * most one can make sense, because the container either points to an array + * (possibly a "raw scalar" pseudo array) or an object.) + * + * Note that we can return a jbvBinary JsonbValue if this is called on an + * object, but we never do so on an array. If the caller asks to look through + * a container type that is not of the type pointed to by the container, + * immediately fall through and return NULL. If we cannot find the value, + * return NULL. Otherwise, return palloc()'d copy of value. + */ +JsonbValue * +findJsonbValueFromContainer(JsonbContainer *container, uint32 flags, + JsonbValue *key) +{ + JEntry *children = container->children; + int count = JsonContainerSize(container); + + Assert((flags & ~(JB_FARRAY | JB_FOBJECT)) == 0); + + /* Quick out without a palloc cycle if object/array is empty */ + if (count <= 0) + return NULL; + + if ((flags & JB_FARRAY) && JsonContainerIsArray(container)) + { + JsonbValue *result = palloc(sizeof(JsonbValue)); + char *base_addr = (char *) (children + count); + uint32 offset = 0; + int i; + + for (i = 0; i < count; i++) + { + fillJsonbValue(container, i, base_addr, offset, result); + + if (key->type == result->type) + { + if (equalsJsonbScalarValue(key, result)) + return result; + } + + JBE_ADVANCE_OFFSET(offset, children[i]); + } + + pfree(result); + } + else if ((flags & JB_FOBJECT) && JsonContainerIsObject(container)) + { + /* Object key passed by caller must be a string */ + Assert(key->type == jbvString); + + return getKeyJsonValueFromContainer(container, key->val.string.val, + key->val.string.len, NULL); + } + + /* Not found */ + return NULL; +} + +/* + * Find value by key in Jsonb object and fetch it into 'res', which is also + * returned. + * + * 'res' can be passed in as NULL, in which case it's newly palloc'ed here. + */ +JsonbValue * +getKeyJsonValueFromContainer(JsonbContainer *container, + const char *keyVal, int keyLen, JsonbValue *res) +{ + JEntry *children = container->children; + int count = JsonContainerSize(container); + char *baseAddr; + uint32 stopLow, + stopHigh; + + Assert(JsonContainerIsObject(container)); + + /* Quick out without a palloc cycle if object is empty */ + if (count <= 0) + return NULL; + + /* + * Binary search the container. 
Since we know this is an object, account + * for *Pairs* of Jentrys + */ + baseAddr = (char *) (children + count * 2); + stopLow = 0; + stopHigh = count; + while (stopLow < stopHigh) + { + uint32 stopMiddle; + int difference; + const char *candidateVal; + int candidateLen; + + stopMiddle = stopLow + (stopHigh - stopLow) / 2; + + candidateVal = baseAddr + getJsonbOffset(container, stopMiddle); + candidateLen = getJsonbLength(container, stopMiddle); + + difference = lengthCompareJsonbString(candidateVal, candidateLen, + keyVal, keyLen); + + if (difference == 0) + { + /* Found our key, return corresponding value */ + int index = stopMiddle + count; + + if (!res) + res = palloc(sizeof(JsonbValue)); + + fillJsonbValue(container, index, baseAddr, + getJsonbOffset(container, index), + res); + + return res; + } + else + { + if (difference < 0) + stopLow = stopMiddle + 1; + else + stopHigh = stopMiddle; + } + } + + /* Not found */ + return NULL; +} + +/* + * Get i-th value of a Jsonb array. + * + * Returns palloc()'d copy of the value, or NULL if it does not exist. + */ +JsonbValue * +getIthJsonbValueFromContainer(JsonbContainer *container, uint32 i) +{ + JsonbValue *result; + char *base_addr; + uint32 nelements; + + if (!JsonContainerIsArray(container)) + elog(ERROR, "not a jsonb array"); + + nelements = JsonContainerSize(container); + base_addr = (char *) &container->children[nelements]; + + if (i >= nelements) + return NULL; + + result = palloc(sizeof(JsonbValue)); + + fillJsonbValue(container, i, base_addr, + getJsonbOffset(container, i), + result); + + return result; +} + +/* + * A helper function to fill in a JsonbValue to represent an element of an + * array, or a key or value of an object. + * + * The node's JEntry is at container->children[index], and its variable-length + * data is at base_addr + offset. We make the caller determine the offset + * since in many cases the caller can amortize that work across multiple + * children. When it can't, it can just call getJsonbOffset(). + * + * A nested array or object will be returned as jbvBinary, ie. it won't be + * expanded. + */ +static void +fillJsonbValue(JsonbContainer *container, int index, + char *base_addr, uint32 offset, + JsonbValue *result) +{ + JEntry entry = container->children[index]; + + if (JBE_ISNULL(entry)) + { + result->type = jbvNull; + } + else if (JBE_ISSTRING(entry)) + { + result->type = jbvString; + result->val.string.val = base_addr + offset; + result->val.string.len = getJsonbLength(container, index); + Assert(result->val.string.len >= 0); + } + else if (JBE_ISNUMERIC(entry)) + { + result->type = jbvNumeric; + result->val.numeric = (Numeric) (base_addr + INTALIGN(offset)); + } + else if (JBE_ISBOOL_TRUE(entry)) + { + result->type = jbvBool; + result->val.boolean = true; + } + else if (JBE_ISBOOL_FALSE(entry)) + { + result->type = jbvBool; + result->val.boolean = false; + } + else + { + Assert(JBE_ISCONTAINER(entry)); + result->type = jbvBinary; + /* Remove alignment padding from data pointer and length */ + result->val.binary.data = (JsonbContainer *) (base_addr + INTALIGN(offset)); + result->val.binary.len = getJsonbLength(container, index) - + (INTALIGN(offset) - offset); + } +} + +/* + * Push JsonbValue into JsonbParseState. + * + * Used when parsing JSON tokens to form Jsonb, or when converting an in-memory + * JsonbValue to a Jsonb. + * + * Initial state of *JsonbParseState is NULL, since it'll be allocated here + * originally (caller will get JsonbParseState back by reference). 
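As a usage illustration of the interface described here, the hedged sketch below builds the object {"a": "x", "b": true} by driving pushJsonbValue() with the usual token sequence and then materializes it with JsonbValueToJsonb(). It assumes compilation as backend code with utils/jsonb.h available; build_sample_jsonb is a hypothetical helper, not part of this file.

#include "postgres.h"
#include "utils/jsonb.h"

/* Hypothetical helper: builds {"a": "x", "b": true} via pushJsonbValue(). */
static Jsonb *
build_sample_jsonb(void)
{
	JsonbParseState *pstate = NULL;
	JsonbValue	key,
				val;
	JsonbValue *res;

	(void) pushJsonbValue(&pstate, WJB_BEGIN_OBJECT, NULL);

	key.type = jbvString;
	key.val.string.val = pstrdup("a");
	key.val.string.len = 1;
	(void) pushJsonbValue(&pstate, WJB_KEY, &key);

	val.type = jbvString;
	val.val.string.val = pstrdup("x");
	val.val.string.len = 1;
	(void) pushJsonbValue(&pstate, WJB_VALUE, &val);

	key.val.string.val = pstrdup("b");
	(void) pushJsonbValue(&pstate, WJB_KEY, &key);

	val.type = jbvBool;
	val.val.boolean = true;
	(void) pushJsonbValue(&pstate, WJB_VALUE, &val);

	/* the finished object value is returned by the closing token */
	res = pushJsonbValue(&pstate, WJB_END_OBJECT, NULL);

	return JsonbValueToJsonb(res);
}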
+ * + * Only sequential tokens pertaining to non-container types should pass a + * JsonbValue. There is one exception -- WJB_BEGIN_ARRAY callers may pass a + * "raw scalar" pseudo array to append it - the actual scalar should be passed + * next and it will be added as the only member of the array. + * + * Values of type jbvBinary, which are rolled up arrays and objects, + * are unpacked before being added to the result. + */ +JsonbValue * +pushJsonbValue(JsonbParseState **pstate, JsonbIteratorToken seq, + JsonbValue *jbval) +{ + JsonbIterator *it; + JsonbValue *res = NULL; + JsonbValue v; + JsonbIteratorToken tok; + int i; + + if (jbval && (seq == WJB_ELEM || seq == WJB_VALUE) && jbval->type == jbvObject) + { + pushJsonbValue(pstate, WJB_BEGIN_OBJECT, NULL); + for (i = 0; i < jbval->val.object.nPairs; i++) + { + pushJsonbValue(pstate, WJB_KEY, &jbval->val.object.pairs[i].key); + pushJsonbValue(pstate, WJB_VALUE, &jbval->val.object.pairs[i].value); + } + + return pushJsonbValue(pstate, WJB_END_OBJECT, NULL); + } + + if (jbval && (seq == WJB_ELEM || seq == WJB_VALUE) && jbval->type == jbvArray) + { + pushJsonbValue(pstate, WJB_BEGIN_ARRAY, NULL); + for (i = 0; i < jbval->val.array.nElems; i++) + { + pushJsonbValue(pstate, WJB_ELEM, &jbval->val.array.elems[i]); + } + + return pushJsonbValue(pstate, WJB_END_ARRAY, NULL); + } + + if (!jbval || (seq != WJB_ELEM && seq != WJB_VALUE) || + jbval->type != jbvBinary) + { + /* drop through */ + return pushJsonbValueScalar(pstate, seq, jbval); + } + + /* unpack the binary and add each piece to the pstate */ + it = JsonbIteratorInit(jbval->val.binary.data); + + if ((jbval->val.binary.data->header & JB_FSCALAR) && *pstate) + { + tok = JsonbIteratorNext(&it, &v, true); + Assert(tok == WJB_BEGIN_ARRAY); + Assert(v.type == jbvArray && v.val.array.rawScalar); + + tok = JsonbIteratorNext(&it, &v, true); + Assert(tok == WJB_ELEM); + + res = pushJsonbValueScalar(pstate, seq, &v); + + tok = JsonbIteratorNext(&it, &v, true); + Assert(tok == WJB_END_ARRAY); + Assert(it == NULL); + + return res; + } + + while ((tok = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) + res = pushJsonbValueScalar(pstate, tok, + tok < WJB_BEGIN_ARRAY || + (tok == WJB_BEGIN_ARRAY && + v.val.array.rawScalar) ? &v : NULL); + + return res; +} + +/* + * Do the actual pushing, with only scalar or pseudo-scalar-array values + * accepted. 
+ */ +static JsonbValue * +pushJsonbValueScalar(JsonbParseState **pstate, JsonbIteratorToken seq, + JsonbValue *scalarVal) +{ + JsonbValue *result = NULL; + + switch (seq) + { + case WJB_BEGIN_ARRAY: + Assert(!scalarVal || scalarVal->val.array.rawScalar); + *pstate = pushState(pstate); + result = &(*pstate)->contVal; + (*pstate)->contVal.type = jbvArray; + (*pstate)->contVal.val.array.nElems = 0; + (*pstate)->contVal.val.array.rawScalar = (scalarVal && + scalarVal->val.array.rawScalar); + if (scalarVal && scalarVal->val.array.nElems > 0) + { + /* Assume that this array is still really a scalar */ + Assert(scalarVal->type == jbvArray); + (*pstate)->size = scalarVal->val.array.nElems; + } + else + { + (*pstate)->size = 4; + } + (*pstate)->contVal.val.array.elems = palloc(sizeof(JsonbValue) * + (*pstate)->size); + break; + case WJB_BEGIN_OBJECT: + Assert(!scalarVal); + *pstate = pushState(pstate); + result = &(*pstate)->contVal; + (*pstate)->contVal.type = jbvObject; + (*pstate)->contVal.val.object.nPairs = 0; + (*pstate)->size = 4; + (*pstate)->contVal.val.object.pairs = palloc(sizeof(JsonbPair) * + (*pstate)->size); + break; + case WJB_KEY: + Assert(scalarVal->type == jbvString); + appendKey(*pstate, scalarVal); + break; + case WJB_VALUE: + Assert(IsAJsonbScalar(scalarVal)); + appendValue(*pstate, scalarVal); + break; + case WJB_ELEM: + Assert(IsAJsonbScalar(scalarVal)); + appendElement(*pstate, scalarVal); + break; + case WJB_END_OBJECT: + uniqueifyJsonbObject(&(*pstate)->contVal, + (*pstate)->unique_keys, + (*pstate)->skip_nulls); + /* fall through! */ + case WJB_END_ARRAY: + /* Steps here common to WJB_END_OBJECT case */ + Assert(!scalarVal); + result = &(*pstate)->contVal; + + /* + * Pop stack and push current array/object as value in parent + * array/object + */ + *pstate = (*pstate)->next; + if (*pstate) + { + switch ((*pstate)->contVal.type) + { + case jbvArray: + appendElement(*pstate, result); + break; + case jbvObject: + appendValue(*pstate, result); + break; + default: + elog(ERROR, "invalid jsonb container type"); + } + } + break; + default: + elog(ERROR, "unrecognized jsonb sequential processing token"); + } + + return result; +} + +/* + * pushJsonbValue() worker: Iteration-like forming of Jsonb + */ +static JsonbParseState * +pushState(JsonbParseState **pstate) +{ + JsonbParseState *ns = palloc(sizeof(JsonbParseState)); + + ns->next = *pstate; + ns->unique_keys = false; + ns->skip_nulls = false; + + return ns; +} + +/* + * pushJsonbValue() worker: Append a pair key to state when generating a Jsonb + */ +static void +appendKey(JsonbParseState *pstate, JsonbValue *string) +{ + JsonbValue *object = &pstate->contVal; + + Assert(object->type == jbvObject); + Assert(string->type == jbvString); + + if (object->val.object.nPairs >= JSONB_MAX_PAIRS) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("number of jsonb object pairs exceeds the maximum allowed (%zu)", + JSONB_MAX_PAIRS))); + + if (object->val.object.nPairs >= pstate->size) + { + pstate->size *= 2; + object->val.object.pairs = repalloc(object->val.object.pairs, + sizeof(JsonbPair) * pstate->size); + } + + object->val.object.pairs[object->val.object.nPairs].key = *string; + object->val.object.pairs[object->val.object.nPairs].order = object->val.object.nPairs; +} + +/* + * pushJsonbValue() worker: Append a pair value to state when generating a + * Jsonb + */ +static void +appendValue(JsonbParseState *pstate, JsonbValue *scalarVal) +{ + JsonbValue *object = &pstate->contVal; + + Assert(object->type == 
jbvObject); + + object->val.object.pairs[object->val.object.nPairs++].value = *scalarVal; +} + +/* + * pushJsonbValue() worker: Append an element to state when generating a Jsonb + */ +static void +appendElement(JsonbParseState *pstate, JsonbValue *scalarVal) +{ + JsonbValue *array = &pstate->contVal; + + Assert(array->type == jbvArray); + + if (array->val.array.nElems >= JSONB_MAX_ELEMS) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("number of jsonb array elements exceeds the maximum allowed (%zu)", + JSONB_MAX_ELEMS))); + + if (array->val.array.nElems >= pstate->size) + { + pstate->size *= 2; + array->val.array.elems = repalloc(array->val.array.elems, + sizeof(JsonbValue) * pstate->size); + } + + array->val.array.elems[array->val.array.nElems++] = *scalarVal; +} + +/* + * Given a JsonbContainer, expand to JsonbIterator to iterate over items + * fully expanded to in-memory representation for manipulation. + * + * See JsonbIteratorNext() for notes on memory management. + */ +JsonbIterator * +JsonbIteratorInit(JsonbContainer *container) +{ + return iteratorFromContainer(container, NULL); +} + +/* + * Get next JsonbValue while iterating + * + * Caller should initially pass their own, original iterator. They may get + * back a child iterator palloc()'d here instead. The function can be relied + * on to free those child iterators, lest the memory allocated for highly + * nested objects become unreasonable, but only if callers don't end iteration + * early (by breaking upon having found something in a search, for example). + * + * Callers in such a scenario, that are particularly sensitive to leaking + * memory in a long-lived context may walk the ancestral tree from the final + * iterator we left them with to its oldest ancestor, pfree()ing as they go. + * They do not have to free any other memory previously allocated for iterators + * but not accessible as direct ancestors of the iterator they're last passed + * back. + * + * Returns "Jsonb sequential processing" token value. Iterator "state" + * reflects the current stage of the process in a less granular fashion, and is + * mostly used here to track things internally with respect to particular + * iterators. + * + * Clients of this function should not have to handle any jbvBinary values + * (since recursive calls will deal with this), provided skipNested is false. + * It is our job to expand the jbvBinary representation without bothering them + * with it. However, clients should not take it upon themselves to touch array + * or Object element/pair buffers, since their element/pair pointers are + * garbage. Also, *val will not be set when returning WJB_END_ARRAY or + * WJB_END_OBJECT, on the assumption that it's only useful to access values + * when recursing in. + */ +JsonbIteratorToken +JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested) +{ + if (*it == NULL) + return WJB_DONE; + + /* + * When stepping into a nested container, we jump back here to start + * processing the child. We will not recurse further in one call, because + * processing the child will always begin in JBI_ARRAY_START or + * JBI_OBJECT_START state. 
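+ *
+ * (For reference, an external consumer of this function typically drives
+ * the iterator with a loop like the following sketch, where jb stands for
+ * some caller-side Jsonb datum rather than a variable of this function:
+ *
+ *     it = JsonbIteratorInit(&jb->root);
+ *     while ((tok = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+ *     {
+ *         ... dispatch on tok and v ...
+ *     }
+ * )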
+ */ +recurse: + switch ((*it)->state) + { + case JBI_ARRAY_START: + /* Set v to array on first array call */ + val->type = jbvArray; + val->val.array.nElems = (*it)->nElems; + + /* + * v->val.array.elems is not actually set, because we aren't doing + * a full conversion + */ + val->val.array.rawScalar = (*it)->isScalar; + (*it)->curIndex = 0; + (*it)->curDataOffset = 0; + (*it)->curValueOffset = 0; /* not actually used */ + /* Set state for next call */ + (*it)->state = JBI_ARRAY_ELEM; + return WJB_BEGIN_ARRAY; + + case JBI_ARRAY_ELEM: + if ((*it)->curIndex >= (*it)->nElems) + { + /* + * All elements within array already processed. Report this + * to caller, and give it back original parent iterator (which + * independently tracks iteration progress at its level of + * nesting). + */ + *it = freeAndGetParent(*it); + return WJB_END_ARRAY; + } + + fillJsonbValue((*it)->container, (*it)->curIndex, + (*it)->dataProper, (*it)->curDataOffset, + val); + + JBE_ADVANCE_OFFSET((*it)->curDataOffset, + (*it)->children[(*it)->curIndex]); + (*it)->curIndex++; + + if (!IsAJsonbScalar(val) && !skipNested) + { + /* Recurse into container. */ + *it = iteratorFromContainer(val->val.binary.data, *it); + goto recurse; + } + else + { + /* + * Scalar item in array, or a container and caller didn't want + * us to recurse into it. + */ + return WJB_ELEM; + } + + case JBI_OBJECT_START: + /* Set v to object on first object call */ + val->type = jbvObject; + val->val.object.nPairs = (*it)->nElems; + + /* + * v->val.object.pairs is not actually set, because we aren't + * doing a full conversion + */ + (*it)->curIndex = 0; + (*it)->curDataOffset = 0; + (*it)->curValueOffset = getJsonbOffset((*it)->container, + (*it)->nElems); + /* Set state for next call */ + (*it)->state = JBI_OBJECT_KEY; + return WJB_BEGIN_OBJECT; + + case JBI_OBJECT_KEY: + if ((*it)->curIndex >= (*it)->nElems) + { + /* + * All pairs within object already processed. Report this to + * caller, and give it back original containing iterator + * (which independently tracks iteration progress at its level + * of nesting). + */ + *it = freeAndGetParent(*it); + return WJB_END_OBJECT; + } + else + { + /* Return key of a key/value pair. */ + fillJsonbValue((*it)->container, (*it)->curIndex, + (*it)->dataProper, (*it)->curDataOffset, + val); + if (val->type != jbvString) + elog(ERROR, "unexpected jsonb type as object key"); + + /* Set state for next call */ + (*it)->state = JBI_OBJECT_VALUE; + return WJB_KEY; + } + + case JBI_OBJECT_VALUE: + /* Set state for next call */ + (*it)->state = JBI_OBJECT_KEY; + + fillJsonbValue((*it)->container, (*it)->curIndex + (*it)->nElems, + (*it)->dataProper, (*it)->curValueOffset, + val); + + JBE_ADVANCE_OFFSET((*it)->curDataOffset, + (*it)->children[(*it)->curIndex]); + JBE_ADVANCE_OFFSET((*it)->curValueOffset, + (*it)->children[(*it)->curIndex + (*it)->nElems]); + (*it)->curIndex++; + + /* + * Value may be a container, in which case we recurse with new, + * child iterator (unless the caller asked not to, by passing + * skipNested). + */ + if (!IsAJsonbScalar(val) && !skipNested) + { + *it = iteratorFromContainer(val->val.binary.data, *it); + goto recurse; + } + else + return WJB_VALUE; + } + + elog(ERROR, "invalid iterator state"); + return -1; +} + +/* + * Initialize an iterator for iterating all elements in a container. 
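+ *
+ * This is reached either from JsonbIteratorInit (with a NULL parent) or
+ * recursively from JsonbIteratorNext when descending into a nested
+ * jbvBinary value.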
+ */ +static JsonbIterator * +iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent) +{ + JsonbIterator *it; + + it = palloc0(sizeof(JsonbIterator)); + it->container = container; + it->parent = parent; + it->nElems = JsonContainerSize(container); + + /* Array starts just after header */ + it->children = container->children; + + switch (container->header & (JB_FARRAY | JB_FOBJECT)) + { + case JB_FARRAY: + it->dataProper = + (char *) it->children + it->nElems * sizeof(JEntry); + it->isScalar = JsonContainerIsScalar(container); + /* This is either a "raw scalar", or an array */ + Assert(!it->isScalar || it->nElems == 1); + + it->state = JBI_ARRAY_START; + break; + + case JB_FOBJECT: + it->dataProper = + (char *) it->children + it->nElems * sizeof(JEntry) * 2; + it->state = JBI_OBJECT_START; + break; + + default: + elog(ERROR, "unknown type of jsonb container"); + } + + return it; +} + +/* + * JsonbIteratorNext() worker: Return parent, while freeing memory for current + * iterator + */ +static JsonbIterator * +freeAndGetParent(JsonbIterator *it) +{ + JsonbIterator *v = it->parent; + + pfree(it); + return v; +} + +/* + * Worker for "contains" operator's function + * + * Formally speaking, containment is top-down, unordered subtree isomorphism. + * + * Takes iterators that belong to some container type. These iterators + * "belong" to those values in the sense that they've just been initialized in + * respect of them by the caller (perhaps in a nested fashion). + * + * "val" is lhs Jsonb, and mContained is rhs Jsonb when called from top level. + * We determine if mContained is contained within val. + */ +bool +JsonbDeepContains(JsonbIterator **val, JsonbIterator **mContained) +{ + JsonbValue vval, + vcontained; + JsonbIteratorToken rval, + rcont; + + /* + * Guard against stack overflow due to overly complex Jsonb. + * + * Functions called here independently take this precaution, but that + * might not be sufficient since this is also a recursive function. + */ + check_stack_depth(); + + rval = JsonbIteratorNext(val, &vval, false); + rcont = JsonbIteratorNext(mContained, &vcontained, false); + + if (rval != rcont) + { + /* + * The differing return values can immediately be taken as indicating + * two differing container types at this nesting level, which is + * sufficient reason to give up entirely (but it should be the case + * that they're both some container type). + */ + Assert(rval == WJB_BEGIN_OBJECT || rval == WJB_BEGIN_ARRAY); + Assert(rcont == WJB_BEGIN_OBJECT || rcont == WJB_BEGIN_ARRAY); + return false; + } + else if (rcont == WJB_BEGIN_OBJECT) + { + Assert(vval.type == jbvObject); + Assert(vcontained.type == jbvObject); + + /* + * If the lhs has fewer pairs than the rhs, it can't possibly contain + * the rhs. (This conclusion is safe only because we de-duplicate + * keys in all Jsonb objects; thus there can be no corresponding + * optimization in the array case.) The case probably won't arise + * often, but since it's such a cheap check we may as well make it. + */ + if (vval.val.object.nPairs < vcontained.val.object.nPairs) + return false; + + /* Work through rhs "is it contained within?" object */ + for (;;) + { + JsonbValue *lhsVal; /* lhsVal is from pair in lhs object */ + JsonbValue lhsValBuf; + + rcont = JsonbIteratorNext(mContained, &vcontained, false); + + /* + * When we get through caller's rhs "is it contained within?" + * object without failing to find one of its values, it's + * contained. 
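+ *
+ * (At the SQL level this is what makes, for example,
+ * '{"a": 1, "b": {"c": 2}}'::jsonb @> '{"b": {"c": 2}}' evaluate to true.)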
+ */ + if (rcont == WJB_END_OBJECT) + return true; + + Assert(rcont == WJB_KEY); + Assert(vcontained.type == jbvString); + + /* First, find value by key... */ + lhsVal = + getKeyJsonValueFromContainer((*val)->container, + vcontained.val.string.val, + vcontained.val.string.len, + &lhsValBuf); + if (!lhsVal) + return false; + + /* + * ...at this stage it is apparent that there is at least a key + * match for this rhs pair. + */ + rcont = JsonbIteratorNext(mContained, &vcontained, true); + + Assert(rcont == WJB_VALUE); + + /* + * Compare rhs pair's value with lhs pair's value just found using + * key + */ + if (lhsVal->type != vcontained.type) + { + return false; + } + else if (IsAJsonbScalar(lhsVal)) + { + if (!equalsJsonbScalarValue(lhsVal, &vcontained)) + return false; + } + else + { + /* Nested container value (object or array) */ + JsonbIterator *nestval, + *nestContained; + + Assert(lhsVal->type == jbvBinary); + Assert(vcontained.type == jbvBinary); + + nestval = JsonbIteratorInit(lhsVal->val.binary.data); + nestContained = JsonbIteratorInit(vcontained.val.binary.data); + + /* + * Match "value" side of rhs datum object's pair recursively. + * It's a nested structure. + * + * Note that nesting still has to "match up" at the right + * nesting sub-levels. However, there need only be zero or + * more matching pairs (or elements) at each nesting level + * (provided the *rhs* pairs/elements *all* match on each + * level), which enables searching nested structures for a + * single String or other primitive type sub-datum quite + * effectively (provided the user constructed the rhs nested + * structure such that we "know where to look"). + * + * In other words, the mapping of container nodes in the rhs + * "vcontained" Jsonb to internal nodes on the lhs is + * injective, and parent-child edges on the rhs must be mapped + * to parent-child edges on the lhs to satisfy the condition + * of containment (plus of course the mapped nodes must be + * equal). + */ + if (!JsonbDeepContains(&nestval, &nestContained)) + return false; + } + } + } + else if (rcont == WJB_BEGIN_ARRAY) + { + JsonbValue *lhsConts = NULL; + uint32 nLhsElems = vval.val.array.nElems; + + Assert(vval.type == jbvArray); + Assert(vcontained.type == jbvArray); + + /* + * Handle distinction between "raw scalar" pseudo arrays, and real + * arrays. + * + * A raw scalar may contain another raw scalar, and an array may + * contain a raw scalar, but a raw scalar may not contain an array. We + * don't do something like this for the object case, since objects can + * only contain pairs, never raw scalars (a pair is represented by an + * rhs object argument with a single contained pair). + */ + if (vval.val.array.rawScalar && !vcontained.val.array.rawScalar) + return false; + + /* Work through rhs "is it contained within?" array */ + for (;;) + { + rcont = JsonbIteratorNext(mContained, &vcontained, true); + + /* + * When we get through caller's rhs "is it contained within?" + * array without failing to find one of its values, it's + * contained. 
+ */ + if (rcont == WJB_END_ARRAY) + return true; + + Assert(rcont == WJB_ELEM); + + if (IsAJsonbScalar(&vcontained)) + { + if (!findJsonbValueFromContainer((*val)->container, + JB_FARRAY, + &vcontained)) + return false; + } + else + { + uint32 i; + + /* + * If this is first container found in rhs array (at this + * depth), initialize temp lhs array of containers + */ + if (lhsConts == NULL) + { + uint32 j = 0; + + /* Make room for all possible values */ + lhsConts = palloc(sizeof(JsonbValue) * nLhsElems); + + for (i = 0; i < nLhsElems; i++) + { + /* Store all lhs elements in temp array */ + rcont = JsonbIteratorNext(val, &vval, true); + Assert(rcont == WJB_ELEM); + + if (vval.type == jbvBinary) + lhsConts[j++] = vval; + } + + /* No container elements in temp array, so give up now */ + if (j == 0) + return false; + + /* We may have only partially filled array */ + nLhsElems = j; + } + + /* XXX: Nested array containment is O(N^2) */ + for (i = 0; i < nLhsElems; i++) + { + /* Nested container value (object or array) */ + JsonbIterator *nestval, + *nestContained; + bool contains; + + nestval = JsonbIteratorInit(lhsConts[i].val.binary.data); + nestContained = JsonbIteratorInit(vcontained.val.binary.data); + + contains = JsonbDeepContains(&nestval, &nestContained); + + if (nestval) + pfree(nestval); + if (nestContained) + pfree(nestContained); + if (contains) + break; + } + + /* + * Report rhs container value is not contained if couldn't + * match rhs container to *some* lhs cont + */ + if (i == nLhsElems) + return false; + } + } + } + else + { + elog(ERROR, "invalid jsonb container type"); + } + + elog(ERROR, "unexpectedly fell off end of jsonb container"); + return false; +} + +/* + * Hash a JsonbValue scalar value, mixing the hash value into an existing + * hash provided by the caller. + * + * Some callers may wish to independently XOR in JB_FOBJECT and JB_FARRAY + * flags. + */ +void +JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash) +{ + uint32 tmp; + + /* Compute hash value for scalarVal */ + switch (scalarVal->type) + { + case jbvNull: + tmp = 0x01; + break; + case jbvString: + tmp = DatumGetUInt32(hash_any((const unsigned char *) scalarVal->val.string.val, + scalarVal->val.string.len)); + break; + case jbvNumeric: + /* Must hash equal numerics to equal hash codes */ + tmp = DatumGetUInt32(DirectFunctionCall1(hash_numeric, + NumericGetDatum(scalarVal->val.numeric))); + break; + case jbvBool: + tmp = scalarVal->val.boolean ? 0x02 : 0x04; + + break; + default: + elog(ERROR, "invalid jsonb scalar type"); + tmp = 0; /* keep compiler quiet */ + break; + } + + /* + * Combine hash values of successive keys, values and elements by rotating + * the previous value left 1 bit, then XOR'ing in the new + * key/value/element's hash value. + */ + *hash = pg_rotate_left32(*hash, 1); + *hash ^= tmp; +} + +/* + * Hash a value to a 64-bit value, with a seed. Otherwise, similar to + * JsonbHashScalarValue. 
+ */ +void +JsonbHashScalarValueExtended(const JsonbValue *scalarVal, uint64 *hash, + uint64 seed) +{ + uint64 tmp; + + switch (scalarVal->type) + { + case jbvNull: + tmp = seed + 0x01; + break; + case jbvString: + tmp = DatumGetUInt64(hash_any_extended((const unsigned char *) scalarVal->val.string.val, + scalarVal->val.string.len, + seed)); + break; + case jbvNumeric: + tmp = DatumGetUInt64(DirectFunctionCall2(hash_numeric_extended, + NumericGetDatum(scalarVal->val.numeric), + UInt64GetDatum(seed))); + break; + case jbvBool: + if (seed) + tmp = DatumGetUInt64(DirectFunctionCall2(hashcharextended, + BoolGetDatum(scalarVal->val.boolean), + UInt64GetDatum(seed))); + else + tmp = scalarVal->val.boolean ? 0x02 : 0x04; + + break; + default: + elog(ERROR, "invalid jsonb scalar type"); + break; + } + + *hash = ROTATE_HIGH_AND_LOW_32BITS(*hash); + *hash ^= tmp; +} + +/* + * Are two scalar JsonbValues of the same type a and b equal? + */ +static bool +equalsJsonbScalarValue(JsonbValue *a, JsonbValue *b) +{ + if (a->type == b->type) + { + switch (a->type) + { + case jbvNull: + return true; + case jbvString: + return lengthCompareJsonbStringValue(a, b) == 0; + case jbvNumeric: + return DatumGetBool(DirectFunctionCall2(numeric_eq, + PointerGetDatum(a->val.numeric), + PointerGetDatum(b->val.numeric))); + case jbvBool: + return a->val.boolean == b->val.boolean; + + default: + elog(ERROR, "invalid jsonb scalar type"); + } + } + elog(ERROR, "jsonb scalar type mismatch"); + return false; +} + +/* + * Compare two scalar JsonbValues, returning -1, 0, or 1. + * + * Strings are compared using the default collation. Used by B-tree + * operators, where a lexical sort order is generally expected. + */ +static int +compareJsonbScalarValue(JsonbValue *a, JsonbValue *b) +{ + if (a->type == b->type) + { + switch (a->type) + { + case jbvNull: + return 0; + case jbvString: + return varstr_cmp(a->val.string.val, + a->val.string.len, + b->val.string.val, + b->val.string.len, + DEFAULT_COLLATION_OID); + case jbvNumeric: + return DatumGetInt32(DirectFunctionCall2(numeric_cmp, + PointerGetDatum(a->val.numeric), + PointerGetDatum(b->val.numeric))); + case jbvBool: + if (a->val.boolean == b->val.boolean) + return 0; + else if (a->val.boolean > b->val.boolean) + return 1; + else + return -1; + default: + elog(ERROR, "invalid jsonb scalar type"); + } + } + elog(ERROR, "jsonb scalar type mismatch"); + return -1; +} + + +/* + * Functions for manipulating the resizable buffer used by convertJsonb and + * its subroutines. + */ + +/* + * Reserve 'len' bytes, at the end of the buffer, enlarging it if necessary. + * Returns the offset to the reserved area. The caller is expected to fill + * the reserved area later with copyToBuffer(). + */ +static int +reserveFromBuffer(StringInfo buffer, int len) +{ + int offset; + + /* Make more room if needed */ + enlargeStringInfo(buffer, len); + + /* remember current offset */ + offset = buffer->len; + + /* reserve the space */ + buffer->len += len; + + /* + * Keep a trailing null in place, even though it's not useful for us; it + * seems best to preserve the invariants of StringInfos. + */ + buffer->data[buffer->len] = '\0'; + + return offset; +} + +/* + * Copy 'len' bytes to a previously reserved area in buffer. + */ +static void +copyToBuffer(StringInfo buffer, int offset, const char *data, int len) +{ + memcpy(buffer->data + offset, data, len); +} + +/* + * A shorthand for reserveFromBuffer + copyToBuffer. 
+ */ +static void +appendToBuffer(StringInfo buffer, const char *data, int len) +{ + int offset; + + offset = reserveFromBuffer(buffer, len); + copyToBuffer(buffer, offset, data, len); +} + + +/* + * Append padding, so that the length of the StringInfo is int-aligned. + * Returns the number of padding bytes appended. + */ +static short +padBufferToInt(StringInfo buffer) +{ + int padlen, + p, + offset; + + padlen = INTALIGN(buffer->len) - buffer->len; + + offset = reserveFromBuffer(buffer, padlen); + + /* padlen must be small, so this is probably faster than a memset */ + for (p = 0; p < padlen; p++) + buffer->data[offset + p] = '\0'; + + return padlen; +} + +/* + * Given a JsonbValue, convert to Jsonb. The result is palloc'd. + */ +static Jsonb * +convertToJsonb(JsonbValue *val) +{ + StringInfoData buffer; + JEntry jentry; + Jsonb *res; + + /* Should not already have binary representation */ + Assert(val->type != jbvBinary); + + /* Allocate an output buffer. It will be enlarged as needed */ + initStringInfo(&buffer); + + /* Make room for the varlena header */ + reserveFromBuffer(&buffer, VARHDRSZ); + + convertJsonbValue(&buffer, &jentry, val, 0); + + /* + * Note: the JEntry of the root is discarded. Therefore the root + * JsonbContainer struct must contain enough information to tell what kind + * of value it is. + */ + + res = (Jsonb *) buffer.data; + + SET_VARSIZE(res, buffer.len); + + return res; +} + +/* + * Subroutine of convertJsonb: serialize a single JsonbValue into buffer. + * + * The JEntry header for this node is returned in *header. It is filled in + * with the length of this value and appropriate type bits. If we wish to + * store an end offset rather than a length, it is the caller's responsibility + * to adjust for that. + * + * If the value is an array or an object, this recurses. 'level' is only used + * for debugging purposes. + */ +static void +convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level) +{ + check_stack_depth(); + + if (!val) + return; + + /* + * A JsonbValue passed as val should never have a type of jbvBinary, and + * neither should any of its sub-components. Those values will be produced + * by convertJsonbArray and convertJsonbObject, the results of which will + * not be passed back to this function as an argument. + */ + + if (IsAJsonbScalar(val)) + convertJsonbScalar(buffer, header, val); + else if (val->type == jbvArray) + convertJsonbArray(buffer, header, val, level); + else if (val->type == jbvObject) + convertJsonbObject(buffer, header, val, level); + else + elog(ERROR, "unknown type of jsonb container to convert"); +} + +static void +convertJsonbArray(StringInfo buffer, JEntry *header, JsonbValue *val, int level) +{ + int base_offset; + int jentry_offset; + int i; + int totallen; + uint32 containerhead; + int nElems = val->val.array.nElems; + + /* Remember where in the buffer this array starts. */ + base_offset = buffer->len; + + /* Align to 4-byte boundary (any padding counts as part of my data) */ + padBufferToInt(buffer); + + /* + * Construct the header Jentry and store it in the beginning of the + * variable-length payload. + */ + containerhead = nElems | JB_FARRAY; + if (val->val.array.rawScalar) + { + Assert(nElems == 1); + Assert(level == 0); + containerhead |= JB_FSCALAR; + } + + appendToBuffer(buffer, (char *) &containerhead, sizeof(uint32)); + + /* Reserve space for the JEntries of the elements. 
*/ + jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nElems); + + totallen = 0; + for (i = 0; i < nElems; i++) + { + JsonbValue *elem = &val->val.array.elems[i]; + int len; + JEntry meta; + + /* + * Convert element, producing a JEntry and appending its + * variable-length data to buffer + */ + convertJsonbValue(buffer, &meta, elem, level + 1); + + len = JBE_OFFLENFLD(meta); + totallen += len; + + /* + * Bail out if total variable-length data exceeds what will fit in a + * JEntry length field. We check this in each iteration, not just + * once at the end, to forestall possible integer overflow. + */ + if (totallen > JENTRY_OFFLENMASK) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("total size of jsonb array elements exceeds the maximum of %d bytes", + JENTRY_OFFLENMASK))); + + /* + * Convert each JB_OFFSET_STRIDE'th length to an offset. + */ + if ((i % JB_OFFSET_STRIDE) == 0) + meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF; + + copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry)); + jentry_offset += sizeof(JEntry); + } + + /* Total data size is everything we've appended to buffer */ + totallen = buffer->len - base_offset; + + /* Check length again, since we didn't include the metadata above */ + if (totallen > JENTRY_OFFLENMASK) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("total size of jsonb array elements exceeds the maximum of %d bytes", + JENTRY_OFFLENMASK))); + + /* Initialize the header of this node in the container's JEntry array */ + *header = JENTRY_ISCONTAINER | totallen; +} + +static void +convertJsonbObject(StringInfo buffer, JEntry *header, JsonbValue *val, int level) +{ + int base_offset; + int jentry_offset; + int i; + int totallen; + uint32 containerheader; + int nPairs = val->val.object.nPairs; + + /* Remember where in the buffer this object starts. */ + base_offset = buffer->len; + + /* Align to 4-byte boundary (any padding counts as part of my data) */ + padBufferToInt(buffer); + + /* + * Construct the header Jentry and store it in the beginning of the + * variable-length payload. + */ + containerheader = nPairs | JB_FOBJECT; + appendToBuffer(buffer, (char *) &containerheader, sizeof(uint32)); + + /* Reserve space for the JEntries of the keys and values. */ + jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nPairs * 2); + + /* + * Iterate over the keys, then over the values, since that is the ordering + * we want in the on-disk representation. + */ + totallen = 0; + for (i = 0; i < nPairs; i++) + { + JsonbPair *pair = &val->val.object.pairs[i]; + int len; + JEntry meta; + + /* + * Convert key, producing a JEntry and appending its variable-length + * data to buffer + */ + convertJsonbScalar(buffer, &meta, &pair->key); + + len = JBE_OFFLENFLD(meta); + totallen += len; + + /* + * Bail out if total variable-length data exceeds what will fit in a + * JEntry length field. We check this in each iteration, not just + * once at the end, to forestall possible integer overflow. + */ + if (totallen > JENTRY_OFFLENMASK) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("total size of jsonb object elements exceeds the maximum of %d bytes", + JENTRY_OFFLENMASK))); + + /* + * Convert each JB_OFFSET_STRIDE'th length to an offset. 
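+ * Storing an occasional absolute offset, rather than an offset for every
+ * entry, is a deliberate trade-off: lengths compress better, while the
+ * stride bounds how many lengths getJsonbOffset must sum when computing a
+ * random-access position.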
+ */ + if ((i % JB_OFFSET_STRIDE) == 0) + meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF; + + copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry)); + jentry_offset += sizeof(JEntry); + } + for (i = 0; i < nPairs; i++) + { + JsonbPair *pair = &val->val.object.pairs[i]; + int len; + JEntry meta; + + /* + * Convert value, producing a JEntry and appending its variable-length + * data to buffer + */ + convertJsonbValue(buffer, &meta, &pair->value, level + 1); + + len = JBE_OFFLENFLD(meta); + totallen += len; + + /* + * Bail out if total variable-length data exceeds what will fit in a + * JEntry length field. We check this in each iteration, not just + * once at the end, to forestall possible integer overflow. + */ + if (totallen > JENTRY_OFFLENMASK) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("total size of jsonb object elements exceeds the maximum of %d bytes", + JENTRY_OFFLENMASK))); + + /* + * Convert each JB_OFFSET_STRIDE'th length to an offset. + */ + if (((i + nPairs) % JB_OFFSET_STRIDE) == 0) + meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF; + + copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry)); + jentry_offset += sizeof(JEntry); + } + + /* Total data size is everything we've appended to buffer */ + totallen = buffer->len - base_offset; + + /* Check length again, since we didn't include the metadata above */ + if (totallen > JENTRY_OFFLENMASK) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("total size of jsonb object elements exceeds the maximum of %d bytes", + JENTRY_OFFLENMASK))); + + /* Initialize the header of this node in the container's JEntry array */ + *header = JENTRY_ISCONTAINER | totallen; +} + +static void +convertJsonbScalar(StringInfo buffer, JEntry *header, JsonbValue *scalarVal) +{ + int numlen; + short padlen; + + switch (scalarVal->type) + { + case jbvNull: + *header = JENTRY_ISNULL; + break; + + case jbvString: + appendToBuffer(buffer, scalarVal->val.string.val, scalarVal->val.string.len); + + *header = scalarVal->val.string.len; + break; + + case jbvNumeric: + numlen = VARSIZE_ANY(scalarVal->val.numeric); + padlen = padBufferToInt(buffer); + + appendToBuffer(buffer, (char *) scalarVal->val.numeric, numlen); + + *header = JENTRY_ISNUMERIC | (padlen + numlen); + break; + + case jbvBool: + *header = (scalarVal->val.boolean) ? + JENTRY_ISBOOL_TRUE : JENTRY_ISBOOL_FALSE; + break; + + case jbvDatetime: + { + char buf[MAXDATELEN + 1]; + size_t len; + + JsonEncodeDateTime(buf, + scalarVal->val.datetime.value, + scalarVal->val.datetime.typid, + &scalarVal->val.datetime.tz); + len = strlen(buf); + appendToBuffer(buffer, buf, len); + + *header = len; + } + break; + + default: + elog(ERROR, "invalid jsonb scalar type"); + } +} + +/* + * Compare two jbvString JsonbValue values, a and b. + * + * This is a special qsort() comparator used to sort strings in certain + * internal contexts where it is sufficient to have a well-defined sort order. + * In particular, object pair keys are sorted according to this criteria to + * facilitate cheap binary searches where we don't care about lexical sort + * order. + * + * a and b are first sorted based on their length. If a tie-breaker is + * required, only then do we consider string binary equality. 
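+ *
+ * For example, under this ordering the two-byte string "zz" sorts before the
+ * three-byte string "aaa"; only strings of equal length are compared
+ * byte-wise.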
+ */ +static int +lengthCompareJsonbStringValue(const void *a, const void *b) +{ + const JsonbValue *va = (const JsonbValue *) a; + const JsonbValue *vb = (const JsonbValue *) b; + + Assert(va->type == jbvString); + Assert(vb->type == jbvString); + + return lengthCompareJsonbString(va->val.string.val, va->val.string.len, + vb->val.string.val, vb->val.string.len); +} + +/* + * Subroutine for lengthCompareJsonbStringValue + * + * This is also useful separately to implement binary search on + * JsonbContainers. + */ +static int +lengthCompareJsonbString(const char *val1, int len1, const char *val2, int len2) +{ + if (len1 == len2) + return memcmp(val1, val2, len1); + else + return len1 > len2 ? 1 : -1; +} + +/* + * qsort_arg() comparator to compare JsonbPair values. + * + * Third argument 'binequal' may point to a bool. If it's set, *binequal is set + * to true iff a and b have full binary equality, since some callers have an + * interest in whether the two values are equal or merely equivalent. + * + * N.B: String comparisons here are "length-wise" + * + * Pairs with equals keys are ordered such that the order field is respected. + */ +static int +lengthCompareJsonbPair(const void *a, const void *b, void *binequal) +{ + const JsonbPair *pa = (const JsonbPair *) a; + const JsonbPair *pb = (const JsonbPair *) b; + int res; + + res = lengthCompareJsonbStringValue(&pa->key, &pb->key); + if (res == 0 && binequal) + *((bool *) binequal) = true; + + /* + * Guarantee keeping order of equal pair. Unique algorithm will prefer + * first element as value. + */ + if (res == 0) + res = (pa->order > pb->order) ? -1 : 1; + + return res; +} + +/* + * Sort and unique-ify pairs in JsonbValue object + */ +static void +uniqueifyJsonbObject(JsonbValue *object, bool unique_keys, bool skip_nulls) +{ + bool hasNonUniq = false; + + Assert(object->type == jbvObject); + + if (object->val.object.nPairs > 1) + qsort_arg(object->val.object.pairs, object->val.object.nPairs, sizeof(JsonbPair), + lengthCompareJsonbPair, &hasNonUniq); + + if (hasNonUniq && unique_keys) + ereport(ERROR, + errcode(ERRCODE_DUPLICATE_JSON_OBJECT_KEY_VALUE), + errmsg("duplicate JSON object key value")); + + if (hasNonUniq || skip_nulls) + { + JsonbPair *ptr, + *res; + + while (skip_nulls && object->val.object.nPairs > 0 && + object->val.object.pairs->value.type == jbvNull) + { + /* If skip_nulls is true, remove leading items with null */ + object->val.object.pairs++; + object->val.object.nPairs--; + } + + if (object->val.object.nPairs > 0) + { + ptr = object->val.object.pairs + 1; + res = object->val.object.pairs; + + while (ptr - object->val.object.pairs < object->val.object.nPairs) + { + /* Avoid copying over duplicate or null */ + if (lengthCompareJsonbStringValue(ptr, res) != 0 && + (!skip_nulls || ptr->value.type != jbvNull)) + { + res++; + if (ptr != res) + memcpy(res, ptr, sizeof(JsonbPair)); + } + ptr++; + } + + object->val.object.nPairs = res + 1 - object->val.object.pairs; + } + } +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonbsubs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonbsubs.c new file mode 100644 index 00000000000..de0ae3604ff --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonbsubs.c @@ -0,0 +1,416 @@ +/*------------------------------------------------------------------------- + * + * jsonbsubs.c + * Subscripting support functions for jsonb. 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/jsonbsubs.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "executor/execExpr.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "nodes/subscripting.h" +#include "parser/parse_coerce.h" +#include "parser/parse_expr.h" +#include "utils/jsonb.h" +#include "utils/jsonfuncs.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" + + +/* SubscriptingRefState.workspace for jsonb subscripting execution */ +typedef struct JsonbSubWorkspace +{ + bool expectArray; /* jsonb root is expected to be an array */ + Oid *indexOid; /* OID of coerced subscript expression, could + * be only integer or text */ + Datum *index; /* Subscript values in Datum format */ +} JsonbSubWorkspace; + + +/* + * Finish parse analysis of a SubscriptingRef expression for a jsonb. + * + * Transform the subscript expressions, coerce them to text, + * and determine the result type of the SubscriptingRef node. + */ +static void +jsonb_subscript_transform(SubscriptingRef *sbsref, + List *indirection, + ParseState *pstate, + bool isSlice, + bool isAssignment) +{ + List *upperIndexpr = NIL; + ListCell *idx; + + /* + * Transform and convert the subscript expressions. Jsonb subscripting + * does not support slices, look only and the upper index. + */ + foreach(idx, indirection) + { + A_Indices *ai = lfirst_node(A_Indices, idx); + Node *subExpr; + + if (isSlice) + { + Node *expr = ai->uidx ? ai->uidx : ai->lidx; + + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("jsonb subscript does not support slices"), + parser_errposition(pstate, exprLocation(expr)))); + } + + if (ai->uidx) + { + Oid subExprType = InvalidOid, + targetType = UNKNOWNOID; + + subExpr = transformExpr(pstate, ai->uidx, pstate->p_expr_kind); + subExprType = exprType(subExpr); + + if (subExprType != UNKNOWNOID) + { + Oid targets[2] = {INT4OID, TEXTOID}; + + /* + * Jsonb can handle multiple subscript types, but cases when a + * subscript could be coerced to multiple target types must be + * avoided, similar to overloaded functions. It could be + * possibly extend with jsonpath in the future. + */ + for (int i = 0; i < 2; i++) + { + if (can_coerce_type(1, &subExprType, &targets[i], COERCION_IMPLICIT)) + { + /* + * One type has already succeeded, it means there are + * two coercion targets possible, failure. + */ + if (targetType != UNKNOWNOID) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("subscript type %s is not supported", format_type_be(subExprType)), + errhint("jsonb subscript must be coercible to only one type, integer or text."), + parser_errposition(pstate, exprLocation(subExpr)))); + + targetType = targets[i]; + } + } + + /* + * No suitable types were found, failure. + */ + if (targetType == UNKNOWNOID) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("subscript type %s is not supported", format_type_be(subExprType)), + errhint("jsonb subscript must be coercible to either integer or text."), + parser_errposition(pstate, exprLocation(subExpr)))); + } + else + targetType = TEXTOID; + + /* + * We known from can_coerce_type that coercion will succeed, so + * coerce_type could be used. Note the implicit coercion context, + * which is required to handle subscripts of different types, + * similar to overloaded functions. 
+ */ + subExpr = coerce_type(pstate, + subExpr, subExprType, + targetType, -1, + COERCION_IMPLICIT, + COERCE_IMPLICIT_CAST, + -1); + if (subExpr == NULL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("jsonb subscript must have text type"), + parser_errposition(pstate, exprLocation(subExpr)))); + } + else + { + /* + * Slice with omitted upper bound. Should not happen as we already + * errored out on slice earlier, but handle this just in case. + */ + Assert(isSlice && ai->is_slice); + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("jsonb subscript does not support slices"), + parser_errposition(pstate, exprLocation(ai->uidx)))); + } + + upperIndexpr = lappend(upperIndexpr, subExpr); + } + + /* store the transformed lists into the SubscriptRef node */ + sbsref->refupperindexpr = upperIndexpr; + sbsref->reflowerindexpr = NIL; + + /* Determine the result type of the subscripting operation; always jsonb */ + sbsref->refrestype = JSONBOID; + sbsref->reftypmod = -1; +} + +/* + * During execution, process the subscripts in a SubscriptingRef expression. + * + * The subscript expressions are already evaluated in Datum form in the + * SubscriptingRefState's arrays. Check and convert them as necessary. + * + * If any subscript is NULL, we throw error in assignment cases, or in fetch + * cases set result to NULL and return false (instructing caller to skip the + * rest of the SubscriptingRef sequence). + */ +static bool +jsonb_subscript_check_subscripts(ExprState *state, + ExprEvalStep *op, + ExprContext *econtext) +{ + SubscriptingRefState *sbsrefstate = op->d.sbsref_subscript.state; + JsonbSubWorkspace *workspace = (JsonbSubWorkspace *) sbsrefstate->workspace; + + /* + * In case if the first subscript is an integer, the source jsonb is + * expected to be an array. This information is not used directly, all + * such cases are handled within corresponding jsonb assign functions. But + * if the source jsonb is NULL the expected type will be used to construct + * an empty source. + */ + if (sbsrefstate->numupper > 0 && sbsrefstate->upperprovided[0] && + !sbsrefstate->upperindexnull[0] && workspace->indexOid[0] == INT4OID) + workspace->expectArray = true; + + /* Process upper subscripts */ + for (int i = 0; i < sbsrefstate->numupper; i++) + { + if (sbsrefstate->upperprovided[i]) + { + /* If any index expr yields NULL, result is NULL or error */ + if (sbsrefstate->upperindexnull[i]) + { + if (sbsrefstate->isassignment) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("jsonb subscript in assignment must not be null"))); + *op->resnull = true; + return false; + } + + /* + * For jsonb fetch and assign functions we need to provide path in + * text format. Convert if it's not already text. + */ + if (workspace->indexOid[i] == INT4OID) + { + Datum datum = sbsrefstate->upperindex[i]; + char *cs = DatumGetCString(DirectFunctionCall1(int4out, datum)); + + workspace->index[i] = CStringGetTextDatum(cs); + } + else + workspace->index[i] = sbsrefstate->upperindex[i]; + } + } + + return true; +} + +/* + * Evaluate SubscriptingRef fetch for a jsonb element. + * + * Source container is in step's result variable (it's known not NULL, since + * we set fetch_strict to true). 
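+ *
+ * (At the SQL level this is the code path behind a subscripted fetch such
+ * as ('{"a": [1, 2]}'::jsonb)['a'][0], which yields 1 as a jsonb value.)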
+ */ +static void +jsonb_subscript_fetch(ExprState *state, + ExprEvalStep *op, + ExprContext *econtext) +{ + SubscriptingRefState *sbsrefstate = op->d.sbsref.state; + JsonbSubWorkspace *workspace = (JsonbSubWorkspace *) sbsrefstate->workspace; + Jsonb *jsonbSource; + + /* Should not get here if source jsonb (or any subscript) is null */ + Assert(!(*op->resnull)); + + jsonbSource = DatumGetJsonbP(*op->resvalue); + *op->resvalue = jsonb_get_element(jsonbSource, + workspace->index, + sbsrefstate->numupper, + op->resnull, + false); +} + +/* + * Evaluate SubscriptingRef assignment for a jsonb element assignment. + * + * Input container (possibly null) is in result area, replacement value is in + * SubscriptingRefState's replacevalue/replacenull. + */ +static void +jsonb_subscript_assign(ExprState *state, + ExprEvalStep *op, + ExprContext *econtext) +{ + SubscriptingRefState *sbsrefstate = op->d.sbsref.state; + JsonbSubWorkspace *workspace = (JsonbSubWorkspace *) sbsrefstate->workspace; + Jsonb *jsonbSource; + JsonbValue replacevalue; + + if (sbsrefstate->replacenull) + replacevalue.type = jbvNull; + else + JsonbToJsonbValue(DatumGetJsonbP(sbsrefstate->replacevalue), + &replacevalue); + + /* + * In case if the input container is null, set up an empty jsonb and + * proceed with the assignment. + */ + if (*op->resnull) + { + JsonbValue newSource; + + /* + * To avoid any surprising results, set up an empty jsonb array in + * case of an array is expected (i.e. the first subscript is integer), + * otherwise jsonb object. + */ + if (workspace->expectArray) + { + newSource.type = jbvArray; + newSource.val.array.nElems = 0; + newSource.val.array.rawScalar = false; + } + else + { + newSource.type = jbvObject; + newSource.val.object.nPairs = 0; + } + + jsonbSource = JsonbValueToJsonb(&newSource); + *op->resnull = false; + } + else + jsonbSource = DatumGetJsonbP(*op->resvalue); + + *op->resvalue = jsonb_set_element(jsonbSource, + workspace->index, + sbsrefstate->numupper, + &replacevalue); + /* The result is never NULL, so no need to change *op->resnull */ +} + +/* + * Compute old jsonb element value for a SubscriptingRef assignment + * expression. Will only be called if the new-value subexpression + * contains SubscriptingRef or FieldStore. This is the same as the + * regular fetch case, except that we have to handle a null jsonb, + * and the value should be stored into the SubscriptingRefState's + * prevvalue/prevnull fields. + */ +static void +jsonb_subscript_fetch_old(ExprState *state, + ExprEvalStep *op, + ExprContext *econtext) +{ + SubscriptingRefState *sbsrefstate = op->d.sbsref.state; + + if (*op->resnull) + { + /* whole jsonb is null, so any element is too */ + sbsrefstate->prevvalue = (Datum) 0; + sbsrefstate->prevnull = true; + } + else + { + Jsonb *jsonbSource = DatumGetJsonbP(*op->resvalue); + + sbsrefstate->prevvalue = jsonb_get_element(jsonbSource, + sbsrefstate->upperindex, + sbsrefstate->numupper, + &sbsrefstate->prevnull, + false); + } +} + +/* + * Set up execution state for a jsonb subscript operation. Opposite to the + * arrays subscription, there is no limit for number of subscripts as jsonb + * type itself doesn't have nesting limits. 
+ */ +static void +jsonb_exec_setup(const SubscriptingRef *sbsref, + SubscriptingRefState *sbsrefstate, + SubscriptExecSteps *methods) +{ + JsonbSubWorkspace *workspace; + ListCell *lc; + int nupper = sbsref->refupperindexpr->length; + char *ptr; + + /* Allocate type-specific workspace with space for per-subscript data */ + workspace = palloc0(MAXALIGN(sizeof(JsonbSubWorkspace)) + + nupper * (sizeof(Datum) + sizeof(Oid))); + workspace->expectArray = false; + ptr = ((char *) workspace) + MAXALIGN(sizeof(JsonbSubWorkspace)); + + /* + * This coding assumes sizeof(Datum) >= sizeof(Oid), else we might + * misalign the indexOid pointer + */ + workspace->index = (Datum *) ptr; + ptr += nupper * sizeof(Datum); + workspace->indexOid = (Oid *) ptr; + + sbsrefstate->workspace = workspace; + + /* Collect subscript data types necessary at execution time */ + foreach(lc, sbsref->refupperindexpr) + { + Node *expr = lfirst(lc); + int i = foreach_current_index(lc); + + workspace->indexOid[i] = exprType(expr); + } + + /* + * Pass back pointers to appropriate step execution functions. + */ + methods->sbs_check_subscripts = jsonb_subscript_check_subscripts; + methods->sbs_fetch = jsonb_subscript_fetch; + methods->sbs_assign = jsonb_subscript_assign; + methods->sbs_fetch_old = jsonb_subscript_fetch_old; +} + +/* + * jsonb_subscript_handler + * Subscripting handler for jsonb. + * + */ +Datum +jsonb_subscript_handler(PG_FUNCTION_ARGS) +{ + static const SubscriptRoutines sbsroutines = { + .transform = jsonb_subscript_transform, + .exec_setup = jsonb_exec_setup, + .fetch_strict = true, /* fetch returns NULL for NULL inputs */ + .fetch_leakproof = true, /* fetch returns NULL for bad subscript */ + .store_leakproof = false /* ... but assignment throws error */ + }; + + PG_RETURN_POINTER(&sbsroutines); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonfuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonfuncs.c new file mode 100644 index 00000000000..70cb922e6b7 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonfuncs.c @@ -0,0 +1,5687 @@ +/*------------------------------------------------------------------------- + * + * jsonfuncs.c + * Functions to process JSON data types. 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/adt/jsonfuncs.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <limits.h> + +#include "access/htup_details.h" +#include "catalog/pg_type.h" +#include "common/jsonapi.h" +#include "common/string.h" +#include "fmgr.h" +#include "funcapi.h" +#include "lib/stringinfo.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "nodes/miscnodes.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/hsearch.h" +#include "utils/json.h" +#include "utils/jsonb.h" +#include "utils/jsonfuncs.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/syscache.h" +#include "utils/typcache.h" + +/* Operations available for setPath */ +#define JB_PATH_CREATE 0x0001 +#define JB_PATH_DELETE 0x0002 +#define JB_PATH_REPLACE 0x0004 +#define JB_PATH_INSERT_BEFORE 0x0008 +#define JB_PATH_INSERT_AFTER 0x0010 +#define JB_PATH_CREATE_OR_INSERT \ + (JB_PATH_INSERT_BEFORE | JB_PATH_INSERT_AFTER | JB_PATH_CREATE) +#define JB_PATH_FILL_GAPS 0x0020 +#define JB_PATH_CONSISTENT_POSITION 0x0040 + +/* state for json_object_keys */ +typedef struct OkeysState +{ + JsonLexContext *lex; + char **result; + int result_size; + int result_count; + int sent_count; +} OkeysState; + +/* state for iterate_json_values function */ +typedef struct IterateJsonStringValuesState +{ + JsonLexContext *lex; + JsonIterateStringValuesAction action; /* an action that will be applied + * to each json value */ + void *action_state; /* any necessary context for iteration */ + uint32 flags; /* what kind of elements from a json we want + * to iterate */ +} IterateJsonStringValuesState; + +/* state for transform_json_string_values function */ +typedef struct TransformJsonStringValuesState +{ + JsonLexContext *lex; + StringInfo strval; /* resulting json */ + JsonTransformStringValuesAction action; /* an action that will be applied + * to each json value */ + void *action_state; /* any necessary context for transformation */ +} TransformJsonStringValuesState; + +/* state for json_get* functions */ +typedef struct GetState +{ + JsonLexContext *lex; + text *tresult; + char *result_start; + bool normalize_results; + bool next_scalar; + int npath; /* length of each path-related array */ + char **path_names; /* field name(s) being sought */ + int *path_indexes; /* array index(es) being sought */ + bool *pathok; /* is path matched to current depth? 
*/ + int *array_cur_index; /* current element index at each path + * level */ +} GetState; + +/* state for json_array_length */ +typedef struct AlenState +{ + JsonLexContext *lex; + int count; +} AlenState; + +/* state for json_each */ +typedef struct EachState +{ + JsonLexContext *lex; + Tuplestorestate *tuple_store; + TupleDesc ret_tdesc; + MemoryContext tmp_cxt; + char *result_start; + bool normalize_results; + bool next_scalar; + char *normalized_scalar; +} EachState; + +/* state for json_array_elements */ +typedef struct ElementsState +{ + JsonLexContext *lex; + const char *function_name; + Tuplestorestate *tuple_store; + TupleDesc ret_tdesc; + MemoryContext tmp_cxt; + char *result_start; + bool normalize_results; + bool next_scalar; + char *normalized_scalar; +} ElementsState; + +/* state for get_json_object_as_hash */ +typedef struct JHashState +{ + JsonLexContext *lex; + const char *function_name; + HTAB *hash; + char *saved_scalar; + char *save_json_start; + JsonTokenType saved_token_type; +} JHashState; + +/* hashtable element */ +typedef struct JsonHashEntry +{ + char fname[NAMEDATALEN]; /* hash key (MUST BE FIRST) */ + char *val; + JsonTokenType type; +} JsonHashEntry; + +/* structure to cache type I/O metadata needed for populate_scalar() */ +typedef struct ScalarIOData +{ + Oid typioparam; + FmgrInfo typiofunc; +} ScalarIOData; + +/* these two structures are used recursively */ +typedef struct ColumnIOData ColumnIOData; +typedef struct RecordIOData RecordIOData; + +/* structure to cache metadata needed for populate_array() */ +typedef struct ArrayIOData +{ + ColumnIOData *element_info; /* metadata cache */ + Oid element_type; /* array element type id */ + int32 element_typmod; /* array element type modifier */ +} ArrayIOData; + +/* structure to cache metadata needed for populate_composite() */ +typedef struct CompositeIOData +{ + /* + * We use pointer to a RecordIOData here because variable-length struct + * RecordIOData can't be used directly in ColumnIOData.io union + */ + RecordIOData *record_io; /* metadata cache for populate_record() */ + TupleDesc tupdesc; /* cached tuple descriptor */ + /* these fields differ from target type only if domain over composite: */ + Oid base_typid; /* base type id */ + int32 base_typmod; /* base type modifier */ + /* this field is used only if target type is domain over composite: */ + void *domain_info; /* opaque cache for domain checks */ +} CompositeIOData; + +/* structure to cache metadata needed for populate_domain() */ +typedef struct DomainIOData +{ + ColumnIOData *base_io; /* metadata cache */ + Oid base_typid; /* base type id */ + int32 base_typmod; /* base type modifier */ + void *domain_info; /* opaque cache for domain checks */ +} DomainIOData; + +/* enumeration type categories */ +typedef enum TypeCat +{ + TYPECAT_SCALAR = 's', + TYPECAT_ARRAY = 'a', + TYPECAT_COMPOSITE = 'c', + TYPECAT_COMPOSITE_DOMAIN = 'C', + TYPECAT_DOMAIN = 'd' +} TypeCat; + +/* these two are stolen from hstore / record_out, used in populate_record* */ + +/* structure to cache record metadata needed for populate_record_field() */ +struct ColumnIOData +{ + Oid typid; /* column type id */ + int32 typmod; /* column type modifier */ + TypeCat typcat; /* column type category */ + ScalarIOData scalar_io; /* metadata cache for direct conversion + * through input function */ + union + { + ArrayIOData array; + CompositeIOData composite; + DomainIOData domain; + } io; /* metadata cache for various column type + * categories */ +}; + +/* structure to cache record 
metadata needed for populate_record() */ +struct RecordIOData +{ + Oid record_type; + int32 record_typmod; + int ncolumns; + ColumnIOData columns[FLEXIBLE_ARRAY_MEMBER]; +}; + +/* per-query cache for populate_record_worker and populate_recordset_worker */ +typedef struct PopulateRecordCache +{ + Oid argtype; /* declared type of the record argument */ + ColumnIOData c; /* metadata cache for populate_composite() */ + MemoryContext fn_mcxt; /* where this is stored */ +} PopulateRecordCache; + +/* per-call state for populate_recordset */ +typedef struct PopulateRecordsetState +{ + JsonLexContext *lex; + const char *function_name; + HTAB *json_hash; + char *saved_scalar; + char *save_json_start; + JsonTokenType saved_token_type; + Tuplestorestate *tuple_store; + HeapTupleHeader rec; + PopulateRecordCache *cache; +} PopulateRecordsetState; + +/* common data for populate_array_json() and populate_array_dim_jsonb() */ +typedef struct PopulateArrayContext +{ + ArrayBuildState *astate; /* array build state */ + ArrayIOData *aio; /* metadata cache */ + MemoryContext acxt; /* array build memory context */ + MemoryContext mcxt; /* cache memory context */ + const char *colname; /* for diagnostics only */ + int *dims; /* dimensions */ + int *sizes; /* current dimension counters */ + int ndims; /* number of dimensions */ +} PopulateArrayContext; + +/* state for populate_array_json() */ +typedef struct PopulateArrayState +{ + JsonLexContext *lex; /* json lexer */ + PopulateArrayContext *ctx; /* context */ + char *element_start; /* start of the current array element */ + char *element_scalar; /* current array element token if it is a + * scalar */ + JsonTokenType element_type; /* current array element type */ +} PopulateArrayState; + +/* state for json_strip_nulls */ +typedef struct StripnullState +{ + JsonLexContext *lex; + StringInfo strval; + bool skip_next_null; +} StripnullState; + +/* structure for generalized json/jsonb value passing */ +typedef struct JsValue +{ + bool is_json; /* json/jsonb */ + union + { + struct + { + char *str; /* json string */ + int len; /* json string length or -1 if null-terminated */ + JsonTokenType type; /* json type */ + } json; /* json value */ + + JsonbValue *jsonb; /* jsonb value */ + } val; +} JsValue; + +typedef struct JsObject +{ + bool is_json; /* json/jsonb */ + union + { + HTAB *json_hash; + JsonbContainer *jsonb_cont; + } val; +} JsObject; + +/* useful macros for testing JsValue properties */ +#define JsValueIsNull(jsv) \ + ((jsv)->is_json ? \ + (!(jsv)->val.json.str || (jsv)->val.json.type == JSON_TOKEN_NULL) : \ + (!(jsv)->val.jsonb || (jsv)->val.jsonb->type == jbvNull)) + +#define JsValueIsString(jsv) \ + ((jsv)->is_json ? (jsv)->val.json.type == JSON_TOKEN_STRING \ + : ((jsv)->val.jsonb && (jsv)->val.jsonb->type == jbvString)) + +#define JsObjectIsEmpty(jso) \ + ((jso)->is_json \ + ? 
hash_get_num_entries((jso)->val.json_hash) == 0 \ + : ((jso)->val.jsonb_cont == NULL || \ + JsonContainerSize((jso)->val.jsonb_cont) == 0)) + +#define JsObjectFree(jso) \ + do { \ + if ((jso)->is_json) \ + hash_destroy((jso)->val.json_hash); \ + } while (0) + +static int report_json_context(JsonLexContext *lex); + +/* semantic action functions for json_object_keys */ +static JsonParseErrorType okeys_object_field_start(void *state, char *fname, bool isnull); +static JsonParseErrorType okeys_array_start(void *state); +static JsonParseErrorType okeys_scalar(void *state, char *token, JsonTokenType tokentype); + +/* semantic action functions for json_get* functions */ +static JsonParseErrorType get_object_start(void *state); +static JsonParseErrorType get_object_end(void *state); +static JsonParseErrorType get_object_field_start(void *state, char *fname, bool isnull); +static JsonParseErrorType get_object_field_end(void *state, char *fname, bool isnull); +static JsonParseErrorType get_array_start(void *state); +static JsonParseErrorType get_array_end(void *state); +static JsonParseErrorType get_array_element_start(void *state, bool isnull); +static JsonParseErrorType get_array_element_end(void *state, bool isnull); +static JsonParseErrorType get_scalar(void *state, char *token, JsonTokenType tokentype); + +/* common worker function for json getter functions */ +static Datum get_path_all(FunctionCallInfo fcinfo, bool as_text); +static text *get_worker(text *json, char **tpath, int *ipath, int npath, + bool normalize_results); +static Datum get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text); +static text *JsonbValueAsText(JsonbValue *v); + +/* semantic action functions for json_array_length */ +static JsonParseErrorType alen_object_start(void *state); +static JsonParseErrorType alen_scalar(void *state, char *token, JsonTokenType tokentype); +static JsonParseErrorType alen_array_element_start(void *state, bool isnull); + +/* common workers for json{b}_each* functions */ +static Datum each_worker(FunctionCallInfo fcinfo, bool as_text); +static Datum each_worker_jsonb(FunctionCallInfo fcinfo, const char *funcname, + bool as_text); + +/* semantic action functions for json_each */ +static JsonParseErrorType each_object_field_start(void *state, char *fname, bool isnull); +static JsonParseErrorType each_object_field_end(void *state, char *fname, bool isnull); +static JsonParseErrorType each_array_start(void *state); +static JsonParseErrorType each_scalar(void *state, char *token, JsonTokenType tokentype); + +/* common workers for json{b}_array_elements_* functions */ +static Datum elements_worker(FunctionCallInfo fcinfo, const char *funcname, + bool as_text); +static Datum elements_worker_jsonb(FunctionCallInfo fcinfo, const char *funcname, + bool as_text); + +/* semantic action functions for json_array_elements */ +static JsonParseErrorType elements_object_start(void *state); +static JsonParseErrorType elements_array_element_start(void *state, bool isnull); +static JsonParseErrorType elements_array_element_end(void *state, bool isnull); +static JsonParseErrorType elements_scalar(void *state, char *token, JsonTokenType tokentype); + +/* turn a json object into a hash table */ +static HTAB *get_json_object_as_hash(char *json, int len, const char *funcname); + +/* semantic actions for populate_array_json */ +static JsonParseErrorType populate_array_object_start(void *_state); +static JsonParseErrorType populate_array_array_end(void *_state); +static JsonParseErrorType 
populate_array_element_start(void *_state, bool isnull); +static JsonParseErrorType populate_array_element_end(void *_state, bool isnull); +static JsonParseErrorType populate_array_scalar(void *_state, char *token, JsonTokenType tokentype); + +/* semantic action functions for get_json_object_as_hash */ +static JsonParseErrorType hash_object_field_start(void *state, char *fname, bool isnull); +static JsonParseErrorType hash_object_field_end(void *state, char *fname, bool isnull); +static JsonParseErrorType hash_array_start(void *state); +static JsonParseErrorType hash_scalar(void *state, char *token, JsonTokenType tokentype); + +/* semantic action functions for populate_recordset */ +static JsonParseErrorType populate_recordset_object_field_start(void *state, char *fname, bool isnull); +static JsonParseErrorType populate_recordset_object_field_end(void *state, char *fname, bool isnull); +static JsonParseErrorType populate_recordset_scalar(void *state, char *token, JsonTokenType tokentype); +static JsonParseErrorType populate_recordset_object_start(void *state); +static JsonParseErrorType populate_recordset_object_end(void *state); +static JsonParseErrorType populate_recordset_array_start(void *state); +static JsonParseErrorType populate_recordset_array_element_start(void *state, bool isnull); + +/* semantic action functions for json_strip_nulls */ +static JsonParseErrorType sn_object_start(void *state); +static JsonParseErrorType sn_object_end(void *state); +static JsonParseErrorType sn_array_start(void *state); +static JsonParseErrorType sn_array_end(void *state); +static JsonParseErrorType sn_object_field_start(void *state, char *fname, bool isnull); +static JsonParseErrorType sn_array_element_start(void *state, bool isnull); +static JsonParseErrorType sn_scalar(void *state, char *token, JsonTokenType tokentype); + +/* worker functions for populate_record, to_record, populate_recordset and to_recordset */ +static Datum populate_recordset_worker(FunctionCallInfo fcinfo, const char *funcname, + bool is_json, bool have_record_arg); +static Datum populate_record_worker(FunctionCallInfo fcinfo, const char *funcname, + bool is_json, bool have_record_arg); + +/* helper functions for populate_record[set] */ +static HeapTupleHeader populate_record(TupleDesc tupdesc, RecordIOData **record_p, + HeapTupleHeader defaultval, MemoryContext mcxt, + JsObject *obj); +static void get_record_type_from_argument(FunctionCallInfo fcinfo, + const char *funcname, + PopulateRecordCache *cache); +static void get_record_type_from_query(FunctionCallInfo fcinfo, + const char *funcname, + PopulateRecordCache *cache); +static void JsValueToJsObject(JsValue *jsv, JsObject *jso); +static Datum populate_composite(CompositeIOData *io, Oid typid, + const char *colname, MemoryContext mcxt, + HeapTupleHeader defaultval, JsValue *jsv, bool isnull); +static Datum populate_scalar(ScalarIOData *io, Oid typid, int32 typmod, JsValue *jsv); +static void prepare_column_cache(ColumnIOData *column, Oid typid, int32 typmod, + MemoryContext mcxt, bool need_scalar); +static Datum populate_record_field(ColumnIOData *col, Oid typid, int32 typmod, + const char *colname, MemoryContext mcxt, Datum defaultval, + JsValue *jsv, bool *isnull); +static RecordIOData *allocate_record_info(MemoryContext mcxt, int ncolumns); +static bool JsObjectGetField(JsObject *obj, char *field, JsValue *jsv); +static void populate_recordset_record(PopulateRecordsetState *state, JsObject *obj); +static void populate_array_json(PopulateArrayContext *ctx, char *json, 
int len); +static void populate_array_dim_jsonb(PopulateArrayContext *ctx, JsonbValue *jbv, + int ndim); +static void populate_array_report_expected_array(PopulateArrayContext *ctx, int ndim); +static void populate_array_assign_ndims(PopulateArrayContext *ctx, int ndims); +static void populate_array_check_dimension(PopulateArrayContext *ctx, int ndim); +static void populate_array_element(PopulateArrayContext *ctx, int ndim, JsValue *jsv); +static Datum populate_array(ArrayIOData *aio, const char *colname, + MemoryContext mcxt, JsValue *jsv); +static Datum populate_domain(DomainIOData *io, Oid typid, const char *colname, + MemoryContext mcxt, JsValue *jsv, bool isnull); + +/* functions supporting jsonb_delete, jsonb_set and jsonb_concat */ +static JsonbValue *IteratorConcat(JsonbIterator **it1, JsonbIterator **it2, + JsonbParseState **state); +static JsonbValue *setPath(JsonbIterator **it, Datum *path_elems, + bool *path_nulls, int path_len, + JsonbParseState **st, int level, JsonbValue *newval, + int op_type); +static void setPathObject(JsonbIterator **it, Datum *path_elems, + bool *path_nulls, int path_len, JsonbParseState **st, + int level, + JsonbValue *newval, uint32 npairs, int op_type); +static void setPathArray(JsonbIterator **it, Datum *path_elems, + bool *path_nulls, int path_len, JsonbParseState **st, + int level, + JsonbValue *newval, uint32 nelems, int op_type); + +/* function supporting iterate_json_values */ +static JsonParseErrorType iterate_values_scalar(void *state, char *token, JsonTokenType tokentype); +static JsonParseErrorType iterate_values_object_field_start(void *state, char *fname, bool isnull); + +/* functions supporting transform_json_string_values */ +static JsonParseErrorType transform_string_values_object_start(void *state); +static JsonParseErrorType transform_string_values_object_end(void *state); +static JsonParseErrorType transform_string_values_array_start(void *state); +static JsonParseErrorType transform_string_values_array_end(void *state); +static JsonParseErrorType transform_string_values_object_field_start(void *state, char *fname, bool isnull); +static JsonParseErrorType transform_string_values_array_element_start(void *state, bool isnull); +static JsonParseErrorType transform_string_values_scalar(void *state, char *token, JsonTokenType tokentype); + + +/* + * pg_parse_json_or_errsave + * + * This function is like pg_parse_json, except that it does not return a + * JsonParseErrorType. Instead, in case of any failure, this function will + * save error data into *escontext if that's an ErrorSaveContext, otherwise + * ereport(ERROR). + * + * Returns a boolean indicating success or failure (failure will only be + * returned when escontext is an ErrorSaveContext). + */ +bool +pg_parse_json_or_errsave(JsonLexContext *lex, JsonSemAction *sem, + Node *escontext) +{ + JsonParseErrorType result; + + result = pg_parse_json(lex, sem); + if (result != JSON_SUCCESS) + { + json_errsave_error(result, lex, escontext); + return false; + } + return true; +} + +/* + * makeJsonLexContext + * + * This is like makeJsonLexContextCstringLen, but it accepts a text value + * directly. + */ +JsonLexContext * +makeJsonLexContext(text *json, bool need_escapes) +{ + /* + * Most callers pass a detoasted datum, but it's not clear that they all + * do. pg_detoast_datum_packed() is cheap insurance. 
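+ *
+ * (Editorial note, not upstream text: callers in this file pass
+ * need_escapes = true when their semantic callbacks consume de-escaped
+ * string values -- see get_worker() and each_worker() below -- while
+ * callers that only inspect structure, such as json_array_length(),
+ * pass false and avoid the extra copying.)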
+ */ + json = pg_detoast_datum_packed(json); + + return makeJsonLexContextCstringLen(VARDATA_ANY(json), + VARSIZE_ANY_EXHDR(json), + GetDatabaseEncoding(), + need_escapes); +} + +/* + * SQL function json_object_keys + * + * Returns the set of keys for the object argument. + * + * This SRF operates in value-per-call mode. It processes the + * object during the first call, and the keys are simply stashed + * in an array, whose size is expanded as necessary. This is probably + * safe enough for a list of keys of a single object, since they are + * limited in size to NAMEDATALEN and the number of keys is unlikely to + * be so huge that it has major memory implications. + */ +Datum +jsonb_object_keys(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + OkeysState *state; + + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcontext; + Jsonb *jb = PG_GETARG_JSONB_P(0); + bool skipNested = false; + JsonbIterator *it; + JsonbValue v; + JsonbIteratorToken r; + + if (JB_ROOT_IS_SCALAR(jb)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call %s on a scalar", + "jsonb_object_keys"))); + else if (JB_ROOT_IS_ARRAY(jb)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call %s on an array", + "jsonb_object_keys"))); + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + state = palloc(sizeof(OkeysState)); + + state->result_size = JB_ROOT_COUNT(jb); + state->result_count = 0; + state->sent_count = 0; + state->result = palloc(state->result_size * sizeof(char *)); + + it = JsonbIteratorInit(&jb->root); + + while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE) + { + skipNested = true; + + if (r == WJB_KEY) + { + char *cstr; + + cstr = palloc(v.val.string.len + 1 * sizeof(char)); + memcpy(cstr, v.val.string.val, v.val.string.len); + cstr[v.val.string.len] = '\0'; + state->result[state->result_count++] = cstr; + } + } + + MemoryContextSwitchTo(oldcontext); + funcctx->user_fctx = (void *) state; + } + + funcctx = SRF_PERCALL_SETUP(); + state = (OkeysState *) funcctx->user_fctx; + + if (state->sent_count < state->result_count) + { + char *nxt = state->result[state->sent_count++]; + + SRF_RETURN_NEXT(funcctx, CStringGetTextDatum(nxt)); + } + + SRF_RETURN_DONE(funcctx); +} + +/* + * Report a JSON error. + */ +void +json_errsave_error(JsonParseErrorType error, JsonLexContext *lex, + Node *escontext) +{ + if (error == JSON_UNICODE_HIGH_ESCAPE || + error == JSON_UNICODE_UNTRANSLATABLE || + error == JSON_UNICODE_CODE_POINT_ZERO) + errsave(escontext, + (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER), + errmsg("unsupported Unicode escape sequence"), + errdetail_internal("%s", json_errdetail(error, lex)), + report_json_context(lex))); + else if (error == JSON_SEM_ACTION_FAILED) + { + /* semantic action function had better have reported something */ + if (!SOFT_ERROR_OCCURRED(escontext)) + elog(ERROR, "JSON semantic action function did not provide error information"); + } + else + errsave(escontext, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "json"), + errdetail_internal("%s", json_errdetail(error, lex)), + report_json_context(lex))); +} + +/* + * Report a CONTEXT line for bogus JSON input. + * + * lex->token_terminator must be set to identify the spot where we detected + * the error. Note that lex->token_start might be NULL, in case we recognized + * error at EOF. 
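+ *
+ * (Illustrative sketch, an editorial assumption rather than upstream text:
+ * for the bogus input '{"a":1,]' the emitted context line would take the
+ * form
+ * JSON data, line 1: {"a":1,]
+ * with "..." added before and/or after the excerpt when it is truncated.)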
+ * + * The return value isn't meaningful, but we make it non-void so that this + * can be invoked inside ereport(). + */ +static int +report_json_context(JsonLexContext *lex) +{ + const char *context_start; + const char *context_end; + const char *line_start; + char *ctxt; + int ctxtlen; + const char *prefix; + const char *suffix; + + /* Choose boundaries for the part of the input we will display */ + line_start = lex->line_start; + context_start = line_start; + context_end = lex->token_terminator; + Assert(context_end >= context_start); + + /* Advance until we are close enough to context_end */ + while (context_end - context_start >= 50) + { + /* Advance to next multibyte character */ + if (IS_HIGHBIT_SET(*context_start)) + context_start += pg_mblen(context_start); + else + context_start++; + } + + /* + * We add "..." to indicate that the excerpt doesn't start at the + * beginning of the line ... but if we're within 3 characters of the + * beginning of the line, we might as well just show the whole line. + */ + if (context_start - line_start <= 3) + context_start = line_start; + + /* Get a null-terminated copy of the data to present */ + ctxtlen = context_end - context_start; + ctxt = palloc(ctxtlen + 1); + memcpy(ctxt, context_start, ctxtlen); + ctxt[ctxtlen] = '\0'; + + /* + * Show the context, prefixing "..." if not starting at start of line, and + * suffixing "..." if not ending at end of line. + */ + prefix = (context_start > line_start) ? "..." : ""; + suffix = (lex->token_type != JSON_TOKEN_END && + context_end - lex->input < lex->input_length && + *context_end != '\n' && *context_end != '\r') ? "..." : ""; + + return errcontext("JSON data, line %d: %s%s%s", + lex->line_number, prefix, ctxt, suffix); +} + + +Datum +json_object_keys(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + OkeysState *state; + + if (SRF_IS_FIRSTCALL()) + { + text *json = PG_GETARG_TEXT_PP(0); + JsonLexContext *lex = makeJsonLexContext(json, true); + JsonSemAction *sem; + MemoryContext oldcontext; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + state = palloc(sizeof(OkeysState)); + sem = palloc0(sizeof(JsonSemAction)); + + state->lex = lex; + state->result_size = 256; + state->result_count = 0; + state->sent_count = 0; + state->result = palloc(256 * sizeof(char *)); + + sem->semstate = (void *) state; + sem->array_start = okeys_array_start; + sem->scalar = okeys_scalar; + sem->object_field_start = okeys_object_field_start; + /* remainder are all NULL, courtesy of palloc0 above */ + + pg_parse_json_or_ereport(lex, sem); + /* keys are now in state->result */ + + pfree(lex->strval->data); + pfree(lex->strval); + pfree(lex); + pfree(sem); + + MemoryContextSwitchTo(oldcontext); + funcctx->user_fctx = (void *) state; + } + + funcctx = SRF_PERCALL_SETUP(); + state = (OkeysState *) funcctx->user_fctx; + + if (state->sent_count < state->result_count) + { + char *nxt = state->result[state->sent_count++]; + + SRF_RETURN_NEXT(funcctx, CStringGetTextDatum(nxt)); + } + + SRF_RETURN_DONE(funcctx); +} + +static JsonParseErrorType +okeys_object_field_start(void *state, char *fname, bool isnull) +{ + OkeysState *_state = (OkeysState *) state; + + /* only collecting keys for the top level object */ + if (_state->lex->lex_level != 1) + return JSON_SUCCESS; + + /* enlarge result array if necessary */ + if (_state->result_count >= _state->result_size) + { + _state->result_size *= 2; + _state->result = (char **) + repalloc(_state->result, sizeof(char *) * 
_state->result_size); + } + + /* save a copy of the field name */ + _state->result[_state->result_count++] = pstrdup(fname); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +okeys_array_start(void *state) +{ + OkeysState *_state = (OkeysState *) state; + + /* top level must be a json object */ + if (_state->lex->lex_level == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call %s on an array", + "json_object_keys"))); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +okeys_scalar(void *state, char *token, JsonTokenType tokentype) +{ + OkeysState *_state = (OkeysState *) state; + + /* top level must be a json object */ + if (_state->lex->lex_level == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call %s on a scalar", + "json_object_keys"))); + + return JSON_SUCCESS; +} + +/* + * json and jsonb getter functions + * these implement the -> ->> #> and #>> operators + * and the json{b?}_extract_path*(json, text, ...) functions + */ + + +Datum +json_object_field(PG_FUNCTION_ARGS) +{ + text *json = PG_GETARG_TEXT_PP(0); + text *fname = PG_GETARG_TEXT_PP(1); + char *fnamestr = text_to_cstring(fname); + text *result; + + result = get_worker(json, &fnamestr, NULL, 1, false); + + if (result != NULL) + PG_RETURN_TEXT_P(result); + else + PG_RETURN_NULL(); +} + +Datum +jsonb_object_field(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + text *key = PG_GETARG_TEXT_PP(1); + JsonbValue *v; + JsonbValue vbuf; + + if (!JB_ROOT_IS_OBJECT(jb)) + PG_RETURN_NULL(); + + v = getKeyJsonValueFromContainer(&jb->root, + VARDATA_ANY(key), + VARSIZE_ANY_EXHDR(key), + &vbuf); + + if (v != NULL) + PG_RETURN_JSONB_P(JsonbValueToJsonb(v)); + + PG_RETURN_NULL(); +} + +Datum +json_object_field_text(PG_FUNCTION_ARGS) +{ + text *json = PG_GETARG_TEXT_PP(0); + text *fname = PG_GETARG_TEXT_PP(1); + char *fnamestr = text_to_cstring(fname); + text *result; + + result = get_worker(json, &fnamestr, NULL, 1, true); + + if (result != NULL) + PG_RETURN_TEXT_P(result); + else + PG_RETURN_NULL(); +} + +Datum +jsonb_object_field_text(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + text *key = PG_GETARG_TEXT_PP(1); + JsonbValue *v; + JsonbValue vbuf; + + if (!JB_ROOT_IS_OBJECT(jb)) + PG_RETURN_NULL(); + + v = getKeyJsonValueFromContainer(&jb->root, + VARDATA_ANY(key), + VARSIZE_ANY_EXHDR(key), + &vbuf); + + if (v != NULL && v->type != jbvNull) + PG_RETURN_TEXT_P(JsonbValueAsText(v)); + + PG_RETURN_NULL(); +} + +Datum +json_array_element(PG_FUNCTION_ARGS) +{ + text *json = PG_GETARG_TEXT_PP(0); + int element = PG_GETARG_INT32(1); + text *result; + + result = get_worker(json, NULL, &element, 1, false); + + if (result != NULL) + PG_RETURN_TEXT_P(result); + else + PG_RETURN_NULL(); +} + +Datum +jsonb_array_element(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + int element = PG_GETARG_INT32(1); + JsonbValue *v; + + if (!JB_ROOT_IS_ARRAY(jb)) + PG_RETURN_NULL(); + + /* Handle negative subscript */ + if (element < 0) + { + uint32 nelements = JB_ROOT_COUNT(jb); + + if (-element > nelements) + PG_RETURN_NULL(); + else + element += nelements; + } + + v = getIthJsonbValueFromContainer(&jb->root, element); + if (v != NULL) + PG_RETURN_JSONB_P(JsonbValueToJsonb(v)); + + PG_RETURN_NULL(); +} + +Datum +json_array_element_text(PG_FUNCTION_ARGS) +{ + text *json = PG_GETARG_TEXT_PP(0); + int element = PG_GETARG_INT32(1); + text *result; + + result = get_worker(json, NULL, &element, 1, true); + + if (result != NULL) + PG_RETURN_TEXT_P(result); 
+ else + PG_RETURN_NULL(); +} + +Datum +jsonb_array_element_text(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + int element = PG_GETARG_INT32(1); + JsonbValue *v; + + if (!JB_ROOT_IS_ARRAY(jb)) + PG_RETURN_NULL(); + + /* Handle negative subscript */ + if (element < 0) + { + uint32 nelements = JB_ROOT_COUNT(jb); + + if (-element > nelements) + PG_RETURN_NULL(); + else + element += nelements; + } + + v = getIthJsonbValueFromContainer(&jb->root, element); + + if (v != NULL && v->type != jbvNull) + PG_RETURN_TEXT_P(JsonbValueAsText(v)); + + PG_RETURN_NULL(); +} + +Datum +json_extract_path(PG_FUNCTION_ARGS) +{ + return get_path_all(fcinfo, false); +} + +Datum +json_extract_path_text(PG_FUNCTION_ARGS) +{ + return get_path_all(fcinfo, true); +} + +/* + * common routine for extract_path functions + */ +static Datum +get_path_all(FunctionCallInfo fcinfo, bool as_text) +{ + text *json = PG_GETARG_TEXT_PP(0); + ArrayType *path = PG_GETARG_ARRAYTYPE_P(1); + text *result; + Datum *pathtext; + bool *pathnulls; + int npath; + char **tpath; + int *ipath; + int i; + + /* + * If the array contains any null elements, return NULL, on the grounds + * that you'd have gotten NULL if any RHS value were NULL in a nested + * series of applications of the -> operator. (Note: because we also + * return NULL for error cases such as no-such-field, this is true + * regardless of the contents of the rest of the array.) + */ + if (array_contains_nulls(path)) + PG_RETURN_NULL(); + + deconstruct_array_builtin(path, TEXTOID, &pathtext, &pathnulls, &npath); + + tpath = palloc(npath * sizeof(char *)); + ipath = palloc(npath * sizeof(int)); + + for (i = 0; i < npath; i++) + { + Assert(!pathnulls[i]); + tpath[i] = TextDatumGetCString(pathtext[i]); + + /* + * we have no idea at this stage what structure the document is so + * just convert anything in the path that we can to an integer and set + * all the other integers to INT_MIN which will never match. + */ + if (*tpath[i] != '\0') + { + int ind; + char *endptr; + + errno = 0; + ind = strtoint(tpath[i], &endptr, 10); + if (endptr == tpath[i] || *endptr != '\0' || errno != 0) + ipath[i] = INT_MIN; + else + ipath[i] = ind; + } + else + ipath[i] = INT_MIN; + } + + result = get_worker(json, tpath, ipath, npath, as_text); + + if (result != NULL) + PG_RETURN_TEXT_P(result); + else + PG_RETURN_NULL(); +} + +/* + * get_worker + * + * common worker for all the json getter functions + * + * json: JSON object (in text form) + * tpath[]: field name(s) to extract + * ipath[]: array index(es) (zero-based) to extract, accepts negatives + * npath: length of tpath[] and/or ipath[] + * normalize_results: true to de-escape string and null scalars + * + * tpath can be NULL, or any one tpath[] entry can be NULL, if an object + * field is not to be matched at that nesting level. Similarly, ipath can + * be NULL, or any one ipath[] entry can be INT_MIN if an array element is + * not to be matched at that nesting level (a json datum should never be + * large enough to have -INT_MIN elements due to MaxAllocSize restriction). + */ +static text * +get_worker(text *json, + char **tpath, + int *ipath, + int npath, + bool normalize_results) +{ + JsonLexContext *lex = makeJsonLexContext(json, true); + JsonSemAction *sem = palloc0(sizeof(JsonSemAction)); + GetState *state = palloc0(sizeof(GetState)); + + Assert(npath >= 0); + + state->lex = lex; + /* is it "_as_text" variant? 
*/ + state->normalize_results = normalize_results; + state->npath = npath; + state->path_names = tpath; + state->path_indexes = ipath; + state->pathok = palloc0(sizeof(bool) * npath); + state->array_cur_index = palloc(sizeof(int) * npath); + + if (npath > 0) + state->pathok[0] = true; + + sem->semstate = (void *) state; + + /* + * Not all variants need all the semantic routines. Only set the ones that + * are actually needed for maximum efficiency. + */ + sem->scalar = get_scalar; + if (npath == 0) + { + sem->object_start = get_object_start; + sem->object_end = get_object_end; + sem->array_start = get_array_start; + sem->array_end = get_array_end; + } + if (tpath != NULL) + { + sem->object_field_start = get_object_field_start; + sem->object_field_end = get_object_field_end; + } + if (ipath != NULL) + { + sem->array_start = get_array_start; + sem->array_element_start = get_array_element_start; + sem->array_element_end = get_array_element_end; + } + + pg_parse_json_or_ereport(lex, sem); + + return state->tresult; +} + +static JsonParseErrorType +get_object_start(void *state) +{ + GetState *_state = (GetState *) state; + int lex_level = _state->lex->lex_level; + + if (lex_level == 0 && _state->npath == 0) + { + /* + * Special case: we should match the entire object. We only need this + * at outermost level because at nested levels the match will have + * been started by the outer field or array element callback. + */ + _state->result_start = _state->lex->token_start; + } + + return JSON_SUCCESS; +} + +static JsonParseErrorType +get_object_end(void *state) +{ + GetState *_state = (GetState *) state; + int lex_level = _state->lex->lex_level; + + if (lex_level == 0 && _state->npath == 0) + { + /* Special case: return the entire object */ + char *start = _state->result_start; + int len = _state->lex->prev_token_terminator - start; + + _state->tresult = cstring_to_text_with_len(start, len); + } + + return JSON_SUCCESS; +} + +static JsonParseErrorType +get_object_field_start(void *state, char *fname, bool isnull) +{ + GetState *_state = (GetState *) state; + bool get_next = false; + int lex_level = _state->lex->lex_level; + + if (lex_level <= _state->npath && + _state->pathok[lex_level - 1] && + _state->path_names != NULL && + _state->path_names[lex_level - 1] != NULL && + strcmp(fname, _state->path_names[lex_level - 1]) == 0) + { + if (lex_level < _state->npath) + { + /* if not at end of path just mark path ok */ + _state->pathok[lex_level] = true; + } + else + { + /* end of path, so we want this value */ + get_next = true; + } + } + + if (get_next) + { + /* this object overrides any previous matching object */ + _state->tresult = NULL; + _state->result_start = NULL; + + if (_state->normalize_results && + _state->lex->token_type == JSON_TOKEN_STRING) + { + /* for as_text variants, tell get_scalar to set it for us */ + _state->next_scalar = true; + } + else + { + /* for non-as_text variants, just note the json starting point */ + _state->result_start = _state->lex->token_start; + } + } + + return JSON_SUCCESS; +} + +static JsonParseErrorType +get_object_field_end(void *state, char *fname, bool isnull) +{ + GetState *_state = (GetState *) state; + bool get_last = false; + int lex_level = _state->lex->lex_level; + + /* same tests as in get_object_field_start */ + if (lex_level <= _state->npath && + _state->pathok[lex_level - 1] && + _state->path_names != NULL && + _state->path_names[lex_level - 1] != NULL && + strcmp(fname, _state->path_names[lex_level - 1]) == 0) + { + if (lex_level < _state->npath) 
+ { + /* done with this field so reset pathok */ + _state->pathok[lex_level] = false; + } + else + { + /* end of path, so we want this value */ + get_last = true; + } + } + + /* for as_text scalar case, our work is already done */ + if (get_last && _state->result_start != NULL) + { + /* + * make a text object from the string from the previously noted json + * start up to the end of the previous token (the lexer is by now + * ahead of us on whatever came after what we're interested in). + */ + if (isnull && _state->normalize_results) + _state->tresult = (text *) NULL; + else + { + char *start = _state->result_start; + int len = _state->lex->prev_token_terminator - start; + + _state->tresult = cstring_to_text_with_len(start, len); + } + + /* this should be unnecessary but let's do it for cleanliness: */ + _state->result_start = NULL; + } + + return JSON_SUCCESS; +} + +static JsonParseErrorType +get_array_start(void *state) +{ + GetState *_state = (GetState *) state; + int lex_level = _state->lex->lex_level; + + if (lex_level < _state->npath) + { + /* Initialize counting of elements in this array */ + _state->array_cur_index[lex_level] = -1; + + /* INT_MIN value is reserved to represent invalid subscript */ + if (_state->path_indexes[lex_level] < 0 && + _state->path_indexes[lex_level] != INT_MIN) + { + /* Negative subscript -- convert to positive-wise subscript */ + JsonParseErrorType error; + int nelements; + + error = json_count_array_elements(_state->lex, &nelements); + if (error != JSON_SUCCESS) + json_errsave_error(error, _state->lex, NULL); + + if (-_state->path_indexes[lex_level] <= nelements) + _state->path_indexes[lex_level] += nelements; + } + } + else if (lex_level == 0 && _state->npath == 0) + { + /* + * Special case: we should match the entire array. We only need this + * at the outermost level because at nested levels the match will have + * been started by the outer field or array element callback. 
+ */ + _state->result_start = _state->lex->token_start; + } + + return JSON_SUCCESS; +} + +static JsonParseErrorType +get_array_end(void *state) +{ + GetState *_state = (GetState *) state; + int lex_level = _state->lex->lex_level; + + if (lex_level == 0 && _state->npath == 0) + { + /* Special case: return the entire array */ + char *start = _state->result_start; + int len = _state->lex->prev_token_terminator - start; + + _state->tresult = cstring_to_text_with_len(start, len); + } + + return JSON_SUCCESS; +} + +static JsonParseErrorType +get_array_element_start(void *state, bool isnull) +{ + GetState *_state = (GetState *) state; + bool get_next = false; + int lex_level = _state->lex->lex_level; + + /* Update array element counter */ + if (lex_level <= _state->npath) + _state->array_cur_index[lex_level - 1]++; + + if (lex_level <= _state->npath && + _state->pathok[lex_level - 1] && + _state->path_indexes != NULL && + _state->array_cur_index[lex_level - 1] == _state->path_indexes[lex_level - 1]) + { + if (lex_level < _state->npath) + { + /* if not at end of path just mark path ok */ + _state->pathok[lex_level] = true; + } + else + { + /* end of path, so we want this value */ + get_next = true; + } + } + + /* same logic as for objects */ + if (get_next) + { + _state->tresult = NULL; + _state->result_start = NULL; + + if (_state->normalize_results && + _state->lex->token_type == JSON_TOKEN_STRING) + { + _state->next_scalar = true; + } + else + { + _state->result_start = _state->lex->token_start; + } + } + + return JSON_SUCCESS; +} + +static JsonParseErrorType +get_array_element_end(void *state, bool isnull) +{ + GetState *_state = (GetState *) state; + bool get_last = false; + int lex_level = _state->lex->lex_level; + + /* same tests as in get_array_element_start */ + if (lex_level <= _state->npath && + _state->pathok[lex_level - 1] && + _state->path_indexes != NULL && + _state->array_cur_index[lex_level - 1] == _state->path_indexes[lex_level - 1]) + { + if (lex_level < _state->npath) + { + /* done with this element so reset pathok */ + _state->pathok[lex_level] = false; + } + else + { + /* end of path, so we want this value */ + get_last = true; + } + } + + /* same logic as for objects */ + if (get_last && _state->result_start != NULL) + { + if (isnull && _state->normalize_results) + _state->tresult = (text *) NULL; + else + { + char *start = _state->result_start; + int len = _state->lex->prev_token_terminator - start; + + _state->tresult = cstring_to_text_with_len(start, len); + } + + _state->result_start = NULL; + } + + return JSON_SUCCESS; +} + +static JsonParseErrorType +get_scalar(void *state, char *token, JsonTokenType tokentype) +{ + GetState *_state = (GetState *) state; + int lex_level = _state->lex->lex_level; + + /* Check for whole-object match */ + if (lex_level == 0 && _state->npath == 0) + { + if (_state->normalize_results && tokentype == JSON_TOKEN_STRING) + { + /* we want the de-escaped string */ + _state->next_scalar = true; + } + else if (_state->normalize_results && tokentype == JSON_TOKEN_NULL) + { + _state->tresult = (text *) NULL; + } + else + { + /* + * This is a bit hokey: we will suppress whitespace after the + * scalar token, but not whitespace before it. Probably not worth + * doing our own space-skipping to avoid that. 
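+ *
+ * (Editorial illustration, not upstream text: as a consequence,
+ * SELECT ' 42 '::json #> '{}';
+ * hands back " 42" with the leading whitespace preserved and the
+ * trailing whitespace dropped.)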
+ */ + char *start = _state->lex->input; + int len = _state->lex->prev_token_terminator - start; + + _state->tresult = cstring_to_text_with_len(start, len); + } + } + + if (_state->next_scalar) + { + /* a de-escaped text value is wanted, so supply it */ + _state->tresult = cstring_to_text(token); + /* make sure the next call to get_scalar doesn't overwrite it */ + _state->next_scalar = false; + } + + return JSON_SUCCESS; +} + +Datum +jsonb_extract_path(PG_FUNCTION_ARGS) +{ + return get_jsonb_path_all(fcinfo, false); +} + +Datum +jsonb_extract_path_text(PG_FUNCTION_ARGS) +{ + return get_jsonb_path_all(fcinfo, true); +} + +static Datum +get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + ArrayType *path = PG_GETARG_ARRAYTYPE_P(1); + Datum *pathtext; + bool *pathnulls; + bool isnull; + int npath; + Datum res; + + /* + * If the array contains any null elements, return NULL, on the grounds + * that you'd have gotten NULL if any RHS value were NULL in a nested + * series of applications of the -> operator. (Note: because we also + * return NULL for error cases such as no-such-field, this is true + * regardless of the contents of the rest of the array.) + */ + if (array_contains_nulls(path)) + PG_RETURN_NULL(); + + deconstruct_array_builtin(path, TEXTOID, &pathtext, &pathnulls, &npath); + + res = jsonb_get_element(jb, pathtext, npath, &isnull, as_text); + + if (isnull) + PG_RETURN_NULL(); + else + PG_RETURN_DATUM(res); +} + +Datum +jsonb_get_element(Jsonb *jb, Datum *path, int npath, bool *isnull, bool as_text) +{ + JsonbContainer *container = &jb->root; + JsonbValue *jbvp = NULL; + int i; + bool have_object = false, + have_array = false; + + *isnull = false; + + /* Identify whether we have object, array, or scalar at top-level */ + if (JB_ROOT_IS_OBJECT(jb)) + have_object = true; + else if (JB_ROOT_IS_ARRAY(jb) && !JB_ROOT_IS_SCALAR(jb)) + have_array = true; + else + { + Assert(JB_ROOT_IS_ARRAY(jb) && JB_ROOT_IS_SCALAR(jb)); + /* Extract the scalar value, if it is what we'll return */ + if (npath <= 0) + jbvp = getIthJsonbValueFromContainer(container, 0); + } + + /* + * If the array is empty, return the entire LHS object, on the grounds + * that we should do zero field or element extractions. For the + * non-scalar case we can just hand back the object without much work. For + * the scalar case, fall through and deal with the value below the loop. + * (This inconsistency arises because there's no easy way to generate a + * JsonbValue directly for root-level containers.) 
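+ *
+ * (Editorial illustration, not upstream text: the empty-path case is
+ * reachable through the #> operator, e.g.
+ * SELECT '{"a": 1}'::jsonb #> '{}';
+ * which hands back the left-hand jsonb value unchanged.)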
+ */ + if (npath <= 0 && jbvp == NULL) + { + if (as_text) + { + return PointerGetDatum(cstring_to_text(JsonbToCString(NULL, + container, + VARSIZE(jb)))); + } + else + { + /* not text mode - just hand back the jsonb */ + PG_RETURN_JSONB_P(jb); + } + } + + for (i = 0; i < npath; i++) + { + if (have_object) + { + text *subscr = DatumGetTextPP(path[i]); + + jbvp = getKeyJsonValueFromContainer(container, + VARDATA_ANY(subscr), + VARSIZE_ANY_EXHDR(subscr), + NULL); + } + else if (have_array) + { + int lindex; + uint32 index; + char *indextext = TextDatumGetCString(path[i]); + char *endptr; + + errno = 0; + lindex = strtoint(indextext, &endptr, 10); + if (endptr == indextext || *endptr != '\0' || errno != 0) + { + *isnull = true; + return PointerGetDatum(NULL); + } + + if (lindex >= 0) + { + index = (uint32) lindex; + } + else + { + /* Handle negative subscript */ + uint32 nelements; + + /* Container must be array, but make sure */ + if (!JsonContainerIsArray(container)) + elog(ERROR, "not a jsonb array"); + + nelements = JsonContainerSize(container); + + if (lindex == INT_MIN || -lindex > nelements) + { + *isnull = true; + return PointerGetDatum(NULL); + } + else + index = nelements + lindex; + } + + jbvp = getIthJsonbValueFromContainer(container, index); + } + else + { + /* scalar, extraction yields a null */ + *isnull = true; + return PointerGetDatum(NULL); + } + + if (jbvp == NULL) + { + *isnull = true; + return PointerGetDatum(NULL); + } + else if (i == npath - 1) + break; + + if (jbvp->type == jbvBinary) + { + container = jbvp->val.binary.data; + have_object = JsonContainerIsObject(container); + have_array = JsonContainerIsArray(container); + Assert(!JsonContainerIsScalar(container)); + } + else + { + Assert(IsAJsonbScalar(jbvp)); + have_object = false; + have_array = false; + } + } + + if (as_text) + { + if (jbvp->type == jbvNull) + { + *isnull = true; + return PointerGetDatum(NULL); + } + + return PointerGetDatum(JsonbValueAsText(jbvp)); + } + else + { + Jsonb *res = JsonbValueToJsonb(jbvp); + + /* not text mode - just hand back the jsonb */ + PG_RETURN_JSONB_P(res); + } +} + +Datum +jsonb_set_element(Jsonb *jb, Datum *path, int path_len, + JsonbValue *newval) +{ + JsonbValue *res; + JsonbParseState *state = NULL; + JsonbIterator *it; + bool *path_nulls = palloc0(path_len * sizeof(bool)); + + if (newval->type == jbvArray && newval->val.array.rawScalar) + *newval = newval->val.array.elems[0]; + + it = JsonbIteratorInit(&jb->root); + + res = setPath(&it, path, path_nulls, path_len, &state, 0, newval, + JB_PATH_CREATE | JB_PATH_FILL_GAPS | + JB_PATH_CONSISTENT_POSITION); + + pfree(path_nulls); + + PG_RETURN_JSONB_P(JsonbValueToJsonb(res)); +} + +static void +push_null_elements(JsonbParseState **ps, int num) +{ + JsonbValue null; + + null.type = jbvNull; + + while (num-- > 0) + pushJsonbValue(ps, WJB_ELEM, &null); +} + +/* + * Prepare a new structure containing nested empty objects and arrays + * corresponding to the specified path, and assign a new value at the end of + * this path. E.g. the path [a][0][b] with the new value 1 will produce the + * structure {a: [{b: 1}]}. + * + * Caller is responsible to make sure such path does not exist yet. + */ +static void +push_path(JsonbParseState **st, int level, Datum *path_elems, + bool *path_nulls, int path_len, JsonbValue *newval) +{ + /* + * tpath contains expected type of an empty jsonb created at each level + * higher or equal than the current one, either jbvObject or jbvArray. 
+ * Since it contains only information about path slice from level to the + * end, the access index must be normalized by level. + */ + enum jbvType *tpath = palloc0((path_len - level) * sizeof(enum jbvType)); + JsonbValue newkey; + + /* + * Create first part of the chain with beginning tokens. For the current + * level WJB_BEGIN_OBJECT/WJB_BEGIN_ARRAY was already created, so start + * with the next one. + */ + for (int i = level + 1; i < path_len; i++) + { + char *c, + *badp; + int lindex; + + if (path_nulls[i]) + break; + + /* + * Try to convert to an integer to find out the expected type, object + * or array. + */ + c = TextDatumGetCString(path_elems[i]); + errno = 0; + lindex = strtoint(c, &badp, 10); + if (badp == c || *badp != '\0' || errno != 0) + { + /* text, an object is expected */ + newkey.type = jbvString; + newkey.val.string.val = c; + newkey.val.string.len = strlen(c); + + (void) pushJsonbValue(st, WJB_BEGIN_OBJECT, NULL); + (void) pushJsonbValue(st, WJB_KEY, &newkey); + + tpath[i - level] = jbvObject; + } + else + { + /* integer, an array is expected */ + (void) pushJsonbValue(st, WJB_BEGIN_ARRAY, NULL); + + push_null_elements(st, lindex); + + tpath[i - level] = jbvArray; + } + } + + /* Insert an actual value for either an object or array */ + if (tpath[(path_len - level) - 1] == jbvArray) + { + (void) pushJsonbValue(st, WJB_ELEM, newval); + } + else + (void) pushJsonbValue(st, WJB_VALUE, newval); + + /* + * Close everything up to the last but one level. The last one will be + * closed outside of this function. + */ + for (int i = path_len - 1; i > level; i--) + { + if (path_nulls[i]) + break; + + if (tpath[i - level] == jbvObject) + (void) pushJsonbValue(st, WJB_END_OBJECT, NULL); + else + (void) pushJsonbValue(st, WJB_END_ARRAY, NULL); + } +} + +/* + * Return the text representation of the given JsonbValue. + */ +static text * +JsonbValueAsText(JsonbValue *v) +{ + switch (v->type) + { + case jbvNull: + return NULL; + + case jbvBool: + return v->val.boolean ? 
+ cstring_to_text_with_len("true", 4) : + cstring_to_text_with_len("false", 5); + + case jbvString: + return cstring_to_text_with_len(v->val.string.val, + v->val.string.len); + + case jbvNumeric: + { + Datum cstr; + + cstr = DirectFunctionCall1(numeric_out, + PointerGetDatum(v->val.numeric)); + + return cstring_to_text(DatumGetCString(cstr)); + } + + case jbvBinary: + { + StringInfoData jtext; + + initStringInfo(&jtext); + (void) JsonbToCString(&jtext, v->val.binary.data, + v->val.binary.len); + + return cstring_to_text_with_len(jtext.data, jtext.len); + } + + default: + elog(ERROR, "unrecognized jsonb type: %d", (int) v->type); + return NULL; + } +} + +/* + * SQL function json_array_length(json) -> int + */ +Datum +json_array_length(PG_FUNCTION_ARGS) +{ + text *json = PG_GETARG_TEXT_PP(0); + AlenState *state; + JsonLexContext *lex; + JsonSemAction *sem; + + lex = makeJsonLexContext(json, false); + state = palloc0(sizeof(AlenState)); + sem = palloc0(sizeof(JsonSemAction)); + + /* palloc0 does this for us */ +#if 0 + state->count = 0; +#endif + state->lex = lex; + + sem->semstate = (void *) state; + sem->object_start = alen_object_start; + sem->scalar = alen_scalar; + sem->array_element_start = alen_array_element_start; + + pg_parse_json_or_ereport(lex, sem); + + PG_RETURN_INT32(state->count); +} + +Datum +jsonb_array_length(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + + if (JB_ROOT_IS_SCALAR(jb)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot get array length of a scalar"))); + else if (!JB_ROOT_IS_ARRAY(jb)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot get array length of a non-array"))); + + PG_RETURN_INT32(JB_ROOT_COUNT(jb)); +} + +/* + * These next two checks ensure that the json is an array (since it can't be + * a scalar or an object). + */ + +static JsonParseErrorType +alen_object_start(void *state) +{ + AlenState *_state = (AlenState *) state; + + /* json structure check */ + if (_state->lex->lex_level == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot get array length of a non-array"))); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +alen_scalar(void *state, char *token, JsonTokenType tokentype) +{ + AlenState *_state = (AlenState *) state; + + /* json structure check */ + if (_state->lex->lex_level == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot get array length of a scalar"))); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +alen_array_element_start(void *state, bool isnull) +{ + AlenState *_state = (AlenState *) state; + + /* just count up all the level 1 elements */ + if (_state->lex->lex_level == 1) + _state->count++; + + return JSON_SUCCESS; +} + +/* + * SQL function json_each and json_each_text + * + * decompose a json object into key value pairs. + * + * Unlike json_object_keys() these SRFs operate in materialize mode, + * stashing results into a Tuplestore object as they go. + * The construction of tuples is done using a temporary memory context + * that is cleared out after each tuple is built. 
+ */ +Datum +json_each(PG_FUNCTION_ARGS) +{ + return each_worker(fcinfo, false); +} + +Datum +jsonb_each(PG_FUNCTION_ARGS) +{ + return each_worker_jsonb(fcinfo, "jsonb_each", false); +} + +Datum +json_each_text(PG_FUNCTION_ARGS) +{ + return each_worker(fcinfo, true); +} + +Datum +jsonb_each_text(PG_FUNCTION_ARGS) +{ + return each_worker_jsonb(fcinfo, "jsonb_each_text", true); +} + +static Datum +each_worker_jsonb(FunctionCallInfo fcinfo, const char *funcname, bool as_text) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + ReturnSetInfo *rsi; + MemoryContext old_cxt, + tmp_cxt; + bool skipNested = false; + JsonbIterator *it; + JsonbValue v; + JsonbIteratorToken r; + + if (!JB_ROOT_IS_OBJECT(jb)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call %s on a non-object", + funcname))); + + rsi = (ReturnSetInfo *) fcinfo->resultinfo; + InitMaterializedSRF(fcinfo, MAT_SRF_BLESS); + + tmp_cxt = AllocSetContextCreate(CurrentMemoryContext, + "jsonb_each temporary cxt", + ALLOCSET_DEFAULT_SIZES); + + it = JsonbIteratorInit(&jb->root); + + while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE) + { + skipNested = true; + + if (r == WJB_KEY) + { + text *key; + Datum values[2]; + bool nulls[2] = {false, false}; + + /* Use the tmp context so we can clean up after each tuple is done */ + old_cxt = MemoryContextSwitchTo(tmp_cxt); + + key = cstring_to_text_with_len(v.val.string.val, v.val.string.len); + + /* + * The next thing the iterator fetches should be the value, no + * matter what shape it is. + */ + r = JsonbIteratorNext(&it, &v, skipNested); + Assert(r != WJB_DONE); + + values[0] = PointerGetDatum(key); + + if (as_text) + { + if (v.type == jbvNull) + { + /* a json null is an sql null in text mode */ + nulls[1] = true; + values[1] = (Datum) NULL; + } + else + values[1] = PointerGetDatum(JsonbValueAsText(&v)); + } + else + { + /* Not in text mode, just return the Jsonb */ + Jsonb *val = JsonbValueToJsonb(&v); + + values[1] = PointerGetDatum(val); + } + + tuplestore_putvalues(rsi->setResult, rsi->setDesc, values, nulls); + + /* clean up and switch back */ + MemoryContextSwitchTo(old_cxt); + MemoryContextReset(tmp_cxt); + } + } + + MemoryContextDelete(tmp_cxt); + + PG_RETURN_NULL(); +} + + +static Datum +each_worker(FunctionCallInfo fcinfo, bool as_text) +{ + text *json = PG_GETARG_TEXT_PP(0); + JsonLexContext *lex; + JsonSemAction *sem; + ReturnSetInfo *rsi; + EachState *state; + + lex = makeJsonLexContext(json, true); + state = palloc0(sizeof(EachState)); + sem = palloc0(sizeof(JsonSemAction)); + + rsi = (ReturnSetInfo *) fcinfo->resultinfo; + + InitMaterializedSRF(fcinfo, MAT_SRF_BLESS); + state->tuple_store = rsi->setResult; + state->ret_tdesc = rsi->setDesc; + + sem->semstate = (void *) state; + sem->array_start = each_array_start; + sem->scalar = each_scalar; + sem->object_field_start = each_object_field_start; + sem->object_field_end = each_object_field_end; + + state->normalize_results = as_text; + state->next_scalar = false; + state->lex = lex; + state->tmp_cxt = AllocSetContextCreate(CurrentMemoryContext, + "json_each temporary cxt", + ALLOCSET_DEFAULT_SIZES); + + pg_parse_json_or_ereport(lex, sem); + + MemoryContextDelete(state->tmp_cxt); + + PG_RETURN_NULL(); +} + + +static JsonParseErrorType +each_object_field_start(void *state, char *fname, bool isnull) +{ + EachState *_state = (EachState *) state; + + /* save a pointer to where the value starts */ + if (_state->lex->lex_level == 1) + { + /* + * next_scalar will be reset in the object_field_end handler, and 
+ * since we know the value is a scalar there is no danger of it being + * on while recursing down the tree. + */ + if (_state->normalize_results && _state->lex->token_type == JSON_TOKEN_STRING) + _state->next_scalar = true; + else + _state->result_start = _state->lex->token_start; + } + + return JSON_SUCCESS; +} + +static JsonParseErrorType +each_object_field_end(void *state, char *fname, bool isnull) +{ + EachState *_state = (EachState *) state; + MemoryContext old_cxt; + int len; + text *val; + HeapTuple tuple; + Datum values[2]; + bool nulls[2] = {false, false}; + + /* skip over nested objects */ + if (_state->lex->lex_level != 1) + return JSON_SUCCESS; + + /* use the tmp context so we can clean up after each tuple is done */ + old_cxt = MemoryContextSwitchTo(_state->tmp_cxt); + + values[0] = CStringGetTextDatum(fname); + + if (isnull && _state->normalize_results) + { + nulls[1] = true; + values[1] = (Datum) 0; + } + else if (_state->next_scalar) + { + values[1] = CStringGetTextDatum(_state->normalized_scalar); + _state->next_scalar = false; + } + else + { + len = _state->lex->prev_token_terminator - _state->result_start; + val = cstring_to_text_with_len(_state->result_start, len); + values[1] = PointerGetDatum(val); + } + + tuple = heap_form_tuple(_state->ret_tdesc, values, nulls); + + tuplestore_puttuple(_state->tuple_store, tuple); + + /* clean up and switch back */ + MemoryContextSwitchTo(old_cxt); + MemoryContextReset(_state->tmp_cxt); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +each_array_start(void *state) +{ + EachState *_state = (EachState *) state; + + /* json structure check */ + if (_state->lex->lex_level == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot deconstruct an array as an object"))); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +each_scalar(void *state, char *token, JsonTokenType tokentype) +{ + EachState *_state = (EachState *) state; + + /* json structure check */ + if (_state->lex->lex_level == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot deconstruct a scalar"))); + + /* supply de-escaped value if required */ + if (_state->next_scalar) + _state->normalized_scalar = token; + + return JSON_SUCCESS; +} + +/* + * SQL functions json_array_elements and json_array_elements_text + * + * get the elements from a json array + * + * a lot of this processing is similar to the json_each* functions + */ + +Datum +jsonb_array_elements(PG_FUNCTION_ARGS) +{ + return elements_worker_jsonb(fcinfo, "jsonb_array_elements", false); +} + +Datum +jsonb_array_elements_text(PG_FUNCTION_ARGS) +{ + return elements_worker_jsonb(fcinfo, "jsonb_array_elements_text", true); +} + +static Datum +elements_worker_jsonb(FunctionCallInfo fcinfo, const char *funcname, + bool as_text) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + ReturnSetInfo *rsi; + MemoryContext old_cxt, + tmp_cxt; + bool skipNested = false; + JsonbIterator *it; + JsonbValue v; + JsonbIteratorToken r; + + if (JB_ROOT_IS_SCALAR(jb)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot extract elements from a scalar"))); + else if (!JB_ROOT_IS_ARRAY(jb)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot extract elements from an object"))); + + rsi = (ReturnSetInfo *) fcinfo->resultinfo; + + InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC | MAT_SRF_BLESS); + + tmp_cxt = AllocSetContextCreate(CurrentMemoryContext, + "jsonb_array_elements temporary cxt", + ALLOCSET_DEFAULT_SIZES); + + 
it = JsonbIteratorInit(&jb->root); + + while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE) + { + skipNested = true; + + if (r == WJB_ELEM) + { + Datum values[1]; + bool nulls[1] = {false}; + + /* use the tmp context so we can clean up after each tuple is done */ + old_cxt = MemoryContextSwitchTo(tmp_cxt); + + if (as_text) + { + if (v.type == jbvNull) + { + /* a json null is an sql null in text mode */ + nulls[0] = true; + values[0] = (Datum) NULL; + } + else + values[0] = PointerGetDatum(JsonbValueAsText(&v)); + } + else + { + /* Not in text mode, just return the Jsonb */ + Jsonb *val = JsonbValueToJsonb(&v); + + values[0] = PointerGetDatum(val); + } + + tuplestore_putvalues(rsi->setResult, rsi->setDesc, values, nulls); + + /* clean up and switch back */ + MemoryContextSwitchTo(old_cxt); + MemoryContextReset(tmp_cxt); + } + } + + MemoryContextDelete(tmp_cxt); + + PG_RETURN_NULL(); +} + +Datum +json_array_elements(PG_FUNCTION_ARGS) +{ + return elements_worker(fcinfo, "json_array_elements", false); +} + +Datum +json_array_elements_text(PG_FUNCTION_ARGS) +{ + return elements_worker(fcinfo, "json_array_elements_text", true); +} + +static Datum +elements_worker(FunctionCallInfo fcinfo, const char *funcname, bool as_text) +{ + text *json = PG_GETARG_TEXT_PP(0); + + /* elements only needs escaped strings when as_text */ + JsonLexContext *lex = makeJsonLexContext(json, as_text); + JsonSemAction *sem; + ReturnSetInfo *rsi; + ElementsState *state; + + state = palloc0(sizeof(ElementsState)); + sem = palloc0(sizeof(JsonSemAction)); + + InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC | MAT_SRF_BLESS); + rsi = (ReturnSetInfo *) fcinfo->resultinfo; + state->tuple_store = rsi->setResult; + state->ret_tdesc = rsi->setDesc; + + sem->semstate = (void *) state; + sem->object_start = elements_object_start; + sem->scalar = elements_scalar; + sem->array_element_start = elements_array_element_start; + sem->array_element_end = elements_array_element_end; + + state->function_name = funcname; + state->normalize_results = as_text; + state->next_scalar = false; + state->lex = lex; + state->tmp_cxt = AllocSetContextCreate(CurrentMemoryContext, + "json_array_elements temporary cxt", + ALLOCSET_DEFAULT_SIZES); + + pg_parse_json_or_ereport(lex, sem); + + MemoryContextDelete(state->tmp_cxt); + + PG_RETURN_NULL(); +} + +static JsonParseErrorType +elements_array_element_start(void *state, bool isnull) +{ + ElementsState *_state = (ElementsState *) state; + + /* save a pointer to where the value starts */ + if (_state->lex->lex_level == 1) + { + /* + * next_scalar will be reset in the array_element_end handler, and + * since we know the value is a scalar there is no danger of it being + * on while recursing down the tree. 
+ */ + if (_state->normalize_results && _state->lex->token_type == JSON_TOKEN_STRING) + _state->next_scalar = true; + else + _state->result_start = _state->lex->token_start; + } + + return JSON_SUCCESS; +} + +static JsonParseErrorType +elements_array_element_end(void *state, bool isnull) +{ + ElementsState *_state = (ElementsState *) state; + MemoryContext old_cxt; + int len; + text *val; + HeapTuple tuple; + Datum values[1]; + bool nulls[1] = {false}; + + /* skip over nested objects */ + if (_state->lex->lex_level != 1) + return JSON_SUCCESS; + + /* use the tmp context so we can clean up after each tuple is done */ + old_cxt = MemoryContextSwitchTo(_state->tmp_cxt); + + if (isnull && _state->normalize_results) + { + nulls[0] = true; + values[0] = (Datum) NULL; + } + else if (_state->next_scalar) + { + values[0] = CStringGetTextDatum(_state->normalized_scalar); + _state->next_scalar = false; + } + else + { + len = _state->lex->prev_token_terminator - _state->result_start; + val = cstring_to_text_with_len(_state->result_start, len); + values[0] = PointerGetDatum(val); + } + + tuple = heap_form_tuple(_state->ret_tdesc, values, nulls); + + tuplestore_puttuple(_state->tuple_store, tuple); + + /* clean up and switch back */ + MemoryContextSwitchTo(old_cxt); + MemoryContextReset(_state->tmp_cxt); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +elements_object_start(void *state) +{ + ElementsState *_state = (ElementsState *) state; + + /* json structure check */ + if (_state->lex->lex_level == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call %s on a non-array", + _state->function_name))); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +elements_scalar(void *state, char *token, JsonTokenType tokentype) +{ + ElementsState *_state = (ElementsState *) state; + + /* json structure check */ + if (_state->lex->lex_level == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call %s on a scalar", + _state->function_name))); + + /* supply de-escaped value if required */ + if (_state->next_scalar) + _state->normalized_scalar = token; + + return JSON_SUCCESS; +} + +/* + * SQL function json_populate_record + * + * set fields in a record from the argument json + * + * Code adapted shamelessly from hstore's populate_record + * which is in turn partly adapted from record_out. + * + * The json is decomposed into a hash table, in which each + * field in the record is then looked up by name. For jsonb + * we fetch the values direct from the object. 
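+ *
+ * Illustrative usage (the row type name is only an example):
+ *   SELECT * FROM json_populate_record(null::myrowtype,
+ *                                      '{"a": 1, "b": "x"}');
+ * fills the columns of myrowtype whose names match keys in the object
+ * and leaves the remaining columns NULL.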
+ */ +Datum +jsonb_populate_record(PG_FUNCTION_ARGS) +{ + return populate_record_worker(fcinfo, "jsonb_populate_record", + false, true); +} + +Datum +jsonb_to_record(PG_FUNCTION_ARGS) +{ + return populate_record_worker(fcinfo, "jsonb_to_record", + false, false); +} + +Datum +json_populate_record(PG_FUNCTION_ARGS) +{ + return populate_record_worker(fcinfo, "json_populate_record", + true, true); +} + +Datum +json_to_record(PG_FUNCTION_ARGS) +{ + return populate_record_worker(fcinfo, "json_to_record", + true, false); +} + +/* helper function for diagnostics */ +static void +populate_array_report_expected_array(PopulateArrayContext *ctx, int ndim) +{ + if (ndim <= 0) + { + if (ctx->colname) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("expected JSON array"), + errhint("See the value of key \"%s\".", ctx->colname))); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("expected JSON array"))); + } + else + { + StringInfoData indices; + int i; + + initStringInfo(&indices); + + Assert(ctx->ndims > 0 && ndim < ctx->ndims); + + for (i = 0; i < ndim; i++) + appendStringInfo(&indices, "[%d]", ctx->sizes[i]); + + if (ctx->colname) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("expected JSON array"), + errhint("See the array element %s of key \"%s\".", + indices.data, ctx->colname))); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("expected JSON array"), + errhint("See the array element %s.", + indices.data))); + } +} + +/* set the number of dimensions of the populated array when it becomes known */ +static void +populate_array_assign_ndims(PopulateArrayContext *ctx, int ndims) +{ + int i; + + Assert(ctx->ndims <= 0); + + if (ndims <= 0) + populate_array_report_expected_array(ctx, ndims); + + ctx->ndims = ndims; + ctx->dims = palloc(sizeof(int) * ndims); + ctx->sizes = palloc0(sizeof(int) * ndims); + + for (i = 0; i < ndims; i++) + ctx->dims[i] = -1; /* dimensions are unknown yet */ +} + +/* check the populated subarray dimension */ +static void +populate_array_check_dimension(PopulateArrayContext *ctx, int ndim) +{ + int dim = ctx->sizes[ndim]; /* current dimension counter */ + + if (ctx->dims[ndim] == -1) + ctx->dims[ndim] = dim; /* assign dimension if not yet known */ + else if (ctx->dims[ndim] != dim) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed JSON array"), + errdetail("Multidimensional arrays must have " + "sub-arrays with matching dimensions."))); + + /* reset the current array dimension size counter */ + ctx->sizes[ndim] = 0; + + /* increment the parent dimension counter if it is a nested sub-array */ + if (ndim > 0) + ctx->sizes[ndim - 1]++; +} + +static void +populate_array_element(PopulateArrayContext *ctx, int ndim, JsValue *jsv) +{ + Datum element; + bool element_isnull; + + /* populate the array element */ + element = populate_record_field(ctx->aio->element_info, + ctx->aio->element_type, + ctx->aio->element_typmod, + NULL, ctx->mcxt, PointerGetDatum(NULL), + jsv, &element_isnull); + + accumArrayResult(ctx->astate, element, element_isnull, + ctx->aio->element_type, ctx->acxt); + + Assert(ndim > 0); + ctx->sizes[ndim - 1]++; /* increment current dimension counter */ +} + +/* json object start handler for populate_array_json() */ +static JsonParseErrorType +populate_array_object_start(void *_state) +{ + PopulateArrayState *state = (PopulateArrayState *) _state; + int ndim = state->lex->lex_level; + + if (state->ctx->ndims <= 0) 
+ populate_array_assign_ndims(state->ctx, ndim); + else if (ndim < state->ctx->ndims) + populate_array_report_expected_array(state->ctx, ndim); + + return JSON_SUCCESS; +} + +/* json array end handler for populate_array_json() */ +static JsonParseErrorType +populate_array_array_end(void *_state) +{ + PopulateArrayState *state = (PopulateArrayState *) _state; + PopulateArrayContext *ctx = state->ctx; + int ndim = state->lex->lex_level; + + if (ctx->ndims <= 0) + populate_array_assign_ndims(ctx, ndim + 1); + + if (ndim < ctx->ndims) + populate_array_check_dimension(ctx, ndim); + + return JSON_SUCCESS; +} + +/* json array element start handler for populate_array_json() */ +static JsonParseErrorType +populate_array_element_start(void *_state, bool isnull) +{ + PopulateArrayState *state = (PopulateArrayState *) _state; + int ndim = state->lex->lex_level; + + if (state->ctx->ndims <= 0 || ndim == state->ctx->ndims) + { + /* remember current array element start */ + state->element_start = state->lex->token_start; + state->element_type = state->lex->token_type; + state->element_scalar = NULL; + } + + return JSON_SUCCESS; +} + +/* json array element end handler for populate_array_json() */ +static JsonParseErrorType +populate_array_element_end(void *_state, bool isnull) +{ + PopulateArrayState *state = (PopulateArrayState *) _state; + PopulateArrayContext *ctx = state->ctx; + int ndim = state->lex->lex_level; + + Assert(ctx->ndims > 0); + + if (ndim == ctx->ndims) + { + JsValue jsv; + + jsv.is_json = true; + jsv.val.json.type = state->element_type; + + if (isnull) + { + Assert(jsv.val.json.type == JSON_TOKEN_NULL); + jsv.val.json.str = NULL; + jsv.val.json.len = 0; + } + else if (state->element_scalar) + { + jsv.val.json.str = state->element_scalar; + jsv.val.json.len = -1; /* null-terminated */ + } + else + { + jsv.val.json.str = state->element_start; + jsv.val.json.len = (state->lex->prev_token_terminator - + state->element_start) * sizeof(char); + } + + populate_array_element(ctx, ndim, &jsv); + } + + return JSON_SUCCESS; +} + +/* json scalar handler for populate_array_json() */ +static JsonParseErrorType +populate_array_scalar(void *_state, char *token, JsonTokenType tokentype) +{ + PopulateArrayState *state = (PopulateArrayState *) _state; + PopulateArrayContext *ctx = state->ctx; + int ndim = state->lex->lex_level; + + if (ctx->ndims <= 0) + populate_array_assign_ndims(ctx, ndim); + else if (ndim < ctx->ndims) + populate_array_report_expected_array(ctx, ndim); + + if (ndim == ctx->ndims) + { + /* remember the scalar element token */ + state->element_scalar = token; + /* element_type must already be set in populate_array_element_start() */ + Assert(state->element_type == tokentype); + } + + return JSON_SUCCESS; +} + +/* parse a json array and populate array */ +static void +populate_array_json(PopulateArrayContext *ctx, char *json, int len) +{ + PopulateArrayState state; + JsonSemAction sem; + + state.lex = makeJsonLexContextCstringLen(json, len, GetDatabaseEncoding(), true); + state.ctx = ctx; + + memset(&sem, 0, sizeof(sem)); + sem.semstate = (void *) &state; + sem.object_start = populate_array_object_start; + sem.array_end = populate_array_array_end; + sem.array_element_start = populate_array_element_start; + sem.array_element_end = populate_array_element_end; + sem.scalar = populate_array_scalar; + + pg_parse_json_or_ereport(state.lex, &sem); + + /* number of dimensions should be already known */ + Assert(ctx->ndims > 0 && ctx->dims); + + pfree(state.lex); +} + +/* + * 
populate_array_dim_jsonb() -- Iterate recursively through jsonb sub-array + * elements and accumulate result using given ArrayBuildState. + */ +static void +populate_array_dim_jsonb(PopulateArrayContext *ctx, /* context */ + JsonbValue *jbv, /* jsonb sub-array */ + int ndim) /* current dimension */ +{ + JsonbContainer *jbc = jbv->val.binary.data; + JsonbIterator *it; + JsonbIteratorToken tok; + JsonbValue val; + JsValue jsv; + + check_stack_depth(); + + if (jbv->type != jbvBinary || !JsonContainerIsArray(jbc)) + populate_array_report_expected_array(ctx, ndim - 1); + + Assert(!JsonContainerIsScalar(jbc)); + + it = JsonbIteratorInit(jbc); + + tok = JsonbIteratorNext(&it, &val, true); + Assert(tok == WJB_BEGIN_ARRAY); + + tok = JsonbIteratorNext(&it, &val, true); + + /* + * If the number of dimensions is not yet known and we have found end of + * the array, or the first child element is not an array, then assign the + * number of dimensions now. + */ + if (ctx->ndims <= 0 && + (tok == WJB_END_ARRAY || + (tok == WJB_ELEM && + (val.type != jbvBinary || + !JsonContainerIsArray(val.val.binary.data))))) + populate_array_assign_ndims(ctx, ndim); + + jsv.is_json = false; + jsv.val.jsonb = &val; + + /* process all the array elements */ + while (tok == WJB_ELEM) + { + /* + * Recurse only if the dimensions of dimensions is still unknown or if + * it is not the innermost dimension. + */ + if (ctx->ndims > 0 && ndim >= ctx->ndims) + populate_array_element(ctx, ndim, &jsv); + else + { + /* populate child sub-array */ + populate_array_dim_jsonb(ctx, &val, ndim + 1); + + /* number of dimensions should be already known */ + Assert(ctx->ndims > 0 && ctx->dims); + + populate_array_check_dimension(ctx, ndim); + } + + tok = JsonbIteratorNext(&it, &val, true); + } + + Assert(tok == WJB_END_ARRAY); + + /* free iterator, iterating until WJB_DONE */ + tok = JsonbIteratorNext(&it, &val, true); + Assert(tok == WJB_DONE && !it); +} + +/* recursively populate an array from json/jsonb */ +static Datum +populate_array(ArrayIOData *aio, + const char *colname, + MemoryContext mcxt, + JsValue *jsv) +{ + PopulateArrayContext ctx; + Datum result; + int *lbs; + int i; + + ctx.aio = aio; + ctx.mcxt = mcxt; + ctx.acxt = CurrentMemoryContext; + ctx.astate = initArrayResult(aio->element_type, ctx.acxt, true); + ctx.colname = colname; + ctx.ndims = 0; /* unknown yet */ + ctx.dims = NULL; + ctx.sizes = NULL; + + if (jsv->is_json) + populate_array_json(&ctx, jsv->val.json.str, + jsv->val.json.len >= 0 ? jsv->val.json.len + : strlen(jsv->val.json.str)); + else + { + populate_array_dim_jsonb(&ctx, jsv->val.jsonb, 1); + ctx.dims[0] = ctx.sizes[0]; + } + + Assert(ctx.ndims > 0); + + lbs = palloc(sizeof(int) * ctx.ndims); + + for (i = 0; i < ctx.ndims; i++) + lbs[i] = 1; + + result = makeMdArrayResult(ctx.astate, ctx.ndims, ctx.dims, lbs, + ctx.acxt, true); + + pfree(ctx.dims); + pfree(ctx.sizes); + pfree(lbs); + + return result; +} + +static void +JsValueToJsObject(JsValue *jsv, JsObject *jso) +{ + jso->is_json = jsv->is_json; + + if (jsv->is_json) + { + /* convert plain-text json into a hash table */ + jso->val.json_hash = + get_json_object_as_hash(jsv->val.json.str, + jsv->val.json.len >= 0 + ? 
jsv->val.json.len + : strlen(jsv->val.json.str), + "populate_composite"); + } + else + { + JsonbValue *jbv = jsv->val.jsonb; + + if (jbv->type == jbvBinary && + JsonContainerIsObject(jbv->val.binary.data)) + { + jso->val.jsonb_cont = jbv->val.binary.data; + } + else + { + bool is_scalar; + + is_scalar = IsAJsonbScalar(jbv) || + (jbv->type == jbvBinary && + JsonContainerIsScalar(jbv->val.binary.data)); + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + is_scalar + ? errmsg("cannot call %s on a scalar", + "populate_composite") + : errmsg("cannot call %s on an array", + "populate_composite"))); + } + } +} + +/* acquire or update cached tuple descriptor for a composite type */ +static void +update_cached_tupdesc(CompositeIOData *io, MemoryContext mcxt) +{ + if (!io->tupdesc || + io->tupdesc->tdtypeid != io->base_typid || + io->tupdesc->tdtypmod != io->base_typmod) + { + TupleDesc tupdesc = lookup_rowtype_tupdesc(io->base_typid, + io->base_typmod); + MemoryContext oldcxt; + + if (io->tupdesc) + FreeTupleDesc(io->tupdesc); + + /* copy tuple desc without constraints into cache memory context */ + oldcxt = MemoryContextSwitchTo(mcxt); + io->tupdesc = CreateTupleDescCopy(tupdesc); + MemoryContextSwitchTo(oldcxt); + + ReleaseTupleDesc(tupdesc); + } +} + +/* recursively populate a composite (row type) value from json/jsonb */ +static Datum +populate_composite(CompositeIOData *io, + Oid typid, + const char *colname, + MemoryContext mcxt, + HeapTupleHeader defaultval, + JsValue *jsv, + bool isnull) +{ + Datum result; + + /* acquire/update cached tuple descriptor */ + update_cached_tupdesc(io, mcxt); + + if (isnull) + result = (Datum) 0; + else + { + HeapTupleHeader tuple; + JsObject jso; + + /* prepare input value */ + JsValueToJsObject(jsv, &jso); + + /* populate resulting record tuple */ + tuple = populate_record(io->tupdesc, &io->record_io, + defaultval, mcxt, &jso); + result = HeapTupleHeaderGetDatum(tuple); + + JsObjectFree(&jso); + } + + /* + * If it's domain over composite, check domain constraints. (This should + * probably get refactored so that we can see the TYPECAT value, but for + * now, we can tell by comparing typid to base_typid.) 
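+	 *
+	 * For example (names are illustrative): given CREATE DOMAIN mydom AS
+	 * myrowtype CHECK (...), typid here is mydom's OID while base_typid is
+	 * myrowtype's, so the domain_check() below runs; for a plain composite
+	 * the two OIDs are equal and it is skipped.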
+ */ + if (typid != io->base_typid && typid != RECORDOID) + domain_check(result, isnull, typid, &io->domain_info, mcxt); + + return result; +} + +/* populate non-null scalar value from json/jsonb value */ +static Datum +populate_scalar(ScalarIOData *io, Oid typid, int32 typmod, JsValue *jsv) +{ + Datum res; + char *str = NULL; + char *json = NULL; + + if (jsv->is_json) + { + int len = jsv->val.json.len; + + json = jsv->val.json.str; + Assert(json); + if (len >= 0) + { + /* Need to copy non-null-terminated string */ + str = palloc(len + 1 * sizeof(char)); + memcpy(str, json, len); + str[len] = '\0'; + } + else + str = json; /* string is already null-terminated */ + + /* If converting to json/jsonb, make string into valid JSON literal */ + if ((typid == JSONOID || typid == JSONBOID) && + jsv->val.json.type == JSON_TOKEN_STRING) + { + StringInfoData buf; + + initStringInfo(&buf); + escape_json(&buf, str); + /* free temporary buffer */ + if (str != json) + pfree(str); + str = buf.data; + } + } + else + { + JsonbValue *jbv = jsv->val.jsonb; + + if (typid == JSONBOID) + { + Jsonb *jsonb = JsonbValueToJsonb(jbv); /* directly use jsonb */ + + return JsonbPGetDatum(jsonb); + } + /* convert jsonb to string for typio call */ + else if (typid == JSONOID && jbv->type != jbvBinary) + { + /* + * Convert scalar jsonb (non-scalars are passed here as jbvBinary) + * to json string, preserving quotes around top-level strings. + */ + Jsonb *jsonb = JsonbValueToJsonb(jbv); + + str = JsonbToCString(NULL, &jsonb->root, VARSIZE(jsonb)); + } + else if (jbv->type == jbvString) /* quotes are stripped */ + str = pnstrdup(jbv->val.string.val, jbv->val.string.len); + else if (jbv->type == jbvBool) + str = pstrdup(jbv->val.boolean ? "true" : "false"); + else if (jbv->type == jbvNumeric) + str = DatumGetCString(DirectFunctionCall1(numeric_out, + PointerGetDatum(jbv->val.numeric))); + else if (jbv->type == jbvBinary) + str = JsonbToCString(NULL, jbv->val.binary.data, + jbv->val.binary.len); + else + elog(ERROR, "unrecognized jsonb type: %d", (int) jbv->type); + } + + res = InputFunctionCall(&io->typiofunc, str, io->typioparam, typmod); + + /* free temporary buffer */ + if (str != json) + pfree(str); + + return res; +} + +static Datum +populate_domain(DomainIOData *io, + Oid typid, + const char *colname, + MemoryContext mcxt, + JsValue *jsv, + bool isnull) +{ + Datum res; + + if (isnull) + res = (Datum) 0; + else + { + res = populate_record_field(io->base_io, + io->base_typid, io->base_typmod, + colname, mcxt, PointerGetDatum(NULL), + jsv, &isnull); + Assert(!isnull); + } + + domain_check(res, isnull, typid, &io->domain_info, mcxt); + + return res; +} + +/* prepare column metadata cache for the given type */ +static void +prepare_column_cache(ColumnIOData *column, + Oid typid, + int32 typmod, + MemoryContext mcxt, + bool need_scalar) +{ + HeapTuple tup; + Form_pg_type type; + + column->typid = typid; + column->typmod = typmod; + + tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for type %u", typid); + + type = (Form_pg_type) GETSTRUCT(tup); + + if (type->typtype == TYPTYPE_DOMAIN) + { + /* + * We can move directly to the bottom base type; domain_check() will + * take care of checking all constraints for a stack of domains. 
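+		 *
+		 * E.g. with CREATE DOMAIN d1 AS int CHECK (VALUE > 0) and CREATE
+		 * DOMAIN d2 AS d1 (example names), getBaseTypeAndTypmod(d2) resolves
+		 * straight to int, and the later domain_check() call still enforces
+		 * both d1's and d2's constraints.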
+ */ + Oid base_typid; + int32 base_typmod = typmod; + + base_typid = getBaseTypeAndTypmod(typid, &base_typmod); + if (get_typtype(base_typid) == TYPTYPE_COMPOSITE) + { + /* domain over composite has its own code path */ + column->typcat = TYPECAT_COMPOSITE_DOMAIN; + column->io.composite.record_io = NULL; + column->io.composite.tupdesc = NULL; + column->io.composite.base_typid = base_typid; + column->io.composite.base_typmod = base_typmod; + column->io.composite.domain_info = NULL; + } + else + { + /* domain over anything else */ + column->typcat = TYPECAT_DOMAIN; + column->io.domain.base_typid = base_typid; + column->io.domain.base_typmod = base_typmod; + column->io.domain.base_io = + MemoryContextAllocZero(mcxt, sizeof(ColumnIOData)); + column->io.domain.domain_info = NULL; + } + } + else if (type->typtype == TYPTYPE_COMPOSITE || typid == RECORDOID) + { + column->typcat = TYPECAT_COMPOSITE; + column->io.composite.record_io = NULL; + column->io.composite.tupdesc = NULL; + column->io.composite.base_typid = typid; + column->io.composite.base_typmod = typmod; + column->io.composite.domain_info = NULL; + } + else if (IsTrueArrayType(type)) + { + column->typcat = TYPECAT_ARRAY; + column->io.array.element_info = MemoryContextAllocZero(mcxt, + sizeof(ColumnIOData)); + column->io.array.element_type = type->typelem; + /* array element typemod stored in attribute's typmod */ + column->io.array.element_typmod = typmod; + } + else + { + column->typcat = TYPECAT_SCALAR; + need_scalar = true; + } + + /* caller can force us to look up scalar_io info even for non-scalars */ + if (need_scalar) + { + Oid typioproc; + + getTypeInputInfo(typid, &typioproc, &column->scalar_io.typioparam); + fmgr_info_cxt(typioproc, &column->scalar_io.typiofunc, mcxt); + } + + ReleaseSysCache(tup); +} + +/* recursively populate a record field or an array element from a json/jsonb value */ +static Datum +populate_record_field(ColumnIOData *col, + Oid typid, + int32 typmod, + const char *colname, + MemoryContext mcxt, + Datum defaultval, + JsValue *jsv, + bool *isnull) +{ + TypeCat typcat; + + check_stack_depth(); + + /* + * Prepare column metadata cache for the given type. Force lookup of the + * scalar_io data so that the json string hack below will work. + */ + if (col->typid != typid || col->typmod != typmod) + prepare_column_cache(col, typid, typmod, mcxt, true); + + *isnull = JsValueIsNull(jsv); + + typcat = col->typcat; + + /* try to convert json string to a non-scalar type through input function */ + if (JsValueIsString(jsv) && + (typcat == TYPECAT_ARRAY || + typcat == TYPECAT_COMPOSITE || + typcat == TYPECAT_COMPOSITE_DOMAIN)) + typcat = TYPECAT_SCALAR; + + /* we must perform domain checks for NULLs, otherwise exit immediately */ + if (*isnull && + typcat != TYPECAT_DOMAIN && + typcat != TYPECAT_COMPOSITE_DOMAIN) + return (Datum) 0; + + switch (typcat) + { + case TYPECAT_SCALAR: + return populate_scalar(&col->scalar_io, typid, typmod, jsv); + + case TYPECAT_ARRAY: + return populate_array(&col->io.array, colname, mcxt, jsv); + + case TYPECAT_COMPOSITE: + case TYPECAT_COMPOSITE_DOMAIN: + return populate_composite(&col->io.composite, typid, + colname, mcxt, + DatumGetPointer(defaultval) + ? 
DatumGetHeapTupleHeader(defaultval) + : NULL, + jsv, *isnull); + + case TYPECAT_DOMAIN: + return populate_domain(&col->io.domain, typid, colname, mcxt, + jsv, *isnull); + + default: + elog(ERROR, "unrecognized type category '%c'", typcat); + return (Datum) 0; + } +} + +static RecordIOData * +allocate_record_info(MemoryContext mcxt, int ncolumns) +{ + RecordIOData *data = (RecordIOData *) + MemoryContextAlloc(mcxt, + offsetof(RecordIOData, columns) + + ncolumns * sizeof(ColumnIOData)); + + data->record_type = InvalidOid; + data->record_typmod = 0; + data->ncolumns = ncolumns; + MemSet(data->columns, 0, sizeof(ColumnIOData) * ncolumns); + + return data; +} + +static bool +JsObjectGetField(JsObject *obj, char *field, JsValue *jsv) +{ + jsv->is_json = obj->is_json; + + if (jsv->is_json) + { + JsonHashEntry *hashentry = hash_search(obj->val.json_hash, field, + HASH_FIND, NULL); + + jsv->val.json.type = hashentry ? hashentry->type : JSON_TOKEN_NULL; + jsv->val.json.str = jsv->val.json.type == JSON_TOKEN_NULL ? NULL : + hashentry->val; + jsv->val.json.len = jsv->val.json.str ? -1 : 0; /* null-terminated */ + + return hashentry != NULL; + } + else + { + jsv->val.jsonb = !obj->val.jsonb_cont ? NULL : + getKeyJsonValueFromContainer(obj->val.jsonb_cont, field, strlen(field), + NULL); + + return jsv->val.jsonb != NULL; + } +} + +/* populate a record tuple from json/jsonb value */ +static HeapTupleHeader +populate_record(TupleDesc tupdesc, + RecordIOData **record_p, + HeapTupleHeader defaultval, + MemoryContext mcxt, + JsObject *obj) +{ + RecordIOData *record = *record_p; + Datum *values; + bool *nulls; + HeapTuple res; + int ncolumns = tupdesc->natts; + int i; + + /* + * if the input json is empty, we can only skip the rest if we were passed + * in a non-null record, since otherwise there may be issues with domain + * nulls. + */ + if (defaultval && JsObjectIsEmpty(obj)) + return defaultval; + + /* (re)allocate metadata cache */ + if (record == NULL || + record->ncolumns != ncolumns) + *record_p = record = allocate_record_info(mcxt, ncolumns); + + /* invalidate metadata cache if the record type has changed */ + if (record->record_type != tupdesc->tdtypeid || + record->record_typmod != tupdesc->tdtypmod) + { + MemSet(record, 0, offsetof(RecordIOData, columns) + + ncolumns * sizeof(ColumnIOData)); + record->record_type = tupdesc->tdtypeid; + record->record_typmod = tupdesc->tdtypmod; + record->ncolumns = ncolumns; + } + + values = (Datum *) palloc(ncolumns * sizeof(Datum)); + nulls = (bool *) palloc(ncolumns * sizeof(bool)); + + if (defaultval) + { + HeapTupleData tuple; + + /* Build a temporary HeapTuple control structure */ + tuple.t_len = HeapTupleHeaderGetDatumLength(defaultval); + ItemPointerSetInvalid(&(tuple.t_self)); + tuple.t_tableOid = InvalidOid; + tuple.t_data = defaultval; + + /* Break down the tuple into fields */ + heap_deform_tuple(&tuple, tupdesc, values, nulls); + } + else + { + for (i = 0; i < ncolumns; ++i) + { + values[i] = (Datum) 0; + nulls[i] = true; + } + } + + for (i = 0; i < ncolumns; ++i) + { + Form_pg_attribute att = TupleDescAttr(tupdesc, i); + char *colname = NameStr(att->attname); + JsValue field = {0}; + bool found; + + /* Ignore dropped columns in datatype */ + if (att->attisdropped) + { + nulls[i] = true; + continue; + } + + found = JsObjectGetField(obj, colname, &field); + + /* + * we can't just skip here if the key wasn't found since we might have + * a domain to deal with. 
If we were passed in a non-null record + * datum, we assume that the existing values are valid (if they're + * not, then it's not our fault), but if we were passed in a null, + * then every field which we don't populate needs to be run through + * the input function just in case it's a domain type. + */ + if (defaultval && !found) + continue; + + values[i] = populate_record_field(&record->columns[i], + att->atttypid, + att->atttypmod, + colname, + mcxt, + nulls[i] ? (Datum) 0 : values[i], + &field, + &nulls[i]); + } + + res = heap_form_tuple(tupdesc, values, nulls); + + pfree(values); + pfree(nulls); + + return res->t_data; +} + +/* + * Setup for json{b}_populate_record{set}: result type will be same as first + * argument's type --- unless first argument is "null::record", which we can't + * extract type info from; we handle that later. + */ +static void +get_record_type_from_argument(FunctionCallInfo fcinfo, + const char *funcname, + PopulateRecordCache *cache) +{ + cache->argtype = get_fn_expr_argtype(fcinfo->flinfo, 0); + prepare_column_cache(&cache->c, + cache->argtype, -1, + cache->fn_mcxt, false); + if (cache->c.typcat != TYPECAT_COMPOSITE && + cache->c.typcat != TYPECAT_COMPOSITE_DOMAIN) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + /* translator: %s is a function name, eg json_to_record */ + errmsg("first argument of %s must be a row type", + funcname))); +} + +/* + * Setup for json{b}_to_record{set}: result type is specified by calling + * query. We'll also use this code for json{b}_populate_record{set}, + * if we discover that the first argument is a null of type RECORD. + * + * Here it is syntactically impossible to specify the target type + * as domain-over-composite. + */ +static void +get_record_type_from_query(FunctionCallInfo fcinfo, + const char *funcname, + PopulateRecordCache *cache) +{ + TupleDesc tupdesc; + MemoryContext old_cxt; + + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + /* translator: %s is a function name, eg json_to_record */ + errmsg("could not determine row type for result of %s", + funcname), + errhint("Provide a non-null record argument, " + "or call the function in the FROM clause " + "using a column definition list."))); + + Assert(tupdesc); + cache->argtype = tupdesc->tdtypeid; + + /* If we go through this more than once, avoid memory leak */ + if (cache->c.io.composite.tupdesc) + FreeTupleDesc(cache->c.io.composite.tupdesc); + + /* Save identified tupdesc */ + old_cxt = MemoryContextSwitchTo(cache->fn_mcxt); + cache->c.io.composite.tupdesc = CreateTupleDescCopy(tupdesc); + cache->c.io.composite.base_typid = tupdesc->tdtypeid; + cache->c.io.composite.base_typmod = tupdesc->tdtypmod; + MemoryContextSwitchTo(old_cxt); +} + +/* + * common worker for json{b}_populate_record() and json{b}_to_record() + * is_json and have_record_arg identify the specific function + */ +static Datum +populate_record_worker(FunctionCallInfo fcinfo, const char *funcname, + bool is_json, bool have_record_arg) +{ + int json_arg_num = have_record_arg ? 1 : 0; + JsValue jsv = {0}; + HeapTupleHeader rec; + Datum rettuple; + JsonbValue jbv; + MemoryContext fnmcxt = fcinfo->flinfo->fn_mcxt; + PopulateRecordCache *cache = fcinfo->flinfo->fn_extra; + + /* + * If first time through, identify input/result record type. Note that + * this stanza looks only at fcinfo context, which can't change during the + * query; so we may not be able to fully resolve a RECORD input type yet. 
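+	 *
+	 * (For instance, a call spelled json_populate_record(null::record, ...)
+	 * only tells us RECORDOID at this point; the concrete row type must
+	 * come from the tuple itself or from the calling query, both handled
+	 * further down.)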
+ */ + if (!cache) + { + fcinfo->flinfo->fn_extra = cache = + MemoryContextAllocZero(fnmcxt, sizeof(*cache)); + cache->fn_mcxt = fnmcxt; + + if (have_record_arg) + get_record_type_from_argument(fcinfo, funcname, cache); + else + get_record_type_from_query(fcinfo, funcname, cache); + } + + /* Collect record arg if we have one */ + if (!have_record_arg) + rec = NULL; /* it's json{b}_to_record() */ + else if (!PG_ARGISNULL(0)) + { + rec = PG_GETARG_HEAPTUPLEHEADER(0); + + /* + * When declared arg type is RECORD, identify actual record type from + * the tuple itself. + */ + if (cache->argtype == RECORDOID) + { + cache->c.io.composite.base_typid = HeapTupleHeaderGetTypeId(rec); + cache->c.io.composite.base_typmod = HeapTupleHeaderGetTypMod(rec); + } + } + else + { + rec = NULL; + + /* + * When declared arg type is RECORD, identify actual record type from + * calling query, or fail if we can't. + */ + if (cache->argtype == RECORDOID) + { + get_record_type_from_query(fcinfo, funcname, cache); + /* This can't change argtype, which is important for next time */ + Assert(cache->argtype == RECORDOID); + } + } + + /* If no JSON argument, just return the record (if any) unchanged */ + if (PG_ARGISNULL(json_arg_num)) + { + if (rec) + PG_RETURN_POINTER(rec); + else + PG_RETURN_NULL(); + } + + jsv.is_json = is_json; + + if (is_json) + { + text *json = PG_GETARG_TEXT_PP(json_arg_num); + + jsv.val.json.str = VARDATA_ANY(json); + jsv.val.json.len = VARSIZE_ANY_EXHDR(json); + jsv.val.json.type = JSON_TOKEN_INVALID; /* not used in + * populate_composite() */ + } + else + { + Jsonb *jb = PG_GETARG_JSONB_P(json_arg_num); + + jsv.val.jsonb = &jbv; + + /* fill binary jsonb value pointing to jb */ + jbv.type = jbvBinary; + jbv.val.binary.data = &jb->root; + jbv.val.binary.len = VARSIZE(jb) - VARHDRSZ; + } + + rettuple = populate_composite(&cache->c.io.composite, cache->argtype, + NULL, fnmcxt, rec, &jsv, false); + + PG_RETURN_DATUM(rettuple); +} + +/* + * get_json_object_as_hash + * + * decompose a json object into a hash table. 
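+ *
+ * For example, '{"a": 1, "b": {"c": 2}}' produces two entries: key "a"
+ * holding the scalar token "1", and key "b" holding the raw JSON text
+ * '{"c": 2}' (nested structure is kept as unparsed text and reparsed
+ * only if the target column requires it).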
+ */ +static HTAB * +get_json_object_as_hash(char *json, int len, const char *funcname) +{ + HASHCTL ctl; + HTAB *tab; + JHashState *state; + JsonLexContext *lex = makeJsonLexContextCstringLen(json, len, GetDatabaseEncoding(), true); + JsonSemAction *sem; + + ctl.keysize = NAMEDATALEN; + ctl.entrysize = sizeof(JsonHashEntry); + ctl.hcxt = CurrentMemoryContext; + tab = hash_create("json object hashtable", + 100, + &ctl, + HASH_ELEM | HASH_STRINGS | HASH_CONTEXT); + + state = palloc0(sizeof(JHashState)); + sem = palloc0(sizeof(JsonSemAction)); + + state->function_name = funcname; + state->hash = tab; + state->lex = lex; + + sem->semstate = (void *) state; + sem->array_start = hash_array_start; + sem->scalar = hash_scalar; + sem->object_field_start = hash_object_field_start; + sem->object_field_end = hash_object_field_end; + + pg_parse_json_or_ereport(lex, sem); + + return tab; +} + +static JsonParseErrorType +hash_object_field_start(void *state, char *fname, bool isnull) +{ + JHashState *_state = (JHashState *) state; + + if (_state->lex->lex_level > 1) + return JSON_SUCCESS; + + /* remember token type */ + _state->saved_token_type = _state->lex->token_type; + + if (_state->lex->token_type == JSON_TOKEN_ARRAY_START || + _state->lex->token_type == JSON_TOKEN_OBJECT_START) + { + /* remember start position of the whole text of the subobject */ + _state->save_json_start = _state->lex->token_start; + } + else + { + /* must be a scalar */ + _state->save_json_start = NULL; + } + + return JSON_SUCCESS; +} + +static JsonParseErrorType +hash_object_field_end(void *state, char *fname, bool isnull) +{ + JHashState *_state = (JHashState *) state; + JsonHashEntry *hashentry; + bool found; + + /* + * Ignore nested fields. + */ + if (_state->lex->lex_level > 1) + return JSON_SUCCESS; + + /* + * Ignore field names >= NAMEDATALEN - they can't match a record field. + * (Note: without this test, the hash code would truncate the string at + * NAMEDATALEN-1, and could then match against a similarly-truncated + * record field name. That would be a reasonable behavior, but this code + * has previously insisted on exact equality, so we keep this behavior.) + */ + if (strlen(fname) >= NAMEDATALEN) + return JSON_SUCCESS; + + hashentry = hash_search(_state->hash, fname, HASH_ENTER, &found); + + /* + * found being true indicates a duplicate. We don't do anything about + * that, a later field with the same name overrides the earlier field. 
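+	 *
+	 * (So for input '{"a": 1, "a": 2}' the single surviving entry for "a"
+	 * carries "2", matching jsonb's last-key-wins handling of duplicate
+	 * object keys.)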
+ */ + + hashentry->type = _state->saved_token_type; + Assert(isnull == (hashentry->type == JSON_TOKEN_NULL)); + + if (_state->save_json_start != NULL) + { + int len = _state->lex->prev_token_terminator - _state->save_json_start; + char *val = palloc((len + 1) * sizeof(char)); + + memcpy(val, _state->save_json_start, len); + val[len] = '\0'; + hashentry->val = val; + } + else + { + /* must have had a scalar instead */ + hashentry->val = _state->saved_scalar; + } + + return JSON_SUCCESS; +} + +static JsonParseErrorType +hash_array_start(void *state) +{ + JHashState *_state = (JHashState *) state; + + if (_state->lex->lex_level == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call %s on an array", _state->function_name))); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +hash_scalar(void *state, char *token, JsonTokenType tokentype) +{ + JHashState *_state = (JHashState *) state; + + if (_state->lex->lex_level == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call %s on a scalar", _state->function_name))); + + if (_state->lex->lex_level == 1) + { + _state->saved_scalar = token; + /* saved_token_type must already be set in hash_object_field_start() */ + Assert(_state->saved_token_type == tokentype); + } + + return JSON_SUCCESS; +} + + +/* + * SQL function json_populate_recordset + * + * set fields in a set of records from the argument json, + * which must be an array of objects. + * + * similar to json_populate_record, but the tuple-building code + * is pushed down into the semantic action handlers so it's done + * per object in the array. + */ +Datum +jsonb_populate_recordset(PG_FUNCTION_ARGS) +{ + return populate_recordset_worker(fcinfo, "jsonb_populate_recordset", + false, true); +} + +Datum +jsonb_to_recordset(PG_FUNCTION_ARGS) +{ + return populate_recordset_worker(fcinfo, "jsonb_to_recordset", + false, false); +} + +Datum +json_populate_recordset(PG_FUNCTION_ARGS) +{ + return populate_recordset_worker(fcinfo, "json_populate_recordset", + true, true); +} + +Datum +json_to_recordset(PG_FUNCTION_ARGS) +{ + return populate_recordset_worker(fcinfo, "json_to_recordset", + true, false); +} + +static void +populate_recordset_record(PopulateRecordsetState *state, JsObject *obj) +{ + PopulateRecordCache *cache = state->cache; + HeapTupleHeader tuphead; + HeapTupleData tuple; + + /* acquire/update cached tuple descriptor */ + update_cached_tupdesc(&cache->c.io.composite, cache->fn_mcxt); + + /* replace record fields from json */ + tuphead = populate_record(cache->c.io.composite.tupdesc, + &cache->c.io.composite.record_io, + state->rec, + cache->fn_mcxt, + obj); + + /* if it's domain over composite, check domain constraints */ + if (cache->c.typcat == TYPECAT_COMPOSITE_DOMAIN) + domain_check(HeapTupleHeaderGetDatum(tuphead), false, + cache->argtype, + &cache->c.io.composite.domain_info, + cache->fn_mcxt); + + /* ok, save into tuplestore */ + tuple.t_len = HeapTupleHeaderGetDatumLength(tuphead); + ItemPointerSetInvalid(&(tuple.t_self)); + tuple.t_tableOid = InvalidOid; + tuple.t_data = tuphead; + + tuplestore_puttuple(state->tuple_store, &tuple); +} + +/* + * common worker for json{b}_populate_recordset() and json{b}_to_recordset() + * is_json and have_record_arg identify the specific function + */ +static Datum +populate_recordset_worker(FunctionCallInfo fcinfo, const char *funcname, + bool is_json, bool have_record_arg) +{ + int json_arg_num = have_record_arg ? 
1 : 0; + ReturnSetInfo *rsi; + MemoryContext old_cxt; + HeapTupleHeader rec; + PopulateRecordCache *cache = fcinfo->flinfo->fn_extra; + PopulateRecordsetState *state; + + rsi = (ReturnSetInfo *) fcinfo->resultinfo; + + if (!rsi || !IsA(rsi, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + + if (!(rsi->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + rsi->returnMode = SFRM_Materialize; + + /* + * If first time through, identify input/result record type. Note that + * this stanza looks only at fcinfo context, which can't change during the + * query; so we may not be able to fully resolve a RECORD input type yet. + */ + if (!cache) + { + fcinfo->flinfo->fn_extra = cache = + MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt, sizeof(*cache)); + cache->fn_mcxt = fcinfo->flinfo->fn_mcxt; + + if (have_record_arg) + get_record_type_from_argument(fcinfo, funcname, cache); + else + get_record_type_from_query(fcinfo, funcname, cache); + } + + /* Collect record arg if we have one */ + if (!have_record_arg) + rec = NULL; /* it's json{b}_to_recordset() */ + else if (!PG_ARGISNULL(0)) + { + rec = PG_GETARG_HEAPTUPLEHEADER(0); + + /* + * When declared arg type is RECORD, identify actual record type from + * the tuple itself. + */ + if (cache->argtype == RECORDOID) + { + cache->c.io.composite.base_typid = HeapTupleHeaderGetTypeId(rec); + cache->c.io.composite.base_typmod = HeapTupleHeaderGetTypMod(rec); + } + } + else + { + rec = NULL; + + /* + * When declared arg type is RECORD, identify actual record type from + * calling query, or fail if we can't. + */ + if (cache->argtype == RECORDOID) + { + get_record_type_from_query(fcinfo, funcname, cache); + /* This can't change argtype, which is important for next time */ + Assert(cache->argtype == RECORDOID); + } + } + + /* if the json is null send back an empty set */ + if (PG_ARGISNULL(json_arg_num)) + PG_RETURN_NULL(); + + /* + * Forcibly update the cached tupdesc, to ensure we have the right tupdesc + * to return even if the JSON contains no rows. 
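+	 *
+	 * (E.g., with an example row type myrowtype, SELECT * FROM
+	 * json_populate_recordset(null::myrowtype, '[]') must still return an
+	 * empty result set that carries myrowtype's column descriptor.)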
+ */ + update_cached_tupdesc(&cache->c.io.composite, cache->fn_mcxt); + + state = palloc0(sizeof(PopulateRecordsetState)); + + /* make tuplestore in a sufficiently long-lived memory context */ + old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory); + state->tuple_store = tuplestore_begin_heap(rsi->allowedModes & + SFRM_Materialize_Random, + false, work_mem); + MemoryContextSwitchTo(old_cxt); + + state->function_name = funcname; + state->cache = cache; + state->rec = rec; + + if (is_json) + { + text *json = PG_GETARG_TEXT_PP(json_arg_num); + JsonLexContext *lex; + JsonSemAction *sem; + + sem = palloc0(sizeof(JsonSemAction)); + + lex = makeJsonLexContext(json, true); + + sem->semstate = (void *) state; + sem->array_start = populate_recordset_array_start; + sem->array_element_start = populate_recordset_array_element_start; + sem->scalar = populate_recordset_scalar; + sem->object_field_start = populate_recordset_object_field_start; + sem->object_field_end = populate_recordset_object_field_end; + sem->object_start = populate_recordset_object_start; + sem->object_end = populate_recordset_object_end; + + state->lex = lex; + + pg_parse_json_or_ereport(lex, sem); + } + else + { + Jsonb *jb = PG_GETARG_JSONB_P(json_arg_num); + JsonbIterator *it; + JsonbValue v; + bool skipNested = false; + JsonbIteratorToken r; + + if (JB_ROOT_IS_SCALAR(jb) || !JB_ROOT_IS_ARRAY(jb)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call %s on a non-array", + funcname))); + + it = JsonbIteratorInit(&jb->root); + + while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE) + { + skipNested = true; + + if (r == WJB_ELEM) + { + JsObject obj; + + if (v.type != jbvBinary || + !JsonContainerIsObject(v.val.binary.data)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("argument of %s must be an array of objects", + funcname))); + + obj.is_json = false; + obj.val.jsonb_cont = v.val.binary.data; + + populate_recordset_record(state, &obj); + } + } + } + + /* + * Note: we must copy the cached tupdesc because the executor will free + * the passed-back setDesc, but we want to hang onto the cache in case + * we're called again in the same query. 
+ */ + rsi->setResult = state->tuple_store; + rsi->setDesc = CreateTupleDescCopy(cache->c.io.composite.tupdesc); + + PG_RETURN_NULL(); +} + +static JsonParseErrorType +populate_recordset_object_start(void *state) +{ + PopulateRecordsetState *_state = (PopulateRecordsetState *) state; + int lex_level = _state->lex->lex_level; + HASHCTL ctl; + + /* Reject object at top level: we must have an array at level 0 */ + if (lex_level == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call %s on an object", + _state->function_name))); + + /* Nested objects require no special processing */ + if (lex_level > 1) + return JSON_SUCCESS; + + /* Object at level 1: set up a new hash table for this object */ + ctl.keysize = NAMEDATALEN; + ctl.entrysize = sizeof(JsonHashEntry); + ctl.hcxt = CurrentMemoryContext; + _state->json_hash = hash_create("json object hashtable", + 100, + &ctl, + HASH_ELEM | HASH_STRINGS | HASH_CONTEXT); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +populate_recordset_object_end(void *state) +{ + PopulateRecordsetState *_state = (PopulateRecordsetState *) state; + JsObject obj; + + /* Nested objects require no special processing */ + if (_state->lex->lex_level > 1) + return JSON_SUCCESS; + + obj.is_json = true; + obj.val.json_hash = _state->json_hash; + + /* Otherwise, construct and return a tuple based on this level-1 object */ + populate_recordset_record(_state, &obj); + + /* Done with hash for this object */ + hash_destroy(_state->json_hash); + _state->json_hash = NULL; + + return JSON_SUCCESS; +} + +static JsonParseErrorType +populate_recordset_array_element_start(void *state, bool isnull) +{ + PopulateRecordsetState *_state = (PopulateRecordsetState *) state; + + if (_state->lex->lex_level == 1 && + _state->lex->token_type != JSON_TOKEN_OBJECT_START) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("argument of %s must be an array of objects", + _state->function_name))); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +populate_recordset_array_start(void *state) +{ + /* nothing to do */ + return JSON_SUCCESS; +} + +static JsonParseErrorType +populate_recordset_scalar(void *state, char *token, JsonTokenType tokentype) +{ + PopulateRecordsetState *_state = (PopulateRecordsetState *) state; + + if (_state->lex->lex_level == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call %s on a scalar", + _state->function_name))); + + if (_state->lex->lex_level == 2) + _state->saved_scalar = token; + + return JSON_SUCCESS; +} + +static JsonParseErrorType +populate_recordset_object_field_start(void *state, char *fname, bool isnull) +{ + PopulateRecordsetState *_state = (PopulateRecordsetState *) state; + + if (_state->lex->lex_level > 2) + return JSON_SUCCESS; + + _state->saved_token_type = _state->lex->token_type; + + if (_state->lex->token_type == JSON_TOKEN_ARRAY_START || + _state->lex->token_type == JSON_TOKEN_OBJECT_START) + { + _state->save_json_start = _state->lex->token_start; + } + else + { + _state->save_json_start = NULL; + } + + return JSON_SUCCESS; +} + +static JsonParseErrorType +populate_recordset_object_field_end(void *state, char *fname, bool isnull) +{ + PopulateRecordsetState *_state = (PopulateRecordsetState *) state; + JsonHashEntry *hashentry; + bool found; + + /* + * Ignore nested fields. + */ + if (_state->lex->lex_level > 2) + return JSON_SUCCESS; + + /* + * Ignore field names >= NAMEDATALEN - they can't match a record field. 
+ * (Note: without this test, the hash code would truncate the string at + * NAMEDATALEN-1, and could then match against a similarly-truncated + * record field name. That would be a reasonable behavior, but this code + * has previously insisted on exact equality, so we keep this behavior.) + */ + if (strlen(fname) >= NAMEDATALEN) + return JSON_SUCCESS; + + hashentry = hash_search(_state->json_hash, fname, HASH_ENTER, &found); + + /* + * found being true indicates a duplicate. We don't do anything about + * that, a later field with the same name overrides the earlier field. + */ + + hashentry->type = _state->saved_token_type; + Assert(isnull == (hashentry->type == JSON_TOKEN_NULL)); + + if (_state->save_json_start != NULL) + { + int len = _state->lex->prev_token_terminator - _state->save_json_start; + char *val = palloc((len + 1) * sizeof(char)); + + memcpy(val, _state->save_json_start, len); + val[len] = '\0'; + hashentry->val = val; + } + else + { + /* must have had a scalar instead */ + hashentry->val = _state->saved_scalar; + } + + return JSON_SUCCESS; +} + +/* + * Semantic actions for json_strip_nulls. + * + * Simply repeat the input on the output unless we encounter + * a null object field. State for this is set when the field + * is started and reset when the scalar action (which must be next) + * is called. + */ + +static JsonParseErrorType +sn_object_start(void *state) +{ + StripnullState *_state = (StripnullState *) state; + + appendStringInfoCharMacro(_state->strval, '{'); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +sn_object_end(void *state) +{ + StripnullState *_state = (StripnullState *) state; + + appendStringInfoCharMacro(_state->strval, '}'); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +sn_array_start(void *state) +{ + StripnullState *_state = (StripnullState *) state; + + appendStringInfoCharMacro(_state->strval, '['); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +sn_array_end(void *state) +{ + StripnullState *_state = (StripnullState *) state; + + appendStringInfoCharMacro(_state->strval, ']'); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +sn_object_field_start(void *state, char *fname, bool isnull) +{ + StripnullState *_state = (StripnullState *) state; + + if (isnull) + { + /* + * The next thing must be a scalar or isnull couldn't be true, so + * there is no danger of this state being carried down into a nested + * object or array. The flag will be reset in the scalar action. + */ + _state->skip_next_null = true; + return JSON_SUCCESS; + } + + if (_state->strval->data[_state->strval->len - 1] != '{') + appendStringInfoCharMacro(_state->strval, ','); + + /* + * Unfortunately we don't have the quoted and escaped string any more, so + * we have to re-escape it. 
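+	 *
+	 * (A key spelled "a\"b" in the input reaches this callback as the
+	 * de-escaped string a"b, so escape_json() re-quotes and re-escapes it
+	 * on output.)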
+ */ + escape_json(_state->strval, fname); + + appendStringInfoCharMacro(_state->strval, ':'); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +sn_array_element_start(void *state, bool isnull) +{ + StripnullState *_state = (StripnullState *) state; + + if (_state->strval->data[_state->strval->len - 1] != '[') + appendStringInfoCharMacro(_state->strval, ','); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +sn_scalar(void *state, char *token, JsonTokenType tokentype) +{ + StripnullState *_state = (StripnullState *) state; + + if (_state->skip_next_null) + { + Assert(tokentype == JSON_TOKEN_NULL); + _state->skip_next_null = false; + return JSON_SUCCESS; + } + + if (tokentype == JSON_TOKEN_STRING) + escape_json(_state->strval, token); + else + appendStringInfoString(_state->strval, token); + + return JSON_SUCCESS; +} + +/* + * SQL function json_strip_nulls(json) -> json + */ +Datum +json_strip_nulls(PG_FUNCTION_ARGS) +{ + text *json = PG_GETARG_TEXT_PP(0); + StripnullState *state; + JsonLexContext *lex; + JsonSemAction *sem; + + lex = makeJsonLexContext(json, true); + state = palloc0(sizeof(StripnullState)); + sem = palloc0(sizeof(JsonSemAction)); + + state->strval = makeStringInfo(); + state->skip_next_null = false; + state->lex = lex; + + sem->semstate = (void *) state; + sem->object_start = sn_object_start; + sem->object_end = sn_object_end; + sem->array_start = sn_array_start; + sem->array_end = sn_array_end; + sem->scalar = sn_scalar; + sem->array_element_start = sn_array_element_start; + sem->object_field_start = sn_object_field_start; + + pg_parse_json_or_ereport(lex, sem); + + PG_RETURN_TEXT_P(cstring_to_text_with_len(state->strval->data, + state->strval->len)); +} + +/* + * SQL function jsonb_strip_nulls(jsonb) -> jsonb + */ +Datum +jsonb_strip_nulls(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + JsonbIterator *it; + JsonbParseState *parseState = NULL; + JsonbValue *res = NULL; + JsonbValue v, + k; + JsonbIteratorToken type; + bool last_was_key = false; + + if (JB_ROOT_IS_SCALAR(jb)) + PG_RETURN_POINTER(jb); + + it = JsonbIteratorInit(&jb->root); + + while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) + { + Assert(!(type == WJB_KEY && last_was_key)); + + if (type == WJB_KEY) + { + /* stash the key until we know if it has a null value */ + k = v; + last_was_key = true; + continue; + } + + if (last_was_key) + { + /* if the last element was a key this one can't be */ + last_was_key = false; + + /* skip this field if value is null */ + if (type == WJB_VALUE && v.type == jbvNull) + continue; + + /* otherwise, do a delayed push of the key */ + (void) pushJsonbValue(&parseState, WJB_KEY, &k); + } + + if (type == WJB_VALUE || type == WJB_ELEM) + res = pushJsonbValue(&parseState, type, &v); + else + res = pushJsonbValue(&parseState, type, NULL); + } + + Assert(res != NULL); + + PG_RETURN_POINTER(JsonbValueToJsonb(res)); +} + +/* + * SQL function jsonb_pretty (jsonb) + * + * Pretty-printed text for the jsonb + */ +Datum +jsonb_pretty(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + StringInfo str = makeStringInfo(); + + JsonbToCStringIndent(str, &jb->root, VARSIZE(jb)); + + PG_RETURN_TEXT_P(cstring_to_text_with_len(str->data, str->len)); +} + +/* + * SQL function jsonb_concat (jsonb, jsonb) + * + * function for || operator + */ +Datum +jsonb_concat(PG_FUNCTION_ARGS) +{ + Jsonb *jb1 = PG_GETARG_JSONB_P(0); + Jsonb *jb2 = PG_GETARG_JSONB_P(1); + JsonbParseState *state = NULL; + JsonbValue *res; + JsonbIterator *it1, + *it2; + + /* + * If one of 
the jsonb is empty, just return the other if it's not scalar + * and both are of the same kind. If it's a scalar or they are of + * different kinds we need to perform the concatenation even if one is + * empty. + */ + if (JB_ROOT_IS_OBJECT(jb1) == JB_ROOT_IS_OBJECT(jb2)) + { + if (JB_ROOT_COUNT(jb1) == 0 && !JB_ROOT_IS_SCALAR(jb2)) + PG_RETURN_JSONB_P(jb2); + else if (JB_ROOT_COUNT(jb2) == 0 && !JB_ROOT_IS_SCALAR(jb1)) + PG_RETURN_JSONB_P(jb1); + } + + it1 = JsonbIteratorInit(&jb1->root); + it2 = JsonbIteratorInit(&jb2->root); + + res = IteratorConcat(&it1, &it2, &state); + + Assert(res != NULL); + + PG_RETURN_JSONB_P(JsonbValueToJsonb(res)); +} + + +/* + * SQL function jsonb_delete (jsonb, text) + * + * return a copy of the jsonb with the indicated item + * removed. + */ +Datum +jsonb_delete(PG_FUNCTION_ARGS) +{ + Jsonb *in = PG_GETARG_JSONB_P(0); + text *key = PG_GETARG_TEXT_PP(1); + char *keyptr = VARDATA_ANY(key); + int keylen = VARSIZE_ANY_EXHDR(key); + JsonbParseState *state = NULL; + JsonbIterator *it; + JsonbValue v, + *res = NULL; + bool skipNested = false; + JsonbIteratorToken r; + + if (JB_ROOT_IS_SCALAR(in)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot delete from scalar"))); + + if (JB_ROOT_COUNT(in) == 0) + PG_RETURN_JSONB_P(in); + + it = JsonbIteratorInit(&in->root); + + while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE) + { + skipNested = true; + + if ((r == WJB_ELEM || r == WJB_KEY) && + (v.type == jbvString && keylen == v.val.string.len && + memcmp(keyptr, v.val.string.val, keylen) == 0)) + { + /* skip corresponding value as well */ + if (r == WJB_KEY) + (void) JsonbIteratorNext(&it, &v, true); + + continue; + } + + res = pushJsonbValue(&state, r, r < WJB_BEGIN_ARRAY ? &v : NULL); + } + + Assert(res != NULL); + + PG_RETURN_JSONB_P(JsonbValueToJsonb(res)); +} + +/* + * SQL function jsonb_delete (jsonb, variadic text[]) + * + * return a copy of the jsonb with the indicated items + * removed. 
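+ *
+ * Illustrative usage (this function backs the jsonb - text[] operator):
+ *   SELECT '{"a": 1, "b": 2, "c": 3}'::jsonb - ARRAY['a', 'c'];
+ * yields {"b": 2}.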
+ */ +Datum +jsonb_delete_array(PG_FUNCTION_ARGS) +{ + Jsonb *in = PG_GETARG_JSONB_P(0); + ArrayType *keys = PG_GETARG_ARRAYTYPE_P(1); + Datum *keys_elems; + bool *keys_nulls; + int keys_len; + JsonbParseState *state = NULL; + JsonbIterator *it; + JsonbValue v, + *res = NULL; + bool skipNested = false; + JsonbIteratorToken r; + + if (ARR_NDIM(keys) > 1) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("wrong number of array subscripts"))); + + if (JB_ROOT_IS_SCALAR(in)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot delete from scalar"))); + + if (JB_ROOT_COUNT(in) == 0) + PG_RETURN_JSONB_P(in); + + deconstruct_array_builtin(keys, TEXTOID, &keys_elems, &keys_nulls, &keys_len); + + if (keys_len == 0) + PG_RETURN_JSONB_P(in); + + it = JsonbIteratorInit(&in->root); + + while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE) + { + skipNested = true; + + if ((r == WJB_ELEM || r == WJB_KEY) && v.type == jbvString) + { + int i; + bool found = false; + + for (i = 0; i < keys_len; i++) + { + char *keyptr; + int keylen; + + if (keys_nulls[i]) + continue; + + /* We rely on the array elements not being toasted */ + keyptr = VARDATA_ANY(keys_elems[i]); + keylen = VARSIZE_ANY_EXHDR(keys_elems[i]); + if (keylen == v.val.string.len && + memcmp(keyptr, v.val.string.val, keylen) == 0) + { + found = true; + break; + } + } + if (found) + { + /* skip corresponding value as well */ + if (r == WJB_KEY) + (void) JsonbIteratorNext(&it, &v, true); + + continue; + } + } + + res = pushJsonbValue(&state, r, r < WJB_BEGIN_ARRAY ? &v : NULL); + } + + Assert(res != NULL); + + PG_RETURN_JSONB_P(JsonbValueToJsonb(res)); +} + +/* + * SQL function jsonb_delete (jsonb, int) + * + * return a copy of the jsonb with the indicated item + * removed. Negative int means count back from the + * end of the items. + */ +Datum +jsonb_delete_idx(PG_FUNCTION_ARGS) +{ + Jsonb *in = PG_GETARG_JSONB_P(0); + int idx = PG_GETARG_INT32(1); + JsonbParseState *state = NULL; + JsonbIterator *it; + uint32 i = 0, + n; + JsonbValue v, + *res = NULL; + JsonbIteratorToken r; + + if (JB_ROOT_IS_SCALAR(in)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot delete from scalar"))); + + if (JB_ROOT_IS_OBJECT(in)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot delete from object using integer index"))); + + if (JB_ROOT_COUNT(in) == 0) + PG_RETURN_JSONB_P(in); + + it = JsonbIteratorInit(&in->root); + + r = JsonbIteratorNext(&it, &v, false); + Assert(r == WJB_BEGIN_ARRAY); + n = v.val.array.nElems; + + if (idx < 0) + { + if (-idx > n) + idx = n; + else + idx = n + idx; + } + + if (idx >= n) + PG_RETURN_JSONB_P(in); + + pushJsonbValue(&state, r, NULL); + + while ((r = JsonbIteratorNext(&it, &v, true)) != WJB_DONE) + { + if (r == WJB_ELEM) + { + if (i++ == idx) + continue; + } + + res = pushJsonbValue(&state, r, r < WJB_BEGIN_ARRAY ? 
&v : NULL); + } + + Assert(res != NULL); + + PG_RETURN_JSONB_P(JsonbValueToJsonb(res)); +} + +/* + * SQL function jsonb_set(jsonb, text[], jsonb, boolean) + */ +Datum +jsonb_set(PG_FUNCTION_ARGS) +{ + Jsonb *in = PG_GETARG_JSONB_P(0); + ArrayType *path = PG_GETARG_ARRAYTYPE_P(1); + Jsonb *newjsonb = PG_GETARG_JSONB_P(2); + JsonbValue newval; + bool create = PG_GETARG_BOOL(3); + JsonbValue *res = NULL; + Datum *path_elems; + bool *path_nulls; + int path_len; + JsonbIterator *it; + JsonbParseState *st = NULL; + + JsonbToJsonbValue(newjsonb, &newval); + + if (ARR_NDIM(path) > 1) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("wrong number of array subscripts"))); + + if (JB_ROOT_IS_SCALAR(in)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot set path in scalar"))); + + if (JB_ROOT_COUNT(in) == 0 && !create) + PG_RETURN_JSONB_P(in); + + deconstruct_array_builtin(path, TEXTOID, &path_elems, &path_nulls, &path_len); + + if (path_len == 0) + PG_RETURN_JSONB_P(in); + + it = JsonbIteratorInit(&in->root); + + res = setPath(&it, path_elems, path_nulls, path_len, &st, + 0, &newval, create ? JB_PATH_CREATE : JB_PATH_REPLACE); + + Assert(res != NULL); + + PG_RETURN_JSONB_P(JsonbValueToJsonb(res)); +} + + +/* + * SQL function jsonb_set_lax(jsonb, text[], jsonb, boolean, text) + */ +Datum +jsonb_set_lax(PG_FUNCTION_ARGS) +{ + /* Jsonb *in = PG_GETARG_JSONB_P(0); */ + /* ArrayType *path = PG_GETARG_ARRAYTYPE_P(1); */ + /* Jsonb *newval = PG_GETARG_JSONB_P(2); */ + /* bool create = PG_GETARG_BOOL(3); */ + text *handle_null; + char *handle_val; + + if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(3)) + PG_RETURN_NULL(); + + /* could happen if they pass in an explicit NULL */ + if (PG_ARGISNULL(4)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("null_value_treatment must be \"delete_key\", \"return_target\", \"use_json_null\", or \"raise_exception\""))); + + /* if the new value isn't an SQL NULL just call jsonb_set */ + if (!PG_ARGISNULL(2)) + return jsonb_set(fcinfo); + + handle_null = PG_GETARG_TEXT_P(4); + handle_val = text_to_cstring(handle_null); + + if (strcmp(handle_val, "raise_exception") == 0) + { + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("JSON value must not be null"), + errdetail("Exception was raised because null_value_treatment is \"raise_exception\"."), + errhint("To avoid, either change the null_value_treatment argument or ensure that an SQL NULL is not passed."))); + return (Datum) 0; /* silence stupider compilers */ + } + else if (strcmp(handle_val, "use_json_null") == 0) + { + Datum newval; + + newval = DirectFunctionCall1(jsonb_in, CStringGetDatum("null")); + + fcinfo->args[2].value = newval; + fcinfo->args[2].isnull = false; + return jsonb_set(fcinfo); + } + else if (strcmp(handle_val, "delete_key") == 0) + { + return jsonb_delete_path(fcinfo); + } + else if (strcmp(handle_val, "return_target") == 0) + { + Jsonb *in = PG_GETARG_JSONB_P(0); + + PG_RETURN_JSONB_P(in); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("null_value_treatment must be \"delete_key\", \"return_target\", \"use_json_null\", or \"raise_exception\""))); + return (Datum) 0; /* silence stupider compilers */ + } +} + +/* + * SQL function jsonb_delete_path(jsonb, text[]) + */ +Datum +jsonb_delete_path(PG_FUNCTION_ARGS) +{ + Jsonb *in = PG_GETARG_JSONB_P(0); + ArrayType *path = PG_GETARG_ARRAYTYPE_P(1); + JsonbValue *res = NULL; + Datum *path_elems; + bool *path_nulls; + int 
path_len; + JsonbIterator *it; + JsonbParseState *st = NULL; + + if (ARR_NDIM(path) > 1) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("wrong number of array subscripts"))); + + if (JB_ROOT_IS_SCALAR(in)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot delete path in scalar"))); + + if (JB_ROOT_COUNT(in) == 0) + PG_RETURN_JSONB_P(in); + + deconstruct_array_builtin(path, TEXTOID, &path_elems, &path_nulls, &path_len); + + if (path_len == 0) + PG_RETURN_JSONB_P(in); + + it = JsonbIteratorInit(&in->root); + + res = setPath(&it, path_elems, path_nulls, path_len, &st, + 0, NULL, JB_PATH_DELETE); + + Assert(res != NULL); + + PG_RETURN_JSONB_P(JsonbValueToJsonb(res)); +} + +/* + * SQL function jsonb_insert(jsonb, text[], jsonb, boolean) + */ +Datum +jsonb_insert(PG_FUNCTION_ARGS) +{ + Jsonb *in = PG_GETARG_JSONB_P(0); + ArrayType *path = PG_GETARG_ARRAYTYPE_P(1); + Jsonb *newjsonb = PG_GETARG_JSONB_P(2); + JsonbValue newval; + bool after = PG_GETARG_BOOL(3); + JsonbValue *res = NULL; + Datum *path_elems; + bool *path_nulls; + int path_len; + JsonbIterator *it; + JsonbParseState *st = NULL; + + JsonbToJsonbValue(newjsonb, &newval); + + if (ARR_NDIM(path) > 1) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("wrong number of array subscripts"))); + + if (JB_ROOT_IS_SCALAR(in)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot set path in scalar"))); + + deconstruct_array_builtin(path, TEXTOID, &path_elems, &path_nulls, &path_len); + + if (path_len == 0) + PG_RETURN_JSONB_P(in); + + it = JsonbIteratorInit(&in->root); + + res = setPath(&it, path_elems, path_nulls, path_len, &st, 0, &newval, + after ? JB_PATH_INSERT_AFTER : JB_PATH_INSERT_BEFORE); + + Assert(res != NULL); + + PG_RETURN_JSONB_P(JsonbValueToJsonb(res)); +} + +/* + * Iterate over all jsonb objects and merge them into one. + * The logic of this function copied from the same hstore function, + * except the case, when it1 & it2 represents jbvObject. + * In that case we just append the content of it2 to it1 without any + * verifications. + */ +static JsonbValue * +IteratorConcat(JsonbIterator **it1, JsonbIterator **it2, + JsonbParseState **state) +{ + JsonbValue v1, + v2, + *res = NULL; + JsonbIteratorToken r1, + r2, + rk1, + rk2; + + rk1 = JsonbIteratorNext(it1, &v1, false); + rk2 = JsonbIteratorNext(it2, &v2, false); + + /* + * JsonbIteratorNext reports raw scalars as if they were single-element + * arrays; hence we only need consider "object" and "array" cases here. + */ + if (rk1 == WJB_BEGIN_OBJECT && rk2 == WJB_BEGIN_OBJECT) + { + /* + * Both inputs are objects. + * + * Append all the tokens from v1 to res, except last WJB_END_OBJECT + * (because res will not be finished yet). + */ + pushJsonbValue(state, rk1, NULL); + while ((r1 = JsonbIteratorNext(it1, &v1, true)) != WJB_END_OBJECT) + pushJsonbValue(state, r1, &v1); + + /* + * Append all the tokens from v2 to res, including last WJB_END_OBJECT + * (the concatenation will be completed). Any duplicate keys will + * automatically override the value from the first object. + */ + while ((r2 = JsonbIteratorNext(it2, &v2, true)) != WJB_DONE) + res = pushJsonbValue(state, r2, r2 != WJB_END_OBJECT ? &v2 : NULL); + } + else if (rk1 == WJB_BEGIN_ARRAY && rk2 == WJB_BEGIN_ARRAY) + { + /* + * Both inputs are arrays. 
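+ *
+ * Elements of the second array are simply appended after those of the
+ * first one, e.g. '[1, 2]'::jsonb || '[2, 3]' yields [1, 2, 2, 3]; no
+ * deduplication is performed.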
+ */ + pushJsonbValue(state, rk1, NULL); + + while ((r1 = JsonbIteratorNext(it1, &v1, true)) != WJB_END_ARRAY) + { + Assert(r1 == WJB_ELEM); + pushJsonbValue(state, r1, &v1); + } + + while ((r2 = JsonbIteratorNext(it2, &v2, true)) != WJB_END_ARRAY) + { + Assert(r2 == WJB_ELEM); + pushJsonbValue(state, WJB_ELEM, &v2); + } + + res = pushJsonbValue(state, WJB_END_ARRAY, NULL /* signal to sort */ ); + } + else if (rk1 == WJB_BEGIN_OBJECT) + { + /* + * We have object || array. + */ + Assert(rk2 == WJB_BEGIN_ARRAY); + + pushJsonbValue(state, WJB_BEGIN_ARRAY, NULL); + + pushJsonbValue(state, WJB_BEGIN_OBJECT, NULL); + while ((r1 = JsonbIteratorNext(it1, &v1, true)) != WJB_DONE) + pushJsonbValue(state, r1, r1 != WJB_END_OBJECT ? &v1 : NULL); + + while ((r2 = JsonbIteratorNext(it2, &v2, true)) != WJB_DONE) + res = pushJsonbValue(state, r2, r2 != WJB_END_ARRAY ? &v2 : NULL); + } + else + { + /* + * We have array || object. + */ + Assert(rk1 == WJB_BEGIN_ARRAY); + Assert(rk2 == WJB_BEGIN_OBJECT); + + pushJsonbValue(state, WJB_BEGIN_ARRAY, NULL); + + while ((r1 = JsonbIteratorNext(it1, &v1, true)) != WJB_END_ARRAY) + pushJsonbValue(state, r1, &v1); + + pushJsonbValue(state, WJB_BEGIN_OBJECT, NULL); + while ((r2 = JsonbIteratorNext(it2, &v2, true)) != WJB_DONE) + pushJsonbValue(state, r2, r2 != WJB_END_OBJECT ? &v2 : NULL); + + res = pushJsonbValue(state, WJB_END_ARRAY, NULL); + } + + return res; +} + +/* + * Do most of the heavy work for jsonb_set/jsonb_insert + * + * If JB_PATH_DELETE bit is set in op_type, the element is to be removed. + * + * If any bit mentioned in JB_PATH_CREATE_OR_INSERT is set in op_type, + * we create the new value if the key or array index does not exist. + * + * Bits JB_PATH_INSERT_BEFORE and JB_PATH_INSERT_AFTER in op_type + * behave as JB_PATH_CREATE if new value is inserted in JsonbObject. + * + * If JB_PATH_FILL_GAPS bit is set, this will change an assignment logic in + * case if target is an array. The assignment index will not be restricted by + * number of elements in the array, and if there are any empty slots between + * last element of the array and a new one they will be filled with nulls. If + * the index is negative, it still will be considered an index from the end + * of the array. Of a part of the path is not present and this part is more + * than just one last element, this flag will instruct to create the whole + * chain of corresponding objects and insert the value. + * + * JB_PATH_CONSISTENT_POSITION for an array indicates that the caller wants to + * keep values with fixed indices. Indices for existing elements could be + * changed (shifted forward) in case if the array is prepended with a new value + * and a negative index out of the range, so this behavior will be prevented + * and return an error. + * + * All path elements before the last must already exist + * whatever bits in op_type are set, or nothing is done. + */ +static JsonbValue * +setPath(JsonbIterator **it, Datum *path_elems, + bool *path_nulls, int path_len, + JsonbParseState **st, int level, JsonbValue *newval, int op_type) +{ + JsonbValue v; + JsonbIteratorToken r; + JsonbValue *res; + + check_stack_depth(); + + if (path_nulls[level]) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("path element at position %d is null", + level + 1))); + + r = JsonbIteratorNext(it, &v, false); + + switch (r) + { + case WJB_BEGIN_ARRAY: + + /* + * If instructed complain about attempts to replace within a raw + * scalar value. 
This happens even when current level is equal to + * path_len, because the last path key should also correspond to + * an object or an array, not raw scalar. + */ + if ((op_type & JB_PATH_FILL_GAPS) && (level <= path_len - 1) && + v.val.array.rawScalar) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot replace existing key"), + errdetail("The path assumes key is a composite object, " + "but it is a scalar value."))); + + (void) pushJsonbValue(st, r, NULL); + setPathArray(it, path_elems, path_nulls, path_len, st, level, + newval, v.val.array.nElems, op_type); + r = JsonbIteratorNext(it, &v, false); + Assert(r == WJB_END_ARRAY); + res = pushJsonbValue(st, r, NULL); + break; + case WJB_BEGIN_OBJECT: + (void) pushJsonbValue(st, r, NULL); + setPathObject(it, path_elems, path_nulls, path_len, st, level, + newval, v.val.object.nPairs, op_type); + r = JsonbIteratorNext(it, &v, true); + Assert(r == WJB_END_OBJECT); + res = pushJsonbValue(st, r, NULL); + break; + case WJB_ELEM: + case WJB_VALUE: + + /* + * If instructed complain about attempts to replace within a + * scalar value. This happens even when current level is equal to + * path_len, because the last path key should also correspond to + * an object or an array, not an element or value. + */ + if ((op_type & JB_PATH_FILL_GAPS) && (level <= path_len - 1)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot replace existing key"), + errdetail("The path assumes key is a composite object, " + "but it is a scalar value."))); + + res = pushJsonbValue(st, r, &v); + break; + default: + elog(ERROR, "unrecognized iterator result: %d", (int) r); + res = NULL; /* keep compiler quiet */ + break; + } + + return res; +} + +/* + * Object walker for setPath + */ +static void +setPathObject(JsonbIterator **it, Datum *path_elems, bool *path_nulls, + int path_len, JsonbParseState **st, int level, + JsonbValue *newval, uint32 npairs, int op_type) +{ + text *pathelem = NULL; + int i; + JsonbValue k, + v; + bool done = false; + + if (level >= path_len || path_nulls[level]) + done = true; + else + { + /* The path Datum could be toasted, in which case we must detoast it */ + pathelem = DatumGetTextPP(path_elems[level]); + } + + /* empty object is a special case for create */ + if ((npairs == 0) && (op_type & JB_PATH_CREATE_OR_INSERT) && + (level == path_len - 1)) + { + JsonbValue newkey; + + newkey.type = jbvString; + newkey.val.string.val = VARDATA_ANY(pathelem); + newkey.val.string.len = VARSIZE_ANY_EXHDR(pathelem); + + (void) pushJsonbValue(st, WJB_KEY, &newkey); + (void) pushJsonbValue(st, WJB_VALUE, newval); + } + + for (i = 0; i < npairs; i++) + { + JsonbIteratorToken r = JsonbIteratorNext(it, &k, true); + + Assert(r == WJB_KEY); + + if (!done && + k.val.string.len == VARSIZE_ANY_EXHDR(pathelem) && + memcmp(k.val.string.val, VARDATA_ANY(pathelem), + k.val.string.len) == 0) + { + done = true; + + if (level == path_len - 1) + { + /* + * called from jsonb_insert(), it forbids redefining an + * existing value + */ + if (op_type & (JB_PATH_INSERT_BEFORE | JB_PATH_INSERT_AFTER)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot replace existing key"), + errhint("Try using the function jsonb_set " + "to replace key value."))); + + r = JsonbIteratorNext(it, &v, true); /* skip value */ + if (!(op_type & JB_PATH_DELETE)) + { + (void) pushJsonbValue(st, WJB_KEY, &k); + (void) pushJsonbValue(st, WJB_VALUE, newval); + } + } + else + { + (void) pushJsonbValue(st, r, &k); + setPath(it, 
path_elems, path_nulls, path_len, + st, level + 1, newval, op_type); + } + } + else + { + if ((op_type & JB_PATH_CREATE_OR_INSERT) && !done && + level == path_len - 1 && i == npairs - 1) + { + JsonbValue newkey; + + newkey.type = jbvString; + newkey.val.string.val = VARDATA_ANY(pathelem); + newkey.val.string.len = VARSIZE_ANY_EXHDR(pathelem); + + (void) pushJsonbValue(st, WJB_KEY, &newkey); + (void) pushJsonbValue(st, WJB_VALUE, newval); + } + + (void) pushJsonbValue(st, r, &k); + r = JsonbIteratorNext(it, &v, false); + (void) pushJsonbValue(st, r, r < WJB_BEGIN_ARRAY ? &v : NULL); + if (r == WJB_BEGIN_ARRAY || r == WJB_BEGIN_OBJECT) + { + int walking_level = 1; + + while (walking_level != 0) + { + r = JsonbIteratorNext(it, &v, false); + + if (r == WJB_BEGIN_ARRAY || r == WJB_BEGIN_OBJECT) + ++walking_level; + if (r == WJB_END_ARRAY || r == WJB_END_OBJECT) + --walking_level; + + (void) pushJsonbValue(st, r, r < WJB_BEGIN_ARRAY ? &v : NULL); + } + } + } + } + + /*-- + * If we got here there are only few possibilities: + * - no target path was found, and an open object with some keys/values was + * pushed into the state + * - an object is empty, only WJB_BEGIN_OBJECT is pushed + * + * In both cases if instructed to create the path when not present, + * generate the whole chain of empty objects and insert the new value + * there. + */ + if (!done && (op_type & JB_PATH_FILL_GAPS) && (level < path_len - 1)) + { + JsonbValue newkey; + + newkey.type = jbvString; + newkey.val.string.val = VARDATA_ANY(pathelem); + newkey.val.string.len = VARSIZE_ANY_EXHDR(pathelem); + + (void) pushJsonbValue(st, WJB_KEY, &newkey); + (void) push_path(st, level, path_elems, path_nulls, + path_len, newval); + + /* Result is closed with WJB_END_OBJECT outside of this function */ + } +} + +/* + * Array walker for setPath + */ +static void +setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls, + int path_len, JsonbParseState **st, int level, + JsonbValue *newval, uint32 nelems, int op_type) +{ + JsonbValue v; + int idx, + i; + bool done = false; + + /* pick correct index */ + if (level < path_len && !path_nulls[level]) + { + char *c = TextDatumGetCString(path_elems[level]); + char *badp; + + errno = 0; + idx = strtoint(c, &badp, 10); + if (badp == c || *badp != '\0' || errno != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("path element at position %d is not an integer: \"%s\"", + level + 1, c))); + } + else + idx = nelems; + + if (idx < 0) + { + if (-idx > nelems) + { + /* + * If asked to keep elements position consistent, it's not allowed + * to prepend the array. + */ + if (op_type & JB_PATH_CONSISTENT_POSITION) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("path element at position %d is out of range: %d", + level + 1, idx))); + else + idx = INT_MIN; + } + else + idx = nelems + idx; + } + + /* + * Filling the gaps means there are no limits on the positive index are + * imposed, we can set any element. Otherwise limit the index by nelems. 
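+ *
+ * For example, jsonb_set('[0, 1, 2]', '{5}', '"x"') just appends "x" as the
+ * last element, whereas a subscripted assignment (the JB_PATH_FILL_GAPS
+ * case) pads the missing positions with nulls before storing "x" at index 5.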
+ */ + if (!(op_type & JB_PATH_FILL_GAPS)) + { + if (idx > 0 && idx > nelems) + idx = nelems; + } + + /* + * if we're creating, and idx == INT_MIN, we prepend the new value to the + * array also if the array is empty - in which case we don't really care + * what the idx value is + */ + if ((idx == INT_MIN || nelems == 0) && (level == path_len - 1) && + (op_type & JB_PATH_CREATE_OR_INSERT)) + { + Assert(newval != NULL); + + if (op_type & JB_PATH_FILL_GAPS && nelems == 0 && idx > 0) + push_null_elements(st, idx); + + (void) pushJsonbValue(st, WJB_ELEM, newval); + + done = true; + } + + /* iterate over the array elements */ + for (i = 0; i < nelems; i++) + { + JsonbIteratorToken r; + + if (i == idx && level < path_len) + { + done = true; + + if (level == path_len - 1) + { + r = JsonbIteratorNext(it, &v, true); /* skip */ + + if (op_type & (JB_PATH_INSERT_BEFORE | JB_PATH_CREATE)) + (void) pushJsonbValue(st, WJB_ELEM, newval); + + /* + * We should keep current value only in case of + * JB_PATH_INSERT_BEFORE or JB_PATH_INSERT_AFTER because + * otherwise it should be deleted or replaced + */ + if (op_type & (JB_PATH_INSERT_AFTER | JB_PATH_INSERT_BEFORE)) + (void) pushJsonbValue(st, r, &v); + + if (op_type & (JB_PATH_INSERT_AFTER | JB_PATH_REPLACE)) + (void) pushJsonbValue(st, WJB_ELEM, newval); + } + else + (void) setPath(it, path_elems, path_nulls, path_len, + st, level + 1, newval, op_type); + } + else + { + r = JsonbIteratorNext(it, &v, false); + + (void) pushJsonbValue(st, r, r < WJB_BEGIN_ARRAY ? &v : NULL); + + if (r == WJB_BEGIN_ARRAY || r == WJB_BEGIN_OBJECT) + { + int walking_level = 1; + + while (walking_level != 0) + { + r = JsonbIteratorNext(it, &v, false); + + if (r == WJB_BEGIN_ARRAY || r == WJB_BEGIN_OBJECT) + ++walking_level; + if (r == WJB_END_ARRAY || r == WJB_END_OBJECT) + --walking_level; + + (void) pushJsonbValue(st, r, r < WJB_BEGIN_ARRAY ? &v : NULL); + } + } + } + } + + if ((op_type & JB_PATH_CREATE_OR_INSERT) && !done && level == path_len - 1) + { + /* + * If asked to fill the gaps, idx could be bigger than nelems, so + * prepend the new element with nulls if that's the case. + */ + if (op_type & JB_PATH_FILL_GAPS && idx > nelems) + push_null_elements(st, idx - nelems); + + (void) pushJsonbValue(st, WJB_ELEM, newval); + done = true; + } + + /*-- + * If we got here there are only few possibilities: + * - no target path was found, and an open array with some keys/values was + * pushed into the state + * - an array is empty, only WJB_BEGIN_ARRAY is pushed + * + * In both cases if instructed to create the path when not present, + * generate the whole chain of empty objects and insert the new value + * there. + */ + if (!done && (op_type & JB_PATH_FILL_GAPS) && (level < path_len - 1)) + { + if (idx > 0) + push_null_elements(st, idx - nelems); + + (void) push_path(st, level, path_elems, path_nulls, + path_len, newval); + + /* Result is closed with WJB_END_OBJECT outside of this function */ + } +} + +/* + * Parse information about what elements of a jsonb document we want to iterate + * in functions iterate_json(b)_values. This information is presented in jsonb + * format, so that it can be easily extended in the future. + */ +uint32 +parse_jsonb_index_flags(Jsonb *jb) +{ + JsonbIterator *it; + JsonbValue v; + JsonbIteratorToken type; + uint32 flags = 0; + + it = JsonbIteratorInit(&jb->root); + + type = JsonbIteratorNext(&it, &v, false); + + /* + * We iterate over array (scalar internally is represented as array, so, + * we will accept it too) to check all its elements. 
Flag names are + * chosen the same as jsonb_typeof uses. + */ + if (type != WJB_BEGIN_ARRAY) + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("wrong flag type, only arrays and scalars are allowed"))); + + while ((type = JsonbIteratorNext(&it, &v, false)) == WJB_ELEM) + { + if (v.type != jbvString) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("flag array element is not a string"), + errhint("Possible values are: \"string\", \"numeric\", \"boolean\", \"key\", and \"all\"."))); + + if (v.val.string.len == 3 && + pg_strncasecmp(v.val.string.val, "all", 3) == 0) + flags |= jtiAll; + else if (v.val.string.len == 3 && + pg_strncasecmp(v.val.string.val, "key", 3) == 0) + flags |= jtiKey; + else if (v.val.string.len == 6 && + pg_strncasecmp(v.val.string.val, "string", 6) == 0) + flags |= jtiString; + else if (v.val.string.len == 7 && + pg_strncasecmp(v.val.string.val, "numeric", 7) == 0) + flags |= jtiNumeric; + else if (v.val.string.len == 7 && + pg_strncasecmp(v.val.string.val, "boolean", 7) == 0) + flags |= jtiBool; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("wrong flag in flag array: \"%s\"", + pnstrdup(v.val.string.val, v.val.string.len)), + errhint("Possible values are: \"string\", \"numeric\", \"boolean\", \"key\", and \"all\"."))); + } + + /* expect end of array now */ + if (type != WJB_END_ARRAY) + elog(ERROR, "unexpected end of flag array"); + + /* get final WJB_DONE and free iterator */ + type = JsonbIteratorNext(&it, &v, false); + if (type != WJB_DONE) + elog(ERROR, "unexpected end of flag array"); + + return flags; +} + +/* + * Iterate over jsonb values or elements, specified by flags, and pass them + * together with an iteration state to a specified JsonIterateStringValuesAction. + */ +void +iterate_jsonb_values(Jsonb *jb, uint32 flags, void *state, + JsonIterateStringValuesAction action) +{ + JsonbIterator *it; + JsonbValue v; + JsonbIteratorToken type; + + it = JsonbIteratorInit(&jb->root); + + /* + * Just recursively iterating over jsonb and call callback on all + * corresponding elements + */ + while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) + { + if (type == WJB_KEY) + { + if (flags & jtiKey) + action(state, v.val.string.val, v.val.string.len); + + continue; + } + else if (!(type == WJB_VALUE || type == WJB_ELEM)) + { + /* do not call callback for composite JsonbValue */ + continue; + } + + /* JsonbValue is a value of object or element of array */ + switch (v.type) + { + case jbvString: + if (flags & jtiString) + action(state, v.val.string.val, v.val.string.len); + break; + case jbvNumeric: + if (flags & jtiNumeric) + { + char *val; + + val = DatumGetCString(DirectFunctionCall1(numeric_out, + NumericGetDatum(v.val.numeric))); + + action(state, val, strlen(val)); + pfree(val); + } + break; + case jbvBool: + if (flags & jtiBool) + { + if (v.val.boolean) + action(state, "true", 4); + else + action(state, "false", 5); + } + break; + default: + /* do not call callback for composite JsonbValue */ + break; + } + } +} + +/* + * Iterate over json values and elements, specified by flags, and pass them + * together with an iteration state to a specified JsonIterateStringValuesAction. 
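+ *
+ * The action is invoked once per matching value with the value's text and
+ * length; json_to_tsvector(), for example, collects its lexemes this way.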
+ */ +void +iterate_json_values(text *json, uint32 flags, void *action_state, + JsonIterateStringValuesAction action) +{ + JsonLexContext *lex = makeJsonLexContext(json, true); + JsonSemAction *sem = palloc0(sizeof(JsonSemAction)); + IterateJsonStringValuesState *state = palloc0(sizeof(IterateJsonStringValuesState)); + + state->lex = lex; + state->action = action; + state->action_state = action_state; + state->flags = flags; + + sem->semstate = (void *) state; + sem->scalar = iterate_values_scalar; + sem->object_field_start = iterate_values_object_field_start; + + pg_parse_json_or_ereport(lex, sem); +} + +/* + * An auxiliary function for iterate_json_values to invoke a specified + * JsonIterateStringValuesAction for specified values. + */ +static JsonParseErrorType +iterate_values_scalar(void *state, char *token, JsonTokenType tokentype) +{ + IterateJsonStringValuesState *_state = (IterateJsonStringValuesState *) state; + + switch (tokentype) + { + case JSON_TOKEN_STRING: + if (_state->flags & jtiString) + _state->action(_state->action_state, token, strlen(token)); + break; + case JSON_TOKEN_NUMBER: + if (_state->flags & jtiNumeric) + _state->action(_state->action_state, token, strlen(token)); + break; + case JSON_TOKEN_TRUE: + case JSON_TOKEN_FALSE: + if (_state->flags & jtiBool) + _state->action(_state->action_state, token, strlen(token)); + break; + default: + /* do not call callback for any other token */ + break; + } + + return JSON_SUCCESS; +} + +static JsonParseErrorType +iterate_values_object_field_start(void *state, char *fname, bool isnull) +{ + IterateJsonStringValuesState *_state = (IterateJsonStringValuesState *) state; + + if (_state->flags & jtiKey) + { + char *val = pstrdup(fname); + + _state->action(_state->action_state, val, strlen(val)); + } + + return JSON_SUCCESS; +} + +/* + * Iterate over a jsonb, and apply a specified JsonTransformStringValuesAction + * to every string value or element. Any necessary context for a + * JsonTransformStringValuesAction can be passed in the action_state variable. + * Function returns a copy of an original jsonb object with transformed values. + */ +Jsonb * +transform_jsonb_string_values(Jsonb *jsonb, void *action_state, + JsonTransformStringValuesAction transform_action) +{ + JsonbIterator *it; + JsonbValue v, + *res = NULL; + JsonbIteratorToken type; + JsonbParseState *st = NULL; + text *out; + bool is_scalar = false; + + it = JsonbIteratorInit(&jsonb->root); + is_scalar = it->isScalar; + + while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) + { + if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString) + { + out = transform_action(action_state, v.val.string.val, v.val.string.len); + /* out is probably not toasted, but let's be sure */ + out = pg_detoast_datum_packed(out); + v.val.string.val = VARDATA_ANY(out); + v.val.string.len = VARSIZE_ANY_EXHDR(out); + res = pushJsonbValue(&st, type, type < WJB_BEGIN_ARRAY ? &v : NULL); + } + else + { + res = pushJsonbValue(&st, type, (type == WJB_KEY || + type == WJB_VALUE || + type == WJB_ELEM) ? &v : NULL); + } + } + + if (res->type == jbvArray) + res->val.array.rawScalar = is_scalar; + + return JsonbValueToJsonb(res); +} + +/* + * Iterate over a json, and apply a specified JsonTransformStringValuesAction + * to every string value or element. Any necessary context for a + * JsonTransformStringValuesAction can be passed in the action_state variable. + * Function returns a StringInfo, which is a copy of an original json with + * transformed values. 
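+ *
+ * ts_headline() on json documents, for example, uses this to decorate
+ * matching words inside string values while leaving the surrounding JSON
+ * structure untouched.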
+ */ +text * +transform_json_string_values(text *json, void *action_state, + JsonTransformStringValuesAction transform_action) +{ + JsonLexContext *lex = makeJsonLexContext(json, true); + JsonSemAction *sem = palloc0(sizeof(JsonSemAction)); + TransformJsonStringValuesState *state = palloc0(sizeof(TransformJsonStringValuesState)); + + state->lex = lex; + state->strval = makeStringInfo(); + state->action = transform_action; + state->action_state = action_state; + + sem->semstate = (void *) state; + sem->object_start = transform_string_values_object_start; + sem->object_end = transform_string_values_object_end; + sem->array_start = transform_string_values_array_start; + sem->array_end = transform_string_values_array_end; + sem->scalar = transform_string_values_scalar; + sem->array_element_start = transform_string_values_array_element_start; + sem->object_field_start = transform_string_values_object_field_start; + + pg_parse_json_or_ereport(lex, sem); + + return cstring_to_text_with_len(state->strval->data, state->strval->len); +} + +/* + * Set of auxiliary functions for transform_json_string_values to invoke a + * specified JsonTransformStringValuesAction for all values and left everything + * else untouched. + */ +static JsonParseErrorType +transform_string_values_object_start(void *state) +{ + TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state; + + appendStringInfoCharMacro(_state->strval, '{'); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +transform_string_values_object_end(void *state) +{ + TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state; + + appendStringInfoCharMacro(_state->strval, '}'); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +transform_string_values_array_start(void *state) +{ + TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state; + + appendStringInfoCharMacro(_state->strval, '['); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +transform_string_values_array_end(void *state) +{ + TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state; + + appendStringInfoCharMacro(_state->strval, ']'); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +transform_string_values_object_field_start(void *state, char *fname, bool isnull) +{ + TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state; + + if (_state->strval->data[_state->strval->len - 1] != '{') + appendStringInfoCharMacro(_state->strval, ','); + + /* + * Unfortunately we don't have the quoted and escaped string any more, so + * we have to re-escape it. 
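+ *
+ * escape_json() writes the key back as a quoted JSON string, so a field
+ * name such as ab"c is emitted as "ab\"c".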
+ */ + escape_json(_state->strval, fname); + appendStringInfoCharMacro(_state->strval, ':'); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +transform_string_values_array_element_start(void *state, bool isnull) +{ + TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state; + + if (_state->strval->data[_state->strval->len - 1] != '[') + appendStringInfoCharMacro(_state->strval, ','); + + return JSON_SUCCESS; +} + +static JsonParseErrorType +transform_string_values_scalar(void *state, char *token, JsonTokenType tokentype) +{ + TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state; + + if (tokentype == JSON_TOKEN_STRING) + { + text *out = _state->action(_state->action_state, token, strlen(token)); + + escape_json(_state->strval, text_to_cstring(out)); + } + else + appendStringInfoString(_state->strval, token); + + return JSON_SUCCESS; +} + +JsonTokenType +json_get_first_token(text *json, bool throw_error) +{ + JsonLexContext *lex; + JsonParseErrorType result; + + lex = makeJsonLexContext(json, false); + + /* Lex exactly one token from the input and check its type. */ + result = json_lex(lex); + + if (result == JSON_SUCCESS) + return lex->token_type; + + if (throw_error) + json_errsave_error(result, lex, NULL); + + return JSON_TOKEN_INVALID; /* invalid json */ +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonpath.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonpath.c new file mode 100644 index 00000000000..c5ba3b7f1d0 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonpath.c @@ -0,0 +1,1112 @@ +/*------------------------------------------------------------------------- + * + * jsonpath.c + * Input/output and supporting routines for jsonpath + * + * jsonpath expression is a chain of path items. First path item is $, $var, + * literal or arithmetic expression. Subsequent path items are accessors + * (.key, .*, [subscripts], [*]), filters (? (predicate)) and methods (.type(), + * .size() etc). + * + * For instance, structure of path items for simple expression: + * + * $.a[*].type() + * + * is pretty evident: + * + * $ => .a => [*] => .type() + * + * Some path items such as arithmetic operations, predicates or array + * subscripts may comprise subtrees. For instance, more complex expression + * + * ($.a + $[1 to 5, 7] ? (@ > 3).double()).type() + * + * have following structure of path items: + * + * + => .type() + * ___/ \___ + * / \ + * $ => .a $ => [] => ? => .double() + * _||_ | + * / \ > + * to to / \ + * / \ / @ 3 + * 1 5 7 + * + * Binary encoding of jsonpath constitutes a sequence of 4-bytes aligned + * variable-length path items connected by links. Every item has a header + * consisting of item type (enum JsonPathItemType) and offset of next item + * (zero means no next item). After the header, item may have payload + * depending on item type. For instance, payload of '.key' accessor item is + * length of key name and key name itself. Payload of '>' arithmetic operator + * item is offsets of right and left operands. 
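+ *
+ * Apart from the varlena length word, the serialized value begins with a
+ * uint32 header holding JSONPATH_VERSION and the lax/strict flag
+ * (JSONPATH_LAX), followed by the root path item.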
+ * + * So, binary representation of sample expression above is: + * (bottom arrows are next links, top lines are argument links) + * + * _____ + * _____ ___/____ \ __ + * _ /_ \ _____/__/____ \ \ __ _ /_ \ + * / / \ \ / / / \ \ \ / \ / / \ \ + * +(LR) $ .a $ [](* to *, * to *) 1 5 7 ?(A) >(LR) @ 3 .double() .type() + * | | ^ | ^| ^| ^ ^ + * | |__| |__||________________________||___________________| | + * |_______________________________________________________________________| + * + * Copyright (c) 2019-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/adt/jsonpath.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "funcapi.h" +#include "lib/stringinfo.h" +#include "libpq/pqformat.h" +#include "nodes/miscnodes.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/json.h" +#include "utils/jsonpath.h" + + +static Datum jsonPathFromCstring(char *in, int len, struct Node *escontext); +static char *jsonPathToCstring(StringInfo out, JsonPath *in, + int estimated_len); +static bool flattenJsonPathParseItem(StringInfo buf, int *result, + struct Node *escontext, + JsonPathParseItem *item, + int nestingLevel, bool insideArraySubscript); +static void alignStringInfoInt(StringInfo buf); +static int32 reserveSpaceForItemPointer(StringInfo buf); +static void printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey, + bool printBracketes); +static int operationPriority(JsonPathItemType op); + + +/**************************** INPUT/OUTPUT ********************************/ + +/* + * jsonpath type input function + */ +Datum +jsonpath_in(PG_FUNCTION_ARGS) +{ + char *in = PG_GETARG_CSTRING(0); + int len = strlen(in); + + return jsonPathFromCstring(in, len, fcinfo->context); +} + +/* + * jsonpath type recv function + * + * The type is sent as text in binary mode, so this is almost the same + * as the input function, but it's prefixed with a version number so we + * can change the binary format sent in future if necessary. For now, + * only version 1 is supported. + */ +Datum +jsonpath_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + int version = pq_getmsgint(buf, 1); + char *str; + int nbytes; + + if (version == JSONPATH_VERSION) + str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); + else + elog(ERROR, "unsupported jsonpath version number: %d", version); + + return jsonPathFromCstring(str, nbytes, NULL); +} + +/* + * jsonpath type output function + */ +Datum +jsonpath_out(PG_FUNCTION_ARGS) +{ + JsonPath *in = PG_GETARG_JSONPATH_P(0); + + PG_RETURN_CSTRING(jsonPathToCstring(NULL, in, VARSIZE(in))); +} + +/* + * jsonpath type send function + * + * Just send jsonpath as a version number, then a string of text + */ +Datum +jsonpath_send(PG_FUNCTION_ARGS) +{ + JsonPath *in = PG_GETARG_JSONPATH_P(0); + StringInfoData buf; + StringInfoData jtext; + int version = JSONPATH_VERSION; + + initStringInfo(&jtext); + (void) jsonPathToCstring(&jtext, in, VARSIZE(in)); + + pq_begintypsend(&buf); + pq_sendint8(&buf, version); + pq_sendtext(&buf, jtext.data, jtext.len); + pfree(jtext.data); + + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * Converts C-string to a jsonpath value. + * + * Uses jsonpath parser to turn string into an AST, then + * flattenJsonPathParseItem() does second pass turning AST into binary + * representation of jsonpath. 
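+ *
+ * For example, the input '$.a[*] ? (@ > 1)' is parsed into an AST and then
+ * flattened into the binary form described at the top of this file.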
+ */ +static Datum +jsonPathFromCstring(char *in, int len, struct Node *escontext) +{ + JsonPathParseResult *jsonpath = parsejsonpath(in, len, escontext); + JsonPath *res; + StringInfoData buf; + + if (SOFT_ERROR_OCCURRED(escontext)) + return (Datum) 0; + + if (!jsonpath) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", "jsonpath", + in))); + + initStringInfo(&buf); + enlargeStringInfo(&buf, 4 * len /* estimation */ ); + + appendStringInfoSpaces(&buf, JSONPATH_HDRSZ); + + if (!flattenJsonPathParseItem(&buf, NULL, escontext, + jsonpath->expr, 0, false)) + return (Datum) 0; + + res = (JsonPath *) buf.data; + SET_VARSIZE(res, buf.len); + res->header = JSONPATH_VERSION; + if (jsonpath->lax) + res->header |= JSONPATH_LAX; + + PG_RETURN_JSONPATH_P(res); +} + +/* + * Converts jsonpath value to a C-string. + * + * If 'out' argument is non-null, the resulting C-string is stored inside the + * StringBuffer. The resulting string is always returned. + */ +static char * +jsonPathToCstring(StringInfo out, JsonPath *in, int estimated_len) +{ + StringInfoData buf; + JsonPathItem v; + + if (!out) + { + out = &buf; + initStringInfo(out); + } + enlargeStringInfo(out, estimated_len); + + if (!(in->header & JSONPATH_LAX)) + appendStringInfoString(out, "strict "); + + jspInit(&v, in); + printJsonPathItem(out, &v, false, true); + + return out->data; +} + +/* + * Recursive function converting given jsonpath parse item and all its + * children into a binary representation. + */ +static bool +flattenJsonPathParseItem(StringInfo buf, int *result, struct Node *escontext, + JsonPathParseItem *item, int nestingLevel, + bool insideArraySubscript) +{ + /* position from beginning of jsonpath data */ + int32 pos = buf->len - JSONPATH_HDRSZ; + int32 chld; + int32 next; + int argNestingLevel = 0; + + check_stack_depth(); + CHECK_FOR_INTERRUPTS(); + + appendStringInfoChar(buf, (char) (item->type)); + + /* + * We align buffer to int32 because a series of int32 values often goes + * after the header, and we want to read them directly by dereferencing + * int32 pointer (see jspInitByBuffer()). + */ + alignStringInfoInt(buf); + + /* + * Reserve space for next item pointer. Actual value will be recorded + * later, after next and children items processing. + */ + next = reserveSpaceForItemPointer(buf); + + switch (item->type) + { + case jpiString: + case jpiVariable: + case jpiKey: + appendBinaryStringInfo(buf, &item->value.string.len, + sizeof(item->value.string.len)); + appendBinaryStringInfo(buf, item->value.string.val, + item->value.string.len); + appendStringInfoChar(buf, '\0'); + break; + case jpiNumeric: + appendBinaryStringInfo(buf, item->value.numeric, + VARSIZE(item->value.numeric)); + break; + case jpiBool: + appendBinaryStringInfo(buf, &item->value.boolean, + sizeof(item->value.boolean)); + break; + case jpiAnd: + case jpiOr: + case jpiEqual: + case jpiNotEqual: + case jpiLess: + case jpiGreater: + case jpiLessOrEqual: + case jpiGreaterOrEqual: + case jpiAdd: + case jpiSub: + case jpiMul: + case jpiDiv: + case jpiMod: + case jpiStartsWith: + { + /* + * First, reserve place for left/right arg's positions, then + * record both args and sets actual position in reserved + * places. 
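+ *
+ * The payload is thus two int32 offsets followed by the flattened left and
+ * right operands; the offsets are relative to this item's position and are
+ * resolved again by jspGetLeftArg()/jspGetRightArg().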
+ */ + int32 left = reserveSpaceForItemPointer(buf); + int32 right = reserveSpaceForItemPointer(buf); + + if (!item->value.args.left) + chld = pos; + else if (!flattenJsonPathParseItem(buf, &chld, escontext, + item->value.args.left, + nestingLevel + argNestingLevel, + insideArraySubscript)) + return false; + *(int32 *) (buf->data + left) = chld - pos; + + if (!item->value.args.right) + chld = pos; + else if (!flattenJsonPathParseItem(buf, &chld, escontext, + item->value.args.right, + nestingLevel + argNestingLevel, + insideArraySubscript)) + return false; + *(int32 *) (buf->data + right) = chld - pos; + } + break; + case jpiLikeRegex: + { + int32 offs; + + appendBinaryStringInfo(buf, + &item->value.like_regex.flags, + sizeof(item->value.like_regex.flags)); + offs = reserveSpaceForItemPointer(buf); + appendBinaryStringInfo(buf, + &item->value.like_regex.patternlen, + sizeof(item->value.like_regex.patternlen)); + appendBinaryStringInfo(buf, item->value.like_regex.pattern, + item->value.like_regex.patternlen); + appendStringInfoChar(buf, '\0'); + + if (!flattenJsonPathParseItem(buf, &chld, escontext, + item->value.like_regex.expr, + nestingLevel, + insideArraySubscript)) + return false; + *(int32 *) (buf->data + offs) = chld - pos; + } + break; + case jpiFilter: + argNestingLevel++; + /* FALLTHROUGH */ + case jpiIsUnknown: + case jpiNot: + case jpiPlus: + case jpiMinus: + case jpiExists: + case jpiDatetime: + { + int32 arg = reserveSpaceForItemPointer(buf); + + if (!item->value.arg) + chld = pos; + else if (!flattenJsonPathParseItem(buf, &chld, escontext, + item->value.arg, + nestingLevel + argNestingLevel, + insideArraySubscript)) + return false; + *(int32 *) (buf->data + arg) = chld - pos; + } + break; + case jpiNull: + break; + case jpiRoot: + break; + case jpiAnyArray: + case jpiAnyKey: + break; + case jpiCurrent: + if (nestingLevel <= 0) + ereturn(escontext, false, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("@ is not allowed in root expressions"))); + break; + case jpiLast: + if (!insideArraySubscript) + ereturn(escontext, false, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("LAST is allowed only in array subscripts"))); + break; + case jpiIndexArray: + { + int32 nelems = item->value.array.nelems; + int offset; + int i; + + appendBinaryStringInfo(buf, &nelems, sizeof(nelems)); + + offset = buf->len; + + appendStringInfoSpaces(buf, sizeof(int32) * 2 * nelems); + + for (i = 0; i < nelems; i++) + { + int32 *ppos; + int32 topos; + int32 frompos; + + if (!flattenJsonPathParseItem(buf, &frompos, escontext, + item->value.array.elems[i].from, + nestingLevel, true)) + return false; + frompos -= pos; + + if (item->value.array.elems[i].to) + { + if (!flattenJsonPathParseItem(buf, &topos, escontext, + item->value.array.elems[i].to, + nestingLevel, true)) + return false; + topos -= pos; + } + else + topos = 0; + + ppos = (int32 *) &buf->data[offset + i * 2 * sizeof(int32)]; + + ppos[0] = frompos; + ppos[1] = topos; + } + } + break; + case jpiAny: + appendBinaryStringInfo(buf, + &item->value.anybounds.first, + sizeof(item->value.anybounds.first)); + appendBinaryStringInfo(buf, + &item->value.anybounds.last, + sizeof(item->value.anybounds.last)); + break; + case jpiType: + case jpiSize: + case jpiAbs: + case jpiFloor: + case jpiCeiling: + case jpiDouble: + case jpiKeyValue: + break; + default: + elog(ERROR, "unrecognized jsonpath item type: %d", item->type); + } + + if (item->next) + { + if (!flattenJsonPathParseItem(buf, &chld, escontext, + item->next, nestingLevel, + insideArraySubscript)) + return 
false; + chld -= pos; + *(int32 *) (buf->data + next) = chld; + } + + if (result) + *result = pos; + return true; +} + +/* + * Align StringInfo to int by adding zero padding bytes + */ +static void +alignStringInfoInt(StringInfo buf) +{ + switch (INTALIGN(buf->len) - buf->len) + { + case 3: + appendStringInfoCharMacro(buf, 0); + /* FALLTHROUGH */ + case 2: + appendStringInfoCharMacro(buf, 0); + /* FALLTHROUGH */ + case 1: + appendStringInfoCharMacro(buf, 0); + /* FALLTHROUGH */ + default: + break; + } +} + +/* + * Reserve space for int32 JsonPathItem pointer. Now zero pointer is written, + * actual value will be recorded at '(int32 *) &buf->data[pos]' later. + */ +static int32 +reserveSpaceForItemPointer(StringInfo buf) +{ + int32 pos = buf->len; + int32 ptr = 0; + + appendBinaryStringInfo(buf, &ptr, sizeof(ptr)); + + return pos; +} + +/* + * Prints text representation of given jsonpath item and all its children. + */ +static void +printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey, + bool printBracketes) +{ + JsonPathItem elem; + int i; + + check_stack_depth(); + CHECK_FOR_INTERRUPTS(); + + switch (v->type) + { + case jpiNull: + appendStringInfoString(buf, "null"); + break; + case jpiKey: + if (inKey) + appendStringInfoChar(buf, '.'); + escape_json(buf, jspGetString(v, NULL)); + break; + case jpiString: + escape_json(buf, jspGetString(v, NULL)); + break; + case jpiVariable: + appendStringInfoChar(buf, '$'); + escape_json(buf, jspGetString(v, NULL)); + break; + case jpiNumeric: + if (jspHasNext(v)) + appendStringInfoChar(buf, '('); + appendStringInfoString(buf, + DatumGetCString(DirectFunctionCall1(numeric_out, + NumericGetDatum(jspGetNumeric(v))))); + if (jspHasNext(v)) + appendStringInfoChar(buf, ')'); + break; + case jpiBool: + if (jspGetBool(v)) + appendStringInfoString(buf, "true"); + else + appendStringInfoString(buf, "false"); + break; + case jpiAnd: + case jpiOr: + case jpiEqual: + case jpiNotEqual: + case jpiLess: + case jpiGreater: + case jpiLessOrEqual: + case jpiGreaterOrEqual: + case jpiAdd: + case jpiSub: + case jpiMul: + case jpiDiv: + case jpiMod: + case jpiStartsWith: + if (printBracketes) + appendStringInfoChar(buf, '('); + jspGetLeftArg(v, &elem); + printJsonPathItem(buf, &elem, false, + operationPriority(elem.type) <= + operationPriority(v->type)); + appendStringInfoChar(buf, ' '); + appendStringInfoString(buf, jspOperationName(v->type)); + appendStringInfoChar(buf, ' '); + jspGetRightArg(v, &elem); + printJsonPathItem(buf, &elem, false, + operationPriority(elem.type) <= + operationPriority(v->type)); + if (printBracketes) + appendStringInfoChar(buf, ')'); + break; + case jpiLikeRegex: + if (printBracketes) + appendStringInfoChar(buf, '('); + + jspInitByBuffer(&elem, v->base, v->content.like_regex.expr); + printJsonPathItem(buf, &elem, false, + operationPriority(elem.type) <= + operationPriority(v->type)); + + appendStringInfoString(buf, " like_regex "); + + escape_json(buf, v->content.like_regex.pattern); + + if (v->content.like_regex.flags) + { + appendStringInfoString(buf, " flag \""); + + if (v->content.like_regex.flags & JSP_REGEX_ICASE) + appendStringInfoChar(buf, 'i'); + if (v->content.like_regex.flags & JSP_REGEX_DOTALL) + appendStringInfoChar(buf, 's'); + if (v->content.like_regex.flags & JSP_REGEX_MLINE) + appendStringInfoChar(buf, 'm'); + if (v->content.like_regex.flags & JSP_REGEX_WSPACE) + appendStringInfoChar(buf, 'x'); + if (v->content.like_regex.flags & JSP_REGEX_QUOTE) + appendStringInfoChar(buf, 'q'); + + appendStringInfoChar(buf, '"'); + } 
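+			/* e.g. the complete item prints as: @ like_regex "^a.*c$" flag "i" */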
+ + if (printBracketes) + appendStringInfoChar(buf, ')'); + break; + case jpiPlus: + case jpiMinus: + if (printBracketes) + appendStringInfoChar(buf, '('); + appendStringInfoChar(buf, v->type == jpiPlus ? '+' : '-'); + jspGetArg(v, &elem); + printJsonPathItem(buf, &elem, false, + operationPriority(elem.type) <= + operationPriority(v->type)); + if (printBracketes) + appendStringInfoChar(buf, ')'); + break; + case jpiFilter: + appendStringInfoString(buf, "?("); + jspGetArg(v, &elem); + printJsonPathItem(buf, &elem, false, false); + appendStringInfoChar(buf, ')'); + break; + case jpiNot: + appendStringInfoString(buf, "!("); + jspGetArg(v, &elem); + printJsonPathItem(buf, &elem, false, false); + appendStringInfoChar(buf, ')'); + break; + case jpiIsUnknown: + appendStringInfoChar(buf, '('); + jspGetArg(v, &elem); + printJsonPathItem(buf, &elem, false, false); + appendStringInfoString(buf, ") is unknown"); + break; + case jpiExists: + appendStringInfoString(buf, "exists ("); + jspGetArg(v, &elem); + printJsonPathItem(buf, &elem, false, false); + appendStringInfoChar(buf, ')'); + break; + case jpiCurrent: + Assert(!inKey); + appendStringInfoChar(buf, '@'); + break; + case jpiRoot: + Assert(!inKey); + appendStringInfoChar(buf, '$'); + break; + case jpiLast: + appendStringInfoString(buf, "last"); + break; + case jpiAnyArray: + appendStringInfoString(buf, "[*]"); + break; + case jpiAnyKey: + if (inKey) + appendStringInfoChar(buf, '.'); + appendStringInfoChar(buf, '*'); + break; + case jpiIndexArray: + appendStringInfoChar(buf, '['); + for (i = 0; i < v->content.array.nelems; i++) + { + JsonPathItem from; + JsonPathItem to; + bool range = jspGetArraySubscript(v, &from, &to, i); + + if (i) + appendStringInfoChar(buf, ','); + + printJsonPathItem(buf, &from, false, false); + + if (range) + { + appendStringInfoString(buf, " to "); + printJsonPathItem(buf, &to, false, false); + } + } + appendStringInfoChar(buf, ']'); + break; + case jpiAny: + if (inKey) + appendStringInfoChar(buf, '.'); + + if (v->content.anybounds.first == 0 && + v->content.anybounds.last == PG_UINT32_MAX) + appendStringInfoString(buf, "**"); + else if (v->content.anybounds.first == v->content.anybounds.last) + { + if (v->content.anybounds.first == PG_UINT32_MAX) + appendStringInfoString(buf, "**{last}"); + else + appendStringInfo(buf, "**{%u}", + v->content.anybounds.first); + } + else if (v->content.anybounds.first == PG_UINT32_MAX) + appendStringInfo(buf, "**{last to %u}", + v->content.anybounds.last); + else if (v->content.anybounds.last == PG_UINT32_MAX) + appendStringInfo(buf, "**{%u to last}", + v->content.anybounds.first); + else + appendStringInfo(buf, "**{%u to %u}", + v->content.anybounds.first, + v->content.anybounds.last); + break; + case jpiType: + appendStringInfoString(buf, ".type()"); + break; + case jpiSize: + appendStringInfoString(buf, ".size()"); + break; + case jpiAbs: + appendStringInfoString(buf, ".abs()"); + break; + case jpiFloor: + appendStringInfoString(buf, ".floor()"); + break; + case jpiCeiling: + appendStringInfoString(buf, ".ceiling()"); + break; + case jpiDouble: + appendStringInfoString(buf, ".double()"); + break; + case jpiDatetime: + appendStringInfoString(buf, ".datetime("); + if (v->content.arg) + { + jspGetArg(v, &elem); + printJsonPathItem(buf, &elem, false, false); + } + appendStringInfoChar(buf, ')'); + break; + case jpiKeyValue: + appendStringInfoString(buf, ".keyvalue()"); + break; + default: + elog(ERROR, "unrecognized jsonpath item type: %d", v->type); + } + + if (jspGetNext(v, &elem)) + 
printJsonPathItem(buf, &elem, true, true); +} + +const char * +jspOperationName(JsonPathItemType type) +{ + switch (type) + { + case jpiAnd: + return "&&"; + case jpiOr: + return "||"; + case jpiEqual: + return "=="; + case jpiNotEqual: + return "!="; + case jpiLess: + return "<"; + case jpiGreater: + return ">"; + case jpiLessOrEqual: + return "<="; + case jpiGreaterOrEqual: + return ">="; + case jpiPlus: + case jpiAdd: + return "+"; + case jpiMinus: + case jpiSub: + return "-"; + case jpiMul: + return "*"; + case jpiDiv: + return "/"; + case jpiMod: + return "%"; + case jpiStartsWith: + return "starts with"; + case jpiLikeRegex: + return "like_regex"; + case jpiType: + return "type"; + case jpiSize: + return "size"; + case jpiKeyValue: + return "keyvalue"; + case jpiDouble: + return "double"; + case jpiAbs: + return "abs"; + case jpiFloor: + return "floor"; + case jpiCeiling: + return "ceiling"; + case jpiDatetime: + return "datetime"; + default: + elog(ERROR, "unrecognized jsonpath item type: %d", type); + return NULL; + } +} + +static int +operationPriority(JsonPathItemType op) +{ + switch (op) + { + case jpiOr: + return 0; + case jpiAnd: + return 1; + case jpiEqual: + case jpiNotEqual: + case jpiLess: + case jpiGreater: + case jpiLessOrEqual: + case jpiGreaterOrEqual: + case jpiStartsWith: + return 2; + case jpiAdd: + case jpiSub: + return 3; + case jpiMul: + case jpiDiv: + case jpiMod: + return 4; + case jpiPlus: + case jpiMinus: + return 5; + default: + return 6; + } +} + +/******************* Support functions for JsonPath *************************/ + +/* + * Support macros to read stored values + */ + +#define read_byte(v, b, p) do { \ + (v) = *(uint8*)((b) + (p)); \ + (p) += 1; \ +} while(0) \ + +#define read_int32(v, b, p) do { \ + (v) = *(uint32*)((b) + (p)); \ + (p) += sizeof(int32); \ +} while(0) \ + +#define read_int32_n(v, b, p, n) do { \ + (v) = (void *)((b) + (p)); \ + (p) += sizeof(int32) * (n); \ +} while(0) \ + +/* + * Read root node and fill root node representation + */ +void +jspInit(JsonPathItem *v, JsonPath *js) +{ + Assert((js->header & ~JSONPATH_LAX) == JSONPATH_VERSION); + jspInitByBuffer(v, js->data, 0); +} + +/* + * Read node from buffer and fill its representation + */ +void +jspInitByBuffer(JsonPathItem *v, char *base, int32 pos) +{ + v->base = base + pos; + + read_byte(v->type, base, pos); + pos = INTALIGN((uintptr_t) (base + pos)) - (uintptr_t) base; + read_int32(v->nextPos, base, pos); + + switch (v->type) + { + case jpiNull: + case jpiRoot: + case jpiCurrent: + case jpiAnyArray: + case jpiAnyKey: + case jpiType: + case jpiSize: + case jpiAbs: + case jpiFloor: + case jpiCeiling: + case jpiDouble: + case jpiKeyValue: + case jpiLast: + break; + case jpiKey: + case jpiString: + case jpiVariable: + read_int32(v->content.value.datalen, base, pos); + /* FALLTHROUGH */ + case jpiNumeric: + case jpiBool: + v->content.value.data = base + pos; + break; + case jpiAnd: + case jpiOr: + case jpiAdd: + case jpiSub: + case jpiMul: + case jpiDiv: + case jpiMod: + case jpiEqual: + case jpiNotEqual: + case jpiLess: + case jpiGreater: + case jpiLessOrEqual: + case jpiGreaterOrEqual: + case jpiStartsWith: + read_int32(v->content.args.left, base, pos); + read_int32(v->content.args.right, base, pos); + break; + case jpiLikeRegex: + read_int32(v->content.like_regex.flags, base, pos); + read_int32(v->content.like_regex.expr, base, pos); + read_int32(v->content.like_regex.patternlen, base, pos); + v->content.like_regex.pattern = base + pos; + break; + case jpiNot: + case 
jpiExists: + case jpiIsUnknown: + case jpiPlus: + case jpiMinus: + case jpiFilter: + case jpiDatetime: + read_int32(v->content.arg, base, pos); + break; + case jpiIndexArray: + read_int32(v->content.array.nelems, base, pos); + read_int32_n(v->content.array.elems, base, pos, + v->content.array.nelems * 2); + break; + case jpiAny: + read_int32(v->content.anybounds.first, base, pos); + read_int32(v->content.anybounds.last, base, pos); + break; + default: + elog(ERROR, "unrecognized jsonpath item type: %d", v->type); + } +} + +void +jspGetArg(JsonPathItem *v, JsonPathItem *a) +{ + Assert(v->type == jpiFilter || + v->type == jpiNot || + v->type == jpiIsUnknown || + v->type == jpiExists || + v->type == jpiPlus || + v->type == jpiMinus || + v->type == jpiDatetime); + + jspInitByBuffer(a, v->base, v->content.arg); +} + +bool +jspGetNext(JsonPathItem *v, JsonPathItem *a) +{ + if (jspHasNext(v)) + { + Assert(v->type == jpiString || + v->type == jpiNumeric || + v->type == jpiBool || + v->type == jpiNull || + v->type == jpiKey || + v->type == jpiAny || + v->type == jpiAnyArray || + v->type == jpiAnyKey || + v->type == jpiIndexArray || + v->type == jpiFilter || + v->type == jpiCurrent || + v->type == jpiExists || + v->type == jpiRoot || + v->type == jpiVariable || + v->type == jpiLast || + v->type == jpiAdd || + v->type == jpiSub || + v->type == jpiMul || + v->type == jpiDiv || + v->type == jpiMod || + v->type == jpiPlus || + v->type == jpiMinus || + v->type == jpiEqual || + v->type == jpiNotEqual || + v->type == jpiGreater || + v->type == jpiGreaterOrEqual || + v->type == jpiLess || + v->type == jpiLessOrEqual || + v->type == jpiAnd || + v->type == jpiOr || + v->type == jpiNot || + v->type == jpiIsUnknown || + v->type == jpiType || + v->type == jpiSize || + v->type == jpiAbs || + v->type == jpiFloor || + v->type == jpiCeiling || + v->type == jpiDouble || + v->type == jpiDatetime || + v->type == jpiKeyValue || + v->type == jpiStartsWith || + v->type == jpiLikeRegex); + + if (a) + jspInitByBuffer(a, v->base, v->nextPos); + return true; + } + + return false; +} + +void +jspGetLeftArg(JsonPathItem *v, JsonPathItem *a) +{ + Assert(v->type == jpiAnd || + v->type == jpiOr || + v->type == jpiEqual || + v->type == jpiNotEqual || + v->type == jpiLess || + v->type == jpiGreater || + v->type == jpiLessOrEqual || + v->type == jpiGreaterOrEqual || + v->type == jpiAdd || + v->type == jpiSub || + v->type == jpiMul || + v->type == jpiDiv || + v->type == jpiMod || + v->type == jpiStartsWith); + + jspInitByBuffer(a, v->base, v->content.args.left); +} + +void +jspGetRightArg(JsonPathItem *v, JsonPathItem *a) +{ + Assert(v->type == jpiAnd || + v->type == jpiOr || + v->type == jpiEqual || + v->type == jpiNotEqual || + v->type == jpiLess || + v->type == jpiGreater || + v->type == jpiLessOrEqual || + v->type == jpiGreaterOrEqual || + v->type == jpiAdd || + v->type == jpiSub || + v->type == jpiMul || + v->type == jpiDiv || + v->type == jpiMod || + v->type == jpiStartsWith); + + jspInitByBuffer(a, v->base, v->content.args.right); +} + +bool +jspGetBool(JsonPathItem *v) +{ + Assert(v->type == jpiBool); + + return (bool) *v->content.value.data; +} + +Numeric +jspGetNumeric(JsonPathItem *v) +{ + Assert(v->type == jpiNumeric); + + return (Numeric) v->content.value.data; +} + +char * +jspGetString(JsonPathItem *v, int32 *len) +{ + Assert(v->type == jpiKey || + v->type == jpiString || + v->type == jpiVariable); + + if (len) + *len = v->content.value.datalen; + return v->content.value.data; +} + +bool 
+jspGetArraySubscript(JsonPathItem *v, JsonPathItem *from, JsonPathItem *to, + int i) +{ + Assert(v->type == jpiIndexArray); + + jspInitByBuffer(from, v->base, v->content.array.elems[i].from); + + if (!v->content.array.elems[i].to) + return false; + + jspInitByBuffer(to, v->base, v->content.array.elems[i].to); + + return true; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonpath_exec.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonpath_exec.c new file mode 100644 index 00000000000..971979800bf --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonpath_exec.c @@ -0,0 +1,2817 @@ +/*------------------------------------------------------------------------- + * + * jsonpath_exec.c + * Routines for SQL/JSON path execution. + * + * Jsonpath is executed in the global context stored in JsonPathExecContext, + * which is passed to almost every function involved into execution. Entry + * point for jsonpath execution is executeJsonPath() function, which + * initializes execution context including initial JsonPathItem and JsonbValue, + * flags, stack for calculation of @ in filters. + * + * The result of jsonpath query execution is enum JsonPathExecResult and + * if succeeded sequence of JsonbValue, written to JsonValueList *found, which + * is passed through the jsonpath items. When found == NULL, we're inside + * exists-query and we're interested only in whether result is empty. In this + * case execution is stopped once first result item is found, and the only + * execution result is JsonPathExecResult. The values of JsonPathExecResult + * are following: + * - jperOk -- result sequence is not empty + * - jperNotFound -- result sequence is empty + * - jperError -- error occurred during execution + * + * Jsonpath is executed recursively (see executeItem()) starting form the + * first path item (which in turn might be, for instance, an arithmetic + * expression evaluated separately). On each step single JsonbValue obtained + * from previous path item is processed. The result of processing is a + * sequence of JsonbValue (probably empty), which is passed to the next path + * item one by one. When there is no next path item, then JsonbValue is added + * to the 'found' list. When found == NULL, then execution functions just + * return jperOk (see executeNextItem()). + * + * Many of jsonpath operations require automatic unwrapping of arrays in lax + * mode. So, if input value is array, then corresponding operation is + * processed not on array itself, but on all of its members one by one. + * executeItemOptUnwrapTarget() function have 'unwrap' argument, which indicates + * whether unwrapping of array is needed. When unwrap == true, each of array + * members is passed to executeItemOptUnwrapTarget() again but with unwrap == false + * in order to avoid subsequent array unwrapping. + * + * All boolean expressions (predicates) are evaluated by executeBoolItem() + * function, which returns tri-state JsonPathBool. When error is occurred + * during predicate execution, it returns jpbUnknown. According to standard + * predicates can be only inside filters. But we support their usage as + * jsonpath expression. This helps us to implement @@ operator. In this case + * resulting JsonPathBool is transformed into jsonb bool or null. + * + * Arithmetic and boolean expression are evaluated recursively from expression + * tree top down to the leaves. 
Therefore, for binary arithmetic expressions + * we calculate operands first. Then we check that results are numeric + * singleton lists, calculate the result and pass it to the next path item. + * + * Copyright (c) 2019-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/adt/jsonpath_exec.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "catalog/pg_collation.h" +#include "catalog/pg_type.h" +#include "funcapi.h" +#include "lib/stringinfo.h" +#include "miscadmin.h" +#include "nodes/miscnodes.h" +#include "regex/regex.h" +#include "utils/builtins.h" +#include "utils/date.h" +#include "utils/datetime.h" +#include "utils/datum.h" +#include "utils/float.h" +#include "utils/formatting.h" +#include "utils/guc.h" +#include "utils/json.h" +#include "utils/jsonpath.h" +#include "utils/timestamp.h" +#include "utils/varlena.h" + +/* + * Represents "base object" and it's "id" for .keyvalue() evaluation. + */ +typedef struct JsonBaseObjectInfo +{ + JsonbContainer *jbc; + int id; +} JsonBaseObjectInfo; + +/* + * Context of jsonpath execution. + */ +typedef struct JsonPathExecContext +{ + Jsonb *vars; /* variables to substitute into jsonpath */ + JsonbValue *root; /* for $ evaluation */ + JsonbValue *current; /* for @ evaluation */ + JsonBaseObjectInfo baseObject; /* "base object" for .keyvalue() + * evaluation */ + int lastGeneratedObjectId; /* "id" counter for .keyvalue() + * evaluation */ + int innermostArraySize; /* for LAST array index evaluation */ + bool laxMode; /* true for "lax" mode, false for "strict" + * mode */ + bool ignoreStructuralErrors; /* with "true" structural errors such + * as absence of required json item or + * unexpected json item type are + * ignored */ + bool throwErrors; /* with "false" all suppressible errors are + * suppressed */ + bool useTz; +} JsonPathExecContext; + +/* Context for LIKE_REGEX execution. */ +typedef struct JsonLikeRegexContext +{ + text *regex; + int cflags; +} JsonLikeRegexContext; + +/* Result of jsonpath predicate evaluation */ +typedef enum JsonPathBool +{ + jpbFalse = 0, + jpbTrue = 1, + jpbUnknown = 2 +} JsonPathBool; + +/* Result of jsonpath expression evaluation */ +typedef enum JsonPathExecResult +{ + jperOk = 0, + jperNotFound = 1, + jperError = 2 +} JsonPathExecResult; + +#define jperIsError(jper) ((jper) == jperError) + +/* + * List of jsonb values with shortcut for single-value list. 
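 *
 * As an illustrative aside (not part of the upstream source): a query such as
 *
 *   SELECT jsonb_path_query_first('{"a": 1}', '$.a');
 *
 * produces exactly one item, which the shortcut keeps in 'singleton' without
 * allocating a List; only when JsonValueListAppend() receives a second item
 * are both values moved into 'list'.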
+ */ +typedef struct JsonValueList +{ + JsonbValue *singleton; + List *list; +} JsonValueList; + +typedef struct JsonValueListIterator +{ + JsonbValue *value; + List *list; + ListCell *next; +} JsonValueListIterator; + +/* strict/lax flags is decomposed into four [un]wrap/error flags */ +#define jspStrictAbsenseOfErrors(cxt) (!(cxt)->laxMode) +#define jspAutoUnwrap(cxt) ((cxt)->laxMode) +#define jspAutoWrap(cxt) ((cxt)->laxMode) +#define jspIgnoreStructuralErrors(cxt) ((cxt)->ignoreStructuralErrors) +#define jspThrowErrors(cxt) ((cxt)->throwErrors) + +/* Convenience macro: return or throw error depending on context */ +#define RETURN_ERROR(throw_error) \ +do { \ + if (jspThrowErrors(cxt)) \ + throw_error; \ + else \ + return jperError; \ +} while (0) + +typedef JsonPathBool (*JsonPathPredicateCallback) (JsonPathItem *jsp, + JsonbValue *larg, + JsonbValue *rarg, + void *param); +typedef Numeric (*BinaryArithmFunc) (Numeric num1, Numeric num2, bool *error); + +static JsonPathExecResult executeJsonPath(JsonPath *path, Jsonb *vars, + Jsonb *json, bool throwErrors, + JsonValueList *result, bool useTz); +static JsonPathExecResult executeItem(JsonPathExecContext *cxt, + JsonPathItem *jsp, JsonbValue *jb, JsonValueList *found); +static JsonPathExecResult executeItemOptUnwrapTarget(JsonPathExecContext *cxt, + JsonPathItem *jsp, JsonbValue *jb, + JsonValueList *found, bool unwrap); +static JsonPathExecResult executeItemUnwrapTargetArray(JsonPathExecContext *cxt, + JsonPathItem *jsp, JsonbValue *jb, + JsonValueList *found, bool unwrapElements); +static JsonPathExecResult executeNextItem(JsonPathExecContext *cxt, + JsonPathItem *cur, JsonPathItem *next, + JsonbValue *v, JsonValueList *found, bool copy); +static JsonPathExecResult executeItemOptUnwrapResult(JsonPathExecContext *cxt, JsonPathItem *jsp, JsonbValue *jb, + bool unwrap, JsonValueList *found); +static JsonPathExecResult executeItemOptUnwrapResultNoThrow(JsonPathExecContext *cxt, JsonPathItem *jsp, + JsonbValue *jb, bool unwrap, JsonValueList *found); +static JsonPathBool executeBoolItem(JsonPathExecContext *cxt, + JsonPathItem *jsp, JsonbValue *jb, bool canHaveNext); +static JsonPathBool executeNestedBoolItem(JsonPathExecContext *cxt, + JsonPathItem *jsp, JsonbValue *jb); +static JsonPathExecResult executeAnyItem(JsonPathExecContext *cxt, + JsonPathItem *jsp, JsonbContainer *jbc, JsonValueList *found, + uint32 level, uint32 first, uint32 last, + bool ignoreStructuralErrors, bool unwrapNext); +static JsonPathBool executePredicate(JsonPathExecContext *cxt, + JsonPathItem *pred, JsonPathItem *larg, JsonPathItem *rarg, + JsonbValue *jb, bool unwrapRightArg, + JsonPathPredicateCallback exec, void *param); +static JsonPathExecResult executeBinaryArithmExpr(JsonPathExecContext *cxt, + JsonPathItem *jsp, JsonbValue *jb, + BinaryArithmFunc func, JsonValueList *found); +static JsonPathExecResult executeUnaryArithmExpr(JsonPathExecContext *cxt, + JsonPathItem *jsp, JsonbValue *jb, PGFunction func, + JsonValueList *found); +static JsonPathBool executeStartsWith(JsonPathItem *jsp, + JsonbValue *whole, JsonbValue *initial, void *param); +static JsonPathBool executeLikeRegex(JsonPathItem *jsp, JsonbValue *str, + JsonbValue *rarg, void *param); +static JsonPathExecResult executeNumericItemMethod(JsonPathExecContext *cxt, + JsonPathItem *jsp, JsonbValue *jb, bool unwrap, PGFunction func, + JsonValueList *found); +static JsonPathExecResult executeDateTimeMethod(JsonPathExecContext *cxt, JsonPathItem *jsp, + JsonbValue *jb, JsonValueList *found); +static 
JsonPathExecResult executeKeyValueMethod(JsonPathExecContext *cxt, + JsonPathItem *jsp, JsonbValue *jb, JsonValueList *found); +static JsonPathExecResult appendBoolResult(JsonPathExecContext *cxt, + JsonPathItem *jsp, JsonValueList *found, JsonPathBool res); +static void getJsonPathItem(JsonPathExecContext *cxt, JsonPathItem *item, + JsonbValue *value); +static void getJsonPathVariable(JsonPathExecContext *cxt, + JsonPathItem *variable, Jsonb *vars, JsonbValue *value); +static int JsonbArraySize(JsonbValue *jb); +static JsonPathBool executeComparison(JsonPathItem *cmp, JsonbValue *lv, + JsonbValue *rv, void *p); +static JsonPathBool compareItems(int32 op, JsonbValue *jb1, JsonbValue *jb2, + bool useTz); +static int compareNumeric(Numeric a, Numeric b); +static JsonbValue *copyJsonbValue(JsonbValue *src); +static JsonPathExecResult getArrayIndex(JsonPathExecContext *cxt, + JsonPathItem *jsp, JsonbValue *jb, int32 *index); +static JsonBaseObjectInfo setBaseObject(JsonPathExecContext *cxt, + JsonbValue *jbv, int32 id); +static void JsonValueListAppend(JsonValueList *jvl, JsonbValue *jbv); +static int JsonValueListLength(const JsonValueList *jvl); +static bool JsonValueListIsEmpty(JsonValueList *jvl); +static JsonbValue *JsonValueListHead(JsonValueList *jvl); +static List *JsonValueListGetList(JsonValueList *jvl); +static void JsonValueListInitIterator(const JsonValueList *jvl, + JsonValueListIterator *it); +static JsonbValue *JsonValueListNext(const JsonValueList *jvl, + JsonValueListIterator *it); +static int JsonbType(JsonbValue *jb); +static JsonbValue *JsonbInitBinary(JsonbValue *jbv, Jsonb *jb); +static int JsonbType(JsonbValue *jb); +static JsonbValue *getScalar(JsonbValue *scalar, enum jbvType type); +static JsonbValue *wrapItemsInArray(const JsonValueList *items); +static int compareDatetime(Datum val1, Oid typid1, Datum val2, Oid typid2, + bool useTz, bool *cast_error); + +/****************** User interface to JsonPath executor ********************/ + +/* + * jsonb_path_exists + * Returns true if jsonpath returns at least one item for the specified + * jsonb value. This function and jsonb_path_match() are used to + * implement @? and @@ operators, which in turn are intended to have an + * index support. Thus, it's desirable to make it easier to achieve + * consistency between index scan results and sequential scan results. + * So, we throw as few errors as possible. Regarding this function, + * such behavior also matches behavior of JSON_EXISTS() clause of + * SQL/JSON. Regarding jsonb_path_match(), this function doesn't have + * an analogy in SQL/JSON, so we define its behavior on our own. + */ +static Datum +jsonb_path_exists_internal(FunctionCallInfo fcinfo, bool tz) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + JsonPath *jp = PG_GETARG_JSONPATH_P(1); + JsonPathExecResult res; + Jsonb *vars = NULL; + bool silent = true; + + if (PG_NARGS() == 4) + { + vars = PG_GETARG_JSONB_P(2); + silent = PG_GETARG_BOOL(3); + } + + res = executeJsonPath(jp, vars, jb, !silent, NULL, tz); + + PG_FREE_IF_COPY(jb, 0); + PG_FREE_IF_COPY(jp, 1); + + if (jperIsError(res)) + PG_RETURN_NULL(); + + PG_RETURN_BOOL(res == jperOk); +} + +Datum +jsonb_path_exists(PG_FUNCTION_ARGS) +{ + return jsonb_path_exists_internal(fcinfo, false); +} + +Datum +jsonb_path_exists_tz(PG_FUNCTION_ARGS) +{ + return jsonb_path_exists_internal(fcinfo, true); +} + +/* + * jsonb_path_exists_opr + * Implementation of operator "jsonb @? jsonpath" (2-argument version of + * jsonb_path_exists()). 
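 *
 * Illustrative SQL-level usage (example queries, not part of the upstream
 * source):
 *
 *   SELECT jsonb '{"a": [1, 2, 3, 4, 5]}' @? '$.a[*] ? (@ > 2)';
 *   -- true
 *
 *   SELECT jsonb_path_exists('{"a": [1, 2, 3, 4, 5]}',
 *                            '$.a[*] ? (@ >= $min)', '{"min": 2}');
 *   -- true; the four-argument form additionally takes a "silent" flag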
+ */ +Datum +jsonb_path_exists_opr(PG_FUNCTION_ARGS) +{ + /* just call the other one -- it can handle both cases */ + return jsonb_path_exists_internal(fcinfo, false); +} + +/* + * jsonb_path_match + * Returns jsonpath predicate result item for the specified jsonb value. + * See jsonb_path_exists() comment for details regarding error handling. + */ +static Datum +jsonb_path_match_internal(FunctionCallInfo fcinfo, bool tz) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + JsonPath *jp = PG_GETARG_JSONPATH_P(1); + JsonValueList found = {0}; + Jsonb *vars = NULL; + bool silent = true; + + if (PG_NARGS() == 4) + { + vars = PG_GETARG_JSONB_P(2); + silent = PG_GETARG_BOOL(3); + } + + (void) executeJsonPath(jp, vars, jb, !silent, &found, tz); + + PG_FREE_IF_COPY(jb, 0); + PG_FREE_IF_COPY(jp, 1); + + if (JsonValueListLength(&found) == 1) + { + JsonbValue *jbv = JsonValueListHead(&found); + + if (jbv->type == jbvBool) + PG_RETURN_BOOL(jbv->val.boolean); + + if (jbv->type == jbvNull) + PG_RETURN_NULL(); + } + + if (!silent) + ereport(ERROR, + (errcode(ERRCODE_SINGLETON_SQL_JSON_ITEM_REQUIRED), + errmsg("single boolean result is expected"))); + + PG_RETURN_NULL(); +} + +Datum +jsonb_path_match(PG_FUNCTION_ARGS) +{ + return jsonb_path_match_internal(fcinfo, false); +} + +Datum +jsonb_path_match_tz(PG_FUNCTION_ARGS) +{ + return jsonb_path_match_internal(fcinfo, true); +} + +/* + * jsonb_path_match_opr + * Implementation of operator "jsonb @@ jsonpath" (2-argument version of + * jsonb_path_match()). + */ +Datum +jsonb_path_match_opr(PG_FUNCTION_ARGS) +{ + /* just call the other one -- it can handle both cases */ + return jsonb_path_match_internal(fcinfo, false); +} + +/* + * jsonb_path_query + * Executes jsonpath for given jsonb document and returns result as + * rowset. + */ +static Datum +jsonb_path_query_internal(FunctionCallInfo fcinfo, bool tz) +{ + FuncCallContext *funcctx; + List *found; + JsonbValue *v; + ListCell *c; + + if (SRF_IS_FIRSTCALL()) + { + JsonPath *jp; + Jsonb *jb; + MemoryContext oldcontext; + Jsonb *vars; + bool silent; + JsonValueList found = {0}; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + jb = PG_GETARG_JSONB_P_COPY(0); + jp = PG_GETARG_JSONPATH_P_COPY(1); + vars = PG_GETARG_JSONB_P_COPY(2); + silent = PG_GETARG_BOOL(3); + + (void) executeJsonPath(jp, vars, jb, !silent, &found, tz); + + funcctx->user_fctx = JsonValueListGetList(&found); + + MemoryContextSwitchTo(oldcontext); + } + + funcctx = SRF_PERCALL_SETUP(); + found = funcctx->user_fctx; + + c = list_head(found); + + if (c == NULL) + SRF_RETURN_DONE(funcctx); + + v = lfirst(c); + funcctx->user_fctx = list_delete_first(found); + + SRF_RETURN_NEXT(funcctx, JsonbPGetDatum(JsonbValueToJsonb(v))); +} + +Datum +jsonb_path_query(PG_FUNCTION_ARGS) +{ + return jsonb_path_query_internal(fcinfo, false); +} + +Datum +jsonb_path_query_tz(PG_FUNCTION_ARGS) +{ + return jsonb_path_query_internal(fcinfo, true); +} + +/* + * jsonb_path_query_array + * Executes jsonpath for given jsonb document and returns result as + * jsonb array. 
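 *
 * Illustrative SQL-level usage (example query, not part of the upstream
 * source):
 *
 *   SELECT jsonb_path_query_array('[1, 2, 3, 4, 5]', '$[*] ? (@ > 2)');
 *   -- [3, 4, 5]
 *
 * jsonb_path_query() returns the same items as separate rows (3, 4 and 5),
 * whereas this function wraps them into a single jsonb array via
 * wrapItemsInArray().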
+ */ +static Datum +jsonb_path_query_array_internal(FunctionCallInfo fcinfo, bool tz) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + JsonPath *jp = PG_GETARG_JSONPATH_P(1); + JsonValueList found = {0}; + Jsonb *vars = PG_GETARG_JSONB_P(2); + bool silent = PG_GETARG_BOOL(3); + + (void) executeJsonPath(jp, vars, jb, !silent, &found, tz); + + PG_RETURN_JSONB_P(JsonbValueToJsonb(wrapItemsInArray(&found))); +} + +Datum +jsonb_path_query_array(PG_FUNCTION_ARGS) +{ + return jsonb_path_query_array_internal(fcinfo, false); +} + +Datum +jsonb_path_query_array_tz(PG_FUNCTION_ARGS) +{ + return jsonb_path_query_array_internal(fcinfo, true); +} + +/* + * jsonb_path_query_first + * Executes jsonpath for given jsonb document and returns first result + * item. If there are no items, NULL returned. + */ +static Datum +jsonb_path_query_first_internal(FunctionCallInfo fcinfo, bool tz) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + JsonPath *jp = PG_GETARG_JSONPATH_P(1); + JsonValueList found = {0}; + Jsonb *vars = PG_GETARG_JSONB_P(2); + bool silent = PG_GETARG_BOOL(3); + + (void) executeJsonPath(jp, vars, jb, !silent, &found, tz); + + if (JsonValueListLength(&found) >= 1) + PG_RETURN_JSONB_P(JsonbValueToJsonb(JsonValueListHead(&found))); + else + PG_RETURN_NULL(); +} + +Datum +jsonb_path_query_first(PG_FUNCTION_ARGS) +{ + return jsonb_path_query_first_internal(fcinfo, false); +} + +Datum +jsonb_path_query_first_tz(PG_FUNCTION_ARGS) +{ + return jsonb_path_query_first_internal(fcinfo, true); +} + +/********************Execute functions for JsonPath**************************/ + +/* + * Interface to jsonpath executor + * + * 'path' - jsonpath to be executed + * 'vars' - variables to be substituted to jsonpath + * 'json' - target document for jsonpath evaluation + * 'throwErrors' - whether we should throw suppressible errors + * 'result' - list to store result items into + * + * Returns an error if a recoverable error happens during processing, or NULL + * on no error. + * + * Note, jsonb and jsonpath values should be available and untoasted during + * work because JsonPathItem, JsonbValue and result item could have pointers + * into input values. If caller needs to just check if document matches + * jsonpath, then it doesn't provide a result arg. In this case executor + * works till first positive result and does not check the rest if possible. + * In other case it tries to find all the satisfied result items. + */ +static JsonPathExecResult +executeJsonPath(JsonPath *path, Jsonb *vars, Jsonb *json, bool throwErrors, + JsonValueList *result, bool useTz) +{ + JsonPathExecContext cxt; + JsonPathExecResult res; + JsonPathItem jsp; + JsonbValue jbv; + + jspInit(&jsp, path); + + if (!JsonbExtractScalar(&json->root, &jbv)) + JsonbInitBinary(&jbv, json); + + if (vars && !JsonContainerIsObject(&vars->root)) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("\"vars\" argument is not an object"), + errdetail("Jsonpath parameters should be encoded as key-value pairs of \"vars\" object."))); + } + + cxt.vars = vars; + cxt.laxMode = (path->header & JSONPATH_LAX) != 0; + cxt.ignoreStructuralErrors = cxt.laxMode; + cxt.root = &jbv; + cxt.current = &jbv; + cxt.baseObject.jbc = NULL; + cxt.baseObject.id = 0; + cxt.lastGeneratedObjectId = vars ? 2 : 1; + cxt.innermostArraySize = -1; + cxt.throwErrors = throwErrors; + cxt.useTz = useTz; + + if (jspStrictAbsenseOfErrors(&cxt) && !result) + { + /* + * In strict mode we must get a complete list of values to check that + * there are no errors at all. 
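 *
 * As an illustration of the lax/strict difference (example queries, not
 * part of the upstream source): in lax mode structural errors are ignored,
 * so
 *
 *   SELECT jsonb_path_query('{"a": 1}', 'lax $.b');
 *
 * simply returns no rows, whereas 'strict $.b' raises a "JSON object does
 * not contain key" error; the @? and @@ operators suppress such errors
 * instead of raising them.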
+ */ + JsonValueList vals = {0}; + + res = executeItem(&cxt, &jsp, &jbv, &vals); + + if (jperIsError(res)) + return res; + + return JsonValueListIsEmpty(&vals) ? jperNotFound : jperOk; + } + + res = executeItem(&cxt, &jsp, &jbv, result); + + Assert(!throwErrors || !jperIsError(res)); + + return res; +} + +/* + * Execute jsonpath with automatic unwrapping of current item in lax mode. + */ +static JsonPathExecResult +executeItem(JsonPathExecContext *cxt, JsonPathItem *jsp, + JsonbValue *jb, JsonValueList *found) +{ + return executeItemOptUnwrapTarget(cxt, jsp, jb, found, jspAutoUnwrap(cxt)); +} + +/* + * Main jsonpath executor function: walks on jsonpath structure, finds + * relevant parts of jsonb and evaluates expressions over them. + * When 'unwrap' is true current SQL/JSON item is unwrapped if it is an array. + */ +static JsonPathExecResult +executeItemOptUnwrapTarget(JsonPathExecContext *cxt, JsonPathItem *jsp, + JsonbValue *jb, JsonValueList *found, bool unwrap) +{ + JsonPathItem elem; + JsonPathExecResult res = jperNotFound; + JsonBaseObjectInfo baseObject; + + check_stack_depth(); + CHECK_FOR_INTERRUPTS(); + + switch (jsp->type) + { + /* all boolean item types: */ + case jpiAnd: + case jpiOr: + case jpiNot: + case jpiIsUnknown: + case jpiEqual: + case jpiNotEqual: + case jpiLess: + case jpiGreater: + case jpiLessOrEqual: + case jpiGreaterOrEqual: + case jpiExists: + case jpiStartsWith: + case jpiLikeRegex: + { + JsonPathBool st = executeBoolItem(cxt, jsp, jb, true); + + res = appendBoolResult(cxt, jsp, found, st); + break; + } + + case jpiKey: + if (JsonbType(jb) == jbvObject) + { + JsonbValue *v; + JsonbValue key; + + key.type = jbvString; + key.val.string.val = jspGetString(jsp, &key.val.string.len); + + v = findJsonbValueFromContainer(jb->val.binary.data, + JB_FOBJECT, &key); + + if (v != NULL) + { + res = executeNextItem(cxt, jsp, NULL, + v, found, false); + + /* free value if it was not added to found list */ + if (jspHasNext(jsp) || !found) + pfree(v); + } + else if (!jspIgnoreStructuralErrors(cxt)) + { + Assert(found); + + if (!jspThrowErrors(cxt)) + return jperError; + + ereport(ERROR, + (errcode(ERRCODE_SQL_JSON_MEMBER_NOT_FOUND), \ + errmsg("JSON object does not contain key \"%s\"", + pnstrdup(key.val.string.val, + key.val.string.len)))); + } + } + else if (unwrap && JsonbType(jb) == jbvArray) + return executeItemUnwrapTargetArray(cxt, jsp, jb, found, false); + else if (!jspIgnoreStructuralErrors(cxt)) + { + Assert(found); + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_SQL_JSON_MEMBER_NOT_FOUND), + errmsg("jsonpath member accessor can only be applied to an object")))); + } + break; + + case jpiRoot: + jb = cxt->root; + baseObject = setBaseObject(cxt, jb, 0); + res = executeNextItem(cxt, jsp, NULL, jb, found, true); + cxt->baseObject = baseObject; + break; + + case jpiCurrent: + res = executeNextItem(cxt, jsp, NULL, cxt->current, + found, true); + break; + + case jpiAnyArray: + if (JsonbType(jb) == jbvArray) + { + bool hasNext = jspGetNext(jsp, &elem); + + res = executeItemUnwrapTargetArray(cxt, hasNext ? 
&elem : NULL, + jb, found, jspAutoUnwrap(cxt)); + } + else if (jspAutoWrap(cxt)) + res = executeNextItem(cxt, jsp, NULL, jb, found, true); + else if (!jspIgnoreStructuralErrors(cxt)) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_SQL_JSON_ARRAY_NOT_FOUND), + errmsg("jsonpath wildcard array accessor can only be applied to an array")))); + break; + + case jpiIndexArray: + if (JsonbType(jb) == jbvArray || jspAutoWrap(cxt)) + { + int innermostArraySize = cxt->innermostArraySize; + int i; + int size = JsonbArraySize(jb); + bool singleton = size < 0; + bool hasNext = jspGetNext(jsp, &elem); + + if (singleton) + size = 1; + + cxt->innermostArraySize = size; /* for LAST evaluation */ + + for (i = 0; i < jsp->content.array.nelems; i++) + { + JsonPathItem from; + JsonPathItem to; + int32 index; + int32 index_from; + int32 index_to; + bool range = jspGetArraySubscript(jsp, &from, + &to, i); + + res = getArrayIndex(cxt, &from, jb, &index_from); + + if (jperIsError(res)) + break; + + if (range) + { + res = getArrayIndex(cxt, &to, jb, &index_to); + + if (jperIsError(res)) + break; + } + else + index_to = index_from; + + if (!jspIgnoreStructuralErrors(cxt) && + (index_from < 0 || + index_from > index_to || + index_to >= size)) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_INVALID_SQL_JSON_SUBSCRIPT), + errmsg("jsonpath array subscript is out of bounds")))); + + if (index_from < 0) + index_from = 0; + + if (index_to >= size) + index_to = size - 1; + + res = jperNotFound; + + for (index = index_from; index <= index_to; index++) + { + JsonbValue *v; + bool copy; + + if (singleton) + { + v = jb; + copy = true; + } + else + { + v = getIthJsonbValueFromContainer(jb->val.binary.data, + (uint32) index); + + if (v == NULL) + continue; + + copy = false; + } + + if (!hasNext && !found) + return jperOk; + + res = executeNextItem(cxt, jsp, &elem, v, found, + copy); + + if (jperIsError(res)) + break; + + if (res == jperOk && !found) + break; + } + + if (jperIsError(res)) + break; + + if (res == jperOk && !found) + break; + } + + cxt->innermostArraySize = innermostArraySize; + } + else if (!jspIgnoreStructuralErrors(cxt)) + { + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_SQL_JSON_ARRAY_NOT_FOUND), + errmsg("jsonpath array accessor can only be applied to an array")))); + } + break; + + case jpiLast: + { + JsonbValue tmpjbv; + JsonbValue *lastjbv; + int last; + bool hasNext = jspGetNext(jsp, &elem); + + if (cxt->innermostArraySize < 0) + elog(ERROR, "evaluating jsonpath LAST outside of array subscript"); + + if (!hasNext && !found) + { + res = jperOk; + break; + } + + last = cxt->innermostArraySize - 1; + + lastjbv = hasNext ? &tmpjbv : palloc(sizeof(*lastjbv)); + + lastjbv->type = jbvNumeric; + lastjbv->val.numeric = int64_to_numeric(last); + + res = executeNextItem(cxt, jsp, &elem, + lastjbv, found, hasNext); + } + break; + + case jpiAnyKey: + if (JsonbType(jb) == jbvObject) + { + bool hasNext = jspGetNext(jsp, &elem); + + if (jb->type != jbvBinary) + elog(ERROR, "invalid jsonb object type: %d", jb->type); + + return executeAnyItem + (cxt, hasNext ? 
&elem : NULL, + jb->val.binary.data, found, 1, 1, 1, + false, jspAutoUnwrap(cxt)); + } + else if (unwrap && JsonbType(jb) == jbvArray) + return executeItemUnwrapTargetArray(cxt, jsp, jb, found, false); + else if (!jspIgnoreStructuralErrors(cxt)) + { + Assert(found); + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_SQL_JSON_OBJECT_NOT_FOUND), + errmsg("jsonpath wildcard member accessor can only be applied to an object")))); + } + break; + + case jpiAdd: + return executeBinaryArithmExpr(cxt, jsp, jb, + numeric_add_opt_error, found); + + case jpiSub: + return executeBinaryArithmExpr(cxt, jsp, jb, + numeric_sub_opt_error, found); + + case jpiMul: + return executeBinaryArithmExpr(cxt, jsp, jb, + numeric_mul_opt_error, found); + + case jpiDiv: + return executeBinaryArithmExpr(cxt, jsp, jb, + numeric_div_opt_error, found); + + case jpiMod: + return executeBinaryArithmExpr(cxt, jsp, jb, + numeric_mod_opt_error, found); + + case jpiPlus: + return executeUnaryArithmExpr(cxt, jsp, jb, NULL, found); + + case jpiMinus: + return executeUnaryArithmExpr(cxt, jsp, jb, numeric_uminus, + found); + + case jpiFilter: + { + JsonPathBool st; + + if (unwrap && JsonbType(jb) == jbvArray) + return executeItemUnwrapTargetArray(cxt, jsp, jb, found, + false); + + jspGetArg(jsp, &elem); + st = executeNestedBoolItem(cxt, &elem, jb); + if (st != jpbTrue) + res = jperNotFound; + else + res = executeNextItem(cxt, jsp, NULL, + jb, found, true); + break; + } + + case jpiAny: + { + bool hasNext = jspGetNext(jsp, &elem); + + /* first try without any intermediate steps */ + if (jsp->content.anybounds.first == 0) + { + bool savedIgnoreStructuralErrors; + + savedIgnoreStructuralErrors = cxt->ignoreStructuralErrors; + cxt->ignoreStructuralErrors = true; + res = executeNextItem(cxt, jsp, &elem, + jb, found, true); + cxt->ignoreStructuralErrors = savedIgnoreStructuralErrors; + + if (res == jperOk && !found) + break; + } + + if (jb->type == jbvBinary) + res = executeAnyItem + (cxt, hasNext ? &elem : NULL, + jb->val.binary.data, found, + 1, + jsp->content.anybounds.first, + jsp->content.anybounds.last, + true, jspAutoUnwrap(cxt)); + break; + } + + case jpiNull: + case jpiBool: + case jpiNumeric: + case jpiString: + case jpiVariable: + { + JsonbValue vbuf; + JsonbValue *v; + bool hasNext = jspGetNext(jsp, &elem); + + if (!hasNext && !found && jsp->type != jpiVariable) + { + /* + * Skip evaluation, but not for variables. We must + * trigger an error for the missing variable. + */ + res = jperOk; + break; + } + + v = hasNext ? 
&vbuf : palloc(sizeof(*v)); + + baseObject = cxt->baseObject; + getJsonPathItem(cxt, jsp, v); + + res = executeNextItem(cxt, jsp, &elem, + v, found, hasNext); + cxt->baseObject = baseObject; + } + break; + + case jpiType: + { + JsonbValue *jbv = palloc(sizeof(*jbv)); + + jbv->type = jbvString; + jbv->val.string.val = pstrdup(JsonbTypeName(jb)); + jbv->val.string.len = strlen(jbv->val.string.val); + + res = executeNextItem(cxt, jsp, NULL, jbv, + found, false); + } + break; + + case jpiSize: + { + int size = JsonbArraySize(jb); + + if (size < 0) + { + if (!jspAutoWrap(cxt)) + { + if (!jspIgnoreStructuralErrors(cxt)) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_SQL_JSON_ARRAY_NOT_FOUND), + errmsg("jsonpath item method .%s() can only be applied to an array", + jspOperationName(jsp->type))))); + break; + } + + size = 1; + } + + jb = palloc(sizeof(*jb)); + + jb->type = jbvNumeric; + jb->val.numeric = int64_to_numeric(size); + + res = executeNextItem(cxt, jsp, NULL, jb, found, false); + } + break; + + case jpiAbs: + return executeNumericItemMethod(cxt, jsp, jb, unwrap, numeric_abs, + found); + + case jpiFloor: + return executeNumericItemMethod(cxt, jsp, jb, unwrap, numeric_floor, + found); + + case jpiCeiling: + return executeNumericItemMethod(cxt, jsp, jb, unwrap, numeric_ceil, + found); + + case jpiDouble: + { + JsonbValue jbv; + + if (unwrap && JsonbType(jb) == jbvArray) + return executeItemUnwrapTargetArray(cxt, jsp, jb, found, + false); + + if (jb->type == jbvNumeric) + { + char *tmp = DatumGetCString(DirectFunctionCall1(numeric_out, + NumericGetDatum(jb->val.numeric))); + double val; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + val = float8in_internal(tmp, + NULL, + "double precision", + tmp, + (Node *) &escontext); + + if (escontext.error_occurred || isinf(val) || isnan(val)) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_NON_NUMERIC_SQL_JSON_ITEM), + errmsg("numeric argument of jsonpath item method .%s() is out of range for type double precision", + jspOperationName(jsp->type))))); + res = jperOk; + } + else if (jb->type == jbvString) + { + /* cast string as double */ + double val; + char *tmp = pnstrdup(jb->val.string.val, + jb->val.string.len); + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + val = float8in_internal(tmp, + NULL, + "double precision", + tmp, + (Node *) &escontext); + + if (escontext.error_occurred || isinf(val) || isnan(val)) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_NON_NUMERIC_SQL_JSON_ITEM), + errmsg("string argument of jsonpath item method .%s() is not a valid representation of a double precision number", + jspOperationName(jsp->type))))); + + jb = &jbv; + jb->type = jbvNumeric; + jb->val.numeric = DatumGetNumeric(DirectFunctionCall1(float8_numeric, + Float8GetDatum(val))); + res = jperOk; + } + + if (res == jperNotFound) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_NON_NUMERIC_SQL_JSON_ITEM), + errmsg("jsonpath item method .%s() can only be applied to a string or numeric value", + jspOperationName(jsp->type))))); + + res = executeNextItem(cxt, jsp, NULL, jb, found, true); + } + break; + + case jpiDatetime: + if (unwrap && JsonbType(jb) == jbvArray) + return executeItemUnwrapTargetArray(cxt, jsp, jb, found, false); + + return executeDateTimeMethod(cxt, jsp, jb, found); + + case jpiKeyValue: + if (unwrap && JsonbType(jb) == jbvArray) + return executeItemUnwrapTargetArray(cxt, jsp, jb, found, false); + + return executeKeyValueMethod(cxt, jsp, jb, found); + + default: + elog(ERROR, "unrecognized jsonpath item type: %d", jsp->type); + 
} + + return res; +} + +/* + * Unwrap current array item and execute jsonpath for each of its elements. + */ +static JsonPathExecResult +executeItemUnwrapTargetArray(JsonPathExecContext *cxt, JsonPathItem *jsp, + JsonbValue *jb, JsonValueList *found, + bool unwrapElements) +{ + if (jb->type != jbvBinary) + { + Assert(jb->type != jbvArray); + elog(ERROR, "invalid jsonb array value type: %d", jb->type); + } + + return executeAnyItem + (cxt, jsp, jb->val.binary.data, found, 1, 1, 1, + false, unwrapElements); +} + +/* + * Execute next jsonpath item if exists. Otherwise put "v" to the "found" + * list if provided. + */ +static JsonPathExecResult +executeNextItem(JsonPathExecContext *cxt, + JsonPathItem *cur, JsonPathItem *next, + JsonbValue *v, JsonValueList *found, bool copy) +{ + JsonPathItem elem; + bool hasNext; + + if (!cur) + hasNext = next != NULL; + else if (next) + hasNext = jspHasNext(cur); + else + { + next = &elem; + hasNext = jspGetNext(cur, next); + } + + if (hasNext) + return executeItem(cxt, next, v, found); + + if (found) + JsonValueListAppend(found, copy ? copyJsonbValue(v) : v); + + return jperOk; +} + +/* + * Same as executeItem(), but when "unwrap == true" automatically unwraps + * each array item from the resulting sequence in lax mode. + */ +static JsonPathExecResult +executeItemOptUnwrapResult(JsonPathExecContext *cxt, JsonPathItem *jsp, + JsonbValue *jb, bool unwrap, + JsonValueList *found) +{ + if (unwrap && jspAutoUnwrap(cxt)) + { + JsonValueList seq = {0}; + JsonValueListIterator it; + JsonPathExecResult res = executeItem(cxt, jsp, jb, &seq); + JsonbValue *item; + + if (jperIsError(res)) + return res; + + JsonValueListInitIterator(&seq, &it); + while ((item = JsonValueListNext(&seq, &it))) + { + Assert(item->type != jbvArray); + + if (JsonbType(item) == jbvArray) + executeItemUnwrapTargetArray(cxt, NULL, item, found, false); + else + JsonValueListAppend(found, item); + } + + return jperOk; + } + + return executeItem(cxt, jsp, jb, found); +} + +/* + * Same as executeItemOptUnwrapResult(), but with error suppression. + */ +static JsonPathExecResult +executeItemOptUnwrapResultNoThrow(JsonPathExecContext *cxt, + JsonPathItem *jsp, + JsonbValue *jb, bool unwrap, + JsonValueList *found) +{ + JsonPathExecResult res; + bool throwErrors = cxt->throwErrors; + + cxt->throwErrors = false; + res = executeItemOptUnwrapResult(cxt, jsp, jb, unwrap, found); + cxt->throwErrors = throwErrors; + + return res; +} + +/* Execute boolean-valued jsonpath expression. */ +static JsonPathBool +executeBoolItem(JsonPathExecContext *cxt, JsonPathItem *jsp, + JsonbValue *jb, bool canHaveNext) +{ + JsonPathItem larg; + JsonPathItem rarg; + JsonPathBool res; + JsonPathBool res2; + + /* since this function recurses, it could be driven to stack overflow */ + check_stack_depth(); + + if (!canHaveNext && jspHasNext(jsp)) + elog(ERROR, "boolean jsonpath item cannot have next item"); + + switch (jsp->type) + { + case jpiAnd: + jspGetLeftArg(jsp, &larg); + res = executeBoolItem(cxt, &larg, jb, false); + + if (res == jpbFalse) + return jpbFalse; + + /* + * SQL/JSON says that we should check second arg in case of + * jperError + */ + + jspGetRightArg(jsp, &rarg); + res2 = executeBoolItem(cxt, &rarg, jb, false); + + return res2 == jpbTrue ? res : res2; + + case jpiOr: + jspGetLeftArg(jsp, &larg); + res = executeBoolItem(cxt, &larg, jb, false); + + if (res == jpbTrue) + return jpbTrue; + + jspGetRightArg(jsp, &rarg); + res2 = executeBoolItem(cxt, &rarg, jb, false); + + return res2 == jpbFalse ? 
res : res2; + + case jpiNot: + jspGetArg(jsp, &larg); + + res = executeBoolItem(cxt, &larg, jb, false); + + if (res == jpbUnknown) + return jpbUnknown; + + return res == jpbTrue ? jpbFalse : jpbTrue; + + case jpiIsUnknown: + jspGetArg(jsp, &larg); + res = executeBoolItem(cxt, &larg, jb, false); + return res == jpbUnknown ? jpbTrue : jpbFalse; + + case jpiEqual: + case jpiNotEqual: + case jpiLess: + case jpiGreater: + case jpiLessOrEqual: + case jpiGreaterOrEqual: + jspGetLeftArg(jsp, &larg); + jspGetRightArg(jsp, &rarg); + return executePredicate(cxt, jsp, &larg, &rarg, jb, true, + executeComparison, cxt); + + case jpiStartsWith: /* 'whole STARTS WITH initial' */ + jspGetLeftArg(jsp, &larg); /* 'whole' */ + jspGetRightArg(jsp, &rarg); /* 'initial' */ + return executePredicate(cxt, jsp, &larg, &rarg, jb, false, + executeStartsWith, NULL); + + case jpiLikeRegex: /* 'expr LIKE_REGEX pattern FLAGS flags' */ + { + /* + * 'expr' is a sequence-returning expression. 'pattern' is a + * regex string literal. SQL/JSON standard requires XQuery + * regexes, but we use Postgres regexes here. 'flags' is a + * string literal converted to integer flags at compile-time. + */ + JsonLikeRegexContext lrcxt = {0}; + + jspInitByBuffer(&larg, jsp->base, + jsp->content.like_regex.expr); + + return executePredicate(cxt, jsp, &larg, NULL, jb, false, + executeLikeRegex, &lrcxt); + } + + case jpiExists: + jspGetArg(jsp, &larg); + + if (jspStrictAbsenseOfErrors(cxt)) + { + /* + * In strict mode we must get a complete list of values to + * check that there are no errors at all. + */ + JsonValueList vals = {0}; + JsonPathExecResult res = + executeItemOptUnwrapResultNoThrow(cxt, &larg, jb, + false, &vals); + + if (jperIsError(res)) + return jpbUnknown; + + return JsonValueListIsEmpty(&vals) ? jpbFalse : jpbTrue; + } + else + { + JsonPathExecResult res = + executeItemOptUnwrapResultNoThrow(cxt, &larg, jb, + false, NULL); + + if (jperIsError(res)) + return jpbUnknown; + + return res == jperOk ? jpbTrue : jpbFalse; + } + + default: + elog(ERROR, "invalid boolean jsonpath item type: %d", jsp->type); + return jpbUnknown; + } +} + +/* + * Execute nested (filters etc.) boolean expression pushing current SQL/JSON + * item onto the stack. 
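 *
 * Illustrative example (not part of the upstream source): within a filter
 * the '@' item refers to the value currently being tested, which is what
 * cxt->current is temporarily switched to here:
 *
 *   SELECT jsonb_path_query('[1, 2, 3]', '$[*] ? (@ > 1)');
 *   -- returns 2 and 3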
+ */ +static JsonPathBool +executeNestedBoolItem(JsonPathExecContext *cxt, JsonPathItem *jsp, + JsonbValue *jb) +{ + JsonbValue *prev; + JsonPathBool res; + + prev = cxt->current; + cxt->current = jb; + res = executeBoolItem(cxt, jsp, jb, false); + cxt->current = prev; + + return res; +} + +/* + * Implementation of several jsonpath nodes: + * - jpiAny (.** accessor), + * - jpiAnyKey (.* accessor), + * - jpiAnyArray ([*] accessor) + */ +static JsonPathExecResult +executeAnyItem(JsonPathExecContext *cxt, JsonPathItem *jsp, JsonbContainer *jbc, + JsonValueList *found, uint32 level, uint32 first, uint32 last, + bool ignoreStructuralErrors, bool unwrapNext) +{ + JsonPathExecResult res = jperNotFound; + JsonbIterator *it; + int32 r; + JsonbValue v; + + check_stack_depth(); + + if (level > last) + return res; + + it = JsonbIteratorInit(jbc); + + /* + * Recursively iterate over jsonb objects/arrays + */ + while ((r = JsonbIteratorNext(&it, &v, true)) != WJB_DONE) + { + if (r == WJB_KEY) + { + r = JsonbIteratorNext(&it, &v, true); + Assert(r == WJB_VALUE); + } + + if (r == WJB_VALUE || r == WJB_ELEM) + { + + if (level >= first || + (first == PG_UINT32_MAX && last == PG_UINT32_MAX && + v.type != jbvBinary)) /* leaves only requested */ + { + /* check expression */ + if (jsp) + { + if (ignoreStructuralErrors) + { + bool savedIgnoreStructuralErrors; + + savedIgnoreStructuralErrors = cxt->ignoreStructuralErrors; + cxt->ignoreStructuralErrors = true; + res = executeItemOptUnwrapTarget(cxt, jsp, &v, found, unwrapNext); + cxt->ignoreStructuralErrors = savedIgnoreStructuralErrors; + } + else + res = executeItemOptUnwrapTarget(cxt, jsp, &v, found, unwrapNext); + + if (jperIsError(res)) + break; + + if (res == jperOk && !found) + break; + } + else if (found) + JsonValueListAppend(found, copyJsonbValue(&v)); + else + return jperOk; + } + + if (level < last && v.type == jbvBinary) + { + res = executeAnyItem + (cxt, jsp, v.val.binary.data, found, + level + 1, first, last, + ignoreStructuralErrors, unwrapNext); + + if (jperIsError(res)) + break; + + if (res == jperOk && found == NULL) + break; + } + } + } + + return res; +} + +/* + * Execute unary or binary predicate. + * + * Predicates have existence semantics, because their operands are item + * sequences. Pairs of items from the left and right operand's sequences are + * checked. TRUE returned only if any pair satisfying the condition is found. + * In strict mode, even if the desired pair has already been found, all pairs + * still need to be examined to check the absence of errors. If any error + * occurs, UNKNOWN (analogous to SQL NULL) is returned. + */ +static JsonPathBool +executePredicate(JsonPathExecContext *cxt, JsonPathItem *pred, + JsonPathItem *larg, JsonPathItem *rarg, JsonbValue *jb, + bool unwrapRightArg, JsonPathPredicateCallback exec, + void *param) +{ + JsonPathExecResult res; + JsonValueListIterator lseqit; + JsonValueList lseq = {0}; + JsonValueList rseq = {0}; + JsonbValue *lval; + bool error = false; + bool found = false; + + /* Left argument is always auto-unwrapped. */ + res = executeItemOptUnwrapResultNoThrow(cxt, larg, jb, true, &lseq); + if (jperIsError(res)) + return jpbUnknown; + + if (rarg) + { + /* Right argument is conditionally auto-unwrapped. 
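 *
 * Illustrative example (not part of the upstream source): these existence
 * semantics are what make
 *
 *   SELECT jsonb '{"a": [1, 2, 3, 4, 5]}' @@ '$.a[*] > 2';
 *
 * return true: the comparison holds for at least one pair of operand items.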
*/ + res = executeItemOptUnwrapResultNoThrow(cxt, rarg, jb, + unwrapRightArg, &rseq); + if (jperIsError(res)) + return jpbUnknown; + } + + JsonValueListInitIterator(&lseq, &lseqit); + while ((lval = JsonValueListNext(&lseq, &lseqit))) + { + JsonValueListIterator rseqit; + JsonbValue *rval; + bool first = true; + + JsonValueListInitIterator(&rseq, &rseqit); + if (rarg) + rval = JsonValueListNext(&rseq, &rseqit); + else + rval = NULL; + + /* Loop over right arg sequence or do single pass otherwise */ + while (rarg ? (rval != NULL) : first) + { + JsonPathBool res = exec(pred, lval, rval, param); + + if (res == jpbUnknown) + { + if (jspStrictAbsenseOfErrors(cxt)) + return jpbUnknown; + + error = true; + } + else if (res == jpbTrue) + { + if (!jspStrictAbsenseOfErrors(cxt)) + return jpbTrue; + + found = true; + } + + first = false; + if (rarg) + rval = JsonValueListNext(&rseq, &rseqit); + } + } + + if (found) /* possible only in strict mode */ + return jpbTrue; + + if (error) /* possible only in lax mode */ + return jpbUnknown; + + return jpbFalse; +} + +/* + * Execute binary arithmetic expression on singleton numeric operands. + * Array operands are automatically unwrapped in lax mode. + */ +static JsonPathExecResult +executeBinaryArithmExpr(JsonPathExecContext *cxt, JsonPathItem *jsp, + JsonbValue *jb, BinaryArithmFunc func, + JsonValueList *found) +{ + JsonPathExecResult jper; + JsonPathItem elem; + JsonValueList lseq = {0}; + JsonValueList rseq = {0}; + JsonbValue *lval; + JsonbValue *rval; + Numeric res; + + jspGetLeftArg(jsp, &elem); + + /* + * XXX: By standard only operands of multiplicative expressions are + * unwrapped. We extend it to other binary arithmetic expressions too. + */ + jper = executeItemOptUnwrapResult(cxt, &elem, jb, true, &lseq); + if (jperIsError(jper)) + return jper; + + jspGetRightArg(jsp, &elem); + + jper = executeItemOptUnwrapResult(cxt, &elem, jb, true, &rseq); + if (jperIsError(jper)) + return jper; + + if (JsonValueListLength(&lseq) != 1 || + !(lval = getScalar(JsonValueListHead(&lseq), jbvNumeric))) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_SINGLETON_SQL_JSON_ITEM_REQUIRED), + errmsg("left operand of jsonpath operator %s is not a single numeric value", + jspOperationName(jsp->type))))); + + if (JsonValueListLength(&rseq) != 1 || + !(rval = getScalar(JsonValueListHead(&rseq), jbvNumeric))) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_SINGLETON_SQL_JSON_ITEM_REQUIRED), + errmsg("right operand of jsonpath operator %s is not a single numeric value", + jspOperationName(jsp->type))))); + + if (jspThrowErrors(cxt)) + { + res = func(lval->val.numeric, rval->val.numeric, NULL); + } + else + { + bool error = false; + + res = func(lval->val.numeric, rval->val.numeric, &error); + + if (error) + return jperError; + } + + if (!jspGetNext(jsp, &elem) && !found) + return jperOk; + + lval = palloc(sizeof(*lval)); + lval->type = jbvNumeric; + lval->val.numeric = res; + + return executeNextItem(cxt, jsp, &elem, lval, found, false); +} + +/* + * Execute unary arithmetic expression for each numeric item in its operand's + * sequence. Array operand is automatically unwrapped in lax mode. 
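 *
 * Illustrative examples (not part of the upstream source): a binary operator
 * needs singleton numeric operands,
 *
 *   SELECT jsonb_path_query('{"a": 2, "b": 3}', '$.a * $.b');   -- 6
 *
 * whereas a unary operator is applied to every numeric item of its operand's
 * sequence, so with lax array unwrapping
 *
 *   SELECT jsonb_path_query('{"x": [2, 3, 4]}', '- $.x');
 *
 * is expected to return -2, -3 and -4.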
+ */ +static JsonPathExecResult +executeUnaryArithmExpr(JsonPathExecContext *cxt, JsonPathItem *jsp, + JsonbValue *jb, PGFunction func, JsonValueList *found) +{ + JsonPathExecResult jper; + JsonPathExecResult jper2; + JsonPathItem elem; + JsonValueList seq = {0}; + JsonValueListIterator it; + JsonbValue *val; + bool hasNext; + + jspGetArg(jsp, &elem); + jper = executeItemOptUnwrapResult(cxt, &elem, jb, true, &seq); + + if (jperIsError(jper)) + return jper; + + jper = jperNotFound; + + hasNext = jspGetNext(jsp, &elem); + + JsonValueListInitIterator(&seq, &it); + while ((val = JsonValueListNext(&seq, &it))) + { + if ((val = getScalar(val, jbvNumeric))) + { + if (!found && !hasNext) + return jperOk; + } + else + { + if (!found && !hasNext) + continue; /* skip non-numerics processing */ + + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_SQL_JSON_NUMBER_NOT_FOUND), + errmsg("operand of unary jsonpath operator %s is not a numeric value", + jspOperationName(jsp->type))))); + } + + if (func) + val->val.numeric = + DatumGetNumeric(DirectFunctionCall1(func, + NumericGetDatum(val->val.numeric))); + + jper2 = executeNextItem(cxt, jsp, &elem, val, found, false); + + if (jperIsError(jper2)) + return jper2; + + if (jper2 == jperOk) + { + if (!found) + return jperOk; + jper = jperOk; + } + } + + return jper; +} + +/* + * STARTS_WITH predicate callback. + * + * Check if the 'whole' string starts from 'initial' string. + */ +static JsonPathBool +executeStartsWith(JsonPathItem *jsp, JsonbValue *whole, JsonbValue *initial, + void *param) +{ + if (!(whole = getScalar(whole, jbvString))) + return jpbUnknown; /* error */ + + if (!(initial = getScalar(initial, jbvString))) + return jpbUnknown; /* error */ + + if (whole->val.string.len >= initial->val.string.len && + !memcmp(whole->val.string.val, + initial->val.string.val, + initial->val.string.len)) + return jpbTrue; + + return jpbFalse; +} + +/* + * LIKE_REGEX predicate callback. + * + * Check if the string matches regex pattern. + */ +static JsonPathBool +executeLikeRegex(JsonPathItem *jsp, JsonbValue *str, JsonbValue *rarg, + void *param) +{ + JsonLikeRegexContext *cxt = param; + + if (!(str = getScalar(str, jbvString))) + return jpbUnknown; + + /* Cache regex text and converted flags. */ + if (!cxt->regex) + { + cxt->regex = + cstring_to_text_with_len(jsp->content.like_regex.pattern, + jsp->content.like_regex.patternlen); + (void) jspConvertRegexFlags(jsp->content.like_regex.flags, + &(cxt->cflags), NULL); + } + + if (RE_compile_and_execute(cxt->regex, str->val.string.val, + str->val.string.len, + cxt->cflags, DEFAULT_COLLATION_OID, 0, NULL)) + return jpbTrue; + + return jpbFalse; +} + +/* + * Execute numeric item methods (.abs(), .floor(), .ceil()) using the specified + * user function 'func'. 
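 *
 * Illustrative examples (not part of the upstream source):
 *
 *   SELECT jsonb_path_query('{"h": 1.7}', '$.h.floor()');    -- 1
 *   SELECT jsonb_path_query('{"h": 1.7}', '$.h.ceiling()');  -- 2
 *   SELECT jsonb_path_query('{"z": -0.3}', '$.z.abs()');     -- 0.3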
+ */ +static JsonPathExecResult +executeNumericItemMethod(JsonPathExecContext *cxt, JsonPathItem *jsp, + JsonbValue *jb, bool unwrap, PGFunction func, + JsonValueList *found) +{ + JsonPathItem next; + Datum datum; + + if (unwrap && JsonbType(jb) == jbvArray) + return executeItemUnwrapTargetArray(cxt, jsp, jb, found, false); + + if (!(jb = getScalar(jb, jbvNumeric))) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_NON_NUMERIC_SQL_JSON_ITEM), + errmsg("jsonpath item method .%s() can only be applied to a numeric value", + jspOperationName(jsp->type))))); + + datum = DirectFunctionCall1(func, NumericGetDatum(jb->val.numeric)); + + if (!jspGetNext(jsp, &next) && !found) + return jperOk; + + jb = palloc(sizeof(*jb)); + jb->type = jbvNumeric; + jb->val.numeric = DatumGetNumeric(datum); + + return executeNextItem(cxt, jsp, &next, jb, found, false); +} + +/* + * Implementation of the .datetime() method. + * + * Converts a string into a date/time value. The actual type is determined at run time. + * If an argument is provided, this argument is used as a template string. + * Otherwise, the first fitting ISO format is selected. + */ +static JsonPathExecResult +executeDateTimeMethod(JsonPathExecContext *cxt, JsonPathItem *jsp, + JsonbValue *jb, JsonValueList *found) +{ + JsonbValue jbvbuf; + Datum value; + text *datetime; + Oid collid; + Oid typid; + int32 typmod = -1; + int tz = 0; + bool hasNext; + JsonPathExecResult res = jperNotFound; + JsonPathItem elem; + + if (!(jb = getScalar(jb, jbvString))) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_SQL_JSON_DATETIME_FUNCTION), + errmsg("jsonpath item method .%s() can only be applied to a string", + jspOperationName(jsp->type))))); + + datetime = cstring_to_text_with_len(jb->val.string.val, + jb->val.string.len); + + /* + * At some point we might wish to have callers supply the collation to + * use, but right now it's unclear that they'd be able to do better than + * DEFAULT_COLLATION_OID anyway. + */ + collid = DEFAULT_COLLATION_OID; + + if (jsp->content.arg) + { + text *template; + char *template_str; + int template_len; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + jspGetArg(jsp, &elem); + + if (elem.type != jpiString) + elog(ERROR, "invalid jsonpath item type for .datetime() argument"); + + template_str = jspGetString(&elem, &template_len); + + template = cstring_to_text_with_len(template_str, + template_len); + + value = parse_datetime(datetime, template, collid, true, + &typid, &typmod, &tz, + jspThrowErrors(cxt) ? NULL : (Node *) &escontext); + + if (escontext.error_occurred) + res = jperError; + else + res = jperOk; + } + else + { + /* + * According to SQL/JSON standard enumerate ISO formats for: date, + * timetz, time, timestamptz, timestamp. + * + * We also support ISO 8601 format (with "T") for timestamps, because + * to_json[b]() functions use this format. 
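 *
 * Illustrative examples (not part of the upstream source): without a
 * template argument the first matching format above is used,
 *
 *   SELECT jsonb_path_query('"2015-02-01 12:30:00"', '$.datetime()');
 *   -- "2015-02-01T12:30:00" (a timestamp)
 *
 * and with a template argument the string is parsed accordingly,
 *
 *   SELECT jsonb_path_query('"10-03-2017"', '$.datetime("dd-mm-yyyy")');
 *   -- "2017-03-10" (a date)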
+ */ + static __thread const char *fmt_str[] = + { + "yyyy-mm-dd", /* date */ + "HH24:MI:SS.USTZH:TZM", /* timetz */ + "HH24:MI:SS.USTZH", + "HH24:MI:SSTZH:TZM", + "HH24:MI:SSTZH", + "HH24:MI:SS.US", /* time without tz */ + "HH24:MI:SS", + "yyyy-mm-dd HH24:MI:SS.USTZH:TZM", /* timestamptz */ + "yyyy-mm-dd HH24:MI:SS.USTZH", + "yyyy-mm-dd HH24:MI:SSTZH:TZM", + "yyyy-mm-dd HH24:MI:SSTZH", + "yyyy-mm-dd\"T\"HH24:MI:SS.USTZH:TZM", + "yyyy-mm-dd\"T\"HH24:MI:SS.USTZH", + "yyyy-mm-dd\"T\"HH24:MI:SSTZH:TZM", + "yyyy-mm-dd\"T\"HH24:MI:SSTZH", + "yyyy-mm-dd HH24:MI:SS.US", /* timestamp without tz */ + "yyyy-mm-dd HH24:MI:SS", + "yyyy-mm-dd\"T\"HH24:MI:SS.US", + "yyyy-mm-dd\"T\"HH24:MI:SS" + }; + + /* cache for format texts */ + static __thread text *fmt_txt[lengthof(fmt_str)] = {0}; + int i; + + /* loop until datetime format fits */ + for (i = 0; i < lengthof(fmt_str); i++) + { + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + if (!fmt_txt[i]) + { + MemoryContext oldcxt = + MemoryContextSwitchTo(TopMemoryContext); + + fmt_txt[i] = cstring_to_text(fmt_str[i]); + MemoryContextSwitchTo(oldcxt); + } + + value = parse_datetime(datetime, fmt_txt[i], collid, true, + &typid, &typmod, &tz, + (Node *) &escontext); + + if (!escontext.error_occurred) + { + res = jperOk; + break; + } + } + + if (res == jperNotFound) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_SQL_JSON_DATETIME_FUNCTION), + errmsg("datetime format is not recognized: \"%s\"", + text_to_cstring(datetime)), + errhint("Use a datetime template argument to specify the input data format.")))); + } + + pfree(datetime); + + if (jperIsError(res)) + return res; + + hasNext = jspGetNext(jsp, &elem); + + if (!hasNext && !found) + return res; + + jb = hasNext ? &jbvbuf : palloc(sizeof(*jb)); + + jb->type = jbvDatetime; + jb->val.datetime.value = value; + jb->val.datetime.typid = typid; + jb->val.datetime.typmod = typmod; + jb->val.datetime.tz = tz; + + return executeNextItem(cxt, jsp, &elem, jb, found, hasNext); +} + +/* + * Implementation of .keyvalue() method. + * + * .keyvalue() method returns a sequence of object's key-value pairs in the + * following format: '{ "key": key, "value": value, "id": id }'. + * + * "id" field is an object identifier which is constructed from the two parts: + * base object id and its binary offset in base object's jsonb: + * id = 10000000000 * base_object_id + obj_offset_in_base_object + * + * 10000000000 (10^10) -- is a first round decimal number greater than 2^32 + * (maximal offset in jsonb). Decimal multiplier is used here to improve the + * readability of identifiers. + * + * Base object is usually a root object of the path: context item '$' or path + * variable '$var', literals can't produce objects for now. But if the path + * contains generated objects (.keyvalue() itself, for example), then they + * become base object for the subsequent .keyvalue(). + * + * Id of '$' is 0. Id of '$var' is its ordinal (positive) number in the list + * of variables (see getJsonPathVariable()). Ids for generated objects + * are assigned using global counter JsonPathExecContext.lastGeneratedObjectId. 
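 *
 * Illustrative example (not part of the upstream source):
 *
 *   SELECT jsonb_path_query('{"x": "20", "y": 32}', '$.keyvalue()');
 *   -- {"id": 0, "key": "x", "value": "20"}
 *   -- {"id": 0, "key": "y", "value": 32}
 *
 * Here the base object is '$' itself (id 0) at offset 0, so
 * id = 10000000000 * 0 + 0 = 0. For a hypothetical pair taken from a path
 * variable (base object id 1) at byte offset 120, the id would be
 * 10000000000 * 1 + 120 = 10000000120.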
+ */ +static JsonPathExecResult +executeKeyValueMethod(JsonPathExecContext *cxt, JsonPathItem *jsp, + JsonbValue *jb, JsonValueList *found) +{ + JsonPathExecResult res = jperNotFound; + JsonPathItem next; + JsonbContainer *jbc; + JsonbValue key; + JsonbValue val; + JsonbValue idval; + JsonbValue keystr; + JsonbValue valstr; + JsonbValue idstr; + JsonbIterator *it; + JsonbIteratorToken tok; + int64 id; + bool hasNext; + + if (JsonbType(jb) != jbvObject || jb->type != jbvBinary) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_SQL_JSON_OBJECT_NOT_FOUND), + errmsg("jsonpath item method .%s() can only be applied to an object", + jspOperationName(jsp->type))))); + + jbc = jb->val.binary.data; + + if (!JsonContainerSize(jbc)) + return jperNotFound; /* no key-value pairs */ + + hasNext = jspGetNext(jsp, &next); + + keystr.type = jbvString; + keystr.val.string.val = "key"; + keystr.val.string.len = 3; + + valstr.type = jbvString; + valstr.val.string.val = "value"; + valstr.val.string.len = 5; + + idstr.type = jbvString; + idstr.val.string.val = "id"; + idstr.val.string.len = 2; + + /* construct object id from its base object and offset inside that */ + id = jb->type != jbvBinary ? 0 : + (int64) ((char *) jbc - (char *) cxt->baseObject.jbc); + id += (int64) cxt->baseObject.id * INT64CONST(10000000000); + + idval.type = jbvNumeric; + idval.val.numeric = int64_to_numeric(id); + + it = JsonbIteratorInit(jbc); + + while ((tok = JsonbIteratorNext(&it, &key, true)) != WJB_DONE) + { + JsonBaseObjectInfo baseObject; + JsonbValue obj; + JsonbParseState *ps; + JsonbValue *keyval; + Jsonb *jsonb; + + if (tok != WJB_KEY) + continue; + + res = jperOk; + + if (!hasNext && !found) + break; + + tok = JsonbIteratorNext(&it, &val, true); + Assert(tok == WJB_VALUE); + + ps = NULL; + pushJsonbValue(&ps, WJB_BEGIN_OBJECT, NULL); + + pushJsonbValue(&ps, WJB_KEY, &keystr); + pushJsonbValue(&ps, WJB_VALUE, &key); + + pushJsonbValue(&ps, WJB_KEY, &valstr); + pushJsonbValue(&ps, WJB_VALUE, &val); + + pushJsonbValue(&ps, WJB_KEY, &idstr); + pushJsonbValue(&ps, WJB_VALUE, &idval); + + keyval = pushJsonbValue(&ps, WJB_END_OBJECT, NULL); + + jsonb = JsonbValueToJsonb(keyval); + + JsonbInitBinary(&obj, jsonb); + + baseObject = setBaseObject(cxt, &obj, cxt->lastGeneratedObjectId++); + + res = executeNextItem(cxt, jsp, &next, &obj, found, true); + + cxt->baseObject = baseObject; + + if (jperIsError(res)) + return res; + + if (res == jperOk && !found) + break; + } + + return res; +} + +/* + * Convert boolean execution status 'res' to a boolean JSON item and execute + * next jsonpath. + */ +static JsonPathExecResult +appendBoolResult(JsonPathExecContext *cxt, JsonPathItem *jsp, + JsonValueList *found, JsonPathBool res) +{ + JsonPathItem next; + JsonbValue jbv; + + if (!jspGetNext(jsp, &next) && !found) + return jperOk; /* found singleton boolean value */ + + if (res == jpbUnknown) + { + jbv.type = jbvNull; + } + else + { + jbv.type = jbvBool; + jbv.val.boolean = res == jpbTrue; + } + + return executeNextItem(cxt, jsp, &next, &jbv, found, true); +} + +/* + * Convert jsonpath's scalar or variable node to actual jsonb value. + * + * If node is a variable then its id returned, otherwise 0 returned. 
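 *
 * Illustrative example of variable substitution (not part of the upstream
 * source):
 *
 *   SELECT jsonb_path_query('[1, 2, 3]', '$[*] ? (@ >= $min)', '{"min": 2}');
 *   -- returns 2 and 3
 *
 * Each '$min' reference is looked up in the "vars" object by
 * getJsonPathVariable() below.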
+ */ +static void +getJsonPathItem(JsonPathExecContext *cxt, JsonPathItem *item, + JsonbValue *value) +{ + switch (item->type) + { + case jpiNull: + value->type = jbvNull; + break; + case jpiBool: + value->type = jbvBool; + value->val.boolean = jspGetBool(item); + break; + case jpiNumeric: + value->type = jbvNumeric; + value->val.numeric = jspGetNumeric(item); + break; + case jpiString: + value->type = jbvString; + value->val.string.val = jspGetString(item, + &value->val.string.len); + break; + case jpiVariable: + getJsonPathVariable(cxt, item, cxt->vars, value); + return; + default: + elog(ERROR, "unexpected jsonpath item type"); + } +} + +/* + * Get the value of variable passed to jsonpath executor + */ +static void +getJsonPathVariable(JsonPathExecContext *cxt, JsonPathItem *variable, + Jsonb *vars, JsonbValue *value) +{ + char *varName; + int varNameLength; + JsonbValue tmp; + JsonbValue *v; + + if (!vars) + { + value->type = jbvNull; + return; + } + + Assert(variable->type == jpiVariable); + varName = jspGetString(variable, &varNameLength); + tmp.type = jbvString; + tmp.val.string.val = varName; + tmp.val.string.len = varNameLength; + + v = findJsonbValueFromContainer(&vars->root, JB_FOBJECT, &tmp); + + if (v) + { + *value = *v; + pfree(v); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("could not find jsonpath variable \"%s\"", + pnstrdup(varName, varNameLength)))); + } + + JsonbInitBinary(&tmp, vars); + setBaseObject(cxt, &tmp, 1); +} + +/**************** Support functions for JsonPath execution *****************/ + +/* + * Returns the size of an array item, or -1 if item is not an array. + */ +static int +JsonbArraySize(JsonbValue *jb) +{ + Assert(jb->type != jbvArray); + + if (jb->type == jbvBinary) + { + JsonbContainer *jbc = jb->val.binary.data; + + if (JsonContainerIsArray(jbc) && !JsonContainerIsScalar(jbc)) + return JsonContainerSize(jbc); + } + + return -1; +} + +/* Comparison predicate callback. */ +static JsonPathBool +executeComparison(JsonPathItem *cmp, JsonbValue *lv, JsonbValue *rv, void *p) +{ + JsonPathExecContext *cxt = (JsonPathExecContext *) p; + + return compareItems(cmp->type, lv, rv, cxt->useTz); +} + +/* + * Perform per-byte comparison of two strings. + */ +static int +binaryCompareStrings(const char *s1, int len1, + const char *s2, int len2) +{ + int cmp; + + cmp = memcmp(s1, s2, Min(len1, len2)); + + if (cmp != 0) + return cmp; + + if (len1 == len2) + return 0; + + return len1 < len2 ? -1 : 1; +} + +/* + * Compare two strings in the current server encoding using Unicode codepoint + * collation. + */ +static int +compareStrings(const char *mbstr1, int mblen1, + const char *mbstr2, int mblen2) +{ + if (GetDatabaseEncoding() == PG_SQL_ASCII || + GetDatabaseEncoding() == PG_UTF8) + { + /* + * It's known property of UTF-8 strings that their per-byte comparison + * result matches codepoints comparison result. ASCII can be + * considered as special case of UTF-8. + */ + return binaryCompareStrings(mbstr1, mblen1, mbstr2, mblen2); + } + else + { + char *utf8str1, + *utf8str2; + int cmp, + utf8len1, + utf8len2; + + /* + * We have to convert other encodings to UTF-8 first, then compare. + * Input strings may be not null-terminated and pg_server_to_any() may + * return them "as is". So, use strlen() only if there is real + * conversion. + */ + utf8str1 = pg_server_to_any(mbstr1, mblen1, PG_UTF8); + utf8str2 = pg_server_to_any(mbstr2, mblen2, PG_UTF8); + utf8len1 = (mbstr1 == utf8str1) ? 
mblen1 : strlen(utf8str1); + utf8len2 = (mbstr2 == utf8str2) ? mblen2 : strlen(utf8str2); + + cmp = binaryCompareStrings(utf8str1, utf8len1, utf8str2, utf8len2); + + /* + * If pg_server_to_any() did no real conversion, then we actually + * compared original strings. So, we already done. + */ + if (mbstr1 == utf8str1 && mbstr2 == utf8str2) + return cmp; + + /* Free memory if needed */ + if (mbstr1 != utf8str1) + pfree(utf8str1); + if (mbstr2 != utf8str2) + pfree(utf8str2); + + /* + * When all Unicode codepoints are equal, return result of binary + * comparison. In some edge cases, same characters may have different + * representations in encoding. Then our behavior could diverge from + * standard. However, that allow us to do simple binary comparison + * for "==" operator, which is performance critical in typical cases. + * In future to implement strict standard conformance, we can do + * normalization of input JSON strings. + */ + if (cmp == 0) + return binaryCompareStrings(mbstr1, mblen1, mbstr2, mblen2); + else + return cmp; + } +} + +/* + * Compare two SQL/JSON items using comparison operation 'op'. + */ +static JsonPathBool +compareItems(int32 op, JsonbValue *jb1, JsonbValue *jb2, bool useTz) +{ + int cmp; + bool res; + + if (jb1->type != jb2->type) + { + if (jb1->type == jbvNull || jb2->type == jbvNull) + + /* + * Equality and order comparison of nulls to non-nulls returns + * always false, but inequality comparison returns true. + */ + return op == jpiNotEqual ? jpbTrue : jpbFalse; + + /* Non-null items of different types are not comparable. */ + return jpbUnknown; + } + + switch (jb1->type) + { + case jbvNull: + cmp = 0; + break; + case jbvBool: + cmp = jb1->val.boolean == jb2->val.boolean ? 0 : + jb1->val.boolean ? 1 : -1; + break; + case jbvNumeric: + cmp = compareNumeric(jb1->val.numeric, jb2->val.numeric); + break; + case jbvString: + if (op == jpiEqual) + return jb1->val.string.len != jb2->val.string.len || + memcmp(jb1->val.string.val, + jb2->val.string.val, + jb1->val.string.len) ? jpbFalse : jpbTrue; + + cmp = compareStrings(jb1->val.string.val, jb1->val.string.len, + jb2->val.string.val, jb2->val.string.len); + break; + case jbvDatetime: + { + bool cast_error; + + cmp = compareDatetime(jb1->val.datetime.value, + jb1->val.datetime.typid, + jb2->val.datetime.value, + jb2->val.datetime.typid, + useTz, + &cast_error); + + if (cast_error) + return jpbUnknown; + } + break; + + case jbvBinary: + case jbvArray: + case jbvObject: + return jpbUnknown; /* non-scalars are not comparable */ + + default: + elog(ERROR, "invalid jsonb value type %d", jb1->type); + } + + switch (op) + { + case jpiEqual: + res = (cmp == 0); + break; + case jpiNotEqual: + res = (cmp != 0); + break; + case jpiLess: + res = (cmp < 0); + break; + case jpiGreater: + res = (cmp > 0); + break; + case jpiLessOrEqual: + res = (cmp <= 0); + break; + case jpiGreaterOrEqual: + res = (cmp >= 0); + break; + default: + elog(ERROR, "unrecognized jsonpath operation: %d", op); + return jpbUnknown; + } + + return res ? jpbTrue : jpbFalse; +} + +/* Compare two numerics */ +static int +compareNumeric(Numeric a, Numeric b) +{ + return DatumGetInt32(DirectFunctionCall2(numeric_cmp, + NumericGetDatum(a), + NumericGetDatum(b))); +} + +static JsonbValue * +copyJsonbValue(JsonbValue *src) +{ + JsonbValue *dst = palloc(sizeof(*dst)); + + *dst = *src; + + return dst; +} + +/* + * Execute array subscript expression and convert resulting numeric item to + * the integer type with truncation. 
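 *
 * Illustrative examples (not part of the upstream source):
 *
 *   SELECT jsonb_path_query('[1, 2, 3, 4, 5]', '$[1 to 2, 4]');
 *   -- returns 2, 3 and 5
 *
 *   SELECT jsonb_path_query('[1, 2, 3, 4, 5]', '$[last]');
 *   -- returns 5
 *
 * A non-integer subscript expression is truncated, not rounded, before being
 * used as an index.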
+ */ +static JsonPathExecResult +getArrayIndex(JsonPathExecContext *cxt, JsonPathItem *jsp, JsonbValue *jb, + int32 *index) +{ + JsonbValue *jbv; + JsonValueList found = {0}; + JsonPathExecResult res = executeItem(cxt, jsp, jb, &found); + Datum numeric_index; + bool have_error = false; + + if (jperIsError(res)) + return res; + + if (JsonValueListLength(&found) != 1 || + !(jbv = getScalar(JsonValueListHead(&found), jbvNumeric))) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_INVALID_SQL_JSON_SUBSCRIPT), + errmsg("jsonpath array subscript is not a single numeric value")))); + + numeric_index = DirectFunctionCall2(numeric_trunc, + NumericGetDatum(jbv->val.numeric), + Int32GetDatum(0)); + + *index = numeric_int4_opt_error(DatumGetNumeric(numeric_index), + &have_error); + + if (have_error) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_INVALID_SQL_JSON_SUBSCRIPT), + errmsg("jsonpath array subscript is out of integer range")))); + + return jperOk; +} + +/* Save base object and its id needed for the execution of .keyvalue(). */ +static JsonBaseObjectInfo +setBaseObject(JsonPathExecContext *cxt, JsonbValue *jbv, int32 id) +{ + JsonBaseObjectInfo baseObject = cxt->baseObject; + + cxt->baseObject.jbc = jbv->type != jbvBinary ? NULL : + (JsonbContainer *) jbv->val.binary.data; + cxt->baseObject.id = id; + + return baseObject; +} + +static void +JsonValueListAppend(JsonValueList *jvl, JsonbValue *jbv) +{ + if (jvl->singleton) + { + jvl->list = list_make2(jvl->singleton, jbv); + jvl->singleton = NULL; + } + else if (!jvl->list) + jvl->singleton = jbv; + else + jvl->list = lappend(jvl->list, jbv); +} + +static int +JsonValueListLength(const JsonValueList *jvl) +{ + return jvl->singleton ? 1 : list_length(jvl->list); +} + +static bool +JsonValueListIsEmpty(JsonValueList *jvl) +{ + return !jvl->singleton && (jvl->list == NIL); +} + +static JsonbValue * +JsonValueListHead(JsonValueList *jvl) +{ + return jvl->singleton ? jvl->singleton : linitial(jvl->list); +} + +static List * +JsonValueListGetList(JsonValueList *jvl) +{ + if (jvl->singleton) + return list_make1(jvl->singleton); + + return jvl->list; +} + +static void +JsonValueListInitIterator(const JsonValueList *jvl, JsonValueListIterator *it) +{ + if (jvl->singleton) + { + it->value = jvl->singleton; + it->list = NIL; + it->next = NULL; + } + else if (jvl->list != NIL) + { + it->value = (JsonbValue *) linitial(jvl->list); + it->list = jvl->list; + it->next = list_second_cell(jvl->list); + } + else + { + it->value = NULL; + it->list = NIL; + it->next = NULL; + } +} + +/* + * Get the next item from the sequence advancing iterator. + */ +static JsonbValue * +JsonValueListNext(const JsonValueList *jvl, JsonValueListIterator *it) +{ + JsonbValue *result = it->value; + + if (it->next) + { + it->value = lfirst(it->next); + it->next = lnext(it->list, it->next); + } + else + { + it->value = NULL; + } + + return result; +} + +/* + * Initialize a binary JsonbValue with the given jsonb container. + */ +static JsonbValue * +JsonbInitBinary(JsonbValue *jbv, Jsonb *jb) +{ + jbv->type = jbvBinary; + jbv->val.binary.data = &jb->root; + jbv->val.binary.len = VARSIZE_ANY_EXHDR(jb); + + return jbv; +} + +/* + * Returns jbv* type of JsonbValue. Note, it never returns jbvBinary as is. + */ +static int +JsonbType(JsonbValue *jb) +{ + int type = jb->type; + + if (jb->type == jbvBinary) + { + JsonbContainer *jbc = (void *) jb->val.binary.data; + + /* Scalars should be always extracted during jsonpath execution. 
*/ + Assert(!JsonContainerIsScalar(jbc)); + + if (JsonContainerIsObject(jbc)) + type = jbvObject; + else if (JsonContainerIsArray(jbc)) + type = jbvArray; + else + elog(ERROR, "invalid jsonb container type: 0x%08x", jbc->header); + } + + return type; +} + +/* Get scalar of given type or NULL on type mismatch */ +static JsonbValue * +getScalar(JsonbValue *scalar, enum jbvType type) +{ + /* Scalars should be always extracted during jsonpath execution. */ + Assert(scalar->type != jbvBinary || + !JsonContainerIsScalar(scalar->val.binary.data)); + + return scalar->type == type ? scalar : NULL; +} + +/* Construct a JSON array from the item list */ +static JsonbValue * +wrapItemsInArray(const JsonValueList *items) +{ + JsonbParseState *ps = NULL; + JsonValueListIterator it; + JsonbValue *jbv; + + pushJsonbValue(&ps, WJB_BEGIN_ARRAY, NULL); + + JsonValueListInitIterator(items, &it); + while ((jbv = JsonValueListNext(items, &it))) + pushJsonbValue(&ps, WJB_ELEM, jbv); + + return pushJsonbValue(&ps, WJB_END_ARRAY, NULL); +} + +/* Check if the timezone required for casting from type1 to type2 is used */ +static void +checkTimezoneIsUsedForCast(bool useTz, const char *type1, const char *type2) +{ + if (!useTz) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot convert value from %s to %s without time zone usage", + type1, type2), + errhint("Use *_tz() function for time zone support."))); +} + +/* Convert time datum to timetz datum */ +static Datum +castTimeToTimeTz(Datum time, bool useTz) +{ + checkTimezoneIsUsedForCast(useTz, "time", "timetz"); + + return DirectFunctionCall1(time_timetz, time); +} + +/* + * Compare date to timestamp. + * Note that this doesn't involve any timezone considerations. + */ +static int +cmpDateToTimestamp(DateADT date1, Timestamp ts2, bool useTz) +{ + return date_cmp_timestamp_internal(date1, ts2); +} + +/* + * Compare date to timestamptz. + */ +static int +cmpDateToTimestampTz(DateADT date1, TimestampTz tstz2, bool useTz) +{ + checkTimezoneIsUsedForCast(useTz, "date", "timestamptz"); + + return date_cmp_timestamptz_internal(date1, tstz2); +} + +/* + * Compare timestamp to timestamptz. + */ +static int +cmpTimestampToTimestampTz(Timestamp ts1, TimestampTz tstz2, bool useTz) +{ + checkTimezoneIsUsedForCast(useTz, "timestamp", "timestamptz"); + + return timestamp_cmp_timestamptz_internal(ts1, tstz2); +} + +/* + * Cross-type comparison of two datetime SQL/JSON items. If items are + * uncomparable *cast_error flag is set, otherwise *cast_error is unset. + * If the cast requires timezone and it is not used, then explicit error is thrown. 
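+ * For example (illustrative note, derived from the code below): comparing a
+ * date with a timestamptz needs the session time zone, so it is allowed only
+ * when useTz is true (the *_tz() jsonpath functions); comparing a time with
+ * a date is never possible, so *cast_error is set and the caller treats the
+ * comparison result as unknown.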
+ */ +static int +compareDatetime(Datum val1, Oid typid1, Datum val2, Oid typid2, + bool useTz, bool *cast_error) +{ + PGFunction cmpfunc; + + *cast_error = false; + + switch (typid1) + { + case DATEOID: + switch (typid2) + { + case DATEOID: + cmpfunc = date_cmp; + + break; + + case TIMESTAMPOID: + return cmpDateToTimestamp(DatumGetDateADT(val1), + DatumGetTimestamp(val2), + useTz); + + case TIMESTAMPTZOID: + return cmpDateToTimestampTz(DatumGetDateADT(val1), + DatumGetTimestampTz(val2), + useTz); + + case TIMEOID: + case TIMETZOID: + *cast_error = true; /* uncomparable types */ + return 0; + + default: + elog(ERROR, "unrecognized SQL/JSON datetime type oid: %u", + typid2); + } + break; + + case TIMEOID: + switch (typid2) + { + case TIMEOID: + cmpfunc = time_cmp; + + break; + + case TIMETZOID: + val1 = castTimeToTimeTz(val1, useTz); + cmpfunc = timetz_cmp; + + break; + + case DATEOID: + case TIMESTAMPOID: + case TIMESTAMPTZOID: + *cast_error = true; /* uncomparable types */ + return 0; + + default: + elog(ERROR, "unrecognized SQL/JSON datetime type oid: %u", + typid2); + } + break; + + case TIMETZOID: + switch (typid2) + { + case TIMEOID: + val2 = castTimeToTimeTz(val2, useTz); + cmpfunc = timetz_cmp; + + break; + + case TIMETZOID: + cmpfunc = timetz_cmp; + + break; + + case DATEOID: + case TIMESTAMPOID: + case TIMESTAMPTZOID: + *cast_error = true; /* uncomparable types */ + return 0; + + default: + elog(ERROR, "unrecognized SQL/JSON datetime type oid: %u", + typid2); + } + break; + + case TIMESTAMPOID: + switch (typid2) + { + case DATEOID: + return -cmpDateToTimestamp(DatumGetDateADT(val2), + DatumGetTimestamp(val1), + useTz); + + case TIMESTAMPOID: + cmpfunc = timestamp_cmp; + + break; + + case TIMESTAMPTZOID: + return cmpTimestampToTimestampTz(DatumGetTimestamp(val1), + DatumGetTimestampTz(val2), + useTz); + + case TIMEOID: + case TIMETZOID: + *cast_error = true; /* uncomparable types */ + return 0; + + default: + elog(ERROR, "unrecognized SQL/JSON datetime type oid: %u", + typid2); + } + break; + + case TIMESTAMPTZOID: + switch (typid2) + { + case DATEOID: + return -cmpDateToTimestampTz(DatumGetDateADT(val2), + DatumGetTimestampTz(val1), + useTz); + + case TIMESTAMPOID: + return -cmpTimestampToTimestampTz(DatumGetTimestamp(val2), + DatumGetTimestampTz(val1), + useTz); + + case TIMESTAMPTZOID: + cmpfunc = timestamp_cmp; + + break; + + case TIMEOID: + case TIMETZOID: + *cast_error = true; /* uncomparable types */ + return 0; + + default: + elog(ERROR, "unrecognized SQL/JSON datetime type oid: %u", + typid2); + } + break; + + default: + elog(ERROR, "unrecognized SQL/JSON datetime type oid: %u", typid1); + } + + if (*cast_error) + return 0; /* cast error */ + + return DatumGetInt32(DirectFunctionCall2(cmpfunc, val1, val2)); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonpath_gram.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonpath_gram.c new file mode 100644 index 00000000000..6c5892a0152 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonpath_gram.c @@ -0,0 +1,2334 @@ +/* A Bison parser, made by GNU Bison 3.7.5. */ + +/* Bison implementation for Yacc-like parsers in C + + Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2021 Free Software Foundation, + Inc. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + +/* C LALR(1) parser skeleton written by Richard Stallman, by + simplifying the original so-called "semantic" parser. */ + +/* DO NOT RELY ON FEATURES THAT ARE NOT DOCUMENTED in the manual, + especially those whose name start with YY_ or yy_. They are + private implementation details that can be changed or removed. */ + +/* All symbols defined below should begin with yy or YY, to avoid + infringing on user name space. This should be done even for local + variables, as they might otherwise be expanded by user macros. + There are some unavoidable exceptions within include files to + define necessary library symbols; they are noted "INFRINGES ON + USER NAME SPACE" below. */ + +/* Identify Bison output, and Bison version. */ +#define YYBISON 30705 + +/* Bison version string. */ +#define YYBISON_VERSION "3.7.5" + +/* Skeleton name. */ +#define YYSKELETON_NAME "yacc.c" + +/* Pure parsers. */ +#define YYPURE 1 + +/* Push parsers. */ +#define YYPUSH 0 + +/* Pull parsers. */ +#define YYPULL 1 + + +/* Substitute the variable and function names. */ +#define yyparse jsonpath_yyparse +#define yylex jsonpath_yylex +#define yyerror jsonpath_yyerror +#define yydebug jsonpath_yydebug +#define yynerrs jsonpath_yynerrs + +/* First part of user prologue. */ +#line 1 "jsonpath_gram.y" + +/*------------------------------------------------------------------------- + * + * jsonpath_gram.y + * Grammar definitions for jsonpath datatype + * + * Transforms tokenized jsonpath into tree of JsonPathParseItem structs. 
+ * + * Copyright (c) 2019-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/adt/jsonpath_gram.y + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "catalog/pg_collation.h" +#include "fmgr.h" +#include "jsonpath_internal.h" +#include "miscadmin.h" +#include "nodes/pg_list.h" +#include "regex/regex.h" +#include "utils/builtins.h" + +static JsonPathParseItem *makeItemType(JsonPathItemType type); +static JsonPathParseItem *makeItemString(JsonPathString *s); +static JsonPathParseItem *makeItemVariable(JsonPathString *s); +static JsonPathParseItem *makeItemKey(JsonPathString *s); +static JsonPathParseItem *makeItemNumeric(JsonPathString *s); +static JsonPathParseItem *makeItemBool(bool val); +static JsonPathParseItem *makeItemBinary(JsonPathItemType type, + JsonPathParseItem *la, + JsonPathParseItem *ra); +static JsonPathParseItem *makeItemUnary(JsonPathItemType type, + JsonPathParseItem *a); +static JsonPathParseItem *makeItemList(List *list); +static JsonPathParseItem *makeIndexArray(List *list); +static JsonPathParseItem *makeAny(int first, int last); +static bool makeItemLikeRegex(JsonPathParseItem *expr, + JsonPathString *pattern, + JsonPathString *flags, + JsonPathParseItem ** result, + struct Node *escontext); + +/* + * Bison doesn't allocate anything that needs to live across parser calls, + * so we can easily have it use palloc instead of malloc. This prevents + * memory leaks if we error out during parsing. + */ +#define YYMALLOC palloc +#define YYFREE pfree + + +#line 132 "jsonpath_gram.c" + +# ifndef YY_CAST +# ifdef __cplusplus +# define YY_CAST(Type, Val) static_cast<Type> (Val) +# define YY_REINTERPRET_CAST(Type, Val) reinterpret_cast<Type> (Val) +# else +# define YY_CAST(Type, Val) ((Type) (Val)) +# define YY_REINTERPRET_CAST(Type, Val) ((Type) (Val)) +# endif +# endif +# ifndef YY_NULLPTR +# if defined __cplusplus +# if 201103L <= __cplusplus +# define YY_NULLPTR nullptr +# else +# define YY_NULLPTR 0 +# endif +# else +# define YY_NULLPTR ((void*)0) +# endif +# endif + +#include "jsonpath_gram.h" +/* Symbol kind. 
*/ +enum yysymbol_kind_t +{ + YYSYMBOL_YYEMPTY = -2, + YYSYMBOL_YYEOF = 0, /* "end of file" */ + YYSYMBOL_YYerror = 1, /* error */ + YYSYMBOL_YYUNDEF = 2, /* "invalid token" */ + YYSYMBOL_TO_P = 3, /* TO_P */ + YYSYMBOL_NULL_P = 4, /* NULL_P */ + YYSYMBOL_TRUE_P = 5, /* TRUE_P */ + YYSYMBOL_FALSE_P = 6, /* FALSE_P */ + YYSYMBOL_IS_P = 7, /* IS_P */ + YYSYMBOL_UNKNOWN_P = 8, /* UNKNOWN_P */ + YYSYMBOL_EXISTS_P = 9, /* EXISTS_P */ + YYSYMBOL_IDENT_P = 10, /* IDENT_P */ + YYSYMBOL_STRING_P = 11, /* STRING_P */ + YYSYMBOL_NUMERIC_P = 12, /* NUMERIC_P */ + YYSYMBOL_INT_P = 13, /* INT_P */ + YYSYMBOL_VARIABLE_P = 14, /* VARIABLE_P */ + YYSYMBOL_OR_P = 15, /* OR_P */ + YYSYMBOL_AND_P = 16, /* AND_P */ + YYSYMBOL_NOT_P = 17, /* NOT_P */ + YYSYMBOL_LESS_P = 18, /* LESS_P */ + YYSYMBOL_LESSEQUAL_P = 19, /* LESSEQUAL_P */ + YYSYMBOL_EQUAL_P = 20, /* EQUAL_P */ + YYSYMBOL_NOTEQUAL_P = 21, /* NOTEQUAL_P */ + YYSYMBOL_GREATEREQUAL_P = 22, /* GREATEREQUAL_P */ + YYSYMBOL_GREATER_P = 23, /* GREATER_P */ + YYSYMBOL_ANY_P = 24, /* ANY_P */ + YYSYMBOL_STRICT_P = 25, /* STRICT_P */ + YYSYMBOL_LAX_P = 26, /* LAX_P */ + YYSYMBOL_LAST_P = 27, /* LAST_P */ + YYSYMBOL_STARTS_P = 28, /* STARTS_P */ + YYSYMBOL_WITH_P = 29, /* WITH_P */ + YYSYMBOL_LIKE_REGEX_P = 30, /* LIKE_REGEX_P */ + YYSYMBOL_FLAG_P = 31, /* FLAG_P */ + YYSYMBOL_ABS_P = 32, /* ABS_P */ + YYSYMBOL_SIZE_P = 33, /* SIZE_P */ + YYSYMBOL_TYPE_P = 34, /* TYPE_P */ + YYSYMBOL_FLOOR_P = 35, /* FLOOR_P */ + YYSYMBOL_DOUBLE_P = 36, /* DOUBLE_P */ + YYSYMBOL_CEILING_P = 37, /* CEILING_P */ + YYSYMBOL_KEYVALUE_P = 38, /* KEYVALUE_P */ + YYSYMBOL_DATETIME_P = 39, /* DATETIME_P */ + YYSYMBOL_40_ = 40, /* '+' */ + YYSYMBOL_41_ = 41, /* '-' */ + YYSYMBOL_42_ = 42, /* '*' */ + YYSYMBOL_43_ = 43, /* '/' */ + YYSYMBOL_44_ = 44, /* '%' */ + YYSYMBOL_UMINUS = 45, /* UMINUS */ + YYSYMBOL_46_ = 46, /* '(' */ + YYSYMBOL_47_ = 47, /* ')' */ + YYSYMBOL_48_ = 48, /* '$' */ + YYSYMBOL_49_ = 49, /* '@' */ + YYSYMBOL_50_ = 50, /* ',' */ + YYSYMBOL_51_ = 51, /* '[' */ + YYSYMBOL_52_ = 52, /* ']' */ + YYSYMBOL_53_ = 53, /* '{' */ + YYSYMBOL_54_ = 54, /* '}' */ + YYSYMBOL_55_ = 55, /* '.' */ + YYSYMBOL_56_ = 56, /* '?' */ + YYSYMBOL_YYACCEPT = 57, /* $accept */ + YYSYMBOL_result = 58, /* result */ + YYSYMBOL_expr_or_predicate = 59, /* expr_or_predicate */ + YYSYMBOL_mode = 60, /* mode */ + YYSYMBOL_scalar_value = 61, /* scalar_value */ + YYSYMBOL_comp_op = 62, /* comp_op */ + YYSYMBOL_delimited_predicate = 63, /* delimited_predicate */ + YYSYMBOL_predicate = 64, /* predicate */ + YYSYMBOL_starts_with_initial = 65, /* starts_with_initial */ + YYSYMBOL_path_primary = 66, /* path_primary */ + YYSYMBOL_accessor_expr = 67, /* accessor_expr */ + YYSYMBOL_expr = 68, /* expr */ + YYSYMBOL_index_elem = 69, /* index_elem */ + YYSYMBOL_index_list = 70, /* index_list */ + YYSYMBOL_array_accessor = 71, /* array_accessor */ + YYSYMBOL_any_level = 72, /* any_level */ + YYSYMBOL_any_path = 73, /* any_path */ + YYSYMBOL_accessor_op = 74, /* accessor_op */ + YYSYMBOL_datetime_template = 75, /* datetime_template */ + YYSYMBOL_opt_datetime_template = 76, /* opt_datetime_template */ + YYSYMBOL_key = 77, /* key */ + YYSYMBOL_key_name = 78, /* key_name */ + YYSYMBOL_method = 79 /* method */ +}; +typedef enum yysymbol_kind_t yysymbol_kind_t; + + + + +#ifdef short +# undef short +#endif + +/* On compilers that do not define __PTRDIFF_MAX__ etc., make sure + <limits.h> and (if available) <stdint.h> are included + so that the code can choose integer types of a good width. 
*/ + +#ifndef __PTRDIFF_MAX__ +# include <limits.h> /* INFRINGES ON USER NAME SPACE */ +# if defined __STDC_VERSION__ && 199901 <= __STDC_VERSION__ +# include <stdint.h> /* INFRINGES ON USER NAME SPACE */ +# define YY_STDINT_H +# endif +#endif + +/* Narrow types that promote to a signed type and that can represent a + signed or unsigned integer of at least N bits. In tables they can + save space and decrease cache pressure. Promoting to a signed type + helps avoid bugs in integer arithmetic. */ + +#ifdef __INT_LEAST8_MAX__ +typedef __INT_LEAST8_TYPE__ yytype_int8; +#elif defined YY_STDINT_H +typedef int_least8_t yytype_int8; +#else +typedef signed char yytype_int8; +#endif + +#ifdef __INT_LEAST16_MAX__ +typedef __INT_LEAST16_TYPE__ yytype_int16; +#elif defined YY_STDINT_H +typedef int_least16_t yytype_int16; +#else +typedef short yytype_int16; +#endif + +/* Work around bug in HP-UX 11.23, which defines these macros + incorrectly for preprocessor constants. This workaround can likely + be removed in 2023, as HPE has promised support for HP-UX 11.23 + (aka HP-UX 11i v2) only through the end of 2022; see Table 2 of + <https://h20195.www2.hpe.com/V2/getpdf.aspx/4AA4-7673ENW.pdf>. */ +#ifdef __hpux +# undef UINT_LEAST8_MAX +# undef UINT_LEAST16_MAX +# define UINT_LEAST8_MAX 255 +# define UINT_LEAST16_MAX 65535 +#endif + +#if defined __UINT_LEAST8_MAX__ && __UINT_LEAST8_MAX__ <= __INT_MAX__ +typedef __UINT_LEAST8_TYPE__ yytype_uint8; +#elif (!defined __UINT_LEAST8_MAX__ && defined YY_STDINT_H \ + && UINT_LEAST8_MAX <= INT_MAX) +typedef uint_least8_t yytype_uint8; +#elif !defined __UINT_LEAST8_MAX__ && UCHAR_MAX <= INT_MAX +typedef unsigned char yytype_uint8; +#else +typedef short yytype_uint8; +#endif + +#if defined __UINT_LEAST16_MAX__ && __UINT_LEAST16_MAX__ <= __INT_MAX__ +typedef __UINT_LEAST16_TYPE__ yytype_uint16; +#elif (!defined __UINT_LEAST16_MAX__ && defined YY_STDINT_H \ + && UINT_LEAST16_MAX <= INT_MAX) +typedef uint_least16_t yytype_uint16; +#elif !defined __UINT_LEAST16_MAX__ && USHRT_MAX <= INT_MAX +typedef unsigned short yytype_uint16; +#else +typedef int yytype_uint16; +#endif + +#ifndef YYPTRDIFF_T +# if defined __PTRDIFF_TYPE__ && defined __PTRDIFF_MAX__ +# define YYPTRDIFF_T __PTRDIFF_TYPE__ +# define YYPTRDIFF_MAXIMUM __PTRDIFF_MAX__ +# elif defined PTRDIFF_MAX +# ifndef ptrdiff_t +# include <stddef.h> /* INFRINGES ON USER NAME SPACE */ +# endif +# define YYPTRDIFF_T ptrdiff_t +# define YYPTRDIFF_MAXIMUM PTRDIFF_MAX +# else +# define YYPTRDIFF_T long +# define YYPTRDIFF_MAXIMUM LONG_MAX +# endif +#endif + +#ifndef YYSIZE_T +# ifdef __SIZE_TYPE__ +# define YYSIZE_T __SIZE_TYPE__ +# elif defined size_t +# define YYSIZE_T size_t +# elif defined __STDC_VERSION__ && 199901 <= __STDC_VERSION__ +# include <stddef.h> /* INFRINGES ON USER NAME SPACE */ +# define YYSIZE_T size_t +# else +# define YYSIZE_T unsigned +# endif +#endif + +#define YYSIZE_MAXIMUM \ + YY_CAST (YYPTRDIFF_T, \ + (YYPTRDIFF_MAXIMUM < YY_CAST (YYSIZE_T, -1) \ + ? YYPTRDIFF_MAXIMUM \ + : YY_CAST (YYSIZE_T, -1))) + +#define YYSIZEOF(X) YY_CAST (YYPTRDIFF_T, sizeof (X)) + + +/* Stored state numbers (used for stacks). */ +typedef yytype_uint8 yy_state_t; + +/* State numbers in computations. 
*/ +typedef int yy_state_fast_t; + +#ifndef YY_ +# if defined YYENABLE_NLS && YYENABLE_NLS +# if ENABLE_NLS +# include <libintl.h> /* INFRINGES ON USER NAME SPACE */ +# define YY_(Msgid) dgettext ("bison-runtime", Msgid) +# endif +# endif +# ifndef YY_ +# define YY_(Msgid) Msgid +# endif +#endif + + +#ifndef YY_ATTRIBUTE_PURE +# if defined __GNUC__ && 2 < __GNUC__ + (96 <= __GNUC_MINOR__) +# define YY_ATTRIBUTE_PURE __attribute__ ((__pure__)) +# else +# define YY_ATTRIBUTE_PURE +# endif +#endif + +#ifndef YY_ATTRIBUTE_UNUSED +# if defined __GNUC__ && 2 < __GNUC__ + (7 <= __GNUC_MINOR__) +# define YY_ATTRIBUTE_UNUSED __attribute__ ((__unused__)) +# else +# define YY_ATTRIBUTE_UNUSED +# endif +#endif + +/* Suppress unused-variable warnings by "using" E. */ +#if ! defined lint || defined __GNUC__ +# define YY_USE(E) ((void) (E)) +#else +# define YY_USE(E) /* empty */ +#endif + +#if defined __GNUC__ && ! defined __ICC && 407 <= __GNUC__ * 100 + __GNUC_MINOR__ +/* Suppress an incorrect diagnostic about yylval being uninitialized. */ +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \ + _Pragma ("GCC diagnostic push") \ + _Pragma ("GCC diagnostic ignored \"-Wuninitialized\"") \ + _Pragma ("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +# define YY_IGNORE_MAYBE_UNINITIALIZED_END \ + _Pragma ("GCC diagnostic pop") +#else +# define YY_INITIAL_VALUE(Value) Value +#endif +#ifndef YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN +# define YY_IGNORE_MAYBE_UNINITIALIZED_END +#endif +#ifndef YY_INITIAL_VALUE +# define YY_INITIAL_VALUE(Value) /* Nothing. */ +#endif + +#if defined __cplusplus && defined __GNUC__ && ! defined __ICC && 6 <= __GNUC__ +# define YY_IGNORE_USELESS_CAST_BEGIN \ + _Pragma ("GCC diagnostic push") \ + _Pragma ("GCC diagnostic ignored \"-Wuseless-cast\"") +# define YY_IGNORE_USELESS_CAST_END \ + _Pragma ("GCC diagnostic pop") +#endif +#ifndef YY_IGNORE_USELESS_CAST_BEGIN +# define YY_IGNORE_USELESS_CAST_BEGIN +# define YY_IGNORE_USELESS_CAST_END +#endif + + +#define YY_ASSERT(E) ((void) (0 && (E))) + +#if !defined yyoverflow + +/* The parser invokes alloca or malloc; define the necessary symbols. */ + +# ifdef YYSTACK_USE_ALLOCA +# if YYSTACK_USE_ALLOCA +# ifdef __GNUC__ +# define YYSTACK_ALLOC __builtin_alloca +# elif defined __BUILTIN_VA_ARG_INCR +# include <alloca.h> /* INFRINGES ON USER NAME SPACE */ +# elif defined _AIX +# define YYSTACK_ALLOC __alloca +# elif defined _MSC_VER +# include <malloc.h> /* INFRINGES ON USER NAME SPACE */ +# define alloca _alloca +# else +# define YYSTACK_ALLOC alloca +# if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS +# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */ + /* Use EXIT_SUCCESS as a witness for stdlib.h. */ +# ifndef EXIT_SUCCESS +# define EXIT_SUCCESS 0 +# endif +# endif +# endif +# endif +# endif + +# ifdef YYSTACK_ALLOC + /* Pacify GCC's 'empty if-body' warning. */ +# define YYSTACK_FREE(Ptr) do { /* empty */; } while (0) +# ifndef YYSTACK_ALLOC_MAXIMUM + /* The OS might guarantee only one guard page at the bottom of the stack, + and a page size can be as small as 4096 bytes. So we cannot safely + invoke alloca (N) if N exceeds 4096. Use a slightly smaller number + to allow for a few compiler-allocated temporary stack slots. */ +# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */ +# endif +# else +# define YYSTACK_ALLOC YYMALLOC +# define YYSTACK_FREE YYFREE +# ifndef YYSTACK_ALLOC_MAXIMUM +# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM +# endif +# if (defined __cplusplus && ! 
defined EXIT_SUCCESS \ + && ! ((defined YYMALLOC || defined malloc) \ + && (defined YYFREE || defined free))) +# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */ +# ifndef EXIT_SUCCESS +# define EXIT_SUCCESS 0 +# endif +# endif +# ifndef YYMALLOC +# define YYMALLOC malloc +# if ! defined malloc && ! defined EXIT_SUCCESS +void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */ +# endif +# endif +# ifndef YYFREE +# define YYFREE free +# if ! defined free && ! defined EXIT_SUCCESS +void free (void *); /* INFRINGES ON USER NAME SPACE */ +# endif +# endif +# endif +#endif /* !defined yyoverflow */ + +#if (! defined yyoverflow \ + && (! defined __cplusplus \ + || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL))) + +/* A type that is properly aligned for any stack member. */ +union yyalloc +{ + yy_state_t yyss_alloc; + YYSTYPE yyvs_alloc; +}; + +/* The size of the maximum gap between one aligned stack and the next. */ +# define YYSTACK_GAP_MAXIMUM (YYSIZEOF (union yyalloc) - 1) + +/* The size of an array large to enough to hold all stacks, each with + N elements. */ +# define YYSTACK_BYTES(N) \ + ((N) * (YYSIZEOF (yy_state_t) + YYSIZEOF (YYSTYPE)) \ + + YYSTACK_GAP_MAXIMUM) + +# define YYCOPY_NEEDED 1 + +/* Relocate STACK from its old location to the new one. The + local variables YYSIZE and YYSTACKSIZE give the old and new number of + elements in the stack, and YYPTR gives the new location of the + stack. Advance YYPTR to a properly aligned location for the next + stack. */ +# define YYSTACK_RELOCATE(Stack_alloc, Stack) \ + do \ + { \ + YYPTRDIFF_T yynewbytes; \ + YYCOPY (&yyptr->Stack_alloc, Stack, yysize); \ + Stack = &yyptr->Stack_alloc; \ + yynewbytes = yystacksize * YYSIZEOF (*Stack) + YYSTACK_GAP_MAXIMUM; \ + yyptr += yynewbytes / YYSIZEOF (*yyptr); \ + } \ + while (0) + +#endif + +#if defined YYCOPY_NEEDED && YYCOPY_NEEDED +/* Copy COUNT objects from SRC to DST. The source and destination do + not overlap. */ +# ifndef YYCOPY +# if defined __GNUC__ && 1 < __GNUC__ +# define YYCOPY(Dst, Src, Count) \ + __builtin_memcpy (Dst, Src, YY_CAST (YYSIZE_T, (Count)) * sizeof (*(Src))) +# else +# define YYCOPY(Dst, Src, Count) \ + do \ + { \ + YYPTRDIFF_T yyi; \ + for (yyi = 0; yyi < (Count); yyi++) \ + (Dst)[yyi] = (Src)[yyi]; \ + } \ + while (0) +# endif +# endif +#endif /* !YYCOPY_NEEDED */ + +/* YYFINAL -- State number of the termination state. */ +#define YYFINAL 5 +/* YYLAST -- Last index in YYTABLE. */ +#define YYLAST 239 + +/* YYNTOKENS -- Number of terminals. */ +#define YYNTOKENS 57 +/* YYNNTS -- Number of nonterminals. */ +#define YYNNTS 23 +/* YYNRULES -- Number of rules. */ +#define YYNRULES 104 +/* YYNSTATES -- Number of states. */ +#define YYNSTATES 143 + +/* YYMAXUTOK -- Last valid token kind. */ +#define YYMAXUTOK 295 + + +/* YYTRANSLATE(TOKEN-NUM) -- Symbol number corresponding to TOKEN-NUM + as returned by yylex, with out-of-bounds checking. */ +#define YYTRANSLATE(YYX) \ + (0 <= (YYX) && (YYX) <= YYMAXUTOK \ + ? YY_CAST (yysymbol_kind_t, yytranslate[YYX]) \ + : YYSYMBOL_YYUNDEF) + +/* YYTRANSLATE[TOKEN-NUM] -- Symbol number corresponding to TOKEN-NUM + as returned by yylex. 
*/ +static const yytype_int8 yytranslate[] = +{ + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 48, 44, 2, 2, + 46, 47, 42, 40, 50, 41, 55, 43, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 56, 49, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 51, 2, 52, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 53, 2, 54, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 45 +}; + +#if YYDEBUG + /* YYRLINE[YYN] -- Source line where rule number YYN was defined. */ +static const yytype_int16 yyrline[] = +{ + 0, 117, 117, 123, 127, 128, 132, 133, 134, 138, + 139, 140, 141, 142, 143, 144, 148, 149, 150, 151, + 152, 153, 157, 158, 162, 163, 164, 165, 166, 167, + 169, 171, 178, 188, 189, 193, 194, 195, 196, 200, + 201, 202, 203, 207, 208, 209, 210, 211, 212, 213, + 214, 215, 219, 220, 224, 225, 229, 230, 234, 235, + 239, 240, 241, 246, 247, 248, 249, 250, 251, 253, + 257, 261, 262, 266, 270, 271, 272, 273, 274, 275, + 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, + 286, 287, 288, 289, 290, 291, 292, 293, 297, 298, + 299, 300, 301, 302, 303 +}; +#endif + +/** Accessing symbol of state STATE. */ +#define YY_ACCESSING_SYMBOL(State) YY_CAST (yysymbol_kind_t, yystos[State]) + +#if YYDEBUG || 0 +/* The user-facing name of the symbol whose (internal) number is + YYSYMBOL. No bounds checking. */ +static const char *yysymbol_name (yysymbol_kind_t yysymbol) YY_ATTRIBUTE_UNUSED; + +/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. + First, the terminals, then, starting at YYNTOKENS, nonterminals. */ +static const char *const yytname[] = +{ + "\"end of file\"", "error", "\"invalid token\"", "TO_P", "NULL_P", + "TRUE_P", "FALSE_P", "IS_P", "UNKNOWN_P", "EXISTS_P", "IDENT_P", + "STRING_P", "NUMERIC_P", "INT_P", "VARIABLE_P", "OR_P", "AND_P", "NOT_P", + "LESS_P", "LESSEQUAL_P", "EQUAL_P", "NOTEQUAL_P", "GREATEREQUAL_P", + "GREATER_P", "ANY_P", "STRICT_P", "LAX_P", "LAST_P", "STARTS_P", + "WITH_P", "LIKE_REGEX_P", "FLAG_P", "ABS_P", "SIZE_P", "TYPE_P", + "FLOOR_P", "DOUBLE_P", "CEILING_P", "KEYVALUE_P", "DATETIME_P", "'+'", + "'-'", "'*'", "'/'", "'%'", "UMINUS", "'('", "')'", "'$'", "'@'", "','", + "'['", "']'", "'{'", "'}'", "'.'", "'?'", "$accept", "result", + "expr_or_predicate", "mode", "scalar_value", "comp_op", + "delimited_predicate", "predicate", "starts_with_initial", + "path_primary", "accessor_expr", "expr", "index_elem", "index_list", + "array_accessor", "any_level", "any_path", "accessor_op", + "datetime_template", "opt_datetime_template", "key", "key_name", + "method", YY_NULLPTR +}; + +static const char * +yysymbol_name (yysymbol_kind_t yysymbol) +{ + return yytname[yysymbol]; +} +#endif + +#ifdef YYPRINT +/* YYTOKNUM[NUM] -- (External) token number corresponding to the + (internal) symbol number NUM (which must be that of a token). 
*/ +static const yytype_int16 yytoknum[] = +{ + 0, 256, 257, 258, 259, 260, 261, 262, 263, 264, + 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, + 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, + 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, + 43, 45, 42, 47, 37, 295, 40, 41, 36, 64, + 44, 91, 93, 123, 125, 46, 63 +}; +#endif + +#define YYPACT_NINF (-44) + +#define yypact_value_is_default(Yyn) \ + ((Yyn) == YYPACT_NINF) + +#define YYTABLE_NINF (-105) + +#define yytable_value_is_error(Yyn) \ + 0 + + /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing + STATE-NUM. */ +static const yytype_int16 yypact[] = +{ + 7, -44, -44, 18, 51, -44, -44, -44, -44, -43, + -44, -44, -44, -44, -3, -44, 114, 114, 51, -44, + -44, -44, -44, -44, 10, -44, -35, 195, 114, 51, + -44, 51, -44, -44, 14, 165, 51, 51, 68, 140, + -9, -44, -44, -44, -44, -44, -44, -44, -44, 37, + 60, 114, 114, 114, 114, 114, 114, 46, 20, 195, + 30, 3, -35, 59, -44, 24, -2, -44, -41, -44, + -44, -44, -44, -44, -44, -44, -44, -44, 31, -44, + -44, -44, -44, -44, -44, -44, 48, 50, 52, 61, + 67, 69, 78, 83, -44, -44, -44, -44, 84, 51, + 17, 100, 79, 79, -44, -44, -44, 62, -44, -44, + -35, 75, -44, -44, -44, 114, 114, -44, -8, 121, + 86, 54, -44, -44, -44, 123, -44, 62, -44, -44, + -44, -1, -44, -44, 88, -44, -44, -44, -8, -44, + -44, 82, -44 +}; + + /* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM. + Performed when YYTABLE does not specify something else to do. Zero + means the default is an error. */ +static const yytype_int8 yydefact[] = +{ + 8, 6, 7, 0, 0, 1, 10, 11, 12, 0, + 9, 13, 14, 15, 0, 38, 0, 0, 0, 36, + 37, 2, 35, 24, 5, 39, 43, 4, 0, 0, + 28, 0, 45, 46, 0, 0, 0, 0, 0, 0, + 0, 65, 42, 18, 20, 16, 17, 21, 19, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 22, 44, 27, 26, 0, 52, 54, 0, 76, + 77, 78, 79, 80, 81, 82, 74, 75, 60, 83, + 84, 93, 94, 95, 96, 97, 85, 86, 87, 88, + 89, 90, 92, 91, 64, 66, 63, 73, 0, 0, + 0, 31, 47, 48, 49, 50, 51, 25, 23, 22, + 0, 0, 41, 40, 56, 0, 0, 57, 0, 72, + 0, 0, 33, 34, 30, 0, 29, 53, 55, 58, + 59, 0, 70, 71, 0, 67, 69, 32, 0, 61, + 68, 0, 62 +}; + + /* YYPGOTO[NTERM-NUM]. */ +static const yytype_int8 yypgoto[] = +{ + -44, -44, -44, -44, -44, -44, 124, -14, -44, -44, + -44, -4, 21, -44, -44, 1, -44, -18, -44, -44, + -44, -44, -44 +}; + + /* YYDEFGOTO[NTERM-NUM]. */ +static const yytype_uint8 yydefgoto[] = +{ + 0, 3, 21, 4, 22, 56, 23, 24, 124, 25, + 26, 59, 67, 68, 41, 131, 95, 112, 133, 134, + 96, 97, 98 +}; + + /* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If + positive, shift that token. If negative, reduce the rule whose + number is the opposite. If YYTABLE_NINF, syntax error. 
*/ +static const yytype_int16 yytable[] = +{ + 27, 115, 138, 28, 34, 129, 9, -3, 42, 116, + 111, 117, 32, 33, 35, 58, 38, 60, 5, 130, + 39, 40, 63, 64, 57, 36, 37, 35, 122, 36, + 37, 123, 1, 2, 66, 36, 37, 99, 51, 52, + 53, 54, 55, 29, 113, 36, 37, 102, 103, 104, + 105, 106, 107, 139, 38, 6, 7, 8, 39, 40, + 9, 61, 10, 11, 12, 13, 100, 109, 14, 36, + 37, 101, 6, 7, 8, 37, 114, 110, 15, 10, + 11, 12, 13, 126, 118, 121, 51, 52, 53, 54, + 55, 16, 17, 108, -98, 15, -99, 18, -100, 19, + 20, 136, 51, 52, 53, 54, 55, -101, 16, 17, + 65, 127, 66, -102, 31, -103, 19, 20, 6, 7, + 8, 53, 54, 55, -104, 10, 11, 12, 13, 119, + 120, 125, 132, 135, 137, 140, 142, 128, 30, 141, + 0, 15, 0, 69, 70, 71, 72, 73, 74, 75, + 76, 77, 0, 0, 16, 17, 0, 0, 0, 0, + 31, 0, 19, 20, 78, 79, 80, 81, 82, 83, + 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, + 0, 0, 94, 43, 44, 45, 46, 47, 48, 0, + 0, 0, 0, 49, 0, 50, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 51, 52, 53, 54, 55, + 0, 0, 62, 43, 44, 45, 46, 47, 48, 0, + 0, 0, 0, 49, 0, 50, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 51, 52, 53, 54, 55 +}; + +static const yytype_int16 yycheck[] = +{ + 4, 3, 3, 46, 18, 13, 9, 0, 26, 50, + 7, 52, 16, 17, 18, 29, 51, 31, 0, 27, + 55, 56, 36, 37, 28, 15, 16, 31, 11, 15, + 16, 14, 25, 26, 38, 15, 16, 46, 40, 41, + 42, 43, 44, 46, 62, 15, 16, 51, 52, 53, + 54, 55, 56, 54, 51, 4, 5, 6, 55, 56, + 9, 47, 11, 12, 13, 14, 29, 47, 17, 15, + 16, 11, 4, 5, 6, 16, 52, 47, 27, 11, + 12, 13, 14, 8, 53, 99, 40, 41, 42, 43, + 44, 40, 41, 47, 46, 27, 46, 46, 46, 48, + 49, 47, 40, 41, 42, 43, 44, 46, 40, 41, + 42, 115, 116, 46, 46, 46, 48, 49, 4, 5, + 6, 42, 43, 44, 46, 11, 12, 13, 14, 46, + 46, 31, 11, 47, 11, 47, 54, 116, 14, 138, + -1, 27, -1, 3, 4, 5, 6, 7, 8, 9, + 10, 11, -1, -1, 40, 41, -1, -1, -1, -1, + 46, -1, 48, 49, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + -1, -1, 42, 18, 19, 20, 21, 22, 23, -1, + -1, -1, -1, 28, -1, 30, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 40, 41, 42, 43, 44, + -1, -1, 47, 18, 19, 20, 21, 22, 23, -1, + -1, -1, -1, 28, -1, 30, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 40, 41, 42, 43, 44 +}; + + /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing + symbol of state STATE-NUM. */ +static const yytype_int8 yystos[] = +{ + 0, 25, 26, 58, 60, 0, 4, 5, 6, 9, + 11, 12, 13, 14, 17, 27, 40, 41, 46, 48, + 49, 59, 61, 63, 64, 66, 67, 68, 46, 46, + 63, 46, 68, 68, 64, 68, 15, 16, 51, 55, + 56, 71, 74, 18, 19, 20, 21, 22, 23, 28, + 30, 40, 41, 42, 43, 44, 62, 68, 64, 68, + 64, 47, 47, 64, 64, 42, 68, 69, 70, 3, + 4, 5, 6, 7, 8, 9, 10, 11, 24, 25, + 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 42, 73, 77, 78, 79, 46, + 29, 11, 68, 68, 68, 68, 68, 68, 47, 47, + 47, 7, 74, 74, 52, 3, 50, 52, 53, 46, + 46, 64, 11, 14, 65, 31, 8, 68, 69, 13, + 27, 72, 11, 75, 76, 47, 47, 11, 3, 54, + 47, 72, 54 +}; + + /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ +static const yytype_int8 yyr1[] = +{ + 0, 57, 58, 58, 59, 59, 60, 60, 60, 61, + 61, 61, 61, 61, 61, 61, 62, 62, 62, 62, + 62, 62, 63, 63, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 65, 65, 66, 66, 66, 66, 67, + 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 69, 69, 70, 70, 71, 71, 72, 72, + 73, 73, 73, 74, 74, 74, 74, 74, 74, 74, + 75, 76, 76, 77, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, 79, 79, + 79, 79, 79, 79, 79 +}; + + /* YYR2[YYN] -- Number of symbols on the right hand side of rule YYN. 
*/ +static const yytype_int8 yyr2[] = +{ + 0, 2, 2, 0, 1, 1, 1, 1, 0, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 3, 4, 1, 3, 3, 3, 2, 5, + 4, 3, 5, 1, 1, 1, 1, 1, 1, 1, + 4, 4, 2, 1, 3, 2, 2, 3, 3, 3, + 3, 3, 1, 3, 1, 3, 3, 3, 1, 1, + 1, 4, 6, 2, 2, 1, 2, 4, 5, 4, + 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 +}; + + +enum { YYENOMEM = -2 }; + +#define yyerrok (yyerrstatus = 0) +#define yyclearin (yychar = YYEMPTY) + +#define YYACCEPT goto yyacceptlab +#define YYABORT goto yyabortlab +#define YYERROR goto yyerrorlab + + +#define YYRECOVERING() (!!yyerrstatus) + +#define YYBACKUP(Token, Value) \ + do \ + if (yychar == YYEMPTY) \ + { \ + yychar = (Token); \ + yylval = (Value); \ + YYPOPSTACK (yylen); \ + yystate = *yyssp; \ + goto yybackup; \ + } \ + else \ + { \ + yyerror (result, escontext, YY_("syntax error: cannot back up")); \ + YYERROR; \ + } \ + while (0) + +/* Backward compatibility with an undocumented macro. + Use YYerror or YYUNDEF. */ +#define YYERRCODE YYUNDEF + + +/* Enable debugging if requested. */ +#if YYDEBUG + +# ifndef YYFPRINTF +# include <stdio.h> /* INFRINGES ON USER NAME SPACE */ +# define YYFPRINTF fprintf +# endif + +# define YYDPRINTF(Args) \ +do { \ + if (yydebug) \ + YYFPRINTF Args; \ +} while (0) + +/* This macro is provided for backward compatibility. */ +# ifndef YY_LOCATION_PRINT +# define YY_LOCATION_PRINT(File, Loc) ((void) 0) +# endif + + +# define YY_SYMBOL_PRINT(Title, Kind, Value, Location) \ +do { \ + if (yydebug) \ + { \ + YYFPRINTF (stderr, "%s ", Title); \ + yy_symbol_print (stderr, \ + Kind, Value, result, escontext); \ + YYFPRINTF (stderr, "\n"); \ + } \ +} while (0) + + +/*-----------------------------------. +| Print this symbol's value on YYO. | +`-----------------------------------*/ + +static void +yy_symbol_value_print (FILE *yyo, + yysymbol_kind_t yykind, YYSTYPE const * const yyvaluep, JsonPathParseResult **result, struct Node *escontext) +{ + FILE *yyoutput = yyo; + YY_USE (yyoutput); + YY_USE (result); + YY_USE (escontext); + if (!yyvaluep) + return; +# ifdef YYPRINT + if (yykind < YYNTOKENS) + YYPRINT (yyo, yytoknum[yykind], *yyvaluep); +# endif + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + YY_USE (yykind); + YY_IGNORE_MAYBE_UNINITIALIZED_END +} + + +/*---------------------------. +| Print this symbol on YYO. | +`---------------------------*/ + +static void +yy_symbol_print (FILE *yyo, + yysymbol_kind_t yykind, YYSTYPE const * const yyvaluep, JsonPathParseResult **result, struct Node *escontext) +{ + YYFPRINTF (yyo, "%s %s (", + yykind < YYNTOKENS ? "token" : "nterm", yysymbol_name (yykind)); + + yy_symbol_value_print (yyo, yykind, yyvaluep, result, escontext); + YYFPRINTF (yyo, ")"); +} + +/*------------------------------------------------------------------. +| yy_stack_print -- Print the state stack from its BOTTOM up to its | +| TOP (included). | +`------------------------------------------------------------------*/ + +static void +yy_stack_print (yy_state_t *yybottom, yy_state_t *yytop) +{ + YYFPRINTF (stderr, "Stack now"); + for (; yybottom <= yytop; yybottom++) + { + int yybot = *yybottom; + YYFPRINTF (stderr, " %d", yybot); + } + YYFPRINTF (stderr, "\n"); +} + +# define YY_STACK_PRINT(Bottom, Top) \ +do { \ + if (yydebug) \ + yy_stack_print ((Bottom), (Top)); \ +} while (0) + + +/*------------------------------------------------. +| Report that the YYRULE is going to be reduced. 
| +`------------------------------------------------*/ + +static void +yy_reduce_print (yy_state_t *yyssp, YYSTYPE *yyvsp, + int yyrule, JsonPathParseResult **result, struct Node *escontext) +{ + int yylno = yyrline[yyrule]; + int yynrhs = yyr2[yyrule]; + int yyi; + YYFPRINTF (stderr, "Reducing stack by rule %d (line %d):\n", + yyrule - 1, yylno); + /* The symbols being reduced. */ + for (yyi = 0; yyi < yynrhs; yyi++) + { + YYFPRINTF (stderr, " $%d = ", yyi + 1); + yy_symbol_print (stderr, + YY_ACCESSING_SYMBOL (+yyssp[yyi + 1 - yynrhs]), + &yyvsp[(yyi + 1) - (yynrhs)], result, escontext); + YYFPRINTF (stderr, "\n"); + } +} + +# define YY_REDUCE_PRINT(Rule) \ +do { \ + if (yydebug) \ + yy_reduce_print (yyssp, yyvsp, Rule, result, escontext); \ +} while (0) + +/* Nonzero means print parse trace. It is left uninitialized so that + multiple parsers can coexist. */ +int yydebug; +#else /* !YYDEBUG */ +# define YYDPRINTF(Args) ((void) 0) +# define YY_SYMBOL_PRINT(Title, Kind, Value, Location) +# define YY_STACK_PRINT(Bottom, Top) +# define YY_REDUCE_PRINT(Rule) +#endif /* !YYDEBUG */ + + +/* YYINITDEPTH -- initial size of the parser's stacks. */ +#ifndef YYINITDEPTH +# define YYINITDEPTH 200 +#endif + +/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only + if the built-in stack extension method is used). + + Do not make this value too large; the results are undefined if + YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH) + evaluated with infinite-precision integer arithmetic. */ + +#ifndef YYMAXDEPTH +# define YYMAXDEPTH 10000 +#endif + + + + + + +/*-----------------------------------------------. +| Release the memory associated to this symbol. | +`-----------------------------------------------*/ + +static void +yydestruct (const char *yymsg, + yysymbol_kind_t yykind, YYSTYPE *yyvaluep, JsonPathParseResult **result, struct Node *escontext) +{ + YY_USE (yyvaluep); + YY_USE (result); + YY_USE (escontext); + if (!yymsg) + yymsg = "Deleting"; + YY_SYMBOL_PRINT (yymsg, yykind, yyvaluep, yylocationp); + + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + YY_USE (yykind); + YY_IGNORE_MAYBE_UNINITIALIZED_END +} + + + + + + +/*----------. +| yyparse. | +`----------*/ + +int +yyparse (JsonPathParseResult **result, struct Node *escontext) +{ +/* Lookahead token kind. */ +int yychar; + + +/* The semantic value of the lookahead symbol. */ +/* Default value used for initialization, for pacifying older GCCs + or non-GCC compilers. */ +YY_INITIAL_VALUE (static __thread YYSTYPE yyval_default;) +YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); + + /* Number of syntax errors so far. */ + int yynerrs = 0; + + yy_state_fast_t yystate = 0; + /* Number of tokens to shift before error messages enabled. */ + int yyerrstatus = 0; + + /* Refer to the stacks through separate pointers, to allow yyoverflow + to reallocate them elsewhere. */ + + /* Their size. */ + YYPTRDIFF_T yystacksize = YYINITDEPTH; + + /* The state stack: array, bottom, top. */ + yy_state_t yyssa[YYINITDEPTH]; + yy_state_t *yyss = yyssa; + yy_state_t *yyssp = yyss; + + /* The semantic value stack: array, bottom, top. */ + YYSTYPE yyvsa[YYINITDEPTH]; + YYSTYPE *yyvs = yyvsa; + YYSTYPE *yyvsp = yyvs; + + int yyn; + /* The return value of yyparse. */ + int yyresult; + /* Lookahead symbol kind. */ + yysymbol_kind_t yytoken = YYSYMBOL_YYEMPTY; + /* The variables used to return semantic value and location from the + action routines. 
*/ + YYSTYPE yyval; + + + +#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N)) + + /* The number of symbols on the RHS of the reduced rule. + Keep to zero when no symbol should be popped. */ + int yylen = 0; + + YYDPRINTF ((stderr, "Starting parse\n")); + + yychar = YYEMPTY; /* Cause a token to be read. */ + goto yysetstate; + + +/*------------------------------------------------------------. +| yynewstate -- push a new state, which is found in yystate. | +`------------------------------------------------------------*/ +yynewstate: + /* In all cases, when you get here, the value and location stacks + have just been pushed. So pushing a state here evens the stacks. */ + yyssp++; + + +/*--------------------------------------------------------------------. +| yysetstate -- set current state (the top of the stack) to yystate. | +`--------------------------------------------------------------------*/ +yysetstate: + YYDPRINTF ((stderr, "Entering state %d\n", yystate)); + YY_ASSERT (0 <= yystate && yystate < YYNSTATES); + YY_IGNORE_USELESS_CAST_BEGIN + *yyssp = YY_CAST (yy_state_t, yystate); + YY_IGNORE_USELESS_CAST_END + YY_STACK_PRINT (yyss, yyssp); + + if (yyss + yystacksize - 1 <= yyssp) +#if !defined yyoverflow && !defined YYSTACK_RELOCATE + goto yyexhaustedlab; +#else + { + /* Get the current used size of the three stacks, in elements. */ + YYPTRDIFF_T yysize = yyssp - yyss + 1; + +# if defined yyoverflow + { + /* Give user a chance to reallocate the stack. Use copies of + these so that the &'s don't force the real ones into + memory. */ + yy_state_t *yyss1 = yyss; + YYSTYPE *yyvs1 = yyvs; + + /* Each stack pointer address is followed by the size of the + data in use in that stack, in bytes. This used to be a + conditional around just the two extra args, but that might + be undefined if yyoverflow is a macro. */ + yyoverflow (YY_("memory exhausted"), + &yyss1, yysize * YYSIZEOF (*yyssp), + &yyvs1, yysize * YYSIZEOF (*yyvsp), + &yystacksize); + yyss = yyss1; + yyvs = yyvs1; + } +# else /* defined YYSTACK_RELOCATE */ + /* Extend the stack our own way. */ + if (YYMAXDEPTH <= yystacksize) + goto yyexhaustedlab; + yystacksize *= 2; + if (YYMAXDEPTH < yystacksize) + yystacksize = YYMAXDEPTH; + + { + yy_state_t *yyss1 = yyss; + union yyalloc *yyptr = + YY_CAST (union yyalloc *, + YYSTACK_ALLOC (YY_CAST (YYSIZE_T, YYSTACK_BYTES (yystacksize)))); + if (! yyptr) + goto yyexhaustedlab; + YYSTACK_RELOCATE (yyss_alloc, yyss); + YYSTACK_RELOCATE (yyvs_alloc, yyvs); +# undef YYSTACK_RELOCATE + if (yyss1 != yyssa) + YYSTACK_FREE (yyss1); + } +# endif + + yyssp = yyss + yysize - 1; + yyvsp = yyvs + yysize - 1; + + YY_IGNORE_USELESS_CAST_BEGIN + YYDPRINTF ((stderr, "Stack size increased to %ld\n", + YY_CAST (long, yystacksize))); + YY_IGNORE_USELESS_CAST_END + + if (yyss + yystacksize - 1 <= yyssp) + YYABORT; + } +#endif /* !defined yyoverflow && !defined YYSTACK_RELOCATE */ + + if (yystate == YYFINAL) + YYACCEPT; + + goto yybackup; + + +/*-----------. +| yybackup. | +`-----------*/ +yybackup: + /* Do appropriate processing given the current state. Read a + lookahead token if we need one and don't already have one. */ + + /* First try to decide what to do without reference to lookahead token. */ + yyn = yypact[yystate]; + if (yypact_value_is_default (yyn)) + goto yydefault; + + /* Not known => get a lookahead token if don't already have one. */ + + /* YYCHAR is either empty, or end-of-input, or a valid lookahead. 
*/ + if (yychar == YYEMPTY) + { + YYDPRINTF ((stderr, "Reading a token\n")); + yychar = yylex (&yylval, result, escontext); + } + + if (yychar <= YYEOF) + { + yychar = YYEOF; + yytoken = YYSYMBOL_YYEOF; + YYDPRINTF ((stderr, "Now at end of input.\n")); + } + else if (yychar == YYerror) + { + /* The scanner already issued an error message, process directly + to error recovery. But do not keep the error token as + lookahead, it is too special and may lead us to an endless + loop in error recovery. */ + yychar = YYUNDEF; + yytoken = YYSYMBOL_YYerror; + goto yyerrlab1; + } + else + { + yytoken = YYTRANSLATE (yychar); + YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc); + } + + /* If the proper action on seeing token YYTOKEN is to reduce or to + detect an error, take that action. */ + yyn += yytoken; + if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) + goto yydefault; + yyn = yytable[yyn]; + if (yyn <= 0) + { + if (yytable_value_is_error (yyn)) + goto yyerrlab; + yyn = -yyn; + goto yyreduce; + } + + /* Count tokens shifted since error; after three, turn off error + status. */ + if (yyerrstatus) + yyerrstatus--; + + /* Shift the lookahead token. */ + YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc); + yystate = yyn; + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + *++yyvsp = yylval; + YY_IGNORE_MAYBE_UNINITIALIZED_END + + /* Discard the shifted token. */ + yychar = YYEMPTY; + goto yynewstate; + + +/*-----------------------------------------------------------. +| yydefault -- do the default action for the current state. | +`-----------------------------------------------------------*/ +yydefault: + yyn = yydefact[yystate]; + if (yyn == 0) + goto yyerrlab; + goto yyreduce; + + +/*-----------------------------. +| yyreduce -- do a reduction. | +`-----------------------------*/ +yyreduce: + /* yyn is the number of a rule to reduce with. */ + yylen = yyr2[yyn]; + + /* If YYLEN is nonzero, implement the default value of the action: + '$$ = $1'. + + Otherwise, the following line sets YYVAL to garbage. + This behavior is undocumented and Bison + users should not rely upon it. Assigning to YYVAL + unconditionally makes the parser a bit smaller, and it avoids a + GCC warning that YYVAL may be used uninitialized. 
*/ + yyval = yyvsp[1-yylen]; + + + YY_REDUCE_PRINT (yyn); + switch (yyn) + { + case 2: /* result: mode expr_or_predicate */ +#line 117 "jsonpath_gram.y" + { + *result = palloc(sizeof(JsonPathParseResult)); + (*result)->expr = (yyvsp[0].value); + (*result)->lax = (yyvsp[-1].boolean); + (void) yynerrs; + } +#line 1350 "jsonpath_gram.c" + break; + + case 3: /* result: %empty */ +#line 123 "jsonpath_gram.y" + { *result = NULL; } +#line 1356 "jsonpath_gram.c" + break; + + case 4: /* expr_or_predicate: expr */ +#line 127 "jsonpath_gram.y" + { (yyval.value) = (yyvsp[0].value); } +#line 1362 "jsonpath_gram.c" + break; + + case 5: /* expr_or_predicate: predicate */ +#line 128 "jsonpath_gram.y" + { (yyval.value) = (yyvsp[0].value); } +#line 1368 "jsonpath_gram.c" + break; + + case 6: /* mode: STRICT_P */ +#line 132 "jsonpath_gram.y" + { (yyval.boolean) = false; } +#line 1374 "jsonpath_gram.c" + break; + + case 7: /* mode: LAX_P */ +#line 133 "jsonpath_gram.y" + { (yyval.boolean) = true; } +#line 1380 "jsonpath_gram.c" + break; + + case 8: /* mode: %empty */ +#line 134 "jsonpath_gram.y" + { (yyval.boolean) = true; } +#line 1386 "jsonpath_gram.c" + break; + + case 9: /* scalar_value: STRING_P */ +#line 138 "jsonpath_gram.y" + { (yyval.value) = makeItemString(&(yyvsp[0].str)); } +#line 1392 "jsonpath_gram.c" + break; + + case 10: /* scalar_value: NULL_P */ +#line 139 "jsonpath_gram.y" + { (yyval.value) = makeItemString(NULL); } +#line 1398 "jsonpath_gram.c" + break; + + case 11: /* scalar_value: TRUE_P */ +#line 140 "jsonpath_gram.y" + { (yyval.value) = makeItemBool(true); } +#line 1404 "jsonpath_gram.c" + break; + + case 12: /* scalar_value: FALSE_P */ +#line 141 "jsonpath_gram.y" + { (yyval.value) = makeItemBool(false); } +#line 1410 "jsonpath_gram.c" + break; + + case 13: /* scalar_value: NUMERIC_P */ +#line 142 "jsonpath_gram.y" + { (yyval.value) = makeItemNumeric(&(yyvsp[0].str)); } +#line 1416 "jsonpath_gram.c" + break; + + case 14: /* scalar_value: INT_P */ +#line 143 "jsonpath_gram.y" + { (yyval.value) = makeItemNumeric(&(yyvsp[0].str)); } +#line 1422 "jsonpath_gram.c" + break; + + case 15: /* scalar_value: VARIABLE_P */ +#line 144 "jsonpath_gram.y" + { (yyval.value) = makeItemVariable(&(yyvsp[0].str)); } +#line 1428 "jsonpath_gram.c" + break; + + case 16: /* comp_op: EQUAL_P */ +#line 148 "jsonpath_gram.y" + { (yyval.optype) = jpiEqual; } +#line 1434 "jsonpath_gram.c" + break; + + case 17: /* comp_op: NOTEQUAL_P */ +#line 149 "jsonpath_gram.y" + { (yyval.optype) = jpiNotEqual; } +#line 1440 "jsonpath_gram.c" + break; + + case 18: /* comp_op: LESS_P */ +#line 150 "jsonpath_gram.y" + { (yyval.optype) = jpiLess; } +#line 1446 "jsonpath_gram.c" + break; + + case 19: /* comp_op: GREATER_P */ +#line 151 "jsonpath_gram.y" + { (yyval.optype) = jpiGreater; } +#line 1452 "jsonpath_gram.c" + break; + + case 20: /* comp_op: LESSEQUAL_P */ +#line 152 "jsonpath_gram.y" + { (yyval.optype) = jpiLessOrEqual; } +#line 1458 "jsonpath_gram.c" + break; + + case 21: /* comp_op: GREATEREQUAL_P */ +#line 153 "jsonpath_gram.y" + { (yyval.optype) = jpiGreaterOrEqual; } +#line 1464 "jsonpath_gram.c" + break; + + case 22: /* delimited_predicate: '(' predicate ')' */ +#line 157 "jsonpath_gram.y" + { (yyval.value) = (yyvsp[-1].value); } +#line 1470 "jsonpath_gram.c" + break; + + case 23: /* delimited_predicate: EXISTS_P '(' expr ')' */ +#line 158 "jsonpath_gram.y" + { (yyval.value) = makeItemUnary(jpiExists, (yyvsp[-1].value)); } +#line 1476 "jsonpath_gram.c" + break; + + case 24: /* predicate: delimited_predicate */ 
+#line 162 "jsonpath_gram.y" + { (yyval.value) = (yyvsp[0].value); } +#line 1482 "jsonpath_gram.c" + break; + + case 25: /* predicate: expr comp_op expr */ +#line 163 "jsonpath_gram.y" + { (yyval.value) = makeItemBinary((yyvsp[-1].optype), (yyvsp[-2].value), (yyvsp[0].value)); } +#line 1488 "jsonpath_gram.c" + break; + + case 26: /* predicate: predicate AND_P predicate */ +#line 164 "jsonpath_gram.y" + { (yyval.value) = makeItemBinary(jpiAnd, (yyvsp[-2].value), (yyvsp[0].value)); } +#line 1494 "jsonpath_gram.c" + break; + + case 27: /* predicate: predicate OR_P predicate */ +#line 165 "jsonpath_gram.y" + { (yyval.value) = makeItemBinary(jpiOr, (yyvsp[-2].value), (yyvsp[0].value)); } +#line 1500 "jsonpath_gram.c" + break; + + case 28: /* predicate: NOT_P delimited_predicate */ +#line 166 "jsonpath_gram.y" + { (yyval.value) = makeItemUnary(jpiNot, (yyvsp[0].value)); } +#line 1506 "jsonpath_gram.c" + break; + + case 29: /* predicate: '(' predicate ')' IS_P UNKNOWN_P */ +#line 168 "jsonpath_gram.y" + { (yyval.value) = makeItemUnary(jpiIsUnknown, (yyvsp[-3].value)); } +#line 1512 "jsonpath_gram.c" + break; + + case 30: /* predicate: expr STARTS_P WITH_P starts_with_initial */ +#line 170 "jsonpath_gram.y" + { (yyval.value) = makeItemBinary(jpiStartsWith, (yyvsp[-3].value), (yyvsp[0].value)); } +#line 1518 "jsonpath_gram.c" + break; + + case 31: /* predicate: expr LIKE_REGEX_P STRING_P */ +#line 172 "jsonpath_gram.y" + { + JsonPathParseItem *jppitem; + if (! makeItemLikeRegex((yyvsp[-2].value), &(yyvsp[0].str), NULL, &jppitem, escontext)) + YYABORT; + (yyval.value) = jppitem; + } +#line 1529 "jsonpath_gram.c" + break; + + case 32: /* predicate: expr LIKE_REGEX_P STRING_P FLAG_P STRING_P */ +#line 179 "jsonpath_gram.y" + { + JsonPathParseItem *jppitem; + if (! 
makeItemLikeRegex((yyvsp[-4].value), &(yyvsp[-2].str), &(yyvsp[0].str), &jppitem, escontext)) + YYABORT; + (yyval.value) = jppitem; + } +#line 1540 "jsonpath_gram.c" + break; + + case 33: /* starts_with_initial: STRING_P */ +#line 188 "jsonpath_gram.y" + { (yyval.value) = makeItemString(&(yyvsp[0].str)); } +#line 1546 "jsonpath_gram.c" + break; + + case 34: /* starts_with_initial: VARIABLE_P */ +#line 189 "jsonpath_gram.y" + { (yyval.value) = makeItemVariable(&(yyvsp[0].str)); } +#line 1552 "jsonpath_gram.c" + break; + + case 35: /* path_primary: scalar_value */ +#line 193 "jsonpath_gram.y" + { (yyval.value) = (yyvsp[0].value); } +#line 1558 "jsonpath_gram.c" + break; + + case 36: /* path_primary: '$' */ +#line 194 "jsonpath_gram.y" + { (yyval.value) = makeItemType(jpiRoot); } +#line 1564 "jsonpath_gram.c" + break; + + case 37: /* path_primary: '@' */ +#line 195 "jsonpath_gram.y" + { (yyval.value) = makeItemType(jpiCurrent); } +#line 1570 "jsonpath_gram.c" + break; + + case 38: /* path_primary: LAST_P */ +#line 196 "jsonpath_gram.y" + { (yyval.value) = makeItemType(jpiLast); } +#line 1576 "jsonpath_gram.c" + break; + + case 39: /* accessor_expr: path_primary */ +#line 200 "jsonpath_gram.y" + { (yyval.elems) = list_make1((yyvsp[0].value)); } +#line 1582 "jsonpath_gram.c" + break; + + case 40: /* accessor_expr: '(' expr ')' accessor_op */ +#line 201 "jsonpath_gram.y" + { (yyval.elems) = list_make2((yyvsp[-2].value), (yyvsp[0].value)); } +#line 1588 "jsonpath_gram.c" + break; + + case 41: /* accessor_expr: '(' predicate ')' accessor_op */ +#line 202 "jsonpath_gram.y" + { (yyval.elems) = list_make2((yyvsp[-2].value), (yyvsp[0].value)); } +#line 1594 "jsonpath_gram.c" + break; + + case 42: /* accessor_expr: accessor_expr accessor_op */ +#line 203 "jsonpath_gram.y" + { (yyval.elems) = lappend((yyvsp[-1].elems), (yyvsp[0].value)); } +#line 1600 "jsonpath_gram.c" + break; + + case 43: /* expr: accessor_expr */ +#line 207 "jsonpath_gram.y" + { (yyval.value) = makeItemList((yyvsp[0].elems)); } +#line 1606 "jsonpath_gram.c" + break; + + case 44: /* expr: '(' expr ')' */ +#line 208 "jsonpath_gram.y" + { (yyval.value) = (yyvsp[-1].value); } +#line 1612 "jsonpath_gram.c" + break; + + case 45: /* expr: '+' expr */ +#line 209 "jsonpath_gram.y" + { (yyval.value) = makeItemUnary(jpiPlus, (yyvsp[0].value)); } +#line 1618 "jsonpath_gram.c" + break; + + case 46: /* expr: '-' expr */ +#line 210 "jsonpath_gram.y" + { (yyval.value) = makeItemUnary(jpiMinus, (yyvsp[0].value)); } +#line 1624 "jsonpath_gram.c" + break; + + case 47: /* expr: expr '+' expr */ +#line 211 "jsonpath_gram.y" + { (yyval.value) = makeItemBinary(jpiAdd, (yyvsp[-2].value), (yyvsp[0].value)); } +#line 1630 "jsonpath_gram.c" + break; + + case 48: /* expr: expr '-' expr */ +#line 212 "jsonpath_gram.y" + { (yyval.value) = makeItemBinary(jpiSub, (yyvsp[-2].value), (yyvsp[0].value)); } +#line 1636 "jsonpath_gram.c" + break; + + case 49: /* expr: expr '*' expr */ +#line 213 "jsonpath_gram.y" + { (yyval.value) = makeItemBinary(jpiMul, (yyvsp[-2].value), (yyvsp[0].value)); } +#line 1642 "jsonpath_gram.c" + break; + + case 50: /* expr: expr '/' expr */ +#line 214 "jsonpath_gram.y" + { (yyval.value) = makeItemBinary(jpiDiv, (yyvsp[-2].value), (yyvsp[0].value)); } +#line 1648 "jsonpath_gram.c" + break; + + case 51: /* expr: expr '%' expr */ +#line 215 "jsonpath_gram.y" + { (yyval.value) = makeItemBinary(jpiMod, (yyvsp[-2].value), (yyvsp[0].value)); } +#line 1654 "jsonpath_gram.c" + break; + + case 52: /* index_elem: expr */ +#line 219 "jsonpath_gram.y" + 
{ (yyval.value) = makeItemBinary(jpiSubscript, (yyvsp[0].value), NULL); } +#line 1660 "jsonpath_gram.c" + break; + + case 53: /* index_elem: expr TO_P expr */ +#line 220 "jsonpath_gram.y" + { (yyval.value) = makeItemBinary(jpiSubscript, (yyvsp[-2].value), (yyvsp[0].value)); } +#line 1666 "jsonpath_gram.c" + break; + + case 54: /* index_list: index_elem */ +#line 224 "jsonpath_gram.y" + { (yyval.indexs) = list_make1((yyvsp[0].value)); } +#line 1672 "jsonpath_gram.c" + break; + + case 55: /* index_list: index_list ',' index_elem */ +#line 225 "jsonpath_gram.y" + { (yyval.indexs) = lappend((yyvsp[-2].indexs), (yyvsp[0].value)); } +#line 1678 "jsonpath_gram.c" + break; + + case 56: /* array_accessor: '[' '*' ']' */ +#line 229 "jsonpath_gram.y" + { (yyval.value) = makeItemType(jpiAnyArray); } +#line 1684 "jsonpath_gram.c" + break; + + case 57: /* array_accessor: '[' index_list ']' */ +#line 230 "jsonpath_gram.y" + { (yyval.value) = makeIndexArray((yyvsp[-1].indexs)); } +#line 1690 "jsonpath_gram.c" + break; + + case 58: /* any_level: INT_P */ +#line 234 "jsonpath_gram.y" + { (yyval.integer) = pg_strtoint32((yyvsp[0].str).val); } +#line 1696 "jsonpath_gram.c" + break; + + case 59: /* any_level: LAST_P */ +#line 235 "jsonpath_gram.y" + { (yyval.integer) = -1; } +#line 1702 "jsonpath_gram.c" + break; + + case 60: /* any_path: ANY_P */ +#line 239 "jsonpath_gram.y" + { (yyval.value) = makeAny(0, -1); } +#line 1708 "jsonpath_gram.c" + break; + + case 61: /* any_path: ANY_P '{' any_level '}' */ +#line 240 "jsonpath_gram.y" + { (yyval.value) = makeAny((yyvsp[-1].integer), (yyvsp[-1].integer)); } +#line 1714 "jsonpath_gram.c" + break; + + case 62: /* any_path: ANY_P '{' any_level TO_P any_level '}' */ +#line 242 "jsonpath_gram.y" + { (yyval.value) = makeAny((yyvsp[-3].integer), (yyvsp[-1].integer)); } +#line 1720 "jsonpath_gram.c" + break; + + case 63: /* accessor_op: '.' key */ +#line 246 "jsonpath_gram.y" + { (yyval.value) = (yyvsp[0].value); } +#line 1726 "jsonpath_gram.c" + break; + + case 64: /* accessor_op: '.' '*' */ +#line 247 "jsonpath_gram.y" + { (yyval.value) = makeItemType(jpiAnyKey); } +#line 1732 "jsonpath_gram.c" + break; + + case 65: /* accessor_op: array_accessor */ +#line 248 "jsonpath_gram.y" + { (yyval.value) = (yyvsp[0].value); } +#line 1738 "jsonpath_gram.c" + break; + + case 66: /* accessor_op: '.' any_path */ +#line 249 "jsonpath_gram.y" + { (yyval.value) = (yyvsp[0].value); } +#line 1744 "jsonpath_gram.c" + break; + + case 67: /* accessor_op: '.' method '(' ')' */ +#line 250 "jsonpath_gram.y" + { (yyval.value) = makeItemType((yyvsp[-2].optype)); } +#line 1750 "jsonpath_gram.c" + break; + + case 68: /* accessor_op: '.' DATETIME_P '(' opt_datetime_template ')' */ +#line 252 "jsonpath_gram.y" + { (yyval.value) = makeItemUnary(jpiDatetime, (yyvsp[-1].value)); } +#line 1756 "jsonpath_gram.c" + break; + + case 69: /* accessor_op: '?' 
'(' predicate ')' */ +#line 253 "jsonpath_gram.y" + { (yyval.value) = makeItemUnary(jpiFilter, (yyvsp[-1].value)); } +#line 1762 "jsonpath_gram.c" + break; + + case 70: /* datetime_template: STRING_P */ +#line 257 "jsonpath_gram.y" + { (yyval.value) = makeItemString(&(yyvsp[0].str)); } +#line 1768 "jsonpath_gram.c" + break; + + case 71: /* opt_datetime_template: datetime_template */ +#line 261 "jsonpath_gram.y" + { (yyval.value) = (yyvsp[0].value); } +#line 1774 "jsonpath_gram.c" + break; + + case 72: /* opt_datetime_template: %empty */ +#line 262 "jsonpath_gram.y" + { (yyval.value) = NULL; } +#line 1780 "jsonpath_gram.c" + break; + + case 73: /* key: key_name */ +#line 266 "jsonpath_gram.y" + { (yyval.value) = makeItemKey(&(yyvsp[0].str)); } +#line 1786 "jsonpath_gram.c" + break; + + case 98: /* method: ABS_P */ +#line 297 "jsonpath_gram.y" + { (yyval.optype) = jpiAbs; } +#line 1792 "jsonpath_gram.c" + break; + + case 99: /* method: SIZE_P */ +#line 298 "jsonpath_gram.y" + { (yyval.optype) = jpiSize; } +#line 1798 "jsonpath_gram.c" + break; + + case 100: /* method: TYPE_P */ +#line 299 "jsonpath_gram.y" + { (yyval.optype) = jpiType; } +#line 1804 "jsonpath_gram.c" + break; + + case 101: /* method: FLOOR_P */ +#line 300 "jsonpath_gram.y" + { (yyval.optype) = jpiFloor; } +#line 1810 "jsonpath_gram.c" + break; + + case 102: /* method: DOUBLE_P */ +#line 301 "jsonpath_gram.y" + { (yyval.optype) = jpiDouble; } +#line 1816 "jsonpath_gram.c" + break; + + case 103: /* method: CEILING_P */ +#line 302 "jsonpath_gram.y" + { (yyval.optype) = jpiCeiling; } +#line 1822 "jsonpath_gram.c" + break; + + case 104: /* method: KEYVALUE_P */ +#line 303 "jsonpath_gram.y" + { (yyval.optype) = jpiKeyValue; } +#line 1828 "jsonpath_gram.c" + break; + + +#line 1832 "jsonpath_gram.c" + + default: break; + } + /* User semantic actions sometimes alter yychar, and that requires + that yytoken be updated with the new translation. We take the + approach of translating immediately before every use of yytoken. + One alternative is translating here after every semantic action, + but that translation would be missed if the semantic action invokes + YYABORT, YYACCEPT, or YYERROR immediately after altering yychar or + if it invokes YYBACKUP. In the case of YYABORT or YYACCEPT, an + incorrect destructor might then be invoked immediately. In the + case of YYERROR or YYBACKUP, subsequent parser actions might lead + to an incorrect destructor call or verbose syntax error message + before the lookahead is translated. */ + YY_SYMBOL_PRINT ("-> $$ =", YY_CAST (yysymbol_kind_t, yyr1[yyn]), &yyval, &yyloc); + + YYPOPSTACK (yylen); + yylen = 0; + + *++yyvsp = yyval; + + /* Now 'shift' the result of the reduction. Determine what state + that goes to, based on the state we popped back to and the rule + number reduced by. */ + { + const int yylhs = yyr1[yyn] - YYNTOKENS; + const int yyi = yypgoto[yylhs] + *yyssp; + yystate = (0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *yyssp + ? yytable[yyi] + : yydefgoto[yylhs]); + } + + goto yynewstate; + + +/*--------------------------------------. +| yyerrlab -- here on detecting error. | +`--------------------------------------*/ +yyerrlab: + /* Make sure we have latest lookahead translation. See comments at + user semantic actions for why this is necessary. */ + yytoken = yychar == YYEMPTY ? YYSYMBOL_YYEMPTY : YYTRANSLATE (yychar); + /* If not already recovering from an error, report this error. 
*/ + if (!yyerrstatus) + { + ++yynerrs; + yyerror (result, escontext, YY_("syntax error")); + } + + if (yyerrstatus == 3) + { + /* If just tried and failed to reuse lookahead token after an + error, discard it. */ + + if (yychar <= YYEOF) + { + /* Return failure if at end of input. */ + if (yychar == YYEOF) + YYABORT; + } + else + { + yydestruct ("Error: discarding", + yytoken, &yylval, result, escontext); + yychar = YYEMPTY; + } + } + + /* Else will try to reuse lookahead token after shifting the error + token. */ + goto yyerrlab1; + + +/*---------------------------------------------------. +| yyerrorlab -- error raised explicitly by YYERROR. | +`---------------------------------------------------*/ +yyerrorlab: + /* Pacify compilers when the user code never invokes YYERROR and the + label yyerrorlab therefore never appears in user code. */ + if (0) + YYERROR; + + /* Do not reclaim the symbols of the rule whose action triggered + this YYERROR. */ + YYPOPSTACK (yylen); + yylen = 0; + YY_STACK_PRINT (yyss, yyssp); + yystate = *yyssp; + goto yyerrlab1; + + +/*-------------------------------------------------------------. +| yyerrlab1 -- common code for both syntax error and YYERROR. | +`-------------------------------------------------------------*/ +yyerrlab1: + yyerrstatus = 3; /* Each real token shifted decrements this. */ + + /* Pop stack until we find a state that shifts the error token. */ + for (;;) + { + yyn = yypact[yystate]; + if (!yypact_value_is_default (yyn)) + { + yyn += YYSYMBOL_YYerror; + if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYSYMBOL_YYerror) + { + yyn = yytable[yyn]; + if (0 < yyn) + break; + } + } + + /* Pop the current state because it cannot handle the error token. */ + if (yyssp == yyss) + YYABORT; + + + yydestruct ("Error: popping", + YY_ACCESSING_SYMBOL (yystate), yyvsp, result, escontext); + YYPOPSTACK (1); + yystate = *yyssp; + YY_STACK_PRINT (yyss, yyssp); + } + + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + *++yyvsp = yylval; + YY_IGNORE_MAYBE_UNINITIALIZED_END + + + /* Shift the error token. */ + YY_SYMBOL_PRINT ("Shifting", YY_ACCESSING_SYMBOL (yyn), yyvsp, yylsp); + + yystate = yyn; + goto yynewstate; + + +/*-------------------------------------. +| yyacceptlab -- YYACCEPT comes here. | +`-------------------------------------*/ +yyacceptlab: + yyresult = 0; + goto yyreturn; + + +/*-----------------------------------. +| yyabortlab -- YYABORT comes here. | +`-----------------------------------*/ +yyabortlab: + yyresult = 1; + goto yyreturn; + + +#if !defined yyoverflow +/*-------------------------------------------------. +| yyexhaustedlab -- memory exhaustion comes here. | +`-------------------------------------------------*/ +yyexhaustedlab: + yyerror (result, escontext, YY_("memory exhausted")); + yyresult = 2; + goto yyreturn; +#endif + + +/*-------------------------------------------------------. +| yyreturn -- parsing is finished, clean up and return. | +`-------------------------------------------------------*/ +yyreturn: + if (yychar != YYEMPTY) + { + /* Make sure we have latest lookahead translation. See comments at + user semantic actions for why this is necessary. */ + yytoken = YYTRANSLATE (yychar); + yydestruct ("Cleanup: discarding lookahead", + yytoken, &yylval, result, escontext); + } + /* Do not reclaim the symbols of the rule whose action triggered + this YYABORT or YYACCEPT. 
*/ + YYPOPSTACK (yylen); + YY_STACK_PRINT (yyss, yyssp); + while (yyssp != yyss) + { + yydestruct ("Cleanup: popping", + YY_ACCESSING_SYMBOL (+*yyssp), yyvsp, result, escontext); + YYPOPSTACK (1); + } +#ifndef yyoverflow + if (yyss != yyssa) + YYSTACK_FREE (yyss); +#endif + + return yyresult; +} + +#line 305 "jsonpath_gram.y" + + +/* + * The helper functions below allocate and fill JsonPathParseItem's of various + * types. + */ + +static JsonPathParseItem * +makeItemType(JsonPathItemType type) +{ + JsonPathParseItem *v = palloc(sizeof(*v)); + + CHECK_FOR_INTERRUPTS(); + + v->type = type; + v->next = NULL; + + return v; +} + +static JsonPathParseItem * +makeItemString(JsonPathString *s) +{ + JsonPathParseItem *v; + + if (s == NULL) + { + v = makeItemType(jpiNull); + } + else + { + v = makeItemType(jpiString); + v->value.string.val = s->val; + v->value.string.len = s->len; + } + + return v; +} + +static JsonPathParseItem * +makeItemVariable(JsonPathString *s) +{ + JsonPathParseItem *v; + + v = makeItemType(jpiVariable); + v->value.string.val = s->val; + v->value.string.len = s->len; + + return v; +} + +static JsonPathParseItem * +makeItemKey(JsonPathString *s) +{ + JsonPathParseItem *v; + + v = makeItemString(s); + v->type = jpiKey; + + return v; +} + +static JsonPathParseItem * +makeItemNumeric(JsonPathString *s) +{ + JsonPathParseItem *v; + + v = makeItemType(jpiNumeric); + v->value.numeric = + DatumGetNumeric(DirectFunctionCall3(numeric_in, + CStringGetDatum(s->val), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(-1))); + + return v; +} + +static JsonPathParseItem * +makeItemBool(bool val) +{ + JsonPathParseItem *v = makeItemType(jpiBool); + + v->value.boolean = val; + + return v; +} + +static JsonPathParseItem * +makeItemBinary(JsonPathItemType type, JsonPathParseItem *la, JsonPathParseItem *ra) +{ + JsonPathParseItem *v = makeItemType(type); + + v->value.args.left = la; + v->value.args.right = ra; + + return v; +} + +static JsonPathParseItem * +makeItemUnary(JsonPathItemType type, JsonPathParseItem *a) +{ + JsonPathParseItem *v; + + if (type == jpiPlus && a->type == jpiNumeric && !a->next) + return a; + + if (type == jpiMinus && a->type == jpiNumeric && !a->next) + { + v = makeItemType(jpiNumeric); + v->value.numeric = + DatumGetNumeric(DirectFunctionCall1(numeric_uminus, + NumericGetDatum(a->value.numeric))); + return v; + } + + v = makeItemType(type); + + v->value.arg = a; + + return v; +} + +static JsonPathParseItem * +makeItemList(List *list) +{ + JsonPathParseItem *head, + *end; + ListCell *cell; + + head = end = (JsonPathParseItem *) linitial(list); + + if (list_length(list) == 1) + return head; + + /* append items to the end of already existing list */ + while (end->next) + end = end->next; + + for_each_from(cell, list, 1) + { + JsonPathParseItem *c = (JsonPathParseItem *) lfirst(cell); + + end->next = c; + end = c; + } + + return head; +} + +static JsonPathParseItem * +makeIndexArray(List *list) +{ + JsonPathParseItem *v = makeItemType(jpiIndexArray); + ListCell *cell; + int i = 0; + + Assert(list != NIL); + v->value.array.nelems = list_length(list); + + v->value.array.elems = palloc(sizeof(v->value.array.elems[0]) * + v->value.array.nelems); + + foreach(cell, list) + { + JsonPathParseItem *jpi = lfirst(cell); + + Assert(jpi->type == jpiSubscript); + + v->value.array.elems[i].from = jpi->value.args.left; + v->value.array.elems[i++].to = jpi->value.args.right; + } + + return v; +} + +static JsonPathParseItem * +makeAny(int first, int last) +{ + JsonPathParseItem *v = 
makeItemType(jpiAny); + + v->value.anybounds.first = (first >= 0) ? first : PG_UINT32_MAX; + v->value.anybounds.last = (last >= 0) ? last : PG_UINT32_MAX; + + return v; +} + +static bool +makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern, + JsonPathString *flags, JsonPathParseItem ** result, + struct Node *escontext) +{ + JsonPathParseItem *v = makeItemType(jpiLikeRegex); + int i; + int cflags; + + v->value.like_regex.expr = expr; + v->value.like_regex.pattern = pattern->val; + v->value.like_regex.patternlen = pattern->len; + + /* Parse the flags string, convert to bitmask. Duplicate flags are OK. */ + v->value.like_regex.flags = 0; + for (i = 0; flags && i < flags->len; i++) + { + switch (flags->val[i]) + { + case 'i': + v->value.like_regex.flags |= JSP_REGEX_ICASE; + break; + case 's': + v->value.like_regex.flags |= JSP_REGEX_DOTALL; + break; + case 'm': + v->value.like_regex.flags |= JSP_REGEX_MLINE; + break; + case 'x': + v->value.like_regex.flags |= JSP_REGEX_WSPACE; + break; + case 'q': + v->value.like_regex.flags |= JSP_REGEX_QUOTE; + break; + default: + ereturn(escontext, false, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid input syntax for type %s", "jsonpath"), + errdetail("Unrecognized flag character \"%.*s\" in LIKE_REGEX predicate.", + pg_mblen(flags->val + i), flags->val + i))); + break; + } + } + + /* Convert flags to what pg_regcomp needs */ + if ( !jspConvertRegexFlags(v->value.like_regex.flags, &cflags, escontext)) + return false; + + /* check regex validity */ + { + regex_t re_tmp; + pg_wchar *wpattern; + int wpattern_len; + int re_result; + + wpattern = (pg_wchar *) palloc((pattern->len + 1) * sizeof(pg_wchar)); + wpattern_len = pg_mb2wchar_with_len(pattern->val, + wpattern, + pattern->len); + + if ((re_result = pg_regcomp(&re_tmp, wpattern, wpattern_len, cflags, + DEFAULT_COLLATION_OID)) != REG_OKAY) + { + char errMsg[100]; + + pg_regerror(re_result, &re_tmp, errMsg, sizeof(errMsg)); + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), + errmsg("invalid regular expression: %s", errMsg))); + } + + pg_regfree(&re_tmp); + } + + *result = v; + + return true; +} + +/* + * Convert from XQuery regex flags to those recognized by our regex library. + */ +bool +jspConvertRegexFlags(uint32 xflags, int *result, struct Node *escontext) +{ + /* By default, XQuery is very nearly the same as Spencer's AREs */ + int cflags = REG_ADVANCED; + + /* Ignore-case means the same thing, too, modulo locale issues */ + if (xflags & JSP_REGEX_ICASE) + cflags |= REG_ICASE; + + /* Per XQuery spec, if 'q' is specified then 'm', 's', 'x' are ignored */ + if (xflags & JSP_REGEX_QUOTE) + { + cflags &= ~REG_ADVANCED; + cflags |= REG_QUOTE; + } + else + { + /* Note that dotall mode is the default in POSIX */ + if (!(xflags & JSP_REGEX_DOTALL)) + cflags |= REG_NLSTOP; + if (xflags & JSP_REGEX_MLINE) + cflags |= REG_NLANCH; + + /* + * XQuery's 'x' mode is related to Spencer's expanded mode, but it's + * not really enough alike to justify treating JSP_REGEX_WSPACE as + * REG_EXPANDED. For now we treat 'x' as unimplemented; perhaps in + * future we'll modify the regex library to have an option for + * XQuery-style ignore-whitespace mode. 
+ */ + if (xflags & JSP_REGEX_WSPACE) + ereturn(escontext, false, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("XQuery \"x\" flag (expanded regular expressions) is not implemented"))); + } + + *result = cflags; + + return true; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonpath_gram.h b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonpath_gram.h new file mode 100644 index 00000000000..6931882fc78 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonpath_gram.h @@ -0,0 +1,126 @@ +/* A Bison parser, made by GNU Bison 3.7.5. */ + +/* Bison interface for Yacc-like parsers in C + + Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2021 Free Software Foundation, + Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + +/* DO NOT RELY ON FEATURES THAT ARE NOT DOCUMENTED in the manual, + especially those whose name start with YY_ or yy_. They are + private implementation details that can be changed or removed. */ + +#ifndef YY_JSONPATH_YY_JSONPATH_GRAM_H_INCLUDED +# define YY_JSONPATH_YY_JSONPATH_GRAM_H_INCLUDED +/* Debug traces. */ +#ifndef YYDEBUG +# define YYDEBUG 0 +#endif +#if YYDEBUG +extern int jsonpath_yydebug; +#endif + +/* Token kinds. 
*/ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + enum yytokentype + { + YYEMPTY = -2, + YYEOF = 0, /* "end of file" */ + YYerror = 256, /* error */ + YYUNDEF = 257, /* "invalid token" */ + TO_P = 258, /* TO_P */ + NULL_P = 259, /* NULL_P */ + TRUE_P = 260, /* TRUE_P */ + FALSE_P = 261, /* FALSE_P */ + IS_P = 262, /* IS_P */ + UNKNOWN_P = 263, /* UNKNOWN_P */ + EXISTS_P = 264, /* EXISTS_P */ + IDENT_P = 265, /* IDENT_P */ + STRING_P = 266, /* STRING_P */ + NUMERIC_P = 267, /* NUMERIC_P */ + INT_P = 268, /* INT_P */ + VARIABLE_P = 269, /* VARIABLE_P */ + OR_P = 270, /* OR_P */ + AND_P = 271, /* AND_P */ + NOT_P = 272, /* NOT_P */ + LESS_P = 273, /* LESS_P */ + LESSEQUAL_P = 274, /* LESSEQUAL_P */ + EQUAL_P = 275, /* EQUAL_P */ + NOTEQUAL_P = 276, /* NOTEQUAL_P */ + GREATEREQUAL_P = 277, /* GREATEREQUAL_P */ + GREATER_P = 278, /* GREATER_P */ + ANY_P = 279, /* ANY_P */ + STRICT_P = 280, /* STRICT_P */ + LAX_P = 281, /* LAX_P */ + LAST_P = 282, /* LAST_P */ + STARTS_P = 283, /* STARTS_P */ + WITH_P = 284, /* WITH_P */ + LIKE_REGEX_P = 285, /* LIKE_REGEX_P */ + FLAG_P = 286, /* FLAG_P */ + ABS_P = 287, /* ABS_P */ + SIZE_P = 288, /* SIZE_P */ + TYPE_P = 289, /* TYPE_P */ + FLOOR_P = 290, /* FLOOR_P */ + DOUBLE_P = 291, /* DOUBLE_P */ + CEILING_P = 292, /* CEILING_P */ + KEYVALUE_P = 293, /* KEYVALUE_P */ + DATETIME_P = 294, /* DATETIME_P */ + UMINUS = 295 /* UMINUS */ + }; + typedef enum yytokentype yytoken_kind_t; +#endif + +/* Value type. */ +#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED +union YYSTYPE +{ +#line 67 "jsonpath_gram.y" + + JsonPathString str; + List *elems; /* list of JsonPathParseItem */ + List *indexs; /* list of integers */ + JsonPathParseItem *value; + JsonPathParseResult *result; + JsonPathItemType optype; + bool boolean; + int integer; + +#line 115 "jsonpath_gram.h" + +}; +typedef union YYSTYPE YYSTYPE; +# define YYSTYPE_IS_TRIVIAL 1 +# define YYSTYPE_IS_DECLARED 1 +#endif + + + +int jsonpath_yyparse (JsonPathParseResult **result, struct Node *escontext); + +#endif /* !YY_JSONPATH_YY_JSONPATH_GRAM_H_INCLUDED */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonpath_internal.h b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonpath_internal.h new file mode 100644 index 00000000000..90eea6e9616 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonpath_internal.h @@ -0,0 +1,38 @@ +/*------------------------------------------------------------------------- + * + * jsonpath_internal.h + * Private definitions for jsonpath scanner & parser + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/utils/adt/jsonpath_internal.h + * + *------------------------------------------------------------------------- + */ + +#ifndef JSONPATH_INTERNAL_H +#define JSONPATH_INTERNAL_H + +/* struct JsonPathString is shared between scan and gram */ +typedef struct JsonPathString +{ + char *val; + int len; + int total; +} JsonPathString; + +#include "utils/jsonpath.h" +#include "jsonpath_gram.h" + +#define YY_DECL extern int jsonpath_yylex(YYSTYPE *yylval_param, \ + JsonPathParseResult **result, \ + struct Node *escontext) +YY_DECL; +extern int jsonpath_yyparse(JsonPathParseResult **result, + struct Node *escontext); +extern void jsonpath_yyerror(JsonPathParseResult **result, + struct Node *escontext, + const char *message); + +#endif /* JSONPATH_INTERNAL_H */ diff --git 
a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonpath_scan.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonpath_scan.c new file mode 100644 index 00000000000..cddc313bb41 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonpath_scan.c @@ -0,0 +1,6175 @@ +#line 2 "jsonpath_scan.c" +/*------------------------------------------------------------------------- + * + * jsonpath_scan.l + * Lexical parser for jsonpath datatype + * + * Splits jsonpath string into tokens represented as JsonPathString structs. + * Decodes unicode and hex escaped strings. + * + * Copyright (c) 2019-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/adt/jsonpath_scan.l + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +/* + * NB: include jsonpath_gram.h only AFTER including jsonpath_internal.h, + * because jsonpath_internal.h contains the declaration for JsonPathString. + */ +#include "jsonpath_internal.h" +#include "jsonpath_gram.h" + +#include "mb/pg_wchar.h" +#include "nodes/miscnodes.h" +#include "nodes/pg_list.h" + +#line 32 "jsonpath_scan.c" + +#define YY_INT_ALIGNED short int + +/* A lexical scanner generated by flex */ + +#define yy_create_buffer jsonpath_yy_create_buffer +#define yy_delete_buffer jsonpath_yy_delete_buffer +#define yy_scan_buffer jsonpath_yy_scan_buffer +#define yy_scan_string jsonpath_yy_scan_string +#define yy_scan_bytes jsonpath_yy_scan_bytes +#define yy_init_buffer jsonpath_yy_init_buffer +#define yy_flush_buffer jsonpath_yy_flush_buffer +#define yy_load_buffer_state jsonpath_yy_load_buffer_state +#define yy_switch_to_buffer jsonpath_yy_switch_to_buffer +#define yypush_buffer_state jsonpath_yypush_buffer_state +#define yypop_buffer_state jsonpath_yypop_buffer_state +#define yyensure_buffer_stack jsonpath_yyensure_buffer_stack +#define yy_flex_debug jsonpath_yy_flex_debug +#define yyin jsonpath_yyin +#define yyleng jsonpath_yyleng +#define yylex jsonpath_yylex +#define yylineno jsonpath_yylineno +#define yyout jsonpath_yyout +#define yyrestart jsonpath_yyrestart +#define yytext jsonpath_yytext +#define yywrap jsonpath_yywrap +#define yyalloc jsonpath_yyalloc +#define yyrealloc jsonpath_yyrealloc +#define yyfree jsonpath_yyfree + +#define FLEX_SCANNER +#define YY_FLEX_MAJOR_VERSION 2 +#define YY_FLEX_MINOR_VERSION 6 +#define YY_FLEX_SUBMINOR_VERSION 4 +#if YY_FLEX_SUBMINOR_VERSION > 0 +#define FLEX_BETA +#endif + +#ifdef yy_create_buffer +#define jsonpath_yy_create_buffer_ALREADY_DEFINED +#else +#define yy_create_buffer jsonpath_yy_create_buffer +#endif + +#ifdef yy_delete_buffer +#define jsonpath_yy_delete_buffer_ALREADY_DEFINED +#else +#define yy_delete_buffer jsonpath_yy_delete_buffer +#endif + +#ifdef yy_scan_buffer +#define jsonpath_yy_scan_buffer_ALREADY_DEFINED +#else +#define yy_scan_buffer jsonpath_yy_scan_buffer +#endif + +#ifdef yy_scan_string +#define jsonpath_yy_scan_string_ALREADY_DEFINED +#else +#define yy_scan_string jsonpath_yy_scan_string +#endif + +#ifdef yy_scan_bytes +#define jsonpath_yy_scan_bytes_ALREADY_DEFINED +#else +#define yy_scan_bytes jsonpath_yy_scan_bytes +#endif + +#ifdef yy_init_buffer +#define jsonpath_yy_init_buffer_ALREADY_DEFINED +#else +#define yy_init_buffer jsonpath_yy_init_buffer +#endif + +#ifdef yy_flush_buffer +#define jsonpath_yy_flush_buffer_ALREADY_DEFINED +#else +#define yy_flush_buffer jsonpath_yy_flush_buffer +#endif + +#ifdef yy_load_buffer_state +#define 
jsonpath_yy_load_buffer_state_ALREADY_DEFINED +#else +#define yy_load_buffer_state jsonpath_yy_load_buffer_state +#endif + +#ifdef yy_switch_to_buffer +#define jsonpath_yy_switch_to_buffer_ALREADY_DEFINED +#else +#define yy_switch_to_buffer jsonpath_yy_switch_to_buffer +#endif + +#ifdef yypush_buffer_state +#define jsonpath_yypush_buffer_state_ALREADY_DEFINED +#else +#define yypush_buffer_state jsonpath_yypush_buffer_state +#endif + +#ifdef yypop_buffer_state +#define jsonpath_yypop_buffer_state_ALREADY_DEFINED +#else +#define yypop_buffer_state jsonpath_yypop_buffer_state +#endif + +#ifdef yyensure_buffer_stack +#define jsonpath_yyensure_buffer_stack_ALREADY_DEFINED +#else +#define yyensure_buffer_stack jsonpath_yyensure_buffer_stack +#endif + +#ifdef yylex +#define jsonpath_yylex_ALREADY_DEFINED +#else +#define yylex jsonpath_yylex +#endif + +#ifdef yyrestart +#define jsonpath_yyrestart_ALREADY_DEFINED +#else +#define yyrestart jsonpath_yyrestart +#endif + +#ifdef yylex_init +#define jsonpath_yylex_init_ALREADY_DEFINED +#else +#define yylex_init jsonpath_yylex_init +#endif + +#ifdef yylex_init_extra +#define jsonpath_yylex_init_extra_ALREADY_DEFINED +#else +#define yylex_init_extra jsonpath_yylex_init_extra +#endif + +#ifdef yylex_destroy +#define jsonpath_yylex_destroy_ALREADY_DEFINED +#else +#define yylex_destroy jsonpath_yylex_destroy +#endif + +#ifdef yyget_debug +#define jsonpath_yyget_debug_ALREADY_DEFINED +#else +#define yyget_debug jsonpath_yyget_debug +#endif + +#ifdef yyset_debug +#define jsonpath_yyset_debug_ALREADY_DEFINED +#else +#define yyset_debug jsonpath_yyset_debug +#endif + +#ifdef yyget_extra +#define jsonpath_yyget_extra_ALREADY_DEFINED +#else +#define yyget_extra jsonpath_yyget_extra +#endif + +#ifdef yyset_extra +#define jsonpath_yyset_extra_ALREADY_DEFINED +#else +#define yyset_extra jsonpath_yyset_extra +#endif + +#ifdef yyget_in +#define jsonpath_yyget_in_ALREADY_DEFINED +#else +#define yyget_in jsonpath_yyget_in +#endif + +#ifdef yyset_in +#define jsonpath_yyset_in_ALREADY_DEFINED +#else +#define yyset_in jsonpath_yyset_in +#endif + +#ifdef yyget_out +#define jsonpath_yyget_out_ALREADY_DEFINED +#else +#define yyget_out jsonpath_yyget_out +#endif + +#ifdef yyset_out +#define jsonpath_yyset_out_ALREADY_DEFINED +#else +#define yyset_out jsonpath_yyset_out +#endif + +#ifdef yyget_leng +#define jsonpath_yyget_leng_ALREADY_DEFINED +#else +#define yyget_leng jsonpath_yyget_leng +#endif + +#ifdef yyget_text +#define jsonpath_yyget_text_ALREADY_DEFINED +#else +#define yyget_text jsonpath_yyget_text +#endif + +#ifdef yyget_lineno +#define jsonpath_yyget_lineno_ALREADY_DEFINED +#else +#define yyget_lineno jsonpath_yyget_lineno +#endif + +#ifdef yyset_lineno +#define jsonpath_yyset_lineno_ALREADY_DEFINED +#else +#define yyset_lineno jsonpath_yyset_lineno +#endif + +#ifdef yywrap +#define jsonpath_yywrap_ALREADY_DEFINED +#else +#define yywrap jsonpath_yywrap +#endif + +#ifdef yyget_lval +#define jsonpath_yyget_lval_ALREADY_DEFINED +#else +#define yyget_lval jsonpath_yyget_lval +#endif + +#ifdef yyset_lval +#define jsonpath_yyset_lval_ALREADY_DEFINED +#else +#define yyset_lval jsonpath_yyset_lval +#endif + +#ifdef yyalloc +#define jsonpath_yyalloc_ALREADY_DEFINED +#else +#define yyalloc jsonpath_yyalloc +#endif + +#ifdef yyrealloc +#define jsonpath_yyrealloc_ALREADY_DEFINED +#else +#define yyrealloc jsonpath_yyrealloc +#endif + +#ifdef yyfree +#define jsonpath_yyfree_ALREADY_DEFINED +#else +#define yyfree jsonpath_yyfree +#endif + +#ifdef yytext +#define 
jsonpath_yytext_ALREADY_DEFINED +#else +#define yytext jsonpath_yytext +#endif + +#ifdef yyleng +#define jsonpath_yyleng_ALREADY_DEFINED +#else +#define yyleng jsonpath_yyleng +#endif + +#ifdef yyin +#define jsonpath_yyin_ALREADY_DEFINED +#else +#define yyin jsonpath_yyin +#endif + +#ifdef yyout +#define jsonpath_yyout_ALREADY_DEFINED +#else +#define yyout jsonpath_yyout +#endif + +#ifdef yy_flex_debug +#define jsonpath_yy_flex_debug_ALREADY_DEFINED +#else +#define yy_flex_debug jsonpath_yy_flex_debug +#endif + +#ifdef yylineno +#define jsonpath_yylineno_ALREADY_DEFINED +#else +#define yylineno jsonpath_yylineno +#endif + +/* First, we deal with platform-specific or compiler-specific issues. */ + +/* begin standard C headers. */ +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <stdlib.h> + +/* end standard C headers. */ + +/* flex integer type definitions */ + +#ifndef FLEXINT_H +#define FLEXINT_H + +/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */ + +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + +/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, + * if you want the limit (max/min) macros for int types. + */ +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS 1 +#endif + +#include <inttypes.h> +typedef int8_t flex_int8_t; +typedef uint8_t flex_uint8_t; +typedef int16_t flex_int16_t; +typedef uint16_t flex_uint16_t; +typedef int32_t flex_int32_t; +typedef uint32_t flex_uint32_t; +#else +typedef signed char flex_int8_t; +typedef short int flex_int16_t; +typedef int flex_int32_t; +typedef unsigned char flex_uint8_t; +typedef unsigned short int flex_uint16_t; +typedef unsigned int flex_uint32_t; + +/* Limits of integral types. */ +#ifndef INT8_MIN +#define INT8_MIN (-128) +#endif +#ifndef INT16_MIN +#define INT16_MIN (-32767-1) +#endif +#ifndef INT32_MIN +#define INT32_MIN (-2147483647-1) +#endif +#ifndef INT8_MAX +#define INT8_MAX (127) +#endif +#ifndef INT16_MAX +#define INT16_MAX (32767) +#endif +#ifndef INT32_MAX +#define INT32_MAX (2147483647) +#endif +#ifndef UINT8_MAX +#define UINT8_MAX (255U) +#endif +#ifndef UINT16_MAX +#define UINT16_MAX (65535U) +#endif +#ifndef UINT32_MAX +#define UINT32_MAX (4294967295U) +#endif + +#ifndef SIZE_MAX +#define SIZE_MAX (~(size_t)0) +#endif + +#endif /* ! C99 */ + +#endif /* ! FLEXINT_H */ + +/* begin standard C++ headers. */ + +/* TODO: this is always defined, so inline it */ +#define yyconst const + +#if defined(__GNUC__) && __GNUC__ >= 3 +#define yynoreturn __attribute__((__noreturn__)) +#else +#define yynoreturn +#endif + +/* Returned upon end-of-file. */ +#define YY_NULL 0 + +/* Promotes a possibly negative, possibly signed char to an + * integer in range [0..255] for use as an array index. + */ +#define YY_SC_TO_UI(c) ((YY_CHAR) (c)) + +/* Enter a start condition. This macro really ought to take a parameter, + * but we do it the disgusting crufty way forced on us by the ()-less + * definition of BEGIN. + */ +#define BEGIN (yy_start) = 1 + 2 * +/* Translate the current start state into a value that can be later handed + * to BEGIN to return to the state. The YYSTATE alias is for lex + * compatibility. + */ +#define YY_START (((yy_start) - 1) / 2) +#define YYSTATE YY_START +/* Action number for EOF rule of a given start state. */ +#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) +/* Special action meaning "start processing a new file". */ +#define YY_NEW_FILE yyrestart( yyin ) +#define YY_END_OF_BUFFER_CHAR 0 + +/* Size of default input buffer. 
*/ +#ifndef YY_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k. + * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case. + * Ditto for the __ia64__ case accordingly. + */ +#define YY_BUF_SIZE 32768 +#else +#define YY_BUF_SIZE 16384 +#endif /* __ia64__ */ +#endif + +/* The state buf must be large enough to hold one state per character in the main buffer. + */ +#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type)) + +#ifndef YY_TYPEDEF_YY_BUFFER_STATE +#define YY_TYPEDEF_YY_BUFFER_STATE +typedef struct yy_buffer_state *YY_BUFFER_STATE; +#endif + +#ifndef YY_TYPEDEF_YY_SIZE_T +#define YY_TYPEDEF_YY_SIZE_T +typedef size_t yy_size_t; +#endif + +extern __thread int yyleng; + +extern __thread FILE *yyin, *yyout; + +#define EOB_ACT_CONTINUE_SCAN 0 +#define EOB_ACT_END_OF_FILE 1 +#define EOB_ACT_LAST_MATCH 2 + + #define YY_LESS_LINENO(n) + #define YY_LINENO_REWIND_TO(ptr) + +/* Return all but the first "n" matched characters back to the input stream. */ +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + int yyless_macro_arg = (n); \ + YY_LESS_LINENO(yyless_macro_arg);\ + *yy_cp = (yy_hold_char); \ + YY_RESTORE_YY_MORE_OFFSET \ + (yy_c_buf_p) = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \ + YY_DO_BEFORE_ACTION; /* set up yytext again */ \ + } \ + while ( 0 ) +#define unput(c) yyunput( c, (yytext_ptr) ) + +#ifndef YY_STRUCT_YY_BUFFER_STATE +#define YY_STRUCT_YY_BUFFER_STATE +struct yy_buffer_state + { + FILE *yy_input_file; + + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. + */ + int yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + int yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. + */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. + */ + int yy_at_bol; + + int yy_bs_lineno; /**< The line count. */ + int yy_bs_column; /**< The column count. */ + + /* Whether to try to fill the input buffer when we reach the + * end of it. + */ + int yy_fill_buffer; + + int yy_buffer_status; + +#define YY_BUFFER_NEW 0 +#define YY_BUFFER_NORMAL 1 + /* When an EOF's been seen but there's still some text to process + * then we mark the buffer as YY_EOF_PENDING, to indicate that we + * shouldn't try reading from the input source any more. We might + * still have a bunch of tokens to match, though, because of + * possible backing-up. + * + * When we actually see the EOF, we change the status to "new" + * (via yyrestart()), so that the user can continue scanning by + * just pointing yyin at a new input file. + */ +#define YY_BUFFER_EOF_PENDING 2 + + }; +#endif /* !YY_STRUCT_YY_BUFFER_STATE */ + +/* Stack of input buffers. */ +static __thread size_t yy_buffer_stack_top = 0; /**< index of top of stack. */ +static __thread size_t yy_buffer_stack_max = 0; /**< capacity of stack. */ +static __thread YY_BUFFER_STATE * yy_buffer_stack = NULL; /**< Stack as an array. 
*/ + +/* We provide macros for accessing buffer states in case in the + * future we want to put the buffer states in a more general + * "scanner state". + * + * Returns the top of the stack, or NULL. + */ +#define YY_CURRENT_BUFFER ( (yy_buffer_stack) \ + ? (yy_buffer_stack)[(yy_buffer_stack_top)] \ + : NULL) +/* Same as previous macro, but useful when we know that the buffer stack is not + * NULL or when we need an lvalue. For internal use only. + */ +#define YY_CURRENT_BUFFER_LVALUE (yy_buffer_stack)[(yy_buffer_stack_top)] + +/* yy_hold_char holds the character lost when yytext is formed. */ +static __thread char yy_hold_char; +static __thread int yy_n_chars; /* number of characters read into yy_ch_buf */ +__thread int yyleng; + +/* Points to current character in buffer. */ +static __thread char *yy_c_buf_p = NULL; +static __thread int yy_init = 0; /* whether we need to initialize */ +static __thread int yy_start = 0; /* start state number */ + +/* Flag which is used to allow yywrap()'s to do buffer switches + * instead of setting up a fresh yyin. A bit of a hack ... + */ +static __thread int yy_did_buffer_switch_on_eof; + +void yyrestart ( FILE *input_file ); +void yy_switch_to_buffer ( YY_BUFFER_STATE new_buffer ); +YY_BUFFER_STATE yy_create_buffer ( FILE *file, int size ); +void yy_delete_buffer ( YY_BUFFER_STATE b ); +void yy_flush_buffer ( YY_BUFFER_STATE b ); +void yypush_buffer_state ( YY_BUFFER_STATE new_buffer ); +void yypop_buffer_state ( void ); + +static void yyensure_buffer_stack ( void ); +static void yy_load_buffer_state ( void ); +static void yy_init_buffer ( YY_BUFFER_STATE b, FILE *file ); +#define YY_FLUSH_BUFFER yy_flush_buffer( YY_CURRENT_BUFFER ) + +YY_BUFFER_STATE yy_scan_buffer ( char *base, yy_size_t size ); +YY_BUFFER_STATE yy_scan_string ( const char *yy_str ); +YY_BUFFER_STATE yy_scan_bytes ( const char *bytes, int len ); + +void *yyalloc ( yy_size_t ); +void *yyrealloc ( void *, yy_size_t ); +void yyfree ( void * ); + +#define yy_new_buffer yy_create_buffer +#define yy_set_interactive(is_interactive) \ + { \ + if ( ! YY_CURRENT_BUFFER ){ \ + yyensure_buffer_stack (); \ + YY_CURRENT_BUFFER_LVALUE = \ + yy_create_buffer( yyin, YY_BUF_SIZE ); \ + } \ + YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \ + } +#define yy_set_bol(at_bol) \ + { \ + if ( ! YY_CURRENT_BUFFER ){\ + yyensure_buffer_stack (); \ + YY_CURRENT_BUFFER_LVALUE = \ + yy_create_buffer( yyin, YY_BUF_SIZE ); \ + } \ + YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \ + } +#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol) + +/* Begin user sect3 */ + +#define jsonpath_yywrap() (/*CONSTCOND*/1) +#define YY_SKIP_YYWRAP +typedef flex_uint8_t YY_CHAR; + +__thread FILE *yyin = NULL, *yyout = NULL; + +typedef const struct yy_trans_info *yy_state_type; + +extern __thread int yylineno; +__thread int yylineno = 1; + +extern __thread char *yytext; +#ifdef yytext_ptr +#undef yytext_ptr +#endif +#define yytext_ptr yytext + +static yy_state_type yy_get_previous_state ( void ); +static yy_state_type yy_try_NUL_trans ( yy_state_type current_state ); +static int yy_get_next_buffer ( void ); +static void yynoreturn yy_fatal_error ( const char* msg ); + +/* Done after the current pattern has been matched and before the + * corresponding action - sets up yytext. 
+ */ +#define YY_DO_BEFORE_ACTION \ + (yytext_ptr) = yy_bp; \ + yyleng = (int) (yy_cp - yy_bp); \ + (yy_hold_char) = *yy_cp; \ + *yy_cp = '\0'; \ + (yy_c_buf_p) = yy_cp; +#define YY_NUM_RULES 53 +#define YY_END_OF_BUFFER 54 +struct yy_trans_info + { + flex_int16_t yy_verify; + flex_int16_t yy_nxt; + }; +static const struct yy_trans_info yy_transition[15882] = + { + { 0, 0 }, { 0,15626 }, { 0, 0 }, { 0,15624 }, { 1,2580 }, + { 2,2580 }, { 3,2580 }, { 4,2580 }, { 5,2580 }, { 6,2580 }, + { 7,2580 }, { 8,2580 }, { 9,2838 }, { 10,2838 }, { 11,2580 }, + { 12,2838 }, { 13,2838 }, { 14,2580 }, { 15,2580 }, { 16,2580 }, + { 17,2580 }, { 18,2580 }, { 19,2580 }, { 20,2580 }, { 21,2580 }, + { 22,2580 }, { 23,2580 }, { 24,2580 }, { 25,2580 }, { 26,2580 }, + { 27,2580 }, { 28,2580 }, { 29,2580 }, { 30,2580 }, { 31,2580 }, + { 32,2838 }, { 33,2642 }, { 34,2644 }, { 35,2672 }, { 36,2857 }, + { 37,2672 }, { 38,2854 }, { 39,2580 }, { 40,2672 }, { 41,2672 }, + { 42,3115 }, { 43,2672 }, { 44,2672 }, { 45,2672 }, { 46,3117 }, + + { 47,3119 }, { 48,3176 }, { 49,3434 }, { 50,3434 }, { 51,3434 }, + { 52,3434 }, { 53,3434 }, { 54,3434 }, { 55,3434 }, { 56,3434 }, + { 57,3434 }, { 58,2672 }, { 59,2580 }, { 60,3124 }, { 61,3127 }, + { 62,3147 }, { 63,2672 }, { 64,2672 }, { 65,2580 }, { 66,2580 }, + { 67,2580 }, { 68,2580 }, { 69,2580 }, { 70,2580 }, { 71,2580 }, + { 72,2580 }, { 73,2580 }, { 74,2580 }, { 75,2580 }, { 76,2580 }, + { 77,2580 }, { 78,2580 }, { 79,2580 }, { 80,2580 }, { 81,2580 }, + { 82,2580 }, { 83,2580 }, { 84,2580 }, { 85,2580 }, { 86,2580 }, + { 87,2580 }, { 88,2580 }, { 89,2580 }, { 90,2580 }, { 91,2672 }, + { 92,3149 }, { 93,2672 }, { 94,2580 }, { 95,2580 }, { 96,2580 }, + + { 97,2580 }, { 98,2580 }, { 99,2580 }, { 100,2580 }, { 101,2580 }, + { 102,2580 }, { 103,2580 }, { 104,2580 }, { 105,2580 }, { 106,2580 }, + { 107,2580 }, { 108,2580 }, { 109,2580 }, { 110,2580 }, { 111,2580 }, + { 112,2580 }, { 113,2580 }, { 114,2580 }, { 115,2580 }, { 116,2580 }, + { 117,2580 }, { 118,2580 }, { 119,2580 }, { 120,2580 }, { 121,2580 }, + { 122,2580 }, { 123,2672 }, { 124,3692 }, { 125,2672 }, { 126,2580 }, + { 127,2580 }, { 128,2580 }, { 129,2580 }, { 130,2580 }, { 131,2580 }, + { 132,2580 }, { 133,2580 }, { 134,2580 }, { 135,2580 }, { 136,2580 }, + { 137,2580 }, { 138,2580 }, { 139,2580 }, { 140,2580 }, { 141,2580 }, + { 142,2580 }, { 143,2580 }, { 144,2580 }, { 145,2580 }, { 146,2580 }, + + { 147,2580 }, { 148,2580 }, { 149,2580 }, { 150,2580 }, { 151,2580 }, + { 152,2580 }, { 153,2580 }, { 154,2580 }, { 155,2580 }, { 156,2580 }, + { 157,2580 }, { 158,2580 }, { 159,2580 }, { 160,2580 }, { 161,2580 }, + { 162,2580 }, { 163,2580 }, { 164,2580 }, { 165,2580 }, { 166,2580 }, + { 167,2580 }, { 168,2580 }, { 169,2580 }, { 170,2580 }, { 171,2580 }, + { 172,2580 }, { 173,2580 }, { 174,2580 }, { 175,2580 }, { 176,2580 }, + { 177,2580 }, { 178,2580 }, { 179,2580 }, { 180,2580 }, { 181,2580 }, + { 182,2580 }, { 183,2580 }, { 184,2580 }, { 185,2580 }, { 186,2580 }, + { 187,2580 }, { 188,2580 }, { 189,2580 }, { 190,2580 }, { 191,2580 }, + { 192,2580 }, { 193,2580 }, { 194,2580 }, { 195,2580 }, { 196,2580 }, + + { 197,2580 }, { 198,2580 }, { 199,2580 }, { 200,2580 }, { 201,2580 }, + { 202,2580 }, { 203,2580 }, { 204,2580 }, { 205,2580 }, { 206,2580 }, + { 207,2580 }, { 208,2580 }, { 209,2580 }, { 210,2580 }, { 211,2580 }, + { 212,2580 }, { 213,2580 }, { 214,2580 }, { 215,2580 }, { 216,2580 }, + { 217,2580 }, { 218,2580 }, { 219,2580 }, { 220,2580 }, { 221,2580 }, + { 222,2580 }, { 223,2580 }, { 224,2580 }, { 225,2580 }, 
{ 226,2580 }, + { 227,2580 }, { 228,2580 }, { 229,2580 }, { 230,2580 }, { 231,2580 }, + { 232,2580 }, { 233,2580 }, { 234,2580 }, { 235,2580 }, { 236,2580 }, + { 237,2580 }, { 238,2580 }, { 239,2580 }, { 240,2580 }, { 241,2580 }, + { 242,2580 }, { 243,2580 }, { 244,2580 }, { 245,2580 }, { 246,2580 }, + + { 247,2580 }, { 248,2580 }, { 249,2580 }, { 250,2580 }, { 251,2580 }, + { 252,2580 }, { 253,2580 }, { 254,2580 }, { 255,2580 }, { 256,2580 }, + { 0, 0 }, { 0,15366 }, { 1,2322 }, { 2,2322 }, { 3,2322 }, + { 4,2322 }, { 5,2322 }, { 6,2322 }, { 7,2322 }, { 8,2322 }, + { 9,2580 }, { 10,2580 }, { 11,2322 }, { 12,2580 }, { 13,2580 }, + { 14,2322 }, { 15,2322 }, { 16,2322 }, { 17,2322 }, { 18,2322 }, + { 19,2322 }, { 20,2322 }, { 21,2322 }, { 22,2322 }, { 23,2322 }, + { 24,2322 }, { 25,2322 }, { 26,2322 }, { 27,2322 }, { 28,2322 }, + { 29,2322 }, { 30,2322 }, { 31,2322 }, { 32,2580 }, { 33,2384 }, + { 34,2386 }, { 35,2414 }, { 36,2599 }, { 37,2414 }, { 38,2596 }, + + { 39,2322 }, { 40,2414 }, { 41,2414 }, { 42,2857 }, { 43,2414 }, + { 44,2414 }, { 45,2414 }, { 46,2859 }, { 47,2861 }, { 48,2918 }, + { 49,3176 }, { 50,3176 }, { 51,3176 }, { 52,3176 }, { 53,3176 }, + { 54,3176 }, { 55,3176 }, { 56,3176 }, { 57,3176 }, { 58,2414 }, + { 59,2322 }, { 60,2866 }, { 61,2869 }, { 62,2889 }, { 63,2414 }, + { 64,2414 }, { 65,2322 }, { 66,2322 }, { 67,2322 }, { 68,2322 }, + { 69,2322 }, { 70,2322 }, { 71,2322 }, { 72,2322 }, { 73,2322 }, + { 74,2322 }, { 75,2322 }, { 76,2322 }, { 77,2322 }, { 78,2322 }, + { 79,2322 }, { 80,2322 }, { 81,2322 }, { 82,2322 }, { 83,2322 }, + { 84,2322 }, { 85,2322 }, { 86,2322 }, { 87,2322 }, { 88,2322 }, + + { 89,2322 }, { 90,2322 }, { 91,2414 }, { 92,2891 }, { 93,2414 }, + { 94,2322 }, { 95,2322 }, { 96,2322 }, { 97,2322 }, { 98,2322 }, + { 99,2322 }, { 100,2322 }, { 101,2322 }, { 102,2322 }, { 103,2322 }, + { 104,2322 }, { 105,2322 }, { 106,2322 }, { 107,2322 }, { 108,2322 }, + { 109,2322 }, { 110,2322 }, { 111,2322 }, { 112,2322 }, { 113,2322 }, + { 114,2322 }, { 115,2322 }, { 116,2322 }, { 117,2322 }, { 118,2322 }, + { 119,2322 }, { 120,2322 }, { 121,2322 }, { 122,2322 }, { 123,2414 }, + { 124,3434 }, { 125,2414 }, { 126,2322 }, { 127,2322 }, { 128,2322 }, + { 129,2322 }, { 130,2322 }, { 131,2322 }, { 132,2322 }, { 133,2322 }, + { 134,2322 }, { 135,2322 }, { 136,2322 }, { 137,2322 }, { 138,2322 }, + + { 139,2322 }, { 140,2322 }, { 141,2322 }, { 142,2322 }, { 143,2322 }, + { 144,2322 }, { 145,2322 }, { 146,2322 }, { 147,2322 }, { 148,2322 }, + { 149,2322 }, { 150,2322 }, { 151,2322 }, { 152,2322 }, { 153,2322 }, + { 154,2322 }, { 155,2322 }, { 156,2322 }, { 157,2322 }, { 158,2322 }, + { 159,2322 }, { 160,2322 }, { 161,2322 }, { 162,2322 }, { 163,2322 }, + { 164,2322 }, { 165,2322 }, { 166,2322 }, { 167,2322 }, { 168,2322 }, + { 169,2322 }, { 170,2322 }, { 171,2322 }, { 172,2322 }, { 173,2322 }, + { 174,2322 }, { 175,2322 }, { 176,2322 }, { 177,2322 }, { 178,2322 }, + { 179,2322 }, { 180,2322 }, { 181,2322 }, { 182,2322 }, { 183,2322 }, + { 184,2322 }, { 185,2322 }, { 186,2322 }, { 187,2322 }, { 188,2322 }, + + { 189,2322 }, { 190,2322 }, { 191,2322 }, { 192,2322 }, { 193,2322 }, + { 194,2322 }, { 195,2322 }, { 196,2322 }, { 197,2322 }, { 198,2322 }, + { 199,2322 }, { 200,2322 }, { 201,2322 }, { 202,2322 }, { 203,2322 }, + { 204,2322 }, { 205,2322 }, { 206,2322 }, { 207,2322 }, { 208,2322 }, + { 209,2322 }, { 210,2322 }, { 211,2322 }, { 212,2322 }, { 213,2322 }, + { 214,2322 }, { 215,2322 }, { 216,2322 }, { 217,2322 }, { 218,2322 }, + { 219,2322 }, { 220,2322 }, { 221,2322 }, 
{ 222,2322 }, { 223,2322 }, + { 224,2322 }, { 225,2322 }, { 226,2322 }, { 227,2322 }, { 228,2322 }, + { 229,2322 }, { 230,2322 }, { 231,2322 }, { 232,2322 }, { 233,2322 }, + { 234,2322 }, { 235,2322 }, { 236,2322 }, { 237,2322 }, { 238,2322 }, + + { 239,2322 }, { 240,2322 }, { 241,2322 }, { 242,2322 }, { 243,2322 }, + { 244,2322 }, { 245,2322 }, { 246,2322 }, { 247,2322 }, { 248,2322 }, + { 249,2322 }, { 250,2322 }, { 251,2322 }, { 252,2322 }, { 253,2322 }, + { 254,2322 }, { 255,2322 }, { 256,2322 }, { 0, 0 }, { 0,15108 }, + { 1,3208 }, { 2,3208 }, { 3,3208 }, { 4,3208 }, { 5,3208 }, + { 6,3208 }, { 7,3208 }, { 8,3208 }, { 9,3208 }, { 10,3208 }, + { 11,3208 }, { 12,3208 }, { 13,3208 }, { 14,3208 }, { 15,3208 }, + { 16,3208 }, { 17,3208 }, { 18,3208 }, { 19,3208 }, { 20,3208 }, + { 21,3208 }, { 22,3208 }, { 23,3208 }, { 24,3208 }, { 25,3208 }, + { 26,3208 }, { 27,3208 }, { 28,3208 }, { 29,3208 }, { 30,3208 }, + + { 31,3208 }, { 32,3208 }, { 33,3208 }, { 34,3178 }, { 35,3208 }, + { 36,3208 }, { 37,3208 }, { 38,3208 }, { 39,3208 }, { 40,3208 }, + { 41,3208 }, { 42,3208 }, { 43,3208 }, { 44,3208 }, { 45,3208 }, + { 46,3208 }, { 47,3208 }, { 48,3208 }, { 49,3208 }, { 50,3208 }, + { 51,3208 }, { 52,3208 }, { 53,3208 }, { 54,3208 }, { 55,3208 }, + { 56,3208 }, { 57,3208 }, { 58,3208 }, { 59,3208 }, { 60,3208 }, + { 61,3208 }, { 62,3208 }, { 63,3208 }, { 64,3208 }, { 65,3208 }, + { 66,3208 }, { 67,3208 }, { 68,3208 }, { 69,3208 }, { 70,3208 }, + { 71,3208 }, { 72,3208 }, { 73,3208 }, { 74,3208 }, { 75,3208 }, + { 76,3208 }, { 77,3208 }, { 78,3208 }, { 79,3208 }, { 80,3208 }, + + { 81,3208 }, { 82,3208 }, { 83,3208 }, { 84,3208 }, { 85,3208 }, + { 86,3208 }, { 87,3208 }, { 88,3208 }, { 89,3208 }, { 90,3208 }, + { 91,3208 }, { 92,3466 }, { 93,3208 }, { 94,3208 }, { 95,3208 }, + { 96,3208 }, { 97,3208 }, { 98,3208 }, { 99,3208 }, { 100,3208 }, + { 101,3208 }, { 102,3208 }, { 103,3208 }, { 104,3208 }, { 105,3208 }, + { 106,3208 }, { 107,3208 }, { 108,3208 }, { 109,3208 }, { 110,3208 }, + { 111,3208 }, { 112,3208 }, { 113,3208 }, { 114,3208 }, { 115,3208 }, + { 116,3208 }, { 117,3208 }, { 118,3208 }, { 119,3208 }, { 120,3208 }, + { 121,3208 }, { 122,3208 }, { 123,3208 }, { 124,3208 }, { 125,3208 }, + { 126,3208 }, { 127,3208 }, { 128,3208 }, { 129,3208 }, { 130,3208 }, + + { 131,3208 }, { 132,3208 }, { 133,3208 }, { 134,3208 }, { 135,3208 }, + { 136,3208 }, { 137,3208 }, { 138,3208 }, { 139,3208 }, { 140,3208 }, + { 141,3208 }, { 142,3208 }, { 143,3208 }, { 144,3208 }, { 145,3208 }, + { 146,3208 }, { 147,3208 }, { 148,3208 }, { 149,3208 }, { 150,3208 }, + { 151,3208 }, { 152,3208 }, { 153,3208 }, { 154,3208 }, { 155,3208 }, + { 156,3208 }, { 157,3208 }, { 158,3208 }, { 159,3208 }, { 160,3208 }, + { 161,3208 }, { 162,3208 }, { 163,3208 }, { 164,3208 }, { 165,3208 }, + { 166,3208 }, { 167,3208 }, { 168,3208 }, { 169,3208 }, { 170,3208 }, + { 171,3208 }, { 172,3208 }, { 173,3208 }, { 174,3208 }, { 175,3208 }, + { 176,3208 }, { 177,3208 }, { 178,3208 }, { 179,3208 }, { 180,3208 }, + + { 181,3208 }, { 182,3208 }, { 183,3208 }, { 184,3208 }, { 185,3208 }, + { 186,3208 }, { 187,3208 }, { 188,3208 }, { 189,3208 }, { 190,3208 }, + { 191,3208 }, { 192,3208 }, { 193,3208 }, { 194,3208 }, { 195,3208 }, + { 196,3208 }, { 197,3208 }, { 198,3208 }, { 199,3208 }, { 200,3208 }, + { 201,3208 }, { 202,3208 }, { 203,3208 }, { 204,3208 }, { 205,3208 }, + { 206,3208 }, { 207,3208 }, { 208,3208 }, { 209,3208 }, { 210,3208 }, + { 211,3208 }, { 212,3208 }, { 213,3208 }, { 214,3208 }, { 215,3208 }, + { 216,3208 }, { 217,3208 }, 
{ 218,3208 }, { 219,3208 }, { 220,3208 }, + { 221,3208 }, { 222,3208 }, { 223,3208 }, { 224,3208 }, { 225,3208 }, + { 226,3208 }, { 227,3208 }, { 228,3208 }, { 229,3208 }, { 230,3208 }, + + { 231,3208 }, { 232,3208 }, { 233,3208 }, { 234,3208 }, { 235,3208 }, + { 236,3208 }, { 237,3208 }, { 238,3208 }, { 239,3208 }, { 240,3208 }, + { 241,3208 }, { 242,3208 }, { 243,3208 }, { 244,3208 }, { 245,3208 }, + { 246,3208 }, { 247,3208 }, { 248,3208 }, { 249,3208 }, { 250,3208 }, + { 251,3208 }, { 252,3208 }, { 253,3208 }, { 254,3208 }, { 255,3208 }, + { 256,3208 }, { 0, 0 }, { 0,14850 }, { 1,2950 }, { 2,2950 }, + { 3,2950 }, { 4,2950 }, { 5,2950 }, { 6,2950 }, { 7,2950 }, + { 8,2950 }, { 9,2950 }, { 10,2950 }, { 11,2950 }, { 12,2950 }, + { 13,2950 }, { 14,2950 }, { 15,2950 }, { 16,2950 }, { 17,2950 }, + { 18,2950 }, { 19,2950 }, { 20,2950 }, { 21,2950 }, { 22,2950 }, + + { 23,2950 }, { 24,2950 }, { 25,2950 }, { 26,2950 }, { 27,2950 }, + { 28,2950 }, { 29,2950 }, { 30,2950 }, { 31,2950 }, { 32,2950 }, + { 33,2950 }, { 34,2920 }, { 35,2950 }, { 36,2950 }, { 37,2950 }, + { 38,2950 }, { 39,2950 }, { 40,2950 }, { 41,2950 }, { 42,2950 }, + { 43,2950 }, { 44,2950 }, { 45,2950 }, { 46,2950 }, { 47,2950 }, + { 48,2950 }, { 49,2950 }, { 50,2950 }, { 51,2950 }, { 52,2950 }, + { 53,2950 }, { 54,2950 }, { 55,2950 }, { 56,2950 }, { 57,2950 }, + { 58,2950 }, { 59,2950 }, { 60,2950 }, { 61,2950 }, { 62,2950 }, + { 63,2950 }, { 64,2950 }, { 65,2950 }, { 66,2950 }, { 67,2950 }, + { 68,2950 }, { 69,2950 }, { 70,2950 }, { 71,2950 }, { 72,2950 }, + + { 73,2950 }, { 74,2950 }, { 75,2950 }, { 76,2950 }, { 77,2950 }, + { 78,2950 }, { 79,2950 }, { 80,2950 }, { 81,2950 }, { 82,2950 }, + { 83,2950 }, { 84,2950 }, { 85,2950 }, { 86,2950 }, { 87,2950 }, + { 88,2950 }, { 89,2950 }, { 90,2950 }, { 91,2950 }, { 92,3208 }, + { 93,2950 }, { 94,2950 }, { 95,2950 }, { 96,2950 }, { 97,2950 }, + { 98,2950 }, { 99,2950 }, { 100,2950 }, { 101,2950 }, { 102,2950 }, + { 103,2950 }, { 104,2950 }, { 105,2950 }, { 106,2950 }, { 107,2950 }, + { 108,2950 }, { 109,2950 }, { 110,2950 }, { 111,2950 }, { 112,2950 }, + { 113,2950 }, { 114,2950 }, { 115,2950 }, { 116,2950 }, { 117,2950 }, + { 118,2950 }, { 119,2950 }, { 120,2950 }, { 121,2950 }, { 122,2950 }, + + { 123,2950 }, { 124,2950 }, { 125,2950 }, { 126,2950 }, { 127,2950 }, + { 128,2950 }, { 129,2950 }, { 130,2950 }, { 131,2950 }, { 132,2950 }, + { 133,2950 }, { 134,2950 }, { 135,2950 }, { 136,2950 }, { 137,2950 }, + { 138,2950 }, { 139,2950 }, { 140,2950 }, { 141,2950 }, { 142,2950 }, + { 143,2950 }, { 144,2950 }, { 145,2950 }, { 146,2950 }, { 147,2950 }, + { 148,2950 }, { 149,2950 }, { 150,2950 }, { 151,2950 }, { 152,2950 }, + { 153,2950 }, { 154,2950 }, { 155,2950 }, { 156,2950 }, { 157,2950 }, + { 158,2950 }, { 159,2950 }, { 160,2950 }, { 161,2950 }, { 162,2950 }, + { 163,2950 }, { 164,2950 }, { 165,2950 }, { 166,2950 }, { 167,2950 }, + { 168,2950 }, { 169,2950 }, { 170,2950 }, { 171,2950 }, { 172,2950 }, + + { 173,2950 }, { 174,2950 }, { 175,2950 }, { 176,2950 }, { 177,2950 }, + { 178,2950 }, { 179,2950 }, { 180,2950 }, { 181,2950 }, { 182,2950 }, + { 183,2950 }, { 184,2950 }, { 185,2950 }, { 186,2950 }, { 187,2950 }, + { 188,2950 }, { 189,2950 }, { 190,2950 }, { 191,2950 }, { 192,2950 }, + { 193,2950 }, { 194,2950 }, { 195,2950 }, { 196,2950 }, { 197,2950 }, + { 198,2950 }, { 199,2950 }, { 200,2950 }, { 201,2950 }, { 202,2950 }, + { 203,2950 }, { 204,2950 }, { 205,2950 }, { 206,2950 }, { 207,2950 }, + { 208,2950 }, { 209,2950 }, { 210,2950 }, { 211,2950 }, { 212,2950 }, + { 213,2950 }, 
{ 214,2950 }, { 215,2950 }, { 216,2950 }, { 217,2950 }, + { 218,2950 }, { 219,2950 }, { 220,2950 }, { 221,2950 }, { 222,2950 }, + + { 223,2950 }, { 224,2950 }, { 225,2950 }, { 226,2950 }, { 227,2950 }, + { 228,2950 }, { 229,2950 }, { 230,2950 }, { 231,2950 }, { 232,2950 }, + { 233,2950 }, { 234,2950 }, { 235,2950 }, { 236,2950 }, { 237,2950 }, + { 238,2950 }, { 239,2950 }, { 240,2950 }, { 241,2950 }, { 242,2950 }, + { 243,2950 }, { 244,2950 }, { 245,2950 }, { 246,2950 }, { 247,2950 }, + { 248,2950 }, { 249,2950 }, { 250,2950 }, { 251,2950 }, { 252,2950 }, + { 253,2950 }, { 254,2950 }, { 255,2950 }, { 256,2950 }, { 0, 0 }, + { 0,14592 }, { 1,3208 }, { 2,3208 }, { 3,3208 }, { 4,3208 }, + { 5,3208 }, { 6,3208 }, { 7,3208 }, { 8,3208 }, { 9,3466 }, + { 10,3466 }, { 11,3208 }, { 12,3466 }, { 13,3466 }, { 14,3208 }, + + { 15,3208 }, { 16,3208 }, { 17,3208 }, { 18,3208 }, { 19,3208 }, + { 20,3208 }, { 21,3208 }, { 22,3208 }, { 23,3208 }, { 24,3208 }, + { 25,3208 }, { 26,3208 }, { 27,3208 }, { 28,3208 }, { 29,3208 }, + { 30,3208 }, { 31,3208 }, { 32,3466 }, { 33,2664 }, { 34,2664 }, + { 35,2664 }, { 36,2664 }, { 37,2664 }, { 38,2664 }, { 39,3208 }, + { 40,2664 }, { 41,2664 }, { 42,2664 }, { 43,2664 }, { 44,2664 }, + { 45,2664 }, { 46,2664 }, { 47,2684 }, { 48,3208 }, { 49,3208 }, + { 50,3208 }, { 51,3208 }, { 52,3208 }, { 53,3208 }, { 54,3208 }, + { 55,3208 }, { 56,3208 }, { 57,3208 }, { 58,2664 }, { 59,3208 }, + { 60,2664 }, { 61,2664 }, { 62,2664 }, { 63,2664 }, { 64,2664 }, + + { 65,3208 }, { 66,3208 }, { 67,3208 }, { 68,3208 }, { 69,3208 }, + { 70,3208 }, { 71,3208 }, { 72,3208 }, { 73,3208 }, { 74,3208 }, + { 75,3208 }, { 76,3208 }, { 77,3208 }, { 78,3208 }, { 79,3208 }, + { 80,3208 }, { 81,3208 }, { 82,3208 }, { 83,3208 }, { 84,3208 }, + { 85,3208 }, { 86,3208 }, { 87,3208 }, { 88,3208 }, { 89,3208 }, + { 90,3208 }, { 91,2664 }, { 92,2950 }, { 93,2664 }, { 94,3208 }, + { 95,3208 }, { 96,3208 }, { 97,3208 }, { 98,3208 }, { 99,3208 }, + { 100,3208 }, { 101,3208 }, { 102,3208 }, { 103,3208 }, { 104,3208 }, + { 105,3208 }, { 106,3208 }, { 107,3208 }, { 108,3208 }, { 109,3208 }, + { 110,3208 }, { 111,3208 }, { 112,3208 }, { 113,3208 }, { 114,3208 }, + + { 115,3208 }, { 116,3208 }, { 117,3208 }, { 118,3208 }, { 119,3208 }, + { 120,3208 }, { 121,3208 }, { 122,3208 }, { 123,2664 }, { 124,2664 }, + { 125,2664 }, { 126,3208 }, { 127,3208 }, { 128,3208 }, { 129,3208 }, + { 130,3208 }, { 131,3208 }, { 132,3208 }, { 133,3208 }, { 134,3208 }, + { 135,3208 }, { 136,3208 }, { 137,3208 }, { 138,3208 }, { 139,3208 }, + { 140,3208 }, { 141,3208 }, { 142,3208 }, { 143,3208 }, { 144,3208 }, + { 145,3208 }, { 146,3208 }, { 147,3208 }, { 148,3208 }, { 149,3208 }, + { 150,3208 }, { 151,3208 }, { 152,3208 }, { 153,3208 }, { 154,3208 }, + { 155,3208 }, { 156,3208 }, { 157,3208 }, { 158,3208 }, { 159,3208 }, + { 160,3208 }, { 161,3208 }, { 162,3208 }, { 163,3208 }, { 164,3208 }, + + { 165,3208 }, { 166,3208 }, { 167,3208 }, { 168,3208 }, { 169,3208 }, + { 170,3208 }, { 171,3208 }, { 172,3208 }, { 173,3208 }, { 174,3208 }, + { 175,3208 }, { 176,3208 }, { 177,3208 }, { 178,3208 }, { 179,3208 }, + { 180,3208 }, { 181,3208 }, { 182,3208 }, { 183,3208 }, { 184,3208 }, + { 185,3208 }, { 186,3208 }, { 187,3208 }, { 188,3208 }, { 189,3208 }, + { 190,3208 }, { 191,3208 }, { 192,3208 }, { 193,3208 }, { 194,3208 }, + { 195,3208 }, { 196,3208 }, { 197,3208 }, { 198,3208 }, { 199,3208 }, + { 200,3208 }, { 201,3208 }, { 202,3208 }, { 203,3208 }, { 204,3208 }, + { 205,3208 }, { 206,3208 }, { 207,3208 }, { 208,3208 }, { 209,3208 }, + 
{ 210,3208 }, { 211,3208 }, { 212,3208 }, { 213,3208 }, { 214,3208 }, + + { 215,3208 }, { 216,3208 }, { 217,3208 }, { 218,3208 }, { 219,3208 }, + { 220,3208 }, { 221,3208 }, { 222,3208 }, { 223,3208 }, { 224,3208 }, + { 225,3208 }, { 226,3208 }, { 227,3208 }, { 228,3208 }, { 229,3208 }, + { 230,3208 }, { 231,3208 }, { 232,3208 }, { 233,3208 }, { 234,3208 }, + { 235,3208 }, { 236,3208 }, { 237,3208 }, { 238,3208 }, { 239,3208 }, + { 240,3208 }, { 241,3208 }, { 242,3208 }, { 243,3208 }, { 244,3208 }, + { 245,3208 }, { 246,3208 }, { 247,3208 }, { 248,3208 }, { 249,3208 }, + { 250,3208 }, { 251,3208 }, { 252,3208 }, { 253,3208 }, { 254,3208 }, + { 255,3208 }, { 256,3208 }, { 0, 0 }, { 0,14334 }, { 1,2950 }, + { 2,2950 }, { 3,2950 }, { 4,2950 }, { 5,2950 }, { 6,2950 }, + + { 7,2950 }, { 8,2950 }, { 9,3208 }, { 10,3208 }, { 11,2950 }, + { 12,3208 }, { 13,3208 }, { 14,2950 }, { 15,2950 }, { 16,2950 }, + { 17,2950 }, { 18,2950 }, { 19,2950 }, { 20,2950 }, { 21,2950 }, + { 22,2950 }, { 23,2950 }, { 24,2950 }, { 25,2950 }, { 26,2950 }, + { 27,2950 }, { 28,2950 }, { 29,2950 }, { 30,2950 }, { 31,2950 }, + { 32,3208 }, { 33,2406 }, { 34,2406 }, { 35,2406 }, { 36,2406 }, + { 37,2406 }, { 38,2406 }, { 39,2950 }, { 40,2406 }, { 41,2406 }, + { 42,2406 }, { 43,2406 }, { 44,2406 }, { 45,2406 }, { 46,2406 }, + { 47,2426 }, { 48,2950 }, { 49,2950 }, { 50,2950 }, { 51,2950 }, + { 52,2950 }, { 53,2950 }, { 54,2950 }, { 55,2950 }, { 56,2950 }, + + { 57,2950 }, { 58,2406 }, { 59,2950 }, { 60,2406 }, { 61,2406 }, + { 62,2406 }, { 63,2406 }, { 64,2406 }, { 65,2950 }, { 66,2950 }, + { 67,2950 }, { 68,2950 }, { 69,2950 }, { 70,2950 }, { 71,2950 }, + { 72,2950 }, { 73,2950 }, { 74,2950 }, { 75,2950 }, { 76,2950 }, + { 77,2950 }, { 78,2950 }, { 79,2950 }, { 80,2950 }, { 81,2950 }, + { 82,2950 }, { 83,2950 }, { 84,2950 }, { 85,2950 }, { 86,2950 }, + { 87,2950 }, { 88,2950 }, { 89,2950 }, { 90,2950 }, { 91,2406 }, + { 92,2692 }, { 93,2406 }, { 94,2950 }, { 95,2950 }, { 96,2950 }, + { 97,2950 }, { 98,2950 }, { 99,2950 }, { 100,2950 }, { 101,2950 }, + { 102,2950 }, { 103,2950 }, { 104,2950 }, { 105,2950 }, { 106,2950 }, + + { 107,2950 }, { 108,2950 }, { 109,2950 }, { 110,2950 }, { 111,2950 }, + { 112,2950 }, { 113,2950 }, { 114,2950 }, { 115,2950 }, { 116,2950 }, + { 117,2950 }, { 118,2950 }, { 119,2950 }, { 120,2950 }, { 121,2950 }, + { 122,2950 }, { 123,2406 }, { 124,2406 }, { 125,2406 }, { 126,2950 }, + { 127,2950 }, { 128,2950 }, { 129,2950 }, { 130,2950 }, { 131,2950 }, + { 132,2950 }, { 133,2950 }, { 134,2950 }, { 135,2950 }, { 136,2950 }, + { 137,2950 }, { 138,2950 }, { 139,2950 }, { 140,2950 }, { 141,2950 }, + { 142,2950 }, { 143,2950 }, { 144,2950 }, { 145,2950 }, { 146,2950 }, + { 147,2950 }, { 148,2950 }, { 149,2950 }, { 150,2950 }, { 151,2950 }, + { 152,2950 }, { 153,2950 }, { 154,2950 }, { 155,2950 }, { 156,2950 }, + + { 157,2950 }, { 158,2950 }, { 159,2950 }, { 160,2950 }, { 161,2950 }, + { 162,2950 }, { 163,2950 }, { 164,2950 }, { 165,2950 }, { 166,2950 }, + { 167,2950 }, { 168,2950 }, { 169,2950 }, { 170,2950 }, { 171,2950 }, + { 172,2950 }, { 173,2950 }, { 174,2950 }, { 175,2950 }, { 176,2950 }, + { 177,2950 }, { 178,2950 }, { 179,2950 }, { 180,2950 }, { 181,2950 }, + { 182,2950 }, { 183,2950 }, { 184,2950 }, { 185,2950 }, { 186,2950 }, + { 187,2950 }, { 188,2950 }, { 189,2950 }, { 190,2950 }, { 191,2950 }, + { 192,2950 }, { 193,2950 }, { 194,2950 }, { 195,2950 }, { 196,2950 }, + { 197,2950 }, { 198,2950 }, { 199,2950 }, { 200,2950 }, { 201,2950 }, + { 202,2950 }, { 203,2950 }, { 204,2950 }, { 205,2950 }, { 
206,2950 }, + + { 207,2950 }, { 208,2950 }, { 209,2950 }, { 210,2950 }, { 211,2950 }, + { 212,2950 }, { 213,2950 }, { 214,2950 }, { 215,2950 }, { 216,2950 }, + { 217,2950 }, { 218,2950 }, { 219,2950 }, { 220,2950 }, { 221,2950 }, + { 222,2950 }, { 223,2950 }, { 224,2950 }, { 225,2950 }, { 226,2950 }, + { 227,2950 }, { 228,2950 }, { 229,2950 }, { 230,2950 }, { 231,2950 }, + { 232,2950 }, { 233,2950 }, { 234,2950 }, { 235,2950 }, { 236,2950 }, + { 237,2950 }, { 238,2950 }, { 239,2950 }, { 240,2950 }, { 241,2950 }, + { 242,2950 }, { 243,2950 }, { 244,2950 }, { 245,2950 }, { 246,2950 }, + { 247,2950 }, { 248,2950 }, { 249,2950 }, { 250,2950 }, { 251,2950 }, + { 252,2950 }, { 253,2950 }, { 254,2950 }, { 255,2950 }, { 256,2950 }, + + { 0, 0 }, { 0,14076 }, { 1,2176 }, { 2,2176 }, { 3,2176 }, + { 4,2176 }, { 5,2176 }, { 6,2176 }, { 7,2176 }, { 8,2176 }, + { 9,2176 }, { 10,2176 }, { 11,2176 }, { 12,2176 }, { 13,2176 }, + { 14,2176 }, { 15,2176 }, { 16,2176 }, { 17,2176 }, { 18,2176 }, + { 19,2176 }, { 20,2176 }, { 21,2176 }, { 22,2176 }, { 23,2176 }, + { 24,2176 }, { 25,2176 }, { 26,2176 }, { 27,2176 }, { 28,2176 }, + { 29,2176 }, { 30,2176 }, { 31,2176 }, { 32,2176 }, { 33,2176 }, + { 34,2170 }, { 35,2176 }, { 36,2176 }, { 37,2176 }, { 38,2176 }, + { 39,2176 }, { 40,2176 }, { 41,2176 }, { 42,2176 }, { 43,2176 }, + { 44,2176 }, { 45,2176 }, { 46,2176 }, { 47,2176 }, { 48,2176 }, + + { 49,2176 }, { 50,2176 }, { 51,2176 }, { 52,2176 }, { 53,2176 }, + { 54,2176 }, { 55,2176 }, { 56,2176 }, { 57,2176 }, { 58,2176 }, + { 59,2176 }, { 60,2176 }, { 61,2176 }, { 62,2176 }, { 63,2176 }, + { 64,2176 }, { 65,2176 }, { 66,2176 }, { 67,2176 }, { 68,2176 }, + { 69,2176 }, { 70,2176 }, { 71,2176 }, { 72,2176 }, { 73,2176 }, + { 74,2176 }, { 75,2176 }, { 76,2176 }, { 77,2176 }, { 78,2176 }, + { 79,2176 }, { 80,2176 }, { 81,2176 }, { 82,2176 }, { 83,2176 }, + { 84,2176 }, { 85,2176 }, { 86,2176 }, { 87,2176 }, { 88,2176 }, + { 89,2176 }, { 90,2176 }, { 91,2176 }, { 92,2434 }, { 93,2176 }, + { 94,2176 }, { 95,2176 }, { 96,2176 }, { 97,2176 }, { 98,2176 }, + + { 99,2176 }, { 100,2176 }, { 101,2176 }, { 102,2176 }, { 103,2176 }, + { 104,2176 }, { 105,2176 }, { 106,2176 }, { 107,2176 }, { 108,2176 }, + { 109,2176 }, { 110,2176 }, { 111,2176 }, { 112,2176 }, { 113,2176 }, + { 114,2176 }, { 115,2176 }, { 116,2176 }, { 117,2176 }, { 118,2176 }, + { 119,2176 }, { 120,2176 }, { 121,2176 }, { 122,2176 }, { 123,2176 }, + { 124,2176 }, { 125,2176 }, { 126,2176 }, { 127,2176 }, { 128,2176 }, + { 129,2176 }, { 130,2176 }, { 131,2176 }, { 132,2176 }, { 133,2176 }, + { 134,2176 }, { 135,2176 }, { 136,2176 }, { 137,2176 }, { 138,2176 }, + { 139,2176 }, { 140,2176 }, { 141,2176 }, { 142,2176 }, { 143,2176 }, + { 144,2176 }, { 145,2176 }, { 146,2176 }, { 147,2176 }, { 148,2176 }, + + { 149,2176 }, { 150,2176 }, { 151,2176 }, { 152,2176 }, { 153,2176 }, + { 154,2176 }, { 155,2176 }, { 156,2176 }, { 157,2176 }, { 158,2176 }, + { 159,2176 }, { 160,2176 }, { 161,2176 }, { 162,2176 }, { 163,2176 }, + { 164,2176 }, { 165,2176 }, { 166,2176 }, { 167,2176 }, { 168,2176 }, + { 169,2176 }, { 170,2176 }, { 171,2176 }, { 172,2176 }, { 173,2176 }, + { 174,2176 }, { 175,2176 }, { 176,2176 }, { 177,2176 }, { 178,2176 }, + { 179,2176 }, { 180,2176 }, { 181,2176 }, { 182,2176 }, { 183,2176 }, + { 184,2176 }, { 185,2176 }, { 186,2176 }, { 187,2176 }, { 188,2176 }, + { 189,2176 }, { 190,2176 }, { 191,2176 }, { 192,2176 }, { 193,2176 }, + { 194,2176 }, { 195,2176 }, { 196,2176 }, { 197,2176 }, { 198,2176 }, + + { 199,2176 }, { 200,2176 }, { 201,2176 }, 
{ 202,2176 }, { 203,2176 }, + { 204,2176 }, { 205,2176 }, { 206,2176 }, { 207,2176 }, { 208,2176 }, + { 209,2176 }, { 210,2176 }, { 211,2176 }, { 212,2176 }, { 213,2176 }, + { 214,2176 }, { 215,2176 }, { 216,2176 }, { 217,2176 }, { 218,2176 }, + { 219,2176 }, { 220,2176 }, { 221,2176 }, { 222,2176 }, { 223,2176 }, + { 224,2176 }, { 225,2176 }, { 226,2176 }, { 227,2176 }, { 228,2176 }, + { 229,2176 }, { 230,2176 }, { 231,2176 }, { 232,2176 }, { 233,2176 }, + { 234,2176 }, { 235,2176 }, { 236,2176 }, { 237,2176 }, { 238,2176 }, + { 239,2176 }, { 240,2176 }, { 241,2176 }, { 242,2176 }, { 243,2176 }, + { 244,2176 }, { 245,2176 }, { 246,2176 }, { 247,2176 }, { 248,2176 }, + + { 249,2176 }, { 250,2176 }, { 251,2176 }, { 252,2176 }, { 253,2176 }, + { 254,2176 }, { 255,2176 }, { 256,2176 }, { 0, 0 }, { 0,13818 }, + { 1,1918 }, { 2,1918 }, { 3,1918 }, { 4,1918 }, { 5,1918 }, + { 6,1918 }, { 7,1918 }, { 8,1918 }, { 9,1918 }, { 10,1918 }, + { 11,1918 }, { 12,1918 }, { 13,1918 }, { 14,1918 }, { 15,1918 }, + { 16,1918 }, { 17,1918 }, { 18,1918 }, { 19,1918 }, { 20,1918 }, + { 21,1918 }, { 22,1918 }, { 23,1918 }, { 24,1918 }, { 25,1918 }, + { 26,1918 }, { 27,1918 }, { 28,1918 }, { 29,1918 }, { 30,1918 }, + { 31,1918 }, { 32,1918 }, { 33,1918 }, { 34,1912 }, { 35,1918 }, + { 36,1918 }, { 37,1918 }, { 38,1918 }, { 39,1918 }, { 40,1918 }, + + { 41,1918 }, { 42,1918 }, { 43,1918 }, { 44,1918 }, { 45,1918 }, + { 46,1918 }, { 47,1918 }, { 48,1918 }, { 49,1918 }, { 50,1918 }, + { 51,1918 }, { 52,1918 }, { 53,1918 }, { 54,1918 }, { 55,1918 }, + { 56,1918 }, { 57,1918 }, { 58,1918 }, { 59,1918 }, { 60,1918 }, + { 61,1918 }, { 62,1918 }, { 63,1918 }, { 64,1918 }, { 65,1918 }, + { 66,1918 }, { 67,1918 }, { 68,1918 }, { 69,1918 }, { 70,1918 }, + { 71,1918 }, { 72,1918 }, { 73,1918 }, { 74,1918 }, { 75,1918 }, + { 76,1918 }, { 77,1918 }, { 78,1918 }, { 79,1918 }, { 80,1918 }, + { 81,1918 }, { 82,1918 }, { 83,1918 }, { 84,1918 }, { 85,1918 }, + { 86,1918 }, { 87,1918 }, { 88,1918 }, { 89,1918 }, { 90,1918 }, + + { 91,1918 }, { 92,2176 }, { 93,1918 }, { 94,1918 }, { 95,1918 }, + { 96,1918 }, { 97,1918 }, { 98,1918 }, { 99,1918 }, { 100,1918 }, + { 101,1918 }, { 102,1918 }, { 103,1918 }, { 104,1918 }, { 105,1918 }, + { 106,1918 }, { 107,1918 }, { 108,1918 }, { 109,1918 }, { 110,1918 }, + { 111,1918 }, { 112,1918 }, { 113,1918 }, { 114,1918 }, { 115,1918 }, + { 116,1918 }, { 117,1918 }, { 118,1918 }, { 119,1918 }, { 120,1918 }, + { 121,1918 }, { 122,1918 }, { 123,1918 }, { 124,1918 }, { 125,1918 }, + { 126,1918 }, { 127,1918 }, { 128,1918 }, { 129,1918 }, { 130,1918 }, + { 131,1918 }, { 132,1918 }, { 133,1918 }, { 134,1918 }, { 135,1918 }, + { 136,1918 }, { 137,1918 }, { 138,1918 }, { 139,1918 }, { 140,1918 }, + + { 141,1918 }, { 142,1918 }, { 143,1918 }, { 144,1918 }, { 145,1918 }, + { 146,1918 }, { 147,1918 }, { 148,1918 }, { 149,1918 }, { 150,1918 }, + { 151,1918 }, { 152,1918 }, { 153,1918 }, { 154,1918 }, { 155,1918 }, + { 156,1918 }, { 157,1918 }, { 158,1918 }, { 159,1918 }, { 160,1918 }, + { 161,1918 }, { 162,1918 }, { 163,1918 }, { 164,1918 }, { 165,1918 }, + { 166,1918 }, { 167,1918 }, { 168,1918 }, { 169,1918 }, { 170,1918 }, + { 171,1918 }, { 172,1918 }, { 173,1918 }, { 174,1918 }, { 175,1918 }, + { 176,1918 }, { 177,1918 }, { 178,1918 }, { 179,1918 }, { 180,1918 }, + { 181,1918 }, { 182,1918 }, { 183,1918 }, { 184,1918 }, { 185,1918 }, + { 186,1918 }, { 187,1918 }, { 188,1918 }, { 189,1918 }, { 190,1918 }, + + { 191,1918 }, { 192,1918 }, { 193,1918 }, { 194,1918 }, { 195,1918 }, + { 196,1918 }, { 197,1918 }, 
{ 198,1918 }, { 199,1918 }, { 200,1918 }, + { 201,1918 }, { 202,1918 }, { 203,1918 }, { 204,1918 }, { 205,1918 }, + { 206,1918 }, { 207,1918 }, { 208,1918 }, { 209,1918 }, { 210,1918 }, + { 211,1918 }, { 212,1918 }, { 213,1918 }, { 214,1918 }, { 215,1918 }, + { 216,1918 }, { 217,1918 }, { 218,1918 }, { 219,1918 }, { 220,1918 }, + { 221,1918 }, { 222,1918 }, { 223,1918 }, { 224,1918 }, { 225,1918 }, + { 226,1918 }, { 227,1918 }, { 228,1918 }, { 229,1918 }, { 230,1918 }, + { 231,1918 }, { 232,1918 }, { 233,1918 }, { 234,1918 }, { 235,1918 }, + { 236,1918 }, { 237,1918 }, { 238,1918 }, { 239,1918 }, { 240,1918 }, + + { 241,1918 }, { 242,1918 }, { 243,1918 }, { 244,1918 }, { 245,1918 }, + { 246,1918 }, { 247,1918 }, { 248,1918 }, { 249,1918 }, { 250,1918 }, + { 251,1918 }, { 252,1918 }, { 253,1918 }, { 254,1918 }, { 255,1918 }, + { 256,1918 }, { 0, 0 }, { 0,13560 }, { 1,2468 }, { 2,2468 }, + { 3,2468 }, { 4,2468 }, { 5,2468 }, { 6,2468 }, { 7,2468 }, + { 8,2468 }, { 9,2468 }, { 10,2468 }, { 11,2468 }, { 12,2468 }, + { 13,2468 }, { 14,2468 }, { 15,2468 }, { 16,2468 }, { 17,2468 }, + { 18,2468 }, { 19,2468 }, { 20,2468 }, { 21,2468 }, { 22,2468 }, + { 23,2468 }, { 24,2468 }, { 25,2468 }, { 26,2468 }, { 27,2468 }, + { 28,2468 }, { 29,2468 }, { 30,2468 }, { 31,2468 }, { 32,2468 }, + + { 33,2468 }, { 34,2468 }, { 35,2468 }, { 36,2468 }, { 37,2468 }, + { 38,2468 }, { 39,2468 }, { 40,2468 }, { 41,2468 }, { 42,2189 }, + { 43,2468 }, { 44,2468 }, { 45,2468 }, { 46,2468 }, { 47,2468 }, + { 48,2468 }, { 49,2468 }, { 50,2468 }, { 51,2468 }, { 52,2468 }, + { 53,2468 }, { 54,2468 }, { 55,2468 }, { 56,2468 }, { 57,2468 }, + { 58,2468 }, { 59,2468 }, { 60,2468 }, { 61,2468 }, { 62,2468 }, + { 63,2468 }, { 64,2468 }, { 65,2468 }, { 66,2468 }, { 67,2468 }, + { 68,2468 }, { 69,2468 }, { 70,2468 }, { 71,2468 }, { 72,2468 }, + { 73,2468 }, { 74,2468 }, { 75,2468 }, { 76,2468 }, { 77,2468 }, + { 78,2468 }, { 79,2468 }, { 80,2468 }, { 81,2468 }, { 82,2468 }, + + { 83,2468 }, { 84,2468 }, { 85,2468 }, { 86,2468 }, { 87,2468 }, + { 88,2468 }, { 89,2468 }, { 90,2468 }, { 91,2468 }, { 92,2468 }, + { 93,2468 }, { 94,2468 }, { 95,2468 }, { 96,2468 }, { 97,2468 }, + { 98,2468 }, { 99,2468 }, { 100,2468 }, { 101,2468 }, { 102,2468 }, + { 103,2468 }, { 104,2468 }, { 105,2468 }, { 106,2468 }, { 107,2468 }, + { 108,2468 }, { 109,2468 }, { 110,2468 }, { 111,2468 }, { 112,2468 }, + { 113,2468 }, { 114,2468 }, { 115,2468 }, { 116,2468 }, { 117,2468 }, + { 118,2468 }, { 119,2468 }, { 120,2468 }, { 121,2468 }, { 122,2468 }, + { 123,2468 }, { 124,2468 }, { 125,2468 }, { 126,2468 }, { 127,2468 }, + { 128,2468 }, { 129,2468 }, { 130,2468 }, { 131,2468 }, { 132,2468 }, + + { 133,2468 }, { 134,2468 }, { 135,2468 }, { 136,2468 }, { 137,2468 }, + { 138,2468 }, { 139,2468 }, { 140,2468 }, { 141,2468 }, { 142,2468 }, + { 143,2468 }, { 144,2468 }, { 145,2468 }, { 146,2468 }, { 147,2468 }, + { 148,2468 }, { 149,2468 }, { 150,2468 }, { 151,2468 }, { 152,2468 }, + { 153,2468 }, { 154,2468 }, { 155,2468 }, { 156,2468 }, { 157,2468 }, + { 158,2468 }, { 159,2468 }, { 160,2468 }, { 161,2468 }, { 162,2468 }, + { 163,2468 }, { 164,2468 }, { 165,2468 }, { 166,2468 }, { 167,2468 }, + { 168,2468 }, { 169,2468 }, { 170,2468 }, { 171,2468 }, { 172,2468 }, + { 173,2468 }, { 174,2468 }, { 175,2468 }, { 176,2468 }, { 177,2468 }, + { 178,2468 }, { 179,2468 }, { 180,2468 }, { 181,2468 }, { 182,2468 }, + + { 183,2468 }, { 184,2468 }, { 185,2468 }, { 186,2468 }, { 187,2468 }, + { 188,2468 }, { 189,2468 }, { 190,2468 }, { 191,2468 }, { 192,2468 }, + { 193,2468 }, 
{ 194,2468 }, { 195,2468 }, { 196,2468 }, { 197,2468 }, + { 198,2468 }, { 199,2468 }, { 200,2468 }, { 201,2468 }, { 202,2468 }, + { 203,2468 }, { 204,2468 }, { 205,2468 }, { 206,2468 }, { 207,2468 }, + { 208,2468 }, { 209,2468 }, { 210,2468 }, { 211,2468 }, { 212,2468 }, + { 213,2468 }, { 214,2468 }, { 215,2468 }, { 216,2468 }, { 217,2468 }, + { 218,2468 }, { 219,2468 }, { 220,2468 }, { 221,2468 }, { 222,2468 }, + { 223,2468 }, { 224,2468 }, { 225,2468 }, { 226,2468 }, { 227,2468 }, + { 228,2468 }, { 229,2468 }, { 230,2468 }, { 231,2468 }, { 232,2468 }, + + { 233,2468 }, { 234,2468 }, { 235,2468 }, { 236,2468 }, { 237,2468 }, + { 238,2468 }, { 239,2468 }, { 240,2468 }, { 241,2468 }, { 242,2468 }, + { 243,2468 }, { 244,2468 }, { 245,2468 }, { 246,2468 }, { 247,2468 }, + { 248,2468 }, { 249,2468 }, { 250,2468 }, { 251,2468 }, { 252,2468 }, + { 253,2468 }, { 254,2468 }, { 255,2468 }, { 256,2468 }, { 0, 0 }, + { 0,13302 }, { 1,2210 }, { 2,2210 }, { 3,2210 }, { 4,2210 }, + { 5,2210 }, { 6,2210 }, { 7,2210 }, { 8,2210 }, { 9,2210 }, + { 10,2210 }, { 11,2210 }, { 12,2210 }, { 13,2210 }, { 14,2210 }, + { 15,2210 }, { 16,2210 }, { 17,2210 }, { 18,2210 }, { 19,2210 }, + { 20,2210 }, { 21,2210 }, { 22,2210 }, { 23,2210 }, { 24,2210 }, + + { 25,2210 }, { 26,2210 }, { 27,2210 }, { 28,2210 }, { 29,2210 }, + { 30,2210 }, { 31,2210 }, { 32,2210 }, { 33,2210 }, { 34,2210 }, + { 35,2210 }, { 36,2210 }, { 37,2210 }, { 38,2210 }, { 39,2210 }, + { 40,2210 }, { 41,2210 }, { 42,1931 }, { 43,2210 }, { 44,2210 }, + { 45,2210 }, { 46,2210 }, { 47,2210 }, { 48,2210 }, { 49,2210 }, + { 50,2210 }, { 51,2210 }, { 52,2210 }, { 53,2210 }, { 54,2210 }, + { 55,2210 }, { 56,2210 }, { 57,2210 }, { 58,2210 }, { 59,2210 }, + { 60,2210 }, { 61,2210 }, { 62,2210 }, { 63,2210 }, { 64,2210 }, + { 65,2210 }, { 66,2210 }, { 67,2210 }, { 68,2210 }, { 69,2210 }, + { 70,2210 }, { 71,2210 }, { 72,2210 }, { 73,2210 }, { 74,2210 }, + + { 75,2210 }, { 76,2210 }, { 77,2210 }, { 78,2210 }, { 79,2210 }, + { 80,2210 }, { 81,2210 }, { 82,2210 }, { 83,2210 }, { 84,2210 }, + { 85,2210 }, { 86,2210 }, { 87,2210 }, { 88,2210 }, { 89,2210 }, + { 90,2210 }, { 91,2210 }, { 92,2210 }, { 93,2210 }, { 94,2210 }, + { 95,2210 }, { 96,2210 }, { 97,2210 }, { 98,2210 }, { 99,2210 }, + { 100,2210 }, { 101,2210 }, { 102,2210 }, { 103,2210 }, { 104,2210 }, + { 105,2210 }, { 106,2210 }, { 107,2210 }, { 108,2210 }, { 109,2210 }, + { 110,2210 }, { 111,2210 }, { 112,2210 }, { 113,2210 }, { 114,2210 }, + { 115,2210 }, { 116,2210 }, { 117,2210 }, { 118,2210 }, { 119,2210 }, + { 120,2210 }, { 121,2210 }, { 122,2210 }, { 123,2210 }, { 124,2210 }, + + { 125,2210 }, { 126,2210 }, { 127,2210 }, { 128,2210 }, { 129,2210 }, + { 130,2210 }, { 131,2210 }, { 132,2210 }, { 133,2210 }, { 134,2210 }, + { 135,2210 }, { 136,2210 }, { 137,2210 }, { 138,2210 }, { 139,2210 }, + { 140,2210 }, { 141,2210 }, { 142,2210 }, { 143,2210 }, { 144,2210 }, + { 145,2210 }, { 146,2210 }, { 147,2210 }, { 148,2210 }, { 149,2210 }, + { 150,2210 }, { 151,2210 }, { 152,2210 }, { 153,2210 }, { 154,2210 }, + { 155,2210 }, { 156,2210 }, { 157,2210 }, { 158,2210 }, { 159,2210 }, + { 160,2210 }, { 161,2210 }, { 162,2210 }, { 163,2210 }, { 164,2210 }, + { 165,2210 }, { 166,2210 }, { 167,2210 }, { 168,2210 }, { 169,2210 }, + { 170,2210 }, { 171,2210 }, { 172,2210 }, { 173,2210 }, { 174,2210 }, + + { 175,2210 }, { 176,2210 }, { 177,2210 }, { 178,2210 }, { 179,2210 }, + { 180,2210 }, { 181,2210 }, { 182,2210 }, { 183,2210 }, { 184,2210 }, + { 185,2210 }, { 186,2210 }, { 187,2210 }, { 188,2210 }, { 189,2210 }, + 
{ 190,2210 }, { 191,2210 }, { 192,2210 }, { 193,2210 }, { 194,2210 }, + { 195,2210 }, { 196,2210 }, { 197,2210 }, { 198,2210 }, { 199,2210 }, + { 200,2210 }, { 201,2210 }, { 202,2210 }, { 203,2210 }, { 204,2210 }, + { 205,2210 }, { 206,2210 }, { 207,2210 }, { 208,2210 }, { 209,2210 }, + { 210,2210 }, { 211,2210 }, { 212,2210 }, { 213,2210 }, { 214,2210 }, + { 215,2210 }, { 216,2210 }, { 217,2210 }, { 218,2210 }, { 219,2210 }, + { 220,2210 }, { 221,2210 }, { 222,2210 }, { 223,2210 }, { 224,2210 }, + + { 225,2210 }, { 226,2210 }, { 227,2210 }, { 228,2210 }, { 229,2210 }, + { 230,2210 }, { 231,2210 }, { 232,2210 }, { 233,2210 }, { 234,2210 }, + { 235,2210 }, { 236,2210 }, { 237,2210 }, { 238,2210 }, { 239,2210 }, + { 240,2210 }, { 241,2210 }, { 242,2210 }, { 243,2210 }, { 244,2210 }, + { 245,2210 }, { 246,2210 }, { 247,2210 }, { 248,2210 }, { 249,2210 }, + { 250,2210 }, { 251,2210 }, { 252,2210 }, { 253,2210 }, { 254,2210 }, + { 255,2210 }, { 256,2210 }, { 0, 52 }, { 0,13044 }, { 1,2210 }, + { 2,2210 }, { 3,2210 }, { 4,2210 }, { 5,2210 }, { 6,2210 }, + { 7,2210 }, { 8,2210 }, { 0, 0 }, { 0, 0 }, { 11,2210 }, + { 0, 0 }, { 0, 0 }, { 14,2210 }, { 15,2210 }, { 16,2210 }, + + { 17,2210 }, { 18,2210 }, { 19,2210 }, { 20,2210 }, { 21,2210 }, + { 22,2210 }, { 23,2210 }, { 24,2210 }, { 25,2210 }, { 26,2210 }, + { 27,2210 }, { 28,2210 }, { 29,2210 }, { 30,2210 }, { 31,2210 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 39,2210 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 48,2210 }, { 49,2210 }, { 50,2210 }, { 51,2210 }, + { 52,2210 }, { 53,2210 }, { 54,2210 }, { 55,2210 }, { 56,2210 }, + { 57,2210 }, { 0, 0 }, { 59,2210 }, { 0, 0 }, { 0, 26 }, + { 0,12982 }, { 0, 50 }, { 0,12980 }, { 65,2210 }, { 66,2210 }, + + { 67,2210 }, { 68,2210 }, { 69,2210 }, { 70,2210 }, { 71,2210 }, + { 72,2210 }, { 73,2210 }, { 74,2210 }, { 75,2210 }, { 76,2210 }, + { 77,2210 }, { 78,2210 }, { 79,2210 }, { 80,2210 }, { 81,2210 }, + { 82,2210 }, { 83,2210 }, { 84,2210 }, { 85,2210 }, { 86,2210 }, + { 87,2210 }, { 88,2210 }, { 89,2210 }, { 90,2210 }, { 0, 37 }, + { 0,12952 }, { 0, 0 }, { 94,2210 }, { 95,2210 }, { 96,2210 }, + { 97,2210 }, { 98,2210 }, { 99,2210 }, { 100,2210 }, { 101,2210 }, + { 102,2210 }, { 103,2210 }, { 104,2210 }, { 105,2210 }, { 106,2210 }, + { 107,2210 }, { 108,2210 }, { 109,2210 }, { 110,2210 }, { 111,2210 }, + { 112,2210 }, { 113,2210 }, { 114,2210 }, { 115,2210 }, { 116,2210 }, + + { 117,2210 }, { 118,2210 }, { 119,2210 }, { 120,2210 }, { 121,2210 }, + { 122,2210 }, { 61,1631 }, { 0, 0 }, { 0, 0 }, { 126,2210 }, + { 127,2210 }, { 128,2210 }, { 129,2210 }, { 130,2210 }, { 131,2210 }, + { 132,2210 }, { 133,2210 }, { 134,2210 }, { 135,2210 }, { 136,2210 }, + { 137,2210 }, { 138,2210 }, { 139,2210 }, { 140,2210 }, { 141,2210 }, + { 142,2210 }, { 143,2210 }, { 144,2210 }, { 145,2210 }, { 146,2210 }, + { 147,2210 }, { 148,2210 }, { 149,2210 }, { 150,2210 }, { 151,2210 }, + { 152,2210 }, { 153,2210 }, { 154,2210 }, { 155,2210 }, { 156,2210 }, + { 157,2210 }, { 158,2210 }, { 159,2210 }, { 160,2210 }, { 161,2210 }, + { 162,2210 }, { 163,2210 }, { 164,2210 }, { 165,2210 }, { 166,2210 }, + + { 167,2210 }, { 168,2210 }, { 169,2210 }, { 170,2210 }, { 171,2210 }, + { 172,2210 }, { 173,2210 }, { 174,2210 }, { 175,2210 }, { 176,2210 }, + { 177,2210 }, { 178,2210 }, { 179,2210 }, { 180,2210 }, { 181,2210 }, + { 182,2210 }, { 183,2210 }, { 184,2210 }, { 185,2210 }, { 186,2210 }, + { 187,2210 }, { 188,2210 }, { 189,2210 }, { 190,2210 }, { 
191,2210 }, + { 192,2210 }, { 193,2210 }, { 194,2210 }, { 195,2210 }, { 196,2210 }, + { 197,2210 }, { 198,2210 }, { 199,2210 }, { 200,2210 }, { 201,2210 }, + { 202,2210 }, { 203,2210 }, { 204,2210 }, { 205,2210 }, { 206,2210 }, + { 207,2210 }, { 208,2210 }, { 209,2210 }, { 210,2210 }, { 211,2210 }, + { 212,2210 }, { 213,2210 }, { 214,2210 }, { 215,2210 }, { 216,2210 }, + + { 217,2210 }, { 218,2210 }, { 219,2210 }, { 220,2210 }, { 221,2210 }, + { 222,2210 }, { 223,2210 }, { 224,2210 }, { 225,2210 }, { 226,2210 }, + { 227,2210 }, { 228,2210 }, { 229,2210 }, { 230,2210 }, { 231,2210 }, + { 232,2210 }, { 233,2210 }, { 234,2210 }, { 235,2210 }, { 236,2210 }, + { 237,2210 }, { 238,2210 }, { 239,2210 }, { 240,2210 }, { 241,2210 }, + { 242,2210 }, { 243,2210 }, { 244,2210 }, { 245,2210 }, { 246,2210 }, + { 247,2210 }, { 248,2210 }, { 249,2210 }, { 250,2210 }, { 251,2210 }, + { 252,2210 }, { 253,2210 }, { 254,2210 }, { 255,2210 }, { 256,2210 }, + { 0, 38 }, { 0,12786 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 9,2210 }, { 10,2210 }, { 0, 0 }, { 12,2210 }, { 13,2210 }, + { 0, 0 }, { 0, 37 }, { 0,12770 }, { 0, 0 }, { 0, 37 }, + { 0,12767 }, { 1,2210 }, { 2,2210 }, { 3,2210 }, { 4,2210 }, + { 5,2210 }, { 6,2210 }, { 7,2210 }, { 8,2210 }, { 0, 0 }, + { 0, 0 }, { 11,2210 }, { 0, 0 }, { 32,2210 }, { 14,2210 }, + { 15,2210 }, { 16,2210 }, { 17,2210 }, { 18,2210 }, { 19,2210 }, + { 20,2210 }, { 21,2210 }, { 22,2210 }, { 23,2210 }, { 24,2210 }, + { 25,2210 }, { 26,2210 }, { 27,2210 }, { 28,2210 }, { 29,2210 }, + { 30,2210 }, { 31,2210 }, { 0, 0 }, { 0, 0 }, { 34,1418 }, + { 38,1423 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39,2210 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48,2210 }, { 49,2210 }, + { 50,2210 }, { 51,2210 }, { 52,2210 }, { 53,2210 }, { 54,2210 }, + { 55,2210 }, { 56,2210 }, { 57,2210 }, { 0, 0 }, { 59,2210 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 65,2210 }, { 66,2210 }, { 67,2210 }, { 68,2210 }, { 69,2210 }, + { 70,2210 }, { 71,2210 }, { 72,2210 }, { 73,2210 }, { 74,2210 }, + { 75,2210 }, { 76,2210 }, { 77,2210 }, { 78,2210 }, { 79,2210 }, + { 80,2210 }, { 81,2210 }, { 82,2210 }, { 83,2210 }, { 84,2210 }, + { 85,2210 }, { 86,2210 }, { 87,2210 }, { 88,2210 }, { 89,2210 }, + + { 90,2210 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94,2210 }, + { 95,2210 }, { 96,2210 }, { 97,2210 }, { 98,2210 }, { 99,2210 }, + { 100,2210 }, { 101,2210 }, { 102,2210 }, { 103,2210 }, { 104,2210 }, + { 105,2210 }, { 106,2210 }, { 107,2210 }, { 108,2210 }, { 109,2210 }, + { 110,2210 }, { 111,2210 }, { 112,2210 }, { 113,2210 }, { 114,2210 }, + { 115,2210 }, { 116,2210 }, { 117,2210 }, { 118,2210 }, { 119,2210 }, + { 120,2210 }, { 121,2210 }, { 122,2210 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 126,2210 }, { 127,2210 }, { 128,2210 }, { 129,2210 }, + { 130,2210 }, { 131,2210 }, { 132,2210 }, { 133,2210 }, { 134,2210 }, + { 135,2210 }, { 136,2210 }, { 137,2210 }, { 138,2210 }, { 139,2210 }, + + { 140,2210 }, { 141,2210 }, { 142,2210 }, { 143,2210 }, { 144,2210 }, + { 145,2210 }, { 146,2210 }, { 147,2210 }, { 148,2210 }, { 149,2210 }, + { 150,2210 }, { 151,2210 }, { 152,2210 }, { 153,2210 }, { 154,2210 }, + { 155,2210 }, { 156,2210 }, { 157,2210 }, { 158,2210 }, { 159,2210 }, + { 160,2210 }, { 161,2210 }, { 162,2210 }, { 163,2210 }, { 164,2210 }, + { 165,2210 }, { 166,2210 }, { 167,2210 }, { 168,2210 }, { 169,2210 }, + { 170,2210 }, { 171,2210 }, { 172,2210 }, { 173,2210 }, { 174,2210 }, + { 175,2210 }, { 176,2210 }, { 177,2210 }, 
{ 178,2210 }, { 179,2210 }, + { 180,2210 }, { 181,2210 }, { 182,2210 }, { 183,2210 }, { 184,2210 }, + { 185,2210 }, { 186,2210 }, { 187,2210 }, { 188,2210 }, { 189,2210 }, + + { 190,2210 }, { 191,2210 }, { 192,2210 }, { 193,2210 }, { 194,2210 }, + { 195,2210 }, { 196,2210 }, { 197,2210 }, { 198,2210 }, { 199,2210 }, + { 200,2210 }, { 201,2210 }, { 202,2210 }, { 203,2210 }, { 204,2210 }, + { 205,2210 }, { 206,2210 }, { 207,2210 }, { 208,2210 }, { 209,2210 }, + { 210,2210 }, { 211,2210 }, { 212,2210 }, { 213,2210 }, { 214,2210 }, + { 215,2210 }, { 216,2210 }, { 217,2210 }, { 218,2210 }, { 219,2210 }, + { 220,2210 }, { 221,2210 }, { 222,2210 }, { 223,2210 }, { 224,2210 }, + { 225,2210 }, { 226,2210 }, { 227,2210 }, { 228,2210 }, { 229,2210 }, + { 230,2210 }, { 231,2210 }, { 232,2210 }, { 233,2210 }, { 234,2210 }, + { 235,2210 }, { 236,2210 }, { 237,2210 }, { 238,2210 }, { 239,2210 }, + + { 240,2210 }, { 241,2210 }, { 242,2210 }, { 243,2210 }, { 244,2210 }, + { 245,2210 }, { 246,2210 }, { 247,2210 }, { 248,2210 }, { 249,2210 }, + { 250,2210 }, { 251,2210 }, { 252,2210 }, { 253,2210 }, { 254,2210 }, + { 255,2210 }, { 256,2210 }, { 0, 37 }, { 0,12509 }, { 0, 37 }, + { 0,12507 }, { 0, 37 }, { 0,12505 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 28 }, { 0,12500 }, { 0, 0 }, { 0, 37 }, + { 0,12497 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 34 }, + + { 0,12477 }, { 0, 51 }, { 0,12475 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 42,1166 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 42,1164 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48,2208 }, { 49,2208 }, + { 50,2208 }, { 51,2208 }, { 52,2208 }, { 53,2208 }, { 54,2208 }, + { 55,2208 }, { 56,2208 }, { 57,2208 }, { 0, 42 }, { 0,12448 }, + { 1,2407 }, { 2,2407 }, { 3,2407 }, { 4,2407 }, { 5,2407 }, + { 6,2407 }, { 7,2407 }, { 8,2407 }, { 61,1161 }, { 62,1163 }, + { 11,2407 }, { 61,1175 }, { 0, 0 }, { 14,2407 }, { 15,2407 }, + { 16,2407 }, { 17,2407 }, { 18,2407 }, { 19,2407 }, { 20,2407 }, + + { 21,2407 }, { 22,2407 }, { 23,2407 }, { 24,2407 }, { 25,2407 }, + { 26,2407 }, { 27,2407 }, { 28,2407 }, { 29,2407 }, { 30,2407 }, + { 31,2407 }, { 61,1157 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39,2407 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 46,2665 }, { 0, 0 }, { 48,2407 }, { 49,2407 }, { 50,2407 }, + { 51,2407 }, { 52,2407 }, { 53,2407 }, { 54,2407 }, { 55,2407 }, + { 56,2407 }, { 57,2407 }, { 0, 0 }, { 59,2407 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65,2407 }, + { 66,2923 }, { 67,2407 }, { 68,2407 }, { 69,3181 }, { 70,2407 }, + + { 71,2407 }, { 72,2407 }, { 73,2407 }, { 74,2407 }, { 75,2407 }, + { 76,2407 }, { 77,2407 }, { 78,2407 }, { 79,3439 }, { 80,2407 }, + { 81,2407 }, { 82,2407 }, { 83,2407 }, { 84,2407 }, { 85,2407 }, + { 86,2407 }, { 87,2407 }, { 88,3697 }, { 89,2407 }, { 90,2407 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94,2407 }, { 95,2407 }, + { 96,2407 }, { 97,2407 }, { 98,2923 }, { 99,2407 }, { 100,2407 }, + { 101,3181 }, { 102,2407 }, { 103,2407 }, { 104,2407 }, { 105,2407 }, + { 106,2407 }, { 107,2407 }, { 108,2407 }, { 109,2407 }, { 110,2407 }, + { 111,3439 }, { 112,2407 }, { 113,2407 }, { 114,2407 }, { 115,2407 }, + { 116,2407 }, { 117,2407 }, { 118,2407 }, { 119,2407 }, { 120,3697 }, + + { 121,2407 }, { 122,2407 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 126,2407 }, { 127,2407 }, { 128,2407 }, { 129,2407 }, { 130,2407 
}, + { 131,2407 }, { 132,2407 }, { 133,2407 }, { 134,2407 }, { 135,2407 }, + { 136,2407 }, { 137,2407 }, { 138,2407 }, { 139,2407 }, { 140,2407 }, + { 141,2407 }, { 142,2407 }, { 143,2407 }, { 144,2407 }, { 145,2407 }, + { 146,2407 }, { 147,2407 }, { 148,2407 }, { 149,2407 }, { 150,2407 }, + { 151,2407 }, { 152,2407 }, { 153,2407 }, { 154,2407 }, { 155,2407 }, + { 156,2407 }, { 157,2407 }, { 158,2407 }, { 159,2407 }, { 160,2407 }, + { 161,2407 }, { 162,2407 }, { 163,2407 }, { 164,2407 }, { 165,2407 }, + { 166,2407 }, { 167,2407 }, { 168,2407 }, { 169,2407 }, { 170,2407 }, + + { 171,2407 }, { 172,2407 }, { 173,2407 }, { 174,2407 }, { 175,2407 }, + { 176,2407 }, { 177,2407 }, { 178,2407 }, { 179,2407 }, { 180,2407 }, + { 181,2407 }, { 182,2407 }, { 183,2407 }, { 184,2407 }, { 185,2407 }, + { 186,2407 }, { 187,2407 }, { 188,2407 }, { 189,2407 }, { 190,2407 }, + { 191,2407 }, { 192,2407 }, { 193,2407 }, { 194,2407 }, { 195,2407 }, + { 196,2407 }, { 197,2407 }, { 198,2407 }, { 199,2407 }, { 200,2407 }, + { 201,2407 }, { 202,2407 }, { 203,2407 }, { 204,2407 }, { 205,2407 }, + { 206,2407 }, { 207,2407 }, { 208,2407 }, { 209,2407 }, { 210,2407 }, + { 211,2407 }, { 212,2407 }, { 213,2407 }, { 214,2407 }, { 215,2407 }, + { 216,2407 }, { 217,2407 }, { 218,2407 }, { 219,2407 }, { 220,2407 }, + + { 221,2407 }, { 222,2407 }, { 223,2407 }, { 224,2407 }, { 225,2407 }, + { 226,2407 }, { 227,2407 }, { 228,2407 }, { 229,2407 }, { 230,2407 }, + { 231,2407 }, { 232,2407 }, { 233,2407 }, { 234,2407 }, { 235,2407 }, + { 236,2407 }, { 237,2407 }, { 238,2407 }, { 239,2407 }, { 240,2407 }, + { 241,2407 }, { 242,2407 }, { 243,2407 }, { 244,2407 }, { 245,2407 }, + { 246,2407 }, { 247,2407 }, { 248,2407 }, { 249,2407 }, { 250,2407 }, + { 251,2407 }, { 252,2407 }, { 253,2407 }, { 254,2407 }, { 255,2407 }, + { 256,2407 }, { 0, 42 }, { 0,12190 }, { 1,2149 }, { 2,2149 }, + { 3,2149 }, { 4,2149 }, { 5,2149 }, { 6,2149 }, { 7,2149 }, + { 8,2149 }, { 0, 0 }, { 0, 0 }, { 11,2149 }, { 0, 0 }, + + { 0, 0 }, { 14,2149 }, { 15,2149 }, { 16,2149 }, { 17,2149 }, + { 18,2149 }, { 19,2149 }, { 20,2149 }, { 21,2149 }, { 22,2149 }, + { 23,2149 }, { 24,2149 }, { 25,2149 }, { 26,2149 }, { 27,2149 }, + { 28,2149 }, { 29,2149 }, { 30,2149 }, { 31,2149 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 39,2149 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 46,2407 }, { 0, 0 }, + { 48,3697 }, { 49,3697 }, { 50,3697 }, { 51,3697 }, { 52,3697 }, + { 53,3697 }, { 54,3697 }, { 55,3697 }, { 56,3697 }, { 57,3697 }, + { 0, 0 }, { 59,2149 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 65,2149 }, { 66,2149 }, { 67,2149 }, + { 68,2149 }, { 69,2923 }, { 70,2149 }, { 71,2149 }, { 72,2149 }, + { 73,2149 }, { 74,2149 }, { 75,2149 }, { 76,2149 }, { 77,2149 }, + { 78,2149 }, { 79,2149 }, { 80,2149 }, { 81,2149 }, { 82,2149 }, + { 83,2149 }, { 84,2149 }, { 85,2149 }, { 86,2149 }, { 87,2149 }, + { 88,2149 }, { 89,2149 }, { 90,2149 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 94,2149 }, { 95,3955 }, { 96,2149 }, { 97,2149 }, + { 98,2149 }, { 99,2149 }, { 100,2149 }, { 101,2923 }, { 102,2149 }, + { 103,2149 }, { 104,2149 }, { 105,2149 }, { 106,2149 }, { 107,2149 }, + { 108,2149 }, { 109,2149 }, { 110,2149 }, { 111,2149 }, { 112,2149 }, + + { 113,2149 }, { 114,2149 }, { 115,2149 }, { 116,2149 }, { 117,2149 }, + { 118,2149 }, { 119,2149 }, { 120,2149 }, { 121,2149 }, { 122,2149 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 126,2149 }, { 127,2149 }, + { 128,2149 }, { 129,2149 }, { 130,2149 }, { 131,2149 }, { 132,2149 }, + 
{ 133,2149 }, { 134,2149 }, { 135,2149 }, { 136,2149 }, { 137,2149 }, + { 138,2149 }, { 139,2149 }, { 140,2149 }, { 141,2149 }, { 142,2149 }, + { 143,2149 }, { 144,2149 }, { 145,2149 }, { 146,2149 }, { 147,2149 }, + { 148,2149 }, { 149,2149 }, { 150,2149 }, { 151,2149 }, { 152,2149 }, + { 153,2149 }, { 154,2149 }, { 155,2149 }, { 156,2149 }, { 157,2149 }, + { 158,2149 }, { 159,2149 }, { 160,2149 }, { 161,2149 }, { 162,2149 }, + + { 163,2149 }, { 164,2149 }, { 165,2149 }, { 166,2149 }, { 167,2149 }, + { 168,2149 }, { 169,2149 }, { 170,2149 }, { 171,2149 }, { 172,2149 }, + { 173,2149 }, { 174,2149 }, { 175,2149 }, { 176,2149 }, { 177,2149 }, + { 178,2149 }, { 179,2149 }, { 180,2149 }, { 181,2149 }, { 182,2149 }, + { 183,2149 }, { 184,2149 }, { 185,2149 }, { 186,2149 }, { 187,2149 }, + { 188,2149 }, { 189,2149 }, { 190,2149 }, { 191,2149 }, { 192,2149 }, + { 193,2149 }, { 194,2149 }, { 195,2149 }, { 196,2149 }, { 197,2149 }, + { 198,2149 }, { 199,2149 }, { 200,2149 }, { 201,2149 }, { 202,2149 }, + { 203,2149 }, { 204,2149 }, { 205,2149 }, { 206,2149 }, { 207,2149 }, + { 208,2149 }, { 209,2149 }, { 210,2149 }, { 211,2149 }, { 212,2149 }, + + { 213,2149 }, { 214,2149 }, { 215,2149 }, { 216,2149 }, { 217,2149 }, + { 218,2149 }, { 219,2149 }, { 220,2149 }, { 221,2149 }, { 222,2149 }, + { 223,2149 }, { 224,2149 }, { 225,2149 }, { 226,2149 }, { 227,2149 }, + { 228,2149 }, { 229,2149 }, { 230,2149 }, { 231,2149 }, { 232,2149 }, + { 233,2149 }, { 234,2149 }, { 235,2149 }, { 236,2149 }, { 237,2149 }, + { 238,2149 }, { 239,2149 }, { 240,2149 }, { 241,2149 }, { 242,2149 }, + { 243,2149 }, { 244,2149 }, { 245,2149 }, { 246,2149 }, { 247,2149 }, + { 248,2149 }, { 249,2149 }, { 250,2149 }, { 251,2149 }, { 252,2149 }, + { 253,2149 }, { 254,2149 }, { 255,2149 }, { 256,2149 }, { 0, 37 }, + { 0,11932 }, { 0, 18 }, { 0,11930 }, { 0, 4 }, { 0,11928 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 4 }, { 0,11908 }, + { 0, 19 }, { 0,11906 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 20 }, { 0,11900 }, { 1,3923 }, { 2,3923 }, + { 3,3923 }, { 4,3923 }, { 5,3923 }, { 6,3923 }, { 7,3923 }, + { 8,3923 }, { 9,3923 }, { 10,3923 }, { 11,3923 }, { 12,3923 }, + { 13,3923 }, { 14,3923 }, { 15,3923 }, { 16,3923 }, { 17,3923 }, + { 18,3923 }, { 19,3923 }, { 20,3923 }, { 21,3923 }, { 22,3923 }, + + { 23,3923 }, { 24,3923 }, { 25,3923 }, { 26,3923 }, { 27,3923 }, + { 28,3923 }, { 29,3923 }, { 30,3923 }, { 31,3923 }, { 32,3923 }, + { 33,3923 }, { 42, 801 }, { 35,3923 }, { 36,3923 }, { 37,3923 }, + { 38,3923 }, { 39,3923 }, { 40,3923 }, { 41,3923 }, { 42,3923 }, + { 43,3923 }, { 44,3923 }, { 45,3923 }, { 46,3923 }, { 47,3923 }, + { 48,3923 }, { 49,3923 }, { 50,3923 }, { 51,3923 }, { 52,3923 }, + { 53,3923 }, { 54,3923 }, { 55,3923 }, { 56,3923 }, { 57,3923 }, + { 58,3923 }, { 59,3923 }, { 60,3923 }, { 61,3923 }, { 62,3923 }, + { 63,3923 }, { 64,3923 }, { 65,3923 }, { 66,3923 }, { 67,3923 }, + { 68,3923 }, { 69,3923 }, { 70,3923 }, { 71,3923 }, { 72,3923 }, + + { 73,3923 }, { 74,3923 }, { 75,3923 }, { 76,3923 }, { 77,3923 }, + { 78,3923 }, { 79,3923 }, { 80,3923 }, { 81,3923 }, { 82,3923 }, + { 83,3923 }, { 84,3923 }, { 85,3923 }, { 86,3923 }, { 87,3923 }, + { 88,3923 }, { 89,3923 }, { 90,3923 }, { 91,3923 }, { 124, 640 }, + { 93,3923 }, { 94,3923 }, { 95,3923 }, { 96,3923 }, { 97,3923 }, + { 98,3923 }, { 99,3923 }, { 100,3923 }, { 101,3923 }, { 102,3923 }, + { 103,3923 }, { 
104,3923 }, { 105,3923 }, { 106,3923 }, { 107,3923 }, + { 108,3923 }, { 109,3923 }, { 110,3923 }, { 111,3923 }, { 112,3923 }, + { 113,3923 }, { 114,3923 }, { 115,3923 }, { 116,3923 }, { 117,3923 }, + { 118,3923 }, { 119,3923 }, { 120,3923 }, { 121,3923 }, { 122,3923 }, + + { 123,3923 }, { 124,3923 }, { 125,3923 }, { 126,3923 }, { 127,3923 }, + { 128,3923 }, { 129,3923 }, { 130,3923 }, { 131,3923 }, { 132,3923 }, + { 133,3923 }, { 134,3923 }, { 135,3923 }, { 136,3923 }, { 137,3923 }, + { 138,3923 }, { 139,3923 }, { 140,3923 }, { 141,3923 }, { 142,3923 }, + { 143,3923 }, { 144,3923 }, { 145,3923 }, { 146,3923 }, { 147,3923 }, + { 148,3923 }, { 149,3923 }, { 150,3923 }, { 151,3923 }, { 152,3923 }, + { 153,3923 }, { 154,3923 }, { 155,3923 }, { 156,3923 }, { 157,3923 }, + { 158,3923 }, { 159,3923 }, { 160,3923 }, { 161,3923 }, { 162,3923 }, + { 163,3923 }, { 164,3923 }, { 165,3923 }, { 166,3923 }, { 167,3923 }, + { 168,3923 }, { 169,3923 }, { 170,3923 }, { 171,3923 }, { 172,3923 }, + + { 173,3923 }, { 174,3923 }, { 175,3923 }, { 176,3923 }, { 177,3923 }, + { 178,3923 }, { 179,3923 }, { 180,3923 }, { 181,3923 }, { 182,3923 }, + { 183,3923 }, { 184,3923 }, { 185,3923 }, { 186,3923 }, { 187,3923 }, + { 188,3923 }, { 189,3923 }, { 190,3923 }, { 191,3923 }, { 192,3923 }, + { 193,3923 }, { 194,3923 }, { 195,3923 }, { 196,3923 }, { 197,3923 }, + { 198,3923 }, { 199,3923 }, { 200,3923 }, { 201,3923 }, { 202,3923 }, + { 203,3923 }, { 204,3923 }, { 205,3923 }, { 206,3923 }, { 207,3923 }, + { 208,3923 }, { 209,3923 }, { 210,3923 }, { 211,3923 }, { 212,3923 }, + { 213,3923 }, { 214,3923 }, { 215,3923 }, { 216,3923 }, { 217,3923 }, + { 218,3923 }, { 219,3923 }, { 220,3923 }, { 221,3923 }, { 222,3923 }, + + { 223,3923 }, { 224,3923 }, { 225,3923 }, { 226,3923 }, { 227,3923 }, + { 228,3923 }, { 229,3923 }, { 230,3923 }, { 231,3923 }, { 232,3923 }, + { 233,3923 }, { 234,3923 }, { 235,3923 }, { 236,3923 }, { 237,3923 }, + { 238,3923 }, { 239,3923 }, { 240,3923 }, { 241,3923 }, { 242,3923 }, + { 243,3923 }, { 244,3923 }, { 245,3923 }, { 246,3923 }, { 247,3923 }, + { 248,3923 }, { 249,3923 }, { 250,3923 }, { 251,3923 }, { 252,3923 }, + { 253,3923 }, { 254,3923 }, { 255,3923 }, { 256,3923 }, { 0, 17 }, + { 0,11642 }, { 1, 382 }, { 2, 382 }, { 3, 382 }, { 4, 382 }, + { 5, 382 }, { 6, 382 }, { 7, 382 }, { 8, 382 }, { 9, 382 }, + { 0, 0 }, { 11, 382 }, { 12, 382 }, { 13, 382 }, { 14, 382 }, + + { 15, 382 }, { 16, 382 }, { 17, 382 }, { 18, 382 }, { 19, 382 }, + { 20, 382 }, { 21, 382 }, { 22, 382 }, { 23, 382 }, { 24, 382 }, + { 25, 382 }, { 26, 382 }, { 27, 382 }, { 28, 382 }, { 29, 382 }, + { 30, 382 }, { 31, 382 }, { 32, 382 }, { 33, 382 }, { 34, 382 }, + { 35, 382 }, { 36, 382 }, { 37, 382 }, { 38, 382 }, { 39, 382 }, + { 40, 382 }, { 41, 382 }, { 42, 382 }, { 43, 382 }, { 44, 382 }, + { 45, 382 }, { 46, 382 }, { 47, 382 }, { 48, 382 }, { 49, 382 }, + { 50, 382 }, { 51, 382 }, { 52, 382 }, { 53, 382 }, { 54, 382 }, + { 55, 382 }, { 56, 382 }, { 57, 382 }, { 58, 382 }, { 59, 382 }, + { 60, 382 }, { 61, 382 }, { 62, 382 }, { 63, 382 }, { 64, 382 }, + + { 65, 382 }, { 66, 382 }, { 67, 382 }, { 68, 382 }, { 69, 382 }, + { 70, 382 }, { 71, 382 }, { 72, 382 }, { 73, 382 }, { 74, 382 }, + { 75, 382 }, { 76, 382 }, { 77, 382 }, { 78, 382 }, { 79, 382 }, + { 80, 382 }, { 81, 382 }, { 82, 382 }, { 83, 382 }, { 84, 382 }, + { 85, 382 }, { 86, 382 }, { 87, 382 }, { 88, 382 }, { 89, 382 }, + { 90, 382 }, { 91, 382 }, { 92, 382 }, { 93, 382 }, { 94, 382 }, + { 95, 382 }, { 96, 382 }, { 97, 382 }, { 98, 518 }, { 99, 382 }, + { 
100, 382 }, { 101, 382 }, { 102, 520 }, { 103, 382 }, { 104, 382 }, + { 105, 382 }, { 106, 382 }, { 107, 382 }, { 108, 382 }, { 109, 382 }, + { 110, 522 }, { 111, 382 }, { 112, 382 }, { 113, 382 }, { 114, 524 }, + + { 115, 382 }, { 116, 531 }, { 117,3923 }, { 118, 533 }, { 119, 382 }, + { 120,3961 }, { 121, 382 }, { 122, 382 }, { 123, 382 }, { 124, 382 }, + { 125, 382 }, { 126, 382 }, { 127, 382 }, { 128, 382 }, { 129, 382 }, + { 130, 382 }, { 131, 382 }, { 132, 382 }, { 133, 382 }, { 134, 382 }, + { 135, 382 }, { 136, 382 }, { 137, 382 }, { 138, 382 }, { 139, 382 }, + { 140, 382 }, { 141, 382 }, { 142, 382 }, { 143, 382 }, { 144, 382 }, + { 145, 382 }, { 146, 382 }, { 147, 382 }, { 148, 382 }, { 149, 382 }, + { 150, 382 }, { 151, 382 }, { 152, 382 }, { 153, 382 }, { 154, 382 }, + { 155, 382 }, { 156, 382 }, { 157, 382 }, { 158, 382 }, { 159, 382 }, + { 160, 382 }, { 161, 382 }, { 162, 382 }, { 163, 382 }, { 164, 382 }, + + { 165, 382 }, { 166, 382 }, { 167, 382 }, { 168, 382 }, { 169, 382 }, + { 170, 382 }, { 171, 382 }, { 172, 382 }, { 173, 382 }, { 174, 382 }, + { 175, 382 }, { 176, 382 }, { 177, 382 }, { 178, 382 }, { 179, 382 }, + { 180, 382 }, { 181, 382 }, { 182, 382 }, { 183, 382 }, { 184, 382 }, + { 185, 382 }, { 186, 382 }, { 187, 382 }, { 188, 382 }, { 189, 382 }, + { 190, 382 }, { 191, 382 }, { 192, 382 }, { 193, 382 }, { 194, 382 }, + { 195, 382 }, { 196, 382 }, { 197, 382 }, { 198, 382 }, { 199, 382 }, + { 200, 382 }, { 201, 382 }, { 202, 382 }, { 203, 382 }, { 204, 382 }, + { 205, 382 }, { 206, 382 }, { 207, 382 }, { 208, 382 }, { 209, 382 }, + { 210, 382 }, { 211, 382 }, { 212, 382 }, { 213, 382 }, { 214, 382 }, + + { 215, 382 }, { 216, 382 }, { 217, 382 }, { 218, 382 }, { 219, 382 }, + { 220, 382 }, { 221, 382 }, { 222, 382 }, { 223, 382 }, { 224, 382 }, + { 225, 382 }, { 226, 382 }, { 227, 382 }, { 228, 382 }, { 229, 382 }, + { 230, 382 }, { 231, 382 }, { 232, 382 }, { 233, 382 }, { 234, 382 }, + { 235, 382 }, { 236, 382 }, { 237, 382 }, { 238, 382 }, { 239, 382 }, + { 240, 382 }, { 241, 382 }, { 242, 382 }, { 243, 382 }, { 244, 382 }, + { 245, 382 }, { 246, 382 }, { 247, 382 }, { 248, 382 }, { 249, 382 }, + { 250, 382 }, { 251, 382 }, { 252, 382 }, { 253, 382 }, { 254, 382 }, + { 255, 382 }, { 256, 382 }, { 0, 1 }, { 0,11384 }, { 1,3807 }, + { 2,3807 }, { 3,3807 }, { 4,3807 }, { 5,3807 }, { 6,3807 }, + + { 7,3807 }, { 8,3807 }, { 0, 0 }, { 0, 0 }, { 11,3807 }, + { 0, 23 }, { 0,11371 }, { 14,3807 }, { 15,3807 }, { 16,3807 }, + { 17,3807 }, { 18,3807 }, { 19,3807 }, { 20,3807 }, { 21,3807 }, + { 22,3807 }, { 23,3807 }, { 24,3807 }, { 25,3807 }, { 26,3807 }, + { 27,3807 }, { 28,3807 }, { 29,3807 }, { 30,3807 }, { 31,3807 }, + { 0, 32 }, { 0,11351 }, { 0, 36 }, { 0,11349 }, { 0, 24 }, + { 0,11347 }, { 0, 0 }, { 39,3807 }, { 0, 27 }, { 0,11343 }, + { 0, 39 }, { 0,11341 }, { 0, 29 }, { 0,11339 }, { 0, 31 }, + { 0,11337 }, { 48,3807 }, { 49,3807 }, { 50,3807 }, { 51,3807 }, + { 52,3807 }, { 53,3807 }, { 54,3807 }, { 55,3807 }, { 56,3807 }, + + { 57,3807 }, { 0, 0 }, { 59,3807 }, { 47, 266 }, { 0, 30 }, + { 0,11322 }, { 0, 33 }, { 0,11320 }, { 65,3807 }, { 66,3807 }, + { 67,3807 }, { 68,3807 }, { 69,3807 }, { 70,3807 }, { 71,3807 }, + { 72,3807 }, { 73,3807 }, { 74,3807 }, { 75,3807 }, { 76,3807 }, + { 77,3807 }, { 78,3807 }, { 79,3807 }, { 80,3807 }, { 81,3807 }, + { 82,3807 }, { 83,3807 }, { 84,3807 }, { 85,3807 }, { 86,3807 }, + { 87,3807 }, { 88,3807 }, { 89,3807 }, { 90,3807 }, { 0, 25 }, + { 0,11292 }, { 0, 0 }, { 94,3807 }, { 95,3807 }, { 96,3807 }, + { 97,3807 }, { 
98,3807 }, { 99,3807 }, { 100,3807 }, { 101,3807 }, + { 102,3807 }, { 103,3807 }, { 104,3807 }, { 105,3807 }, { 106,3807 }, + + { 107,3807 }, { 108,3807 }, { 109,3807 }, { 110,3807 }, { 111,3807 }, + { 112,3807 }, { 113,3807 }, { 114,3807 }, { 115,3807 }, { 116,3807 }, + { 117,3807 }, { 118,3807 }, { 119,3807 }, { 120,3807 }, { 121,3807 }, + { 122,3807 }, { 0, 16 }, { 0,11260 }, { 0, 0 }, { 126,3807 }, + { 127,3807 }, { 128,3807 }, { 129,3807 }, { 130,3807 }, { 131,3807 }, + { 132,3807 }, { 133,3807 }, { 134,3807 }, { 135,3807 }, { 136,3807 }, + { 137,3807 }, { 138,3807 }, { 139,3807 }, { 140,3807 }, { 141,3807 }, + { 142,3807 }, { 143,3807 }, { 144,3807 }, { 145,3807 }, { 146,3807 }, + { 147,3807 }, { 148,3807 }, { 149,3807 }, { 150,3807 }, { 151,3807 }, + { 152,3807 }, { 153,3807 }, { 154,3807 }, { 155,3807 }, { 156,3807 }, + + { 157,3807 }, { 158,3807 }, { 159,3807 }, { 160,3807 }, { 161,3807 }, + { 162,3807 }, { 163,3807 }, { 164,3807 }, { 165,3807 }, { 166,3807 }, + { 167,3807 }, { 168,3807 }, { 169,3807 }, { 170,3807 }, { 171,3807 }, + { 172,3807 }, { 173,3807 }, { 174,3807 }, { 175,3807 }, { 176,3807 }, + { 177,3807 }, { 178,3807 }, { 179,3807 }, { 180,3807 }, { 181,3807 }, + { 182,3807 }, { 183,3807 }, { 184,3807 }, { 185,3807 }, { 186,3807 }, + { 187,3807 }, { 188,3807 }, { 189,3807 }, { 190,3807 }, { 191,3807 }, + { 192,3807 }, { 193,3807 }, { 194,3807 }, { 195,3807 }, { 196,3807 }, + { 197,3807 }, { 198,3807 }, { 199,3807 }, { 200,3807 }, { 201,3807 }, + { 202,3807 }, { 203,3807 }, { 204,3807 }, { 205,3807 }, { 206,3807 }, + + { 207,3807 }, { 208,3807 }, { 209,3807 }, { 210,3807 }, { 211,3807 }, + { 212,3807 }, { 213,3807 }, { 214,3807 }, { 215,3807 }, { 216,3807 }, + { 217,3807 }, { 218,3807 }, { 219,3807 }, { 220,3807 }, { 221,3807 }, + { 222,3807 }, { 223,3807 }, { 224,3807 }, { 225,3807 }, { 226,3807 }, + { 227,3807 }, { 228,3807 }, { 229,3807 }, { 230,3807 }, { 231,3807 }, + { 232,3807 }, { 233,3807 }, { 234,3807 }, { 235,3807 }, { 236,3807 }, + { 237,3807 }, { 238,3807 }, { 239,3807 }, { 240,3807 }, { 241,3807 }, + { 242,3807 }, { 243,3807 }, { 244,3807 }, { 245,3807 }, { 246,3807 }, + { 247,3807 }, { 248,3807 }, { 249,3807 }, { 250,3807 }, { 251,3807 }, + { 252,3807 }, { 253,3807 }, { 254,3807 }, { 255,3807 }, { 256,3807 }, + + { 0, 2 }, { 0,11126 }, { 0, 5 }, { 0,11124 }, { 0, 6 }, + { 0,11122 }, { 0, 7 }, { 0,11120 }, { 0, 8 }, { 0,11118 }, + { 9,3807 }, { 10,3807 }, { 0, 0 }, { 12,3807 }, { 13,3807 }, + { 0, 9 }, { 0,11111 }, { 0, 10 }, { 0,11109 }, { 0, 3 }, + { 0,11107 }, { 0, 21 }, { 0,11105 }, { 0, 48 }, { 0,11103 }, + { 0, 12 }, { 0,11101 }, { 0, 49 }, { 0,11099 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 32,3807 }, { 0, 22 }, + { 0,11092 }, { 1,3807 }, { 2,3807 }, { 3,3807 }, { 4,3807 }, + { 5,3807 }, { 6,3807 }, { 7,3807 }, { 8,3807 }, { 9,3807 }, + { 10,3807 }, { 11,3807 }, { 12,3807 }, { 13,3807 }, { 14,3807 }, + + { 15,3807 }, { 16,3807 }, { 17,3807 }, { 18,3807 }, { 19,3807 }, + { 20,3807 }, { 21,3807 }, { 22,3807 }, { 23,3807 }, { 24,3807 }, + { 25,3807 }, { 26,3807 }, { 27,3807 }, { 28,3807 }, { 29,3807 }, + { 30,3807 }, { 31,3807 }, { 32,3807 }, { 33,3807 }, { 34,3807 }, + { 35,3807 }, { 36,3807 }, { 37,3807 }, { 38,3807 }, { 39,3807 }, + { 40,3807 }, { 41,3807 }, { 0, 0 }, { 43,3807 }, { 44,3807 }, + { 45,3807 }, { 46,3807 }, { 47,3807 }, { 48,3807 }, { 49,3807 }, + { 50,3807 }, { 51,3807 }, { 52,3807 }, { 53,3807 }, { 54,3807 }, + { 55,3807 }, { 56,3807 }, { 57,3807 }, { 58,3807 }, { 59,3807 }, + { 60,3807 }, { 61,3807 }, { 62,3807 }, { 63,3807 }, { 
64,3807 }, + + { 65,3807 }, { 66,3807 }, { 67,3807 }, { 68,3807 }, { 69,3807 }, + { 70,3807 }, { 71,3807 }, { 72,3807 }, { 73,3807 }, { 74,3807 }, + { 75,3807 }, { 76,3807 }, { 77,3807 }, { 78,3807 }, { 79,3807 }, + { 80,3807 }, { 81,3807 }, { 82,3807 }, { 83,3807 }, { 84,3807 }, + { 85,3807 }, { 86,3807 }, { 87,3807 }, { 88,3807 }, { 89,3807 }, + { 90,3807 }, { 91,3807 }, { 92,3807 }, { 93,3807 }, { 94,3807 }, + { 95,3807 }, { 96,3807 }, { 97,3807 }, { 98,3807 }, { 99,3807 }, + { 100,3807 }, { 101,3807 }, { 102,3807 }, { 103,3807 }, { 104,3807 }, + { 105,3807 }, { 106,3807 }, { 107,3807 }, { 108,3807 }, { 109,3807 }, + { 110,3807 }, { 111,3807 }, { 112,3807 }, { 113,3807 }, { 114,3807 }, + + { 115,3807 }, { 116,3807 }, { 117,3807 }, { 118,3807 }, { 119,3807 }, + { 120,3807 }, { 121,3807 }, { 122,3807 }, { 123,3807 }, { 124,3807 }, + { 125,3807 }, { 126,3807 }, { 127,3807 }, { 128,3807 }, { 129,3807 }, + { 130,3807 }, { 131,3807 }, { 132,3807 }, { 133,3807 }, { 134,3807 }, + { 135,3807 }, { 136,3807 }, { 137,3807 }, { 138,3807 }, { 139,3807 }, + { 140,3807 }, { 141,3807 }, { 142,3807 }, { 143,3807 }, { 144,3807 }, + { 145,3807 }, { 146,3807 }, { 147,3807 }, { 148,3807 }, { 149,3807 }, + { 150,3807 }, { 151,3807 }, { 152,3807 }, { 153,3807 }, { 154,3807 }, + { 155,3807 }, { 156,3807 }, { 157,3807 }, { 158,3807 }, { 159,3807 }, + { 160,3807 }, { 161,3807 }, { 162,3807 }, { 163,3807 }, { 164,3807 }, + + { 165,3807 }, { 166,3807 }, { 167,3807 }, { 168,3807 }, { 169,3807 }, + { 170,3807 }, { 171,3807 }, { 172,3807 }, { 173,3807 }, { 174,3807 }, + { 175,3807 }, { 176,3807 }, { 177,3807 }, { 178,3807 }, { 179,3807 }, + { 180,3807 }, { 181,3807 }, { 182,3807 }, { 183,3807 }, { 184,3807 }, + { 185,3807 }, { 186,3807 }, { 187,3807 }, { 188,3807 }, { 189,3807 }, + { 190,3807 }, { 191,3807 }, { 192,3807 }, { 193,3807 }, { 194,3807 }, + { 195,3807 }, { 196,3807 }, { 197,3807 }, { 198,3807 }, { 199,3807 }, + { 200,3807 }, { 201,3807 }, { 202,3807 }, { 203,3807 }, { 204,3807 }, + { 205,3807 }, { 206,3807 }, { 207,3807 }, { 208,3807 }, { 209,3807 }, + { 210,3807 }, { 211,3807 }, { 212,3807 }, { 213,3807 }, { 214,3807 }, + + { 215,3807 }, { 216,3807 }, { 217,3807 }, { 218,3807 }, { 219,3807 }, + { 220,3807 }, { 221,3807 }, { 222,3807 }, { 223,3807 }, { 224,3807 }, + { 225,3807 }, { 226,3807 }, { 227,3807 }, { 228,3807 }, { 229,3807 }, + { 230,3807 }, { 231,3807 }, { 232,3807 }, { 233,3807 }, { 234,3807 }, + { 235,3807 }, { 236,3807 }, { 237,3807 }, { 238,3807 }, { 239,3807 }, + { 240,3807 }, { 241,3807 }, { 242,3807 }, { 243,3807 }, { 244,3807 }, + { 245,3807 }, { 246,3807 }, { 247,3807 }, { 248,3807 }, { 249,3807 }, + { 250,3807 }, { 251,3807 }, { 252,3807 }, { 253,3807 }, { 254,3807 }, + { 255,3807 }, { 256,3807 }, { 0, 52 }, { 0,10834 }, { 1, 0 }, + { 2, 0 }, { 3, 0 }, { 4, 0 }, { 5, 0 }, { 6, 0 }, + + { 7, 0 }, { 8, 0 }, { 0, 0 }, { 0, 0 }, { 11, 0 }, + { 0, 0 }, { 0, 0 }, { 14, 0 }, { 15, 0 }, { 16, 0 }, + { 17, 0 }, { 18, 0 }, { 19, 0 }, { 20, 0 }, { 21, 0 }, + { 22, 0 }, { 23, 0 }, { 24, 0 }, { 25, 0 }, { 26, 0 }, + { 27, 0 }, { 28, 0 }, { 29, 0 }, { 30, 0 }, { 31, 0 }, + { 0, 11 }, { 0,10801 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 39, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 48, 0 }, { 49, 0 }, { 50, 0 }, { 51, 0 }, + { 52, 0 }, { 53, 0 }, { 54, 0 }, { 55, 0 }, { 56, 0 }, + + { 57, 0 }, { 0, 0 }, { 59, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65, 0 }, { 66, 0 }, + { 67, 0 }, { 68, 0 }, { 69, 0 }, { 70, 0 }, { 71, 0 
}, + { 72, 0 }, { 73, 0 }, { 74, 0 }, { 75, 0 }, { 76, 0 }, + { 77, 0 }, { 78, 0 }, { 79, 0 }, { 80, 0 }, { 81, 0 }, + { 82, 0 }, { 83, 0 }, { 84, 0 }, { 85, 0 }, { 86, 0 }, + { 87, 0 }, { 88, 0 }, { 89, 0 }, { 90, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 94, 0 }, { 95, 0 }, { 96, 0 }, + { 97, 0 }, { 98, 0 }, { 99, 0 }, { 100, 0 }, { 101, 0 }, + { 102, 0 }, { 103, 0 }, { 104, 0 }, { 105, 0 }, { 106, 0 }, + + { 107, 0 }, { 108, 0 }, { 109, 0 }, { 110, 0 }, { 111, 0 }, + { 112, 0 }, { 113, 0 }, { 114, 0 }, { 115, 0 }, { 116, 0 }, + { 117, 0 }, { 118, 0 }, { 119, 0 }, { 120, 0 }, { 121, 0 }, + { 122, 0 }, { 0, 0 }, { 0, 0 }, { 92,3084 }, { 126, 0 }, + { 127, 0 }, { 128, 0 }, { 129, 0 }, { 130, 0 }, { 131, 0 }, + { 132, 0 }, { 133, 0 }, { 134, 0 }, { 135, 0 }, { 136, 0 }, + { 137, 0 }, { 138, 0 }, { 139, 0 }, { 140, 0 }, { 141, 0 }, + { 142, 0 }, { 143, 0 }, { 144, 0 }, { 145, 0 }, { 146, 0 }, + { 147, 0 }, { 148, 0 }, { 149, 0 }, { 150, 0 }, { 151, 0 }, + { 152, 0 }, { 153, 0 }, { 154, 0 }, { 155, 0 }, { 156, 0 }, + + { 157, 0 }, { 158, 0 }, { 159, 0 }, { 160, 0 }, { 161, 0 }, + { 162, 0 }, { 163, 0 }, { 164, 0 }, { 165, 0 }, { 166, 0 }, + { 167, 0 }, { 168, 0 }, { 169, 0 }, { 170, 0 }, { 171, 0 }, + { 172, 0 }, { 173, 0 }, { 174, 0 }, { 175, 0 }, { 176, 0 }, + { 177, 0 }, { 178, 0 }, { 179, 0 }, { 180, 0 }, { 181, 0 }, + { 182, 0 }, { 183, 0 }, { 184, 0 }, { 185, 0 }, { 186, 0 }, + { 187, 0 }, { 188, 0 }, { 189, 0 }, { 190, 0 }, { 191, 0 }, + { 192, 0 }, { 193, 0 }, { 194, 0 }, { 195, 0 }, { 196, 0 }, + { 197, 0 }, { 198, 0 }, { 199, 0 }, { 200, 0 }, { 201, 0 }, + { 202, 0 }, { 203, 0 }, { 204, 0 }, { 205, 0 }, { 206, 0 }, + + { 207, 0 }, { 208, 0 }, { 209, 0 }, { 210, 0 }, { 211, 0 }, + { 212, 0 }, { 213, 0 }, { 214, 0 }, { 215, 0 }, { 216, 0 }, + { 217, 0 }, { 218, 0 }, { 219, 0 }, { 220, 0 }, { 221, 0 }, + { 222, 0 }, { 223, 0 }, { 224, 0 }, { 225, 0 }, { 226, 0 }, + { 227, 0 }, { 228, 0 }, { 229, 0 }, { 230, 0 }, { 231, 0 }, + { 232, 0 }, { 233, 0 }, { 234, 0 }, { 235, 0 }, { 236, 0 }, + { 237, 0 }, { 238, 0 }, { 239, 0 }, { 240, 0 }, { 241, 0 }, + { 242, 0 }, { 243, 0 }, { 244, 0 }, { 245, 0 }, { 246, 0 }, + { 247, 0 }, { 248, 0 }, { 249, 0 }, { 250, 0 }, { 251, 0 }, + { 252, 0 }, { 253, 0 }, { 254, 0 }, { 255, 0 }, { 256, 0 }, + + { 0, 38 }, { 0,10576 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 9, 0 }, { 10, 0 }, { 0, 0 }, { 12, 0 }, { 13, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 35 }, + { 0,10557 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, { 4, 0 }, + { 5, 0 }, { 6, 0 }, { 7, 0 }, { 8, 0 }, { 0, 0 }, + { 0, 0 }, { 11, 0 }, { 0, 0 }, { 32, 0 }, { 14, 0 }, + { 15, 0 }, { 16, 0 }, { 17, 0 }, { 18, 0 }, { 19, 0 }, + { 20, 0 }, { 21, 0 }, { 22, 0 }, { 23, 0 }, { 24, 0 }, + { 25, 0 }, { 26, 0 }, { 27, 0 }, { 28, 0 }, { 29, 0 }, + + { 30, 0 }, { 31, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48, 0 }, { 49, 0 }, + { 50, 0 }, { 51, 0 }, { 52, 0 }, { 53, 0 }, { 54, 0 }, + { 55, 0 }, { 56, 0 }, { 57, 0 }, { 0, 0 }, { 59, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 65, 0 }, { 66, 0 }, { 67, 0 }, { 68, 0 }, { 69, 0 }, + { 70, 0 }, { 71, 0 }, { 72, 0 }, { 73, 0 }, { 74, 0 }, + { 75, 0 }, { 76, 0 }, { 77, 0 }, { 78, 0 }, { 79, 0 }, + + { 80, 0 }, { 81, 0 }, { 82, 0 }, { 83, 0 }, { 84, 0 }, + { 85, 0 }, { 86, 0 }, { 87, 0 }, { 88, 0 }, { 89, 0 }, + { 90, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94, 0 }, + { 95, 0 }, 
{ 96, 0 }, { 97, 0 }, { 98, 0 }, { 99, 0 }, + { 100, 0 }, { 101, 0 }, { 102, 0 }, { 103, 0 }, { 104, 0 }, + { 105, 0 }, { 106, 0 }, { 107, 0 }, { 108, 0 }, { 109, 0 }, + { 110, 0 }, { 111, 0 }, { 112, 0 }, { 113, 0 }, { 114, 0 }, + { 115, 0 }, { 116, 0 }, { 117, 0 }, { 118, 0 }, { 119, 0 }, + { 120, 0 }, { 121, 0 }, { 122, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 126, 0 }, { 127, 0 }, { 128, 0 }, { 129, 0 }, + + { 130, 0 }, { 131, 0 }, { 132, 0 }, { 133, 0 }, { 134, 0 }, + { 135, 0 }, { 136, 0 }, { 137, 0 }, { 138, 0 }, { 139, 0 }, + { 140, 0 }, { 141, 0 }, { 142, 0 }, { 143, 0 }, { 144, 0 }, + { 145, 0 }, { 146, 0 }, { 147, 0 }, { 148, 0 }, { 149, 0 }, + { 150, 0 }, { 151, 0 }, { 152, 0 }, { 153, 0 }, { 154, 0 }, + { 155, 0 }, { 156, 0 }, { 157, 0 }, { 158, 0 }, { 159, 0 }, + { 160, 0 }, { 161, 0 }, { 162, 0 }, { 163, 0 }, { 164, 0 }, + { 165, 0 }, { 166, 0 }, { 167, 0 }, { 168, 0 }, { 169, 0 }, + { 170, 0 }, { 171, 0 }, { 172, 0 }, { 173, 0 }, { 174, 0 }, + { 175, 0 }, { 176, 0 }, { 177, 0 }, { 178, 0 }, { 179, 0 }, + + { 180, 0 }, { 181, 0 }, { 182, 0 }, { 183, 0 }, { 184, 0 }, + { 185, 0 }, { 186, 0 }, { 187, 0 }, { 188, 0 }, { 189, 0 }, + { 190, 0 }, { 191, 0 }, { 192, 0 }, { 193, 0 }, { 194, 0 }, + { 195, 0 }, { 196, 0 }, { 197, 0 }, { 198, 0 }, { 199, 0 }, + { 200, 0 }, { 201, 0 }, { 202, 0 }, { 203, 0 }, { 204, 0 }, + { 205, 0 }, { 206, 0 }, { 207, 0 }, { 208, 0 }, { 209, 0 }, + { 210, 0 }, { 211, 0 }, { 212, 0 }, { 213, 0 }, { 214, 0 }, + { 215, 0 }, { 216, 0 }, { 217, 0 }, { 218, 0 }, { 219, 0 }, + { 220, 0 }, { 221, 0 }, { 222, 0 }, { 223, 0 }, { 224, 0 }, + { 225, 0 }, { 226, 0 }, { 227, 0 }, { 228, 0 }, { 229, 0 }, + + { 230, 0 }, { 231, 0 }, { 232, 0 }, { 233, 0 }, { 234, 0 }, + { 235, 0 }, { 236, 0 }, { 237, 0 }, { 238, 0 }, { 239, 0 }, + { 240, 0 }, { 241, 0 }, { 242, 0 }, { 243, 0 }, { 244, 0 }, + { 245, 0 }, { 246, 0 }, { 247, 0 }, { 248, 0 }, { 249, 0 }, + { 250, 0 }, { 251, 0 }, { 252, 0 }, { 253, 0 }, { 254, 0 }, + { 255, 0 }, { 256, 0 }, { 0, 41 }, { 0,10299 }, { 1,-804 }, + { 2,-804 }, { 3,-804 }, { 4,-804 }, { 5,-804 }, { 6,-804 }, + { 7,-804 }, { 8,-804 }, { 0, 0 }, { 0, 0 }, { 11,-804 }, + { 0, 0 }, { 0, 0 }, { 14,-804 }, { 15,-804 }, { 16,-804 }, + { 17,-804 }, { 18,-804 }, { 19,-804 }, { 20,-804 }, { 21,-804 }, + + { 22,-804 }, { 23,-804 }, { 24,-804 }, { 25,-804 }, { 26,-804 }, + { 27,-804 }, { 28,-804 }, { 29,-804 }, { 30,-804 }, { 31,-804 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 39,-804 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 48,3272 }, { 49,3272 }, { 50,3272 }, { 51,3272 }, + { 52,3272 }, { 53,3272 }, { 54,3272 }, { 55,3272 }, { 56,3272 }, + { 57,3272 }, { 0, 0 }, { 59,-804 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65,-804 }, { 66,-804 }, + { 67,-804 }, { 68,-804 }, { 69,3530 }, { 70,-804 }, { 71,-804 }, + + { 72,-804 }, { 73,-804 }, { 74,-804 }, { 75,-804 }, { 76,-804 }, + { 77,-804 }, { 78,-804 }, { 79,-804 }, { 80,-804 }, { 81,-804 }, + { 82,-804 }, { 83,-804 }, { 84,-804 }, { 85,-804 }, { 86,-804 }, + { 87,-804 }, { 88,-804 }, { 89,-804 }, { 90,-804 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 94,-804 }, { 95,3540 }, { 96,-804 }, + { 97,-804 }, { 98,-804 }, { 99,-804 }, { 100,-804 }, { 101,3530 }, + { 102,-804 }, { 103,-804 }, { 104,-804 }, { 105,-804 }, { 106,-804 }, + { 107,-804 }, { 108,-804 }, { 109,-804 }, { 110,-804 }, { 111,-804 }, + { 112,-804 }, { 113,-804 }, { 114,-804 }, { 115,-804 }, { 116,-804 }, + { 117,-804 }, { 118,-804 }, { 119,-804 }, { 120,-804 
}, { 121,-804 }, + + { 122,-804 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 126,-804 }, + { 127,-804 }, { 128,-804 }, { 129,-804 }, { 130,-804 }, { 131,-804 }, + { 132,-804 }, { 133,-804 }, { 134,-804 }, { 135,-804 }, { 136,-804 }, + { 137,-804 }, { 138,-804 }, { 139,-804 }, { 140,-804 }, { 141,-804 }, + { 142,-804 }, { 143,-804 }, { 144,-804 }, { 145,-804 }, { 146,-804 }, + { 147,-804 }, { 148,-804 }, { 149,-804 }, { 150,-804 }, { 151,-804 }, + { 152,-804 }, { 153,-804 }, { 154,-804 }, { 155,-804 }, { 156,-804 }, + { 157,-804 }, { 158,-804 }, { 159,-804 }, { 160,-804 }, { 161,-804 }, + { 162,-804 }, { 163,-804 }, { 164,-804 }, { 165,-804 }, { 166,-804 }, + { 167,-804 }, { 168,-804 }, { 169,-804 }, { 170,-804 }, { 171,-804 }, + + { 172,-804 }, { 173,-804 }, { 174,-804 }, { 175,-804 }, { 176,-804 }, + { 177,-804 }, { 178,-804 }, { 179,-804 }, { 180,-804 }, { 181,-804 }, + { 182,-804 }, { 183,-804 }, { 184,-804 }, { 185,-804 }, { 186,-804 }, + { 187,-804 }, { 188,-804 }, { 189,-804 }, { 190,-804 }, { 191,-804 }, + { 192,-804 }, { 193,-804 }, { 194,-804 }, { 195,-804 }, { 196,-804 }, + { 197,-804 }, { 198,-804 }, { 199,-804 }, { 200,-804 }, { 201,-804 }, + { 202,-804 }, { 203,-804 }, { 204,-804 }, { 205,-804 }, { 206,-804 }, + { 207,-804 }, { 208,-804 }, { 209,-804 }, { 210,-804 }, { 211,-804 }, + { 212,-804 }, { 213,-804 }, { 214,-804 }, { 215,-804 }, { 216,-804 }, + { 217,-804 }, { 218,-804 }, { 219,-804 }, { 220,-804 }, { 221,-804 }, + + { 222,-804 }, { 223,-804 }, { 224,-804 }, { 225,-804 }, { 226,-804 }, + { 227,-804 }, { 228,-804 }, { 229,-804 }, { 230,-804 }, { 231,-804 }, + { 232,-804 }, { 233,-804 }, { 234,-804 }, { 235,-804 }, { 236,-804 }, + { 237,-804 }, { 238,-804 }, { 239,-804 }, { 240,-804 }, { 241,-804 }, + { 242,-804 }, { 243,-804 }, { 244,-804 }, { 245,-804 }, { 246,-804 }, + { 247,-804 }, { 248,-804 }, { 249,-804 }, { 250,-804 }, { 251,-804 }, + { 252,-804 }, { 253,-804 }, { 254,-804 }, { 255,-804 }, { 256,-804 }, + { 0, 47 }, { 0,10041 }, { 1,-793 }, { 2,-793 }, { 3,-793 }, + { 4,-793 }, { 5,-793 }, { 6,-793 }, { 7,-793 }, { 8,-793 }, + { 0, 0 }, { 0, 0 }, { 11,-793 }, { 0, 0 }, { 0, 0 }, + + { 14,-793 }, { 15,-793 }, { 16,-793 }, { 17,-793 }, { 18,-793 }, + { 19,-793 }, { 20,-793 }, { 21,-793 }, { 22,-793 }, { 23,-793 }, + { 24,-793 }, { 25,-793 }, { 26,-793 }, { 27,-793 }, { 28,-793 }, + { 29,-793 }, { 30,-793 }, { 31,-793 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 39,-793 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48,-793 }, + { 49,-793 }, { 50,-793 }, { 51,-793 }, { 52,-793 }, { 53,-793 }, + { 54,-793 }, { 55,-793 }, { 56,-793 }, { 57,-793 }, { 0, 0 }, + { 59,-793 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 65,-793 }, { 66,-793 }, { 67,-793 }, { 68,-793 }, + { 69,-793 }, { 70,-793 }, { 71,-793 }, { 72,-793 }, { 73,-793 }, + { 74,-793 }, { 75,-793 }, { 76,-793 }, { 77,-793 }, { 78,-793 }, + { 79,-793 }, { 80,-793 }, { 81,-793 }, { 82,-793 }, { 83,-793 }, + { 84,-793 }, { 85,-793 }, { 86,-793 }, { 87,-793 }, { 88,-793 }, + { 89,-793 }, { 90,-793 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 94,-793 }, { 95,-793 }, { 96,-793 }, { 97,-793 }, { 98,-793 }, + { 99,-793 }, { 100,-793 }, { 101,-793 }, { 102,-793 }, { 103,-793 }, + { 104,-793 }, { 105,-793 }, { 106,-793 }, { 107,-793 }, { 108,-793 }, + { 109,-793 }, { 110,-793 }, { 111,-793 }, { 112,-793 }, { 113,-793 }, + + { 114,-793 }, { 115,-793 }, { 116,-793 }, { 117,-793 }, { 118,-793 }, + { 119,-793 }, { 120,-793 }, { 121,-793 }, { 122,-793 }, { 
0, 0 }, + { 0, 0 }, { 0, 0 }, { 126,-793 }, { 127,-793 }, { 128,-793 }, + { 129,-793 }, { 130,-793 }, { 131,-793 }, { 132,-793 }, { 133,-793 }, + { 134,-793 }, { 135,-793 }, { 136,-793 }, { 137,-793 }, { 138,-793 }, + { 139,-793 }, { 140,-793 }, { 141,-793 }, { 142,-793 }, { 143,-793 }, + { 144,-793 }, { 145,-793 }, { 146,-793 }, { 147,-793 }, { 148,-793 }, + { 149,-793 }, { 150,-793 }, { 151,-793 }, { 152,-793 }, { 153,-793 }, + { 154,-793 }, { 155,-793 }, { 156,-793 }, { 157,-793 }, { 158,-793 }, + { 159,-793 }, { 160,-793 }, { 161,-793 }, { 162,-793 }, { 163,-793 }, + + { 164,-793 }, { 165,-793 }, { 166,-793 }, { 167,-793 }, { 168,-793 }, + { 169,-793 }, { 170,-793 }, { 171,-793 }, { 172,-793 }, { 173,-793 }, + { 174,-793 }, { 175,-793 }, { 176,-793 }, { 177,-793 }, { 178,-793 }, + { 179,-793 }, { 180,-793 }, { 181,-793 }, { 182,-793 }, { 183,-793 }, + { 184,-793 }, { 185,-793 }, { 186,-793 }, { 187,-793 }, { 188,-793 }, + { 189,-793 }, { 190,-793 }, { 191,-793 }, { 192,-793 }, { 193,-793 }, + { 194,-793 }, { 195,-793 }, { 196,-793 }, { 197,-793 }, { 198,-793 }, + { 199,-793 }, { 200,-793 }, { 201,-793 }, { 202,-793 }, { 203,-793 }, + { 204,-793 }, { 205,-793 }, { 206,-793 }, { 207,-793 }, { 208,-793 }, + { 209,-793 }, { 210,-793 }, { 211,-793 }, { 212,-793 }, { 213,-793 }, + + { 214,-793 }, { 215,-793 }, { 216,-793 }, { 217,-793 }, { 218,-793 }, + { 219,-793 }, { 220,-793 }, { 221,-793 }, { 222,-793 }, { 223,-793 }, + { 224,-793 }, { 225,-793 }, { 226,-793 }, { 227,-793 }, { 228,-793 }, + { 229,-793 }, { 230,-793 }, { 231,-793 }, { 232,-793 }, { 233,-793 }, + { 234,-793 }, { 235,-793 }, { 236,-793 }, { 237,-793 }, { 238,-793 }, + { 239,-793 }, { 240,-793 }, { 241,-793 }, { 242,-793 }, { 243,-793 }, + { 244,-793 }, { 245,-793 }, { 246,-793 }, { 247,-793 }, { 248,-793 }, + { 249,-793 }, { 250,-793 }, { 251,-793 }, { 252,-793 }, { 253,-793 }, + { 254,-793 }, { 255,-793 }, { 256,-793 }, { 0, 41 }, { 0,9783 }, + { 1,-1320 }, { 2,-1320 }, { 3,-1320 }, { 4,-1320 }, { 5,-1320 }, + + { 6,-1320 }, { 7,-1320 }, { 8,-1320 }, { 0, 0 }, { 0, 0 }, + { 11,-1320 }, { 0, 0 }, { 0, 0 }, { 14,-1320 }, { 15,-1320 }, + { 16,-1320 }, { 17,-1320 }, { 18,-1320 }, { 19,-1320 }, { 20,-1320 }, + { 21,-1320 }, { 22,-1320 }, { 23,-1320 }, { 24,-1320 }, { 25,-1320 }, + { 26,-1320 }, { 27,-1320 }, { 28,-1320 }, { 29,-1320 }, { 30,-1320 }, + { 31,-1320 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39,-1320 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 48,3083 }, { 49,3083 }, { 50,3083 }, + { 51,3083 }, { 52,3083 }, { 53,3083 }, { 54,3083 }, { 55,3083 }, + + { 56,3083 }, { 57,3083 }, { 0, 0 }, { 59,-1320 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65,-1320 }, + { 66,-1320 }, { 67,-1320 }, { 68,-1320 }, { 69,3014 }, { 70,-1320 }, + { 71,-1320 }, { 72,-1320 }, { 73,-1320 }, { 74,-1320 }, { 75,-1320 }, + { 76,-1320 }, { 77,-1320 }, { 78,-1320 }, { 79,-1320 }, { 80,-1320 }, + { 81,-1320 }, { 82,-1320 }, { 83,-1320 }, { 84,-1320 }, { 85,-1320 }, + { 86,-1320 }, { 87,-1320 }, { 88,-1320 }, { 89,-1320 }, { 90,-1320 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94,-1320 }, { 95,-1320 }, + { 96,-1320 }, { 97,-1320 }, { 98,-1320 }, { 99,-1320 }, { 100,-1320 }, + { 101,3014 }, { 102,-1320 }, { 103,-1320 }, { 104,-1320 }, { 105,-1320 }, + + { 106,-1320 }, { 107,-1320 }, { 108,-1320 }, { 109,-1320 }, { 110,-1320 }, + { 111,-1320 }, { 112,-1320 }, { 113,-1320 }, { 114,-1320 }, { 115,-1320 }, + { 116,-1320 }, { 117,-1320 }, { 118,-1320 }, { 119,-1320 }, { 
120,-1320 }, + { 121,-1320 }, { 122,-1320 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 126,-1320 }, { 127,-1320 }, { 128,-1320 }, { 129,-1320 }, { 130,-1320 }, + { 131,-1320 }, { 132,-1320 }, { 133,-1320 }, { 134,-1320 }, { 135,-1320 }, + { 136,-1320 }, { 137,-1320 }, { 138,-1320 }, { 139,-1320 }, { 140,-1320 }, + { 141,-1320 }, { 142,-1320 }, { 143,-1320 }, { 144,-1320 }, { 145,-1320 }, + { 146,-1320 }, { 147,-1320 }, { 148,-1320 }, { 149,-1320 }, { 150,-1320 }, + { 151,-1320 }, { 152,-1320 }, { 153,-1320 }, { 154,-1320 }, { 155,-1320 }, + + { 156,-1320 }, { 157,-1320 }, { 158,-1320 }, { 159,-1320 }, { 160,-1320 }, + { 161,-1320 }, { 162,-1320 }, { 163,-1320 }, { 164,-1320 }, { 165,-1320 }, + { 166,-1320 }, { 167,-1320 }, { 168,-1320 }, { 169,-1320 }, { 170,-1320 }, + { 171,-1320 }, { 172,-1320 }, { 173,-1320 }, { 174,-1320 }, { 175,-1320 }, + { 176,-1320 }, { 177,-1320 }, { 178,-1320 }, { 179,-1320 }, { 180,-1320 }, + { 181,-1320 }, { 182,-1320 }, { 183,-1320 }, { 184,-1320 }, { 185,-1320 }, + { 186,-1320 }, { 187,-1320 }, { 188,-1320 }, { 189,-1320 }, { 190,-1320 }, + { 191,-1320 }, { 192,-1320 }, { 193,-1320 }, { 194,-1320 }, { 195,-1320 }, + { 196,-1320 }, { 197,-1320 }, { 198,-1320 }, { 199,-1320 }, { 200,-1320 }, + { 201,-1320 }, { 202,-1320 }, { 203,-1320 }, { 204,-1320 }, { 205,-1320 }, + + { 206,-1320 }, { 207,-1320 }, { 208,-1320 }, { 209,-1320 }, { 210,-1320 }, + { 211,-1320 }, { 212,-1320 }, { 213,-1320 }, { 214,-1320 }, { 215,-1320 }, + { 216,-1320 }, { 217,-1320 }, { 218,-1320 }, { 219,-1320 }, { 220,-1320 }, + { 221,-1320 }, { 222,-1320 }, { 223,-1320 }, { 224,-1320 }, { 225,-1320 }, + { 226,-1320 }, { 227,-1320 }, { 228,-1320 }, { 229,-1320 }, { 230,-1320 }, + { 231,-1320 }, { 232,-1320 }, { 233,-1320 }, { 234,-1320 }, { 235,-1320 }, + { 236,-1320 }, { 237,-1320 }, { 238,-1320 }, { 239,-1320 }, { 240,-1320 }, + { 241,-1320 }, { 242,-1320 }, { 243,-1320 }, { 244,-1320 }, { 245,-1320 }, + { 246,-1320 }, { 247,-1320 }, { 248,-1320 }, { 249,-1320 }, { 250,-1320 }, + { 251,-1320 }, { 252,-1320 }, { 253,-1320 }, { 254,-1320 }, { 255,-1320 }, + + { 256,-1320 }, { 0, 47 }, { 0,9525 }, { 1,-1309 }, { 2,-1309 }, + { 3,-1309 }, { 4,-1309 }, { 5,-1309 }, { 6,-1309 }, { 7,-1309 }, + { 8,-1309 }, { 0, 0 }, { 0, 0 }, { 11,-1309 }, { 0, 0 }, + { 0, 0 }, { 14,-1309 }, { 15,-1309 }, { 16,-1309 }, { 17,-1309 }, + { 18,-1309 }, { 19,-1309 }, { 20,-1309 }, { 21,-1309 }, { 22,-1309 }, + { 23,-1309 }, { 24,-1309 }, { 25,-1309 }, { 26,-1309 }, { 27,-1309 }, + { 28,-1309 }, { 29,-1309 }, { 30,-1309 }, { 31,-1309 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 39,-1309 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 48,3083 }, { 49,3083 }, { 50,-1309 }, { 51,-1309 }, { 52,-1309 }, + { 53,-1309 }, { 54,-1309 }, { 55,-1309 }, { 56,-1309 }, { 57,-1309 }, + { 0, 0 }, { 59,-1309 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 65,-1309 }, { 66,-1309 }, { 67,-1309 }, + { 68,-1309 }, { 69,-1309 }, { 70,-1309 }, { 71,-1309 }, { 72,-1309 }, + { 73,-1309 }, { 74,-1309 }, { 75,-1309 }, { 76,-1309 }, { 77,-1309 }, + { 78,-1309 }, { 79,-1309 }, { 80,-1309 }, { 81,-1309 }, { 82,-1309 }, + { 83,-1309 }, { 84,-1309 }, { 85,-1309 }, { 86,-1309 }, { 87,-1309 }, + { 88,-1309 }, { 89,-1309 }, { 90,-1309 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 94,-1309 }, { 95,-1309 }, { 96,-1309 }, { 97,-1309 }, + + { 98,-1309 }, { 99,-1309 }, { 100,-1309 }, { 101,-1309 }, { 102,-1309 }, + { 103,-1309 }, { 104,-1309 }, { 105,-1309 }, { 106,-1309 }, { 107,-1309 }, + 
{ 108,-1309 }, { 109,-1309 }, { 110,-1309 }, { 111,-1309 }, { 112,-1309 }, + { 113,-1309 }, { 114,-1309 }, { 115,-1309 }, { 116,-1309 }, { 117,-1309 }, + { 118,-1309 }, { 119,-1309 }, { 120,-1309 }, { 121,-1309 }, { 122,-1309 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 126,-1309 }, { 127,-1309 }, + { 128,-1309 }, { 129,-1309 }, { 130,-1309 }, { 131,-1309 }, { 132,-1309 }, + { 133,-1309 }, { 134,-1309 }, { 135,-1309 }, { 136,-1309 }, { 137,-1309 }, + { 138,-1309 }, { 139,-1309 }, { 140,-1309 }, { 141,-1309 }, { 142,-1309 }, + { 143,-1309 }, { 144,-1309 }, { 145,-1309 }, { 146,-1309 }, { 147,-1309 }, + + { 148,-1309 }, { 149,-1309 }, { 150,-1309 }, { 151,-1309 }, { 152,-1309 }, + { 153,-1309 }, { 154,-1309 }, { 155,-1309 }, { 156,-1309 }, { 157,-1309 }, + { 158,-1309 }, { 159,-1309 }, { 160,-1309 }, { 161,-1309 }, { 162,-1309 }, + { 163,-1309 }, { 164,-1309 }, { 165,-1309 }, { 166,-1309 }, { 167,-1309 }, + { 168,-1309 }, { 169,-1309 }, { 170,-1309 }, { 171,-1309 }, { 172,-1309 }, + { 173,-1309 }, { 174,-1309 }, { 175,-1309 }, { 176,-1309 }, { 177,-1309 }, + { 178,-1309 }, { 179,-1309 }, { 180,-1309 }, { 181,-1309 }, { 182,-1309 }, + { 183,-1309 }, { 184,-1309 }, { 185,-1309 }, { 186,-1309 }, { 187,-1309 }, + { 188,-1309 }, { 189,-1309 }, { 190,-1309 }, { 191,-1309 }, { 192,-1309 }, + { 193,-1309 }, { 194,-1309 }, { 195,-1309 }, { 196,-1309 }, { 197,-1309 }, + + { 198,-1309 }, { 199,-1309 }, { 200,-1309 }, { 201,-1309 }, { 202,-1309 }, + { 203,-1309 }, { 204,-1309 }, { 205,-1309 }, { 206,-1309 }, { 207,-1309 }, + { 208,-1309 }, { 209,-1309 }, { 210,-1309 }, { 211,-1309 }, { 212,-1309 }, + { 213,-1309 }, { 214,-1309 }, { 215,-1309 }, { 216,-1309 }, { 217,-1309 }, + { 218,-1309 }, { 219,-1309 }, { 220,-1309 }, { 221,-1309 }, { 222,-1309 }, + { 223,-1309 }, { 224,-1309 }, { 225,-1309 }, { 226,-1309 }, { 227,-1309 }, + { 228,-1309 }, { 229,-1309 }, { 230,-1309 }, { 231,-1309 }, { 232,-1309 }, + { 233,-1309 }, { 234,-1309 }, { 235,-1309 }, { 236,-1309 }, { 237,-1309 }, + { 238,-1309 }, { 239,-1309 }, { 240,-1309 }, { 241,-1309 }, { 242,-1309 }, + { 243,-1309 }, { 244,-1309 }, { 245,-1309 }, { 246,-1309 }, { 247,-1309 }, + + { 248,-1309 }, { 249,-1309 }, { 250,-1309 }, { 251,-1309 }, { 252,-1309 }, + { 253,-1309 }, { 254,-1309 }, { 255,-1309 }, { 256,-1309 }, { 0, 47 }, + { 0,9267 }, { 1,-1567 }, { 2,-1567 }, { 3,-1567 }, { 4,-1567 }, + { 5,-1567 }, { 6,-1567 }, { 7,-1567 }, { 8,-1567 }, { 0, 0 }, + { 0, 0 }, { 11,-1567 }, { 0, 0 }, { 0, 0 }, { 14,-1567 }, + { 15,-1567 }, { 16,-1567 }, { 17,-1567 }, { 18,-1567 }, { 19,-1567 }, + { 20,-1567 }, { 21,-1567 }, { 22,-1567 }, { 23,-1567 }, { 24,-1567 }, + { 25,-1567 }, { 26,-1567 }, { 27,-1567 }, { 28,-1567 }, { 29,-1567 }, + { 30,-1567 }, { 31,-1567 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39,-1567 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 43,3083 }, { 0, 0 }, + { 45,3083 }, { 0, 0 }, { 0, 0 }, { 48,3142 }, { 49,3142 }, + { 50,3142 }, { 51,3142 }, { 52,3142 }, { 53,3142 }, { 54,3142 }, + { 55,3142 }, { 56,3142 }, { 57,3142 }, { 0, 0 }, { 59,-1567 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 65,-1567 }, { 66,-1567 }, { 67,-1567 }, { 68,-1567 }, { 69,-1567 }, + { 70,-1567 }, { 71,-1567 }, { 72,-1567 }, { 73,-1567 }, { 74,-1567 }, + { 75,-1567 }, { 76,-1567 }, { 77,-1567 }, { 78,-1567 }, { 79,-1567 }, + { 80,-1567 }, { 81,-1567 }, { 82,-1567 }, { 83,-1567 }, { 84,-1567 }, + { 85,-1567 }, { 86,-1567 }, { 87,-1567 }, { 88,-1567 }, { 89,-1567 }, + + { 90,-1567 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94,-1567 }, + { 95,-1567 
}, { 96,-1567 }, { 97,-1567 }, { 98,-1567 }, { 99,-1567 }, + { 100,-1567 }, { 101,-1567 }, { 102,-1567 }, { 103,-1567 }, { 104,-1567 }, + { 105,-1567 }, { 106,-1567 }, { 107,-1567 }, { 108,-1567 }, { 109,-1567 }, + { 110,-1567 }, { 111,-1567 }, { 112,-1567 }, { 113,-1567 }, { 114,-1567 }, + { 115,-1567 }, { 116,-1567 }, { 117,-1567 }, { 118,-1567 }, { 119,-1567 }, + { 120,-1567 }, { 121,-1567 }, { 122,-1567 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 126,-1567 }, { 127,-1567 }, { 128,-1567 }, { 129,-1567 }, + { 130,-1567 }, { 131,-1567 }, { 132,-1567 }, { 133,-1567 }, { 134,-1567 }, + { 135,-1567 }, { 136,-1567 }, { 137,-1567 }, { 138,-1567 }, { 139,-1567 }, + + { 140,-1567 }, { 141,-1567 }, { 142,-1567 }, { 143,-1567 }, { 144,-1567 }, + { 145,-1567 }, { 146,-1567 }, { 147,-1567 }, { 148,-1567 }, { 149,-1567 }, + { 150,-1567 }, { 151,-1567 }, { 152,-1567 }, { 153,-1567 }, { 154,-1567 }, + { 155,-1567 }, { 156,-1567 }, { 157,-1567 }, { 158,-1567 }, { 159,-1567 }, + { 160,-1567 }, { 161,-1567 }, { 162,-1567 }, { 163,-1567 }, { 164,-1567 }, + { 165,-1567 }, { 166,-1567 }, { 167,-1567 }, { 168,-1567 }, { 169,-1567 }, + { 170,-1567 }, { 171,-1567 }, { 172,-1567 }, { 173,-1567 }, { 174,-1567 }, + { 175,-1567 }, { 176,-1567 }, { 177,-1567 }, { 178,-1567 }, { 179,-1567 }, + { 180,-1567 }, { 181,-1567 }, { 182,-1567 }, { 183,-1567 }, { 184,-1567 }, + { 185,-1567 }, { 186,-1567 }, { 187,-1567 }, { 188,-1567 }, { 189,-1567 }, + + { 190,-1567 }, { 191,-1567 }, { 192,-1567 }, { 193,-1567 }, { 194,-1567 }, + { 195,-1567 }, { 196,-1567 }, { 197,-1567 }, { 198,-1567 }, { 199,-1567 }, + { 200,-1567 }, { 201,-1567 }, { 202,-1567 }, { 203,-1567 }, { 204,-1567 }, + { 205,-1567 }, { 206,-1567 }, { 207,-1567 }, { 208,-1567 }, { 209,-1567 }, + { 210,-1567 }, { 211,-1567 }, { 212,-1567 }, { 213,-1567 }, { 214,-1567 }, + { 215,-1567 }, { 216,-1567 }, { 217,-1567 }, { 218,-1567 }, { 219,-1567 }, + { 220,-1567 }, { 221,-1567 }, { 222,-1567 }, { 223,-1567 }, { 224,-1567 }, + { 225,-1567 }, { 226,-1567 }, { 227,-1567 }, { 228,-1567 }, { 229,-1567 }, + { 230,-1567 }, { 231,-1567 }, { 232,-1567 }, { 233,-1567 }, { 234,-1567 }, + { 235,-1567 }, { 236,-1567 }, { 237,-1567 }, { 238,-1567 }, { 239,-1567 }, + + { 240,-1567 }, { 241,-1567 }, { 242,-1567 }, { 243,-1567 }, { 244,-1567 }, + { 245,-1567 }, { 246,-1567 }, { 247,-1567 }, { 248,-1567 }, { 249,-1567 }, + { 250,-1567 }, { 251,-1567 }, { 252,-1567 }, { 253,-1567 }, { 254,-1567 }, + { 255,-1567 }, { 256,-1567 }, { 0, 47 }, { 0,9009 }, { 1,-1825 }, + { 2,-1825 }, { 3,-1825 }, { 4,-1825 }, { 5,-1825 }, { 6,-1825 }, + { 7,-1825 }, { 8,-1825 }, { 0, 0 }, { 0, 0 }, { 11,-1825 }, + { 0, 0 }, { 0, 0 }, { 14,-1825 }, { 15,-1825 }, { 16,-1825 }, + { 17,-1825 }, { 18,-1825 }, { 19,-1825 }, { 20,-1825 }, { 21,-1825 }, + { 22,-1825 }, { 23,-1825 }, { 24,-1825 }, { 25,-1825 }, { 26,-1825 }, + { 27,-1825 }, { 28,-1825 }, { 29,-1825 }, { 30,-1825 }, { 31,-1825 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 39,-1825 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 48,3142 }, { 49,3142 }, { 50,3142 }, { 51,3142 }, + { 52,3142 }, { 53,3142 }, { 54,3142 }, { 55,3142 }, { 56,-1825 }, + { 57,-1825 }, { 0, 0 }, { 59,-1825 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65,-1825 }, { 66,-1825 }, + { 67,-1825 }, { 68,-1825 }, { 69,-1825 }, { 70,-1825 }, { 71,-1825 }, + { 72,-1825 }, { 73,-1825 }, { 74,-1825 }, { 75,-1825 }, { 76,-1825 }, + { 77,-1825 }, { 78,-1825 }, { 79,-1825 }, { 80,-1825 }, { 81,-1825 }, + + { 
82,-1825 }, { 83,-1825 }, { 84,-1825 }, { 85,-1825 }, { 86,-1825 }, + { 87,-1825 }, { 88,-1825 }, { 89,-1825 }, { 90,-1825 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 94,-1825 }, { 95,-1825 }, { 96,-1825 }, + { 97,-1825 }, { 98,-1825 }, { 99,-1825 }, { 100,-1825 }, { 101,-1825 }, + { 102,-1825 }, { 103,-1825 }, { 104,-1825 }, { 105,-1825 }, { 106,-1825 }, + { 107,-1825 }, { 108,-1825 }, { 109,-1825 }, { 110,-1825 }, { 111,-1825 }, + { 112,-1825 }, { 113,-1825 }, { 114,-1825 }, { 115,-1825 }, { 116,-1825 }, + { 117,-1825 }, { 118,-1825 }, { 119,-1825 }, { 120,-1825 }, { 121,-1825 }, + { 122,-1825 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 126,-1825 }, + { 127,-1825 }, { 128,-1825 }, { 129,-1825 }, { 130,-1825 }, { 131,-1825 }, + + { 132,-1825 }, { 133,-1825 }, { 134,-1825 }, { 135,-1825 }, { 136,-1825 }, + { 137,-1825 }, { 138,-1825 }, { 139,-1825 }, { 140,-1825 }, { 141,-1825 }, + { 142,-1825 }, { 143,-1825 }, { 144,-1825 }, { 145,-1825 }, { 146,-1825 }, + { 147,-1825 }, { 148,-1825 }, { 149,-1825 }, { 150,-1825 }, { 151,-1825 }, + { 152,-1825 }, { 153,-1825 }, { 154,-1825 }, { 155,-1825 }, { 156,-1825 }, + { 157,-1825 }, { 158,-1825 }, { 159,-1825 }, { 160,-1825 }, { 161,-1825 }, + { 162,-1825 }, { 163,-1825 }, { 164,-1825 }, { 165,-1825 }, { 166,-1825 }, + { 167,-1825 }, { 168,-1825 }, { 169,-1825 }, { 170,-1825 }, { 171,-1825 }, + { 172,-1825 }, { 173,-1825 }, { 174,-1825 }, { 175,-1825 }, { 176,-1825 }, + { 177,-1825 }, { 178,-1825 }, { 179,-1825 }, { 180,-1825 }, { 181,-1825 }, + + { 182,-1825 }, { 183,-1825 }, { 184,-1825 }, { 185,-1825 }, { 186,-1825 }, + { 187,-1825 }, { 188,-1825 }, { 189,-1825 }, { 190,-1825 }, { 191,-1825 }, + { 192,-1825 }, { 193,-1825 }, { 194,-1825 }, { 195,-1825 }, { 196,-1825 }, + { 197,-1825 }, { 198,-1825 }, { 199,-1825 }, { 200,-1825 }, { 201,-1825 }, + { 202,-1825 }, { 203,-1825 }, { 204,-1825 }, { 205,-1825 }, { 206,-1825 }, + { 207,-1825 }, { 208,-1825 }, { 209,-1825 }, { 210,-1825 }, { 211,-1825 }, + { 212,-1825 }, { 213,-1825 }, { 214,-1825 }, { 215,-1825 }, { 216,-1825 }, + { 217,-1825 }, { 218,-1825 }, { 219,-1825 }, { 220,-1825 }, { 221,-1825 }, + { 222,-1825 }, { 223,-1825 }, { 224,-1825 }, { 225,-1825 }, { 226,-1825 }, + { 227,-1825 }, { 228,-1825 }, { 229,-1825 }, { 230,-1825 }, { 231,-1825 }, + + { 232,-1825 }, { 233,-1825 }, { 234,-1825 }, { 235,-1825 }, { 236,-1825 }, + { 237,-1825 }, { 238,-1825 }, { 239,-1825 }, { 240,-1825 }, { 241,-1825 }, + { 242,-1825 }, { 243,-1825 }, { 244,-1825 }, { 245,-1825 }, { 246,-1825 }, + { 247,-1825 }, { 248,-1825 }, { 249,-1825 }, { 250,-1825 }, { 251,-1825 }, + { 252,-1825 }, { 253,-1825 }, { 254,-1825 }, { 255,-1825 }, { 256,-1825 }, + { 0, 47 }, { 0,8751 }, { 1,-2083 }, { 2,-2083 }, { 3,-2083 }, + { 4,-2083 }, { 5,-2083 }, { 6,-2083 }, { 7,-2083 }, { 8,-2083 }, + { 0, 0 }, { 0, 0 }, { 11,-2083 }, { 0, 0 }, { 0, 0 }, + { 14,-2083 }, { 15,-2083 }, { 16,-2083 }, { 17,-2083 }, { 18,-2083 }, + { 19,-2083 }, { 20,-2083 }, { 21,-2083 }, { 22,-2083 }, { 23,-2083 }, + + { 24,-2083 }, { 25,-2083 }, { 26,-2083 }, { 27,-2083 }, { 28,-2083 }, + { 29,-2083 }, { 30,-2083 }, { 31,-2083 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 39,-2083 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48,3142 }, + { 49,3142 }, { 50,3142 }, { 51,3142 }, { 52,3142 }, { 53,3142 }, + { 54,3142 }, { 55,3142 }, { 56,3142 }, { 57,3142 }, { 0, 0 }, + { 59,-2083 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 65,3142 }, { 66,3142 }, { 67,3142 }, { 68,3142 }, + { 69,3142 }, { 
70,3142 }, { 71,-2083 }, { 72,-2083 }, { 73,-2083 }, + + { 74,-2083 }, { 75,-2083 }, { 76,-2083 }, { 77,-2083 }, { 78,-2083 }, + { 79,-2083 }, { 80,-2083 }, { 81,-2083 }, { 82,-2083 }, { 83,-2083 }, + { 84,-2083 }, { 85,-2083 }, { 86,-2083 }, { 87,-2083 }, { 88,-2083 }, + { 89,-2083 }, { 90,-2083 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 94,-2083 }, { 95,-2083 }, { 96,-2083 }, { 97,3142 }, { 98,3142 }, + { 99,3142 }, { 100,3142 }, { 101,3142 }, { 102,3142 }, { 103,-2083 }, + { 104,-2083 }, { 105,-2083 }, { 106,-2083 }, { 107,-2083 }, { 108,-2083 }, + { 109,-2083 }, { 110,-2083 }, { 111,-2083 }, { 112,-2083 }, { 113,-2083 }, + { 114,-2083 }, { 115,-2083 }, { 116,-2083 }, { 117,-2083 }, { 118,-2083 }, + { 119,-2083 }, { 120,-2083 }, { 121,-2083 }, { 122,-2083 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 126,-2083 }, { 127,-2083 }, { 128,-2083 }, + { 129,-2083 }, { 130,-2083 }, { 131,-2083 }, { 132,-2083 }, { 133,-2083 }, + { 134,-2083 }, { 135,-2083 }, { 136,-2083 }, { 137,-2083 }, { 138,-2083 }, + { 139,-2083 }, { 140,-2083 }, { 141,-2083 }, { 142,-2083 }, { 143,-2083 }, + { 144,-2083 }, { 145,-2083 }, { 146,-2083 }, { 147,-2083 }, { 148,-2083 }, + { 149,-2083 }, { 150,-2083 }, { 151,-2083 }, { 152,-2083 }, { 153,-2083 }, + { 154,-2083 }, { 155,-2083 }, { 156,-2083 }, { 157,-2083 }, { 158,-2083 }, + { 159,-2083 }, { 160,-2083 }, { 161,-2083 }, { 162,-2083 }, { 163,-2083 }, + { 164,-2083 }, { 165,-2083 }, { 166,-2083 }, { 167,-2083 }, { 168,-2083 }, + { 169,-2083 }, { 170,-2083 }, { 171,-2083 }, { 172,-2083 }, { 173,-2083 }, + + { 174,-2083 }, { 175,-2083 }, { 176,-2083 }, { 177,-2083 }, { 178,-2083 }, + { 179,-2083 }, { 180,-2083 }, { 181,-2083 }, { 182,-2083 }, { 183,-2083 }, + { 184,-2083 }, { 185,-2083 }, { 186,-2083 }, { 187,-2083 }, { 188,-2083 }, + { 189,-2083 }, { 190,-2083 }, { 191,-2083 }, { 192,-2083 }, { 193,-2083 }, + { 194,-2083 }, { 195,-2083 }, { 196,-2083 }, { 197,-2083 }, { 198,-2083 }, + { 199,-2083 }, { 200,-2083 }, { 201,-2083 }, { 202,-2083 }, { 203,-2083 }, + { 204,-2083 }, { 205,-2083 }, { 206,-2083 }, { 207,-2083 }, { 208,-2083 }, + { 209,-2083 }, { 210,-2083 }, { 211,-2083 }, { 212,-2083 }, { 213,-2083 }, + { 214,-2083 }, { 215,-2083 }, { 216,-2083 }, { 217,-2083 }, { 218,-2083 }, + { 219,-2083 }, { 220,-2083 }, { 221,-2083 }, { 222,-2083 }, { 223,-2083 }, + + { 224,-2083 }, { 225,-2083 }, { 226,-2083 }, { 227,-2083 }, { 228,-2083 }, + { 229,-2083 }, { 230,-2083 }, { 231,-2083 }, { 232,-2083 }, { 233,-2083 }, + { 234,-2083 }, { 235,-2083 }, { 236,-2083 }, { 237,-2083 }, { 238,-2083 }, + { 239,-2083 }, { 240,-2083 }, { 241,-2083 }, { 242,-2083 }, { 243,-2083 }, + { 244,-2083 }, { 245,-2083 }, { 246,-2083 }, { 247,-2083 }, { 248,-2083 }, + { 249,-2083 }, { 250,-2083 }, { 251,-2083 }, { 252,-2083 }, { 253,-2083 }, + { 254,-2083 }, { 255,-2083 }, { 256,-2083 }, { 0, 42 }, { 0,8493 }, + { 1,-1548 }, { 2,-1548 }, { 3,-1548 }, { 4,-1548 }, { 5,-1548 }, + { 6,-1548 }, { 7,-1548 }, { 8,-1548 }, { 0, 0 }, { 0, 0 }, + { 11,-1548 }, { 0, 0 }, { 0, 0 }, { 14,-1548 }, { 15,-1548 }, + + { 16,-1548 }, { 17,-1548 }, { 18,-1548 }, { 19,-1548 }, { 20,-1548 }, + { 21,-1548 }, { 22,-1548 }, { 23,-1548 }, { 24,-1548 }, { 25,-1548 }, + { 26,-1548 }, { 27,-1548 }, { 28,-1548 }, { 29,-1548 }, { 30,-1548 }, + { 31,-1548 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39,-1548 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 46,-1290 }, { 0, 0 }, { 48, 0 }, { 49, 0 }, { 50, 0 }, + { 51, 0 }, { 52, 0 }, { 53, 0 }, { 54, 0 }, { 55, 0 }, + { 56, 0 }, { 57, 0 
}, { 0, 0 }, { 59,-1548 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65,-1548 }, + + { 66,-1548 }, { 67,-1548 }, { 68,-1548 }, { 69,-774 }, { 70,-1548 }, + { 71,-1548 }, { 72,-1548 }, { 73,-1548 }, { 74,-1548 }, { 75,-1548 }, + { 76,-1548 }, { 77,-1548 }, { 78,-1548 }, { 79,-1548 }, { 80,-1548 }, + { 81,-1548 }, { 82,-1548 }, { 83,-1548 }, { 84,-1548 }, { 85,-1548 }, + { 86,-1548 }, { 87,-1548 }, { 88,-1548 }, { 89,-1548 }, { 90,-1548 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94,-1548 }, { 95, 258 }, + { 96,-1548 }, { 97,-1548 }, { 98,-1548 }, { 99,-1548 }, { 100,-1548 }, + { 101,-774 }, { 102,-1548 }, { 103,-1548 }, { 104,-1548 }, { 105,-1548 }, + { 106,-1548 }, { 107,-1548 }, { 108,-1548 }, { 109,-1548 }, { 110,-1548 }, + { 111,-1548 }, { 112,-1548 }, { 113,-1548 }, { 114,-1548 }, { 115,-1548 }, + + { 116,-1548 }, { 117,-1548 }, { 118,-1548 }, { 119,-1548 }, { 120,-1548 }, + { 121,-1548 }, { 122,-1548 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 126,-1548 }, { 127,-1548 }, { 128,-1548 }, { 129,-1548 }, { 130,-1548 }, + { 131,-1548 }, { 132,-1548 }, { 133,-1548 }, { 134,-1548 }, { 135,-1548 }, + { 136,-1548 }, { 137,-1548 }, { 138,-1548 }, { 139,-1548 }, { 140,-1548 }, + { 141,-1548 }, { 142,-1548 }, { 143,-1548 }, { 144,-1548 }, { 145,-1548 }, + { 146,-1548 }, { 147,-1548 }, { 148,-1548 }, { 149,-1548 }, { 150,-1548 }, + { 151,-1548 }, { 152,-1548 }, { 153,-1548 }, { 154,-1548 }, { 155,-1548 }, + { 156,-1548 }, { 157,-1548 }, { 158,-1548 }, { 159,-1548 }, { 160,-1548 }, + { 161,-1548 }, { 162,-1548 }, { 163,-1548 }, { 164,-1548 }, { 165,-1548 }, + + { 166,-1548 }, { 167,-1548 }, { 168,-1548 }, { 169,-1548 }, { 170,-1548 }, + { 171,-1548 }, { 172,-1548 }, { 173,-1548 }, { 174,-1548 }, { 175,-1548 }, + { 176,-1548 }, { 177,-1548 }, { 178,-1548 }, { 179,-1548 }, { 180,-1548 }, + { 181,-1548 }, { 182,-1548 }, { 183,-1548 }, { 184,-1548 }, { 185,-1548 }, + { 186,-1548 }, { 187,-1548 }, { 188,-1548 }, { 189,-1548 }, { 190,-1548 }, + { 191,-1548 }, { 192,-1548 }, { 193,-1548 }, { 194,-1548 }, { 195,-1548 }, + { 196,-1548 }, { 197,-1548 }, { 198,-1548 }, { 199,-1548 }, { 200,-1548 }, + { 201,-1548 }, { 202,-1548 }, { 203,-1548 }, { 204,-1548 }, { 205,-1548 }, + { 206,-1548 }, { 207,-1548 }, { 208,-1548 }, { 209,-1548 }, { 210,-1548 }, + { 211,-1548 }, { 212,-1548 }, { 213,-1548 }, { 214,-1548 }, { 215,-1548 }, + + { 216,-1548 }, { 217,-1548 }, { 218,-1548 }, { 219,-1548 }, { 220,-1548 }, + { 221,-1548 }, { 222,-1548 }, { 223,-1548 }, { 224,-1548 }, { 225,-1548 }, + { 226,-1548 }, { 227,-1548 }, { 228,-1548 }, { 229,-1548 }, { 230,-1548 }, + { 231,-1548 }, { 232,-1548 }, { 233,-1548 }, { 234,-1548 }, { 235,-1548 }, + { 236,-1548 }, { 237,-1548 }, { 238,-1548 }, { 239,-1548 }, { 240,-1548 }, + { 241,-1548 }, { 242,-1548 }, { 243,-1548 }, { 244,-1548 }, { 245,-1548 }, + { 246,-1548 }, { 247,-1548 }, { 248,-1548 }, { 249,-1548 }, { 250,-1548 }, + { 251,-1548 }, { 252,-1548 }, { 253,-1548 }, { 254,-1548 }, { 255,-1548 }, + { 256,-1548 }, { 0, 47 }, { 0,8235 }, { 1,-2599 }, { 2,-2599 }, + { 3,-2599 }, { 4,-2599 }, { 5,-2599 }, { 6,-2599 }, { 7,-2599 }, + + { 8,-2599 }, { 0, 0 }, { 0, 0 }, { 11,-2599 }, { 0, 0 }, + { 0, 0 }, { 14,-2599 }, { 15,-2599 }, { 16,-2599 }, { 17,-2599 }, + { 18,-2599 }, { 19,-2599 }, { 20,-2599 }, { 21,-2599 }, { 22,-2599 }, + { 23,-2599 }, { 24,-2599 }, { 25,-2599 }, { 26,-2599 }, { 27,-2599 }, + { 28,-2599 }, { 29,-2599 }, { 30,-2599 }, { 31,-2599 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 39,-2599 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 
}, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 48,2884 }, { 49,2884 }, { 50,2884 }, { 51,2884 }, { 52,2884 }, + { 53,2884 }, { 54,2884 }, { 55,2884 }, { 56,2884 }, { 57,2884 }, + + { 0, 0 }, { 59,-2599 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 65,-2599 }, { 66,-2599 }, { 67,-2599 }, + { 68,-2599 }, { 69,-2599 }, { 70,-2599 }, { 71,-2599 }, { 72,-2599 }, + { 73,-2599 }, { 74,-2599 }, { 75,-2599 }, { 76,-2599 }, { 77,-2599 }, + { 78,-2599 }, { 79,-2599 }, { 80,-2599 }, { 81,-2599 }, { 82,-2599 }, + { 83,-2599 }, { 84,-2599 }, { 85,-2599 }, { 86,-2599 }, { 87,-2599 }, + { 88,-2599 }, { 89,-2599 }, { 90,-2599 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 94,-2599 }, { 95,-2599 }, { 96,-2599 }, { 97,-2599 }, + { 98,-2599 }, { 99,-2599 }, { 100,-2599 }, { 101,-2599 }, { 102,-2599 }, + { 103,-2599 }, { 104,-2599 }, { 105,-2599 }, { 106,-2599 }, { 107,-2599 }, + + { 108,-2599 }, { 109,-2599 }, { 110,-2599 }, { 111,-2599 }, { 112,-2599 }, + { 113,-2599 }, { 114,-2599 }, { 115,-2599 }, { 116,-2599 }, { 117,-2599 }, + { 118,-2599 }, { 119,-2599 }, { 120,-2599 }, { 121,-2599 }, { 122,-2599 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 126,-2599 }, { 127,-2599 }, + { 128,-2599 }, { 129,-2599 }, { 130,-2599 }, { 131,-2599 }, { 132,-2599 }, + { 133,-2599 }, { 134,-2599 }, { 135,-2599 }, { 136,-2599 }, { 137,-2599 }, + { 138,-2599 }, { 139,-2599 }, { 140,-2599 }, { 141,-2599 }, { 142,-2599 }, + { 143,-2599 }, { 144,-2599 }, { 145,-2599 }, { 146,-2599 }, { 147,-2599 }, + { 148,-2599 }, { 149,-2599 }, { 150,-2599 }, { 151,-2599 }, { 152,-2599 }, + { 153,-2599 }, { 154,-2599 }, { 155,-2599 }, { 156,-2599 }, { 157,-2599 }, + + { 158,-2599 }, { 159,-2599 }, { 160,-2599 }, { 161,-2599 }, { 162,-2599 }, + { 163,-2599 }, { 164,-2599 }, { 165,-2599 }, { 166,-2599 }, { 167,-2599 }, + { 168,-2599 }, { 169,-2599 }, { 170,-2599 }, { 171,-2599 }, { 172,-2599 }, + { 173,-2599 }, { 174,-2599 }, { 175,-2599 }, { 176,-2599 }, { 177,-2599 }, + { 178,-2599 }, { 179,-2599 }, { 180,-2599 }, { 181,-2599 }, { 182,-2599 }, + { 183,-2599 }, { 184,-2599 }, { 185,-2599 }, { 186,-2599 }, { 187,-2599 }, + { 188,-2599 }, { 189,-2599 }, { 190,-2599 }, { 191,-2599 }, { 192,-2599 }, + { 193,-2599 }, { 194,-2599 }, { 195,-2599 }, { 196,-2599 }, { 197,-2599 }, + { 198,-2599 }, { 199,-2599 }, { 200,-2599 }, { 201,-2599 }, { 202,-2599 }, + { 203,-2599 }, { 204,-2599 }, { 205,-2599 }, { 206,-2599 }, { 207,-2599 }, + + { 208,-2599 }, { 209,-2599 }, { 210,-2599 }, { 211,-2599 }, { 212,-2599 }, + { 213,-2599 }, { 214,-2599 }, { 215,-2599 }, { 216,-2599 }, { 217,-2599 }, + { 218,-2599 }, { 219,-2599 }, { 220,-2599 }, { 221,-2599 }, { 222,-2599 }, + { 223,-2599 }, { 224,-2599 }, { 225,-2599 }, { 226,-2599 }, { 227,-2599 }, + { 228,-2599 }, { 229,-2599 }, { 230,-2599 }, { 231,-2599 }, { 232,-2599 }, + { 233,-2599 }, { 234,-2599 }, { 235,-2599 }, { 236,-2599 }, { 237,-2599 }, + { 238,-2599 }, { 239,-2599 }, { 240,-2599 }, { 241,-2599 }, { 242,-2599 }, + { 243,-2599 }, { 244,-2599 }, { 245,-2599 }, { 246,-2599 }, { 247,-2599 }, + { 248,-2599 }, { 249,-2599 }, { 250,-2599 }, { 251,-2599 }, { 252,-2599 }, + { 253,-2599 }, { 254,-2599 }, { 255,-2599 }, { 256,-2599 }, { 0, 20 }, + + { 0,7977 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, { 4, 0 }, + { 5, 0 }, { 6, 0 }, { 7, 0 }, { 8, 0 }, { 9, 0 }, + { 10, 0 }, { 11, 0 }, { 12, 0 }, { 13, 0 }, { 14, 0 }, + { 15, 0 }, { 16, 0 }, { 17, 0 }, { 18, 0 }, { 19, 0 }, + { 20, 0 }, { 21, 0 }, { 22, 0 }, { 23, 0 }, { 24, 0 }, + { 25, 0 }, { 26, 0 }, { 27, 0 }, { 28, 0 }, { 29, 0 }, + { 30, 0 }, { 31, 0 }, { 32, 0 }, { 33, 0 }, { 0, 
0 }, + { 35, 0 }, { 36, 0 }, { 37, 0 }, { 38, 0 }, { 39, 0 }, + { 40, 0 }, { 41, 0 }, { 42, 0 }, { 43, 0 }, { 44, 0 }, + { 45, 0 }, { 46, 0 }, { 47, 0 }, { 48, 0 }, { 49, 0 }, + + { 50, 0 }, { 51, 0 }, { 52, 0 }, { 53, 0 }, { 54, 0 }, + { 55, 0 }, { 56, 0 }, { 57, 0 }, { 58, 0 }, { 59, 0 }, + { 60, 0 }, { 61, 0 }, { 62, 0 }, { 63, 0 }, { 64, 0 }, + { 65, 0 }, { 66, 0 }, { 67, 0 }, { 68, 0 }, { 69, 0 }, + { 70, 0 }, { 71, 0 }, { 72, 0 }, { 73, 0 }, { 74, 0 }, + { 75, 0 }, { 76, 0 }, { 77, 0 }, { 78, 0 }, { 79, 0 }, + { 80, 0 }, { 81, 0 }, { 82, 0 }, { 83, 0 }, { 84, 0 }, + { 85, 0 }, { 86, 0 }, { 87, 0 }, { 88, 0 }, { 89, 0 }, + { 90, 0 }, { 91, 0 }, { 0, 0 }, { 93, 0 }, { 94, 0 }, + { 95, 0 }, { 96, 0 }, { 97, 0 }, { 98, 0 }, { 99, 0 }, + + { 100, 0 }, { 101, 0 }, { 102, 0 }, { 103, 0 }, { 104, 0 }, + { 105, 0 }, { 106, 0 }, { 107, 0 }, { 108, 0 }, { 109, 0 }, + { 110, 0 }, { 111, 0 }, { 112, 0 }, { 113, 0 }, { 114, 0 }, + { 115, 0 }, { 116, 0 }, { 117, 0 }, { 118, 0 }, { 119, 0 }, + { 120, 0 }, { 121, 0 }, { 122, 0 }, { 123, 0 }, { 124, 0 }, + { 125, 0 }, { 126, 0 }, { 127, 0 }, { 128, 0 }, { 129, 0 }, + { 130, 0 }, { 131, 0 }, { 132, 0 }, { 133, 0 }, { 134, 0 }, + { 135, 0 }, { 136, 0 }, { 137, 0 }, { 138, 0 }, { 139, 0 }, + { 140, 0 }, { 141, 0 }, { 142, 0 }, { 143, 0 }, { 144, 0 }, + { 145, 0 }, { 146, 0 }, { 147, 0 }, { 148, 0 }, { 149, 0 }, + + { 150, 0 }, { 151, 0 }, { 152, 0 }, { 153, 0 }, { 154, 0 }, + { 155, 0 }, { 156, 0 }, { 157, 0 }, { 158, 0 }, { 159, 0 }, + { 160, 0 }, { 161, 0 }, { 162, 0 }, { 163, 0 }, { 164, 0 }, + { 165, 0 }, { 166, 0 }, { 167, 0 }, { 168, 0 }, { 169, 0 }, + { 170, 0 }, { 171, 0 }, { 172, 0 }, { 173, 0 }, { 174, 0 }, + { 175, 0 }, { 176, 0 }, { 177, 0 }, { 178, 0 }, { 179, 0 }, + { 180, 0 }, { 181, 0 }, { 182, 0 }, { 183, 0 }, { 184, 0 }, + { 185, 0 }, { 186, 0 }, { 187, 0 }, { 188, 0 }, { 189, 0 }, + { 190, 0 }, { 191, 0 }, { 192, 0 }, { 193, 0 }, { 194, 0 }, + { 195, 0 }, { 196, 0 }, { 197, 0 }, { 198, 0 }, { 199, 0 }, + + { 200, 0 }, { 201, 0 }, { 202, 0 }, { 203, 0 }, { 204, 0 }, + { 205, 0 }, { 206, 0 }, { 207, 0 }, { 208, 0 }, { 209, 0 }, + { 210, 0 }, { 211, 0 }, { 212, 0 }, { 213, 0 }, { 214, 0 }, + { 215, 0 }, { 216, 0 }, { 217, 0 }, { 218, 0 }, { 219, 0 }, + { 220, 0 }, { 221, 0 }, { 222, 0 }, { 223, 0 }, { 224, 0 }, + { 225, 0 }, { 226, 0 }, { 227, 0 }, { 228, 0 }, { 229, 0 }, + { 230, 0 }, { 231, 0 }, { 232, 0 }, { 233, 0 }, { 234, 0 }, + { 235, 0 }, { 236, 0 }, { 237, 0 }, { 238, 0 }, { 239, 0 }, + { 240, 0 }, { 241, 0 }, { 242, 0 }, { 243, 0 }, { 244, 0 }, + { 245, 0 }, { 246, 0 }, { 247, 0 }, { 248, 0 }, { 249, 0 }, + + { 250, 0 }, { 251, 0 }, { 252, 0 }, { 253, 0 }, { 254, 0 }, + { 255, 0 }, { 256, 0 }, { 0, 13 }, { 0,7719 }, { 0, 15 }, + { 0,7717 }, { 0, 13 }, { 0,7715 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 14 }, { 0,7681 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 48,2626 }, { 49,2626 }, { 50,2626 }, { 51,2626 }, + { 52,2626 }, { 53,2626 }, { 54,2626 }, { 55,2626 }, { 56,2626 }, + { 57,2626 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65,2626 }, { 66,2626 }, + { 67,2626 }, { 68,2626 }, { 69,2626 }, { 70,2626 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, 
{ 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48,2649 }, + { 49,2649 }, { 50,2649 }, { 51,2649 }, { 52,2649 }, { 53,2649 }, + + { 54,2649 }, { 55,2649 }, { 56,2649 }, { 57,2649 }, { 0, 0 }, + { 97,2626 }, { 98,2626 }, { 99,2626 }, { 100,2626 }, { 101,2626 }, + { 102,2626 }, { 65,2649 }, { 66,2649 }, { 67,2649 }, { 68,2649 }, + { 69,2649 }, { 70,2649 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 117,7421 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 123,2649 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 125,-3086 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 97,2649 }, { 98,2649 }, + { 99,2649 }, { 100,2649 }, { 101,2649 }, { 102,2649 }, { 0, 1 }, + + { 0,7577 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, { 4, 0 }, + { 5, 0 }, { 6, 0 }, { 7, 0 }, { 8, 0 }, { 0, 0 }, + { 0, 0 }, { 11, 0 }, { 0, 0 }, { 0, 0 }, { 14, 0 }, + { 15, 0 }, { 16, 0 }, { 17, 0 }, { 18, 0 }, { 19, 0 }, + { 20, 0 }, { 21, 0 }, { 22, 0 }, { 23, 0 }, { 24, 0 }, + { 25, 0 }, { 26, 0 }, { 27, 0 }, { 28, 0 }, { 29, 0 }, + { 30, 0 }, { 31, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48, 0 }, { 49, 0 }, + + { 50, 0 }, { 51, 0 }, { 52, 0 }, { 53, 0 }, { 54, 0 }, + { 55, 0 }, { 56, 0 }, { 57, 0 }, { 0, 0 }, { 59, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 65, 0 }, { 66, 0 }, { 67, 0 }, { 68, 0 }, { 69, 0 }, + { 70, 0 }, { 71, 0 }, { 72, 0 }, { 73, 0 }, { 74, 0 }, + { 75, 0 }, { 76, 0 }, { 77, 0 }, { 78, 0 }, { 79, 0 }, + { 80, 0 }, { 81, 0 }, { 82, 0 }, { 83, 0 }, { 84, 0 }, + { 85, 0 }, { 86, 0 }, { 87, 0 }, { 88, 0 }, { 89, 0 }, + { 90, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94, 0 }, + { 95, 0 }, { 96, 0 }, { 97, 0 }, { 98, 0 }, { 99, 0 }, + + { 100, 0 }, { 101, 0 }, { 102, 0 }, { 103, 0 }, { 104, 0 }, + { 105, 0 }, { 106, 0 }, { 107, 0 }, { 108, 0 }, { 109, 0 }, + { 110, 0 }, { 111, 0 }, { 112, 0 }, { 113, 0 }, { 114, 0 }, + { 115, 0 }, { 116, 0 }, { 117, 0 }, { 118, 0 }, { 119, 0 }, + { 120, 0 }, { 121, 0 }, { 122, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 126, 0 }, { 127, 0 }, { 128, 0 }, { 129, 0 }, + { 130, 0 }, { 131, 0 }, { 132, 0 }, { 133, 0 }, { 134, 0 }, + { 135, 0 }, { 136, 0 }, { 137, 0 }, { 138, 0 }, { 139, 0 }, + { 140, 0 }, { 141, 0 }, { 142, 0 }, { 143, 0 }, { 144, 0 }, + { 145, 0 }, { 146, 0 }, { 147, 0 }, { 148, 0 }, { 149, 0 }, + + { 150, 0 }, { 151, 0 }, { 152, 0 }, { 153, 0 }, { 154, 0 }, + { 155, 0 }, { 156, 0 }, { 157, 0 }, { 158, 0 }, { 159, 0 }, + { 160, 0 }, { 161, 0 }, { 162, 0 }, { 163, 0 }, { 164, 0 }, + { 165, 0 }, { 166, 0 }, { 167, 0 }, { 168, 0 }, { 169, 0 }, + { 170, 0 }, { 171, 0 }, { 172, 0 }, { 173, 0 }, { 174, 0 }, + { 175, 0 }, { 176, 0 }, { 177, 0 }, { 178, 0 }, { 179, 0 }, + { 180, 0 }, { 181, 0 }, { 182, 0 }, { 183, 0 }, { 184, 0 }, + { 185, 0 }, { 186, 0 }, { 187, 0 }, { 188, 0 }, { 189, 0 }, + { 190, 0 }, { 191, 0 }, { 192, 0 }, { 193, 0 }, { 194, 0 }, + { 195, 0 }, { 196, 0 }, { 197, 0 }, { 198, 0 }, { 199, 0 }, + + { 200, 0 }, { 201, 0 }, { 202, 0 }, { 203, 0 }, { 204, 0 }, + { 205, 0 }, { 206, 0 }, { 207, 0 }, { 208, 0 }, { 209, 0 }, + { 210, 0 }, { 211, 0 }, { 212, 0 }, { 213, 0 }, { 214, 0 }, + { 215, 0 }, { 216, 0 }, { 217, 0 }, { 218, 0 }, { 219, 0 }, + { 220, 0 }, { 221, 0 }, { 222, 0 }, { 223, 0 }, { 224, 0 }, + { 225, 0 }, { 226, 0 }, { 227, 0 }, { 228, 0 }, { 229, 0 }, + { 230, 0 }, { 231, 0 }, { 232, 
0 }, { 233, 0 }, { 234, 0 }, + { 235, 0 }, { 236, 0 }, { 237, 0 }, { 238, 0 }, { 239, 0 }, + { 240, 0 }, { 241, 0 }, { 242, 0 }, { 243, 0 }, { 244, 0 }, + { 245, 0 }, { 246, 0 }, { 247, 0 }, { 248, 0 }, { 249, 0 }, + + { 250, 0 }, { 251, 0 }, { 252, 0 }, { 253, 0 }, { 254, 0 }, + { 255, 0 }, { 256, 0 }, { 0, 2 }, { 0,7319 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 9, 0 }, { 10, 0 }, { 0, 0 }, + { 12, 0 }, { 13, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 32, 0 }, { 0, 22 }, { 0,7285 }, { 1, 0 }, { 2, 0 }, + { 3, 0 }, { 4, 0 }, { 5, 0 }, { 6, 0 }, { 7, 0 }, + + { 8, 0 }, { 9, 0 }, { 10, 0 }, { 11, 0 }, { 12, 0 }, + { 13, 0 }, { 14, 0 }, { 15, 0 }, { 16, 0 }, { 17, 0 }, + { 18, 0 }, { 19, 0 }, { 20, 0 }, { 21, 0 }, { 22, 0 }, + { 23, 0 }, { 24, 0 }, { 25, 0 }, { 26, 0 }, { 27, 0 }, + { 28, 0 }, { 29, 0 }, { 30, 0 }, { 31, 0 }, { 32, 0 }, + { 33, 0 }, { 34, 0 }, { 35, 0 }, { 36, 0 }, { 37, 0 }, + { 38, 0 }, { 39, 0 }, { 40, 0 }, { 41, 0 }, { 0, 0 }, + { 43, 0 }, { 44, 0 }, { 45, 0 }, { 46, 0 }, { 47, 0 }, + { 48, 0 }, { 49, 0 }, { 50, 0 }, { 51, 0 }, { 52, 0 }, + { 53, 0 }, { 54, 0 }, { 55, 0 }, { 56, 0 }, { 57, 0 }, + + { 58, 0 }, { 59, 0 }, { 60, 0 }, { 61, 0 }, { 62, 0 }, + { 63, 0 }, { 64, 0 }, { 65, 0 }, { 66, 0 }, { 67, 0 }, + { 68, 0 }, { 69, 0 }, { 70, 0 }, { 71, 0 }, { 72, 0 }, + { 73, 0 }, { 74, 0 }, { 75, 0 }, { 76, 0 }, { 77, 0 }, + { 78, 0 }, { 79, 0 }, { 80, 0 }, { 81, 0 }, { 82, 0 }, + { 83, 0 }, { 84, 0 }, { 85, 0 }, { 86, 0 }, { 87, 0 }, + { 88, 0 }, { 89, 0 }, { 90, 0 }, { 91, 0 }, { 92, 0 }, + { 93, 0 }, { 94, 0 }, { 95, 0 }, { 96, 0 }, { 97, 0 }, + { 98, 0 }, { 99, 0 }, { 100, 0 }, { 101, 0 }, { 102, 0 }, + { 103, 0 }, { 104, 0 }, { 105, 0 }, { 106, 0 }, { 107, 0 }, + + { 108, 0 }, { 109, 0 }, { 110, 0 }, { 111, 0 }, { 112, 0 }, + { 113, 0 }, { 114, 0 }, { 115, 0 }, { 116, 0 }, { 117, 0 }, + { 118, 0 }, { 119, 0 }, { 120, 0 }, { 121, 0 }, { 122, 0 }, + { 123, 0 }, { 124, 0 }, { 125, 0 }, { 126, 0 }, { 127, 0 }, + { 128, 0 }, { 129, 0 }, { 130, 0 }, { 131, 0 }, { 132, 0 }, + { 133, 0 }, { 134, 0 }, { 135, 0 }, { 136, 0 }, { 137, 0 }, + { 138, 0 }, { 139, 0 }, { 140, 0 }, { 141, 0 }, { 142, 0 }, + { 143, 0 }, { 144, 0 }, { 145, 0 }, { 146, 0 }, { 147, 0 }, + { 148, 0 }, { 149, 0 }, { 150, 0 }, { 151, 0 }, { 152, 0 }, + { 153, 0 }, { 154, 0 }, { 155, 0 }, { 156, 0 }, { 157, 0 }, + + { 158, 0 }, { 159, 0 }, { 160, 0 }, { 161, 0 }, { 162, 0 }, + { 163, 0 }, { 164, 0 }, { 165, 0 }, { 166, 0 }, { 167, 0 }, + { 168, 0 }, { 169, 0 }, { 170, 0 }, { 171, 0 }, { 172, 0 }, + { 173, 0 }, { 174, 0 }, { 175, 0 }, { 176, 0 }, { 177, 0 }, + { 178, 0 }, { 179, 0 }, { 180, 0 }, { 181, 0 }, { 182, 0 }, + { 183, 0 }, { 184, 0 }, { 185, 0 }, { 186, 0 }, { 187, 0 }, + { 188, 0 }, { 189, 0 }, { 190, 0 }, { 191, 0 }, { 192, 0 }, + { 193, 0 }, { 194, 0 }, { 195, 0 }, { 196, 0 }, { 197, 0 }, + { 198, 0 }, { 199, 0 }, { 200, 0 }, { 201, 0 }, { 202, 0 }, + { 203, 0 }, { 204, 0 }, { 205, 0 }, { 206, 0 }, { 207, 0 }, + + { 208, 0 }, { 209, 0 }, { 210, 0 }, { 211, 0 }, { 212, 0 }, + { 213, 0 }, { 214, 0 }, { 215, 0 }, { 216, 0 }, { 217, 0 }, + { 218, 0 }, { 219, 0 }, { 220, 0 }, { 221, 0 }, { 222, 0 }, + { 223, 0 }, { 224, 0 }, { 225, 0 }, { 226, 0 }, { 227, 0 }, + { 228, 0 }, { 229, 0 }, { 230, 0 }, { 231, 0 }, { 232, 0 }, + { 233, 0 }, { 234, 0 }, { 235, 0 }, { 236, 0 }, { 237, 0 }, + { 238, 0 }, { 239, 0 }, { 
240, 0 }, { 241, 0 }, { 242, 0 }, + { 243, 0 }, { 244, 0 }, { 245, 0 }, { 246, 0 }, { 247, 0 }, + { 248, 0 }, { 249, 0 }, { 250, 0 }, { 251, 0 }, { 252, 0 }, + { 253, 0 }, { 254, 0 }, { 255, 0 }, { 256, 0 }, { 0, 41 }, + + { 0,7027 }, { 1,-4076 }, { 2,-4076 }, { 3,-4076 }, { 4,-4076 }, + { 5,-4076 }, { 6,-4076 }, { 7,-4076 }, { 8,-4076 }, { 0, 0 }, + { 0, 0 }, { 11,-4076 }, { 0, 0 }, { 0, 0 }, { 14,-4076 }, + { 15,-4076 }, { 16,-4076 }, { 17,-4076 }, { 18,-4076 }, { 19,-4076 }, + { 20,-4076 }, { 21,-4076 }, { 22,-4076 }, { 23,-4076 }, { 24,-4076 }, + { 25,-4076 }, { 26,-4076 }, { 27,-4076 }, { 28,-4076 }, { 29,-4076 }, + { 30,-4076 }, { 31,-4076 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39,-4076 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48, 0 }, { 49, 0 }, + + { 50, 0 }, { 51, 0 }, { 52, 0 }, { 53, 0 }, { 54, 0 }, + { 55, 0 }, { 56, 0 }, { 57, 0 }, { 0, 0 }, { 59,-4076 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 65,-4076 }, { 66,-4076 }, { 67,-4076 }, { 68,-4076 }, { 69, 258 }, + { 70,-4076 }, { 71,-4076 }, { 72,-4076 }, { 73,-4076 }, { 74,-4076 }, + { 75,-4076 }, { 76,-4076 }, { 77,-4076 }, { 78,-4076 }, { 79,-4076 }, + { 80,-4076 }, { 81,-4076 }, { 82,-4076 }, { 83,-4076 }, { 84,-4076 }, + { 85,-4076 }, { 86,-4076 }, { 87,-4076 }, { 88,-4076 }, { 89,-4076 }, + { 90,-4076 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94,-4076 }, + { 95, 268 }, { 96,-4076 }, { 97,-4076 }, { 98,-4076 }, { 99,-4076 }, + + { 100,-4076 }, { 101, 258 }, { 102,-4076 }, { 103,-4076 }, { 104,-4076 }, + { 105,-4076 }, { 106,-4076 }, { 107,-4076 }, { 108,-4076 }, { 109,-4076 }, + { 110,-4076 }, { 111,-4076 }, { 112,-4076 }, { 113,-4076 }, { 114,-4076 }, + { 115,-4076 }, { 116,-4076 }, { 117,-4076 }, { 118,-4076 }, { 119,-4076 }, + { 120,-4076 }, { 121,-4076 }, { 122,-4076 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 126,-4076 }, { 127,-4076 }, { 128,-4076 }, { 129,-4076 }, + { 130,-4076 }, { 131,-4076 }, { 132,-4076 }, { 133,-4076 }, { 134,-4076 }, + { 135,-4076 }, { 136,-4076 }, { 137,-4076 }, { 138,-4076 }, { 139,-4076 }, + { 140,-4076 }, { 141,-4076 }, { 142,-4076 }, { 143,-4076 }, { 144,-4076 }, + { 145,-4076 }, { 146,-4076 }, { 147,-4076 }, { 148,-4076 }, { 149,-4076 }, + + { 150,-4076 }, { 151,-4076 }, { 152,-4076 }, { 153,-4076 }, { 154,-4076 }, + { 155,-4076 }, { 156,-4076 }, { 157,-4076 }, { 158,-4076 }, { 159,-4076 }, + { 160,-4076 }, { 161,-4076 }, { 162,-4076 }, { 163,-4076 }, { 164,-4076 }, + { 165,-4076 }, { 166,-4076 }, { 167,-4076 }, { 168,-4076 }, { 169,-4076 }, + { 170,-4076 }, { 171,-4076 }, { 172,-4076 }, { 173,-4076 }, { 174,-4076 }, + { 175,-4076 }, { 176,-4076 }, { 177,-4076 }, { 178,-4076 }, { 179,-4076 }, + { 180,-4076 }, { 181,-4076 }, { 182,-4076 }, { 183,-4076 }, { 184,-4076 }, + { 185,-4076 }, { 186,-4076 }, { 187,-4076 }, { 188,-4076 }, { 189,-4076 }, + { 190,-4076 }, { 191,-4076 }, { 192,-4076 }, { 193,-4076 }, { 194,-4076 }, + { 195,-4076 }, { 196,-4076 }, { 197,-4076 }, { 198,-4076 }, { 199,-4076 }, + + { 200,-4076 }, { 201,-4076 }, { 202,-4076 }, { 203,-4076 }, { 204,-4076 }, + { 205,-4076 }, { 206,-4076 }, { 207,-4076 }, { 208,-4076 }, { 209,-4076 }, + { 210,-4076 }, { 211,-4076 }, { 212,-4076 }, { 213,-4076 }, { 214,-4076 }, + { 215,-4076 }, { 216,-4076 }, { 217,-4076 }, { 218,-4076 }, { 219,-4076 }, + { 220,-4076 }, { 221,-4076 }, { 222,-4076 }, { 223,-4076 }, { 224,-4076 }, + { 225,-4076 }, { 226,-4076 }, { 227,-4076 }, { 228,-4076 }, { 229,-4076 }, + { 230,-4076 }, { 231,-4076 }, { 232,-4076 }, { 
233,-4076 }, { 234,-4076 }, + { 235,-4076 }, { 236,-4076 }, { 237,-4076 }, { 238,-4076 }, { 239,-4076 }, + { 240,-4076 }, { 241,-4076 }, { 242,-4076 }, { 243,-4076 }, { 244,-4076 }, + { 245,-4076 }, { 246,-4076 }, { 247,-4076 }, { 248,-4076 }, { 249,-4076 }, + + { 250,-4076 }, { 251,-4076 }, { 252,-4076 }, { 253,-4076 }, { 254,-4076 }, + { 255,-4076 }, { 256,-4076 }, { 0, 48 }, { 0,6769 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 48 }, { 0,6759 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 43, 585 }, { 0, 0 }, { 45, 585 }, { 0, 0 }, + { 0, 0 }, { 48,1841 }, { 49,1841 }, { 50,1841 }, { 51,1841 }, + { 52,1841 }, { 53,1841 }, { 54,1841 }, { 55,1841 }, { 56,1841 }, + { 57,1841 }, { 48,2089 }, { 49,2089 }, { 50,2089 }, { 51,2089 }, + { 52,2089 }, { 53,2089 }, { 54,2089 }, { 55,2089 }, { 56,2089 }, + { 57,2089 }, { 0, 41 }, { 0,6700 }, { 1,-4403 }, { 2,-4403 }, + { 3,-4403 }, { 4,-4403 }, { 5,-4403 }, { 6,-4403 }, { 7,-4403 }, + { 8,-4403 }, { 0, 0 }, { 0, 0 }, { 11,-4403 }, { 0, 0 }, + { 0, 0 }, { 14,-4403 }, { 15,-4403 }, { 16,-4403 }, { 17,-4403 }, + { 18,-4403 }, { 19,-4403 }, { 20,-4403 }, { 21,-4403 }, { 22,-4403 }, + + { 23,-4403 }, { 24,-4403 }, { 25,-4403 }, { 26,-4403 }, { 27,-4403 }, + { 28,-4403 }, { 29,-4403 }, { 30,-4403 }, { 31,-4403 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 39,-4403 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 48,2288 }, { 49,2288 }, { 50,2288 }, { 51,2288 }, { 52,2288 }, + { 53,2288 }, { 54,2288 }, { 55,2288 }, { 56,2288 }, { 57,2288 }, + { 0, 0 }, { 59,-4403 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 65,-4403 }, { 66,-4403 }, { 67,-4403 }, + { 68,-4403 }, { 69, -69 }, { 70,-4403 }, { 71,-4403 }, { 72,-4403 }, + + { 73,-4403 }, { 74,-4403 }, { 75,-4403 }, { 76,-4403 }, { 77,-4403 }, + { 78,-4403 }, { 79,-4403 }, { 80,-4403 }, { 81,-4403 }, { 82,-4403 }, + { 83,-4403 }, { 84,-4403 }, { 85,-4403 }, { 86,-4403 }, { 87,-4403 }, + { 88,-4403 }, { 89,-4403 }, { 90,-4403 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 94,-4403 }, { 95,2546 }, { 96,-4403 }, { 97,-4403 }, + { 98,-4403 }, { 99,-4403 }, { 100,-4403 }, { 101, -69 }, { 102,-4403 }, + { 103,-4403 }, { 104,-4403 }, { 105,-4403 }, { 106,-4403 }, { 107,-4403 }, + { 108,-4403 }, { 109,-4403 }, { 110,-4403 }, { 111,-4403 }, { 112,-4403 }, + { 113,-4403 }, { 114,-4403 }, { 115,-4403 }, { 116,-4403 }, { 117,-4403 }, + { 118,-4403 }, { 119,-4403 }, { 120,-4403 }, { 121,-4403 }, { 122,-4403 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 126,-4403 }, { 127,-4403 }, + { 128,-4403 }, { 129,-4403 }, { 130,-4403 }, { 131,-4403 }, { 132,-4403 }, + { 133,-4403 }, { 134,-4403 }, { 135,-4403 }, { 136,-4403 }, { 137,-4403 }, + { 138,-4403 }, { 139,-4403 }, { 140,-4403 }, { 141,-4403 }, { 142,-4403 }, + { 143,-4403 }, { 144,-4403 }, { 145,-4403 }, { 146,-4403 }, { 147,-4403 }, + { 148,-4403 }, { 149,-4403 }, { 150,-4403 }, { 151,-4403 }, { 152,-4403 }, + { 153,-4403 }, { 154,-4403 }, { 155,-4403 }, { 156,-4403 }, { 157,-4403 }, + { 158,-4403 }, { 159,-4403 }, { 160,-4403 }, { 161,-4403 }, { 162,-4403 }, + { 163,-4403 }, { 164,-4403 }, { 165,-4403 }, { 166,-4403 }, { 167,-4403 }, + { 168,-4403 }, { 169,-4403 }, { 
170,-4403 }, { 171,-4403 }, { 172,-4403 }, + + { 173,-4403 }, { 174,-4403 }, { 175,-4403 }, { 176,-4403 }, { 177,-4403 }, + { 178,-4403 }, { 179,-4403 }, { 180,-4403 }, { 181,-4403 }, { 182,-4403 }, + { 183,-4403 }, { 184,-4403 }, { 185,-4403 }, { 186,-4403 }, { 187,-4403 }, + { 188,-4403 }, { 189,-4403 }, { 190,-4403 }, { 191,-4403 }, { 192,-4403 }, + { 193,-4403 }, { 194,-4403 }, { 195,-4403 }, { 196,-4403 }, { 197,-4403 }, + { 198,-4403 }, { 199,-4403 }, { 200,-4403 }, { 201,-4403 }, { 202,-4403 }, + { 203,-4403 }, { 204,-4403 }, { 205,-4403 }, { 206,-4403 }, { 207,-4403 }, + { 208,-4403 }, { 209,-4403 }, { 210,-4403 }, { 211,-4403 }, { 212,-4403 }, + { 213,-4403 }, { 214,-4403 }, { 215,-4403 }, { 216,-4403 }, { 217,-4403 }, + { 218,-4403 }, { 219,-4403 }, { 220,-4403 }, { 221,-4403 }, { 222,-4403 }, + + { 223,-4403 }, { 224,-4403 }, { 225,-4403 }, { 226,-4403 }, { 227,-4403 }, + { 228,-4403 }, { 229,-4403 }, { 230,-4403 }, { 231,-4403 }, { 232,-4403 }, + { 233,-4403 }, { 234,-4403 }, { 235,-4403 }, { 236,-4403 }, { 237,-4403 }, + { 238,-4403 }, { 239,-4403 }, { 240,-4403 }, { 241,-4403 }, { 242,-4403 }, + { 243,-4403 }, { 244,-4403 }, { 245,-4403 }, { 246,-4403 }, { 247,-4403 }, + { 248,-4403 }, { 249,-4403 }, { 250,-4403 }, { 251,-4403 }, { 252,-4403 }, + { 253,-4403 }, { 254,-4403 }, { 255,-4403 }, { 256,-4403 }, { 0, 45 }, + { 0,6442 }, { 1,-4392 }, { 2,-4392 }, { 3,-4392 }, { 4,-4392 }, + { 5,-4392 }, { 6,-4392 }, { 7,-4392 }, { 8,-4392 }, { 0, 0 }, + { 0, 0 }, { 11,-4392 }, { 0, 0 }, { 0, 0 }, { 14,-4392 }, + + { 15,-4392 }, { 16,-4392 }, { 17,-4392 }, { 18,-4392 }, { 19,-4392 }, + { 20,-4392 }, { 21,-4392 }, { 22,-4392 }, { 23,-4392 }, { 24,-4392 }, + { 25,-4392 }, { 26,-4392 }, { 27,-4392 }, { 28,-4392 }, { 29,-4392 }, + { 30,-4392 }, { 31,-4392 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39,-4392 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48,2347 }, { 49,2347 }, + { 50,-4392 }, { 51,-4392 }, { 52,-4392 }, { 53,-4392 }, { 54,-4392 }, + { 55,-4392 }, { 56,-4392 }, { 57,-4392 }, { 0, 0 }, { 59,-4392 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 65,-4392 }, { 66,-4392 }, { 67,-4392 }, { 68,-4392 }, { 69,-4392 }, + { 70,-4392 }, { 71,-4392 }, { 72,-4392 }, { 73,-4392 }, { 74,-4392 }, + { 75,-4392 }, { 76,-4392 }, { 77,-4392 }, { 78,-4392 }, { 79,-4392 }, + { 80,-4392 }, { 81,-4392 }, { 82,-4392 }, { 83,-4392 }, { 84,-4392 }, + { 85,-4392 }, { 86,-4392 }, { 87,-4392 }, { 88,-4392 }, { 89,-4392 }, + { 90,-4392 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94,-4392 }, + { 95,2605 }, { 96,-4392 }, { 97,-4392 }, { 98,-4392 }, { 99,-4392 }, + { 100,-4392 }, { 101,-4392 }, { 102,-4392 }, { 103,-4392 }, { 104,-4392 }, + { 105,-4392 }, { 106,-4392 }, { 107,-4392 }, { 108,-4392 }, { 109,-4392 }, + { 110,-4392 }, { 111,-4392 }, { 112,-4392 }, { 113,-4392 }, { 114,-4392 }, + + { 115,-4392 }, { 116,-4392 }, { 117,-4392 }, { 118,-4392 }, { 119,-4392 }, + { 120,-4392 }, { 121,-4392 }, { 122,-4392 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 126,-4392 }, { 127,-4392 }, { 128,-4392 }, { 129,-4392 }, + { 130,-4392 }, { 131,-4392 }, { 132,-4392 }, { 133,-4392 }, { 134,-4392 }, + { 135,-4392 }, { 136,-4392 }, { 137,-4392 }, { 138,-4392 }, { 139,-4392 }, + { 140,-4392 }, { 141,-4392 }, { 142,-4392 }, { 143,-4392 }, { 144,-4392 }, + { 145,-4392 }, { 146,-4392 }, { 147,-4392 }, { 148,-4392 }, { 149,-4392 }, + { 150,-4392 }, { 151,-4392 }, { 152,-4392 }, { 153,-4392 }, { 154,-4392 }, + { 155,-4392 }, { 156,-4392 }, { 157,-4392 }, { 
158,-4392 }, { 159,-4392 }, + { 160,-4392 }, { 161,-4392 }, { 162,-4392 }, { 163,-4392 }, { 164,-4392 }, + + { 165,-4392 }, { 166,-4392 }, { 167,-4392 }, { 168,-4392 }, { 169,-4392 }, + { 170,-4392 }, { 171,-4392 }, { 172,-4392 }, { 173,-4392 }, { 174,-4392 }, + { 175,-4392 }, { 176,-4392 }, { 177,-4392 }, { 178,-4392 }, { 179,-4392 }, + { 180,-4392 }, { 181,-4392 }, { 182,-4392 }, { 183,-4392 }, { 184,-4392 }, + { 185,-4392 }, { 186,-4392 }, { 187,-4392 }, { 188,-4392 }, { 189,-4392 }, + { 190,-4392 }, { 191,-4392 }, { 192,-4392 }, { 193,-4392 }, { 194,-4392 }, + { 195,-4392 }, { 196,-4392 }, { 197,-4392 }, { 198,-4392 }, { 199,-4392 }, + { 200,-4392 }, { 201,-4392 }, { 202,-4392 }, { 203,-4392 }, { 204,-4392 }, + { 205,-4392 }, { 206,-4392 }, { 207,-4392 }, { 208,-4392 }, { 209,-4392 }, + { 210,-4392 }, { 211,-4392 }, { 212,-4392 }, { 213,-4392 }, { 214,-4392 }, + + { 215,-4392 }, { 216,-4392 }, { 217,-4392 }, { 218,-4392 }, { 219,-4392 }, + { 220,-4392 }, { 221,-4392 }, { 222,-4392 }, { 223,-4392 }, { 224,-4392 }, + { 225,-4392 }, { 226,-4392 }, { 227,-4392 }, { 228,-4392 }, { 229,-4392 }, + { 230,-4392 }, { 231,-4392 }, { 232,-4392 }, { 233,-4392 }, { 234,-4392 }, + { 235,-4392 }, { 236,-4392 }, { 237,-4392 }, { 238,-4392 }, { 239,-4392 }, + { 240,-4392 }, { 241,-4392 }, { 242,-4392 }, { 243,-4392 }, { 244,-4392 }, + { 245,-4392 }, { 246,-4392 }, { 247,-4392 }, { 248,-4392 }, { 249,-4392 }, + { 250,-4392 }, { 251,-4392 }, { 252,-4392 }, { 253,-4392 }, { 254,-4392 }, + { 255,-4392 }, { 256,-4392 }, { 0, 46 }, { 0,6184 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 48,1256 }, { 49,1256 }, { 50,1256 }, { 51,1256 }, + { 52,1256 }, { 53,1256 }, { 54,1256 }, { 55,1256 }, { 56,1256 }, + + { 57,1256 }, { 0, 40 }, { 0,6125 }, { 1,2546 }, { 2,2546 }, + { 3,2546 }, { 4,2546 }, { 5,2546 }, { 6,2546 }, { 7,2546 }, + { 8,2546 }, { 0, 0 }, { 0, 0 }, { 11,2546 }, { 0, 0 }, + { 0, 0 }, { 14,2546 }, { 15,2546 }, { 16,2546 }, { 17,2546 }, + { 18,2546 }, { 19,2546 }, { 20,2546 }, { 21,2546 }, { 22,2546 }, + { 23,2546 }, { 24,2546 }, { 25,2546 }, { 26,2546 }, { 27,2546 }, + { 28,2546 }, { 29,2546 }, { 30,2546 }, { 31,2546 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 39,2546 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 48,2804 }, { 49,2804 }, { 50,2804 }, { 51,2804 }, { 52,2804 }, + { 53,2804 }, { 54,2804 }, { 55,2804 }, { 56,2804 }, { 57,2804 }, + { 0, 0 }, { 59,2546 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 65,2546 }, { 66,2546 }, { 67,2546 }, + { 68,2546 }, { 69,2546 }, { 70,2546 }, { 71,2546 }, { 72,2546 }, + { 73,2546 }, { 74,2546 }, { 75,2546 }, { 76,2546 }, { 77,2546 }, + { 78,2546 }, { 79,2546 }, { 80,2546 }, { 81,2546 }, { 82,2546 }, + { 83,2546 }, { 84,2546 }, { 85,2546 }, { 86,2546 }, { 87,2546 }, + { 88,2546 }, { 89,2546 }, { 90,2546 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 94,2546 }, { 95,3062 }, { 96,2546 }, { 97,2546 }, + + { 98,2546 }, { 99,2546 }, { 100,2546 }, { 101,2546 }, { 102,2546 }, + { 103,2546 }, { 104,2546 }, { 105,2546 }, { 106,2546 }, { 107,2546 }, 
+ { 108,2546 }, { 109,2546 }, { 110,2546 }, { 111,2546 }, { 112,2546 }, + { 113,2546 }, { 114,2546 }, { 115,2546 }, { 116,2546 }, { 117,2546 }, + { 118,2546 }, { 119,2546 }, { 120,2546 }, { 121,2546 }, { 122,2546 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 126,2546 }, { 127,2546 }, + { 128,2546 }, { 129,2546 }, { 130,2546 }, { 131,2546 }, { 132,2546 }, + { 133,2546 }, { 134,2546 }, { 135,2546 }, { 136,2546 }, { 137,2546 }, + { 138,2546 }, { 139,2546 }, { 140,2546 }, { 141,2546 }, { 142,2546 }, + { 143,2546 }, { 144,2546 }, { 145,2546 }, { 146,2546 }, { 147,2546 }, + + { 148,2546 }, { 149,2546 }, { 150,2546 }, { 151,2546 }, { 152,2546 }, + { 153,2546 }, { 154,2546 }, { 155,2546 }, { 156,2546 }, { 157,2546 }, + { 158,2546 }, { 159,2546 }, { 160,2546 }, { 161,2546 }, { 162,2546 }, + { 163,2546 }, { 164,2546 }, { 165,2546 }, { 166,2546 }, { 167,2546 }, + { 168,2546 }, { 169,2546 }, { 170,2546 }, { 171,2546 }, { 172,2546 }, + { 173,2546 }, { 174,2546 }, { 175,2546 }, { 176,2546 }, { 177,2546 }, + { 178,2546 }, { 179,2546 }, { 180,2546 }, { 181,2546 }, { 182,2546 }, + { 183,2546 }, { 184,2546 }, { 185,2546 }, { 186,2546 }, { 187,2546 }, + { 188,2546 }, { 189,2546 }, { 190,2546 }, { 191,2546 }, { 192,2546 }, + { 193,2546 }, { 194,2546 }, { 195,2546 }, { 196,2546 }, { 197,2546 }, + + { 198,2546 }, { 199,2546 }, { 200,2546 }, { 201,2546 }, { 202,2546 }, + { 203,2546 }, { 204,2546 }, { 205,2546 }, { 206,2546 }, { 207,2546 }, + { 208,2546 }, { 209,2546 }, { 210,2546 }, { 211,2546 }, { 212,2546 }, + { 213,2546 }, { 214,2546 }, { 215,2546 }, { 216,2546 }, { 217,2546 }, + { 218,2546 }, { 219,2546 }, { 220,2546 }, { 221,2546 }, { 222,2546 }, + { 223,2546 }, { 224,2546 }, { 225,2546 }, { 226,2546 }, { 227,2546 }, + { 228,2546 }, { 229,2546 }, { 230,2546 }, { 231,2546 }, { 232,2546 }, + { 233,2546 }, { 234,2546 }, { 235,2546 }, { 236,2546 }, { 237,2546 }, + { 238,2546 }, { 239,2546 }, { 240,2546 }, { 241,2546 }, { 242,2546 }, + { 243,2546 }, { 244,2546 }, { 245,2546 }, { 246,2546 }, { 247,2546 }, + + { 248,2546 }, { 249,2546 }, { 250,2546 }, { 251,2546 }, { 252,2546 }, + { 253,2546 }, { 254,2546 }, { 255,2546 }, { 256,2546 }, { 0, 44 }, + { 0,5867 }, { 1,-4967 }, { 2,-4967 }, { 3,-4967 }, { 4,-4967 }, + { 5,-4967 }, { 6,-4967 }, { 7,-4967 }, { 8,-4967 }, { 0, 0 }, + { 0, 0 }, { 11,-4967 }, { 0, 0 }, { 0, 0 }, { 14,-4967 }, + { 15,-4967 }, { 16,-4967 }, { 17,-4967 }, { 18,-4967 }, { 19,-4967 }, + { 20,-4967 }, { 21,-4967 }, { 22,-4967 }, { 23,-4967 }, { 24,-4967 }, + { 25,-4967 }, { 26,-4967 }, { 27,-4967 }, { 28,-4967 }, { 29,-4967 }, + { 30,-4967 }, { 31,-4967 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39,-4967 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48,3062 }, { 49,3062 }, + { 50,3062 }, { 51,3062 }, { 52,3062 }, { 53,3062 }, { 54,3062 }, + { 55,3062 }, { 56,-4967 }, { 57,-4967 }, { 0, 0 }, { 59,-4967 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 65,-4967 }, { 66,-4967 }, { 67,-4967 }, { 68,-4967 }, { 69,-4967 }, + { 70,-4967 }, { 71,-4967 }, { 72,-4967 }, { 73,-4967 }, { 74,-4967 }, + { 75,-4967 }, { 76,-4967 }, { 77,-4967 }, { 78,-4967 }, { 79,-4967 }, + { 80,-4967 }, { 81,-4967 }, { 82,-4967 }, { 83,-4967 }, { 84,-4967 }, + { 85,-4967 }, { 86,-4967 }, { 87,-4967 }, { 88,-4967 }, { 89,-4967 }, + + { 90,-4967 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94,-4967 }, + { 95,3320 }, { 96,-4967 }, { 97,-4967 }, { 98,-4967 }, { 99,-4967 }, + { 100,-4967 }, { 101,-4967 }, { 102,-4967 }, { 103,-4967 }, { 104,-4967 }, + { 105,-4967 
}, { 106,-4967 }, { 107,-4967 }, { 108,-4967 }, { 109,-4967 }, + { 110,-4967 }, { 111,-4967 }, { 112,-4967 }, { 113,-4967 }, { 114,-4967 }, + { 115,-4967 }, { 116,-4967 }, { 117,-4967 }, { 118,-4967 }, { 119,-4967 }, + { 120,-4967 }, { 121,-4967 }, { 122,-4967 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 126,-4967 }, { 127,-4967 }, { 128,-4967 }, { 129,-4967 }, + { 130,-4967 }, { 131,-4967 }, { 132,-4967 }, { 133,-4967 }, { 134,-4967 }, + { 135,-4967 }, { 136,-4967 }, { 137,-4967 }, { 138,-4967 }, { 139,-4967 }, + + { 140,-4967 }, { 141,-4967 }, { 142,-4967 }, { 143,-4967 }, { 144,-4967 }, + { 145,-4967 }, { 146,-4967 }, { 147,-4967 }, { 148,-4967 }, { 149,-4967 }, + { 150,-4967 }, { 151,-4967 }, { 152,-4967 }, { 153,-4967 }, { 154,-4967 }, + { 155,-4967 }, { 156,-4967 }, { 157,-4967 }, { 158,-4967 }, { 159,-4967 }, + { 160,-4967 }, { 161,-4967 }, { 162,-4967 }, { 163,-4967 }, { 164,-4967 }, + { 165,-4967 }, { 166,-4967 }, { 167,-4967 }, { 168,-4967 }, { 169,-4967 }, + { 170,-4967 }, { 171,-4967 }, { 172,-4967 }, { 173,-4967 }, { 174,-4967 }, + { 175,-4967 }, { 176,-4967 }, { 177,-4967 }, { 178,-4967 }, { 179,-4967 }, + { 180,-4967 }, { 181,-4967 }, { 182,-4967 }, { 183,-4967 }, { 184,-4967 }, + { 185,-4967 }, { 186,-4967 }, { 187,-4967 }, { 188,-4967 }, { 189,-4967 }, + + { 190,-4967 }, { 191,-4967 }, { 192,-4967 }, { 193,-4967 }, { 194,-4967 }, + { 195,-4967 }, { 196,-4967 }, { 197,-4967 }, { 198,-4967 }, { 199,-4967 }, + { 200,-4967 }, { 201,-4967 }, { 202,-4967 }, { 203,-4967 }, { 204,-4967 }, + { 205,-4967 }, { 206,-4967 }, { 207,-4967 }, { 208,-4967 }, { 209,-4967 }, + { 210,-4967 }, { 211,-4967 }, { 212,-4967 }, { 213,-4967 }, { 214,-4967 }, + { 215,-4967 }, { 216,-4967 }, { 217,-4967 }, { 218,-4967 }, { 219,-4967 }, + { 220,-4967 }, { 221,-4967 }, { 222,-4967 }, { 223,-4967 }, { 224,-4967 }, + { 225,-4967 }, { 226,-4967 }, { 227,-4967 }, { 228,-4967 }, { 229,-4967 }, + { 230,-4967 }, { 231,-4967 }, { 232,-4967 }, { 233,-4967 }, { 234,-4967 }, + { 235,-4967 }, { 236,-4967 }, { 237,-4967 }, { 238,-4967 }, { 239,-4967 }, + + { 240,-4967 }, { 241,-4967 }, { 242,-4967 }, { 243,-4967 }, { 244,-4967 }, + { 245,-4967 }, { 246,-4967 }, { 247,-4967 }, { 248,-4967 }, { 249,-4967 }, + { 250,-4967 }, { 251,-4967 }, { 252,-4967 }, { 253,-4967 }, { 254,-4967 }, + { 255,-4967 }, { 256,-4967 }, { 0, 43 }, { 0,5609 }, { 1,-5225 }, + { 2,-5225 }, { 3,-5225 }, { 4,-5225 }, { 5,-5225 }, { 6,-5225 }, + { 7,-5225 }, { 8,-5225 }, { 0, 0 }, { 0, 0 }, { 11,-5225 }, + { 0, 0 }, { 0, 0 }, { 14,-5225 }, { 15,-5225 }, { 16,-5225 }, + { 17,-5225 }, { 18,-5225 }, { 19,-5225 }, { 20,-5225 }, { 21,-5225 }, + { 22,-5225 }, { 23,-5225 }, { 24,-5225 }, { 25,-5225 }, { 26,-5225 }, + { 27,-5225 }, { 28,-5225 }, { 29,-5225 }, { 30,-5225 }, { 31,-5225 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 39,-5225 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 48,3320 }, { 49,3320 }, { 50,3320 }, { 51,3320 }, + { 52,3320 }, { 53,3320 }, { 54,3320 }, { 55,3320 }, { 56,3320 }, + { 57,3320 }, { 0, 0 }, { 59,-5225 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65,3320 }, { 66,3320 }, + { 67,3320 }, { 68,3320 }, { 69,3320 }, { 70,3320 }, { 71,-5225 }, + { 72,-5225 }, { 73,-5225 }, { 74,-5225 }, { 75,-5225 }, { 76,-5225 }, + { 77,-5225 }, { 78,-5225 }, { 79,-5225 }, { 80,-5225 }, { 81,-5225 }, + + { 82,-5225 }, { 83,-5225 }, { 84,-5225 }, { 85,-5225 }, { 86,-5225 }, + { 87,-5225 }, { 88,-5225 }, { 89,-5225 }, { 90,-5225 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 
94,-5225 }, { 95,3578 }, { 96,-5225 }, + { 97,3320 }, { 98,3320 }, { 99,3320 }, { 100,3320 }, { 101,3320 }, + { 102,3320 }, { 103,-5225 }, { 104,-5225 }, { 105,-5225 }, { 106,-5225 }, + { 107,-5225 }, { 108,-5225 }, { 109,-5225 }, { 110,-5225 }, { 111,-5225 }, + { 112,-5225 }, { 113,-5225 }, { 114,-5225 }, { 115,-5225 }, { 116,-5225 }, + { 117,-5225 }, { 118,-5225 }, { 119,-5225 }, { 120,-5225 }, { 121,-5225 }, + { 122,-5225 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 126,-5225 }, + { 127,-5225 }, { 128,-5225 }, { 129,-5225 }, { 130,-5225 }, { 131,-5225 }, + + { 132,-5225 }, { 133,-5225 }, { 134,-5225 }, { 135,-5225 }, { 136,-5225 }, + { 137,-5225 }, { 138,-5225 }, { 139,-5225 }, { 140,-5225 }, { 141,-5225 }, + { 142,-5225 }, { 143,-5225 }, { 144,-5225 }, { 145,-5225 }, { 146,-5225 }, + { 147,-5225 }, { 148,-5225 }, { 149,-5225 }, { 150,-5225 }, { 151,-5225 }, + { 152,-5225 }, { 153,-5225 }, { 154,-5225 }, { 155,-5225 }, { 156,-5225 }, + { 157,-5225 }, { 158,-5225 }, { 159,-5225 }, { 160,-5225 }, { 161,-5225 }, + { 162,-5225 }, { 163,-5225 }, { 164,-5225 }, { 165,-5225 }, { 166,-5225 }, + { 167,-5225 }, { 168,-5225 }, { 169,-5225 }, { 170,-5225 }, { 171,-5225 }, + { 172,-5225 }, { 173,-5225 }, { 174,-5225 }, { 175,-5225 }, { 176,-5225 }, + { 177,-5225 }, { 178,-5225 }, { 179,-5225 }, { 180,-5225 }, { 181,-5225 }, + + { 182,-5225 }, { 183,-5225 }, { 184,-5225 }, { 185,-5225 }, { 186,-5225 }, + { 187,-5225 }, { 188,-5225 }, { 189,-5225 }, { 190,-5225 }, { 191,-5225 }, + { 192,-5225 }, { 193,-5225 }, { 194,-5225 }, { 195,-5225 }, { 196,-5225 }, + { 197,-5225 }, { 198,-5225 }, { 199,-5225 }, { 200,-5225 }, { 201,-5225 }, + { 202,-5225 }, { 203,-5225 }, { 204,-5225 }, { 205,-5225 }, { 206,-5225 }, + { 207,-5225 }, { 208,-5225 }, { 209,-5225 }, { 210,-5225 }, { 211,-5225 }, + { 212,-5225 }, { 213,-5225 }, { 214,-5225 }, { 215,-5225 }, { 216,-5225 }, + { 217,-5225 }, { 218,-5225 }, { 219,-5225 }, { 220,-5225 }, { 221,-5225 }, + { 222,-5225 }, { 223,-5225 }, { 224,-5225 }, { 225,-5225 }, { 226,-5225 }, + { 227,-5225 }, { 228,-5225 }, { 229,-5225 }, { 230,-5225 }, { 231,-5225 }, + + { 232,-5225 }, { 233,-5225 }, { 234,-5225 }, { 235,-5225 }, { 236,-5225 }, + { 237,-5225 }, { 238,-5225 }, { 239,-5225 }, { 240,-5225 }, { 241,-5225 }, + { 242,-5225 }, { 243,-5225 }, { 244,-5225 }, { 245,-5225 }, { 246,-5225 }, + { 247,-5225 }, { 248,-5225 }, { 249,-5225 }, { 250,-5225 }, { 251,-5225 }, + { 252,-5225 }, { 253,-5225 }, { 254,-5225 }, { 255,-5225 }, { 256,-5225 }, + { 0, 42 }, { 0,5351 }, { 1,-4690 }, { 2,-4690 }, { 3,-4690 }, + { 4,-4690 }, { 5,-4690 }, { 6,-4690 }, { 7,-4690 }, { 8,-4690 }, + { 0, 0 }, { 0, 0 }, { 11,-4690 }, { 0, 0 }, { 0, 0 }, + { 14,-4690 }, { 15,-4690 }, { 16,-4690 }, { 17,-4690 }, { 18,-4690 }, + { 19,-4690 }, { 20,-4690 }, { 21,-4690 }, { 22,-4690 }, { 23,-4690 }, + + { 24,-4690 }, { 25,-4690 }, { 26,-4690 }, { 27,-4690 }, { 28,-4690 }, + { 29,-4690 }, { 30,-4690 }, { 31,-4690 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 39,-4690 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 46,-4432 }, { 0, 0 }, { 48,-3142 }, + { 49,-3142 }, { 50,-3142 }, { 51,-3142 }, { 52,-3142 }, { 53,-3142 }, + { 54,-3142 }, { 55,-3142 }, { 56,-3142 }, { 57,-3142 }, { 0, 0 }, + { 59,-4690 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 65,-4690 }, { 66,-4690 }, { 67,-4690 }, { 68,-4690 }, + { 69,-3916 }, { 70,-4690 }, { 71,-4690 }, { 72,-4690 }, { 73,-4690 }, + + { 74,-4690 }, { 75,-4690 }, { 76,-4690 }, { 77,-4690 }, { 78,-4690 }, + { 79,-4690 }, { 
80,-4690 }, { 81,-4690 }, { 82,-4690 }, { 83,-4690 }, + { 84,-4690 }, { 85,-4690 }, { 86,-4690 }, { 87,-4690 }, { 88,-4690 }, + { 89,-4690 }, { 90,-4690 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 94,-4690 }, { 95,-2884 }, { 96,-4690 }, { 97,-4690 }, { 98,-4690 }, + { 99,-4690 }, { 100,-4690 }, { 101,-3916 }, { 102,-4690 }, { 103,-4690 }, + { 104,-4690 }, { 105,-4690 }, { 106,-4690 }, { 107,-4690 }, { 108,-4690 }, + { 109,-4690 }, { 110,-4690 }, { 111,-4690 }, { 112,-4690 }, { 113,-4690 }, + { 114,-4690 }, { 115,-4690 }, { 116,-4690 }, { 117,-4690 }, { 118,-4690 }, + { 119,-4690 }, { 120,-4690 }, { 121,-4690 }, { 122,-4690 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 126,-4690 }, { 127,-4690 }, { 128,-4690 }, + { 129,-4690 }, { 130,-4690 }, { 131,-4690 }, { 132,-4690 }, { 133,-4690 }, + { 134,-4690 }, { 135,-4690 }, { 136,-4690 }, { 137,-4690 }, { 138,-4690 }, + { 139,-4690 }, { 140,-4690 }, { 141,-4690 }, { 142,-4690 }, { 143,-4690 }, + { 144,-4690 }, { 145,-4690 }, { 146,-4690 }, { 147,-4690 }, { 148,-4690 }, + { 149,-4690 }, { 150,-4690 }, { 151,-4690 }, { 152,-4690 }, { 153,-4690 }, + { 154,-4690 }, { 155,-4690 }, { 156,-4690 }, { 157,-4690 }, { 158,-4690 }, + { 159,-4690 }, { 160,-4690 }, { 161,-4690 }, { 162,-4690 }, { 163,-4690 }, + { 164,-4690 }, { 165,-4690 }, { 166,-4690 }, { 167,-4690 }, { 168,-4690 }, + { 169,-4690 }, { 170,-4690 }, { 171,-4690 }, { 172,-4690 }, { 173,-4690 }, + + { 174,-4690 }, { 175,-4690 }, { 176,-4690 }, { 177,-4690 }, { 178,-4690 }, + { 179,-4690 }, { 180,-4690 }, { 181,-4690 }, { 182,-4690 }, { 183,-4690 }, + { 184,-4690 }, { 185,-4690 }, { 186,-4690 }, { 187,-4690 }, { 188,-4690 }, + { 189,-4690 }, { 190,-4690 }, { 191,-4690 }, { 192,-4690 }, { 193,-4690 }, + { 194,-4690 }, { 195,-4690 }, { 196,-4690 }, { 197,-4690 }, { 198,-4690 }, + { 199,-4690 }, { 200,-4690 }, { 201,-4690 }, { 202,-4690 }, { 203,-4690 }, + { 204,-4690 }, { 205,-4690 }, { 206,-4690 }, { 207,-4690 }, { 208,-4690 }, + { 209,-4690 }, { 210,-4690 }, { 211,-4690 }, { 212,-4690 }, { 213,-4690 }, + { 214,-4690 }, { 215,-4690 }, { 216,-4690 }, { 217,-4690 }, { 218,-4690 }, + { 219,-4690 }, { 220,-4690 }, { 221,-4690 }, { 222,-4690 }, { 223,-4690 }, + + { 224,-4690 }, { 225,-4690 }, { 226,-4690 }, { 227,-4690 }, { 228,-4690 }, + { 229,-4690 }, { 230,-4690 }, { 231,-4690 }, { 232,-4690 }, { 233,-4690 }, + { 234,-4690 }, { 235,-4690 }, { 236,-4690 }, { 237,-4690 }, { 238,-4690 }, + { 239,-4690 }, { 240,-4690 }, { 241,-4690 }, { 242,-4690 }, { 243,-4690 }, + { 244,-4690 }, { 245,-4690 }, { 246,-4690 }, { 247,-4690 }, { 248,-4690 }, + { 249,-4690 }, { 250,-4690 }, { 251,-4690 }, { 252,-4690 }, { 253,-4690 }, + { 254,-4690 }, { 255,-4690 }, { 256,-4690 }, { 0, 13 }, { 0,5093 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 13 }, { 0,5070 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 48,3320 }, { 49,3320 }, { 50,3320 }, + { 51,3320 }, { 52,3320 }, { 53,3320 }, { 54,3320 }, { 55,3320 }, + { 56,3320 }, { 57,3320 }, { 0, 0 }, { 0, 0 }, { 0, 14 }, + { 0,5032 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65,3320 }, + + { 66,3320 }, { 67,3320 }, { 68,3320 }, { 69,3320 }, { 70,3320 }, + { 48,3320 }, { 49,3320 }, { 50,3320 }, { 51,3320 }, { 52,3320 
}, + { 53,3320 }, { 54,3320 }, { 55,3320 }, { 56,3320 }, { 57,3320 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 65,3320 }, { 66,3320 }, { 67,3320 }, + { 68,3320 }, { 69,3320 }, { 70,3320 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 97,3320 }, { 98,3320 }, { 99,3320 }, { 100,3320 }, + { 101,3320 }, { 102,3320 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48,-6069 }, { 49,-6069 }, + { 50,-6069 }, { 51,-6069 }, { 52,-6069 }, { 53,-6069 }, { 54,-6069 }, + + { 55,-6069 }, { 56,-6069 }, { 57,-6069 }, { 0, 0 }, { 97,3320 }, + { 98,3320 }, { 99,3320 }, { 100,3320 }, { 101,3320 }, { 102,3320 }, + { 65,-6069 }, { 66,-6069 }, { 67,-6069 }, { 68,-6069 }, { 69,-6069 }, + { 70,-6069 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 97,-6069 }, { 98,-6069 }, { 99,-6069 }, + { 100,-6069 }, { 101,-6069 }, { 102,-6069 }, { 0, 40 }, { 0,4928 }, + + { 1,-6171 }, { 2,-6171 }, { 3,-6171 }, { 4,-6171 }, { 5,-6171 }, + { 6,-6171 }, { 7,-6171 }, { 8,-6171 }, { 0, 0 }, { 0, 0 }, + { 11,-6171 }, { 0, 0 }, { 0, 0 }, { 14,-6171 }, { 15,-6171 }, + { 16,-6171 }, { 17,-6171 }, { 18,-6171 }, { 19,-6171 }, { 20,-6171 }, + { 21,-6171 }, { 22,-6171 }, { 23,-6171 }, { 24,-6171 }, { 25,-6171 }, + { 26,-6171 }, { 27,-6171 }, { 28,-6171 }, { 29,-6171 }, { 30,-6171 }, + { 31,-6171 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39,-6171 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 48,3290 }, { 49,3290 }, { 50,3290 }, + + { 51,3290 }, { 52,3290 }, { 53,3290 }, { 54,3290 }, { 55,3290 }, + { 56,3290 }, { 57,3290 }, { 0, 0 }, { 59,-6171 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65,-6171 }, + { 66,-6171 }, { 67,-6171 }, { 68,-6171 }, { 69,-6171 }, { 70,-6171 }, + { 71,-6171 }, { 72,-6171 }, { 73,-6171 }, { 74,-6171 }, { 75,-6171 }, + { 76,-6171 }, { 77,-6171 }, { 78,-6171 }, { 79,-6171 }, { 80,-6171 }, + { 81,-6171 }, { 82,-6171 }, { 83,-6171 }, { 84,-6171 }, { 85,-6171 }, + { 86,-6171 }, { 87,-6171 }, { 88,-6171 }, { 89,-6171 }, { 90,-6171 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94,-6171 }, { 95,3548 }, + { 96,-6171 }, { 97,-6171 }, { 98,-6171 }, { 99,-6171 }, { 100,-6171 }, + + { 101,-6171 }, { 102,-6171 }, { 103,-6171 }, { 104,-6171 }, { 105,-6171 }, + { 106,-6171 }, { 107,-6171 }, { 108,-6171 }, { 109,-6171 }, { 110,-6171 }, + { 111,-6171 }, { 112,-6171 }, { 113,-6171 }, { 114,-6171 }, { 115,-6171 }, + { 116,-6171 }, { 117,-6171 }, { 118,-6171 }, { 119,-6171 }, { 120,-6171 }, + { 121,-6171 }, { 122,-6171 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 126,-6171 }, { 127,-6171 }, { 128,-6171 }, { 129,-6171 }, { 130,-6171 }, + { 131,-6171 }, { 132,-6171 }, { 133,-6171 }, { 134,-6171 }, { 135,-6171 }, + { 136,-6171 }, { 137,-6171 }, { 138,-6171 }, { 139,-6171 }, { 140,-6171 }, + { 141,-6171 }, { 142,-6171 }, { 143,-6171 }, { 144,-6171 }, { 145,-6171 }, + { 146,-6171 }, { 147,-6171 }, { 148,-6171 }, { 149,-6171 }, { 150,-6171 }, + + { 151,-6171 }, { 152,-6171 }, { 153,-6171 }, { 154,-6171 }, { 155,-6171 }, + { 156,-6171 }, { 157,-6171 }, { 158,-6171 }, { 159,-6171 }, { 160,-6171 }, + { 161,-6171 }, { 162,-6171 }, { 163,-6171 }, { 164,-6171 }, { 165,-6171 }, + { 166,-6171 }, { 167,-6171 }, { 168,-6171 }, { 169,-6171 }, { 170,-6171 }, + { 171,-6171 }, { 172,-6171 }, { 173,-6171 }, { 174,-6171 }, { 
175,-6171 }, + { 176,-6171 }, { 177,-6171 }, { 178,-6171 }, { 179,-6171 }, { 180,-6171 }, + { 181,-6171 }, { 182,-6171 }, { 183,-6171 }, { 184,-6171 }, { 185,-6171 }, + { 186,-6171 }, { 187,-6171 }, { 188,-6171 }, { 189,-6171 }, { 190,-6171 }, + { 191,-6171 }, { 192,-6171 }, { 193,-6171 }, { 194,-6171 }, { 195,-6171 }, + { 196,-6171 }, { 197,-6171 }, { 198,-6171 }, { 199,-6171 }, { 200,-6171 }, + + { 201,-6171 }, { 202,-6171 }, { 203,-6171 }, { 204,-6171 }, { 205,-6171 }, + { 206,-6171 }, { 207,-6171 }, { 208,-6171 }, { 209,-6171 }, { 210,-6171 }, + { 211,-6171 }, { 212,-6171 }, { 213,-6171 }, { 214,-6171 }, { 215,-6171 }, + { 216,-6171 }, { 217,-6171 }, { 218,-6171 }, { 219,-6171 }, { 220,-6171 }, + { 221,-6171 }, { 222,-6171 }, { 223,-6171 }, { 224,-6171 }, { 225,-6171 }, + { 226,-6171 }, { 227,-6171 }, { 228,-6171 }, { 229,-6171 }, { 230,-6171 }, + { 231,-6171 }, { 232,-6171 }, { 233,-6171 }, { 234,-6171 }, { 235,-6171 }, + { 236,-6171 }, { 237,-6171 }, { 238,-6171 }, { 239,-6171 }, { 240,-6171 }, + { 241,-6171 }, { 242,-6171 }, { 243,-6171 }, { 244,-6171 }, { 245,-6171 }, + { 246,-6171 }, { 247,-6171 }, { 248,-6171 }, { 249,-6171 }, { 250,-6171 }, + + { 251,-6171 }, { 252,-6171 }, { 253,-6171 }, { 254,-6171 }, { 255,-6171 }, + { 256,-6171 }, { 0, 41 }, { 0,4670 }, { 1,-6433 }, { 2,-6433 }, + { 3,-6433 }, { 4,-6433 }, { 5,-6433 }, { 6,-6433 }, { 7,-6433 }, + { 8,-6433 }, { 0, 0 }, { 0, 0 }, { 11,-6433 }, { 0, 0 }, + { 0, 0 }, { 14,-6433 }, { 15,-6433 }, { 16,-6433 }, { 17,-6433 }, + { 18,-6433 }, { 19,-6433 }, { 20,-6433 }, { 21,-6433 }, { 22,-6433 }, + { 23,-6433 }, { 24,-6433 }, { 25,-6433 }, { 26,-6433 }, { 27,-6433 }, + { 28,-6433 }, { 29,-6433 }, { 30,-6433 }, { 31,-6433 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 39,-6433 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 48,-2357 }, { 49,-2357 }, { 50,-2357 }, { 51,-2357 }, { 52,-2357 }, + { 53,-2357 }, { 54,-2357 }, { 55,-2357 }, { 56,-2357 }, { 57,-2357 }, + { 0, 0 }, { 59,-6433 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 65,-6433 }, { 66,-6433 }, { 67,-6433 }, + { 68,-6433 }, { 69,-2099 }, { 70,-6433 }, { 71,-6433 }, { 72,-6433 }, + { 73,-6433 }, { 74,-6433 }, { 75,-6433 }, { 76,-6433 }, { 77,-6433 }, + { 78,-6433 }, { 79,-6433 }, { 80,-6433 }, { 81,-6433 }, { 82,-6433 }, + { 83,-6433 }, { 84,-6433 }, { 85,-6433 }, { 86,-6433 }, { 87,-6433 }, + { 88,-6433 }, { 89,-6433 }, { 90,-6433 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 94,-6433 }, { 95,-2089 }, { 96,-6433 }, { 97,-6433 }, + { 98,-6433 }, { 99,-6433 }, { 100,-6433 }, { 101,-2099 }, { 102,-6433 }, + { 103,-6433 }, { 104,-6433 }, { 105,-6433 }, { 106,-6433 }, { 107,-6433 }, + { 108,-6433 }, { 109,-6433 }, { 110,-6433 }, { 111,-6433 }, { 112,-6433 }, + { 113,-6433 }, { 114,-6433 }, { 115,-6433 }, { 116,-6433 }, { 117,-6433 }, + { 118,-6433 }, { 119,-6433 }, { 120,-6433 }, { 121,-6433 }, { 122,-6433 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 126,-6433 }, { 127,-6433 }, + { 128,-6433 }, { 129,-6433 }, { 130,-6433 }, { 131,-6433 }, { 132,-6433 }, + { 133,-6433 }, { 134,-6433 }, { 135,-6433 }, { 136,-6433 }, { 137,-6433 }, + { 138,-6433 }, { 139,-6433 }, { 140,-6433 }, { 141,-6433 }, { 142,-6433 }, + + { 143,-6433 }, { 144,-6433 }, { 145,-6433 }, { 146,-6433 }, { 147,-6433 }, + { 148,-6433 }, { 149,-6433 }, { 150,-6433 }, { 151,-6433 }, { 152,-6433 }, + { 153,-6433 }, { 154,-6433 }, { 155,-6433 }, { 156,-6433 }, { 157,-6433 }, + { 158,-6433 }, { 159,-6433 }, { 160,-6433 }, { 161,-6433 }, { 162,-6433 }, 
+ { 163,-6433 }, { 164,-6433 }, { 165,-6433 }, { 166,-6433 }, { 167,-6433 }, + { 168,-6433 }, { 169,-6433 }, { 170,-6433 }, { 171,-6433 }, { 172,-6433 }, + { 173,-6433 }, { 174,-6433 }, { 175,-6433 }, { 176,-6433 }, { 177,-6433 }, + { 178,-6433 }, { 179,-6433 }, { 180,-6433 }, { 181,-6433 }, { 182,-6433 }, + { 183,-6433 }, { 184,-6433 }, { 185,-6433 }, { 186,-6433 }, { 187,-6433 }, + { 188,-6433 }, { 189,-6433 }, { 190,-6433 }, { 191,-6433 }, { 192,-6433 }, + + { 193,-6433 }, { 194,-6433 }, { 195,-6433 }, { 196,-6433 }, { 197,-6433 }, + { 198,-6433 }, { 199,-6433 }, { 200,-6433 }, { 201,-6433 }, { 202,-6433 }, + { 203,-6433 }, { 204,-6433 }, { 205,-6433 }, { 206,-6433 }, { 207,-6433 }, + { 208,-6433 }, { 209,-6433 }, { 210,-6433 }, { 211,-6433 }, { 212,-6433 }, + { 213,-6433 }, { 214,-6433 }, { 215,-6433 }, { 216,-6433 }, { 217,-6433 }, + { 218,-6433 }, { 219,-6433 }, { 220,-6433 }, { 221,-6433 }, { 222,-6433 }, + { 223,-6433 }, { 224,-6433 }, { 225,-6433 }, { 226,-6433 }, { 227,-6433 }, + { 228,-6433 }, { 229,-6433 }, { 230,-6433 }, { 231,-6433 }, { 232,-6433 }, + { 233,-6433 }, { 234,-6433 }, { 235,-6433 }, { 236,-6433 }, { 237,-6433 }, + { 238,-6433 }, { 239,-6433 }, { 240,-6433 }, { 241,-6433 }, { 242,-6433 }, + + { 243,-6433 }, { 244,-6433 }, { 245,-6433 }, { 246,-6433 }, { 247,-6433 }, + { 248,-6433 }, { 249,-6433 }, { 250,-6433 }, { 251,-6433 }, { 252,-6433 }, + { 253,-6433 }, { 254,-6433 }, { 255,-6433 }, { 256,-6433 }, { 0, 41 }, + { 0,4412 }, { 1,-6691 }, { 2,-6691 }, { 3,-6691 }, { 4,-6691 }, + { 5,-6691 }, { 6,-6691 }, { 7,-6691 }, { 8,-6691 }, { 0, 0 }, + { 0, 0 }, { 11,-6691 }, { 0, 0 }, { 0, 0 }, { 14,-6691 }, + { 15,-6691 }, { 16,-6691 }, { 17,-6691 }, { 18,-6691 }, { 19,-6691 }, + { 20,-6691 }, { 21,-6691 }, { 22,-6691 }, { 23,-6691 }, { 24,-6691 }, + { 25,-6691 }, { 26,-6691 }, { 27,-6691 }, { 28,-6691 }, { 29,-6691 }, + { 30,-6691 }, { 31,-6691 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39,-6691 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48, 0 }, { 49, 0 }, + { 50, 0 }, { 51, 0 }, { 52, 0 }, { 53, 0 }, { 54, 0 }, + { 55, 0 }, { 56, 0 }, { 57, 0 }, { 0, 0 }, { 59,-6691 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 65,-6691 }, { 66,-6691 }, { 67,-6691 }, { 68,-6691 }, { 69,-2357 }, + { 70,-6691 }, { 71,-6691 }, { 72,-6691 }, { 73,-6691 }, { 74,-6691 }, + { 75,-6691 }, { 76,-6691 }, { 77,-6691 }, { 78,-6691 }, { 79,-6691 }, + { 80,-6691 }, { 81,-6691 }, { 82,-6691 }, { 83,-6691 }, { 84,-6691 }, + + { 85,-6691 }, { 86,-6691 }, { 87,-6691 }, { 88,-6691 }, { 89,-6691 }, + { 90,-6691 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94,-6691 }, + { 95, 258 }, { 96,-6691 }, { 97,-6691 }, { 98,-6691 }, { 99,-6691 }, + { 100,-6691 }, { 101,-2357 }, { 102,-6691 }, { 103,-6691 }, { 104,-6691 }, + { 105,-6691 }, { 106,-6691 }, { 107,-6691 }, { 108,-6691 }, { 109,-6691 }, + { 110,-6691 }, { 111,-6691 }, { 112,-6691 }, { 113,-6691 }, { 114,-6691 }, + { 115,-6691 }, { 116,-6691 }, { 117,-6691 }, { 118,-6691 }, { 119,-6691 }, + { 120,-6691 }, { 121,-6691 }, { 122,-6691 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 126,-6691 }, { 127,-6691 }, { 128,-6691 }, { 129,-6691 }, + { 130,-6691 }, { 131,-6691 }, { 132,-6691 }, { 133,-6691 }, { 134,-6691 }, + + { 135,-6691 }, { 136,-6691 }, { 137,-6691 }, { 138,-6691 }, { 139,-6691 }, + { 140,-6691 }, { 141,-6691 }, { 142,-6691 }, { 143,-6691 }, { 144,-6691 }, + { 145,-6691 }, { 146,-6691 }, { 147,-6691 }, { 148,-6691 }, { 149,-6691 }, + { 150,-6691 }, { 151,-6691 }, { 152,-6691 
}, { 153,-6691 }, { 154,-6691 }, + { 155,-6691 }, { 156,-6691 }, { 157,-6691 }, { 158,-6691 }, { 159,-6691 }, + { 160,-6691 }, { 161,-6691 }, { 162,-6691 }, { 163,-6691 }, { 164,-6691 }, + { 165,-6691 }, { 166,-6691 }, { 167,-6691 }, { 168,-6691 }, { 169,-6691 }, + { 170,-6691 }, { 171,-6691 }, { 172,-6691 }, { 173,-6691 }, { 174,-6691 }, + { 175,-6691 }, { 176,-6691 }, { 177,-6691 }, { 178,-6691 }, { 179,-6691 }, + { 180,-6691 }, { 181,-6691 }, { 182,-6691 }, { 183,-6691 }, { 184,-6691 }, + + { 185,-6691 }, { 186,-6691 }, { 187,-6691 }, { 188,-6691 }, { 189,-6691 }, + { 190,-6691 }, { 191,-6691 }, { 192,-6691 }, { 193,-6691 }, { 194,-6691 }, + { 195,-6691 }, { 196,-6691 }, { 197,-6691 }, { 198,-6691 }, { 199,-6691 }, + { 200,-6691 }, { 201,-6691 }, { 202,-6691 }, { 203,-6691 }, { 204,-6691 }, + { 205,-6691 }, { 206,-6691 }, { 207,-6691 }, { 208,-6691 }, { 209,-6691 }, + { 210,-6691 }, { 211,-6691 }, { 212,-6691 }, { 213,-6691 }, { 214,-6691 }, + { 215,-6691 }, { 216,-6691 }, { 217,-6691 }, { 218,-6691 }, { 219,-6691 }, + { 220,-6691 }, { 221,-6691 }, { 222,-6691 }, { 223,-6691 }, { 224,-6691 }, + { 225,-6691 }, { 226,-6691 }, { 227,-6691 }, { 228,-6691 }, { 229,-6691 }, + { 230,-6691 }, { 231,-6691 }, { 232,-6691 }, { 233,-6691 }, { 234,-6691 }, + + { 235,-6691 }, { 236,-6691 }, { 237,-6691 }, { 238,-6691 }, { 239,-6691 }, + { 240,-6691 }, { 241,-6691 }, { 242,-6691 }, { 243,-6691 }, { 244,-6691 }, + { 245,-6691 }, { 246,-6691 }, { 247,-6691 }, { 248,-6691 }, { 249,-6691 }, + { 250,-6691 }, { 251,-6691 }, { 252,-6691 }, { 253,-6691 }, { 254,-6691 }, + { 255,-6691 }, { 256,-6691 }, { 0, 48 }, { 0,4154 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 48,2833 }, { 49,2833 }, { 50,2833 }, { 51,2833 }, + { 52,2833 }, { 53,2833 }, { 54,2833 }, { 55,2833 }, { 56,2833 }, + { 57,2833 }, { 0, 45 }, { 0,4095 }, { 1,-6739 }, { 2,-6739 }, + { 3,-6739 }, { 4,-6739 }, { 5,-6739 }, { 6,-6739 }, { 7,-6739 }, + { 8,-6739 }, { 0, 0 }, { 0, 0 }, { 11,-6739 }, { 0, 0 }, + { 0, 0 }, { 14,-6739 }, { 15,-6739 }, { 16,-6739 }, { 17,-6739 }, + + { 18,-6739 }, { 19,-6739 }, { 20,-6739 }, { 21,-6739 }, { 22,-6739 }, + { 23,-6739 }, { 24,-6739 }, { 25,-6739 }, { 26,-6739 }, { 27,-6739 }, + { 28,-6739 }, { 29,-6739 }, { 30,-6739 }, { 31,-6739 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 39,-6739 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 48, 0 }, { 49, 0 }, { 50,-6739 }, { 51,-6739 }, { 52,-6739 }, + { 53,-6739 }, { 54,-6739 }, { 55,-6739 }, { 56,-6739 }, { 57,-6739 }, + { 0, 0 }, { 59,-6739 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 65,-6739 }, { 66,-6739 }, { 67,-6739 }, + + { 68,-6739 }, { 69,-6739 }, { 70,-6739 }, { 71,-6739 }, { 72,-6739 }, + { 73,-6739 }, { 74,-6739 }, { 75,-6739 }, { 76,-6739 }, { 77,-6739 }, + { 78,-6739 }, { 79,-6739 }, { 80,-6739 }, { 81,-6739 }, { 82,-6739 }, + { 83,-6739 }, { 84,-6739 }, { 85,-6739 }, { 86,-6739 }, { 87,-6739 }, + { 88,-6739 }, { 89,-6739 }, { 90,-6739 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 94,-6739 }, { 95, 258 }, { 96,-6739 }, { 97,-6739 }, 
+ { 98,-6739 }, { 99,-6739 }, { 100,-6739 }, { 101,-6739 }, { 102,-6739 }, + { 103,-6739 }, { 104,-6739 }, { 105,-6739 }, { 106,-6739 }, { 107,-6739 }, + { 108,-6739 }, { 109,-6739 }, { 110,-6739 }, { 111,-6739 }, { 112,-6739 }, + { 113,-6739 }, { 114,-6739 }, { 115,-6739 }, { 116,-6739 }, { 117,-6739 }, + + { 118,-6739 }, { 119,-6739 }, { 120,-6739 }, { 121,-6739 }, { 122,-6739 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 126,-6739 }, { 127,-6739 }, + { 128,-6739 }, { 129,-6739 }, { 130,-6739 }, { 131,-6739 }, { 132,-6739 }, + { 133,-6739 }, { 134,-6739 }, { 135,-6739 }, { 136,-6739 }, { 137,-6739 }, + { 138,-6739 }, { 139,-6739 }, { 140,-6739 }, { 141,-6739 }, { 142,-6739 }, + { 143,-6739 }, { 144,-6739 }, { 145,-6739 }, { 146,-6739 }, { 147,-6739 }, + { 148,-6739 }, { 149,-6739 }, { 150,-6739 }, { 151,-6739 }, { 152,-6739 }, + { 153,-6739 }, { 154,-6739 }, { 155,-6739 }, { 156,-6739 }, { 157,-6739 }, + { 158,-6739 }, { 159,-6739 }, { 160,-6739 }, { 161,-6739 }, { 162,-6739 }, + { 163,-6739 }, { 164,-6739 }, { 165,-6739 }, { 166,-6739 }, { 167,-6739 }, + + { 168,-6739 }, { 169,-6739 }, { 170,-6739 }, { 171,-6739 }, { 172,-6739 }, + { 173,-6739 }, { 174,-6739 }, { 175,-6739 }, { 176,-6739 }, { 177,-6739 }, + { 178,-6739 }, { 179,-6739 }, { 180,-6739 }, { 181,-6739 }, { 182,-6739 }, + { 183,-6739 }, { 184,-6739 }, { 185,-6739 }, { 186,-6739 }, { 187,-6739 }, + { 188,-6739 }, { 189,-6739 }, { 190,-6739 }, { 191,-6739 }, { 192,-6739 }, + { 193,-6739 }, { 194,-6739 }, { 195,-6739 }, { 196,-6739 }, { 197,-6739 }, + { 198,-6739 }, { 199,-6739 }, { 200,-6739 }, { 201,-6739 }, { 202,-6739 }, + { 203,-6739 }, { 204,-6739 }, { 205,-6739 }, { 206,-6739 }, { 207,-6739 }, + { 208,-6739 }, { 209,-6739 }, { 210,-6739 }, { 211,-6739 }, { 212,-6739 }, + { 213,-6739 }, { 214,-6739 }, { 215,-6739 }, { 216,-6739 }, { 217,-6739 }, + + { 218,-6739 }, { 219,-6739 }, { 220,-6739 }, { 221,-6739 }, { 222,-6739 }, + { 223,-6739 }, { 224,-6739 }, { 225,-6739 }, { 226,-6739 }, { 227,-6739 }, + { 228,-6739 }, { 229,-6739 }, { 230,-6739 }, { 231,-6739 }, { 232,-6739 }, + { 233,-6739 }, { 234,-6739 }, { 235,-6739 }, { 236,-6739 }, { 237,-6739 }, + { 238,-6739 }, { 239,-6739 }, { 240,-6739 }, { 241,-6739 }, { 242,-6739 }, + { 243,-6739 }, { 244,-6739 }, { 245,-6739 }, { 246,-6739 }, { 247,-6739 }, + { 248,-6739 }, { 249,-6739 }, { 250,-6739 }, { 251,-6739 }, { 252,-6739 }, + { 253,-6739 }, { 254,-6739 }, { 255,-6739 }, { 256,-6739 }, { 0, 52 }, + { 0,3837 }, { 1,-6997 }, { 2,-6997 }, { 3,-6997 }, { 4,-6997 }, + { 5,-6997 }, { 6,-6997 }, { 7,-6997 }, { 8,-6997 }, { 0, 0 }, + + { 0, 0 }, { 11,-6997 }, { 0, 0 }, { 0, 0 }, { 14,-6997 }, + { 15,-6997 }, { 16,-6997 }, { 17,-6997 }, { 18,-6997 }, { 19,-6997 }, + { 20,-6997 }, { 21,-6997 }, { 22,-6997 }, { 23,-6997 }, { 24,-6997 }, + { 25,-6997 }, { 26,-6997 }, { 27,-6997 }, { 28,-6997 }, { 29,-6997 }, + { 30,-6997 }, { 31,-6997 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39,-6997 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48,-258 }, { 49,-258 }, + { 50,-6997 }, { 51,-6997 }, { 52,-6997 }, { 53,-6997 }, { 54,-6997 }, + { 55,-6997 }, { 56,-6997 }, { 57,-6997 }, { 0, 0 }, { 59,-6997 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 65,-6997 }, { 66,-6997 }, { 67,-6997 }, { 68,-6997 }, { 69,-6997 }, + { 70,-6997 }, { 71,-6997 }, { 72,-6997 }, { 73,-6997 }, { 74,-6997 }, + { 75,-6997 }, { 76,-6997 }, { 77,-6997 }, { 78,-6997 }, { 79,-6997 }, + { 80,-6997 }, { 81,-6997 }, { 82,-6997 }, { 83,-6997 }, { 
84,-6997 }, + { 85,-6997 }, { 86,-6997 }, { 87,-6997 }, { 88,-6997 }, { 89,-6997 }, + { 90,-6997 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94,-6997 }, + { 95,-6997 }, { 96,-6997 }, { 97,-6997 }, { 98,-6997 }, { 99,-6997 }, + { 100,-6997 }, { 101,-6997 }, { 102,-6997 }, { 103,-6997 }, { 104,-6997 }, + { 105,-6997 }, { 106,-6997 }, { 107,-6997 }, { 108,-6997 }, { 109,-6997 }, + + { 110,-6997 }, { 111,-6997 }, { 112,-6997 }, { 113,-6997 }, { 114,-6997 }, + { 115,-6997 }, { 116,-6997 }, { 117,-6997 }, { 118,-6997 }, { 119,-6997 }, + { 120,-6997 }, { 121,-6997 }, { 122,-6997 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 126,-6997 }, { 127,-6997 }, { 128,-6997 }, { 129,-6997 }, + { 130,-6997 }, { 131,-6997 }, { 132,-6997 }, { 133,-6997 }, { 134,-6997 }, + { 135,-6997 }, { 136,-6997 }, { 137,-6997 }, { 138,-6997 }, { 139,-6997 }, + { 140,-6997 }, { 141,-6997 }, { 142,-6997 }, { 143,-6997 }, { 144,-6997 }, + { 145,-6997 }, { 146,-6997 }, { 147,-6997 }, { 148,-6997 }, { 149,-6997 }, + { 150,-6997 }, { 151,-6997 }, { 152,-6997 }, { 153,-6997 }, { 154,-6997 }, + { 155,-6997 }, { 156,-6997 }, { 157,-6997 }, { 158,-6997 }, { 159,-6997 }, + + { 160,-6997 }, { 161,-6997 }, { 162,-6997 }, { 163,-6997 }, { 164,-6997 }, + { 165,-6997 }, { 166,-6997 }, { 167,-6997 }, { 168,-6997 }, { 169,-6997 }, + { 170,-6997 }, { 171,-6997 }, { 172,-6997 }, { 173,-6997 }, { 174,-6997 }, + { 175,-6997 }, { 176,-6997 }, { 177,-6997 }, { 178,-6997 }, { 179,-6997 }, + { 180,-6997 }, { 181,-6997 }, { 182,-6997 }, { 183,-6997 }, { 184,-6997 }, + { 185,-6997 }, { 186,-6997 }, { 187,-6997 }, { 188,-6997 }, { 189,-6997 }, + { 190,-6997 }, { 191,-6997 }, { 192,-6997 }, { 193,-6997 }, { 194,-6997 }, + { 195,-6997 }, { 196,-6997 }, { 197,-6997 }, { 198,-6997 }, { 199,-6997 }, + { 200,-6997 }, { 201,-6997 }, { 202,-6997 }, { 203,-6997 }, { 204,-6997 }, + { 205,-6997 }, { 206,-6997 }, { 207,-6997 }, { 208,-6997 }, { 209,-6997 }, + + { 210,-6997 }, { 211,-6997 }, { 212,-6997 }, { 213,-6997 }, { 214,-6997 }, + { 215,-6997 }, { 216,-6997 }, { 217,-6997 }, { 218,-6997 }, { 219,-6997 }, + { 220,-6997 }, { 221,-6997 }, { 222,-6997 }, { 223,-6997 }, { 224,-6997 }, + { 225,-6997 }, { 226,-6997 }, { 227,-6997 }, { 228,-6997 }, { 229,-6997 }, + { 230,-6997 }, { 231,-6997 }, { 232,-6997 }, { 233,-6997 }, { 234,-6997 }, + { 235,-6997 }, { 236,-6997 }, { 237,-6997 }, { 238,-6997 }, { 239,-6997 }, + { 240,-6997 }, { 241,-6997 }, { 242,-6997 }, { 243,-6997 }, { 244,-6997 }, + { 245,-6997 }, { 246,-6997 }, { 247,-6997 }, { 248,-6997 }, { 249,-6997 }, + { 250,-6997 }, { 251,-6997 }, { 252,-6997 }, { 253,-6997 }, { 254,-6997 }, + { 255,-6997 }, { 256,-6997 }, { 0, 49 }, { 0,3579 }, { 1,-7255 }, + + { 2,-7255 }, { 3,-7255 }, { 4,-7255 }, { 5,-7255 }, { 6,-7255 }, + { 7,-7255 }, { 8,-7255 }, { 0, 0 }, { 0, 0 }, { 11,-7255 }, + { 0, 0 }, { 0, 0 }, { 14,-7255 }, { 15,-7255 }, { 16,-7255 }, + { 17,-7255 }, { 18,-7255 }, { 19,-7255 }, { 20,-7255 }, { 21,-7255 }, + { 22,-7255 }, { 23,-7255 }, { 24,-7255 }, { 25,-7255 }, { 26,-7255 }, + { 27,-7255 }, { 28,-7255 }, { 29,-7255 }, { 30,-7255 }, { 31,-7255 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 39,-7255 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 48,-7255 }, { 49,-7255 }, { 50,-7255 }, { 51,-7255 }, + + { 52,-7255 }, { 53,-7255 }, { 54,-7255 }, { 55,-7255 }, { 56,-7255 }, + { 57,-7255 }, { 0, 0 }, { 59,-7255 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65,-7255 }, { 66,-7255 }, + { 67,-7255 }, { 68,-7255 }, { 69,-7255 }, { 70,-7255 }, { 
71,-7255 }, + { 72,-7255 }, { 73,-7255 }, { 74,-7255 }, { 75,-7255 }, { 76,-7255 }, + { 77,-7255 }, { 78,-7255 }, { 79,-7255 }, { 80,-7255 }, { 81,-7255 }, + { 82,-7255 }, { 83,-7255 }, { 84,-7255 }, { 85,-7255 }, { 86,-7255 }, + { 87,-7255 }, { 88,-7255 }, { 89,-7255 }, { 90,-7255 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 94,-7255 }, { 95,-7255 }, { 96,-7255 }, + { 97,-7255 }, { 98,-7255 }, { 99,-7255 }, { 100,-7255 }, { 101,-7255 }, + + { 102,-7255 }, { 103,-7255 }, { 104,-7255 }, { 105,-7255 }, { 106,-7255 }, + { 107,-7255 }, { 108,-7255 }, { 109,-7255 }, { 110,-7255 }, { 111,-7255 }, + { 112,-7255 }, { 113,-7255 }, { 114,-7255 }, { 115,-7255 }, { 116,-7255 }, + { 117,-7255 }, { 118,-7255 }, { 119,-7255 }, { 120,-7255 }, { 121,-7255 }, + { 122,-7255 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 126,-7255 }, + { 127,-7255 }, { 128,-7255 }, { 129,-7255 }, { 130,-7255 }, { 131,-7255 }, + { 132,-7255 }, { 133,-7255 }, { 134,-7255 }, { 135,-7255 }, { 136,-7255 }, + { 137,-7255 }, { 138,-7255 }, { 139,-7255 }, { 140,-7255 }, { 141,-7255 }, + { 142,-7255 }, { 143,-7255 }, { 144,-7255 }, { 145,-7255 }, { 146,-7255 }, + { 147,-7255 }, { 148,-7255 }, { 149,-7255 }, { 150,-7255 }, { 151,-7255 }, + + { 152,-7255 }, { 153,-7255 }, { 154,-7255 }, { 155,-7255 }, { 156,-7255 }, + { 157,-7255 }, { 158,-7255 }, { 159,-7255 }, { 160,-7255 }, { 161,-7255 }, + { 162,-7255 }, { 163,-7255 }, { 164,-7255 }, { 165,-7255 }, { 166,-7255 }, + { 167,-7255 }, { 168,-7255 }, { 169,-7255 }, { 170,-7255 }, { 171,-7255 }, + { 172,-7255 }, { 173,-7255 }, { 174,-7255 }, { 175,-7255 }, { 176,-7255 }, + { 177,-7255 }, { 178,-7255 }, { 179,-7255 }, { 180,-7255 }, { 181,-7255 }, + { 182,-7255 }, { 183,-7255 }, { 184,-7255 }, { 185,-7255 }, { 186,-7255 }, + { 187,-7255 }, { 188,-7255 }, { 189,-7255 }, { 190,-7255 }, { 191,-7255 }, + { 192,-7255 }, { 193,-7255 }, { 194,-7255 }, { 195,-7255 }, { 196,-7255 }, + { 197,-7255 }, { 198,-7255 }, { 199,-7255 }, { 200,-7255 }, { 201,-7255 }, + + { 202,-7255 }, { 203,-7255 }, { 204,-7255 }, { 205,-7255 }, { 206,-7255 }, + { 207,-7255 }, { 208,-7255 }, { 209,-7255 }, { 210,-7255 }, { 211,-7255 }, + { 212,-7255 }, { 213,-7255 }, { 214,-7255 }, { 215,-7255 }, { 216,-7255 }, + { 217,-7255 }, { 218,-7255 }, { 219,-7255 }, { 220,-7255 }, { 221,-7255 }, + { 222,-7255 }, { 223,-7255 }, { 224,-7255 }, { 225,-7255 }, { 226,-7255 }, + { 227,-7255 }, { 228,-7255 }, { 229,-7255 }, { 230,-7255 }, { 231,-7255 }, + { 232,-7255 }, { 233,-7255 }, { 234,-7255 }, { 235,-7255 }, { 236,-7255 }, + { 237,-7255 }, { 238,-7255 }, { 239,-7255 }, { 240,-7255 }, { 241,-7255 }, + { 242,-7255 }, { 243,-7255 }, { 244,-7255 }, { 245,-7255 }, { 246,-7255 }, + { 247,-7255 }, { 248,-7255 }, { 249,-7255 }, { 250,-7255 }, { 251,-7255 }, + + { 252,-7255 }, { 253,-7255 }, { 254,-7255 }, { 255,-7255 }, { 256,-7255 }, + { 0, 40 }, { 0,3321 }, { 1,-258 }, { 2,-258 }, { 3,-258 }, + { 4,-258 }, { 5,-258 }, { 6,-258 }, { 7,-258 }, { 8,-258 }, + { 0, 0 }, { 0, 0 }, { 11,-258 }, { 0, 0 }, { 0, 0 }, + { 14,-258 }, { 15,-258 }, { 16,-258 }, { 17,-258 }, { 18,-258 }, + { 19,-258 }, { 20,-258 }, { 21,-258 }, { 22,-258 }, { 23,-258 }, + { 24,-258 }, { 25,-258 }, { 26,-258 }, { 27,-258 }, { 28,-258 }, + { 29,-258 }, { 30,-258 }, { 31,-258 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 39,-258 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48, 0 }, + { 49, 0 }, { 50, 0 }, { 51, 0 }, { 52, 0 }, { 53, 0 }, + { 54, 0 }, { 55, 0 }, { 56, 0 }, { 57, 0 }, { 0, 0 }, + { 59,-258 }, { 0, 0 }, 
{ 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 65,-258 }, { 66,-258 }, { 67,-258 }, { 68,-258 }, + { 69,-258 }, { 70,-258 }, { 71,-258 }, { 72,-258 }, { 73,-258 }, + { 74,-258 }, { 75,-258 }, { 76,-258 }, { 77,-258 }, { 78,-258 }, + { 79,-258 }, { 80,-258 }, { 81,-258 }, { 82,-258 }, { 83,-258 }, + { 84,-258 }, { 85,-258 }, { 86,-258 }, { 87,-258 }, { 88,-258 }, + { 89,-258 }, { 90,-258 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 94,-258 }, { 95, 258 }, { 96,-258 }, { 97,-258 }, { 98,-258 }, + { 99,-258 }, { 100,-258 }, { 101,-258 }, { 102,-258 }, { 103,-258 }, + { 104,-258 }, { 105,-258 }, { 106,-258 }, { 107,-258 }, { 108,-258 }, + { 109,-258 }, { 110,-258 }, { 111,-258 }, { 112,-258 }, { 113,-258 }, + { 114,-258 }, { 115,-258 }, { 116,-258 }, { 117,-258 }, { 118,-258 }, + { 119,-258 }, { 120,-258 }, { 121,-258 }, { 122,-258 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 126,-258 }, { 127,-258 }, { 128,-258 }, + { 129,-258 }, { 130,-258 }, { 131,-258 }, { 132,-258 }, { 133,-258 }, + { 134,-258 }, { 135,-258 }, { 136,-258 }, { 137,-258 }, { 138,-258 }, + { 139,-258 }, { 140,-258 }, { 141,-258 }, { 142,-258 }, { 143,-258 }, + + { 144,-258 }, { 145,-258 }, { 146,-258 }, { 147,-258 }, { 148,-258 }, + { 149,-258 }, { 150,-258 }, { 151,-258 }, { 152,-258 }, { 153,-258 }, + { 154,-258 }, { 155,-258 }, { 156,-258 }, { 157,-258 }, { 158,-258 }, + { 159,-258 }, { 160,-258 }, { 161,-258 }, { 162,-258 }, { 163,-258 }, + { 164,-258 }, { 165,-258 }, { 166,-258 }, { 167,-258 }, { 168,-258 }, + { 169,-258 }, { 170,-258 }, { 171,-258 }, { 172,-258 }, { 173,-258 }, + { 174,-258 }, { 175,-258 }, { 176,-258 }, { 177,-258 }, { 178,-258 }, + { 179,-258 }, { 180,-258 }, { 181,-258 }, { 182,-258 }, { 183,-258 }, + { 184,-258 }, { 185,-258 }, { 186,-258 }, { 187,-258 }, { 188,-258 }, + { 189,-258 }, { 190,-258 }, { 191,-258 }, { 192,-258 }, { 193,-258 }, + + { 194,-258 }, { 195,-258 }, { 196,-258 }, { 197,-258 }, { 198,-258 }, + { 199,-258 }, { 200,-258 }, { 201,-258 }, { 202,-258 }, { 203,-258 }, + { 204,-258 }, { 205,-258 }, { 206,-258 }, { 207,-258 }, { 208,-258 }, + { 209,-258 }, { 210,-258 }, { 211,-258 }, { 212,-258 }, { 213,-258 }, + { 214,-258 }, { 215,-258 }, { 216,-258 }, { 217,-258 }, { 218,-258 }, + { 219,-258 }, { 220,-258 }, { 221,-258 }, { 222,-258 }, { 223,-258 }, + { 224,-258 }, { 225,-258 }, { 226,-258 }, { 227,-258 }, { 228,-258 }, + { 229,-258 }, { 230,-258 }, { 231,-258 }, { 232,-258 }, { 233,-258 }, + { 234,-258 }, { 235,-258 }, { 236,-258 }, { 237,-258 }, { 238,-258 }, + { 239,-258 }, { 240,-258 }, { 241,-258 }, { 242,-258 }, { 243,-258 }, + + { 244,-258 }, { 245,-258 }, { 246,-258 }, { 247,-258 }, { 248,-258 }, + { 249,-258 }, { 250,-258 }, { 251,-258 }, { 252,-258 }, { 253,-258 }, + { 254,-258 }, { 255,-258 }, { 256,-258 }, { 0, 49 }, { 0,3063 }, + { 1,-7771 }, { 2,-7771 }, { 3,-7771 }, { 4,-7771 }, { 5,-7771 }, + { 6,-7771 }, { 7,-7771 }, { 8,-7771 }, { 0, 0 }, { 0, 0 }, + { 11,-7771 }, { 0, 0 }, { 0, 0 }, { 14,-7771 }, { 15,-7771 }, + { 16,-7771 }, { 17,-7771 }, { 18,-7771 }, { 19,-7771 }, { 20,-7771 }, + { 21,-7771 }, { 22,-7771 }, { 23,-7771 }, { 24,-7771 }, { 25,-7771 }, + { 26,-7771 }, { 27,-7771 }, { 28,-7771 }, { 29,-7771 }, { 30,-7771 }, + { 31,-7771 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39,-7771 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 48,2000 }, { 49,2000 }, { 50,2000 }, + { 51,2000 }, { 52,2000 }, { 53,2000 }, { 54,2000 }, { 55,2000 }, + { 56,2000 }, { 57,2000 }, { 0, 0 }, { 59,-7771 }, { 0, 0 }, + { 0, 0 
}, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65,-7771 }, + { 66,-7771 }, { 67,-7771 }, { 68,-7771 }, { 69,-7771 }, { 70,-7771 }, + { 71,-7771 }, { 72,-7771 }, { 73,-7771 }, { 74,-7771 }, { 75,-7771 }, + { 76,-7771 }, { 77,-7771 }, { 78,-7771 }, { 79,-7771 }, { 80,-7771 }, + { 81,-7771 }, { 82,-7771 }, { 83,-7771 }, { 84,-7771 }, { 85,-7771 }, + + { 86,-7771 }, { 87,-7771 }, { 88,-7771 }, { 89,-7771 }, { 90,-7771 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94,-7771 }, { 95,-7771 }, + { 96,-7771 }, { 97,-7771 }, { 98,-7771 }, { 99,-7771 }, { 100,-7771 }, + { 101,-7771 }, { 102,-7771 }, { 103,-7771 }, { 104,-7771 }, { 105,-7771 }, + { 106,-7771 }, { 107,-7771 }, { 108,-7771 }, { 109,-7771 }, { 110,-7771 }, + { 111,-7771 }, { 112,-7771 }, { 113,-7771 }, { 114,-7771 }, { 115,-7771 }, + { 116,-7771 }, { 117,-7771 }, { 118,-7771 }, { 119,-7771 }, { 120,-7771 }, + { 121,-7771 }, { 122,-7771 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 126,-7771 }, { 127,-7771 }, { 128,-7771 }, { 129,-7771 }, { 130,-7771 }, + { 131,-7771 }, { 132,-7771 }, { 133,-7771 }, { 134,-7771 }, { 135,-7771 }, + + { 136,-7771 }, { 137,-7771 }, { 138,-7771 }, { 139,-7771 }, { 140,-7771 }, + { 141,-7771 }, { 142,-7771 }, { 143,-7771 }, { 144,-7771 }, { 145,-7771 }, + { 146,-7771 }, { 147,-7771 }, { 148,-7771 }, { 149,-7771 }, { 150,-7771 }, + { 151,-7771 }, { 152,-7771 }, { 153,-7771 }, { 154,-7771 }, { 155,-7771 }, + { 156,-7771 }, { 157,-7771 }, { 158,-7771 }, { 159,-7771 }, { 160,-7771 }, + { 161,-7771 }, { 162,-7771 }, { 163,-7771 }, { 164,-7771 }, { 165,-7771 }, + { 166,-7771 }, { 167,-7771 }, { 168,-7771 }, { 169,-7771 }, { 170,-7771 }, + { 171,-7771 }, { 172,-7771 }, { 173,-7771 }, { 174,-7771 }, { 175,-7771 }, + { 176,-7771 }, { 177,-7771 }, { 178,-7771 }, { 179,-7771 }, { 180,-7771 }, + { 181,-7771 }, { 182,-7771 }, { 183,-7771 }, { 184,-7771 }, { 185,-7771 }, + + { 186,-7771 }, { 187,-7771 }, { 188,-7771 }, { 189,-7771 }, { 190,-7771 }, + { 191,-7771 }, { 192,-7771 }, { 193,-7771 }, { 194,-7771 }, { 195,-7771 }, + { 196,-7771 }, { 197,-7771 }, { 198,-7771 }, { 199,-7771 }, { 200,-7771 }, + { 201,-7771 }, { 202,-7771 }, { 203,-7771 }, { 204,-7771 }, { 205,-7771 }, + { 206,-7771 }, { 207,-7771 }, { 208,-7771 }, { 209,-7771 }, { 210,-7771 }, + { 211,-7771 }, { 212,-7771 }, { 213,-7771 }, { 214,-7771 }, { 215,-7771 }, + { 216,-7771 }, { 217,-7771 }, { 218,-7771 }, { 219,-7771 }, { 220,-7771 }, + { 221,-7771 }, { 222,-7771 }, { 223,-7771 }, { 224,-7771 }, { 225,-7771 }, + { 226,-7771 }, { 227,-7771 }, { 228,-7771 }, { 229,-7771 }, { 230,-7771 }, + { 231,-7771 }, { 232,-7771 }, { 233,-7771 }, { 234,-7771 }, { 235,-7771 }, + + { 236,-7771 }, { 237,-7771 }, { 238,-7771 }, { 239,-7771 }, { 240,-7771 }, + { 241,-7771 }, { 242,-7771 }, { 243,-7771 }, { 244,-7771 }, { 245,-7771 }, + { 246,-7771 }, { 247,-7771 }, { 248,-7771 }, { 249,-7771 }, { 250,-7771 }, + { 251,-7771 }, { 252,-7771 }, { 253,-7771 }, { 254,-7771 }, { 255,-7771 }, + { 256,-7771 }, { 0, 44 }, { 0,2805 }, { 1,-8029 }, { 2,-8029 }, + { 3,-8029 }, { 4,-8029 }, { 5,-8029 }, { 6,-8029 }, { 7,-8029 }, + { 8,-8029 }, { 0, 0 }, { 0, 0 }, { 11,-8029 }, { 0, 0 }, + { 0, 0 }, { 14,-8029 }, { 15,-8029 }, { 16,-8029 }, { 17,-8029 }, + { 18,-8029 }, { 19,-8029 }, { 20,-8029 }, { 21,-8029 }, { 22,-8029 }, + { 23,-8029 }, { 24,-8029 }, { 25,-8029 }, { 26,-8029 }, { 27,-8029 }, + + { 28,-8029 }, { 29,-8029 }, { 30,-8029 }, { 31,-8029 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 39,-8029 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, 
+ { 48, 0 }, { 49, 0 }, { 50, 0 }, { 51, 0 }, { 52, 0 }, + { 53, 0 }, { 54, 0 }, { 55, 0 }, { 56,-8029 }, { 57,-8029 }, + { 0, 0 }, { 59,-8029 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 65,-8029 }, { 66,-8029 }, { 67,-8029 }, + { 68,-8029 }, { 69,-8029 }, { 70,-8029 }, { 71,-8029 }, { 72,-8029 }, + { 73,-8029 }, { 74,-8029 }, { 75,-8029 }, { 76,-8029 }, { 77,-8029 }, + + { 78,-8029 }, { 79,-8029 }, { 80,-8029 }, { 81,-8029 }, { 82,-8029 }, + { 83,-8029 }, { 84,-8029 }, { 85,-8029 }, { 86,-8029 }, { 87,-8029 }, + { 88,-8029 }, { 89,-8029 }, { 90,-8029 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 94,-8029 }, { 95, 258 }, { 96,-8029 }, { 97,-8029 }, + { 98,-8029 }, { 99,-8029 }, { 100,-8029 }, { 101,-8029 }, { 102,-8029 }, + { 103,-8029 }, { 104,-8029 }, { 105,-8029 }, { 106,-8029 }, { 107,-8029 }, + { 108,-8029 }, { 109,-8029 }, { 110,-8029 }, { 111,-8029 }, { 112,-8029 }, + { 113,-8029 }, { 114,-8029 }, { 115,-8029 }, { 116,-8029 }, { 117,-8029 }, + { 118,-8029 }, { 119,-8029 }, { 120,-8029 }, { 121,-8029 }, { 122,-8029 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 126,-8029 }, { 127,-8029 }, + + { 128,-8029 }, { 129,-8029 }, { 130,-8029 }, { 131,-8029 }, { 132,-8029 }, + { 133,-8029 }, { 134,-8029 }, { 135,-8029 }, { 136,-8029 }, { 137,-8029 }, + { 138,-8029 }, { 139,-8029 }, { 140,-8029 }, { 141,-8029 }, { 142,-8029 }, + { 143,-8029 }, { 144,-8029 }, { 145,-8029 }, { 146,-8029 }, { 147,-8029 }, + { 148,-8029 }, { 149,-8029 }, { 150,-8029 }, { 151,-8029 }, { 152,-8029 }, + { 153,-8029 }, { 154,-8029 }, { 155,-8029 }, { 156,-8029 }, { 157,-8029 }, + { 158,-8029 }, { 159,-8029 }, { 160,-8029 }, { 161,-8029 }, { 162,-8029 }, + { 163,-8029 }, { 164,-8029 }, { 165,-8029 }, { 166,-8029 }, { 167,-8029 }, + { 168,-8029 }, { 169,-8029 }, { 170,-8029 }, { 171,-8029 }, { 172,-8029 }, + { 173,-8029 }, { 174,-8029 }, { 175,-8029 }, { 176,-8029 }, { 177,-8029 }, + + { 178,-8029 }, { 179,-8029 }, { 180,-8029 }, { 181,-8029 }, { 182,-8029 }, + { 183,-8029 }, { 184,-8029 }, { 185,-8029 }, { 186,-8029 }, { 187,-8029 }, + { 188,-8029 }, { 189,-8029 }, { 190,-8029 }, { 191,-8029 }, { 192,-8029 }, + { 193,-8029 }, { 194,-8029 }, { 195,-8029 }, { 196,-8029 }, { 197,-8029 }, + { 198,-8029 }, { 199,-8029 }, { 200,-8029 }, { 201,-8029 }, { 202,-8029 }, + { 203,-8029 }, { 204,-8029 }, { 205,-8029 }, { 206,-8029 }, { 207,-8029 }, + { 208,-8029 }, { 209,-8029 }, { 210,-8029 }, { 211,-8029 }, { 212,-8029 }, + { 213,-8029 }, { 214,-8029 }, { 215,-8029 }, { 216,-8029 }, { 217,-8029 }, + { 218,-8029 }, { 219,-8029 }, { 220,-8029 }, { 221,-8029 }, { 222,-8029 }, + { 223,-8029 }, { 224,-8029 }, { 225,-8029 }, { 226,-8029 }, { 227,-8029 }, + + { 228,-8029 }, { 229,-8029 }, { 230,-8029 }, { 231,-8029 }, { 232,-8029 }, + { 233,-8029 }, { 234,-8029 }, { 235,-8029 }, { 236,-8029 }, { 237,-8029 }, + { 238,-8029 }, { 239,-8029 }, { 240,-8029 }, { 241,-8029 }, { 242,-8029 }, + { 243,-8029 }, { 244,-8029 }, { 245,-8029 }, { 246,-8029 }, { 247,-8029 }, + { 248,-8029 }, { 249,-8029 }, { 250,-8029 }, { 251,-8029 }, { 252,-8029 }, + { 253,-8029 }, { 254,-8029 }, { 255,-8029 }, { 256,-8029 }, { 0, 52 }, + { 0,2547 }, { 1,-8287 }, { 2,-8287 }, { 3,-8287 }, { 4,-8287 }, + { 5,-8287 }, { 6,-8287 }, { 7,-8287 }, { 8,-8287 }, { 0, 0 }, + { 0, 0 }, { 11,-8287 }, { 0, 0 }, { 0, 0 }, { 14,-8287 }, + { 15,-8287 }, { 16,-8287 }, { 17,-8287 }, { 18,-8287 }, { 19,-8287 }, + + { 20,-8287 }, { 21,-8287 }, { 22,-8287 }, { 23,-8287 }, { 24,-8287 }, + { 25,-8287 }, { 26,-8287 }, { 27,-8287 }, { 28,-8287 }, { 29,-8287 }, + { 30,-8287 }, { 31,-8287 }, { 0, 0 
}, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39,-8287 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48,-258 }, { 49,-258 }, + { 50,-258 }, { 51,-258 }, { 52,-258 }, { 53,-258 }, { 54,-258 }, + { 55,-258 }, { 56,-8287 }, { 57,-8287 }, { 0, 0 }, { 59,-8287 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 65,-8287 }, { 66,-8287 }, { 67,-8287 }, { 68,-8287 }, { 69,-8287 }, + + { 70,-8287 }, { 71,-8287 }, { 72,-8287 }, { 73,-8287 }, { 74,-8287 }, + { 75,-8287 }, { 76,-8287 }, { 77,-8287 }, { 78,-8287 }, { 79,-8287 }, + { 80,-8287 }, { 81,-8287 }, { 82,-8287 }, { 83,-8287 }, { 84,-8287 }, + { 85,-8287 }, { 86,-8287 }, { 87,-8287 }, { 88,-8287 }, { 89,-8287 }, + { 90,-8287 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94,-8287 }, + { 95,-8287 }, { 96,-8287 }, { 97,-8287 }, { 98,-8287 }, { 99,-8287 }, + { 100,-8287 }, { 101,-8287 }, { 102,-8287 }, { 103,-8287 }, { 104,-8287 }, + { 105,-8287 }, { 106,-8287 }, { 107,-8287 }, { 108,-8287 }, { 109,-8287 }, + { 110,-8287 }, { 111,-8287 }, { 112,-8287 }, { 113,-8287 }, { 114,-8287 }, + { 115,-8287 }, { 116,-8287 }, { 117,-8287 }, { 118,-8287 }, { 119,-8287 }, + + { 120,-8287 }, { 121,-8287 }, { 122,-8287 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 126,-8287 }, { 127,-8287 }, { 128,-8287 }, { 129,-8287 }, + { 130,-8287 }, { 131,-8287 }, { 132,-8287 }, { 133,-8287 }, { 134,-8287 }, + { 135,-8287 }, { 136,-8287 }, { 137,-8287 }, { 138,-8287 }, { 139,-8287 }, + { 140,-8287 }, { 141,-8287 }, { 142,-8287 }, { 143,-8287 }, { 144,-8287 }, + { 145,-8287 }, { 146,-8287 }, { 147,-8287 }, { 148,-8287 }, { 149,-8287 }, + { 150,-8287 }, { 151,-8287 }, { 152,-8287 }, { 153,-8287 }, { 154,-8287 }, + { 155,-8287 }, { 156,-8287 }, { 157,-8287 }, { 158,-8287 }, { 159,-8287 }, + { 160,-8287 }, { 161,-8287 }, { 162,-8287 }, { 163,-8287 }, { 164,-8287 }, + { 165,-8287 }, { 166,-8287 }, { 167,-8287 }, { 168,-8287 }, { 169,-8287 }, + + { 170,-8287 }, { 171,-8287 }, { 172,-8287 }, { 173,-8287 }, { 174,-8287 }, + { 175,-8287 }, { 176,-8287 }, { 177,-8287 }, { 178,-8287 }, { 179,-8287 }, + { 180,-8287 }, { 181,-8287 }, { 182,-8287 }, { 183,-8287 }, { 184,-8287 }, + { 185,-8287 }, { 186,-8287 }, { 187,-8287 }, { 188,-8287 }, { 189,-8287 }, + { 190,-8287 }, { 191,-8287 }, { 192,-8287 }, { 193,-8287 }, { 194,-8287 }, + { 195,-8287 }, { 196,-8287 }, { 197,-8287 }, { 198,-8287 }, { 199,-8287 }, + { 200,-8287 }, { 201,-8287 }, { 202,-8287 }, { 203,-8287 }, { 204,-8287 }, + { 205,-8287 }, { 206,-8287 }, { 207,-8287 }, { 208,-8287 }, { 209,-8287 }, + { 210,-8287 }, { 211,-8287 }, { 212,-8287 }, { 213,-8287 }, { 214,-8287 }, + { 215,-8287 }, { 216,-8287 }, { 217,-8287 }, { 218,-8287 }, { 219,-8287 }, + + { 220,-8287 }, { 221,-8287 }, { 222,-8287 }, { 223,-8287 }, { 224,-8287 }, + { 225,-8287 }, { 226,-8287 }, { 227,-8287 }, { 228,-8287 }, { 229,-8287 }, + { 230,-8287 }, { 231,-8287 }, { 232,-8287 }, { 233,-8287 }, { 234,-8287 }, + { 235,-8287 }, { 236,-8287 }, { 237,-8287 }, { 238,-8287 }, { 239,-8287 }, + { 240,-8287 }, { 241,-8287 }, { 242,-8287 }, { 243,-8287 }, { 244,-8287 }, + { 245,-8287 }, { 246,-8287 }, { 247,-8287 }, { 248,-8287 }, { 249,-8287 }, + { 250,-8287 }, { 251,-8287 }, { 252,-8287 }, { 253,-8287 }, { 254,-8287 }, + { 255,-8287 }, { 256,-8287 }, { 0, 43 }, { 0,2289 }, { 1,-8545 }, + { 2,-8545 }, { 3,-8545 }, { 4,-8545 }, { 5,-8545 }, { 6,-8545 }, + { 7,-8545 }, { 8,-8545 }, { 0, 0 }, { 0, 0 }, { 11,-8545 }, + + { 0, 0 }, { 0, 0 }, { 14,-8545 }, { 15,-8545 }, { 16,-8545 }, + { 17,-8545 }, { 18,-8545 }, { 19,-8545 }, 
{ 20,-8545 }, { 21,-8545 }, + { 22,-8545 }, { 23,-8545 }, { 24,-8545 }, { 25,-8545 }, { 26,-8545 }, + { 27,-8545 }, { 28,-8545 }, { 29,-8545 }, { 30,-8545 }, { 31,-8545 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 39,-8545 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 48, 0 }, { 49, 0 }, { 50, 0 }, { 51, 0 }, + { 52, 0 }, { 53, 0 }, { 54, 0 }, { 55, 0 }, { 56, 0 }, + { 57, 0 }, { 0, 0 }, { 59,-8545 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65, 0 }, { 66, 0 }, + { 67, 0 }, { 68, 0 }, { 69, 0 }, { 70, 0 }, { 71,-8545 }, + { 72,-8545 }, { 73,-8545 }, { 74,-8545 }, { 75,-8545 }, { 76,-8545 }, + { 77,-8545 }, { 78,-8545 }, { 79,-8545 }, { 80,-8545 }, { 81,-8545 }, + { 82,-8545 }, { 83,-8545 }, { 84,-8545 }, { 85,-8545 }, { 86,-8545 }, + { 87,-8545 }, { 88,-8545 }, { 89,-8545 }, { 90,-8545 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 94,-8545 }, { 95, 258 }, { 96,-8545 }, + { 97, 0 }, { 98, 0 }, { 99, 0 }, { 100, 0 }, { 101, 0 }, + { 102, 0 }, { 103,-8545 }, { 104,-8545 }, { 105,-8545 }, { 106,-8545 }, + { 107,-8545 }, { 108,-8545 }, { 109,-8545 }, { 110,-8545 }, { 111,-8545 }, + + { 112,-8545 }, { 113,-8545 }, { 114,-8545 }, { 115,-8545 }, { 116,-8545 }, + { 117,-8545 }, { 118,-8545 }, { 119,-8545 }, { 120,-8545 }, { 121,-8545 }, + { 122,-8545 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 126,-8545 }, + { 127,-8545 }, { 128,-8545 }, { 129,-8545 }, { 130,-8545 }, { 131,-8545 }, + { 132,-8545 }, { 133,-8545 }, { 134,-8545 }, { 135,-8545 }, { 136,-8545 }, + { 137,-8545 }, { 138,-8545 }, { 139,-8545 }, { 140,-8545 }, { 141,-8545 }, + { 142,-8545 }, { 143,-8545 }, { 144,-8545 }, { 145,-8545 }, { 146,-8545 }, + { 147,-8545 }, { 148,-8545 }, { 149,-8545 }, { 150,-8545 }, { 151,-8545 }, + { 152,-8545 }, { 153,-8545 }, { 154,-8545 }, { 155,-8545 }, { 156,-8545 }, + { 157,-8545 }, { 158,-8545 }, { 159,-8545 }, { 160,-8545 }, { 161,-8545 }, + + { 162,-8545 }, { 163,-8545 }, { 164,-8545 }, { 165,-8545 }, { 166,-8545 }, + { 167,-8545 }, { 168,-8545 }, { 169,-8545 }, { 170,-8545 }, { 171,-8545 }, + { 172,-8545 }, { 173,-8545 }, { 174,-8545 }, { 175,-8545 }, { 176,-8545 }, + { 177,-8545 }, { 178,-8545 }, { 179,-8545 }, { 180,-8545 }, { 181,-8545 }, + { 182,-8545 }, { 183,-8545 }, { 184,-8545 }, { 185,-8545 }, { 186,-8545 }, + { 187,-8545 }, { 188,-8545 }, { 189,-8545 }, { 190,-8545 }, { 191,-8545 }, + { 192,-8545 }, { 193,-8545 }, { 194,-8545 }, { 195,-8545 }, { 196,-8545 }, + { 197,-8545 }, { 198,-8545 }, { 199,-8545 }, { 200,-8545 }, { 201,-8545 }, + { 202,-8545 }, { 203,-8545 }, { 204,-8545 }, { 205,-8545 }, { 206,-8545 }, + { 207,-8545 }, { 208,-8545 }, { 209,-8545 }, { 210,-8545 }, { 211,-8545 }, + + { 212,-8545 }, { 213,-8545 }, { 214,-8545 }, { 215,-8545 }, { 216,-8545 }, + { 217,-8545 }, { 218,-8545 }, { 219,-8545 }, { 220,-8545 }, { 221,-8545 }, + { 222,-8545 }, { 223,-8545 }, { 224,-8545 }, { 225,-8545 }, { 226,-8545 }, + { 227,-8545 }, { 228,-8545 }, { 229,-8545 }, { 230,-8545 }, { 231,-8545 }, + { 232,-8545 }, { 233,-8545 }, { 234,-8545 }, { 235,-8545 }, { 236,-8545 }, + { 237,-8545 }, { 238,-8545 }, { 239,-8545 }, { 240,-8545 }, { 241,-8545 }, + { 242,-8545 }, { 243,-8545 }, { 244,-8545 }, { 245,-8545 }, { 246,-8545 }, + { 247,-8545 }, { 248,-8545 }, { 249,-8545 }, { 250,-8545 }, { 251,-8545 }, + { 252,-8545 }, { 253,-8545 }, { 254,-8545 }, { 255,-8545 }, { 256,-8545 }, + { 0, 52 }, { 0,2031 }, { 1,-8803 }, { 2,-8803 }, { 3,-8803 }, + + { 4,-8803 }, { 5,-8803 }, { 6,-8803 }, { 7,-8803 }, { 8,-8803 }, + { 0, 0 }, { 0, 0 }, 
{ 11,-8803 }, { 0, 0 }, { 0, 0 }, + { 14,-8803 }, { 15,-8803 }, { 16,-8803 }, { 17,-8803 }, { 18,-8803 }, + { 19,-8803 }, { 20,-8803 }, { 21,-8803 }, { 22,-8803 }, { 23,-8803 }, + { 24,-8803 }, { 25,-8803 }, { 26,-8803 }, { 27,-8803 }, { 28,-8803 }, + { 29,-8803 }, { 30,-8803 }, { 31,-8803 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 39,-8803 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48,-258 }, + { 49,-258 }, { 50,-258 }, { 51,-258 }, { 52,-258 }, { 53,-258 }, + + { 54,-258 }, { 55,-258 }, { 56,-258 }, { 57,-258 }, { 0, 0 }, + { 59,-8803 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 65,-258 }, { 66,-258 }, { 67,-258 }, { 68,-258 }, + { 69,-258 }, { 70,-258 }, { 71,-8803 }, { 72,-8803 }, { 73,-8803 }, + { 74,-8803 }, { 75,-8803 }, { 76,-8803 }, { 77,-8803 }, { 78,-8803 }, + { 79,-8803 }, { 80,-8803 }, { 81,-8803 }, { 82,-8803 }, { 83,-8803 }, + { 84,-8803 }, { 85,-8803 }, { 86,-8803 }, { 87,-8803 }, { 88,-8803 }, + { 89,-8803 }, { 90,-8803 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 94,-8803 }, { 95,-8803 }, { 96,-8803 }, { 97,-258 }, { 98,-258 }, + { 99,-258 }, { 100,-258 }, { 101,-258 }, { 102,-258 }, { 103,-8803 }, + + { 104,-8803 }, { 105,-8803 }, { 106,-8803 }, { 107,-8803 }, { 108,-8803 }, + { 109,-8803 }, { 110,-8803 }, { 111,-8803 }, { 112,-8803 }, { 113,-8803 }, + { 114,-8803 }, { 115,-8803 }, { 116,-8803 }, { 117,-8803 }, { 118,-8803 }, + { 119,-8803 }, { 120,-8803 }, { 121,-8803 }, { 122,-8803 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 126,-8803 }, { 127,-8803 }, { 128,-8803 }, + { 129,-8803 }, { 130,-8803 }, { 131,-8803 }, { 132,-8803 }, { 133,-8803 }, + { 134,-8803 }, { 135,-8803 }, { 136,-8803 }, { 137,-8803 }, { 138,-8803 }, + { 139,-8803 }, { 140,-8803 }, { 141,-8803 }, { 142,-8803 }, { 143,-8803 }, + { 144,-8803 }, { 145,-8803 }, { 146,-8803 }, { 147,-8803 }, { 148,-8803 }, + { 149,-8803 }, { 150,-8803 }, { 151,-8803 }, { 152,-8803 }, { 153,-8803 }, + + { 154,-8803 }, { 155,-8803 }, { 156,-8803 }, { 157,-8803 }, { 158,-8803 }, + { 159,-8803 }, { 160,-8803 }, { 161,-8803 }, { 162,-8803 }, { 163,-8803 }, + { 164,-8803 }, { 165,-8803 }, { 166,-8803 }, { 167,-8803 }, { 168,-8803 }, + { 169,-8803 }, { 170,-8803 }, { 171,-8803 }, { 172,-8803 }, { 173,-8803 }, + { 174,-8803 }, { 175,-8803 }, { 176,-8803 }, { 177,-8803 }, { 178,-8803 }, + { 179,-8803 }, { 180,-8803 }, { 181,-8803 }, { 182,-8803 }, { 183,-8803 }, + { 184,-8803 }, { 185,-8803 }, { 186,-8803 }, { 187,-8803 }, { 188,-8803 }, + { 189,-8803 }, { 190,-8803 }, { 191,-8803 }, { 192,-8803 }, { 193,-8803 }, + { 194,-8803 }, { 195,-8803 }, { 196,-8803 }, { 197,-8803 }, { 198,-8803 }, + { 199,-8803 }, { 200,-8803 }, { 201,-8803 }, { 202,-8803 }, { 203,-8803 }, + + { 204,-8803 }, { 205,-8803 }, { 206,-8803 }, { 207,-8803 }, { 208,-8803 }, + { 209,-8803 }, { 210,-8803 }, { 211,-8803 }, { 212,-8803 }, { 213,-8803 }, + { 214,-8803 }, { 215,-8803 }, { 216,-8803 }, { 217,-8803 }, { 218,-8803 }, + { 219,-8803 }, { 220,-8803 }, { 221,-8803 }, { 222,-8803 }, { 223,-8803 }, + { 224,-8803 }, { 225,-8803 }, { 226,-8803 }, { 227,-8803 }, { 228,-8803 }, + { 229,-8803 }, { 230,-8803 }, { 231,-8803 }, { 232,-8803 }, { 233,-8803 }, + { 234,-8803 }, { 235,-8803 }, { 236,-8803 }, { 237,-8803 }, { 238,-8803 }, + { 239,-8803 }, { 240,-8803 }, { 241,-8803 }, { 242,-8803 }, { 243,-8803 }, + { 244,-8803 }, { 245,-8803 }, { 246,-8803 }, { 247,-8803 }, { 248,-8803 }, + { 249,-8803 }, { 250,-8803 }, { 251,-8803 }, { 252,-8803 }, { 253,-8803 }, + + { 254,-8803 }, { 255,-8803 }, { 
256,-8803 }, { 0, 13 }, { 0,1773 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 13 }, { 0,1750 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 48, 968 }, { 49, 968 }, { 50, 968 }, + { 51, 968 }, { 52, 968 }, { 53, 968 }, { 54, 968 }, { 55, 968 }, + { 56, 968 }, { 57, 968 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65, 968 }, + { 66, 968 }, { 67, 968 }, { 68, 968 }, { 69, 968 }, { 70, 968 }, + { 48, 968 }, { 49, 968 }, { 50, 968 }, { 51, 968 }, { 52, 968 }, + { 53, 968 }, { 54, 968 }, { 55, 968 }, { 56, 968 }, { 57, 968 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 65, 968 }, { 66, 968 }, { 67, 968 }, + { 68, 968 }, { 69, 968 }, { 70, 968 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 97, 968 }, { 98, 968 }, { 99, 968 }, { 100, 968 }, + { 101, 968 }, { 102, 968 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 97, 968 }, + { 98, 968 }, { 99, 968 }, { 100, 968 }, { 101, 968 }, { 102, 968 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 40 }, { 0,1638 }, + { 1,-9461 }, { 2,-9461 }, { 3,-9461 }, { 4,-9461 }, { 5,-9461 }, + { 6,-9461 }, { 7,-9461 }, { 8,-9461 }, { 0, 0 }, { 0, 0 }, + + { 11,-9461 }, { 0, 0 }, { 125,-9051 }, { 14,-9461 }, { 15,-9461 }, + { 16,-9461 }, { 17,-9461 }, { 18,-9461 }, { 19,-9461 }, { 20,-9461 }, + { 21,-9461 }, { 22,-9461 }, { 23,-9461 }, { 24,-9461 }, { 25,-9461 }, + { 26,-9461 }, { 27,-9461 }, { 28,-9461 }, { 29,-9461 }, { 30,-9461 }, + { 31,-9461 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39,-9461 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 48, 0 }, { 49, 0 }, { 50, 0 }, + { 51, 0 }, { 52, 0 }, { 53, 0 }, { 54, 0 }, { 55, 0 }, + { 56, 0 }, { 57, 0 }, { 0, 0 }, { 59,-9461 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65,-9461 }, + { 66,-9461 }, { 67,-9461 }, { 68,-9461 }, { 69,-9461 }, { 70,-9461 }, + { 71,-9461 }, { 72,-9461 }, { 73,-9461 }, { 74,-9461 }, { 75,-9461 }, + { 76,-9461 }, { 77,-9461 }, { 78,-9461 }, { 79,-9461 }, { 80,-9461 }, + { 81,-9461 }, { 82,-9461 }, { 83,-9461 }, { 84,-9461 }, { 85,-9461 }, + { 86,-9461 }, { 87,-9461 }, { 88,-9461 }, { 89,-9461 }, { 90,-9461 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94,-9461 }, { 95, 258 }, + { 96,-9461 }, { 97,-9461 }, { 98,-9461 }, { 99,-9461 }, { 100,-9461 }, + { 101,-9461 }, { 102,-9461 }, { 103,-9461 }, { 104,-9461 }, { 105,-9461 }, + { 106,-9461 }, { 107,-9461 }, { 108,-9461 }, { 109,-9461 }, { 110,-9461 }, + + { 111,-9461 }, { 112,-9461 }, { 113,-9461 }, { 114,-9461 }, { 115,-9461 }, + { 116,-9461 }, { 117,-9461 }, { 118,-9461 }, { 119,-9461 }, { 120,-9461 }, + { 121,-9461 }, { 122,-9461 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 126,-9461 }, { 127,-9461 }, { 128,-9461 }, { 129,-9461 }, { 130,-9461 }, + { 131,-9461 }, { 132,-9461 }, { 133,-9461 }, { 134,-9461 }, { 135,-9461 }, + { 136,-9461 }, { 137,-9461 }, { 138,-9461 }, { 139,-9461 }, { 140,-9461 }, + { 141,-9461 }, { 142,-9461 }, { 143,-9461 }, { 144,-9461 }, { 
145,-9461 }, + { 146,-9461 }, { 147,-9461 }, { 148,-9461 }, { 149,-9461 }, { 150,-9461 }, + { 151,-9461 }, { 152,-9461 }, { 153,-9461 }, { 154,-9461 }, { 155,-9461 }, + { 156,-9461 }, { 157,-9461 }, { 158,-9461 }, { 159,-9461 }, { 160,-9461 }, + + { 161,-9461 }, { 162,-9461 }, { 163,-9461 }, { 164,-9461 }, { 165,-9461 }, + { 166,-9461 }, { 167,-9461 }, { 168,-9461 }, { 169,-9461 }, { 170,-9461 }, + { 171,-9461 }, { 172,-9461 }, { 173,-9461 }, { 174,-9461 }, { 175,-9461 }, + { 176,-9461 }, { 177,-9461 }, { 178,-9461 }, { 179,-9461 }, { 180,-9461 }, + { 181,-9461 }, { 182,-9461 }, { 183,-9461 }, { 184,-9461 }, { 185,-9461 }, + { 186,-9461 }, { 187,-9461 }, { 188,-9461 }, { 189,-9461 }, { 190,-9461 }, + { 191,-9461 }, { 192,-9461 }, { 193,-9461 }, { 194,-9461 }, { 195,-9461 }, + { 196,-9461 }, { 197,-9461 }, { 198,-9461 }, { 199,-9461 }, { 200,-9461 }, + { 201,-9461 }, { 202,-9461 }, { 203,-9461 }, { 204,-9461 }, { 205,-9461 }, + { 206,-9461 }, { 207,-9461 }, { 208,-9461 }, { 209,-9461 }, { 210,-9461 }, + + { 211,-9461 }, { 212,-9461 }, { 213,-9461 }, { 214,-9461 }, { 215,-9461 }, + { 216,-9461 }, { 217,-9461 }, { 218,-9461 }, { 219,-9461 }, { 220,-9461 }, + { 221,-9461 }, { 222,-9461 }, { 223,-9461 }, { 224,-9461 }, { 225,-9461 }, + { 226,-9461 }, { 227,-9461 }, { 228,-9461 }, { 229,-9461 }, { 230,-9461 }, + { 231,-9461 }, { 232,-9461 }, { 233,-9461 }, { 234,-9461 }, { 235,-9461 }, + { 236,-9461 }, { 237,-9461 }, { 238,-9461 }, { 239,-9461 }, { 240,-9461 }, + { 241,-9461 }, { 242,-9461 }, { 243,-9461 }, { 244,-9461 }, { 245,-9461 }, + { 246,-9461 }, { 247,-9461 }, { 248,-9461 }, { 249,-9461 }, { 250,-9461 }, + { 251,-9461 }, { 252,-9461 }, { 253,-9461 }, { 254,-9461 }, { 255,-9461 }, + { 256,-9461 }, { 0, 49 }, { 0,1380 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 48, 710 }, { 49, 710 }, { 50, 710 }, { 51, 710 }, { 52, 710 }, + + { 53, 710 }, { 54, 710 }, { 55, 710 }, { 56, 710 }, { 57, 710 }, + { 0, 41 }, { 0,1321 }, { 1,-9782 }, { 2,-9782 }, { 3,-9782 }, + { 4,-9782 }, { 5,-9782 }, { 6,-9782 }, { 7,-9782 }, { 8,-9782 }, + { 0, 0 }, { 0, 0 }, { 11,-9782 }, { 0, 0 }, { 0, 0 }, + { 14,-9782 }, { 15,-9782 }, { 16,-9782 }, { 17,-9782 }, { 18,-9782 }, + { 19,-9782 }, { 20,-9782 }, { 21,-9782 }, { 22,-9782 }, { 23,-9782 }, + { 24,-9782 }, { 25,-9782 }, { 26,-9782 }, { 27,-9782 }, { 28,-9782 }, + { 29,-9782 }, { 30,-9782 }, { 31,-9782 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 39,-9782 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48,-3091 }, + { 49,-3091 }, { 50,-3091 }, { 51,-3091 }, { 52,-3091 }, { 53,-3091 }, + { 54,-3091 }, { 55,-3091 }, { 56,-3091 }, { 57,-3091 }, { 0, 0 }, + { 59,-9782 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 65,-9782 }, { 66,-9782 }, { 67,-9782 }, { 68,-9782 }, + { 69,-5448 }, { 70,-9782 }, { 71,-9782 }, { 72,-9782 }, { 73,-9782 }, + { 74,-9782 }, { 75,-9782 }, { 76,-9782 }, { 77,-9782 }, { 78,-9782 }, + { 79,-9782 }, { 80,-9782 }, { 81,-9782 }, { 82,-9782 }, { 83,-9782 }, + { 84,-9782 }, { 85,-9782 }, { 86,-9782 }, { 87,-9782 }, { 
88,-9782 }, + { 89,-9782 }, { 90,-9782 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 94,-9782 }, { 95,-2833 }, { 96,-9782 }, { 97,-9782 }, { 98,-9782 }, + { 99,-9782 }, { 100,-9782 }, { 101,-5448 }, { 102,-9782 }, { 103,-9782 }, + { 104,-9782 }, { 105,-9782 }, { 106,-9782 }, { 107,-9782 }, { 108,-9782 }, + { 109,-9782 }, { 110,-9782 }, { 111,-9782 }, { 112,-9782 }, { 113,-9782 }, + { 114,-9782 }, { 115,-9782 }, { 116,-9782 }, { 117,-9782 }, { 118,-9782 }, + { 119,-9782 }, { 120,-9782 }, { 121,-9782 }, { 122,-9782 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 126,-9782 }, { 127,-9782 }, { 128,-9782 }, + { 129,-9782 }, { 130,-9782 }, { 131,-9782 }, { 132,-9782 }, { 133,-9782 }, + { 134,-9782 }, { 135,-9782 }, { 136,-9782 }, { 137,-9782 }, { 138,-9782 }, + { 139,-9782 }, { 140,-9782 }, { 141,-9782 }, { 142,-9782 }, { 143,-9782 }, + + { 144,-9782 }, { 145,-9782 }, { 146,-9782 }, { 147,-9782 }, { 148,-9782 }, + { 149,-9782 }, { 150,-9782 }, { 151,-9782 }, { 152,-9782 }, { 153,-9782 }, + { 154,-9782 }, { 155,-9782 }, { 156,-9782 }, { 157,-9782 }, { 158,-9782 }, + { 159,-9782 }, { 160,-9782 }, { 161,-9782 }, { 162,-9782 }, { 163,-9782 }, + { 164,-9782 }, { 165,-9782 }, { 166,-9782 }, { 167,-9782 }, { 168,-9782 }, + { 169,-9782 }, { 170,-9782 }, { 171,-9782 }, { 172,-9782 }, { 173,-9782 }, + { 174,-9782 }, { 175,-9782 }, { 176,-9782 }, { 177,-9782 }, { 178,-9782 }, + { 179,-9782 }, { 180,-9782 }, { 181,-9782 }, { 182,-9782 }, { 183,-9782 }, + { 184,-9782 }, { 185,-9782 }, { 186,-9782 }, { 187,-9782 }, { 188,-9782 }, + { 189,-9782 }, { 190,-9782 }, { 191,-9782 }, { 192,-9782 }, { 193,-9782 }, + + { 194,-9782 }, { 195,-9782 }, { 196,-9782 }, { 197,-9782 }, { 198,-9782 }, + { 199,-9782 }, { 200,-9782 }, { 201,-9782 }, { 202,-9782 }, { 203,-9782 }, + { 204,-9782 }, { 205,-9782 }, { 206,-9782 }, { 207,-9782 }, { 208,-9782 }, + { 209,-9782 }, { 210,-9782 }, { 211,-9782 }, { 212,-9782 }, { 213,-9782 }, + { 214,-9782 }, { 215,-9782 }, { 216,-9782 }, { 217,-9782 }, { 218,-9782 }, + { 219,-9782 }, { 220,-9782 }, { 221,-9782 }, { 222,-9782 }, { 223,-9782 }, + { 224,-9782 }, { 225,-9782 }, { 226,-9782 }, { 227,-9782 }, { 228,-9782 }, + { 229,-9782 }, { 230,-9782 }, { 231,-9782 }, { 232,-9782 }, { 233,-9782 }, + { 234,-9782 }, { 235,-9782 }, { 236,-9782 }, { 237,-9782 }, { 238,-9782 }, + { 239,-9782 }, { 240,-9782 }, { 241,-9782 }, { 242,-9782 }, { 243,-9782 }, + + { 244,-9782 }, { 245,-9782 }, { 246,-9782 }, { 247,-9782 }, { 248,-9782 }, + { 249,-9782 }, { 250,-9782 }, { 251,-9782 }, { 252,-9782 }, { 253,-9782 }, + { 254,-9782 }, { 255,-9782 }, { 256,-9782 }, { 0, 40 }, { 0,1063 }, + { 1,-2516 }, { 2,-2516 }, { 3,-2516 }, { 4,-2516 }, { 5,-2516 }, + { 6,-2516 }, { 7,-2516 }, { 8,-2516 }, { 0, 0 }, { 0, 0 }, + { 11,-2516 }, { 0, 0 }, { 0, 0 }, { 14,-2516 }, { 15,-2516 }, + { 16,-2516 }, { 17,-2516 }, { 18,-2516 }, { 19,-2516 }, { 20,-2516 }, + { 21,-2516 }, { 22,-2516 }, { 23,-2516 }, { 24,-2516 }, { 25,-2516 }, + { 26,-2516 }, { 27,-2516 }, { 28,-2516 }, { 29,-2516 }, { 30,-2516 }, + { 31,-2516 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39,-2516 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 48,-2258 }, { 49,-2258 }, { 50,-2258 }, + { 51,-2258 }, { 52,-2258 }, { 53,-2258 }, { 54,-2258 }, { 55,-2258 }, + { 56,-2258 }, { 57,-2258 }, { 0, 0 }, { 59,-2516 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65,-2516 }, + { 66,-2516 }, { 67,-2516 }, { 68,-2516 }, { 69,-2516 }, { 70,-2516 }, + { 71,-2516 }, { 72,-2516 }, { 73,-2516 }, { 74,-2516 }, { 
75,-2516 }, + { 76,-2516 }, { 77,-2516 }, { 78,-2516 }, { 79,-2516 }, { 80,-2516 }, + { 81,-2516 }, { 82,-2516 }, { 83,-2516 }, { 84,-2516 }, { 85,-2516 }, + + { 86,-2516 }, { 87,-2516 }, { 88,-2516 }, { 89,-2516 }, { 90,-2516 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94,-2516 }, { 95,-2000 }, + { 96,-2516 }, { 97,-2516 }, { 98,-2516 }, { 99,-2516 }, { 100,-2516 }, + { 101,-2516 }, { 102,-2516 }, { 103,-2516 }, { 104,-2516 }, { 105,-2516 }, + { 106,-2516 }, { 107,-2516 }, { 108,-2516 }, { 109,-2516 }, { 110,-2516 }, + { 111,-2516 }, { 112,-2516 }, { 113,-2516 }, { 114,-2516 }, { 115,-2516 }, + { 116,-2516 }, { 117,-2516 }, { 118,-2516 }, { 119,-2516 }, { 120,-2516 }, + { 121,-2516 }, { 122,-2516 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 126,-2516 }, { 127,-2516 }, { 128,-2516 }, { 129,-2516 }, { 130,-2516 }, + { 131,-2516 }, { 132,-2516 }, { 133,-2516 }, { 134,-2516 }, { 135,-2516 }, + + { 136,-2516 }, { 137,-2516 }, { 138,-2516 }, { 139,-2516 }, { 140,-2516 }, + { 141,-2516 }, { 142,-2516 }, { 143,-2516 }, { 144,-2516 }, { 145,-2516 }, + { 146,-2516 }, { 147,-2516 }, { 148,-2516 }, { 149,-2516 }, { 150,-2516 }, + { 151,-2516 }, { 152,-2516 }, { 153,-2516 }, { 154,-2516 }, { 155,-2516 }, + { 156,-2516 }, { 157,-2516 }, { 158,-2516 }, { 159,-2516 }, { 160,-2516 }, + { 161,-2516 }, { 162,-2516 }, { 163,-2516 }, { 164,-2516 }, { 165,-2516 }, + { 166,-2516 }, { 167,-2516 }, { 168,-2516 }, { 169,-2516 }, { 170,-2516 }, + { 171,-2516 }, { 172,-2516 }, { 173,-2516 }, { 174,-2516 }, { 175,-2516 }, + { 176,-2516 }, { 177,-2516 }, { 178,-2516 }, { 179,-2516 }, { 180,-2516 }, + { 181,-2516 }, { 182,-2516 }, { 183,-2516 }, { 184,-2516 }, { 185,-2516 }, + + { 186,-2516 }, { 187,-2516 }, { 188,-2516 }, { 189,-2516 }, { 190,-2516 }, + { 191,-2516 }, { 192,-2516 }, { 193,-2516 }, { 194,-2516 }, { 195,-2516 }, + { 196,-2516 }, { 197,-2516 }, { 198,-2516 }, { 199,-2516 }, { 200,-2516 }, + { 201,-2516 }, { 202,-2516 }, { 203,-2516 }, { 204,-2516 }, { 205,-2516 }, + { 206,-2516 }, { 207,-2516 }, { 208,-2516 }, { 209,-2516 }, { 210,-2516 }, + { 211,-2516 }, { 212,-2516 }, { 213,-2516 }, { 214,-2516 }, { 215,-2516 }, + { 216,-2516 }, { 217,-2516 }, { 218,-2516 }, { 219,-2516 }, { 220,-2516 }, + { 221,-2516 }, { 222,-2516 }, { 223,-2516 }, { 224,-2516 }, { 225,-2516 }, + { 226,-2516 }, { 227,-2516 }, { 228,-2516 }, { 229,-2516 }, { 230,-2516 }, + { 231,-2516 }, { 232,-2516 }, { 233,-2516 }, { 234,-2516 }, { 235,-2516 }, + + { 236,-2516 }, { 237,-2516 }, { 238,-2516 }, { 239,-2516 }, { 240,-2516 }, + { 241,-2516 }, { 242,-2516 }, { 243,-2516 }, { 244,-2516 }, { 245,-2516 }, + { 246,-2516 }, { 247,-2516 }, { 248,-2516 }, { 249,-2516 }, { 250,-2516 }, + { 251,-2516 }, { 252,-2516 }, { 253,-2516 }, { 254,-2516 }, { 255,-2516 }, + { 256,-2516 }, { 0, 13 }, { 0, 805 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 13 }, + { 0, 782 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 48,-9996 }, { 49,-9996 }, { 50,-9996 }, { 51,-9996 }, { 52,-9996 }, + { 53,-9996 }, { 54,-9996 }, { 55,-9996 }, { 56,-9996 }, { 57,-9996 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 65,-9996 }, { 66,-9996 }, { 67,-9996 }, + { 68,-9996 }, { 
69,-9996 }, { 70,-9996 }, { 48, 370 }, { 49, 370 }, + { 50, 370 }, { 51, 370 }, { 52, 370 }, { 53, 370 }, { 54, 370 }, + + { 55, 370 }, { 56, 370 }, { 57, 370 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 65, 370 }, { 66, 370 }, { 67, 370 }, { 68, 370 }, { 69, 370 }, + { 70, 370 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 97,-9996 }, + { 98,-9996 }, { 99,-9996 }, { 100,-9996 }, { 101,-9996 }, { 102,-9996 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 97, 370 }, { 98, 370 }, { 99, 370 }, + { 100, 370 }, { 101, 370 }, { 102, 370 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 40 }, { 0, 670 }, { 1,-10429 }, { 2,-10429 }, + { 3,-10429 }, { 4,-10429 }, { 5,-10429 }, { 6,-10429 }, { 7,-10429 }, + { 8,-10429 }, { 0, 0 }, { 0, 0 }, { 11,-10429 }, { 0, 0 }, + { 125,-10019 }, { 14,-10429 }, { 15,-10429 }, { 16,-10429 }, { 17,-10429 }, + { 18,-10429 }, { 19,-10429 }, { 20,-10429 }, { 21,-10429 }, { 22,-10429 }, + { 23,-10429 }, { 24,-10429 }, { 25,-10429 }, { 26,-10429 }, { 27,-10429 }, + { 28,-10429 }, { 29,-10429 }, { 30,-10429 }, { 31,-10429 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 39,-10429 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 48,-968 }, { 49,-968 }, { 50,-968 }, { 51,-968 }, { 52,-968 }, + { 53,-968 }, { 54,-968 }, { 55,-968 }, { 56,-968 }, { 57,-968 }, + { 0, 0 }, { 59,-10429 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 65,-10429 }, { 66,-10429 }, { 67,-10429 }, + { 68,-10429 }, { 69,-10429 }, { 70,-10429 }, { 71,-10429 }, { 72,-10429 }, + { 73,-10429 }, { 74,-10429 }, { 75,-10429 }, { 76,-10429 }, { 77,-10429 }, + { 78,-10429 }, { 79,-10429 }, { 80,-10429 }, { 81,-10429 }, { 82,-10429 }, + { 83,-10429 }, { 84,-10429 }, { 85,-10429 }, { 86,-10429 }, { 87,-10429 }, + { 88,-10429 }, { 89,-10429 }, { 90,-10429 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 94,-10429 }, { 95,-710 }, { 96,-10429 }, { 97,-10429 }, + { 98,-10429 }, { 99,-10429 }, { 100,-10429 }, { 101,-10429 }, { 102,-10429 }, + { 103,-10429 }, { 104,-10429 }, { 105,-10429 }, { 106,-10429 }, { 107,-10429 }, + { 108,-10429 }, { 109,-10429 }, { 110,-10429 }, { 111,-10429 }, { 112,-10429 }, + { 113,-10429 }, { 114,-10429 }, { 115,-10429 }, { 116,-10429 }, { 117,-10429 }, + { 118,-10429 }, { 119,-10429 }, { 120,-10429 }, { 121,-10429 }, { 122,-10429 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 126,-10429 }, { 127,-10429 }, + { 128,-10429 }, { 129,-10429 }, { 130,-10429 }, { 131,-10429 }, { 132,-10429 }, + { 133,-10429 }, { 134,-10429 }, { 135,-10429 }, { 136,-10429 }, { 137,-10429 }, + { 138,-10429 }, { 139,-10429 }, { 140,-10429 }, { 141,-10429 }, { 142,-10429 }, + + { 143,-10429 }, { 144,-10429 }, { 145,-10429 }, { 146,-10429 }, { 147,-10429 }, + { 148,-10429 }, { 149,-10429 }, { 150,-10429 }, { 151,-10429 }, { 152,-10429 }, + { 153,-10429 }, { 154,-10429 }, { 155,-10429 }, { 156,-10429 }, { 157,-10429 }, + { 158,-10429 }, { 159,-10429 }, { 160,-10429 }, { 161,-10429 }, { 162,-10429 }, + { 163,-10429 }, { 164,-10429 }, { 165,-10429 }, { 166,-10429 }, { 167,-10429 }, + { 168,-10429 }, { 169,-10429 }, { 170,-10429 }, { 171,-10429 }, { 172,-10429 }, + { 173,-10429 }, { 174,-10429 }, { 175,-10429 }, { 176,-10429 }, { 177,-10429 }, + { 178,-10429 }, { 179,-10429 }, { 180,-10429 }, { 181,-10429 }, { 182,-10429 }, + { 183,-10429 }, { 184,-10429 }, { 
185,-10429 }, { 186,-10429 }, { 187,-10429 }, + { 188,-10429 }, { 189,-10429 }, { 190,-10429 }, { 191,-10429 }, { 192,-10429 }, + + { 193,-10429 }, { 194,-10429 }, { 195,-10429 }, { 196,-10429 }, { 197,-10429 }, + { 198,-10429 }, { 199,-10429 }, { 200,-10429 }, { 201,-10429 }, { 202,-10429 }, + { 203,-10429 }, { 204,-10429 }, { 205,-10429 }, { 206,-10429 }, { 207,-10429 }, + { 208,-10429 }, { 209,-10429 }, { 210,-10429 }, { 211,-10429 }, { 212,-10429 }, + { 213,-10429 }, { 214,-10429 }, { 215,-10429 }, { 216,-10429 }, { 217,-10429 }, + { 218,-10429 }, { 219,-10429 }, { 220,-10429 }, { 221,-10429 }, { 222,-10429 }, + { 223,-10429 }, { 224,-10429 }, { 225,-10429 }, { 226,-10429 }, { 227,-10429 }, + { 228,-10429 }, { 229,-10429 }, { 230,-10429 }, { 231,-10429 }, { 232,-10429 }, + { 233,-10429 }, { 234,-10429 }, { 235,-10429 }, { 236,-10429 }, { 237,-10429 }, + { 238,-10429 }, { 239,-10429 }, { 240,-10429 }, { 241,-10429 }, { 242,-10429 }, + + { 243,-10429 }, { 244,-10429 }, { 245,-10429 }, { 246,-10429 }, { 247,-10429 }, + { 248,-10429 }, { 249,-10429 }, { 250,-10429 }, { 251,-10429 }, { 252,-10429 }, + { 253,-10429 }, { 254,-10429 }, { 255,-10429 }, { 256,-10429 }, { 0, 13 }, + { 0, 412 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 0, 13 }, { 0, 374 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48, 38 }, { 49, 38 }, + { 50, 38 }, { 51, 38 }, { 52, 38 }, { 53, 38 }, { 54, 38 }, + { 55, 38 }, { 56, 38 }, { 57, 38 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 65, 38 }, { 66, 38 }, { 67, 38 }, { 68, 38 }, { 69, 38 }, + { 70, 38 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 48, 116 }, { 49, 116 }, { 50, 116 }, { 51, 116 }, + { 52, 116 }, { 53, 116 }, { 54, 116 }, { 55, 116 }, { 56, 116 }, + { 57, 116 }, { 0, 0 }, { 97, 38 }, { 98, 38 }, { 99, 38 }, + { 100, 38 }, { 101, 38 }, { 102, 38 }, { 65, 116 }, { 66, 116 }, + { 67, 116 }, { 68, 116 }, { 69, 116 }, { 70, 116 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 13 }, { 0, 296 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 125,-10389 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 97, 116 }, { 98, 116 }, { 99, 116 }, { 100, 116 }, { 101, 116 }, + { 102, 116 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 13 }, { 0, 258 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 125,-10427 }, { 48,-4797 }, + { 49,-4797 }, { 50,-4797 }, { 51,-4797 }, { 52,-4797 }, { 53,-4797 }, + { 54,-4797 }, { 55,-4797 }, { 56,-4797 }, { 57,-4797 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 65,-4797 }, { 66,-4797 }, { 67,-4797 }, { 68,-4797 }, + + { 69,-4797 }, { 70,-4797 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 48,-7457 }, { 49,-7457 }, { 50,-7457 }, + { 
51,-7457 }, { 52,-7457 }, { 53,-7457 }, { 54,-7457 }, { 55,-7457 }, + { 56,-7457 }, { 57,-7457 }, { 0, 0 }, { 97,-4797 }, { 98,-4797 }, + { 99,-4797 }, { 100,-4797 }, { 101,-4797 }, { 102,-4797 }, { 65,-7457 }, + { 66,-7457 }, { 67,-7457 }, { 68,-7457 }, { 69,-7457 }, { 70,-7457 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 123,-4774 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 97,-7457 }, { 98,-7457 }, { 99,-7457 }, { 100,-7457 }, + { 101,-7457 }, { 102,-7457 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 125,-10543 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 257, 54 }, { 1, 0 }, }; + +static __thread const struct yy_trans_info *yy_start_state_list[11] = + { + &yy_transition[1], + &yy_transition[3], + &yy_transition[261], + &yy_transition[519], + &yy_transition[777], + &yy_transition[1035], + &yy_transition[1293], + &yy_transition[1551], + &yy_transition[1809], + &yy_transition[2067], + &yy_transition[2325], + + } ; + +extern __thread int yy_flex_debug; +__thread int yy_flex_debug = 0; + +/* The intent behind this definition is that it'll catch + * any uses of REJECT which flex missed. 
+ */ +#define REJECT reject_used_but_not_detected +#define yymore() yymore_used_but_not_detected +#define YY_MORE_ADJ 0 +#define YY_RESTORE_YY_MORE_OFFSET +__thread char *yytext; +#line 1 "jsonpath_scan.l" + +#line 33 "jsonpath_scan.l" +static __thread JsonPathString scanstring; + +/* Handles to the buffer that the lexer uses internally */ +static __thread YY_BUFFER_STATE scanbufhandle; +static __thread char *scanbuf; +static __thread int scanbuflen; + +static void addstring(bool init, char *s, int l); +static void addchar(bool init, char c); +static enum yytokentype checkKeyword(void); +static bool parseUnicode(char *s, int l, struct Node *escontext); +static bool parseHexChar(char *s, struct Node *escontext); + +/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */ +#undef fprintf +#define fprintf(file, fmt, msg) fprintf_to_ereport(fmt, msg) + +static void +fprintf_to_ereport(const char *fmt, const char *msg) +{ + ereport(ERROR, (errmsg_internal("%s", msg))); +} + +/* LCOV_EXCL_START */ + +#line 4161 "jsonpath_scan.c" +#define YY_NO_INPUT 1 +/* + * We use exclusive states for quoted and non-quoted strings, + * quoted variable names and C-style comments. + * Exclusive states: + * <xq> - quoted strings + * <xnq> - non-quoted strings + * <xvq> - quoted variable names + * <xc> - C-style comment + */ + +/* "other" means anything that's not special, blank, or '\' or '"' */ +/* DecimalInteger in ECMAScript; must not start with 0 unless it's exactly 0 */ +/* DecimalDigits in ECMAScript; only used as part of other rules */ +/* Non-decimal integers; in ECMAScript, these must not have underscore after prefix */ +#line 4177 "jsonpath_scan.c" + +#define INITIAL 0 +#define xq 1 +#define xnq 2 +#define xvq 3 +#define xc 4 + +#ifndef YY_NO_UNISTD_H +/* Special case for "unistd.h", since it is non-ANSI. We include it way + * down here because we want the user's section 1 to have been scanned first. + * The user has a chance to override it with an option. + */ +#include <unistd.h> +#endif + +#ifndef YY_EXTRA_TYPE +#define YY_EXTRA_TYPE void * +#endif + +static int yy_init_globals ( void ); + +/* Accessor methods to globals. + These are made visible to non-reentrant scanners for convenience. */ + +int yylex_destroy ( void ); + +int yyget_debug ( void ); + +void yyset_debug ( int debug_flag ); + +YY_EXTRA_TYPE yyget_extra ( void ); + +void yyset_extra ( YY_EXTRA_TYPE user_defined ); + +FILE *yyget_in ( void ); + +void yyset_in ( FILE * _in_str ); + +FILE *yyget_out ( void ); + +void yyset_out ( FILE * _out_str ); + + int yyget_leng ( void ); + +char *yyget_text ( void ); + +int yyget_lineno ( void ); + +void yyset_lineno ( int _line_number ); + +YYSTYPE * yyget_lval ( void ); + +void yyset_lval ( YYSTYPE * yylval_param ); + +/* Macros after this point can all be overridden by user definitions in + * section 1. + */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int yywrap ( void ); +#else +extern int yywrap ( void ); +#endif +#endif + +#ifndef YY_NO_UNPUT + +#endif + +#ifndef yytext_ptr +static void yy_flex_strncpy ( char *, const char *, int ); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen ( const char * ); +#endif + +#ifndef YY_NO_INPUT +#ifdef __cplusplus +static int yyinput ( void ); +#else +static int input ( void ); +#endif + +#endif + +/* Amount of stuff to slurp up with each read. 
*/ +#ifndef YY_READ_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k */ +#define YY_READ_BUF_SIZE 16384 +#else +#define YY_READ_BUF_SIZE 8192 +#endif /* __ia64__ */ +#endif + +/* Copy whatever the last rule matched to the standard output. */ +#ifndef ECHO +/* This used to be an fputs(), but since the string might contain NUL's, + * we now use fwrite(). + */ +#define ECHO do { if (fwrite( yytext, (size_t) yyleng, 1, yyout )) {} } while (0) +#endif + +/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, + * is returned in "result". + */ +#ifndef YY_INPUT +#define YY_INPUT(buf,result,max_size) \ + if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \ + { \ + int c = '*'; \ + int n; \ + for ( n = 0; n < max_size && \ + (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ + buf[n] = (char) c; \ + if ( c == '\n' ) \ + buf[n++] = (char) c; \ + if ( c == EOF && ferror( yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + result = n; \ + } \ + else \ + { \ + errno=0; \ + while ( (result = (int) fread(buf, 1, (yy_size_t) max_size, yyin)) == 0 && ferror(yyin)) \ + { \ + if( errno != EINTR) \ + { \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + break; \ + } \ + errno=0; \ + clearerr(yyin); \ + } \ + }\ +\ + +#endif + +/* No semi-colon after return; correct usage is to write "yyterminate();" - + * we don't want an extra ';' after the "return" because that will cause + * some compilers to complain about unreachable statements. + */ +#ifndef yyterminate +#define yyterminate() return YY_NULL +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Report a fatal error. */ +#ifndef YY_FATAL_ERROR +#define YY_FATAL_ERROR(msg) yy_fatal_error( msg ) +#endif + +/* end tables serialization structures and prototypes */ + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. + */ +#ifndef YY_DECL +#define YY_DECL_IS_OURS 1 + +extern int yylex \ + (YYSTYPE * yylval_param ); + +#define YY_DECL int yylex \ + (YYSTYPE * yylval_param ) +#endif /* !YY_DECL */ + +/* Code executed at the beginning of each rule, after yytext and yyleng + * have been set up. + */ +#ifndef YY_USER_ACTION +#define YY_USER_ACTION +#endif + +/* Code executed at the end of each rule. */ +#ifndef YY_BREAK +#define YY_BREAK /*LINTED*/break; +#endif + +#define YY_RULE_SETUP \ + YY_USER_ACTION + +/** The main scanner function which does all the work. + */ +YY_DECL +{ + yy_state_type yy_current_state; + char *yy_cp, *yy_bp; + int yy_act; + + YYSTYPE * yylval; + + yylval = yylval_param; + + if ( !(yy_init) ) + { + (yy_init) = 1; + +#ifdef YY_USER_INIT + YY_USER_INIT; +#endif + + if ( ! (yy_start) ) + (yy_start) = 1; /* first start state */ + + if ( ! yyin ) + yyin = stdin; + + if ( ! yyout ) + yyout = stdout; + + if ( ! YY_CURRENT_BUFFER ) { + yyensure_buffer_stack (); + YY_CURRENT_BUFFER_LVALUE = + yy_create_buffer( yyin, YY_BUF_SIZE ); + } + + yy_load_buffer_state( ); + } + + { +#line 120 "jsonpath_scan.l" + + +#line 4409 "jsonpath_scan.c" + + while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */ + { + yy_cp = (yy_c_buf_p); + + /* Support of yytext. */ + *yy_cp = (yy_hold_char); + + /* yy_bp points to the position in yy_ch_buf of the start of + * the current run. 
+ */ + yy_bp = yy_cp; + + yy_current_state = yy_start_state_list[(yy_start)]; +yy_match: + { + const struct yy_trans_info *yy_trans_info; + + YY_CHAR yy_c; + + for ( yy_c = YY_SC_TO_UI(*yy_cp); + (yy_trans_info = &yy_current_state[yy_c])-> + yy_verify == yy_c; + yy_c = YY_SC_TO_UI(*++yy_cp) ) + yy_current_state += yy_trans_info->yy_nxt; + } + +yy_find_action: + yy_act = yy_current_state[-1].yy_nxt; + + YY_DO_BEFORE_ACTION; + +do_action: /* This label is used only to access EOF actions. */ + + switch ( yy_act ) + { /* beginning of action switch */ +case 1: +YY_RULE_SETUP +#line 122 "jsonpath_scan.l" +{ + addstring(false, yytext, yyleng); + } + YY_BREAK +case 2: +/* rule 2 can match eol */ +YY_RULE_SETUP +#line 126 "jsonpath_scan.l" +{ + yylval->str = scanstring; + BEGIN INITIAL; + return checkKeyword(); + } + YY_BREAK +case 3: +YY_RULE_SETUP +#line 132 "jsonpath_scan.l" +{ + yylval->str = scanstring; + BEGIN xc; + } + YY_BREAK +case 4: +YY_RULE_SETUP +#line 137 "jsonpath_scan.l" +{ + yylval->str = scanstring; + yyless(0); + BEGIN INITIAL; + return checkKeyword(); + } + YY_BREAK +case YY_STATE_EOF(xnq): +#line 144 "jsonpath_scan.l" +{ + yylval->str = scanstring; + BEGIN INITIAL; + return checkKeyword(); + } + YY_BREAK +case 5: +YY_RULE_SETUP +#line 150 "jsonpath_scan.l" +{ addchar(false, '\b'); } + YY_BREAK +case 6: +YY_RULE_SETUP +#line 152 "jsonpath_scan.l" +{ addchar(false, '\f'); } + YY_BREAK +case 7: +YY_RULE_SETUP +#line 154 "jsonpath_scan.l" +{ addchar(false, '\n'); } + YY_BREAK +case 8: +YY_RULE_SETUP +#line 156 "jsonpath_scan.l" +{ addchar(false, '\r'); } + YY_BREAK +case 9: +YY_RULE_SETUP +#line 158 "jsonpath_scan.l" +{ addchar(false, '\t'); } + YY_BREAK +case 10: +YY_RULE_SETUP +#line 160 "jsonpath_scan.l" +{ addchar(false, '\v'); } + YY_BREAK +case 11: +YY_RULE_SETUP +#line 162 "jsonpath_scan.l" +{ + if (!parseUnicode(yytext, yyleng, escontext)) + yyterminate(); + } + YY_BREAK +case 12: +YY_RULE_SETUP +#line 167 "jsonpath_scan.l" +{ + if (!parseHexChar(yytext, escontext)) + yyterminate(); + } + YY_BREAK +case 13: +YY_RULE_SETUP +#line 172 "jsonpath_scan.l" +{ + jsonpath_yyerror(NULL, escontext, + "invalid Unicode escape sequence"); + yyterminate(); + } + YY_BREAK +case 14: +YY_RULE_SETUP +#line 178 "jsonpath_scan.l" +{ + jsonpath_yyerror(NULL, escontext, + "invalid hexadecimal character sequence"); + yyterminate(); + } + YY_BREAK +case 15: +YY_RULE_SETUP +#line 184 "jsonpath_scan.l" +{ + /* throw back the \\, and treat as unicode */ + yyless(yyleng - 1); + if (!parseUnicode(yytext, yyleng, escontext)) + yyterminate(); + } + YY_BREAK +case 16: +YY_RULE_SETUP +#line 191 "jsonpath_scan.l" +{ addchar(false, yytext[1]); } + YY_BREAK +case 17: +YY_RULE_SETUP +#line 193 "jsonpath_scan.l" +{ + jsonpath_yyerror(NULL, escontext, + "unexpected end after backslash"); + yyterminate(); + } + YY_BREAK +case YY_STATE_EOF(xq): +case YY_STATE_EOF(xvq): +#line 199 "jsonpath_scan.l" +{ + jsonpath_yyerror(NULL, escontext, + "unterminated quoted string"); + yyterminate(); + } + YY_BREAK +case 18: +YY_RULE_SETUP +#line 205 "jsonpath_scan.l" +{ + yylval->str = scanstring; + BEGIN INITIAL; + return STRING_P; + } + YY_BREAK +case 19: +YY_RULE_SETUP +#line 211 "jsonpath_scan.l" +{ + yylval->str = scanstring; + BEGIN INITIAL; + return VARIABLE_P; + } + YY_BREAK +case 20: +/* rule 20 can match eol */ +YY_RULE_SETUP +#line 217 "jsonpath_scan.l" +{ addstring(false, yytext, yyleng); } + YY_BREAK +case 21: +YY_RULE_SETUP +#line 219 "jsonpath_scan.l" +{ BEGIN INITIAL; } + YY_BREAK +case 22: +/* rule 22 can match 
eol */ +YY_RULE_SETUP +#line 221 "jsonpath_scan.l" +{ } + YY_BREAK +case 23: +YY_RULE_SETUP +#line 223 "jsonpath_scan.l" +{ } + YY_BREAK +case YY_STATE_EOF(xc): +#line 225 "jsonpath_scan.l" +{ + jsonpath_yyerror( + NULL, escontext, + "unexpected end of comment"); + yyterminate(); + } + YY_BREAK +case 24: +YY_RULE_SETUP +#line 231 "jsonpath_scan.l" +{ return AND_P; } + YY_BREAK +case 25: +YY_RULE_SETUP +#line 233 "jsonpath_scan.l" +{ return OR_P; } + YY_BREAK +case 26: +YY_RULE_SETUP +#line 235 "jsonpath_scan.l" +{ return NOT_P; } + YY_BREAK +case 27: +YY_RULE_SETUP +#line 237 "jsonpath_scan.l" +{ return ANY_P; } + YY_BREAK +case 28: +YY_RULE_SETUP +#line 239 "jsonpath_scan.l" +{ return LESS_P; } + YY_BREAK +case 29: +YY_RULE_SETUP +#line 241 "jsonpath_scan.l" +{ return LESSEQUAL_P; } + YY_BREAK +case 30: +YY_RULE_SETUP +#line 243 "jsonpath_scan.l" +{ return EQUAL_P; } + YY_BREAK +case 31: +YY_RULE_SETUP +#line 245 "jsonpath_scan.l" +{ return NOTEQUAL_P; } + YY_BREAK +case 32: +YY_RULE_SETUP +#line 247 "jsonpath_scan.l" +{ return NOTEQUAL_P; } + YY_BREAK +case 33: +YY_RULE_SETUP +#line 249 "jsonpath_scan.l" +{ return GREATEREQUAL_P; } + YY_BREAK +case 34: +YY_RULE_SETUP +#line 251 "jsonpath_scan.l" +{ return GREATER_P; } + YY_BREAK +case 35: +YY_RULE_SETUP +#line 253 "jsonpath_scan.l" +{ + addstring(true, yytext + 1, yyleng - 1); + addchar(false, '\0'); + yylval->str = scanstring; + return VARIABLE_P; + } + YY_BREAK +case 36: +YY_RULE_SETUP +#line 260 "jsonpath_scan.l" +{ + addchar(true, '\0'); + BEGIN xvq; + } + YY_BREAK +case 37: +YY_RULE_SETUP +#line 265 "jsonpath_scan.l" +{ return *yytext; } + YY_BREAK +case 38: +/* rule 38 can match eol */ +YY_RULE_SETUP +#line 267 "jsonpath_scan.l" +{ /* ignore */ } + YY_BREAK +case 39: +YY_RULE_SETUP +#line 269 "jsonpath_scan.l" +{ + addchar(true, '\0'); + BEGIN xc; + } + YY_BREAK +case 40: +YY_RULE_SETUP +#line 274 "jsonpath_scan.l" +{ + addstring(true, yytext, yyleng); + addchar(false, '\0'); + yylval->str = scanstring; + return NUMERIC_P; + } + YY_BREAK +case 41: +YY_RULE_SETUP +#line 281 "jsonpath_scan.l" +{ + addstring(true, yytext, yyleng); + addchar(false, '\0'); + yylval->str = scanstring; + return NUMERIC_P; + } + YY_BREAK +case 42: +YY_RULE_SETUP +#line 288 "jsonpath_scan.l" +{ + addstring(true, yytext, yyleng); + addchar(false, '\0'); + yylval->str = scanstring; + return INT_P; + } + YY_BREAK +case 43: +YY_RULE_SETUP +#line 295 "jsonpath_scan.l" +{ + addstring(true, yytext, yyleng); + addchar(false, '\0'); + yylval->str = scanstring; + return INT_P; + } + YY_BREAK +case 44: +YY_RULE_SETUP +#line 302 "jsonpath_scan.l" +{ + addstring(true, yytext, yyleng); + addchar(false, '\0'); + yylval->str = scanstring; + return INT_P; + } + YY_BREAK +case 45: +YY_RULE_SETUP +#line 309 "jsonpath_scan.l" +{ + addstring(true, yytext, yyleng); + addchar(false, '\0'); + yylval->str = scanstring; + return INT_P; + } + YY_BREAK +case 46: +YY_RULE_SETUP +#line 316 "jsonpath_scan.l" +{ + jsonpath_yyerror( + NULL, escontext, + "invalid numeric literal"); + yyterminate(); + } + YY_BREAK +case 47: +YY_RULE_SETUP +#line 322 "jsonpath_scan.l" +{ + jsonpath_yyerror( + NULL, escontext, + "trailing junk after numeric literal"); + yyterminate(); + } + YY_BREAK +case 48: +YY_RULE_SETUP +#line 328 "jsonpath_scan.l" +{ + jsonpath_yyerror( + NULL, escontext, + "trailing junk after numeric literal"); + yyterminate(); + } + YY_BREAK +case 49: +YY_RULE_SETUP +#line 334 "jsonpath_scan.l" +{ + jsonpath_yyerror( + NULL, escontext, + "trailing junk after numeric literal"); + 
yyterminate(); + } + YY_BREAK +case 50: +YY_RULE_SETUP +#line 340 "jsonpath_scan.l" +{ + addchar(true, '\0'); + BEGIN xq; + } + YY_BREAK +case 51: +YY_RULE_SETUP +#line 345 "jsonpath_scan.l" +{ + yyless(0); + addchar(true, '\0'); + BEGIN xnq; + } + YY_BREAK +case 52: +YY_RULE_SETUP +#line 351 "jsonpath_scan.l" +{ + addstring(true, yytext, yyleng); + BEGIN xnq; + } + YY_BREAK +case YY_STATE_EOF(INITIAL): +#line 356 "jsonpath_scan.l" +{ yyterminate(); } + YY_BREAK +case 53: +YY_RULE_SETUP +#line 358 "jsonpath_scan.l" +YY_FATAL_ERROR( "flex scanner jammed" ); + YY_BREAK +#line 4861 "jsonpath_scan.c" + + case YY_END_OF_BUFFER: + { + /* Amount of text matched not including the EOB char. */ + int yy_amount_of_matched_text = (int) (yy_cp - (yytext_ptr)) - 1; + + /* Undo the effects of YY_DO_BEFORE_ACTION. */ + *yy_cp = (yy_hold_char); + YY_RESTORE_YY_MORE_OFFSET + + if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW ) + { + /* We're scanning a new file or input source. It's + * possible that this happened because the user + * just pointed yyin at a new source and called + * yylex(). If so, then we have to assure + * consistency between YY_CURRENT_BUFFER and our + * globals. Here is the right place to do so, because + * this is the first action (other than possibly a + * back-up) that will match for the new input source. + */ + (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; + YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin; + YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL; + } + + /* Note that here we test for yy_c_buf_p "<=" to the position + * of the first EOB in the buffer, since yy_c_buf_p will + * already have been incremented past the NUL character + * (since all states make transitions on EOB to the + * end-of-buffer state). Contrast this with the test + * in input(). + */ + if ( (yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] ) + { /* This was really a NUL. */ + yy_state_type yy_next_state; + + (yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state( ); + + /* Okay, we're now positioned to make the NUL + * transition. We couldn't have + * yy_get_previous_state() go ahead and do it + * for us because it doesn't know how to deal + * with the possibility of jamming (and we don't + * want to build jamming into it because then it + * will run more slowly). + */ + + yy_next_state = yy_try_NUL_trans( yy_current_state ); + + yy_bp = (yytext_ptr) + YY_MORE_ADJ; + + if ( yy_next_state ) + { + /* Consume the NUL. */ + yy_cp = ++(yy_c_buf_p); + yy_current_state = yy_next_state; + goto yy_match; + } + + else + { + yy_cp = (yy_c_buf_p); + goto yy_find_action; + } + } + + else switch ( yy_get_next_buffer( ) ) + { + case EOB_ACT_END_OF_FILE: + { + (yy_did_buffer_switch_on_eof) = 0; + + if ( yywrap( ) ) + { + /* Note: because we've taken care in + * yy_get_next_buffer() to have set up + * yytext, we can now set up + * yy_c_buf_p so that if some total + * hoser (like flex itself) wants to + * call the scanner after we return the + * YY_NULL, it'll still work - another + * YY_NULL will get returned. + */ + (yy_c_buf_p) = (yytext_ptr) + YY_MORE_ADJ; + + yy_act = YY_STATE_EOF(YY_START); + goto do_action; + } + + else + { + if ( ! 
(yy_did_buffer_switch_on_eof) ) + YY_NEW_FILE; + } + break; + } + + case EOB_ACT_CONTINUE_SCAN: + (yy_c_buf_p) = + (yytext_ptr) + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state( ); + + yy_cp = (yy_c_buf_p); + yy_bp = (yytext_ptr) + YY_MORE_ADJ; + goto yy_match; + + case EOB_ACT_LAST_MATCH: + (yy_c_buf_p) = + &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)]; + + yy_current_state = yy_get_previous_state( ); + + yy_cp = (yy_c_buf_p); + yy_bp = (yytext_ptr) + YY_MORE_ADJ; + goto yy_find_action; + } + break; + } + + default: + YY_FATAL_ERROR( + "fatal flex scanner internal error--no action found" ); + } /* end of action switch */ + } /* end of scanning one token */ + } /* end of user's declarations */ +} /* end of yylex */ + +/* yy_get_next_buffer - try to read in a new buffer + * + * Returns a code representing an action: + * EOB_ACT_LAST_MATCH - + * EOB_ACT_CONTINUE_SCAN - continue scanning from current position + * EOB_ACT_END_OF_FILE - end of file + */ +static int yy_get_next_buffer (void) +{ + char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; + char *source = (yytext_ptr); + int number_to_move, i; + int ret_val; + + if ( (yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] ) + YY_FATAL_ERROR( + "fatal flex scanner internal error--end of buffer missed" ); + + if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 ) + { /* Don't try to fill the buffer, so this is an EOF. */ + if ( (yy_c_buf_p) - (yytext_ptr) - YY_MORE_ADJ == 1 ) + { + /* We matched a single character, the EOB, so + * treat this as a final EOF. + */ + return EOB_ACT_END_OF_FILE; + } + + else + { + /* We matched some text prior to the EOB, first + * process it. + */ + return EOB_ACT_LAST_MATCH; + } + } + + /* Try to read more data. */ + + /* First move last chars to start of buffer. */ + number_to_move = (int) ((yy_c_buf_p) - (yytext_ptr) - 1); + + for ( i = 0; i < number_to_move; ++i ) + *(dest++) = *(source++); + + if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING ) + /* don't do the read, it's not guaranteed to return an EOF, + * just force an EOF + */ + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = 0; + + else + { + int num_to_read = + YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1; + + while ( num_to_read <= 0 ) + { /* Not enough room in the buffer - grow it. */ + + /* just a shorter name for the current buffer */ + YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE; + + int yy_c_buf_p_offset = + (int) ((yy_c_buf_p) - b->yy_ch_buf); + + if ( b->yy_is_our_buffer ) + { + int new_size = b->yy_buf_size * 2; + + if ( new_size <= 0 ) + b->yy_buf_size += b->yy_buf_size / 8; + else + b->yy_buf_size *= 2; + + b->yy_ch_buf = (char *) + /* Include room in for 2 EOB chars. */ + yyrealloc( (void *) b->yy_ch_buf, + (yy_size_t) (b->yy_buf_size + 2) ); + } + else + /* Can't grow it, we don't own it. */ + b->yy_ch_buf = NULL; + + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( + "fatal error - scanner input buffer overflow" ); + + (yy_c_buf_p) = &b->yy_ch_buf[yy_c_buf_p_offset]; + + num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size - + number_to_move - 1; + + } + + if ( num_to_read > YY_READ_BUF_SIZE ) + num_to_read = YY_READ_BUF_SIZE; + + /* Read in more data. 
*/ + YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]), + (yy_n_chars), num_to_read ); + + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); + } + + if ( (yy_n_chars) == 0 ) + { + if ( number_to_move == YY_MORE_ADJ ) + { + ret_val = EOB_ACT_END_OF_FILE; + yyrestart( yyin ); + } + + else + { + ret_val = EOB_ACT_LAST_MATCH; + YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = + YY_BUFFER_EOF_PENDING; + } + } + + else + ret_val = EOB_ACT_CONTINUE_SCAN; + + if (((yy_n_chars) + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) { + /* Extend the array by 50%, plus the number we really need. */ + int new_size = (yy_n_chars) + number_to_move + ((yy_n_chars) >> 1); + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc( + (void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf, (yy_size_t) new_size ); + if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" ); + /* "- 2" to take care of EOB's */ + YY_CURRENT_BUFFER_LVALUE->yy_buf_size = (int) (new_size - 2); + } + + (yy_n_chars) += number_to_move; + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] = YY_END_OF_BUFFER_CHAR; + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR; + + (yytext_ptr) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0]; + + return ret_val; +} + +/* yy_get_previous_state - get the state just before the EOB char was reached */ + + static yy_state_type yy_get_previous_state (void) +{ + yy_state_type yy_current_state; + char *yy_cp; + + yy_current_state = yy_start_state_list[(yy_start)]; + + for ( yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp ) + { + yy_current_state += yy_current_state[(*yy_cp ? YY_SC_TO_UI(*yy_cp) : 256)].yy_nxt; + } + + return yy_current_state; +} + +/* yy_try_NUL_trans - try to make a transition on the NUL character + * + * synopsis + * next_state = yy_try_NUL_trans( current_state ); + */ + static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state ) +{ + int yy_is_jam; + + int yy_c = 256; + const struct yy_trans_info *yy_trans_info; + + yy_trans_info = &yy_current_state[(unsigned int) yy_c]; + yy_current_state += yy_trans_info->yy_nxt; + yy_is_jam = (yy_trans_info->yy_verify != yy_c); + + return yy_is_jam ? 0 : yy_current_state; +} + +#ifndef YY_NO_UNPUT + +#endif + +#ifndef YY_NO_INPUT +#ifdef __cplusplus + static int yyinput (void) +#else + static int input (void) +#endif + +{ + int c; + + *(yy_c_buf_p) = (yy_hold_char); + + if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR ) + { + /* yy_c_buf_p now points to the character we want to return. + * If this occurs *before* the EOB characters, then it's a + * valid NUL; if not, then we've hit the end of the buffer. + */ + if ( (yy_c_buf_p) < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] ) + /* This was really a NUL. */ + *(yy_c_buf_p) = '\0'; + + else + { /* need more input */ + int offset = (int) ((yy_c_buf_p) - (yytext_ptr)); + ++(yy_c_buf_p); + + switch ( yy_get_next_buffer( ) ) + { + case EOB_ACT_LAST_MATCH: + /* This happens because yy_g_n_b() + * sees that we've accumulated a + * token and flags that we need to + * try matching the token before + * proceeding. But for input(), + * there's no matching to consider. + * So convert the EOB_ACT_LAST_MATCH + * to EOB_ACT_END_OF_FILE. + */ + + /* Reset buffer status. */ + yyrestart( yyin ); + + /*FALLTHROUGH*/ + + case EOB_ACT_END_OF_FILE: + { + if ( yywrap( ) ) + return 0; + + if ( ! 
(yy_did_buffer_switch_on_eof) ) + YY_NEW_FILE; +#ifdef __cplusplus + return yyinput(); +#else + return input(); +#endif + } + + case EOB_ACT_CONTINUE_SCAN: + (yy_c_buf_p) = (yytext_ptr) + offset; + break; + } + } + } + + c = *(unsigned char *) (yy_c_buf_p); /* cast for 8-bit char's */ + *(yy_c_buf_p) = '\0'; /* preserve yytext */ + (yy_hold_char) = *++(yy_c_buf_p); + + return c; +} +#endif /* ifndef YY_NO_INPUT */ + +/** Immediately switch to a different input stream. + * @param input_file A readable stream. + * + * @note This function does not reset the start condition to @c INITIAL . + */ + void yyrestart (FILE * input_file ) +{ + + if ( ! YY_CURRENT_BUFFER ){ + yyensure_buffer_stack (); + YY_CURRENT_BUFFER_LVALUE = + yy_create_buffer( yyin, YY_BUF_SIZE ); + } + + yy_init_buffer( YY_CURRENT_BUFFER, input_file ); + yy_load_buffer_state( ); +} + +/** Switch to a different input buffer. + * @param new_buffer The new input buffer. + * + */ + void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ) +{ + + /* TODO. We should be able to replace this entire function body + * with + * yypop_buffer_state(); + * yypush_buffer_state(new_buffer); + */ + yyensure_buffer_stack (); + if ( YY_CURRENT_BUFFER == new_buffer ) + return; + + if ( YY_CURRENT_BUFFER ) + { + /* Flush out information for old buffer. */ + *(yy_c_buf_p) = (yy_hold_char); + YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p); + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); + } + + YY_CURRENT_BUFFER_LVALUE = new_buffer; + yy_load_buffer_state( ); + + /* We don't actually know whether we did this switch during + * EOF (yywrap()) processing, but the only time this flag + * is looked at is after yywrap() is called, so it's safe + * to go ahead and always set it. + */ + (yy_did_buffer_switch_on_eof) = 1; +} + +static void yy_load_buffer_state (void) +{ + (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; + (yytext_ptr) = (yy_c_buf_p) = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos; + yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file; + (yy_hold_char) = *(yy_c_buf_p); +} + +/** Allocate and initialize an input buffer state. + * @param file A readable stream. + * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE. + * + * @return the allocated buffer state. + */ + YY_BUFFER_STATE yy_create_buffer (FILE * file, int size ) +{ + YY_BUFFER_STATE b; + + b = (YY_BUFFER_STATE) yyalloc( sizeof( struct yy_buffer_state ) ); + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_buf_size = size; + + /* yy_ch_buf has to be 2 characters longer than the size given because + * we need to put in 2 end-of-buffer characters. + */ + b->yy_ch_buf = (char *) yyalloc( (yy_size_t) (b->yy_buf_size + 2) ); + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_is_our_buffer = 1; + + yy_init_buffer( b, file ); + + return b; +} + +/** Destroy the buffer. + * @param b a buffer created with yy_create_buffer() + * + */ + void yy_delete_buffer (YY_BUFFER_STATE b ) +{ + + if ( ! b ) + return; + + if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */ + YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0; + + if ( b->yy_is_our_buffer ) + yyfree( (void *) b->yy_ch_buf ); + + yyfree( (void *) b ); +} + +/* Initializes or reinitializes a buffer. + * This function is sometimes called more than once on the same buffer, + * such as during a yyrestart() or at EOF. 
+ */ + static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file ) + +{ + int oerrno = errno; + + yy_flush_buffer( b ); + + b->yy_input_file = file; + b->yy_fill_buffer = 1; + + /* If b is the current buffer, then yy_init_buffer was _probably_ + * called from yyrestart() or through yy_get_next_buffer. + * In that case, we don't want to reset the lineno or column. + */ + if (b != YY_CURRENT_BUFFER){ + b->yy_bs_lineno = 1; + b->yy_bs_column = 0; + } + + b->yy_is_interactive = 0; + + errno = oerrno; +} + +/** Discard all buffered characters. On the next scan, YY_INPUT will be called. + * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER. + * + */ + void yy_flush_buffer (YY_BUFFER_STATE b ) +{ + if ( ! b ) + return; + + b->yy_n_chars = 0; + + /* We always need two end-of-buffer characters. The first causes + * a transition to the end-of-buffer state. The second causes + * a jam in that state. + */ + b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; + b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; + + b->yy_buf_pos = &b->yy_ch_buf[0]; + + b->yy_at_bol = 1; + b->yy_buffer_status = YY_BUFFER_NEW; + + if ( b == YY_CURRENT_BUFFER ) + yy_load_buffer_state( ); +} + +/** Pushes the new state onto the stack. The new state becomes + * the current state. This function will allocate the stack + * if necessary. + * @param new_buffer The new state. + * + */ +void yypush_buffer_state (YY_BUFFER_STATE new_buffer ) +{ + if (new_buffer == NULL) + return; + + yyensure_buffer_stack(); + + /* This block is copied from yy_switch_to_buffer. */ + if ( YY_CURRENT_BUFFER ) + { + /* Flush out information for old buffer. */ + *(yy_c_buf_p) = (yy_hold_char); + YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p); + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); + } + + /* Only push if top exists. Otherwise, replace top. */ + if (YY_CURRENT_BUFFER) + (yy_buffer_stack_top)++; + YY_CURRENT_BUFFER_LVALUE = new_buffer; + + /* copied from yy_switch_to_buffer. */ + yy_load_buffer_state( ); + (yy_did_buffer_switch_on_eof) = 1; +} + +/** Removes and deletes the top of the stack, if present. + * The next element becomes the new top. + * + */ +void yypop_buffer_state (void) +{ + if (!YY_CURRENT_BUFFER) + return; + + yy_delete_buffer(YY_CURRENT_BUFFER ); + YY_CURRENT_BUFFER_LVALUE = NULL; + if ((yy_buffer_stack_top) > 0) + --(yy_buffer_stack_top); + + if (YY_CURRENT_BUFFER) { + yy_load_buffer_state( ); + (yy_did_buffer_switch_on_eof) = 1; + } +} + +/* Allocates the stack if it does not exist. + * Guarantees space for at least one push. + */ +static void yyensure_buffer_stack (void) +{ + yy_size_t num_to_alloc; + + if (!(yy_buffer_stack)) { + + /* First allocation is just for 2 elements, since we don't know if this + * scanner will even need a stack. We use 2 instead of 1 to avoid an + * immediate realloc on the next call. + */ + num_to_alloc = 1; /* After all that talk, this was set to 1 anyways... */ + (yy_buffer_stack) = (struct yy_buffer_state**)yyalloc + (num_to_alloc * sizeof(struct yy_buffer_state*) + ); + if ( ! (yy_buffer_stack) ) + YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" ); + + memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*)); + + (yy_buffer_stack_max) = num_to_alloc; + (yy_buffer_stack_top) = 0; + return; + } + + if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1){ + + /* Increase the buffer to prepare for a possible push. 
*/ + yy_size_t grow_size = 8 /* arbitrary grow size */; + + num_to_alloc = (yy_buffer_stack_max) + grow_size; + (yy_buffer_stack) = (struct yy_buffer_state**)yyrealloc + ((yy_buffer_stack), + num_to_alloc * sizeof(struct yy_buffer_state*) + ); + if ( ! (yy_buffer_stack) ) + YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" ); + + /* zero only the new slots.*/ + memset((yy_buffer_stack) + (yy_buffer_stack_max), 0, grow_size * sizeof(struct yy_buffer_state*)); + (yy_buffer_stack_max) = num_to_alloc; + } +} + +/** Setup the input buffer state to scan directly from a user-specified character buffer. + * @param base the character buffer + * @param size the size in bytes of the character buffer + * + * @return the newly allocated buffer state object. + */ +YY_BUFFER_STATE yy_scan_buffer (char * base, yy_size_t size ) +{ + YY_BUFFER_STATE b; + + if ( size < 2 || + base[size-2] != YY_END_OF_BUFFER_CHAR || + base[size-1] != YY_END_OF_BUFFER_CHAR ) + /* They forgot to leave room for the EOB's. */ + return NULL; + + b = (YY_BUFFER_STATE) yyalloc( sizeof( struct yy_buffer_state ) ); + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" ); + + b->yy_buf_size = (int) (size - 2); /* "- 2" to take care of EOB's */ + b->yy_buf_pos = b->yy_ch_buf = base; + b->yy_is_our_buffer = 0; + b->yy_input_file = NULL; + b->yy_n_chars = b->yy_buf_size; + b->yy_is_interactive = 0; + b->yy_at_bol = 1; + b->yy_fill_buffer = 0; + b->yy_buffer_status = YY_BUFFER_NEW; + + yy_switch_to_buffer( b ); + + return b; +} + +/** Setup the input buffer state to scan a string. The next call to yylex() will + * scan from a @e copy of @a str. + * @param yystr a NUL-terminated string to scan + * + * @return the newly allocated buffer state object. + * @note If you want to scan bytes that may contain NUL values, then use + * yy_scan_bytes() instead. + */ +YY_BUFFER_STATE yy_scan_string (const char * yystr ) +{ + + return yy_scan_bytes( yystr, (int) strlen(yystr) ); +} + +/** Setup the input buffer state to scan the given bytes. The next call to yylex() will + * scan from a @e copy of @a bytes. + * @param yybytes the byte buffer to scan + * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes. + * + * @return the newly allocated buffer state object. + */ +YY_BUFFER_STATE yy_scan_bytes (const char * yybytes, int _yybytes_len ) +{ + YY_BUFFER_STATE b; + char *buf; + yy_size_t n; + int i; + + /* Get memory for full buffer, including space for trailing EOB's. */ + n = (yy_size_t) (_yybytes_len + 2); + buf = (char *) yyalloc( n ); + if ( ! buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" ); + + for ( i = 0; i < _yybytes_len; ++i ) + buf[i] = yybytes[i]; + + buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR; + + b = yy_scan_buffer( buf, n ); + if ( ! b ) + YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" ); + + /* It's okay to grow etc. this buffer, and we should throw it + * away when we're done. + */ + b->yy_is_our_buffer = 1; + + return b; +} + +#ifndef YY_EXIT_FAILURE +#define YY_EXIT_FAILURE 2 +#endif + +static void yynoreturn yy_fatal_error (const char* msg ) +{ + fprintf( stderr, "%s\n", msg ); + exit( YY_EXIT_FAILURE ); +} + +/* Redefine yyless() so it works in section 3 code. */ + +#undef yyless +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. 
*/ \ + int yyless_macro_arg = (n); \ + YY_LESS_LINENO(yyless_macro_arg);\ + yytext[yyleng] = (yy_hold_char); \ + (yy_c_buf_p) = yytext + yyless_macro_arg; \ + (yy_hold_char) = *(yy_c_buf_p); \ + *(yy_c_buf_p) = '\0'; \ + yyleng = yyless_macro_arg; \ + } \ + while ( 0 ) + +/* Accessor methods (get/set functions) to struct members. */ + +/** Get the current line number. + * + */ +int yyget_lineno (void) +{ + + return yylineno; +} + +/** Get the input stream. + * + */ +FILE *yyget_in (void) +{ + return yyin; +} + +/** Get the output stream. + * + */ +FILE *yyget_out (void) +{ + return yyout; +} + +/** Get the length of the current token. + * + */ +int yyget_leng (void) +{ + return yyleng; +} + +/** Get the current token. + * + */ + +char *yyget_text (void) +{ + return yytext; +} + +/** Set the current line number. + * @param _line_number line number + * + */ +void yyset_lineno (int _line_number ) +{ + + yylineno = _line_number; +} + +/** Set the input stream. This does not discard the current + * input buffer. + * @param _in_str A readable stream. + * + * @see yy_switch_to_buffer + */ +void yyset_in (FILE * _in_str ) +{ + yyin = _in_str ; +} + +void yyset_out (FILE * _out_str ) +{ + yyout = _out_str ; +} + +int yyget_debug (void) +{ + return yy_flex_debug; +} + +void yyset_debug (int _bdebug ) +{ + yy_flex_debug = _bdebug ; +} + +static int yy_init_globals (void) +{ + /* Initialization is the same as for the non-reentrant scanner. + * This function is called from yylex_destroy(), so don't allocate here. + */ + + (yy_buffer_stack) = NULL; + (yy_buffer_stack_top) = 0; + (yy_buffer_stack_max) = 0; + (yy_c_buf_p) = NULL; + (yy_init) = 0; + (yy_start) = 0; + +/* Defined in main.c */ +#ifdef YY_STDINIT + yyin = stdin; + yyout = stdout; +#else + yyin = NULL; + yyout = NULL; +#endif + + /* For future reference: Set errno on error, since we are called by + * yylex_init() + */ + return 0; +} + +/* yylex_destroy is for both reentrant and non-reentrant scanners. */ +int yylex_destroy (void) +{ + + /* Pop the buffer stack, destroying each element. */ + while(YY_CURRENT_BUFFER){ + yy_delete_buffer( YY_CURRENT_BUFFER ); + YY_CURRENT_BUFFER_LVALUE = NULL; + yypop_buffer_state(); + } + + /* Destroy the stack itself. */ + yyfree((yy_buffer_stack) ); + (yy_buffer_stack) = NULL; + + /* Reset the globals. This is important in a non-reentrant scanner so the next time + * yylex() is called, initialization will occur. */ + yy_init_globals( ); + + return 0; +} + +/* + * Internal utility routines. 
+ */ + +#ifndef yytext_ptr +static void yy_flex_strncpy (char* s1, const char * s2, int n ) +{ + + int i; + for ( i = 0; i < n; ++i ) + s1[i] = s2[i]; +} +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen (const char * s ) +{ + int n; + for ( n = 0; s[n]; ++n ) + ; + + return n; +} +#endif + +#define YYTABLES_NAME "yytables" + +#line 358 "jsonpath_scan.l" + + +/* LCOV_EXCL_STOP */ + +void +jsonpath_yyerror(JsonPathParseResult **result, struct Node *escontext, + const char *message) +{ + /* don't overwrite escontext if it's already been set */ + if (SOFT_ERROR_OCCURRED(escontext)) + return; + + if (*yytext == YY_END_OF_BUFFER_CHAR) + { + errsave(escontext, + (errcode(ERRCODE_SYNTAX_ERROR), + /* translator: %s is typically "syntax error" */ + errmsg("%s at end of jsonpath input", _(message)))); + } + else + { + errsave(escontext, + (errcode(ERRCODE_SYNTAX_ERROR), + /* translator: first %s is typically "syntax error" */ + errmsg("%s at or near \"%s\" of jsonpath input", + _(message), yytext))); + } +} + +typedef struct JsonPathKeyword +{ + int16 len; + bool lowercase; + int val; + const char *keyword; +} JsonPathKeyword; + +/* + * Array of key words should be sorted by length and then + * alphabetical order + */ +static const JsonPathKeyword keywords[] = { + { 2, false, IS_P, "is"}, + { 2, false, TO_P, "to"}, + { 3, false, ABS_P, "abs"}, + { 3, false, LAX_P, "lax"}, + { 4, false, FLAG_P, "flag"}, + { 4, false, LAST_P, "last"}, + { 4, true, NULL_P, "null"}, + { 4, false, SIZE_P, "size"}, + { 4, true, TRUE_P, "true"}, + { 4, false, TYPE_P, "type"}, + { 4, false, WITH_P, "with"}, + { 5, true, FALSE_P, "false"}, + { 5, false, FLOOR_P, "floor"}, + { 6, false, DOUBLE_P, "double"}, + { 6, false, EXISTS_P, "exists"}, + { 6, false, STARTS_P, "starts"}, + { 6, false, STRICT_P, "strict"}, + { 7, false, CEILING_P, "ceiling"}, + { 7, false, UNKNOWN_P, "unknown"}, + { 8, false, DATETIME_P, "datetime"}, + { 8, false, KEYVALUE_P, "keyvalue"}, + { 10,false, LIKE_REGEX_P, "like_regex"}, +}; + +/* Check if current scanstring value is a keyword */ +static enum yytokentype +checkKeyword() +{ + int res = IDENT_P; + int diff; + const JsonPathKeyword *StopLow = keywords, + *StopHigh = keywords + lengthof(keywords), + *StopMiddle; + + if (scanstring.len > keywords[lengthof(keywords) - 1].len) + return res; + + while (StopLow < StopHigh) + { + StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); + + if (StopMiddle->len == scanstring.len) + diff = pg_strncasecmp(StopMiddle->keyword, scanstring.val, + scanstring.len); + else + diff = StopMiddle->len - scanstring.len; + + if (diff < 0) + StopLow = StopMiddle + 1; + else if (diff > 0) + StopHigh = StopMiddle; + else + { + if (StopMiddle->lowercase) + diff = strncmp(StopMiddle->keyword, scanstring.val, + scanstring.len); + + if (diff == 0) + res = StopMiddle->val; + + break; + } + } + + return res; +} + +/* + * Called before any actual parsing is done + */ +static void +jsonpath_scanner_init(const char *str, int slen) +{ + if (slen <= 0) + slen = strlen(str); + + /* + * Might be left over after ereport() + */ + yy_init_globals(); + + /* + * Make a scan buffer with special termination needed by flex. 
+ */ + + scanbuflen = slen; + scanbuf = palloc(slen + 2); + memcpy(scanbuf, str, slen); + scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR; + scanbufhandle = yy_scan_buffer(scanbuf, slen + 2); + + BEGIN(INITIAL); +} + + +/* + * Called after parsing is done to clean up after jsonpath_scanner_init() + */ +static void +jsonpath_scanner_finish(void) +{ + yy_delete_buffer(scanbufhandle); + pfree(scanbuf); +} + +/* + * Resize scanstring so that it can append string of given length. + * Reinitialize if required. + */ +static void +resizeString(bool init, int appendLen) +{ + if (init) + { + scanstring.total = Max(32, appendLen); + scanstring.val = (char *) palloc(scanstring.total); + scanstring.len = 0; + } + else + { + if (scanstring.len + appendLen >= scanstring.total) + { + while (scanstring.len + appendLen >= scanstring.total) + scanstring.total *= 2; + scanstring.val = repalloc(scanstring.val, scanstring.total); + } + } +} + +/* Add set of bytes at "s" of length "l" to scanstring */ +static void +addstring(bool init, char *s, int l) +{ + resizeString(init, l + 1); + memcpy(scanstring.val + scanstring.len, s, l); + scanstring.len += l; +} + +/* Add single byte "c" to scanstring */ +static void +addchar(bool init, char c) +{ + resizeString(init, 1); + scanstring.val[scanstring.len] = c; + if (c != '\0') + scanstring.len++; +} + +/* Interface to jsonpath parser */ +JsonPathParseResult * +parsejsonpath(const char *str, int len, struct Node *escontext) +{ + JsonPathParseResult *parseresult; + + jsonpath_scanner_init(str, len); + + if (jsonpath_yyparse((void *) &parseresult, escontext) != 0) + jsonpath_yyerror(NULL, escontext, "invalid input"); /* shouldn't happen */ + + jsonpath_scanner_finish(); + + return parseresult; +} + +/* Turn hex character into integer */ +static bool +hexval(char c, int *result, struct Node *escontext) +{ + if (c >= '0' && c <= '9') + { + *result = c - '0'; + return true; + } + if (c >= 'a' && c <= 'f') + { + *result = c - 'a' + 0xA; + return true; + } + if (c >= 'A' && c <= 'F') + { + *result = c - 'A' + 0xA; + return true; + } + jsonpath_yyerror(NULL, escontext, "invalid hexadecimal digit"); + return false; +} + +/* Add given unicode character to scanstring */ +static bool +addUnicodeChar(int ch, struct Node *escontext) +{ + if (ch == 0) + { + /* We can't allow this, since our TEXT type doesn't */ + ereturn(escontext, false, + (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER), + errmsg("unsupported Unicode escape sequence"), + errdetail("\\u0000 cannot be converted to text."))); + } + else + { + char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1]; + + /* + * If we're trapping the error status, call the noerror form of the + * conversion function. Otherwise call the normal form which provides + * more detailed errors. + */ + + if (! escontext || ! 
IsA(escontext, ErrorSaveContext)) + pg_unicode_to_server(ch, (unsigned char *) cbuf); + else if (!pg_unicode_to_server_noerror(ch, (unsigned char *) cbuf)) + ereturn(escontext, false, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("could not convert Unicode to server encoding"))); + addstring(false, cbuf, strlen(cbuf)); + } + return true; +} + +/* Add unicode character, processing any surrogate pairs */ +static bool +addUnicode(int ch, int *hi_surrogate, struct Node *escontext) +{ + if (is_utf16_surrogate_first(ch)) + { + if (*hi_surrogate != -1) + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "jsonpath"), + errdetail("Unicode high surrogate must not follow " + "a high surrogate."))); + *hi_surrogate = ch; + return true; + } + else if (is_utf16_surrogate_second(ch)) + { + if (*hi_surrogate == -1) + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "jsonpath"), + errdetail("Unicode low surrogate must follow a high " + "surrogate."))); + ch = surrogate_pair_to_codepoint(*hi_surrogate, ch); + *hi_surrogate = -1; + } + else if (*hi_surrogate != -1) + { + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "jsonpath"), + errdetail("Unicode low surrogate must follow a high " + "surrogate."))); + } + + return addUnicodeChar(ch, escontext); +} + +/* + * parseUnicode was adopted from json_lex_string() in + * src/backend/utils/adt/json.c + */ +static bool +parseUnicode(char *s, int l, struct Node *escontext) +{ + int i = 2; + int hi_surrogate = -1; + + for (i = 2; i < l; i += 2) /* skip '\u' */ + { + int ch = 0; + int j, si; + + if (s[i] == '{') /* parse '\u{XX...}' */ + { + while (s[++i] != '}' && i < l) + { + if (!hexval(s[i], &si, escontext)) + return false; + ch = (ch << 4) | si; + } + i++; /* skip '}' */ + } + else /* parse '\uXXXX' */ + { + for (j = 0; j < 4 && i < l; j++) + { + if (!hexval(s[i++], &si, escontext)) + return false; + ch = (ch << 4) | si; + } + } + + if (! addUnicode(ch, &hi_surrogate, escontext)) + return false; + } + + if (hi_surrogate != -1) + { + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "jsonpath"), + errdetail("Unicode low surrogate must follow a high " + "surrogate."))); + } + + return true; +} + +/* Parse sequence of hex-encoded characters */ +static bool +parseHexChar(char *s, struct Node *escontext) +{ + int s2, s3, ch; + if (!hexval(s[2], &s2, escontext)) + return false; + if (!hexval(s[3], &s3, escontext)) + return false; + + ch = (s2 << 4) | s3; + + return addUnicodeChar(ch, escontext); +} + +/* + * Interface functions to make flex use palloc() instead of malloc(). + * It'd be better to make these static, but flex insists otherwise. 
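+ * Routing the scanner's allocations through palloc() also keeps them in the
+ * current memory context, so if an error aborts the parse the memory-context
+ * machinery reclaims them, unlike chunks obtained from malloc().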
+ */ + +void * +jsonpath_yyalloc(yy_size_t bytes) +{ + return palloc(bytes); +} + +void * +jsonpath_yyrealloc(void *ptr, yy_size_t bytes) +{ + if (ptr) + return repalloc(ptr, bytes); + else + return palloc(bytes); +} + +void +jsonpath_yyfree(void *ptr) +{ + if (ptr) + pfree(ptr); +} + diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/levenshtein.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/levenshtein.c new file mode 100644 index 00000000000..f8979776d0d --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/levenshtein.c @@ -0,0 +1,401 @@ +/*------------------------------------------------------------------------- + * + * levenshtein.c + * Levenshtein distance implementation. + * + * Original author: Joe Conway <mail@joeconway.com> + * + * This file is included by varlena.c twice, to provide matching code for (1) + * Levenshtein distance with custom costings, and (2) Levenshtein distance with + * custom costings and a "max" value above which exact distances are not + * interesting. Before the inclusion, we rely on the presence of the inline + * function rest_of_char_same(). + * + * Written based on a description of the algorithm by Michael Gilleland found + * at http://www.merriampark.com/ld.htm. Also looked at levenshtein.c in the + * PHP 4.0.6 distribution for inspiration. Configurable penalty costs + * extension is introduced by Volkan YAZICI <volkan.yazici@gmail.com. + * + * Copyright (c) 2001-2021, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/adt/levenshtein.c + * + *------------------------------------------------------------------------- + */ +#define MAX_LEVENSHTEIN_STRLEN 255 + +/* + * Calculates Levenshtein distance metric between supplied strings, which are + * not necessarily null-terminated. + * + * source: source string, of length slen bytes. + * target: target string, of length tlen bytes. + * ins_c, del_c, sub_c: costs to charge for character insertion, deletion, + * and substitution respectively; (1, 1, 1) costs suffice for common + * cases, but your mileage may vary. + * max_d: if provided and >= 0, maximum distance we care about; see below. + * trusted: caller is trusted and need not obey MAX_LEVENSHTEIN_STRLEN. + * + * One way to compute Levenshtein distance is to incrementally construct + * an (m+1)x(n+1) matrix where cell (i, j) represents the minimum number + * of operations required to transform the first i characters of s into + * the first j characters of t. The last column of the final row is the + * answer. + * + * We use that algorithm here with some modification. In lieu of holding + * the entire array in memory at once, we'll just use two arrays of size + * m+1 for storing accumulated values. At each step one array represents + * the "previous" row and one is the "current" row of the notional large + * array. + * + * If max_d >= 0, we only need to provide an accurate answer when that answer + * is less than or equal to max_d. From any cell in the matrix, there is + * theoretical "minimum residual distance" from that cell to the last column + * of the final row. This minimum residual distance is zero when the + * untransformed portions of the strings are of equal length (because we might + * get lucky and find all the remaining characters matching) and is otherwise + * based on the minimum number of insertions or deletions needed to make them + * equal length. 
The residual distance grows as we move toward the upper + * right or lower left corners of the matrix. When the max_d bound is + * usefully tight, we can use this property to avoid computing the entirety + * of each row; instead, we maintain a start_column and stop_column that + * identify the portion of the matrix close to the diagonal which can still + * affect the final answer. + */ +int +#ifdef LEVENSHTEIN_LESS_EQUAL +varstr_levenshtein_less_equal(const char *source, int slen, + const char *target, int tlen, + int ins_c, int del_c, int sub_c, + int max_d, bool trusted) +#else +varstr_levenshtein(const char *source, int slen, + const char *target, int tlen, + int ins_c, int del_c, int sub_c, + bool trusted) +#endif +{ + int m, + n; + int *prev; + int *curr; + int *s_char_len = NULL; + int i, + j; + const char *y; + + /* + * For varstr_levenshtein_less_equal, we have real variables called + * start_column and stop_column; otherwise it's just short-hand for 0 and + * m. + */ +#ifdef LEVENSHTEIN_LESS_EQUAL + int start_column, + stop_column; + +#undef START_COLUMN +#undef STOP_COLUMN +#define START_COLUMN start_column +#define STOP_COLUMN stop_column +#else +#undef START_COLUMN +#undef STOP_COLUMN +#define START_COLUMN 0 +#define STOP_COLUMN m +#endif + + /* Convert string lengths (in bytes) to lengths in characters */ + m = pg_mbstrlen_with_len(source, slen); + n = pg_mbstrlen_with_len(target, tlen); + + /* + * We can transform an empty s into t with n insertions, or a non-empty t + * into an empty s with m deletions. + */ + if (!m) + return n * ins_c; + if (!n) + return m * del_c; + + /* + * For security concerns, restrict excessive CPU+RAM usage. (This + * implementation uses O(m) memory and has O(mn) complexity.) If + * "trusted" is true, caller is responsible for not making excessive + * requests, typically by using a small max_d along with strings that are + * bounded, though not necessarily to MAX_LEVENSHTEIN_STRLEN exactly. + */ + if (!trusted && + (m > MAX_LEVENSHTEIN_STRLEN || + n > MAX_LEVENSHTEIN_STRLEN)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("levenshtein argument exceeds maximum length of %d characters", + MAX_LEVENSHTEIN_STRLEN))); + +#ifdef LEVENSHTEIN_LESS_EQUAL + /* Initialize start and stop columns. */ + start_column = 0; + stop_column = m + 1; + + /* + * If max_d >= 0, determine whether the bound is impossibly tight. If so, + * return max_d + 1 immediately. Otherwise, determine whether it's tight + * enough to limit the computation we must perform. If so, figure out + * initial stop column. + */ + if (max_d >= 0) + { + int min_theo_d; /* Theoretical minimum distance. */ + int max_theo_d; /* Theoretical maximum distance. */ + int net_inserts = n - m; + + min_theo_d = net_inserts < 0 ? + -net_inserts * del_c : net_inserts * ins_c; + if (min_theo_d > max_d) + return max_d + 1; + if (ins_c + del_c < sub_c) + sub_c = ins_c + del_c; + max_theo_d = min_theo_d + sub_c * Min(m, n); + if (max_d >= max_theo_d) + max_d = -1; + else if (ins_c + del_c > 0) + { + /* + * Figure out how much of the first row of the notional matrix we + * need to fill in. If the string is growing, the theoretical + * minimum distance already incorporates the cost of deleting the + * number of characters necessary to make the two strings equal in + * length. Each additional deletion forces another insertion, so + * the best-case total cost increases by ins_c + del_c. 
If the + * string is shrinking, the minimum theoretical cost assumes no + * excess deletions; that is, we're starting no further right than + * column n - m. If we do start further right, the best-case + * total cost increases by ins_c + del_c for each move right. + */ + int slack_d = max_d - min_theo_d; + int best_column = net_inserts < 0 ? -net_inserts : 0; + + stop_column = best_column + (slack_d / (ins_c + del_c)) + 1; + if (stop_column > m) + stop_column = m + 1; + } + } +#endif + + /* + * In order to avoid calling pg_mblen() repeatedly on each character in s, + * we cache all the lengths before starting the main loop -- but if all + * the characters in both strings are single byte, then we skip this and + * use a fast-path in the main loop. If only one string contains + * multi-byte characters, we still build the array, so that the fast-path + * needn't deal with the case where the array hasn't been initialized. + */ + if (m != slen || n != tlen) + { + int i; + const char *cp = source; + + s_char_len = (int *) palloc((m + 1) * sizeof(int)); + for (i = 0; i < m; ++i) + { + s_char_len[i] = pg_mblen(cp); + cp += s_char_len[i]; + } + s_char_len[i] = 0; + } + + /* One more cell for initialization column and row. */ + ++m; + ++n; + + /* Previous and current rows of notional array. */ + prev = (int *) palloc(2 * m * sizeof(int)); + curr = prev + m; + + /* + * To transform the first i characters of s into the first 0 characters of + * t, we must perform i deletions. + */ + for (i = START_COLUMN; i < STOP_COLUMN; i++) + prev[i] = i * del_c; + + /* Loop through rows of the notional array */ + for (y = target, j = 1; j < n; j++) + { + int *temp; + const char *x = source; + int y_char_len = n != tlen + 1 ? pg_mblen(y) : 1; + +#ifdef LEVENSHTEIN_LESS_EQUAL + + /* + * In the best case, values percolate down the diagonal unchanged, so + * we must increment stop_column unless it's already on the right end + * of the array. The inner loop will read prev[stop_column], so we + * have to initialize it even though it shouldn't affect the result. + */ + if (stop_column < m) + { + prev[stop_column] = max_d + 1; + ++stop_column; + } + + /* + * The main loop fills in curr, but curr[0] needs a special case: to + * transform the first 0 characters of s into the first j characters + * of t, we must perform j insertions. However, if start_column > 0, + * this special case does not apply. + */ + if (start_column == 0) + { + curr[0] = j * ins_c; + i = 1; + } + else + i = start_column; +#else + curr[0] = j * ins_c; + i = 1; +#endif + + /* + * This inner loop is critical to performance, so we include a + * fast-path to handle the (fairly common) case where no multibyte + * characters are in the mix. The fast-path is entitled to assume + * that if s_char_len is not initialized then BOTH strings contain + * only single-byte characters. + */ + if (s_char_len != NULL) + { + for (; i < STOP_COLUMN; i++) + { + int ins; + int del; + int sub; + int x_char_len = s_char_len[i - 1]; + + /* + * Calculate costs for insertion, deletion, and substitution. + * + * When calculating cost for substitution, we compare the last + * character of each possibly-multibyte character first, + * because that's enough to rule out most mis-matches. If we + * get past that test, then we compare the lengths and the + * remaining bytes. 
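+			 * In effect each cell follows the standard recurrence
+			 *   curr[i] = min(prev[i] + ins_c,
+			 *                 curr[i - 1] + del_c,
+			 *                 prev[i - 1] + (characters equal ? 0 : sub_c))
+			 * with the equality test ordered to fail as cheaply as possible.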
+ */ + ins = prev[i] + ins_c; + del = curr[i - 1] + del_c; + if (x[x_char_len - 1] == y[y_char_len - 1] + && x_char_len == y_char_len && + (x_char_len == 1 || rest_of_char_same(x, y, x_char_len))) + sub = prev[i - 1]; + else + sub = prev[i - 1] + sub_c; + + /* Take the one with minimum cost. */ + curr[i] = Min(ins, del); + curr[i] = Min(curr[i], sub); + + /* Point to next character. */ + x += x_char_len; + } + } + else + { + for (; i < STOP_COLUMN; i++) + { + int ins; + int del; + int sub; + + /* Calculate costs for insertion, deletion, and substitution. */ + ins = prev[i] + ins_c; + del = curr[i - 1] + del_c; + sub = prev[i - 1] + ((*x == *y) ? 0 : sub_c); + + /* Take the one with minimum cost. */ + curr[i] = Min(ins, del); + curr[i] = Min(curr[i], sub); + + /* Point to next character. */ + x++; + } + } + + /* Swap current row with previous row. */ + temp = curr; + curr = prev; + prev = temp; + + /* Point to next character. */ + y += y_char_len; + +#ifdef LEVENSHTEIN_LESS_EQUAL + + /* + * This chunk of code represents a significant performance hit if used + * in the case where there is no max_d bound. This is probably not + * because the max_d >= 0 test itself is expensive, but rather because + * the possibility of needing to execute this code prevents tight + * optimization of the loop as a whole. + */ + if (max_d >= 0) + { + /* + * The "zero point" is the column of the current row where the + * remaining portions of the strings are of equal length. There + * are (n - 1) characters in the target string, of which j have + * been transformed. There are (m - 1) characters in the source + * string, so we want to find the value for zp where (n - 1) - j = + * (m - 1) - zp. + */ + int zp = j - (n - m); + + /* Check whether the stop column can slide left. */ + while (stop_column > 0) + { + int ii = stop_column - 1; + int net_inserts = ii - zp; + + if (prev[ii] + (net_inserts > 0 ? net_inserts * ins_c : + -net_inserts * del_c) <= max_d) + break; + stop_column--; + } + + /* Check whether the start column can slide right. */ + while (start_column < stop_column) + { + int net_inserts = start_column - zp; + + if (prev[start_column] + + (net_inserts > 0 ? net_inserts * ins_c : + -net_inserts * del_c) <= max_d) + break; + + /* + * We'll never again update these values, so we must make sure + * there's nothing here that could confuse any future + * iteration of the outer loop. + */ + prev[start_column] = max_d + 1; + curr[start_column] = max_d + 1; + if (start_column != 0) + source += (s_char_len != NULL) ? s_char_len[start_column - 1] : 1; + start_column++; + } + + /* If they cross, we're going to exceed the bound. */ + if (start_column >= stop_column) + return max_d + 1; + } +#endif + } + + /* + * Because the final value was swapped from the previous row to the + * current row, that's where we'll find it. + */ + return prev[m - 1]; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/like.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/like.c new file mode 100644 index 00000000000..33a2f46aab0 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/like.c @@ -0,0 +1,456 @@ +/*------------------------------------------------------------------------- + * + * like.c + * like expression handling code. + * + * NOTES + * A big hack of the regexp.c code!! Contributed by + * Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95). 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/adt/like.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <ctype.h> + +#include "catalog/pg_collation.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/pg_locale.h" +#include "varatt.h" + + +#define LIKE_TRUE 1 +#define LIKE_FALSE 0 +#define LIKE_ABORT (-1) + + +static int SB_MatchText(const char *t, int tlen, const char *p, int plen, + pg_locale_t locale, bool locale_is_c); +static text *SB_do_like_escape(text *pat, text *esc); + +static int MB_MatchText(const char *t, int tlen, const char *p, int plen, + pg_locale_t locale, bool locale_is_c); +static text *MB_do_like_escape(text *pat, text *esc); + +static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen, + pg_locale_t locale, bool locale_is_c); + +static int SB_IMatchText(const char *t, int tlen, const char *p, int plen, + pg_locale_t locale, bool locale_is_c); + +static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation); +static int Generic_Text_IC_like(text *str, text *pat, Oid collation); + +/*-------------------- + * Support routine for MatchText. Compares given multibyte streams + * as wide characters. If they match, returns 1 otherwise returns 0. + *-------------------- + */ +static inline int +wchareq(const char *p1, const char *p2) +{ + int p1_len; + + /* Optimization: quickly compare the first byte. */ + if (*p1 != *p2) + return 0; + + p1_len = pg_mblen(p1); + if (pg_mblen(p2) != p1_len) + return 0; + + /* They are the same length */ + while (p1_len--) + { + if (*p1++ != *p2++) + return 0; + } + return 1; +} + +/* + * Formerly we had a routine iwchareq() here that tried to do case-insensitive + * comparison of multibyte characters. It did not work at all, however, + * because it relied on tolower() which has a single-byte API ... and + * towlower() wouldn't be much better since we have no suitably cheap way + * of getting a single character transformed to the system's wchar_t format. + * So now, we just downcase the strings using lower() and apply regular LIKE + * comparison. This should be revisited when we install better locale support. + */ + +/* + * We do handle case-insensitive matching for single-byte encodings using + * fold-on-the-fly processing, however. 
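+ * That is, SB_IMatchText() folds one byte at a time through SB_lower_char()
+ * while matching, rather than lower()-ing whole copies of the pattern and
+ * text first; this is only safe when every character is a single byte.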
+ */ +static char +SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c) +{ + if (locale_is_c) + return pg_ascii_tolower(c); +#ifdef HAVE_LOCALE_T + else if (locale) + return tolower_l(c, locale->info.lt); +#endif + else + return pg_tolower(c); +} + + +#define NextByte(p, plen) ((p)++, (plen)--) + +/* Set up to compile like_match.c for multibyte characters */ +#define CHAREQ(p1, p2) wchareq((p1), (p2)) +#define NextChar(p, plen) \ + do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0) +#define CopyAdvChar(dst, src, srclen) \ + do { int __l = pg_mblen(src); \ + (srclen) -= __l; \ + while (__l-- > 0) \ + *(dst)++ = *(src)++; \ + } while (0) + +#define MatchText MB_MatchText +#define do_like_escape MB_do_like_escape + +#include "like_match.c" + +/* Set up to compile like_match.c for single-byte characters */ +#define CHAREQ(p1, p2) (*(p1) == *(p2)) +#define NextChar(p, plen) NextByte((p), (plen)) +#define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--) + +#define MatchText SB_MatchText +#define do_like_escape SB_do_like_escape + +#include "like_match.c" + +/* setup to compile like_match.c for single byte case insensitive matches */ +#define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c) +#define NextChar(p, plen) NextByte((p), (plen)) +#define MatchText SB_IMatchText + +#include "like_match.c" + +/* setup to compile like_match.c for UTF8 encoding, using fast NextChar */ + +#define NextChar(p, plen) \ + do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 ) +#define MatchText UTF8_MatchText + +#include "like_match.c" + +/* Generic for all cases not requiring inline case-folding */ +static inline int +GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation) +{ + if (collation && !lc_ctype_is_c(collation)) + { + pg_locale_t locale = pg_newlocale_from_collation(collation); + + if (!pg_locale_deterministic(locale)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for LIKE"))); + } + + if (pg_database_encoding_max_length() == 1) + return SB_MatchText(s, slen, p, plen, 0, true); + else if (GetDatabaseEncoding() == PG_UTF8) + return UTF8_MatchText(s, slen, p, plen, 0, true); + else + return MB_MatchText(s, slen, p, plen, 0, true); +} + +static inline int +Generic_Text_IC_like(text *str, text *pat, Oid collation) +{ + char *s, + *p; + int slen, + plen; + pg_locale_t locale = 0; + bool locale_is_c = false; + + if (!OidIsValid(collation)) + { + /* + * This typically means that the parser could not resolve a conflict + * of implicit collations, so report it that way. + */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for ILIKE"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } + + if (lc_ctype_is_c(collation)) + locale_is_c = true; + else + locale = pg_newlocale_from_collation(collation); + + if (!pg_locale_deterministic(locale)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for ILIKE"))); + + /* + * For efficiency reasons, in the single byte case we don't call lower() + * on the pattern and text, but instead call SB_lower_char on each + * character. In the multi-byte case we don't have much choice :-(. Also, + * ICU does not support single-character case folding, so we go the long + * way. 
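+	 * For example, ILIKE with pattern 'Foo%' against 'FOOBAR' under a
+	 * multibyte encoding or an ICU collation is handled by lower()-ing both
+	 * sides and then doing an ordinary case-sensitive match of 'foo%'
+	 * against 'foobar'.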
+ */ + + if (pg_database_encoding_max_length() > 1 || (locale && locale->provider == COLLPROVIDER_ICU)) + { + pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation, + PointerGetDatum(pat))); + p = VARDATA_ANY(pat); + plen = VARSIZE_ANY_EXHDR(pat); + str = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation, + PointerGetDatum(str))); + s = VARDATA_ANY(str); + slen = VARSIZE_ANY_EXHDR(str); + if (GetDatabaseEncoding() == PG_UTF8) + return UTF8_MatchText(s, slen, p, plen, 0, true); + else + return MB_MatchText(s, slen, p, plen, 0, true); + } + else + { + p = VARDATA_ANY(pat); + plen = VARSIZE_ANY_EXHDR(pat); + s = VARDATA_ANY(str); + slen = VARSIZE_ANY_EXHDR(str); + return SB_IMatchText(s, slen, p, plen, locale, locale_is_c); + } +} + +/* + * interface routines called by the function manager + */ + +Datum +namelike(PG_FUNCTION_ARGS) +{ + Name str = PG_GETARG_NAME(0); + text *pat = PG_GETARG_TEXT_PP(1); + bool result; + char *s, + *p; + int slen, + plen; + + s = NameStr(*str); + slen = strlen(s); + p = VARDATA_ANY(pat); + plen = VARSIZE_ANY_EXHDR(pat); + + result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE); + + PG_RETURN_BOOL(result); +} + +Datum +namenlike(PG_FUNCTION_ARGS) +{ + Name str = PG_GETARG_NAME(0); + text *pat = PG_GETARG_TEXT_PP(1); + bool result; + char *s, + *p; + int slen, + plen; + + s = NameStr(*str); + slen = strlen(s); + p = VARDATA_ANY(pat); + plen = VARSIZE_ANY_EXHDR(pat); + + result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE); + + PG_RETURN_BOOL(result); +} + +Datum +textlike(PG_FUNCTION_ARGS) +{ + text *str = PG_GETARG_TEXT_PP(0); + text *pat = PG_GETARG_TEXT_PP(1); + bool result; + char *s, + *p; + int slen, + plen; + + s = VARDATA_ANY(str); + slen = VARSIZE_ANY_EXHDR(str); + p = VARDATA_ANY(pat); + plen = VARSIZE_ANY_EXHDR(pat); + + result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE); + + PG_RETURN_BOOL(result); +} + +Datum +textnlike(PG_FUNCTION_ARGS) +{ + text *str = PG_GETARG_TEXT_PP(0); + text *pat = PG_GETARG_TEXT_PP(1); + bool result; + char *s, + *p; + int slen, + plen; + + s = VARDATA_ANY(str); + slen = VARSIZE_ANY_EXHDR(str); + p = VARDATA_ANY(pat); + plen = VARSIZE_ANY_EXHDR(pat); + + result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE); + + PG_RETURN_BOOL(result); +} + +Datum +bytealike(PG_FUNCTION_ARGS) +{ + bytea *str = PG_GETARG_BYTEA_PP(0); + bytea *pat = PG_GETARG_BYTEA_PP(1); + bool result; + char *s, + *p; + int slen, + plen; + + s = VARDATA_ANY(str); + slen = VARSIZE_ANY_EXHDR(str); + p = VARDATA_ANY(pat); + plen = VARSIZE_ANY_EXHDR(pat); + + result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE); + + PG_RETURN_BOOL(result); +} + +Datum +byteanlike(PG_FUNCTION_ARGS) +{ + bytea *str = PG_GETARG_BYTEA_PP(0); + bytea *pat = PG_GETARG_BYTEA_PP(1); + bool result; + char *s, + *p; + int slen, + plen; + + s = VARDATA_ANY(str); + slen = VARSIZE_ANY_EXHDR(str); + p = VARDATA_ANY(pat); + plen = VARSIZE_ANY_EXHDR(pat); + + result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE); + + PG_RETURN_BOOL(result); +} + +/* + * Case-insensitive versions + */ + +Datum +nameiclike(PG_FUNCTION_ARGS) +{ + Name str = PG_GETARG_NAME(0); + text *pat = PG_GETARG_TEXT_PP(1); + bool result; + text *strtext; + + strtext = DatumGetTextPP(DirectFunctionCall1(name_text, + NameGetDatum(str))); + result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE); + + PG_RETURN_BOOL(result); +} + +Datum +nameicnlike(PG_FUNCTION_ARGS) +{ + 
Name str = PG_GETARG_NAME(0); + text *pat = PG_GETARG_TEXT_PP(1); + bool result; + text *strtext; + + strtext = DatumGetTextPP(DirectFunctionCall1(name_text, + NameGetDatum(str))); + result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE); + + PG_RETURN_BOOL(result); +} + +Datum +texticlike(PG_FUNCTION_ARGS) +{ + text *str = PG_GETARG_TEXT_PP(0); + text *pat = PG_GETARG_TEXT_PP(1); + bool result; + + result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE); + + PG_RETURN_BOOL(result); +} + +Datum +texticnlike(PG_FUNCTION_ARGS) +{ + text *str = PG_GETARG_TEXT_PP(0); + text *pat = PG_GETARG_TEXT_PP(1); + bool result; + + result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE); + + PG_RETURN_BOOL(result); +} + +/* + * like_escape() --- given a pattern and an ESCAPE string, + * convert the pattern to use Postgres' standard backslash escape convention. + */ +Datum +like_escape(PG_FUNCTION_ARGS) +{ + text *pat = PG_GETARG_TEXT_PP(0); + text *esc = PG_GETARG_TEXT_PP(1); + text *result; + + if (pg_database_encoding_max_length() == 1) + result = SB_do_like_escape(pat, esc); + else + result = MB_do_like_escape(pat, esc); + + PG_RETURN_TEXT_P(result); +} + +/* + * like_escape_bytea() --- given a pattern and an ESCAPE string, + * convert the pattern to use Postgres' standard backslash escape convention. + */ +Datum +like_escape_bytea(PG_FUNCTION_ARGS) +{ + bytea *pat = PG_GETARG_BYTEA_PP(0); + bytea *esc = PG_GETARG_BYTEA_PP(1); + bytea *result = SB_do_like_escape((text *) pat, (text *) esc); + + PG_RETURN_BYTEA_P((bytea *) result); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/like_match.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/like_match.c new file mode 100644 index 00000000000..2f32cdaf020 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/like_match.c @@ -0,0 +1,360 @@ +/*------------------------------------------------------------------------- + * + * like_match.c + * LIKE pattern matching internal code. + * + * This file is included by like.c four times, to provide matching code for + * (1) single-byte encodings, (2) UTF8, (3) other multi-byte encodings, + * and (4) case insensitive matches in single-byte encodings. + * (UTF8 is a special case because we can use a much more efficient version + * of NextChar than can be used for general multi-byte encodings.) + * + * Before the inclusion, we need to define the following macros: + * + * NextChar + * MatchText - to name of function wanted + * do_like_escape - name of function if wanted - needs CHAREQ and CopyAdvChar + * MATCH_LOWER - define for case (4) to specify case folding for 1-byte chars + * + * Copyright (c) 1996-2021, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/adt/like_match.c + * + *------------------------------------------------------------------------- + */ + +/* + * Originally written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986. + * Rich $alz is now <rsalz@bbn.com>. + * Special thanks to Lars Mathiesen <thorinn@diku.dk> for the + * LIKE_ABORT code. 
+ * + * This code was shamelessly stolen from the "pql" code by myself and + * slightly modified :) + * + * All references to the word "star" were replaced by "percent" + * All references to the word "wild" were replaced by "like" + * + * All the nice shell RE matching stuff was replaced by just "_" and "%" + * + * As I don't have a copy of the SQL standard handy I wasn't sure whether + * to leave in the '\' escape character handling. + * + * Keith Parks. <keith@mtcc.demon.co.uk> + * + * SQL lets you specify the escape character by saying + * LIKE <pattern> ESCAPE <escape character>. We are a small operation + * so we force you to use '\'. - ay 7/95 + * + * Now we have the like_escape() function that converts patterns with + * any specified escape character (or none at all) to the internal + * default escape character, which is still '\'. - tgl 9/2000 + * + * The code is rewritten to avoid requiring null-terminated strings, + * which in turn allows us to leave out some memcpy() operations. + * This code should be faster and take less memory, but no promises... + * - thomas 2000-08-06 + */ + + +/*-------------------- + * Match text and pattern, return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT. + * + * LIKE_TRUE: they match + * LIKE_FALSE: they don't match + * LIKE_ABORT: not only don't they match, but the text is too short. + * + * If LIKE_ABORT is returned, then no suffix of the text can match the + * pattern either, so an upper-level % scan can stop scanning now. + *-------------------- + */ + +#ifdef MATCH_LOWER +#define GETCHAR(t) MATCH_LOWER(t) +#else +#define GETCHAR(t) (t) +#endif + +static int +MatchText(const char *t, int tlen, const char *p, int plen, + pg_locale_t locale, bool locale_is_c) +{ + /* Fast path for match-everything pattern */ + if (plen == 1 && *p == '%') + return LIKE_TRUE; + + /* Since this function recurses, it could be driven to stack overflow */ + check_stack_depth(); + + /* + * In this loop, we advance by char when matching wildcards (and thus on + * recursive entry to this function we are properly char-synced). On other + * occasions it is safe to advance by byte, as the text and pattern will + * be in lockstep. This allows us to perform all comparisons between the + * text and pattern on a byte by byte basis, even for multi-byte + * encodings. + */ + while (tlen > 0 && plen > 0) + { + if (*p == '\\') + { + /* Next pattern byte must match literally, whatever it is */ + NextByte(p, plen); + /* ... and there had better be one, per SQL standard */ + if (plen <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE), + errmsg("LIKE pattern must not end with escape character"))); + if (GETCHAR(*p) != GETCHAR(*t)) + return LIKE_FALSE; + } + else if (*p == '%') + { + char firstpat; + + /* + * % processing is essentially a search for a text position at + * which the remainder of the text matches the remainder of the + * pattern, using a recursive call to check each potential match. + * + * If there are wildcards immediately following the %, we can skip + * over them first, using the idea that any sequence of N _'s and + * one or more %'s is equivalent to N _'s and one % (ie, it will + * match any sequence of at least N text characters). In this way + * we will always run the recursive search loop using a pattern + * fragment that begins with a literal character-to-match, thereby + * not recursing more than we have to. 
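+			 * For example, with the pattern fragment "%__%abc" the two _'s
+			 * each consume one text character, the second % is dropped, and
+			 * the search loop below then only considers text positions that
+			 * start with 'a'.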
+ */ + NextByte(p, plen); + + while (plen > 0) + { + if (*p == '%') + NextByte(p, plen); + else if (*p == '_') + { + /* If not enough text left to match the pattern, ABORT */ + if (tlen <= 0) + return LIKE_ABORT; + NextChar(t, tlen); + NextByte(p, plen); + } + else + break; /* Reached a non-wildcard pattern char */ + } + + /* + * If we're at end of pattern, match: we have a trailing % which + * matches any remaining text string. + */ + if (plen <= 0) + return LIKE_TRUE; + + /* + * Otherwise, scan for a text position at which we can match the + * rest of the pattern. The first remaining pattern char is known + * to be a regular or escaped literal character, so we can compare + * the first pattern byte to each text byte to avoid recursing + * more than we have to. This fact also guarantees that we don't + * have to consider a match to the zero-length substring at the + * end of the text. + */ + if (*p == '\\') + { + if (plen < 2) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE), + errmsg("LIKE pattern must not end with escape character"))); + firstpat = GETCHAR(p[1]); + } + else + firstpat = GETCHAR(*p); + + while (tlen > 0) + { + if (GETCHAR(*t) == firstpat) + { + int matched = MatchText(t, tlen, p, plen, + locale, locale_is_c); + + if (matched != LIKE_FALSE) + return matched; /* TRUE or ABORT */ + } + + NextChar(t, tlen); + } + + /* + * End of text with no match, so no point in trying later places + * to start matching this pattern. + */ + return LIKE_ABORT; + } + else if (*p == '_') + { + /* _ matches any single character, and we know there is one */ + NextChar(t, tlen); + NextByte(p, plen); + continue; + } + else if (GETCHAR(*p) != GETCHAR(*t)) + { + /* non-wildcard pattern char fails to match text char */ + return LIKE_FALSE; + } + + /* + * Pattern and text match, so advance. + * + * It is safe to use NextByte instead of NextChar here, even for + * multi-byte character sets, because we are not following immediately + * after a wildcard character. If we are in the middle of a multibyte + * character, we must already have matched at least one byte of the + * character from both text and pattern; so we cannot get out-of-sync + * on character boundaries. And we know that no backend-legal + * encoding allows ASCII characters such as '%' to appear as non-first + * bytes of characters, so we won't mistakenly detect a new wildcard. + */ + NextByte(t, tlen); + NextByte(p, plen); + } + + if (tlen > 0) + return LIKE_FALSE; /* end of pattern, but not of text */ + + /* + * End of text, but perhaps not of pattern. Match iff the remaining + * pattern can match a zero-length string, ie, it's zero or more %'s. + */ + while (plen > 0 && *p == '%') + NextByte(p, plen); + if (plen <= 0) + return LIKE_TRUE; + + /* + * End of text with no match, so no point in trying later places to start + * matching this pattern. + */ + return LIKE_ABORT; +} /* MatchText() */ + +/* + * like_escape() --- given a pattern and an ESCAPE string, + * convert the pattern to use Postgres' standard backslash escape convention. + */ +#ifdef do_like_escape + +static text * +do_like_escape(text *pat, text *esc) +{ + text *result; + char *p, + *e, + *r; + int plen, + elen; + bool afterescape; + + p = VARDATA_ANY(pat); + plen = VARSIZE_ANY_EXHDR(pat); + e = VARDATA_ANY(esc); + elen = VARSIZE_ANY_EXHDR(esc); + + /* + * Worst-case pattern growth is 2x --- unlikely, but it's hardly worth + * trying to calculate the size more accurately than that. 
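+	 * (The 2x bound is actually reached by a pattern consisting entirely of
+	 * backslashes when the escape string is empty or names some other
+	 * character: each '\' is doubled below.)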
+ */ + result = (text *) palloc(plen * 2 + VARHDRSZ); + r = VARDATA(result); + + if (elen == 0) + { + /* + * No escape character is wanted. Double any backslashes in the + * pattern to make them act like ordinary characters. + */ + while (plen > 0) + { + if (*p == '\\') + *r++ = '\\'; + CopyAdvChar(r, p, plen); + } + } + else + { + /* + * The specified escape must be only a single character. + */ + NextChar(e, elen); + if (elen != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE), + errmsg("invalid escape string"), + errhint("Escape string must be empty or one character."))); + + e = VARDATA_ANY(esc); + + /* + * If specified escape is '\', just copy the pattern as-is. + */ + if (*e == '\\') + { + memcpy(result, pat, VARSIZE_ANY(pat)); + return result; + } + + /* + * Otherwise, convert occurrences of the specified escape character to + * '\', and double occurrences of '\' --- unless they immediately + * follow an escape character! + */ + afterescape = false; + while (plen > 0) + { + if (CHAREQ(p, e) && !afterescape) + { + *r++ = '\\'; + NextChar(p, plen); + afterescape = true; + } + else if (*p == '\\') + { + *r++ = '\\'; + if (!afterescape) + *r++ = '\\'; + NextChar(p, plen); + afterescape = false; + } + else + { + CopyAdvChar(r, p, plen); + afterescape = false; + } + } + } + + SET_VARSIZE(result, r - ((char *) result)); + + return result; +} +#endif /* do_like_escape */ + +#ifdef CHAREQ +#undef CHAREQ +#endif + +#undef NextChar +#undef CopyAdvChar +#undef MatchText + +#ifdef do_like_escape +#undef do_like_escape +#endif + +#undef GETCHAR + +#ifdef MATCH_LOWER +#undef MATCH_LOWER + +#endif diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/like_support.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/like_support.c new file mode 100644 index 00000000000..555304ceb6d --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/like_support.c @@ -0,0 +1,1800 @@ +/*------------------------------------------------------------------------- + * + * like_support.c + * Planner support functions for LIKE, regex, and related operators. + * + * These routines handle special optimization of operators that can be + * used with index scans even though they are not known to the executor's + * indexscan machinery. The key idea is that these operators allow us + * to derive approximate indexscan qual clauses, such that any tuples + * that pass the operator clause itself must also satisfy the simpler + * indexscan condition(s). Then we can use the indexscan machinery + * to avoid scanning as much of the table as we'd otherwise have to, + * while applying the original operator as a qpqual condition to ensure + * we deliver only the tuples we want. (In essence, we're using a regular + * index as if it were a lossy index.) + * + * An example of what we're doing is + * textfield LIKE 'abc%def' + * from which we can generate the indexscanable conditions + * textfield >= 'abc' AND textfield < 'abd' + * which allow efficient scanning of an index on textfield. + * (In reality, character set and collation issues make the transformation + * from LIKE to indexscan limits rather harder than one might think ... + * but that's the basic idea.) 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/like_support.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <math.h> + +#include "access/htup_details.h" +#include "access/stratnum.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_opfamily.h" +#include "catalog/pg_statistic.h" +#include "catalog/pg_type.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/lsyscache.h" +#include "utils/pg_locale.h" +#include "utils/selfuncs.h" +#include "utils/varlena.h" + + +typedef enum +{ + Pattern_Type_Like, + Pattern_Type_Like_IC, + Pattern_Type_Regex, + Pattern_Type_Regex_IC, + Pattern_Type_Prefix +} Pattern_Type; + +typedef enum +{ + Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact +} Pattern_Prefix_Status; + +static Node *like_regex_support(Node *rawreq, Pattern_Type ptype); +static List *match_pattern_prefix(Node *leftop, + Node *rightop, + Pattern_Type ptype, + Oid expr_coll, + Oid opfamily, + Oid indexcollation); +static double patternsel_common(PlannerInfo *root, + Oid oprid, + Oid opfuncid, + List *args, + int varRelid, + Oid collation, + Pattern_Type ptype, + bool negate); +static Pattern_Prefix_Status pattern_fixed_prefix(Const *patt, + Pattern_Type ptype, + Oid collation, + Const **prefix, + Selectivity *rest_selec); +static Selectivity prefix_selectivity(PlannerInfo *root, + VariableStatData *vardata, + Oid eqopr, Oid ltopr, Oid geopr, + Oid collation, + Const *prefixcon); +static Selectivity like_selectivity(const char *patt, int pattlen, + bool case_insensitive); +static Selectivity regex_selectivity(const char *patt, int pattlen, + bool case_insensitive, + int fixed_prefix_len); +static int pattern_char_isalpha(char c, bool is_multibyte, + pg_locale_t locale, bool locale_is_c); +static Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc, + Oid collation); +static Datum string_to_datum(const char *str, Oid datatype); +static Const *string_to_const(const char *str, Oid datatype); +static Const *string_to_bytea_const(const char *str, size_t str_len); + + +/* + * Planner support functions for LIKE, regex, and related operators + */ +Datum +textlike_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like)); +} + +Datum +texticlike_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like_IC)); +} + +Datum +textregexeq_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex)); +} + +Datum +texticregexeq_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex_IC)); +} + +Datum +text_starts_with_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Prefix)); +} + +/* Common code for the above */ +static Node * +like_regex_support(Node *rawreq, Pattern_Type ptype) +{ + Node *ret = NULL; + + if 
(IsA(rawreq, SupportRequestSelectivity)) + { + /* + * Make a selectivity estimate for a function call, just as we'd do if + * the call was via the corresponding operator. + */ + SupportRequestSelectivity *req = (SupportRequestSelectivity *) rawreq; + Selectivity s1; + + if (req->is_join) + { + /* + * For the moment we just punt. If patternjoinsel is ever + * improved to do better, this should be made to call it. + */ + s1 = DEFAULT_MATCH_SEL; + } + else + { + /* Share code with operator restriction selectivity functions */ + s1 = patternsel_common(req->root, + InvalidOid, + req->funcid, + req->args, + req->varRelid, + req->inputcollid, + ptype, + false); + } + req->selectivity = s1; + ret = (Node *) req; + } + else if (IsA(rawreq, SupportRequestIndexCondition)) + { + /* Try to convert operator/function call to index conditions */ + SupportRequestIndexCondition *req = (SupportRequestIndexCondition *) rawreq; + + /* + * Currently we have no "reverse" match operators with the pattern on + * the left, so we only need consider cases with the indexkey on the + * left. + */ + if (req->indexarg != 0) + return NULL; + + if (is_opclause(req->node)) + { + OpExpr *clause = (OpExpr *) req->node; + + Assert(list_length(clause->args) == 2); + ret = (Node *) + match_pattern_prefix((Node *) linitial(clause->args), + (Node *) lsecond(clause->args), + ptype, + clause->inputcollid, + req->opfamily, + req->indexcollation); + } + else if (is_funcclause(req->node)) /* be paranoid */ + { + FuncExpr *clause = (FuncExpr *) req->node; + + Assert(list_length(clause->args) == 2); + ret = (Node *) + match_pattern_prefix((Node *) linitial(clause->args), + (Node *) lsecond(clause->args), + ptype, + clause->inputcollid, + req->opfamily, + req->indexcollation); + } + } + + return ret; +} + +/* + * match_pattern_prefix + * Try to generate an indexqual for a LIKE or regex operator. + */ +static List * +match_pattern_prefix(Node *leftop, + Node *rightop, + Pattern_Type ptype, + Oid expr_coll, + Oid opfamily, + Oid indexcollation) +{ + List *result; + Const *patt; + Const *prefix; + Pattern_Prefix_Status pstatus; + Oid ldatatype; + Oid rdatatype; + Oid eqopr; + Oid ltopr; + Oid geopr; + Oid preopr = InvalidOid; + bool collation_aware; + Expr *expr; + FmgrInfo ltproc; + Const *greaterstr; + + /* + * Can't do anything with a non-constant or NULL pattern argument. + * + * Note that since we restrict ourselves to cases with a hard constant on + * the RHS, it's a-fortiori a pseudoconstant, and we don't need to worry + * about verifying that. + */ + if (!IsA(rightop, Const) || + ((Const *) rightop)->constisnull) + return NIL; + patt = (Const *) rightop; + + /* + * Not supported if the expression collation is nondeterministic. The + * optimized equality or prefix tests use bytewise comparisons, which is + * not consistent with nondeterministic collations. The actual + * pattern-matching implementation functions will later error out that + * pattern-matching is not supported with nondeterministic collations. (We + * could also error out here, but by doing it later we get more precise + * error messages.) (It should be possible to support at least + * Pattern_Prefix_Exact, but no point as long as the actual + * pattern-matching implementations don't support it.) + * + * expr_coll is not set for a non-collation-aware data type such as bytea. + */ + if (expr_coll && !get_collation_isdeterministic(expr_coll)) + return NIL; + + /* + * Try to extract a fixed prefix from the pattern. 
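+	 * For example, LIKE 'abc%' yields the partial prefix 'abc', LIKE 'abc'
+	 * with no wildcards is reported as an exact match, and LIKE '%abc' has
+	 * no fixed prefix at all, in which case we give up just below.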
+ */ + pstatus = pattern_fixed_prefix(patt, ptype, expr_coll, + &prefix, NULL); + + /* fail if no fixed prefix */ + if (pstatus == Pattern_Prefix_None) + return NIL; + + /* + * Identify the operators we want to use, based on the type of the + * left-hand argument. Usually these are just the type's regular + * comparison operators, but if we are considering one of the semi-legacy + * "pattern" opclasses, use the "pattern" operators instead. Those are + * not collation-sensitive but always use C collation, as we want. The + * selected operators also determine the needed type of the prefix + * constant. + */ + ldatatype = exprType(leftop); + switch (ldatatype) + { + case TEXTOID: + if (opfamily == TEXT_PATTERN_BTREE_FAM_OID) + { + eqopr = TextEqualOperator; + ltopr = TextPatternLessOperator; + geopr = TextPatternGreaterEqualOperator; + collation_aware = false; + } + else if (opfamily == TEXT_SPGIST_FAM_OID) + { + eqopr = TextEqualOperator; + ltopr = TextPatternLessOperator; + geopr = TextPatternGreaterEqualOperator; + /* This opfamily has direct support for prefixing */ + preopr = TextPrefixOperator; + collation_aware = false; + } + else + { + eqopr = TextEqualOperator; + ltopr = TextLessOperator; + geopr = TextGreaterEqualOperator; + collation_aware = true; + } + rdatatype = TEXTOID; + break; + case NAMEOID: + + /* + * Note that here, we need the RHS type to be text, so that the + * comparison value isn't improperly truncated to NAMEDATALEN. + */ + eqopr = NameEqualTextOperator; + ltopr = NameLessTextOperator; + geopr = NameGreaterEqualTextOperator; + collation_aware = true; + rdatatype = TEXTOID; + break; + case BPCHAROID: + if (opfamily == BPCHAR_PATTERN_BTREE_FAM_OID) + { + eqopr = BpcharEqualOperator; + ltopr = BpcharPatternLessOperator; + geopr = BpcharPatternGreaterEqualOperator; + collation_aware = false; + } + else + { + eqopr = BpcharEqualOperator; + ltopr = BpcharLessOperator; + geopr = BpcharGreaterEqualOperator; + collation_aware = true; + } + rdatatype = BPCHAROID; + break; + case BYTEAOID: + eqopr = ByteaEqualOperator; + ltopr = ByteaLessOperator; + geopr = ByteaGreaterEqualOperator; + collation_aware = false; + rdatatype = BYTEAOID; + break; + default: + /* Can't get here unless we're attached to the wrong operator */ + return NIL; + } + + /* + * If necessary, coerce the prefix constant to the right type. The given + * prefix constant is either text or bytea type, therefore the only case + * where we need to do anything is when converting text to bpchar. Those + * two types are binary-compatible, so relabeling the Const node is + * sufficient. + */ + if (prefix->consttype != rdatatype) + { + Assert(prefix->consttype == TEXTOID && + rdatatype == BPCHAROID); + prefix->consttype = rdatatype; + } + + /* + * If we found an exact-match pattern, generate an "=" indexqual. + * + * Here and below, check to see whether the desired operator is actually + * supported by the index opclass, and fail quietly if not. This allows + * us to not be concerned with specific opclasses (except for the legacy + * "pattern" cases); any index that correctly implements the operators + * will work. + */ + if (pstatus == Pattern_Prefix_Exact) + { + if (!op_in_opfamily(eqopr, opfamily)) + return NIL; + expr = make_opclause(eqopr, BOOLOID, false, + (Expr *) leftop, (Expr *) prefix, + InvalidOid, indexcollation); + result = list_make1(expr); + return result; + } + + /* + * Otherwise, we have a nonempty required prefix of the values. 
Some + * opclasses support prefix checks directly, otherwise we'll try to + * generate a range constraint. + */ + if (OidIsValid(preopr) && op_in_opfamily(preopr, opfamily)) + { + expr = make_opclause(preopr, BOOLOID, false, + (Expr *) leftop, (Expr *) prefix, + InvalidOid, indexcollation); + result = list_make1(expr); + return result; + } + + /* + * Since we need a range constraint, it's only going to work reliably if + * the index is collation-insensitive or has "C" collation. Note that + * here we are looking at the index's collation, not the expression's + * collation -- this test is *not* dependent on the LIKE/regex operator's + * collation. + */ + if (collation_aware && + !lc_collate_is_c(indexcollation)) + return NIL; + + /* + * We can always say "x >= prefix". + */ + if (!op_in_opfamily(geopr, opfamily)) + return NIL; + expr = make_opclause(geopr, BOOLOID, false, + (Expr *) leftop, (Expr *) prefix, + InvalidOid, indexcollation); + result = list_make1(expr); + + /*------- + * If we can create a string larger than the prefix, we can say + * "x < greaterstr". NB: we rely on make_greater_string() to generate + * a guaranteed-greater string, not just a probably-greater string. + * In general this is only guaranteed in C locale, so we'd better be + * using a C-locale index collation. + *------- + */ + if (!op_in_opfamily(ltopr, opfamily)) + return result; + fmgr_info(get_opcode(ltopr), <proc); + greaterstr = make_greater_string(prefix, <proc, indexcollation); + if (greaterstr) + { + expr = make_opclause(ltopr, BOOLOID, false, + (Expr *) leftop, (Expr *) greaterstr, + InvalidOid, indexcollation); + result = lappend(result, expr); + } + + return result; +} + + +/* + * patternsel_common - generic code for pattern-match restriction selectivity. + * + * To support using this from either the operator or function paths, caller + * may pass either operator OID or underlying function OID; we look up the + * latter from the former if needed. (We could just have patternsel() call + * get_opcode(), but the work would be wasted if we don't have a need to + * compare a fixed prefix to the pg_statistic data.) + * + * Note that oprid and/or opfuncid should be for the positive-match operator + * even when negate is true. + */ +static double +patternsel_common(PlannerInfo *root, + Oid oprid, + Oid opfuncid, + List *args, + int varRelid, + Oid collation, + Pattern_Type ptype, + bool negate) +{ + VariableStatData vardata; + Node *other; + bool varonleft; + Datum constval; + Oid consttype; + Oid vartype; + Oid rdatatype; + Oid eqopr; + Oid ltopr; + Oid geopr; + Pattern_Prefix_Status pstatus; + Const *patt; + Const *prefix = NULL; + Selectivity rest_selec = 0; + double nullfrac = 0.0; + double result; + + /* + * Initialize result to the appropriate default estimate depending on + * whether it's a match or not-match operator. + */ + if (negate) + result = 1.0 - DEFAULT_MATCH_SEL; + else + result = DEFAULT_MATCH_SEL; + + /* + * If expression is not variable op constant, then punt and return the + * default estimate. + */ + if (!get_restriction_variable(root, args, varRelid, + &vardata, &other, &varonleft)) + return result; + if (!varonleft || !IsA(other, Const)) + { + ReleaseVariableStats(vardata); + return result; + } + + /* + * If the constant is NULL, assume operator is strict and return zero, ie, + * operator will never return TRUE. (It's zero even for a negator op.) 
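+	 * (Both "x LIKE NULL" and "x NOT LIKE NULL" yield NULL for every row,
+	 * and NULL is not TRUE, so the fraction of rows selected is zero either
+	 * way.)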
+ */ + if (((Const *) other)->constisnull) + { + ReleaseVariableStats(vardata); + return 0.0; + } + constval = ((Const *) other)->constvalue; + consttype = ((Const *) other)->consttype; + + /* + * The right-hand const is type text or bytea for all supported operators. + * We do not expect to see binary-compatible types here, since + * const-folding should have relabeled the const to exactly match the + * operator's declared type. + */ + if (consttype != TEXTOID && consttype != BYTEAOID) + { + ReleaseVariableStats(vardata); + return result; + } + + /* + * Similarly, the exposed type of the left-hand side should be one of + * those we know. (Do not look at vardata.atttype, which might be + * something binary-compatible but different.) We can use it to identify + * the comparison operators and the required type of the comparison + * constant, much as in match_pattern_prefix(). + */ + vartype = vardata.vartype; + + switch (vartype) + { + case TEXTOID: + eqopr = TextEqualOperator; + ltopr = TextLessOperator; + geopr = TextGreaterEqualOperator; + rdatatype = TEXTOID; + break; + case NAMEOID: + + /* + * Note that here, we need the RHS type to be text, so that the + * comparison value isn't improperly truncated to NAMEDATALEN. + */ + eqopr = NameEqualTextOperator; + ltopr = NameLessTextOperator; + geopr = NameGreaterEqualTextOperator; + rdatatype = TEXTOID; + break; + case BPCHAROID: + eqopr = BpcharEqualOperator; + ltopr = BpcharLessOperator; + geopr = BpcharGreaterEqualOperator; + rdatatype = BPCHAROID; + break; + case BYTEAOID: + eqopr = ByteaEqualOperator; + ltopr = ByteaLessOperator; + geopr = ByteaGreaterEqualOperator; + rdatatype = BYTEAOID; + break; + default: + /* Can't get here unless we're attached to the wrong operator */ + ReleaseVariableStats(vardata); + return result; + } + + /* + * Grab the nullfrac for use below. + */ + if (HeapTupleIsValid(vardata.statsTuple)) + { + Form_pg_statistic stats; + + stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); + nullfrac = stats->stanullfrac; + } + + /* + * Pull out any fixed prefix implied by the pattern, and estimate the + * fractional selectivity of the remainder of the pattern. Unlike many + * other selectivity estimators, we use the pattern operator's actual + * collation for this step. This is not because we expect the collation + * to make a big difference in the selectivity estimate (it seldom would), + * but because we want to be sure we cache compiled regexps under the + * right cache key, so that they can be re-used at runtime. + */ + patt = (Const *) other; + pstatus = pattern_fixed_prefix(patt, ptype, collation, + &prefix, &rest_selec); + + /* + * If necessary, coerce the prefix constant to the right type. The only + * case where we need to do anything is when converting text to bpchar. + * Those two types are binary-compatible, so relabeling the Const node is + * sufficient. + */ + if (prefix && prefix->consttype != rdatatype) + { + Assert(prefix->consttype == TEXTOID && + rdatatype == BPCHAROID); + prefix->consttype = rdatatype; + } + + if (pstatus == Pattern_Prefix_Exact) + { + /* + * Pattern specifies an exact match, so estimate as for '=' + */ + result = var_eq_const(&vardata, eqopr, collation, prefix->constvalue, + false, true, false); + } + else + { + /* + * Not exact-match pattern. If we have a sufficiently large + * histogram, estimate selectivity for the histogram part of the + * population by counting matches in the histogram. 
If not, estimate + * selectivity of the fixed prefix and remainder of pattern + * separately, then combine the two to get an estimate of the + * selectivity for the part of the column population represented by + * the histogram. (For small histograms, we combine these + * approaches.) + * + * We then add up data for any most-common-values values; these are + * not in the histogram population, and we can get exact answers for + * them by applying the pattern operator, so there's no reason to + * approximate. (If the MCVs cover a significant part of the total + * population, this gives us a big leg up in accuracy.) + */ + Selectivity selec; + int hist_size; + FmgrInfo opproc; + double mcv_selec, + sumcommon; + + /* Try to use the histogram entries to get selectivity */ + if (!OidIsValid(opfuncid)) + opfuncid = get_opcode(oprid); + fmgr_info(opfuncid, &opproc); + + selec = histogram_selectivity(&vardata, &opproc, collation, + constval, true, + 10, 1, &hist_size); + + /* If not at least 100 entries, use the heuristic method */ + if (hist_size < 100) + { + Selectivity heursel; + Selectivity prefixsel; + + if (pstatus == Pattern_Prefix_Partial) + prefixsel = prefix_selectivity(root, &vardata, + eqopr, ltopr, geopr, + collation, + prefix); + else + prefixsel = 1.0; + heursel = prefixsel * rest_selec; + + if (selec < 0) /* fewer than 10 histogram entries? */ + selec = heursel; + else + { + /* + * For histogram sizes from 10 to 100, we combine the + * histogram and heuristic selectivities, putting increasingly + * more trust in the histogram for larger sizes. + */ + double hist_weight = hist_size / 100.0; + + selec = selec * hist_weight + heursel * (1.0 - hist_weight); + } + } + + /* In any case, don't believe extremely small or large estimates. */ + if (selec < 0.0001) + selec = 0.0001; + else if (selec > 0.9999) + selec = 0.9999; + + /* + * If we have most-common-values info, add up the fractions of the MCV + * entries that satisfy MCV OP PATTERN. These fractions contribute + * directly to the result selectivity. Also add up the total fraction + * represented by MCV entries. + */ + mcv_selec = mcv_selectivity(&vardata, &opproc, collation, + constval, true, + &sumcommon); + + /* + * Now merge the results from the MCV and histogram calculations, + * realizing that the histogram covers only the non-null values that + * are not listed in MCV. + */ + selec *= 1.0 - nullfrac - sumcommon; + selec += mcv_selec; + result = selec; + } + + /* now adjust if we wanted not-match rather than match */ + if (negate) + result = 1.0 - result - nullfrac; + + /* result should be in range, but make sure... */ + CLAMP_PROBABILITY(result); + + if (prefix) + { + pfree(DatumGetPointer(prefix->constvalue)); + pfree(prefix); + } + + ReleaseVariableStats(vardata); + + return result; +} + +/* + * Fix impedance mismatch between SQL-callable functions and patternsel_common + */ +static double +patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate) +{ + PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); + Oid operator = PG_GETARG_OID(1); + List *args = (List *) PG_GETARG_POINTER(2); + int varRelid = PG_GETARG_INT32(3); + Oid collation = PG_GET_COLLATION(); + + /* + * If this is for a NOT LIKE or similar operator, get the corresponding + * positive-match operator and work with that. 
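+	 * (For example, when called for NOT LIKE's operator !~~ we look up its
+	 * negator ~~ and estimate with that; patternsel_common() then flips the
+	 * result via 1 - selectivity - nullfrac.)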
+ */ + if (negate) + { + operator = get_negator(operator); + if (!OidIsValid(operator)) + elog(ERROR, "patternsel called for operator without a negator"); + } + + return patternsel_common(root, + operator, + InvalidOid, + args, + varRelid, + collation, + ptype, + negate); +} + +/* + * regexeqsel - Selectivity of regular-expression pattern match. + */ +Datum +regexeqsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, false)); +} + +/* + * icregexeqsel - Selectivity of case-insensitive regex match. + */ +Datum +icregexeqsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, false)); +} + +/* + * likesel - Selectivity of LIKE pattern match. + */ +Datum +likesel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, false)); +} + +/* + * prefixsel - selectivity of prefix operator + */ +Datum +prefixsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false)); +} + +/* + * + * iclikesel - Selectivity of ILIKE pattern match. + */ +Datum +iclikesel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, false)); +} + +/* + * regexnesel - Selectivity of regular-expression pattern non-match. + */ +Datum +regexnesel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, true)); +} + +/* + * icregexnesel - Selectivity of case-insensitive regex non-match. + */ +Datum +icregexnesel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, true)); +} + +/* + * nlikesel - Selectivity of LIKE pattern non-match. + */ +Datum +nlikesel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, true)); +} + +/* + * icnlikesel - Selectivity of ILIKE pattern non-match. + */ +Datum +icnlikesel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true)); +} + +/* + * patternjoinsel - Generic code for pattern-match join selectivity. + */ +static double +patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate) +{ + /* For the moment we just punt. */ + return negate ? (1.0 - DEFAULT_MATCH_SEL) : DEFAULT_MATCH_SEL; +} + +/* + * regexeqjoinsel - Join selectivity of regular-expression pattern match. + */ +Datum +regexeqjoinsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, false)); +} + +/* + * icregexeqjoinsel - Join selectivity of case-insensitive regex match. + */ +Datum +icregexeqjoinsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, false)); +} + +/* + * likejoinsel - Join selectivity of LIKE pattern match. + */ +Datum +likejoinsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false)); +} + +/* + * prefixjoinsel - Join selectivity of prefix operator + */ +Datum +prefixjoinsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false)); +} + +/* + * iclikejoinsel - Join selectivity of ILIKE pattern match. + */ +Datum +iclikejoinsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, false)); +} + +/* + * regexnejoinsel - Join selectivity of regex non-match. + */ +Datum +regexnejoinsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, true)); +} + +/* + * icregexnejoinsel - Join selectivity of case-insensitive regex non-match. 
+ */ +Datum +icregexnejoinsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, true)); +} + +/* + * nlikejoinsel - Join selectivity of LIKE pattern non-match. + */ +Datum +nlikejoinsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, true)); +} + +/* + * icnlikejoinsel - Join selectivity of ILIKE pattern non-match. + */ +Datum +icnlikejoinsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, true)); +} + + +/*------------------------------------------------------------------------- + * + * Pattern analysis functions + * + * These routines support analysis of LIKE and regular-expression patterns + * by the planner/optimizer. It's important that they agree with the + * regular-expression code in backend/regex/ and the LIKE code in + * backend/utils/adt/like.c. Also, the computation of the fixed prefix + * must be conservative: if we report a string longer than the true fixed + * prefix, the query may produce actually wrong answers, rather than just + * getting a bad selectivity estimate! + * + *------------------------------------------------------------------------- + */ + +/* + * Extract the fixed prefix, if any, for a pattern. + * + * *prefix is set to a palloc'd prefix string (in the form of a Const node), + * or to NULL if no fixed prefix exists for the pattern. + * If rest_selec is not NULL, *rest_selec is set to an estimate of the + * selectivity of the remainder of the pattern (without any fixed prefix). + * The prefix Const has the same type (TEXT or BYTEA) as the input pattern. + * + * The return value distinguishes no fixed prefix, a partial prefix, + * or an exact-match-only pattern. + */ + +static Pattern_Prefix_Status +like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, + Const **prefix_const, Selectivity *rest_selec) +{ + char *match; + char *patt; + int pattlen; + Oid typeid = patt_const->consttype; + int pos, + match_pos; + bool is_multibyte = (pg_database_encoding_max_length() > 1); + pg_locale_t locale = 0; + bool locale_is_c = false; + + /* the right-hand const is type text or bytea */ + Assert(typeid == BYTEAOID || typeid == TEXTOID); + + if (case_insensitive) + { + if (typeid == BYTEAOID) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("case insensitive matching not supported on type bytea"))); + + if (!OidIsValid(collation)) + { + /* + * This typically means that the parser could not resolve a + * conflict of implicit collations, so report it that way. 
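+ * An illustrative case: the two ILIKE arguments carry different implicit
+ * collations (say, columns declared with different COLLATE clauses), so no
+ * single collation was assigned to the operator and we cannot pick one here.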
+ */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for ILIKE"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } + + /* If case-insensitive, we need locale info */ + if (lc_ctype_is_c(collation)) + locale_is_c = true; + else + locale = pg_newlocale_from_collation(collation); + } + + if (typeid != BYTEAOID) + { + patt = TextDatumGetCString(patt_const->constvalue); + pattlen = strlen(patt); + } + else + { + bytea *bstr = DatumGetByteaPP(patt_const->constvalue); + + pattlen = VARSIZE_ANY_EXHDR(bstr); + patt = (char *) palloc(pattlen); + memcpy(patt, VARDATA_ANY(bstr), pattlen); + Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue)); + } + + match = palloc(pattlen + 1); + match_pos = 0; + for (pos = 0; pos < pattlen; pos++) + { + /* % and _ are wildcard characters in LIKE */ + if (patt[pos] == '%' || + patt[pos] == '_') + break; + + /* Backslash escapes the next character */ + if (patt[pos] == '\\') + { + pos++; + if (pos >= pattlen) + break; + } + + /* Stop if case-varying character (it's sort of a wildcard) */ + if (case_insensitive && + pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c)) + break; + + match[match_pos++] = patt[pos]; + } + + match[match_pos] = '\0'; + + if (typeid != BYTEAOID) + *prefix_const = string_to_const(match, typeid); + else + *prefix_const = string_to_bytea_const(match, match_pos); + + if (rest_selec != NULL) + *rest_selec = like_selectivity(&patt[pos], pattlen - pos, + case_insensitive); + + pfree(patt); + pfree(match); + + /* in LIKE, an empty pattern is an exact match! */ + if (pos == pattlen) + return Pattern_Prefix_Exact; /* reached end of pattern, so exact */ + + if (match_pos > 0) + return Pattern_Prefix_Partial; + + return Pattern_Prefix_None; +} + +static Pattern_Prefix_Status +regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, + Const **prefix_const, Selectivity *rest_selec) +{ + Oid typeid = patt_const->consttype; + char *prefix; + bool exact; + + /* + * Should be unnecessary, there are no bytea regex operators defined. As + * such, it should be noted that the rest of this function has *not* been + * made safe for binary (possibly NULL containing) strings. 
+ */ + if (typeid == BYTEAOID) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("regular-expression matching not supported on type bytea"))); + + /* Use the regexp machinery to extract the prefix, if any */ + prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue), + case_insensitive, collation, + &exact); + + if (prefix == NULL) + { + *prefix_const = NULL; + + if (rest_selec != NULL) + { + char *patt = TextDatumGetCString(patt_const->constvalue); + + *rest_selec = regex_selectivity(patt, strlen(patt), + case_insensitive, + 0); + pfree(patt); + } + + return Pattern_Prefix_None; + } + + *prefix_const = string_to_const(prefix, typeid); + + if (rest_selec != NULL) + { + if (exact) + { + /* Exact match, so there's no additional selectivity */ + *rest_selec = 1.0; + } + else + { + char *patt = TextDatumGetCString(patt_const->constvalue); + + *rest_selec = regex_selectivity(patt, strlen(patt), + case_insensitive, + strlen(prefix)); + pfree(patt); + } + } + + pfree(prefix); + + if (exact) + return Pattern_Prefix_Exact; /* pattern specifies exact match */ + else + return Pattern_Prefix_Partial; +} + +static Pattern_Prefix_Status +pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation, + Const **prefix, Selectivity *rest_selec) +{ + Pattern_Prefix_Status result; + + switch (ptype) + { + case Pattern_Type_Like: + result = like_fixed_prefix(patt, false, collation, + prefix, rest_selec); + break; + case Pattern_Type_Like_IC: + result = like_fixed_prefix(patt, true, collation, + prefix, rest_selec); + break; + case Pattern_Type_Regex: + result = regex_fixed_prefix(patt, false, collation, + prefix, rest_selec); + break; + case Pattern_Type_Regex_IC: + result = regex_fixed_prefix(patt, true, collation, + prefix, rest_selec); + break; + case Pattern_Type_Prefix: + /* Prefix type work is trivial. */ + result = Pattern_Prefix_Partial; + *prefix = makeConst(patt->consttype, + patt->consttypmod, + patt->constcollid, + patt->constlen, + datumCopy(patt->constvalue, + patt->constbyval, + patt->constlen), + patt->constisnull, + patt->constbyval); + if (rest_selec != NULL) + *rest_selec = 1.0; /* all */ + break; + default: + elog(ERROR, "unrecognized ptype: %d", (int) ptype); + result = Pattern_Prefix_None; /* keep compiler quiet */ + break; + } + return result; +} + +/* + * Estimate the selectivity of a fixed prefix for a pattern match. + * + * A fixed prefix "foo" is estimated as the selectivity of the expression + * "variable >= 'foo' AND variable < 'fop'". + * + * The selectivity estimate is with respect to the portion of the column + * population represented by the histogram --- the caller must fold this + * together with info about MCVs and NULLs. + * + * We use the given comparison operators and collation to do the estimation. + * The given variable and Const must be of the associated datatype(s). + * + * XXX Note: we make use of the upper bound to estimate operator selectivity + * even if the locale is such that we cannot rely on the upper-bound string. + * The selectivity only needs to be approximately right anyway, so it seems + * more useful to use the upper-bound code than not. 
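+ * A worked example with made-up numbers: for prefix 'foo', if the histogram
+ * gives sel(var >= 'foo') = 0.83 and sel(var < 'fop') = 0.20, the range
+ * estimate is 0.83 + 0.20 - 1.0 = 0.03; if var_eq_const() then reports
+ * sel(var = 'foo') = 0.05, the final answer is clamped up to 0.05.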
+ */ +static Selectivity +prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, + Oid eqopr, Oid ltopr, Oid geopr, + Oid collation, + Const *prefixcon) +{ + Selectivity prefixsel; + FmgrInfo opproc; + Const *greaterstrcon; + Selectivity eq_sel; + + /* Estimate the selectivity of "x >= prefix" */ + fmgr_info(get_opcode(geopr), &opproc); + + prefixsel = ineq_histogram_selectivity(root, vardata, + geopr, &opproc, true, true, + collation, + prefixcon->constvalue, + prefixcon->consttype); + + if (prefixsel < 0.0) + { + /* No histogram is present ... return a suitable default estimate */ + return DEFAULT_MATCH_SEL; + } + + /* + * If we can create a string larger than the prefix, say "x < greaterstr". + */ + fmgr_info(get_opcode(ltopr), &opproc); + greaterstrcon = make_greater_string(prefixcon, &opproc, collation); + if (greaterstrcon) + { + Selectivity topsel; + + topsel = ineq_histogram_selectivity(root, vardata, + ltopr, &opproc, false, false, + collation, + greaterstrcon->constvalue, + greaterstrcon->consttype); + + /* ineq_histogram_selectivity worked before, it shouldn't fail now */ + Assert(topsel >= 0.0); + + /* + * Merge the two selectivities in the same way as for a range query + * (see clauselist_selectivity()). Note that we don't need to worry + * about double-exclusion of nulls, since ineq_histogram_selectivity + * doesn't count those anyway. + */ + prefixsel = topsel + prefixsel - 1.0; + } + + /* + * If the prefix is long then the two bounding values might be too close + * together for the histogram to distinguish them usefully, resulting in a + * zero estimate (plus or minus roundoff error). To avoid returning a + * ridiculously small estimate, compute the estimated selectivity for + * "variable = 'foo'", and clamp to that. (Obviously, the resultant + * estimate should be at least that.) + * + * We apply this even if we couldn't make a greater string. That case + * suggests that the prefix is near the maximum possible, and thus + * probably off the end of the histogram, and thus we probably got a very + * small estimate from the >= condition; so we still need to clamp. + */ + eq_sel = var_eq_const(vardata, eqopr, collation, prefixcon->constvalue, + false, true, false); + + prefixsel = Max(prefixsel, eq_sel); + + return prefixsel; +} + + +/* + * Estimate the selectivity of a pattern of the specified type. + * Note that any fixed prefix of the pattern will have been removed already, + * so actually we may be looking at just a fragment of the pattern. + * + * For now, we use a very simplistic approach: fixed characters reduce the + * selectivity a good deal, character ranges reduce it a little, + * wildcards (such as % for LIKE or .* for regex) increase it. 
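+ * A hand-computed sketch using the constants below: the LIKE remainder
+ * "bar%" is charged 0.20 * 0.20 * 0.20 for the three fixed characters
+ * times 5.0 for the trailing '%', i.e. about 0.04, before the final clamp
+ * to at most 1.0.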
+ */ + +#define FIXED_CHAR_SEL 0.20 /* about 1/5 */ +#define CHAR_RANGE_SEL 0.25 +#define ANY_CHAR_SEL 0.9 /* not 1, since it won't match end-of-string */ +#define FULL_WILDCARD_SEL 5.0 +#define PARTIAL_WILDCARD_SEL 2.0 + +static Selectivity +like_selectivity(const char *patt, int pattlen, bool case_insensitive) +{ + Selectivity sel = 1.0; + int pos; + + /* Skip any leading wildcard; it's already factored into initial sel */ + for (pos = 0; pos < pattlen; pos++) + { + if (patt[pos] != '%' && patt[pos] != '_') + break; + } + + for (; pos < pattlen; pos++) + { + /* % and _ are wildcard characters in LIKE */ + if (patt[pos] == '%') + sel *= FULL_WILDCARD_SEL; + else if (patt[pos] == '_') + sel *= ANY_CHAR_SEL; + else if (patt[pos] == '\\') + { + /* Backslash quotes the next character */ + pos++; + if (pos >= pattlen) + break; + sel *= FIXED_CHAR_SEL; + } + else + sel *= FIXED_CHAR_SEL; + } + /* Could get sel > 1 if multiple wildcards */ + if (sel > 1.0) + sel = 1.0; + return sel; +} + +static Selectivity +regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive) +{ + Selectivity sel = 1.0; + int paren_depth = 0; + int paren_pos = 0; /* dummy init to keep compiler quiet */ + int pos; + + /* since this function recurses, it could be driven to stack overflow */ + check_stack_depth(); + + for (pos = 0; pos < pattlen; pos++) + { + if (patt[pos] == '(') + { + if (paren_depth == 0) + paren_pos = pos; /* remember start of parenthesized item */ + paren_depth++; + } + else if (patt[pos] == ')' && paren_depth > 0) + { + paren_depth--; + if (paren_depth == 0) + sel *= regex_selectivity_sub(patt + (paren_pos + 1), + pos - (paren_pos + 1), + case_insensitive); + } + else if (patt[pos] == '|' && paren_depth == 0) + { + /* + * If unquoted | is present at paren level 0 in pattern, we have + * multiple alternatives; sum their probabilities. + */ + sel += regex_selectivity_sub(patt + (pos + 1), + pattlen - (pos + 1), + case_insensitive); + break; /* rest of pattern is now processed */ + } + else if (patt[pos] == '[') + { + bool negclass = false; + + if (patt[++pos] == '^') + { + negclass = true; + pos++; + } + if (patt[pos] == ']') /* ']' at start of class is not special */ + pos++; + while (pos < pattlen && patt[pos] != ']') + pos++; + if (paren_depth == 0) + sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL); + } + else if (patt[pos] == '.') + { + if (paren_depth == 0) + sel *= ANY_CHAR_SEL; + } + else if (patt[pos] == '*' || + patt[pos] == '?' || + patt[pos] == '+') + { + /* Ought to be smarter about quantifiers... 
*/ + if (paren_depth == 0) + sel *= PARTIAL_WILDCARD_SEL; + } + else if (patt[pos] == '{') + { + while (pos < pattlen && patt[pos] != '}') + pos++; + if (paren_depth == 0) + sel *= PARTIAL_WILDCARD_SEL; + } + else if (patt[pos] == '\\') + { + /* backslash quotes the next character */ + pos++; + if (pos >= pattlen) + break; + if (paren_depth == 0) + sel *= FIXED_CHAR_SEL; + } + else + { + if (paren_depth == 0) + sel *= FIXED_CHAR_SEL; + } + } + /* Could get sel > 1 if multiple wildcards */ + if (sel > 1.0) + sel = 1.0; + return sel; +} + +static Selectivity +regex_selectivity(const char *patt, int pattlen, bool case_insensitive, + int fixed_prefix_len) +{ + Selectivity sel; + + /* If patt doesn't end with $, consider it to have a trailing wildcard */ + if (pattlen > 0 && patt[pattlen - 1] == '$' && + (pattlen == 1 || patt[pattlen - 2] != '\\')) + { + /* has trailing $ */ + sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive); + } + else + { + /* no trailing $ */ + sel = regex_selectivity_sub(patt, pattlen, case_insensitive); + sel *= FULL_WILDCARD_SEL; + } + + /* + * If there's a fixed prefix, discount its selectivity. We have to be + * careful here since a very long prefix could result in pow's result + * underflowing to zero (in which case "sel" probably has as well). + */ + if (fixed_prefix_len > 0) + { + double prefixsel = pow(FIXED_CHAR_SEL, fixed_prefix_len); + + if (prefixsel > 0.0) + sel /= prefixsel; + } + + /* Make sure result stays in range */ + CLAMP_PROBABILITY(sel); + return sel; +} + +/* + * Check whether char is a letter (and, hence, subject to case-folding) + * + * In multibyte character sets or with ICU, we can't use isalpha, and it does + * not seem worth trying to convert to wchar_t to use iswalpha or u_isalpha. + * Instead, just assume any non-ASCII char is potentially case-varying, and + * hard-wire knowledge of which ASCII chars are letters. + */ +static int +pattern_char_isalpha(char c, bool is_multibyte, + pg_locale_t locale, bool locale_is_c) +{ + if (locale_is_c) + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); + else if (is_multibyte && IS_HIGHBIT_SET(c)) + return true; + else if (locale && locale->provider == COLLPROVIDER_ICU) + return IS_HIGHBIT_SET(c) || + (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); +#ifdef HAVE_LOCALE_T + else if (locale && locale->provider == COLLPROVIDER_LIBC) + return isalpha_l((unsigned char) c, locale->info.lt); +#endif + else + return isalpha((unsigned char) c); +} + + +/* + * For bytea, the increment function need only increment the current byte + * (there are no multibyte characters to worry about). + */ +static bool +byte_increment(unsigned char *ptr, int len) +{ + if (*ptr >= 255) + return false; + (*ptr)++; + return true; +} + +/* + * Try to generate a string greater than the given string or any + * string it is a prefix of. If successful, return a palloc'd string + * in the form of a Const node; else return NULL. + * + * The caller must provide the appropriate "less than" comparison function + * for testing the strings, along with the collation to use. + * + * The key requirement here is that given a prefix string, say "foo", + * we must be able to generate another string "fop" that is greater than + * all strings "foobar" starting with "foo". 
We can test that we have + * generated a string greater than the prefix string, but in non-C collations + * that is not a bulletproof guarantee that an extension of the string might + * not sort after it; an example is that "foo " is less than "foo!", but it + * is not clear that a "dictionary" sort ordering will consider "foo!" less + * than "foo bar". CAUTION: Therefore, this function should be used only for + * estimation purposes when working in a non-C collation. + * + * To try to catch most cases where an extended string might otherwise sort + * before the result value, we determine which of the strings "Z", "z", "y", + * and "9" is seen as largest by the collation, and append that to the given + * prefix before trying to find a string that compares as larger. + * + * To search for a greater string, we repeatedly "increment" the rightmost + * character, using an encoding-specific character incrementer function. + * When it's no longer possible to increment the last character, we truncate + * off that character and start incrementing the next-to-rightmost. + * For example, if "z" were the last character in the sort order, then we + * could produce "foo" as a string greater than "fonz". + * + * This could be rather slow in the worst case, but in most cases we + * won't have to try more than one or two strings before succeeding. + * + * Note that it's important for the character incrementer not to be too anal + * about producing every possible character code, since in some cases the only + * way to get a larger string is to increment a previous character position. + * So we don't want to spend too much time trying every possible character + * code at the last position. A good rule of thumb is to be sure that we + * don't try more than 256*K values for a K-byte character (and definitely + * not 256^K, which is what an exhaustive search would approach). + */ +static Const * +make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation) +{ + Oid datatype = str_const->consttype; + char *workstr; + int len; + Datum cmpstr; + char *cmptxt = NULL; + mbcharacter_incrementer charinc; + + /* + * Get a modifiable copy of the prefix string in C-string format, and set + * up the string we will compare to as a Datum. In C locale this can just + * be the given prefix string, otherwise we need to add a suffix. Type + * BYTEA sorts bytewise so it never needs a suffix either. 
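+ * For illustration (the collation behaviour is assumed, not verified here):
+ * with prefix 'foo' under a non-C collation that sorts 'z' highest among
+ * 'Z', 'z', 'y' and '9', we compare candidates against 'fooz', so a string
+ * such as 'fop' is accepted only if it sorts above 'fooz'.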
+ */ + if (datatype == BYTEAOID) + { + bytea *bstr = DatumGetByteaPP(str_const->constvalue); + + len = VARSIZE_ANY_EXHDR(bstr); + workstr = (char *) palloc(len); + memcpy(workstr, VARDATA_ANY(bstr), len); + Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue)); + cmpstr = str_const->constvalue; + } + else + { + if (datatype == NAMEOID) + workstr = DatumGetCString(DirectFunctionCall1(nameout, + str_const->constvalue)); + else + workstr = TextDatumGetCString(str_const->constvalue); + len = strlen(workstr); + if (lc_collate_is_c(collation) || len == 0) + cmpstr = str_const->constvalue; + else + { + /* If first time through, determine the suffix to use */ + static __thread char suffixchar = 0; + static __thread Oid suffixcollation = 0; + + if (!suffixchar || suffixcollation != collation) + { + char *best; + + best = "Z"; + if (varstr_cmp(best, 1, "z", 1, collation) < 0) + best = "z"; + if (varstr_cmp(best, 1, "y", 1, collation) < 0) + best = "y"; + if (varstr_cmp(best, 1, "9", 1, collation) < 0) + best = "9"; + suffixchar = *best; + suffixcollation = collation; + } + + /* And build the string to compare to */ + if (datatype == NAMEOID) + { + cmptxt = palloc(len + 2); + memcpy(cmptxt, workstr, len); + cmptxt[len] = suffixchar; + cmptxt[len + 1] = '\0'; + cmpstr = PointerGetDatum(cmptxt); + } + else + { + cmptxt = palloc(VARHDRSZ + len + 1); + SET_VARSIZE(cmptxt, VARHDRSZ + len + 1); + memcpy(VARDATA(cmptxt), workstr, len); + *(VARDATA(cmptxt) + len) = suffixchar; + cmpstr = PointerGetDatum(cmptxt); + } + } + } + + /* Select appropriate character-incrementer function */ + if (datatype == BYTEAOID) + charinc = byte_increment; + else + charinc = pg_database_encoding_character_incrementer(); + + /* And search ... */ + while (len > 0) + { + int charlen; + unsigned char *lastchar; + + /* Identify the last character --- for bytea, just the last byte */ + if (datatype == BYTEAOID) + charlen = 1; + else + charlen = len - pg_mbcliplen(workstr, len, len - 1); + lastchar = (unsigned char *) (workstr + len - charlen); + + /* + * Try to generate a larger string by incrementing the last character + * (for BYTEA, we treat each byte as a character). + * + * Note: the incrementer function is expected to return true if it's + * generated a valid-per-the-encoding new character, otherwise false. + * The contents of the character on false return are unspecified. + */ + while (charinc(lastchar, charlen)) + { + Const *workstr_const; + + if (datatype == BYTEAOID) + workstr_const = string_to_bytea_const(workstr, len); + else + workstr_const = string_to_const(workstr, datatype); + + if (DatumGetBool(FunctionCall2Coll(ltproc, + collation, + cmpstr, + workstr_const->constvalue))) + { + /* Successfully made a string larger than cmpstr */ + if (cmptxt) + pfree(cmptxt); + pfree(workstr); + return workstr_const; + } + + /* No good, release unusable value and try again */ + pfree(DatumGetPointer(workstr_const->constvalue)); + pfree(workstr_const); + } + + /* + * No luck here, so truncate off the last character and try to + * increment the next one. + */ + len -= charlen; + workstr[len] = '\0'; + } + + /* Failed... */ + if (cmptxt) + pfree(cmptxt); + pfree(workstr); + + return NULL; +} + +/* + * Generate a Datum of the appropriate type from a C string. + * Note that all of the supported types are pass-by-ref, so the + * returned value should be pfree'd if no longer needed. 
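+ * For example, string_to_datum("foo", TEXTOID) goes through
+ * CStringGetTextDatum() and yields a text Datum the caller should pfree.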
+ */ +static Datum +string_to_datum(const char *str, Oid datatype) +{ + Assert(str != NULL); + + /* + * We cheat a little by assuming that CStringGetTextDatum() will do for + * bpchar and varchar constants too... + */ + if (datatype == NAMEOID) + return DirectFunctionCall1(namein, CStringGetDatum(str)); + else if (datatype == BYTEAOID) + return DirectFunctionCall1(byteain, CStringGetDatum(str)); + else + return CStringGetTextDatum(str); +} + +/* + * Generate a Const node of the appropriate type from a C string. + */ +static Const * +string_to_const(const char *str, Oid datatype) +{ + Datum conval = string_to_datum(str, datatype); + Oid collation; + int constlen; + + /* + * We only need to support a few datatypes here, so hard-wire properties + * instead of incurring the expense of catalog lookups. + */ + switch (datatype) + { + case TEXTOID: + case VARCHAROID: + case BPCHAROID: + collation = DEFAULT_COLLATION_OID; + constlen = -1; + break; + + case NAMEOID: + collation = C_COLLATION_OID; + constlen = NAMEDATALEN; + break; + + case BYTEAOID: + collation = InvalidOid; + constlen = -1; + break; + + default: + elog(ERROR, "unexpected datatype in string_to_const: %u", + datatype); + return NULL; + } + + return makeConst(datatype, -1, collation, constlen, + conval, false, false); +} + +/* + * Generate a Const node of bytea type from a binary C string and a length. + */ +static Const * +string_to_bytea_const(const char *str, size_t str_len) +{ + bytea *bstr = palloc(VARHDRSZ + str_len); + Datum conval; + + memcpy(VARDATA(bstr), str, str_len); + SET_VARSIZE(bstr, VARHDRSZ + str_len); + conval = PointerGetDatum(bstr); + + return makeConst(BYTEAOID, -1, InvalidOid, -1, conval, false, false); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/lockfuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/lockfuncs.c new file mode 100644 index 00000000000..f9b9590997b --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/lockfuncs.c @@ -0,0 +1,1083 @@ +/*------------------------------------------------------------------------- + * + * lockfuncs.c + * Functions for SQL access to various lock-manager capabilities. + * + * Copyright (c) 2002-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/adt/lockfuncs.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/xact.h" +#include "catalog/pg_type.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "storage/predicate_internals.h" +#include "utils/array.h" +#include "utils/builtins.h" + + +/* + * This must match enum LockTagType! Also, be sure to document any changes + * in the docs for the pg_locks view and for wait event types. 
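+ * For example, LOCKTAG_RELATION (the first enum value) maps to the string
+ * "relation" below, which is what the pg_locks.locktype column displays.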
+ */ +const char *const LockTagTypeNames[] = { + "relation", + "extend", + "frozenid", + "page", + "tuple", + "transactionid", + "virtualxid", + "spectoken", + "object", + "userlock", + "advisory", + "applytransaction" +}; + +StaticAssertDecl(lengthof(LockTagTypeNames) == (LOCKTAG_LAST_TYPE + 1), + "array length mismatch"); + +/* This must match enum PredicateLockTargetType (predicate_internals.h) */ +static const char *const PredicateLockTagTypeNames[] = { + "relation", + "page", + "tuple" +}; + +StaticAssertDecl(lengthof(PredicateLockTagTypeNames) == (PREDLOCKTAG_TUPLE + 1), + "array length mismatch"); + +/* Working status for pg_lock_status */ +typedef struct +{ + LockData *lockData; /* state data from lmgr */ + int currIdx; /* current PROCLOCK index */ + PredicateLockData *predLockData; /* state data for pred locks */ + int predLockIdx; /* current index for pred lock */ +} PG_Lock_Status; + +/* Number of columns in pg_locks output */ +#define NUM_LOCK_STATUS_COLUMNS 16 + +/* + * VXIDGetDatum - Construct a text representation of a VXID + * + * This is currently only used in pg_lock_status, so we put it here. + */ +static Datum +VXIDGetDatum(BackendId bid, LocalTransactionId lxid) +{ + /* + * The representation is "<bid>/<lxid>", decimal and unsigned decimal + * respectively. Note that elog.c also knows how to format a vxid. + */ + char vxidstr[32]; + + snprintf(vxidstr, sizeof(vxidstr), "%d/%u", bid, lxid); + + return CStringGetTextDatum(vxidstr); +} + + +/* + * pg_lock_status - produce a view with one row per held or awaited lock mode + */ +Datum +pg_lock_status(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + PG_Lock_Status *mystatus; + LockData *lockData; + PredicateLockData *predLockData; + + if (SRF_IS_FIRSTCALL()) + { + TupleDesc tupdesc; + MemoryContext oldcontext; + + /* create a function context for cross-call persistence */ + funcctx = SRF_FIRSTCALL_INIT(); + + /* + * switch to memory context appropriate for multiple function calls + */ + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* build tupdesc for result tuples */ + /* this had better match function's declaration in pg_proc.h */ + tupdesc = CreateTemplateTupleDesc(NUM_LOCK_STATUS_COLUMNS); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "locktype", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "database", + OIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "relation", + OIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 4, "page", + INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 5, "tuple", + INT2OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 6, "virtualxid", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 7, "transactionid", + XIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 8, "classid", + OIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 9, "objid", + OIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 10, "objsubid", + INT2OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 11, "virtualtransaction", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 12, "pid", + INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 13, "mode", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 14, "granted", + BOOLOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 15, "fastpath", + BOOLOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 16, "waitstart", + TIMESTAMPTZOID, -1, 0); + + funcctx->tuple_desc = BlessTupleDesc(tupdesc); + + /* + * Collect all the locking 
information that we will format and send + * out as a result set. + */ + mystatus = (PG_Lock_Status *) palloc(sizeof(PG_Lock_Status)); + funcctx->user_fctx = (void *) mystatus; + + mystatus->lockData = GetLockStatusData(); + mystatus->currIdx = 0; + mystatus->predLockData = GetPredicateLockStatusData(); + mystatus->predLockIdx = 0; + + MemoryContextSwitchTo(oldcontext); + } + + funcctx = SRF_PERCALL_SETUP(); + mystatus = (PG_Lock_Status *) funcctx->user_fctx; + lockData = mystatus->lockData; + + while (mystatus->currIdx < lockData->nelements) + { + bool granted; + LOCKMODE mode = 0; + const char *locktypename; + char tnbuf[32]; + Datum values[NUM_LOCK_STATUS_COLUMNS] = {0}; + bool nulls[NUM_LOCK_STATUS_COLUMNS] = {0}; + HeapTuple tuple; + Datum result; + LockInstanceData *instance; + + instance = &(lockData->locks[mystatus->currIdx]); + + /* + * Look to see if there are any held lock modes in this PROCLOCK. If + * so, report, and destructively modify lockData so we don't report + * again. + */ + granted = false; + if (instance->holdMask) + { + for (mode = 0; mode < MAX_LOCKMODES; mode++) + { + if (instance->holdMask & LOCKBIT_ON(mode)) + { + granted = true; + instance->holdMask &= LOCKBIT_OFF(mode); + break; + } + } + } + + /* + * If no (more) held modes to report, see if PROC is waiting for a + * lock on this lock. + */ + if (!granted) + { + if (instance->waitLockMode != NoLock) + { + /* Yes, so report it with proper mode */ + mode = instance->waitLockMode; + + /* + * We are now done with this PROCLOCK, so advance pointer to + * continue with next one on next call. + */ + mystatus->currIdx++; + } + else + { + /* + * Okay, we've displayed all the locks associated with this + * PROCLOCK, proceed to the next one. + */ + mystatus->currIdx++; + continue; + } + } + + /* + * Form tuple with appropriate data. 
+ */ + + if (instance->locktag.locktag_type <= LOCKTAG_LAST_TYPE) + locktypename = LockTagTypeNames[instance->locktag.locktag_type]; + else + { + snprintf(tnbuf, sizeof(tnbuf), "unknown %d", + (int) instance->locktag.locktag_type); + locktypename = tnbuf; + } + values[0] = CStringGetTextDatum(locktypename); + + switch ((LockTagType) instance->locktag.locktag_type) + { + case LOCKTAG_RELATION: + case LOCKTAG_RELATION_EXTEND: + values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); + values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2); + nulls[3] = true; + nulls[4] = true; + nulls[5] = true; + nulls[6] = true; + nulls[7] = true; + nulls[8] = true; + nulls[9] = true; + break; + case LOCKTAG_DATABASE_FROZEN_IDS: + values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); + nulls[2] = true; + nulls[3] = true; + nulls[4] = true; + nulls[5] = true; + nulls[6] = true; + nulls[7] = true; + nulls[8] = true; + nulls[9] = true; + break; + case LOCKTAG_PAGE: + values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); + values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2); + values[3] = UInt32GetDatum(instance->locktag.locktag_field3); + nulls[4] = true; + nulls[5] = true; + nulls[6] = true; + nulls[7] = true; + nulls[8] = true; + nulls[9] = true; + break; + case LOCKTAG_TUPLE: + values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); + values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2); + values[3] = UInt32GetDatum(instance->locktag.locktag_field3); + values[4] = UInt16GetDatum(instance->locktag.locktag_field4); + nulls[5] = true; + nulls[6] = true; + nulls[7] = true; + nulls[8] = true; + nulls[9] = true; + break; + case LOCKTAG_TRANSACTION: + values[6] = + TransactionIdGetDatum(instance->locktag.locktag_field1); + nulls[1] = true; + nulls[2] = true; + nulls[3] = true; + nulls[4] = true; + nulls[5] = true; + nulls[7] = true; + nulls[8] = true; + nulls[9] = true; + break; + case LOCKTAG_VIRTUALTRANSACTION: + values[5] = VXIDGetDatum(instance->locktag.locktag_field1, + instance->locktag.locktag_field2); + nulls[1] = true; + nulls[2] = true; + nulls[3] = true; + nulls[4] = true; + nulls[6] = true; + nulls[7] = true; + nulls[8] = true; + nulls[9] = true; + break; + case LOCKTAG_SPECULATIVE_TOKEN: + values[6] = + TransactionIdGetDatum(instance->locktag.locktag_field1); + values[8] = ObjectIdGetDatum(instance->locktag.locktag_field2); + nulls[1] = true; + nulls[2] = true; + nulls[3] = true; + nulls[4] = true; + nulls[5] = true; + nulls[7] = true; + nulls[9] = true; + break; + case LOCKTAG_APPLY_TRANSACTION: + values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); + values[8] = ObjectIdGetDatum(instance->locktag.locktag_field2); + values[6] = ObjectIdGetDatum(instance->locktag.locktag_field3); + values[9] = Int16GetDatum(instance->locktag.locktag_field4); + nulls[2] = true; + nulls[3] = true; + nulls[4] = true; + nulls[5] = true; + nulls[7] = true; + break; + case LOCKTAG_OBJECT: + case LOCKTAG_USERLOCK: + case LOCKTAG_ADVISORY: + default: /* treat unknown locktags like OBJECT */ + values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); + values[7] = ObjectIdGetDatum(instance->locktag.locktag_field2); + values[8] = ObjectIdGetDatum(instance->locktag.locktag_field3); + values[9] = Int16GetDatum(instance->locktag.locktag_field4); + nulls[2] = true; + nulls[3] = true; + nulls[4] = true; + nulls[5] = true; + nulls[6] = true; + break; + } + + values[10] = VXIDGetDatum(instance->backend, instance->lxid); + if (instance->pid != 0) + 
values[11] = Int32GetDatum(instance->pid); + else + nulls[11] = true; + values[12] = CStringGetTextDatum(GetLockmodeName(instance->locktag.locktag_lockmethodid, mode)); + values[13] = BoolGetDatum(granted); + values[14] = BoolGetDatum(instance->fastpath); + if (!granted && instance->waitStart != 0) + values[15] = TimestampTzGetDatum(instance->waitStart); + else + nulls[15] = true; + + tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); + result = HeapTupleGetDatum(tuple); + SRF_RETURN_NEXT(funcctx, result); + } + + /* + * Have returned all regular locks. Now start on the SIREAD predicate + * locks. + */ + predLockData = mystatus->predLockData; + if (mystatus->predLockIdx < predLockData->nelements) + { + PredicateLockTargetType lockType; + + PREDICATELOCKTARGETTAG *predTag = &(predLockData->locktags[mystatus->predLockIdx]); + SERIALIZABLEXACT *xact = &(predLockData->xacts[mystatus->predLockIdx]); + Datum values[NUM_LOCK_STATUS_COLUMNS] = {0}; + bool nulls[NUM_LOCK_STATUS_COLUMNS] = {0}; + HeapTuple tuple; + Datum result; + + mystatus->predLockIdx++; + + /* + * Form tuple with appropriate data. + */ + + /* lock type */ + lockType = GET_PREDICATELOCKTARGETTAG_TYPE(*predTag); + + values[0] = CStringGetTextDatum(PredicateLockTagTypeNames[lockType]); + + /* lock target */ + values[1] = GET_PREDICATELOCKTARGETTAG_DB(*predTag); + values[2] = GET_PREDICATELOCKTARGETTAG_RELATION(*predTag); + if (lockType == PREDLOCKTAG_TUPLE) + values[4] = GET_PREDICATELOCKTARGETTAG_OFFSET(*predTag); + else + nulls[4] = true; + if ((lockType == PREDLOCKTAG_TUPLE) || + (lockType == PREDLOCKTAG_PAGE)) + values[3] = GET_PREDICATELOCKTARGETTAG_PAGE(*predTag); + else + nulls[3] = true; + + /* these fields are targets for other types of locks */ + nulls[5] = true; /* virtualxid */ + nulls[6] = true; /* transactionid */ + nulls[7] = true; /* classid */ + nulls[8] = true; /* objid */ + nulls[9] = true; /* objsubid */ + + /* lock holder */ + values[10] = VXIDGetDatum(xact->vxid.backendId, + xact->vxid.localTransactionId); + if (xact->pid != 0) + values[11] = Int32GetDatum(xact->pid); + else + nulls[11] = true; + + /* + * Lock mode. Currently all predicate locks are SIReadLocks, which are + * always held (never waiting) and have no fast path + */ + values[12] = CStringGetTextDatum("SIReadLock"); + values[13] = BoolGetDatum(true); + values[14] = BoolGetDatum(false); + nulls[15] = true; + + tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); + result = HeapTupleGetDatum(tuple); + SRF_RETURN_NEXT(funcctx, result); + } + + SRF_RETURN_DONE(funcctx); +} + + +/* + * pg_blocking_pids - produce an array of the PIDs blocking given PID + * + * The reported PIDs are those that hold a lock conflicting with blocked_pid's + * current request (hard block), or are requesting such a lock and are ahead + * of blocked_pid in the lock's wait queue (soft block). + * + * In parallel-query cases, we report all PIDs blocking any member of the + * given PID's lock group, and the reported PIDs are those of the blocking + * PIDs' lock group leaders. This allows callers to compare the result to + * lists of clients' pg_backend_pid() results even during a parallel query. + * + * Parallel query makes it possible for there to be duplicate PIDs in the + * result (either because multiple waiters are blocked by same PID, or + * because multiple blockers have same group leader PID). We do not bother + * to eliminate such duplicates from the result. + * + * We need not consider predicate locks here, since those don't block anything. 
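+ * An illustrative use from SQL (assumed typical, not prescribed here):
+ *     SELECT pid, pg_blocking_pids(pid)
+ *     FROM pg_stat_activity
+ *     WHERE cardinality(pg_blocking_pids(pid)) > 0;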
+ */ +Datum +pg_blocking_pids(PG_FUNCTION_ARGS) +{ + int blocked_pid = PG_GETARG_INT32(0); + Datum *arrayelems; + int narrayelems; + BlockedProcsData *lockData; /* state data from lmgr */ + int i, + j; + + /* Collect a snapshot of lock manager state */ + lockData = GetBlockerStatusData(blocked_pid); + + /* We can't need more output entries than there are reported PROCLOCKs */ + arrayelems = (Datum *) palloc(lockData->nlocks * sizeof(Datum)); + narrayelems = 0; + + /* For each blocked proc in the lock group ... */ + for (i = 0; i < lockData->nprocs; i++) + { + BlockedProcData *bproc = &lockData->procs[i]; + LockInstanceData *instances = &lockData->locks[bproc->first_lock]; + int *preceding_waiters = &lockData->waiter_pids[bproc->first_waiter]; + LockInstanceData *blocked_instance; + LockMethod lockMethodTable; + int conflictMask; + + /* + * Locate the blocked proc's own entry in the LockInstanceData array. + * There should be exactly one matching entry. + */ + blocked_instance = NULL; + for (j = 0; j < bproc->num_locks; j++) + { + LockInstanceData *instance = &(instances[j]); + + if (instance->pid == bproc->pid) + { + Assert(blocked_instance == NULL); + blocked_instance = instance; + } + } + Assert(blocked_instance != NULL); + + lockMethodTable = GetLockTagsMethodTable(&(blocked_instance->locktag)); + conflictMask = lockMethodTable->conflictTab[blocked_instance->waitLockMode]; + + /* Now scan the PROCLOCK data for conflicting procs */ + for (j = 0; j < bproc->num_locks; j++) + { + LockInstanceData *instance = &(instances[j]); + + /* A proc never blocks itself, so ignore that entry */ + if (instance == blocked_instance) + continue; + /* Members of same lock group never block each other, either */ + if (instance->leaderPid == blocked_instance->leaderPid) + continue; + + if (conflictMask & instance->holdMask) + { + /* hard block: blocked by lock already held by this entry */ + } + else if (instance->waitLockMode != NoLock && + (conflictMask & LOCKBIT_ON(instance->waitLockMode))) + { + /* conflict in lock requests; who's in front in wait queue? 
*/ + bool ahead = false; + int k; + + for (k = 0; k < bproc->num_waiters; k++) + { + if (preceding_waiters[k] == instance->pid) + { + /* soft block: this entry is ahead of blocked proc */ + ahead = true; + break; + } + } + if (!ahead) + continue; /* not blocked by this entry */ + } + else + { + /* not blocked by this entry */ + continue; + } + + /* blocked by this entry, so emit a record */ + arrayelems[narrayelems++] = Int32GetDatum(instance->leaderPid); + } + } + + /* Assert we didn't overrun arrayelems[] */ + Assert(narrayelems <= lockData->nlocks); + + PG_RETURN_ARRAYTYPE_P(construct_array_builtin(arrayelems, narrayelems, INT4OID)); +} + + +/* + * pg_safe_snapshot_blocking_pids - produce an array of the PIDs blocking + * given PID from getting a safe snapshot + * + * XXX this does not consider parallel-query cases; not clear how big a + * problem that is in practice + */ +Datum +pg_safe_snapshot_blocking_pids(PG_FUNCTION_ARGS) +{ + int blocked_pid = PG_GETARG_INT32(0); + int *blockers; + int num_blockers; + Datum *blocker_datums; + + /* A buffer big enough for any possible blocker list without truncation */ + blockers = (int *) palloc(MaxBackends * sizeof(int)); + + /* Collect a snapshot of processes waited for by GetSafeSnapshot */ + num_blockers = + GetSafeSnapshotBlockingPids(blocked_pid, blockers, MaxBackends); + + /* Convert int array to Datum array */ + if (num_blockers > 0) + { + int i; + + blocker_datums = (Datum *) palloc(num_blockers * sizeof(Datum)); + for (i = 0; i < num_blockers; ++i) + blocker_datums[i] = Int32GetDatum(blockers[i]); + } + else + blocker_datums = NULL; + + PG_RETURN_ARRAYTYPE_P(construct_array_builtin(blocker_datums, num_blockers, INT4OID)); +} + + +/* + * pg_isolation_test_session_is_blocked - support function for isolationtester + * + * Check if specified PID is blocked by any of the PIDs listed in the second + * argument. Currently, this looks for blocking caused by waiting for + * heavyweight locks or safe snapshots. We ignore blockage caused by PIDs + * not directly under the isolationtester's control, eg autovacuum. + * + * This is an undocumented function intended for use by the isolation tester, + * and may change in future releases as required for testing purposes. + */ +Datum +pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS) +{ + int blocked_pid = PG_GETARG_INT32(0); + ArrayType *interesting_pids_a = PG_GETARG_ARRAYTYPE_P(1); + ArrayType *blocking_pids_a; + int32 *interesting_pids; + int32 *blocking_pids; + int num_interesting_pids; + int num_blocking_pids; + int dummy; + int i, + j; + + /* Validate the passed-in array */ + Assert(ARR_ELEMTYPE(interesting_pids_a) == INT4OID); + if (array_contains_nulls(interesting_pids_a)) + elog(ERROR, "array must not contain nulls"); + interesting_pids = (int32 *) ARR_DATA_PTR(interesting_pids_a); + num_interesting_pids = ArrayGetNItems(ARR_NDIM(interesting_pids_a), + ARR_DIMS(interesting_pids_a)); + + /* + * Get the PIDs of all sessions blocking the given session's attempt to + * acquire heavyweight locks. + */ + blocking_pids_a = + DatumGetArrayTypeP(DirectFunctionCall1(pg_blocking_pids, blocked_pid)); + + Assert(ARR_ELEMTYPE(blocking_pids_a) == INT4OID); + Assert(!array_contains_nulls(blocking_pids_a)); + blocking_pids = (int32 *) ARR_DATA_PTR(blocking_pids_a); + num_blocking_pids = ArrayGetNItems(ARR_NDIM(blocking_pids_a), + ARR_DIMS(blocking_pids_a)); + + /* + * Check if any of these are in the list of interesting PIDs, that being + * the sessions that the isolation tester is running. 
We don't use + * "arrayoverlaps" here, because it would lead to cache lookups and one of + * our goals is to run quickly with debug_discard_caches > 0. We expect + * blocking_pids to be usually empty and otherwise a very small number in + * isolation tester cases, so make that the outer loop of a naive search + * for a match. + */ + for (i = 0; i < num_blocking_pids; i++) + for (j = 0; j < num_interesting_pids; j++) + { + if (blocking_pids[i] == interesting_pids[j]) + PG_RETURN_BOOL(true); + } + + /* + * Check if blocked_pid is waiting for a safe snapshot. We could in + * theory check the resulting array of blocker PIDs against the + * interesting PIDs list, but since there is no danger of autovacuum + * blocking GetSafeSnapshot there seems to be no point in expending cycles + * on allocating a buffer and searching for overlap; so it's presently + * sufficient for the isolation tester's purposes to use a single element + * buffer and check if the number of safe snapshot blockers is non-zero. + */ + if (GetSafeSnapshotBlockingPids(blocked_pid, &dummy, 1) > 0) + PG_RETURN_BOOL(true); + + PG_RETURN_BOOL(false); +} + + +/* + * Functions for manipulating advisory locks + * + * We make use of the locktag fields as follows: + * + * field1: MyDatabaseId ... ensures locks are local to each database + * field2: first of 2 int4 keys, or high-order half of an int8 key + * field3: second of 2 int4 keys, or low-order half of an int8 key + * field4: 1 if using an int8 key, 2 if using 2 int4 keys + */ +#define SET_LOCKTAG_INT64(tag, key64) \ + SET_LOCKTAG_ADVISORY(tag, \ + MyDatabaseId, \ + (uint32) ((key64) >> 32), \ + (uint32) (key64), \ + 1) +#define SET_LOCKTAG_INT32(tag, key1, key2) \ + SET_LOCKTAG_ADVISORY(tag, MyDatabaseId, key1, key2, 2) + +/* + * pg_advisory_lock(int8) - acquire exclusive lock on an int8 key + */ +Datum +pg_advisory_lock_int8(PG_FUNCTION_ARGS) +{ + int64 key = PG_GETARG_INT64(0); + LOCKTAG tag; + + SET_LOCKTAG_INT64(tag, key); + + (void) LockAcquire(&tag, ExclusiveLock, true, false); + + PG_RETURN_VOID(); +} + +/* + * pg_advisory_xact_lock(int8) - acquire xact scoped + * exclusive lock on an int8 key + */ +Datum +pg_advisory_xact_lock_int8(PG_FUNCTION_ARGS) +{ + int64 key = PG_GETARG_INT64(0); + LOCKTAG tag; + + SET_LOCKTAG_INT64(tag, key); + + (void) LockAcquire(&tag, ExclusiveLock, false, false); + + PG_RETURN_VOID(); +} + +/* + * pg_advisory_lock_shared(int8) - acquire share lock on an int8 key + */ +Datum +pg_advisory_lock_shared_int8(PG_FUNCTION_ARGS) +{ + int64 key = PG_GETARG_INT64(0); + LOCKTAG tag; + + SET_LOCKTAG_INT64(tag, key); + + (void) LockAcquire(&tag, ShareLock, true, false); + + PG_RETURN_VOID(); +} + +/* + * pg_advisory_xact_lock_shared(int8) - acquire xact scoped + * share lock on an int8 key + */ +Datum +pg_advisory_xact_lock_shared_int8(PG_FUNCTION_ARGS) +{ + int64 key = PG_GETARG_INT64(0); + LOCKTAG tag; + + SET_LOCKTAG_INT64(tag, key); + + (void) LockAcquire(&tag, ShareLock, false, false); + + PG_RETURN_VOID(); +} + +/* + * pg_try_advisory_lock(int8) - acquire exclusive lock on an int8 key, no wait + * + * Returns true if successful, false if lock not available + */ +Datum +pg_try_advisory_lock_int8(PG_FUNCTION_ARGS) +{ + int64 key = PG_GETARG_INT64(0); + LOCKTAG tag; + LockAcquireResult res; + + SET_LOCKTAG_INT64(tag, key); + + res = LockAcquire(&tag, ExclusiveLock, true, true); + + PG_RETURN_BOOL(res != LOCKACQUIRE_NOT_AVAIL); +} + +/* + * pg_try_advisory_xact_lock(int8) - acquire xact scoped + * exclusive lock on an int8 key, no wait + * + * Returns 
true if successful, false if lock not available + */ +Datum +pg_try_advisory_xact_lock_int8(PG_FUNCTION_ARGS) +{ + int64 key = PG_GETARG_INT64(0); + LOCKTAG tag; + LockAcquireResult res; + + SET_LOCKTAG_INT64(tag, key); + + res = LockAcquire(&tag, ExclusiveLock, false, true); + + PG_RETURN_BOOL(res != LOCKACQUIRE_NOT_AVAIL); +} + +/* + * pg_try_advisory_lock_shared(int8) - acquire share lock on an int8 key, no wait + * + * Returns true if successful, false if lock not available + */ +Datum +pg_try_advisory_lock_shared_int8(PG_FUNCTION_ARGS) +{ + int64 key = PG_GETARG_INT64(0); + LOCKTAG tag; + LockAcquireResult res; + + SET_LOCKTAG_INT64(tag, key); + + res = LockAcquire(&tag, ShareLock, true, true); + + PG_RETURN_BOOL(res != LOCKACQUIRE_NOT_AVAIL); +} + +/* + * pg_try_advisory_xact_lock_shared(int8) - acquire xact scoped + * share lock on an int8 key, no wait + * + * Returns true if successful, false if lock not available + */ +Datum +pg_try_advisory_xact_lock_shared_int8(PG_FUNCTION_ARGS) +{ + int64 key = PG_GETARG_INT64(0); + LOCKTAG tag; + LockAcquireResult res; + + SET_LOCKTAG_INT64(tag, key); + + res = LockAcquire(&tag, ShareLock, false, true); + + PG_RETURN_BOOL(res != LOCKACQUIRE_NOT_AVAIL); +} + +/* + * pg_advisory_unlock(int8) - release exclusive lock on an int8 key + * + * Returns true if successful, false if lock was not held +*/ +Datum +pg_advisory_unlock_int8(PG_FUNCTION_ARGS) +{ + int64 key = PG_GETARG_INT64(0); + LOCKTAG tag; + bool res; + + SET_LOCKTAG_INT64(tag, key); + + res = LockRelease(&tag, ExclusiveLock, true); + + PG_RETURN_BOOL(res); +} + +/* + * pg_advisory_unlock_shared(int8) - release share lock on an int8 key + * + * Returns true if successful, false if lock was not held + */ +Datum +pg_advisory_unlock_shared_int8(PG_FUNCTION_ARGS) +{ + int64 key = PG_GETARG_INT64(0); + LOCKTAG tag; + bool res; + + SET_LOCKTAG_INT64(tag, key); + + res = LockRelease(&tag, ShareLock, true); + + PG_RETURN_BOOL(res); +} + +/* + * pg_advisory_lock(int4, int4) - acquire exclusive lock on 2 int4 keys + */ +Datum +pg_advisory_lock_int4(PG_FUNCTION_ARGS) +{ + int32 key1 = PG_GETARG_INT32(0); + int32 key2 = PG_GETARG_INT32(1); + LOCKTAG tag; + + SET_LOCKTAG_INT32(tag, key1, key2); + + (void) LockAcquire(&tag, ExclusiveLock, true, false); + + PG_RETURN_VOID(); +} + +/* + * pg_advisory_xact_lock(int4, int4) - acquire xact scoped + * exclusive lock on 2 int4 keys + */ +Datum +pg_advisory_xact_lock_int4(PG_FUNCTION_ARGS) +{ + int32 key1 = PG_GETARG_INT32(0); + int32 key2 = PG_GETARG_INT32(1); + LOCKTAG tag; + + SET_LOCKTAG_INT32(tag, key1, key2); + + (void) LockAcquire(&tag, ExclusiveLock, false, false); + + PG_RETURN_VOID(); +} + +/* + * pg_advisory_lock_shared(int4, int4) - acquire share lock on 2 int4 keys + */ +Datum +pg_advisory_lock_shared_int4(PG_FUNCTION_ARGS) +{ + int32 key1 = PG_GETARG_INT32(0); + int32 key2 = PG_GETARG_INT32(1); + LOCKTAG tag; + + SET_LOCKTAG_INT32(tag, key1, key2); + + (void) LockAcquire(&tag, ShareLock, true, false); + + PG_RETURN_VOID(); +} + +/* + * pg_advisory_xact_lock_shared(int4, int4) - acquire xact scoped + * share lock on 2 int4 keys + */ +Datum +pg_advisory_xact_lock_shared_int4(PG_FUNCTION_ARGS) +{ + int32 key1 = PG_GETARG_INT32(0); + int32 key2 = PG_GETARG_INT32(1); + LOCKTAG tag; + + SET_LOCKTAG_INT32(tag, key1, key2); + + (void) LockAcquire(&tag, ShareLock, false, false); + + PG_RETURN_VOID(); +} + +/* + * pg_try_advisory_lock(int4, int4) - acquire exclusive lock on 2 int4 keys, no wait + * + * Returns true if successful, false if lock not 
available + */ +Datum +pg_try_advisory_lock_int4(PG_FUNCTION_ARGS) +{ + int32 key1 = PG_GETARG_INT32(0); + int32 key2 = PG_GETARG_INT32(1); + LOCKTAG tag; + LockAcquireResult res; + + SET_LOCKTAG_INT32(tag, key1, key2); + + res = LockAcquire(&tag, ExclusiveLock, true, true); + + PG_RETURN_BOOL(res != LOCKACQUIRE_NOT_AVAIL); +} + +/* + * pg_try_advisory_xact_lock(int4, int4) - acquire xact scoped + * exclusive lock on 2 int4 keys, no wait + * + * Returns true if successful, false if lock not available + */ +Datum +pg_try_advisory_xact_lock_int4(PG_FUNCTION_ARGS) +{ + int32 key1 = PG_GETARG_INT32(0); + int32 key2 = PG_GETARG_INT32(1); + LOCKTAG tag; + LockAcquireResult res; + + SET_LOCKTAG_INT32(tag, key1, key2); + + res = LockAcquire(&tag, ExclusiveLock, false, true); + + PG_RETURN_BOOL(res != LOCKACQUIRE_NOT_AVAIL); +} + +/* + * pg_try_advisory_lock_shared(int4, int4) - acquire share lock on 2 int4 keys, no wait + * + * Returns true if successful, false if lock not available + */ +Datum +pg_try_advisory_lock_shared_int4(PG_FUNCTION_ARGS) +{ + int32 key1 = PG_GETARG_INT32(0); + int32 key2 = PG_GETARG_INT32(1); + LOCKTAG tag; + LockAcquireResult res; + + SET_LOCKTAG_INT32(tag, key1, key2); + + res = LockAcquire(&tag, ShareLock, true, true); + + PG_RETURN_BOOL(res != LOCKACQUIRE_NOT_AVAIL); +} + +/* + * pg_try_advisory_xact_lock_shared(int4, int4) - acquire xact scoped + * share lock on 2 int4 keys, no wait + * + * Returns true if successful, false if lock not available + */ +Datum +pg_try_advisory_xact_lock_shared_int4(PG_FUNCTION_ARGS) +{ + int32 key1 = PG_GETARG_INT32(0); + int32 key2 = PG_GETARG_INT32(1); + LOCKTAG tag; + LockAcquireResult res; + + SET_LOCKTAG_INT32(tag, key1, key2); + + res = LockAcquire(&tag, ShareLock, false, true); + + PG_RETURN_BOOL(res != LOCKACQUIRE_NOT_AVAIL); +} + +/* + * pg_advisory_unlock(int4, int4) - release exclusive lock on 2 int4 keys + * + * Returns true if successful, false if lock was not held +*/ +Datum +pg_advisory_unlock_int4(PG_FUNCTION_ARGS) +{ + int32 key1 = PG_GETARG_INT32(0); + int32 key2 = PG_GETARG_INT32(1); + LOCKTAG tag; + bool res; + + SET_LOCKTAG_INT32(tag, key1, key2); + + res = LockRelease(&tag, ExclusiveLock, true); + + PG_RETURN_BOOL(res); +} + +/* + * pg_advisory_unlock_shared(int4, int4) - release share lock on 2 int4 keys + * + * Returns true if successful, false if lock was not held + */ +Datum +pg_advisory_unlock_shared_int4(PG_FUNCTION_ARGS) +{ + int32 key1 = PG_GETARG_INT32(0); + int32 key2 = PG_GETARG_INT32(1); + LOCKTAG tag; + bool res; + + SET_LOCKTAG_INT32(tag, key1, key2); + + res = LockRelease(&tag, ShareLock, true); + + PG_RETURN_BOOL(res); +} + +/* + * pg_advisory_unlock_all() - release all advisory locks + */ +Datum +pg_advisory_unlock_all(PG_FUNCTION_ARGS) +{ + LockReleaseSession(USER_LOCKMETHOD); + + PG_RETURN_VOID(); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/mac.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/mac.c new file mode 100644 index 00000000000..6abf9485af2 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/mac.c @@ -0,0 +1,532 @@ +/*------------------------------------------------------------------------- + * + * mac.c + * PostgreSQL type definitions for 6 byte, EUI-48, MAC addresses. 
+ * + * Portions Copyright (c) 1998-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/adt/mac.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "common/hashfn.h" +#include "lib/hyperloglog.h" +#include "libpq/pqformat.h" +#include "port/pg_bswap.h" +#include "utils/builtins.h" +#include "utils/guc.h" +#include "utils/inet.h" +#include "utils/sortsupport.h" + + +/* + * Utility macros used for sorting and comparing: + */ + +#define hibits(addr) \ + ((unsigned long)(((addr)->a<<16)|((addr)->b<<8)|((addr)->c))) + +#define lobits(addr) \ + ((unsigned long)(((addr)->d<<16)|((addr)->e<<8)|((addr)->f))) + +/* sortsupport for macaddr */ +typedef struct +{ + int64 input_count; /* number of non-null values seen */ + bool estimating; /* true if estimating cardinality */ + + hyperLogLogState abbr_card; /* cardinality estimator */ +} macaddr_sortsupport_state; + +static int macaddr_cmp_internal(macaddr *a1, macaddr *a2); +static int macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup); +static bool macaddr_abbrev_abort(int memtupcount, SortSupport ssup); +static Datum macaddr_abbrev_convert(Datum original, SortSupport ssup); + +/* + * MAC address reader. Accepts several common notations. + */ + +Datum +macaddr_in(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + macaddr *result; + int a, + b, + c, + d, + e, + f; + char junk[2]; + int count; + + /* %1s matches iff there is trailing non-whitespace garbage */ + + count = sscanf(str, "%x:%x:%x:%x:%x:%x%1s", + &a, &b, &c, &d, &e, &f, junk); + if (count != 6) + count = sscanf(str, "%x-%x-%x-%x-%x-%x%1s", + &a, &b, &c, &d, &e, &f, junk); + if (count != 6) + count = sscanf(str, "%2x%2x%2x:%2x%2x%2x%1s", + &a, &b, &c, &d, &e, &f, junk); + if (count != 6) + count = sscanf(str, "%2x%2x%2x-%2x%2x%2x%1s", + &a, &b, &c, &d, &e, &f, junk); + if (count != 6) + count = sscanf(str, "%2x%2x.%2x%2x.%2x%2x%1s", + &a, &b, &c, &d, &e, &f, junk); + if (count != 6) + count = sscanf(str, "%2x%2x-%2x%2x-%2x%2x%1s", + &a, &b, &c, &d, &e, &f, junk); + if (count != 6) + count = sscanf(str, "%2x%2x%2x%2x%2x%2x%1s", + &a, &b, &c, &d, &e, &f, junk); + if (count != 6) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", "macaddr", + str))); + + if ((a < 0) || (a > 255) || (b < 0) || (b > 255) || + (c < 0) || (c > 255) || (d < 0) || (d > 255) || + (e < 0) || (e > 255) || (f < 0) || (f > 255)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("invalid octet value in \"macaddr\" value: \"%s\"", str))); + + result = (macaddr *) palloc(sizeof(macaddr)); + + result->a = a; + result->b = b; + result->c = c; + result->d = d; + result->e = e; + result->f = f; + + PG_RETURN_MACADDR_P(result); +} + +/* + * MAC address output function. Fixed format. + */ + +Datum +macaddr_out(PG_FUNCTION_ARGS) +{ + macaddr *addr = PG_GETARG_MACADDR_P(0); + char *result; + + result = (char *) palloc(32); + + snprintf(result, 32, "%02x:%02x:%02x:%02x:%02x:%02x", + addr->a, addr->b, addr->c, addr->d, addr->e, addr->f); + + PG_RETURN_CSTRING(result); +} + +/* + * macaddr_recv - converts external binary format to macaddr + * + * The external representation is just the six bytes, MSB first. 
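+ * For instance, the address 08:00:2b:01:02:03 travels as the byte sequence
+ * 0x08 0x00 0x2B 0x01 0x02 0x03.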
+ */ +Datum +macaddr_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + macaddr *addr; + + addr = (macaddr *) palloc(sizeof(macaddr)); + + addr->a = pq_getmsgbyte(buf); + addr->b = pq_getmsgbyte(buf); + addr->c = pq_getmsgbyte(buf); + addr->d = pq_getmsgbyte(buf); + addr->e = pq_getmsgbyte(buf); + addr->f = pq_getmsgbyte(buf); + + PG_RETURN_MACADDR_P(addr); +} + +/* + * macaddr_send - converts macaddr to binary format + */ +Datum +macaddr_send(PG_FUNCTION_ARGS) +{ + macaddr *addr = PG_GETARG_MACADDR_P(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendbyte(&buf, addr->a); + pq_sendbyte(&buf, addr->b); + pq_sendbyte(&buf, addr->c); + pq_sendbyte(&buf, addr->d); + pq_sendbyte(&buf, addr->e); + pq_sendbyte(&buf, addr->f); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + + +/* + * Comparison function for sorting: + */ + +static int +macaddr_cmp_internal(macaddr *a1, macaddr *a2) +{ + if (hibits(a1) < hibits(a2)) + return -1; + else if (hibits(a1) > hibits(a2)) + return 1; + else if (lobits(a1) < lobits(a2)) + return -1; + else if (lobits(a1) > lobits(a2)) + return 1; + else + return 0; +} + +Datum +macaddr_cmp(PG_FUNCTION_ARGS) +{ + macaddr *a1 = PG_GETARG_MACADDR_P(0); + macaddr *a2 = PG_GETARG_MACADDR_P(1); + + PG_RETURN_INT32(macaddr_cmp_internal(a1, a2)); +} + +/* + * Boolean comparisons. + */ + +Datum +macaddr_lt(PG_FUNCTION_ARGS) +{ + macaddr *a1 = PG_GETARG_MACADDR_P(0); + macaddr *a2 = PG_GETARG_MACADDR_P(1); + + PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) < 0); +} + +Datum +macaddr_le(PG_FUNCTION_ARGS) +{ + macaddr *a1 = PG_GETARG_MACADDR_P(0); + macaddr *a2 = PG_GETARG_MACADDR_P(1); + + PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) <= 0); +} + +Datum +macaddr_eq(PG_FUNCTION_ARGS) +{ + macaddr *a1 = PG_GETARG_MACADDR_P(0); + macaddr *a2 = PG_GETARG_MACADDR_P(1); + + PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) == 0); +} + +Datum +macaddr_ge(PG_FUNCTION_ARGS) +{ + macaddr *a1 = PG_GETARG_MACADDR_P(0); + macaddr *a2 = PG_GETARG_MACADDR_P(1); + + PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) >= 0); +} + +Datum +macaddr_gt(PG_FUNCTION_ARGS) +{ + macaddr *a1 = PG_GETARG_MACADDR_P(0); + macaddr *a2 = PG_GETARG_MACADDR_P(1); + + PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) > 0); +} + +Datum +macaddr_ne(PG_FUNCTION_ARGS) +{ + macaddr *a1 = PG_GETARG_MACADDR_P(0); + macaddr *a2 = PG_GETARG_MACADDR_P(1); + + PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) != 0); +} + +/* + * Support function for hash indexes on macaddr. + */ +Datum +hashmacaddr(PG_FUNCTION_ARGS) +{ + macaddr *key = PG_GETARG_MACADDR_P(0); + + return hash_any((unsigned char *) key, sizeof(macaddr)); +} + +Datum +hashmacaddrextended(PG_FUNCTION_ARGS) +{ + macaddr *key = PG_GETARG_MACADDR_P(0); + + return hash_any_extended((unsigned char *) key, sizeof(macaddr), + PG_GETARG_INT64(1)); +} + +/* + * Arithmetic functions: bitwise NOT, AND, OR. 
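+ * These implement the SQL bitwise operators ~, & and | on macaddr; for
+ * example, ANDing with ff:ff:ff:00:00:00 keeps only the manufacturer (OUI)
+ * part, so 08:00:2b:01:02:03 & ff:ff:ff:00:00:00 yields 08:00:2b:00:00:00.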
+ */ +Datum +macaddr_not(PG_FUNCTION_ARGS) +{ + macaddr *addr = PG_GETARG_MACADDR_P(0); + macaddr *result; + + result = (macaddr *) palloc(sizeof(macaddr)); + result->a = ~addr->a; + result->b = ~addr->b; + result->c = ~addr->c; + result->d = ~addr->d; + result->e = ~addr->e; + result->f = ~addr->f; + PG_RETURN_MACADDR_P(result); +} + +Datum +macaddr_and(PG_FUNCTION_ARGS) +{ + macaddr *addr1 = PG_GETARG_MACADDR_P(0); + macaddr *addr2 = PG_GETARG_MACADDR_P(1); + macaddr *result; + + result = (macaddr *) palloc(sizeof(macaddr)); + result->a = addr1->a & addr2->a; + result->b = addr1->b & addr2->b; + result->c = addr1->c & addr2->c; + result->d = addr1->d & addr2->d; + result->e = addr1->e & addr2->e; + result->f = addr1->f & addr2->f; + PG_RETURN_MACADDR_P(result); +} + +Datum +macaddr_or(PG_FUNCTION_ARGS) +{ + macaddr *addr1 = PG_GETARG_MACADDR_P(0); + macaddr *addr2 = PG_GETARG_MACADDR_P(1); + macaddr *result; + + result = (macaddr *) palloc(sizeof(macaddr)); + result->a = addr1->a | addr2->a; + result->b = addr1->b | addr2->b; + result->c = addr1->c | addr2->c; + result->d = addr1->d | addr2->d; + result->e = addr1->e | addr2->e; + result->f = addr1->f | addr2->f; + PG_RETURN_MACADDR_P(result); +} + +/* + * Truncation function to allow comparing mac manufacturers. + * From suggestion by Alex Pilosov <alex@pilosoft.com> + */ +Datum +macaddr_trunc(PG_FUNCTION_ARGS) +{ + macaddr *addr = PG_GETARG_MACADDR_P(0); + macaddr *result; + + result = (macaddr *) palloc(sizeof(macaddr)); + + result->a = addr->a; + result->b = addr->b; + result->c = addr->c; + result->d = 0; + result->e = 0; + result->f = 0; + + PG_RETURN_MACADDR_P(result); +} + +/* + * SortSupport strategy function. Populates a SortSupport struct with the + * information necessary to use comparison by abbreviated keys. + */ +Datum +macaddr_sortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + + ssup->comparator = macaddr_fast_cmp; + ssup->ssup_extra = NULL; + + if (ssup->abbreviate) + { + macaddr_sortsupport_state *uss; + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); + + uss = palloc(sizeof(macaddr_sortsupport_state)); + uss->input_count = 0; + uss->estimating = true; + initHyperLogLog(&uss->abbr_card, 10); + + ssup->ssup_extra = uss; + + ssup->comparator = ssup_datum_unsigned_cmp; + ssup->abbrev_converter = macaddr_abbrev_convert; + ssup->abbrev_abort = macaddr_abbrev_abort; + ssup->abbrev_full_comparator = macaddr_fast_cmp; + + MemoryContextSwitchTo(oldcontext); + } + + PG_RETURN_VOID(); +} + +/* + * SortSupport "traditional" comparison function. Pulls two MAC addresses from + * the heap and runs a standard comparison on them. + */ +static int +macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup) +{ + macaddr *arg1 = DatumGetMacaddrP(x); + macaddr *arg2 = DatumGetMacaddrP(y); + + return macaddr_cmp_internal(arg1, arg2); +} + +/* + * Callback for estimating effectiveness of abbreviated key optimization. + * + * We pay no attention to the cardinality of the non-abbreviated data, because + * there is no equality fast-path within authoritative macaddr comparator. 
+ */ +static bool +macaddr_abbrev_abort(int memtupcount, SortSupport ssup) +{ + macaddr_sortsupport_state *uss = ssup->ssup_extra; + double abbr_card; + + if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating) + return false; + + abbr_card = estimateHyperLogLog(&uss->abbr_card); + + /* + * If we have >100k distinct values, then even if we were sorting many + * billion rows we'd likely still break even, and the penalty of undoing + * that many rows of abbrevs would probably not be worth it. At this point + * we stop counting because we know that we're now fully committed. + */ + if (abbr_card > 100000.0) + { +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "macaddr_abbrev: estimation ends at cardinality %f" + " after " INT64_FORMAT " values (%d rows)", + abbr_card, uss->input_count, memtupcount); +#endif + uss->estimating = false; + return false; + } + + /* + * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row + * fudge factor allows us to abort earlier on genuinely pathological data + * where we've had exactly one abbreviated value in the first 2k + * (non-null) rows. + */ + if (abbr_card < uss->input_count / 2000.0 + 0.5) + { +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "macaddr_abbrev: aborting abbreviation at cardinality %f" + " below threshold %f after " INT64_FORMAT " values (%d rows)", + abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count, + memtupcount); +#endif + return true; + } + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "macaddr_abbrev: cardinality %f after " INT64_FORMAT + " values (%d rows)", abbr_card, uss->input_count, memtupcount); +#endif + + return false; +} + +/* + * SortSupport conversion routine. Converts original macaddr representation + * to abbreviated key representation. + * + * Packs the bytes of a 6-byte MAC address into a Datum and treats it as an + * unsigned integer for purposes of comparison. On a 64-bit machine, there + * will be two zeroed bytes of padding. The integer is converted to native + * endianness to facilitate easy comparison. + */ +static Datum +macaddr_abbrev_convert(Datum original, SortSupport ssup) +{ + macaddr_sortsupport_state *uss = ssup->ssup_extra; + macaddr *authoritative = DatumGetMacaddrP(original); + Datum res; + + /* + * On a 64-bit machine, zero out the 8-byte datum and copy the 6 bytes of + * the MAC address in. There will be two bytes of zero padding on the end + * of the least significant bits. + */ +#if SIZEOF_DATUM == 8 + memset(&res, 0, SIZEOF_DATUM); + memcpy(&res, authoritative, sizeof(macaddr)); +#else /* SIZEOF_DATUM != 8 */ + memcpy(&res, authoritative, SIZEOF_DATUM); +#endif + uss->input_count += 1; + + /* + * Cardinality estimation. The estimate uses uint32, so on a 64-bit + * architecture, XOR the two 32-bit halves together to produce slightly + * more entropy. The two zeroed bytes won't have any practical impact on + * this operation. + */ + if (uss->estimating) + { + uint32 tmp; + +#if SIZEOF_DATUM == 8 + tmp = (uint32) res ^ (uint32) ((uint64) res >> 32); +#else /* SIZEOF_DATUM != 8 */ + tmp = (uint32) res; +#endif + + addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); + } + + /* + * Byteswap on little-endian machines. + * + * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer + * 3-way comparator) works correctly on all platforms. Without this, the + * comparator would have to call memcmp() with a pair of pointers to the + * first byte of each abbreviated key, which is slower. 
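+ * After the swap the first octet of the MAC address lands in the most
+ * significant byte of the datum, so unsigned integer comparison produces
+ * the same ordering as memcmp() on the raw address bytes.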
+ */ + res = DatumBigEndianToNative(res); + + return res; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/mac8.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/mac8.c new file mode 100644 index 00000000000..25bb6c16666 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/mac8.c @@ -0,0 +1,568 @@ +/*------------------------------------------------------------------------- + * + * mac8.c + * PostgreSQL type definitions for 8 byte (EUI-64) MAC addresses. + * + * EUI-48 (6 byte) MAC addresses are accepted as input and are stored in + * EUI-64 format, with the 4th and 5th bytes set to FF and FE, respectively. + * + * Output is always in 8 byte (EUI-64) format. + * + * The following code is written with the assumption that the OUI field + * size is 24 bits. + * + * Portions Copyright (c) 1998-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/adt/mac8.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "common/hashfn.h" +#include "libpq/pqformat.h" +#include "utils/builtins.h" +#include "utils/inet.h" + +/* + * Utility macros used for sorting and comparing: + */ +#define hibits(addr) \ + ((unsigned long)(((addr)->a<<24) | ((addr)->b<<16) | ((addr)->c<<8) | ((addr)->d))) + +#define lobits(addr) \ + ((unsigned long)(((addr)->e<<24) | ((addr)->f<<16) | ((addr)->g<<8) | ((addr)->h))) + +static unsigned char hex2_to_uchar(const unsigned char *ptr, bool *badhex); + +static const signed char hexlookup[128] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + +/* + * hex2_to_uchar - convert 2 hex digits to a byte (unsigned char) + * + * Sets *badhex to true if the end of the string is reached ('\0' found), or if + * either character is not a valid hex digit. + */ +static inline unsigned char +hex2_to_uchar(const unsigned char *ptr, bool *badhex) +{ + unsigned char ret; + signed char lookup; + + /* Handle the first character */ + if (*ptr > 127) + goto invalid_input; + + lookup = hexlookup[*ptr]; + if (lookup < 0) + goto invalid_input; + + ret = lookup << 4; + + /* Move to the second character */ + ptr++; + + if (*ptr > 127) + goto invalid_input; + + lookup = hexlookup[*ptr]; + if (lookup < 0) + goto invalid_input; + + ret += lookup; + + return ret; + +invalid_input: + *badhex = true; + return 0; +} + +/* + * MAC address (EUI-48 and EUI-64) reader. Accepts several common notations. 
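+ * A 6-byte (EUI-48) input such as 08:00:2b:01:02:03 is widened to EUI-64 by
+ * inserting FF and FE as the 4th and 5th bytes, giving 08:00:2b:ff:fe:01:02:03.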
+ */ +Datum +macaddr8_in(PG_FUNCTION_ARGS) +{ + const unsigned char *str = (unsigned char *) PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + const unsigned char *ptr = str; + bool badhex = false; + macaddr8 *result; + unsigned char a = 0, + b = 0, + c = 0, + d = 0, + e = 0, + f = 0, + g = 0, + h = 0; + int count = 0; + unsigned char spacer = '\0'; + + /* skip leading spaces */ + while (*ptr && isspace(*ptr)) + ptr++; + + /* digits must always come in pairs */ + while (*ptr && *(ptr + 1)) + { + /* + * Attempt to decode each byte, which must be 2 hex digits in a row. + * If either digit is not hex, hex2_to_uchar will throw ereport() for + * us. Either 6 or 8 byte MAC addresses are supported. + */ + + /* Attempt to collect a byte */ + count++; + + switch (count) + { + case 1: + a = hex2_to_uchar(ptr, &badhex); + break; + case 2: + b = hex2_to_uchar(ptr, &badhex); + break; + case 3: + c = hex2_to_uchar(ptr, &badhex); + break; + case 4: + d = hex2_to_uchar(ptr, &badhex); + break; + case 5: + e = hex2_to_uchar(ptr, &badhex); + break; + case 6: + f = hex2_to_uchar(ptr, &badhex); + break; + case 7: + g = hex2_to_uchar(ptr, &badhex); + break; + case 8: + h = hex2_to_uchar(ptr, &badhex); + break; + default: + /* must be trailing garbage... */ + goto fail; + } + + if (badhex) + goto fail; + + /* Move forward to where the next byte should be */ + ptr += 2; + + /* Check for a spacer, these are valid, anything else is not */ + if (*ptr == ':' || *ptr == '-' || *ptr == '.') + { + /* remember the spacer used, if it changes then it isn't valid */ + if (spacer == '\0') + spacer = *ptr; + + /* Have to use the same spacer throughout */ + else if (spacer != *ptr) + goto fail; + + /* move past the spacer */ + ptr++; + } + + /* allow trailing whitespace after if we have 6 or 8 bytes */ + if (count == 6 || count == 8) + { + if (isspace(*ptr)) + { + while (*++ptr && isspace(*ptr)); + + /* If we found a space and then non-space, it's invalid */ + if (*ptr) + goto fail; + } + } + } + + /* Convert a 6 byte MAC address to macaddr8 */ + if (count == 6) + { + h = f; + g = e; + f = d; + + d = 0xFF; + e = 0xFE; + } + else if (count != 8) + goto fail; + + result = (macaddr8 *) palloc0(sizeof(macaddr8)); + + result->a = a; + result->b = b; + result->c = c; + result->d = d; + result->e = e; + result->f = f; + result->g = g; + result->h = h; + + PG_RETURN_MACADDR8_P(result); + +fail: + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", "macaddr8", + str))); +} + +/* + * MAC8 address (EUI-64) output function. Fixed format. + */ +Datum +macaddr8_out(PG_FUNCTION_ARGS) +{ + macaddr8 *addr = PG_GETARG_MACADDR8_P(0); + char *result; + + result = (char *) palloc(32); + + snprintf(result, 32, "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x", + addr->a, addr->b, addr->c, addr->d, + addr->e, addr->f, addr->g, addr->h); + + PG_RETURN_CSTRING(result); +} + +/* + * macaddr8_recv - converts external binary format(EUI-48 and EUI-64) to macaddr8 + * + * The external representation is just the eight bytes, MSB first. 
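+ * A 6-byte (EUI-48) message is also accepted; the missing 4th and 5th bytes
+ * are filled in as FF and FE, matching macaddr8_in().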
+ */ +Datum +macaddr8_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + macaddr8 *addr; + + addr = (macaddr8 *) palloc0(sizeof(macaddr8)); + + addr->a = pq_getmsgbyte(buf); + addr->b = pq_getmsgbyte(buf); + addr->c = pq_getmsgbyte(buf); + + if (buf->len == 6) + { + addr->d = 0xFF; + addr->e = 0xFE; + } + else + { + addr->d = pq_getmsgbyte(buf); + addr->e = pq_getmsgbyte(buf); + } + + addr->f = pq_getmsgbyte(buf); + addr->g = pq_getmsgbyte(buf); + addr->h = pq_getmsgbyte(buf); + + PG_RETURN_MACADDR8_P(addr); +} + +/* + * macaddr8_send - converts macaddr8(EUI-64) to binary format + */ +Datum +macaddr8_send(PG_FUNCTION_ARGS) +{ + macaddr8 *addr = PG_GETARG_MACADDR8_P(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendbyte(&buf, addr->a); + pq_sendbyte(&buf, addr->b); + pq_sendbyte(&buf, addr->c); + pq_sendbyte(&buf, addr->d); + pq_sendbyte(&buf, addr->e); + pq_sendbyte(&buf, addr->f); + pq_sendbyte(&buf, addr->g); + pq_sendbyte(&buf, addr->h); + + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + + +/* + * macaddr8_cmp_internal - comparison function for sorting: + */ +static int32 +macaddr8_cmp_internal(macaddr8 *a1, macaddr8 *a2) +{ + if (hibits(a1) < hibits(a2)) + return -1; + else if (hibits(a1) > hibits(a2)) + return 1; + else if (lobits(a1) < lobits(a2)) + return -1; + else if (lobits(a1) > lobits(a2)) + return 1; + else + return 0; +} + +Datum +macaddr8_cmp(PG_FUNCTION_ARGS) +{ + macaddr8 *a1 = PG_GETARG_MACADDR8_P(0); + macaddr8 *a2 = PG_GETARG_MACADDR8_P(1); + + PG_RETURN_INT32(macaddr8_cmp_internal(a1, a2)); +} + +/* + * Boolean comparison functions. + */ + +Datum +macaddr8_lt(PG_FUNCTION_ARGS) +{ + macaddr8 *a1 = PG_GETARG_MACADDR8_P(0); + macaddr8 *a2 = PG_GETARG_MACADDR8_P(1); + + PG_RETURN_BOOL(macaddr8_cmp_internal(a1, a2) < 0); +} + +Datum +macaddr8_le(PG_FUNCTION_ARGS) +{ + macaddr8 *a1 = PG_GETARG_MACADDR8_P(0); + macaddr8 *a2 = PG_GETARG_MACADDR8_P(1); + + PG_RETURN_BOOL(macaddr8_cmp_internal(a1, a2) <= 0); +} + +Datum +macaddr8_eq(PG_FUNCTION_ARGS) +{ + macaddr8 *a1 = PG_GETARG_MACADDR8_P(0); + macaddr8 *a2 = PG_GETARG_MACADDR8_P(1); + + PG_RETURN_BOOL(macaddr8_cmp_internal(a1, a2) == 0); +} + +Datum +macaddr8_ge(PG_FUNCTION_ARGS) +{ + macaddr8 *a1 = PG_GETARG_MACADDR8_P(0); + macaddr8 *a2 = PG_GETARG_MACADDR8_P(1); + + PG_RETURN_BOOL(macaddr8_cmp_internal(a1, a2) >= 0); +} + +Datum +macaddr8_gt(PG_FUNCTION_ARGS) +{ + macaddr8 *a1 = PG_GETARG_MACADDR8_P(0); + macaddr8 *a2 = PG_GETARG_MACADDR8_P(1); + + PG_RETURN_BOOL(macaddr8_cmp_internal(a1, a2) > 0); +} + +Datum +macaddr8_ne(PG_FUNCTION_ARGS) +{ + macaddr8 *a1 = PG_GETARG_MACADDR8_P(0); + macaddr8 *a2 = PG_GETARG_MACADDR8_P(1); + + PG_RETURN_BOOL(macaddr8_cmp_internal(a1, a2) != 0); +} + +/* + * Support function for hash indexes on macaddr8. + */ +Datum +hashmacaddr8(PG_FUNCTION_ARGS) +{ + macaddr8 *key = PG_GETARG_MACADDR8_P(0); + + return hash_any((unsigned char *) key, sizeof(macaddr8)); +} + +Datum +hashmacaddr8extended(PG_FUNCTION_ARGS) +{ + macaddr8 *key = PG_GETARG_MACADDR8_P(0); + + return hash_any_extended((unsigned char *) key, sizeof(macaddr8), + PG_GETARG_INT64(1)); +} + +/* + * Arithmetic functions: bitwise NOT, AND, OR. 
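+ * These implement the SQL bitwise operators ~, & and | on macaddr8; for
+ * example, ~ macaddr8 '08:00:2b:01:02:03:04:05' is f7:ff:d4:fe:fd:fc:fb:fa.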
+ */ +Datum +macaddr8_not(PG_FUNCTION_ARGS) +{ + macaddr8 *addr = PG_GETARG_MACADDR8_P(0); + macaddr8 *result; + + result = (macaddr8 *) palloc0(sizeof(macaddr8)); + result->a = ~addr->a; + result->b = ~addr->b; + result->c = ~addr->c; + result->d = ~addr->d; + result->e = ~addr->e; + result->f = ~addr->f; + result->g = ~addr->g; + result->h = ~addr->h; + + PG_RETURN_MACADDR8_P(result); +} + +Datum +macaddr8_and(PG_FUNCTION_ARGS) +{ + macaddr8 *addr1 = PG_GETARG_MACADDR8_P(0); + macaddr8 *addr2 = PG_GETARG_MACADDR8_P(1); + macaddr8 *result; + + result = (macaddr8 *) palloc0(sizeof(macaddr8)); + result->a = addr1->a & addr2->a; + result->b = addr1->b & addr2->b; + result->c = addr1->c & addr2->c; + result->d = addr1->d & addr2->d; + result->e = addr1->e & addr2->e; + result->f = addr1->f & addr2->f; + result->g = addr1->g & addr2->g; + result->h = addr1->h & addr2->h; + + PG_RETURN_MACADDR8_P(result); +} + +Datum +macaddr8_or(PG_FUNCTION_ARGS) +{ + macaddr8 *addr1 = PG_GETARG_MACADDR8_P(0); + macaddr8 *addr2 = PG_GETARG_MACADDR8_P(1); + macaddr8 *result; + + result = (macaddr8 *) palloc0(sizeof(macaddr8)); + result->a = addr1->a | addr2->a; + result->b = addr1->b | addr2->b; + result->c = addr1->c | addr2->c; + result->d = addr1->d | addr2->d; + result->e = addr1->e | addr2->e; + result->f = addr1->f | addr2->f; + result->g = addr1->g | addr2->g; + result->h = addr1->h | addr2->h; + + PG_RETURN_MACADDR8_P(result); +} + +/* + * Truncation function to allow comparing macaddr8 manufacturers. + */ +Datum +macaddr8_trunc(PG_FUNCTION_ARGS) +{ + macaddr8 *addr = PG_GETARG_MACADDR8_P(0); + macaddr8 *result; + + result = (macaddr8 *) palloc0(sizeof(macaddr8)); + + result->a = addr->a; + result->b = addr->b; + result->c = addr->c; + result->d = 0; + result->e = 0; + result->f = 0; + result->g = 0; + result->h = 0; + + PG_RETURN_MACADDR8_P(result); +} + +/* + * Set 7th bit for modified EUI-64 as used in IPv6. + */ +Datum +macaddr8_set7bit(PG_FUNCTION_ARGS) +{ + macaddr8 *addr = PG_GETARG_MACADDR8_P(0); + macaddr8 *result; + + result = (macaddr8 *) palloc0(sizeof(macaddr8)); + + result->a = addr->a | 0x02; + result->b = addr->b; + result->c = addr->c; + result->d = addr->d; + result->e = addr->e; + result->f = addr->f; + result->g = addr->g; + result->h = addr->h; + + PG_RETURN_MACADDR8_P(result); +} + +/*---------------------------------------------------------- + * Conversion operators. 
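+ * macaddr -> macaddr8 always succeeds (FF and FE are inserted as the 4th and
+ * 5th bytes); macaddr8 -> macaddr succeeds only when those bytes are FF and FE.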
+ *---------------------------------------------------------*/ + +Datum +macaddrtomacaddr8(PG_FUNCTION_ARGS) +{ + macaddr *addr6 = PG_GETARG_MACADDR_P(0); + macaddr8 *result; + + result = (macaddr8 *) palloc0(sizeof(macaddr8)); + + result->a = addr6->a; + result->b = addr6->b; + result->c = addr6->c; + result->d = 0xFF; + result->e = 0xFE; + result->f = addr6->d; + result->g = addr6->e; + result->h = addr6->f; + + + PG_RETURN_MACADDR8_P(result); +} + +Datum +macaddr8tomacaddr(PG_FUNCTION_ARGS) +{ + macaddr8 *addr = PG_GETARG_MACADDR8_P(0); + macaddr *result; + + result = (macaddr *) palloc0(sizeof(macaddr)); + + if ((addr->d != 0xFF) || (addr->e != 0xFE)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("macaddr8 data out of range to convert to macaddr"), + errhint("Only addresses that have FF and FE as values in the " + "4th and 5th bytes from the left, for example " + "xx:xx:xx:ff:fe:xx:xx:xx, are eligible to be converted " + "from macaddr8 to macaddr."))); + + result->a = addr->a; + result->b = addr->b; + result->c = addr->c; + result->d = addr->f; + result->e = addr->g; + result->f = addr->h; + + PG_RETURN_MACADDR_P(result); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/mcxtfuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/mcxtfuncs.c new file mode 100644 index 00000000000..92ca5b2f728 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/mcxtfuncs.c @@ -0,0 +1,195 @@ +/*------------------------------------------------------------------------- + * + * mcxtfuncs.c + * Functions to show backend memory context. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/mcxtfuncs.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "funcapi.h" +#include "miscadmin.h" +#include "mb/pg_wchar.h" +#include "storage/proc.h" +#include "storage/procarray.h" +#include "utils/builtins.h" + +/* ---------- + * The max bytes for showing identifiers of MemoryContext. + * ---------- + */ +#define MEMORY_CONTEXT_IDENT_DISPLAY_SIZE 1024 + +/* + * PutMemoryContextsStatsTupleStore + * One recursion level for pg_get_backend_memory_contexts. + */ +static void +PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore, + TupleDesc tupdesc, MemoryContext context, + const char *parent, int level) +{ +#define PG_GET_BACKEND_MEMORY_CONTEXTS_COLS 9 + + Datum values[PG_GET_BACKEND_MEMORY_CONTEXTS_COLS]; + bool nulls[PG_GET_BACKEND_MEMORY_CONTEXTS_COLS]; + MemoryContextCounters stat; + MemoryContext child; + const char *name; + const char *ident; + + Assert(MemoryContextIsValid(context)); + + name = context->name; + ident = context->ident; + + /* + * To be consistent with logging output, we label dynahash contexts with + * just the hash table name as with MemoryContextStatsPrint(). 
+ */ + if (ident && strcmp(name, "dynahash") == 0) + { + name = ident; + ident = NULL; + } + + /* Examine the context itself */ + memset(&stat, 0, sizeof(stat)); + (*context->methods->stats) (context, NULL, (void *) &level, &stat, true); + + memset(values, 0, sizeof(values)); + memset(nulls, 0, sizeof(nulls)); + + if (name) + values[0] = CStringGetTextDatum(name); + else + nulls[0] = true; + + if (ident) + { + int idlen = strlen(ident); + char clipped_ident[MEMORY_CONTEXT_IDENT_DISPLAY_SIZE]; + + /* + * Some identifiers such as SQL query string can be very long, + * truncate oversize identifiers. + */ + if (idlen >= MEMORY_CONTEXT_IDENT_DISPLAY_SIZE) + idlen = pg_mbcliplen(ident, idlen, MEMORY_CONTEXT_IDENT_DISPLAY_SIZE - 1); + + memcpy(clipped_ident, ident, idlen); + clipped_ident[idlen] = '\0'; + values[1] = CStringGetTextDatum(clipped_ident); + } + else + nulls[1] = true; + + if (parent) + values[2] = CStringGetTextDatum(parent); + else + nulls[2] = true; + + values[3] = Int32GetDatum(level); + values[4] = Int64GetDatum(stat.totalspace); + values[5] = Int64GetDatum(stat.nblocks); + values[6] = Int64GetDatum(stat.freespace); + values[7] = Int64GetDatum(stat.freechunks); + values[8] = Int64GetDatum(stat.totalspace - stat.freespace); + tuplestore_putvalues(tupstore, tupdesc, values, nulls); + + for (child = context->firstchild; child != NULL; child = child->nextchild) + { + PutMemoryContextsStatsTupleStore(tupstore, tupdesc, + child, name, level + 1); + } +} + +/* + * pg_get_backend_memory_contexts + * SQL SRF showing backend memory context. + */ +Datum +pg_get_backend_memory_contexts(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + + InitMaterializedSRF(fcinfo, 0); + PutMemoryContextsStatsTupleStore(rsinfo->setResult, rsinfo->setDesc, + TopMemoryContext, NULL, 0); + + return (Datum) 0; +} + +/* + * pg_log_backend_memory_contexts + * Signal a backend or an auxiliary process to log its memory contexts. + * + * By default, only superusers are allowed to signal to log the memory + * contexts because allowing any users to issue this request at an unbounded + * rate would cause lots of log messages and which can lead to denial of + * service. Additional roles can be permitted with GRANT. + * + * On receipt of this signal, a backend or an auxiliary process sets the flag + * in the signal handler, which causes the next CHECK_FOR_INTERRUPTS() + * or process-specific interrupt handler to log the memory contexts. + */ +Datum +pg_log_backend_memory_contexts(PG_FUNCTION_ARGS) +{ + int pid = PG_GETARG_INT32(0); + PGPROC *proc; + BackendId backendId = InvalidBackendId; + + proc = BackendPidGetProc(pid); + + /* + * See if the process with given pid is a backend or an auxiliary process. + * + * If the given process is a backend, use its backend id in + * SendProcSignal() later to speed up the operation. Otherwise, don't do + * that because auxiliary processes (except the startup process) don't + * have a valid backend id. + */ + if (proc != NULL) + backendId = proc->backendId; + else + proc = AuxiliaryPidGetProc(pid); + + /* + * BackendPidGetProc() and AuxiliaryPidGetProc() return NULL if the pid + * isn't valid; but by the time we reach kill(), a process for which we + * get a valid proc here might have terminated on its own. There's no way + * to acquire a lock on an arbitrary process to prevent that. 
But since + * this mechanism is usually used to debug a backend or an auxiliary + * process running and consuming lots of memory, that it might end on its + * own first and its memory contexts are not logged is not a problem. + */ + if (proc == NULL) + { + /* + * This is just a warning so a loop-through-resultset will not abort + * if one backend terminated on its own during the run. + */ + ereport(WARNING, + (errmsg("PID %d is not a PostgreSQL server process", pid))); + PG_RETURN_BOOL(false); + } + + if (SendProcSignal(pid, PROCSIG_LOG_MEMORY_CONTEXT, backendId) < 0) + { + /* Again, just a warning to allow loops */ + ereport(WARNING, + (errmsg("could not send signal to process %d: %m", pid))); + PG_RETURN_BOOL(false); + } + + PG_RETURN_BOOL(true); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/misc.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/misc.c new file mode 100644 index 00000000000..f94abc14b02 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/misc.c @@ -0,0 +1,1080 @@ +/*------------------------------------------------------------------------- + * + * misc.c + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/misc.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <sys/file.h> +#include <sys/stat.h> +#include <dirent.h> +#include <fcntl.h> +#include <math.h> +#include <unistd.h> + +#include "access/sysattr.h" +#include "access/table.h" +#include "catalog/catalog.h" +#include "catalog/pg_tablespace.h" +#include "catalog/pg_type.h" +#include "catalog/system_fk_info.h" +#include "commands/dbcommands.h" +#include "commands/tablespace.h" +#include "common/keywords.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "nodes/miscnodes.h" +#include "parser/parse_type.h" +#include "parser/scansup.h" +#include "pgstat.h" +#include "postmaster/syslogger.h" +#include "rewrite/rewriteHandler.h" +#include "storage/fd.h" +#include "storage/latch.h" +#include "tcop/tcopprot.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/lsyscache.h" +#include "utils/ruleutils.h" +#include "utils/timestamp.h" + + +/* + * structure to cache metadata needed in pg_input_is_valid_common + */ +typedef struct ValidIOData +{ + Oid typoid; + int32 typmod; + bool typname_constant; + Oid typiofunc; + Oid typioparam; + FmgrInfo inputproc; +} ValidIOData; + +static bool pg_input_is_valid_common(FunctionCallInfo fcinfo, + text *txt, text *typname, + ErrorSaveContext *escontext); + + +/* + * Common subroutine for num_nulls() and num_nonnulls(). + * Returns true if successful, false if function should return NULL. + * If successful, total argument count and number of nulls are + * returned into *nargs and *nulls. + */ +static bool +count_nulls(FunctionCallInfo fcinfo, + int32 *nargs, int32 *nulls) +{ + int32 count = 0; + int i; + + /* Did we get a VARIADIC array argument, or separate arguments? */ + if (get_fn_expr_variadic(fcinfo->flinfo)) + { + ArrayType *arr; + int ndims, + nitems, + *dims; + bits8 *bitmap; + + Assert(PG_NARGS() == 1); + + /* + * If we get a null as VARIADIC array argument, we can't say anything + * useful about the number of elements, so return NULL. This behavior + * is consistent with other variadic functions - see concat_internal. 
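+ * For example, num_nulls(VARIADIC NULL::int[]) returns NULL rather than 0.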
+ */ + if (PG_ARGISNULL(0)) + return false; + + /* + * Non-null argument had better be an array. We assume that any call + * context that could let get_fn_expr_variadic return true will have + * checked that a VARIADIC-labeled parameter actually is an array. So + * it should be okay to just Assert that it's an array rather than + * doing a full-fledged error check. + */ + Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, 0)))); + + /* OK, safe to fetch the array value */ + arr = PG_GETARG_ARRAYTYPE_P(0); + + /* Count the array elements */ + ndims = ARR_NDIM(arr); + dims = ARR_DIMS(arr); + nitems = ArrayGetNItems(ndims, dims); + + /* Count those that are NULL */ + bitmap = ARR_NULLBITMAP(arr); + if (bitmap) + { + int bitmask = 1; + + for (i = 0; i < nitems; i++) + { + if ((*bitmap & bitmask) == 0) + count++; + + bitmask <<= 1; + if (bitmask == 0x100) + { + bitmap++; + bitmask = 1; + } + } + } + + *nargs = nitems; + *nulls = count; + } + else + { + /* Separate arguments, so just count 'em */ + for (i = 0; i < PG_NARGS(); i++) + { + if (PG_ARGISNULL(i)) + count++; + } + + *nargs = PG_NARGS(); + *nulls = count; + } + + return true; +} + +/* + * num_nulls() + * Count the number of NULL arguments + */ +Datum +pg_num_nulls(PG_FUNCTION_ARGS) +{ + int32 nargs, + nulls; + + if (!count_nulls(fcinfo, &nargs, &nulls)) + PG_RETURN_NULL(); + + PG_RETURN_INT32(nulls); +} + +/* + * num_nonnulls() + * Count the number of non-NULL arguments + */ +Datum +pg_num_nonnulls(PG_FUNCTION_ARGS) +{ + int32 nargs, + nulls; + + if (!count_nulls(fcinfo, &nargs, &nulls)) + PG_RETURN_NULL(); + + PG_RETURN_INT32(nargs - nulls); +} + + +/* + * current_database() + * Expose the current database to the user + */ +Datum +current_database(PG_FUNCTION_ARGS) +{ + Name db; + + db = (Name) palloc(NAMEDATALEN); + + namestrcpy(db, get_database_name(MyDatabaseId)); + PG_RETURN_NAME(db); +} + + +/* + * current_query() + * Expose the current query to the user (useful in stored procedures) + * We might want to use ActivePortal->sourceText someday. + */ +Datum +current_query(PG_FUNCTION_ARGS) +{ + /* there is no easy way to access the more concise 'query_string' */ + if (debug_query_string) + PG_RETURN_TEXT_P(cstring_to_text(debug_query_string)); + else + PG_RETURN_NULL(); +} + +/* Function to find out which databases make use of a tablespace */ + +Datum +pg_tablespace_databases(PG_FUNCTION_ARGS) +{ + Oid tablespaceOid = PG_GETARG_OID(0); + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + char *location; + DIR *dirdesc; + struct dirent *de; + + InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC); + + if (tablespaceOid == GLOBALTABLESPACE_OID) + { + ereport(WARNING, + (errmsg("global tablespace never has databases"))); + /* return empty tuplestore */ + return (Datum) 0; + } + + if (tablespaceOid == DEFAULTTABLESPACE_OID) + location = "base"; + else + location = psprintf("pg_tblspc/%u/%s", tablespaceOid, + TABLESPACE_VERSION_DIRECTORY); + + dirdesc = AllocateDir(location); + + if (!dirdesc) + { + /* the only expected error is ENOENT */ + if (errno != ENOENT) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open directory \"%s\": %m", + location))); + ereport(WARNING, + (errmsg("%u is not a tablespace OID", tablespaceOid))); + /* return empty tuplestore */ + return (Datum) 0; + } + + while ((de = ReadDir(dirdesc, location)) != NULL) + { + Oid datOid = atooid(de->d_name); + char *subdir; + bool isempty; + Datum values[1]; + bool nulls[1]; + + /* this test skips . 
and .., but is awfully weak */ + if (!datOid) + continue; + + /* if database subdir is empty, don't report tablespace as used */ + + subdir = psprintf("%s/%s", location, de->d_name); + isempty = directory_is_empty(subdir); + pfree(subdir); + + if (isempty) + continue; /* indeed, nothing in it */ + + values[0] = ObjectIdGetDatum(datOid); + nulls[0] = false; + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, + values, nulls); + } + + FreeDir(dirdesc); + return (Datum) 0; +} + + +/* + * pg_tablespace_location - get location for a tablespace + */ +Datum +pg_tablespace_location(PG_FUNCTION_ARGS) +{ + Oid tablespaceOid = PG_GETARG_OID(0); + char sourcepath[MAXPGPATH]; + char targetpath[MAXPGPATH]; + int rllen; + struct stat st; + + /* + * It's useful to apply this function to pg_class.reltablespace, wherein + * zero means "the database's default tablespace". So, rather than + * throwing an error for zero, we choose to assume that's what is meant. + */ + if (tablespaceOid == InvalidOid) + tablespaceOid = MyDatabaseTableSpace; + + /* + * Return empty string for the cluster's default tablespaces + */ + if (tablespaceOid == DEFAULTTABLESPACE_OID || + tablespaceOid == GLOBALTABLESPACE_OID) + PG_RETURN_TEXT_P(cstring_to_text("")); + + /* + * Find the location of the tablespace by reading the symbolic link that + * is in pg_tblspc/<oid>. + */ + snprintf(sourcepath, sizeof(sourcepath), "pg_tblspc/%u", tablespaceOid); + + /* + * Before reading the link, check if the source path is a link or a + * junction point. Note that a directory is possible for a tablespace + * created with allow_in_place_tablespaces enabled. If a directory is + * found, a relative path to the data directory is returned. + */ + if (lstat(sourcepath, &st) < 0) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", + sourcepath))); + } + + if (!S_ISLNK(st.st_mode)) + PG_RETURN_TEXT_P(cstring_to_text(sourcepath)); + + /* + * In presence of a link or a junction point, return the path pointing to. + */ + rllen = readlink(sourcepath, targetpath, sizeof(targetpath)); + if (rllen < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read symbolic link \"%s\": %m", + sourcepath))); + if (rllen >= sizeof(targetpath)) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("symbolic link \"%s\" target is too long", + sourcepath))); + targetpath[rllen] = '\0'; + + PG_RETURN_TEXT_P(cstring_to_text(targetpath)); +} + +/* + * pg_sleep - delay for N seconds + */ +Datum +pg_sleep(PG_FUNCTION_ARGS) +{ + float8 secs = PG_GETARG_FLOAT8(0); + float8 endtime; + + /* + * We sleep using WaitLatch, to ensure that we'll wake up promptly if an + * important signal (such as SIGALRM or SIGINT) arrives. Because + * WaitLatch's upper limit of delay is INT_MAX milliseconds, and the user + * might ask for more than that, we sleep for at most 10 minutes and then + * loop. + * + * By computing the intended stop time initially, we avoid accumulation of + * extra delay across multiple sleeps. This also ensures we won't delay + * less than the specified time when WaitLatch is terminated early by a + * non-query-canceling signal such as SIGHUP. 
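+ * For example, pg_sleep(3600) is serviced as a series of ten-minute
+ * WaitLatch() waits against the precomputed end time.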
+ */ +#define GetNowFloat() ((float8) GetCurrentTimestamp() / 1000000.0) + + endtime = GetNowFloat() + secs; + + for (;;) + { + float8 delay; + long delay_ms; + + CHECK_FOR_INTERRUPTS(); + + delay = endtime - GetNowFloat(); + if (delay >= 600.0) + delay_ms = 600000; + else if (delay > 0.0) + delay_ms = (long) ceil(delay * 1000.0); + else + break; + + (void) WaitLatch(MyLatch, + WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, + delay_ms, + WAIT_EVENT_PG_SLEEP); + ResetLatch(MyLatch); + } + + PG_RETURN_VOID(); +} + +/* Function to return the list of grammar keywords */ +Datum +pg_get_keywords(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcontext; + TupleDesc tupdesc; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + funcctx->tuple_desc = tupdesc; + funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); + + MemoryContextSwitchTo(oldcontext); + } + + funcctx = SRF_PERCALL_SETUP(); + + if (funcctx->call_cntr < ScanKeywords.num_keywords) + { + char *values[5]; + HeapTuple tuple; + + /* cast-away-const is ugly but alternatives aren't much better */ + values[0] = unconstify(char *, + GetScanKeyword(funcctx->call_cntr, + &ScanKeywords)); + + switch (ScanKeywordCategories[funcctx->call_cntr]) + { + case UNRESERVED_KEYWORD: + values[1] = "U"; + values[3] = _("unreserved"); + break; + case COL_NAME_KEYWORD: + values[1] = "C"; + values[3] = _("unreserved (cannot be function or type name)"); + break; + case TYPE_FUNC_NAME_KEYWORD: + values[1] = "T"; + values[3] = _("reserved (can be function or type name)"); + break; + case RESERVED_KEYWORD: + values[1] = "R"; + values[3] = _("reserved"); + break; + default: /* shouldn't be possible */ + values[1] = NULL; + values[3] = NULL; + break; + } + + if (ScanKeywordBareLabel[funcctx->call_cntr]) + { + values[2] = "true"; + values[4] = _("can be bare label"); + } + else + { + values[2] = "false"; + values[4] = _("requires AS"); + } + + tuple = BuildTupleFromCStrings(funcctx->attinmeta, values); + + SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); + } + + SRF_RETURN_DONE(funcctx); +} + + +/* Function to return the list of catalog foreign key relationships */ +Datum +pg_get_catalog_foreign_keys(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + FmgrInfo *arrayinp; + + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcontext; + TupleDesc tupdesc; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + funcctx->tuple_desc = BlessTupleDesc(tupdesc); + + /* + * We use array_in to convert the C strings in sys_fk_relationships[] + * to text arrays. But we cannot use DirectFunctionCallN to call + * array_in, and it wouldn't be very efficient if we could. Fill an + * FmgrInfo to use for the call. 
+ */ + arrayinp = (FmgrInfo *) palloc(sizeof(FmgrInfo)); + fmgr_info(F_ARRAY_IN, arrayinp); + funcctx->user_fctx = arrayinp; + + MemoryContextSwitchTo(oldcontext); + } + + funcctx = SRF_PERCALL_SETUP(); + arrayinp = (FmgrInfo *) funcctx->user_fctx; + + if (funcctx->call_cntr < lengthof(sys_fk_relationships)) + { + const SysFKRelationship *fkrel = &sys_fk_relationships[funcctx->call_cntr]; + Datum values[6]; + bool nulls[6]; + HeapTuple tuple; + + memset(nulls, false, sizeof(nulls)); + + values[0] = ObjectIdGetDatum(fkrel->fk_table); + values[1] = FunctionCall3(arrayinp, + CStringGetDatum(fkrel->fk_columns), + ObjectIdGetDatum(TEXTOID), + Int32GetDatum(-1)); + values[2] = ObjectIdGetDatum(fkrel->pk_table); + values[3] = FunctionCall3(arrayinp, + CStringGetDatum(fkrel->pk_columns), + ObjectIdGetDatum(TEXTOID), + Int32GetDatum(-1)); + values[4] = BoolGetDatum(fkrel->is_array); + values[5] = BoolGetDatum(fkrel->is_opt); + + tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); + + SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); + } + + SRF_RETURN_DONE(funcctx); +} + + +/* + * Return the type of the argument. + */ +Datum +pg_typeof(PG_FUNCTION_ARGS) +{ + PG_RETURN_OID(get_fn_expr_argtype(fcinfo->flinfo, 0)); +} + + +/* + * Implementation of the COLLATE FOR expression; returns the collation + * of the argument. + */ +Datum +pg_collation_for(PG_FUNCTION_ARGS) +{ + Oid typeid; + Oid collid; + + typeid = get_fn_expr_argtype(fcinfo->flinfo, 0); + if (!typeid) + PG_RETURN_NULL(); + if (!type_is_collatable(typeid) && typeid != UNKNOWNOID) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("collations are not supported by type %s", + format_type_be(typeid)))); + + collid = PG_GET_COLLATION(); + if (!collid) + PG_RETURN_NULL(); + PG_RETURN_TEXT_P(cstring_to_text(generate_collation_name(collid))); +} + + +/* + * pg_relation_is_updatable - determine which update events the specified + * relation supports. + * + * This relies on relation_is_updatable() in rewriteHandler.c, which see + * for additional information. + */ +Datum +pg_relation_is_updatable(PG_FUNCTION_ARGS) +{ + Oid reloid = PG_GETARG_OID(0); + bool include_triggers = PG_GETARG_BOOL(1); + + PG_RETURN_INT32(relation_is_updatable(reloid, NIL, include_triggers, NULL)); +} + +/* + * pg_column_is_updatable - determine whether a column is updatable + * + * This function encapsulates the decision about just what + * information_schema.columns.is_updatable actually means. It's not clear + * whether deletability of the column's relation should be required, so + * we want that decision in C code where we could change it without initdb. + */ +Datum +pg_column_is_updatable(PG_FUNCTION_ARGS) +{ + Oid reloid = PG_GETARG_OID(0); + AttrNumber attnum = PG_GETARG_INT16(1); + AttrNumber col = attnum - FirstLowInvalidHeapAttributeNumber; + bool include_triggers = PG_GETARG_BOOL(2); + int events; + + /* System columns are never updatable */ + if (attnum <= 0) + PG_RETURN_BOOL(false); + + events = relation_is_updatable(reloid, NIL, include_triggers, + bms_make_singleton(col)); + + /* We require both updatability and deletability of the relation */ +#define REQ_EVENTS ((1 << CMD_UPDATE) | (1 << CMD_DELETE)) + + PG_RETURN_BOOL((events & REQ_EVENTS) == REQ_EVENTS); +} + + +/* + * pg_input_is_valid - test whether string is valid input for datatype. + * + * Returns true if OK, false if not. + * + * This will only work usefully if the datatype's input function has been + * updated to return "soft" errors via errsave/ereturn. 
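+ * For example, pg_input_is_valid('42', 'integer') returns true, while
+ * pg_input_is_valid('abc', 'integer') returns false instead of raising an
+ * error.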
+ */ +Datum +pg_input_is_valid(PG_FUNCTION_ARGS) +{ + text *txt = PG_GETARG_TEXT_PP(0); + text *typname = PG_GETARG_TEXT_PP(1); + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + PG_RETURN_BOOL(pg_input_is_valid_common(fcinfo, txt, typname, + &escontext)); +} + +/* + * pg_input_error_info - test whether string is valid input for datatype. + * + * Returns NULL if OK, else the primary message, detail message, hint message + * and sql error code from the error. + * + * This will only work usefully if the datatype's input function has been + * updated to return "soft" errors via errsave/ereturn. + */ +Datum +pg_input_error_info(PG_FUNCTION_ARGS) +{ + text *txt = PG_GETARG_TEXT_PP(0); + text *typname = PG_GETARG_TEXT_PP(1); + ErrorSaveContext escontext = {T_ErrorSaveContext}; + TupleDesc tupdesc; + Datum values[4]; + bool isnull[4]; + + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + /* Enable details_wanted */ + escontext.details_wanted = true; + + if (pg_input_is_valid_common(fcinfo, txt, typname, + &escontext)) + memset(isnull, true, sizeof(isnull)); + else + { + char *sqlstate; + + Assert(escontext.error_occurred); + Assert(escontext.error_data != NULL); + Assert(escontext.error_data->message != NULL); + + memset(isnull, false, sizeof(isnull)); + + values[0] = CStringGetTextDatum(escontext.error_data->message); + + if (escontext.error_data->detail != NULL) + values[1] = CStringGetTextDatum(escontext.error_data->detail); + else + isnull[1] = true; + + if (escontext.error_data->hint != NULL) + values[2] = CStringGetTextDatum(escontext.error_data->hint); + else + isnull[2] = true; + + sqlstate = unpack_sql_state(escontext.error_data->sqlerrcode); + values[3] = CStringGetTextDatum(sqlstate); + } + + return HeapTupleGetDatum(heap_form_tuple(tupdesc, values, isnull)); +} + +/* Common subroutine for the above */ +static bool +pg_input_is_valid_common(FunctionCallInfo fcinfo, + text *txt, text *typname, + ErrorSaveContext *escontext) +{ + char *str = text_to_cstring(txt); + ValidIOData *my_extra; + Datum converted; + + /* + * We arrange to look up the needed I/O info just once per series of + * calls, assuming the data type doesn't change underneath us. + */ + my_extra = (ValidIOData *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL) + { + fcinfo->flinfo->fn_extra = + MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(ValidIOData)); + my_extra = (ValidIOData *) fcinfo->flinfo->fn_extra; + my_extra->typoid = InvalidOid; + /* Detect whether typname argument is constant. */ + my_extra->typname_constant = get_fn_expr_arg_stable(fcinfo->flinfo, 1); + } + + /* + * If the typname argument is constant, we only need to parse it the first + * time through. + */ + if (my_extra->typoid == InvalidOid || !my_extra->typname_constant) + { + char *typnamestr = text_to_cstring(typname); + Oid typoid; + + /* Parse type-name argument to obtain type OID and encoded typmod. */ + (void) parseTypeString(typnamestr, &typoid, &my_extra->typmod, NULL); + + /* Update type-specific info if typoid changed. */ + if (my_extra->typoid != typoid) + { + getTypeInputInfo(typoid, + &my_extra->typiofunc, + &my_extra->typioparam); + fmgr_info_cxt(my_extra->typiofunc, &my_extra->inputproc, + fcinfo->flinfo->fn_mcxt); + my_extra->typoid = typoid; + } + } + + /* Now we can try to perform the conversion. 
*/ + return InputFunctionCallSafe(&my_extra->inputproc, + str, + my_extra->typioparam, + my_extra->typmod, + (Node *) escontext, + &converted); +} + + +/* + * Is character a valid identifier start? + * Must match scan.l's {ident_start} character class. + */ +static bool +is_ident_start(unsigned char c) +{ + /* Underscores and ASCII letters are OK */ + if (c == '_') + return true; + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) + return true; + /* Any high-bit-set character is OK (might be part of a multibyte char) */ + if (IS_HIGHBIT_SET(c)) + return true; + return false; +} + +/* + * Is character a valid identifier continuation? + * Must match scan.l's {ident_cont} character class. + */ +static bool +is_ident_cont(unsigned char c) +{ + /* Can be digit or dollar sign ... */ + if ((c >= '0' && c <= '9') || c == '$') + return true; + /* ... or an identifier start character */ + return is_ident_start(c); +} + +/* + * parse_ident - parse a SQL qualified identifier into separate identifiers. + * When strict mode is active (second parameter), then any chars after + * the last identifier are disallowed. + */ +Datum +parse_ident(PG_FUNCTION_ARGS) +{ + text *qualname = PG_GETARG_TEXT_PP(0); + bool strict = PG_GETARG_BOOL(1); + char *qualname_str = text_to_cstring(qualname); + ArrayBuildState *astate = NULL; + char *nextp; + bool after_dot = false; + + /* + * The code below scribbles on qualname_str in some cases, so we should + * reconvert qualname if we need to show the original string in error + * messages. + */ + nextp = qualname_str; + + /* skip leading whitespace */ + while (scanner_isspace(*nextp)) + nextp++; + + for (;;) + { + char *curname; + bool missing_ident = true; + + if (*nextp == '"') + { + char *endp; + + curname = nextp + 1; + for (;;) + { + endp = strchr(nextp + 1, '"'); + if (endp == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("string is not a valid identifier: \"%s\"", + text_to_cstring(qualname)), + errdetail("String has unclosed double quotes."))); + if (endp[1] != '"') + break; + memmove(endp, endp + 1, strlen(endp)); + nextp = endp; + } + nextp = endp + 1; + *endp = '\0'; + + if (endp - curname == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("string is not a valid identifier: \"%s\"", + text_to_cstring(qualname)), + errdetail("Quoted identifier must not be empty."))); + + astate = accumArrayResult(astate, CStringGetTextDatum(curname), + false, TEXTOID, CurrentMemoryContext); + missing_ident = false; + } + else if (is_ident_start((unsigned char) *nextp)) + { + char *downname; + int len; + text *part; + + curname = nextp++; + while (is_ident_cont((unsigned char) *nextp)) + nextp++; + + len = nextp - curname; + + /* + * We don't implicitly truncate identifiers. This is useful for + * allowing the user to check for specific parts of the identifier + * being too long. It's easy enough for the user to get the + * truncated names by casting our output to name[]. + */ + downname = copy_identifier(curname, len); + part = cstring_to_text_with_len(downname, len); + astate = accumArrayResult(astate, PointerGetDatum(part), false, + TEXTOID, CurrentMemoryContext); + missing_ident = false; + } + + if (missing_ident) + { + /* Different error messages based on where we failed. 
*/ + if (*nextp == '.') + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("string is not a valid identifier: \"%s\"", + text_to_cstring(qualname)), + errdetail("No valid identifier before \".\"."))); + else if (after_dot) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("string is not a valid identifier: \"%s\"", + text_to_cstring(qualname)), + errdetail("No valid identifier after \".\"."))); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("string is not a valid identifier: \"%s\"", + text_to_cstring(qualname)))); + } + + while (scanner_isspace(*nextp)) + nextp++; + + if (*nextp == '.') + { + after_dot = true; + nextp++; + while (scanner_isspace(*nextp)) + nextp++; + } + else if (*nextp == '\0') + { + break; + } + else + { + if (strict) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("string is not a valid identifier: \"%s\"", + text_to_cstring(qualname)))); + break; + } + } + + PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext)); +} + +/* + * pg_current_logfile + * + * Report current log file used by log collector by scanning current_logfiles. + */ +Datum +pg_current_logfile(PG_FUNCTION_ARGS) +{ + FILE *fd; + char lbuffer[MAXPGPATH]; + char *logfmt; + + /* The log format parameter is optional */ + if (PG_NARGS() == 0 || PG_ARGISNULL(0)) + logfmt = NULL; + else + { + logfmt = text_to_cstring(PG_GETARG_TEXT_PP(0)); + + if (strcmp(logfmt, "stderr") != 0 && + strcmp(logfmt, "csvlog") != 0 && + strcmp(logfmt, "jsonlog") != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("log format \"%s\" is not supported", logfmt), + errhint("The supported log formats are \"stderr\", \"csvlog\", and \"jsonlog\"."))); + } + + fd = AllocateFile(LOG_METAINFO_DATAFILE, "r"); + if (fd == NULL) + { + if (errno != ENOENT) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", + LOG_METAINFO_DATAFILE))); + PG_RETURN_NULL(); + } + +#ifdef WIN32 + /* syslogger.c writes CRLF line endings on Windows */ + _setmode(_fileno(fd), _O_TEXT); +#endif + + /* + * Read the file to gather current log filename(s) registered by the + * syslogger. + */ + while (fgets(lbuffer, sizeof(lbuffer), fd) != NULL) + { + char *log_format; + char *log_filepath; + char *nlpos; + + /* Extract log format and log file path from the line. */ + log_format = lbuffer; + log_filepath = strchr(lbuffer, ' '); + if (log_filepath == NULL) + { + /* Uh oh. No space found, so file content is corrupted. */ + elog(ERROR, + "missing space character in \"%s\"", LOG_METAINFO_DATAFILE); + break; + } + + *log_filepath = '\0'; + log_filepath++; + nlpos = strchr(log_filepath, '\n'); + if (nlpos == NULL) + { + /* Uh oh. No newline found, so file content is corrupted. */ + elog(ERROR, + "missing newline character in \"%s\"", LOG_METAINFO_DATAFILE); + break; + } + *nlpos = '\0'; + + if (logfmt == NULL || strcmp(logfmt, log_format) == 0) + { + FreeFile(fd); + PG_RETURN_TEXT_P(cstring_to_text(log_filepath)); + } + } + + /* Close the current log filename file. 
*/ + FreeFile(fd); + + PG_RETURN_NULL(); +} + +/* + * Report current log file used by log collector (1 argument version) + * + * note: this wrapper is necessary to pass the sanity check in opr_sanity, + * which checks that all built-in functions that share the implementing C + * function take the same number of arguments + */ +Datum +pg_current_logfile_1arg(PG_FUNCTION_ARGS) +{ + return pg_current_logfile(fcinfo); +} + +/* + * SQL wrapper around RelationGetReplicaIndex(). + */ +Datum +pg_get_replica_identity_index(PG_FUNCTION_ARGS) +{ + Oid reloid = PG_GETARG_OID(0); + Oid idxoid; + Relation rel; + + rel = table_open(reloid, AccessShareLock); + idxoid = RelationGetReplicaIndex(rel); + table_close(rel, AccessShareLock); + + if (OidIsValid(idxoid)) + PG_RETURN_OID(idxoid); + else + PG_RETURN_NULL(); +} + +/* + * Transition function for the ANY_VALUE aggregate + */ +Datum +any_value_transfn(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(PG_GETARG_DATUM(0)); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/multirangetypes.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/multirangetypes.c new file mode 100644 index 00000000000..9443c2b884a --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/multirangetypes.c @@ -0,0 +1,2927 @@ +/*------------------------------------------------------------------------- + * + * multirangetypes.c + * I/O functions, operators, and support functions for multirange types. + * + * The stored (serialized) format of a multirange value is: + * + * 12 bytes: MultirangeType struct including varlena header, multirange + * type's OID and the number of ranges in the multirange. + * 4 * (rangesCount - 1) bytes: 32-bit items pointing to the each range + * in the multirange starting from + * the second one. + * 1 * rangesCount bytes : 8-bit flags for each range in the multirange + * The rest of the multirange are range bound values pointed by multirange + * items. + * + * Majority of items contain lengths of corresponding range bound values. + * Thanks to that items are typically low numbers. This makes multiranges + * compression-friendly. Every MULTIRANGE_ITEM_OFFSET_STRIDE item contains + * an offset of the corresponding range bound values. That allows fast lookups + * for a particular range index. Offsets are counted starting from the end of + * flags aligned to the bound type. 
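+ *
+ * For illustration, a multirange holding three ranges is laid out roughly
+ * as: the 12-byte header, two uint32 items (for the second and third
+ * ranges), three 1-byte flags, alignment padding, and finally the packed
+ * bound values of the three ranges.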
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/multirangetypes.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/tupmacs.h" +#include "common/hashfn.h" +#include "funcapi.h" +#include "lib/stringinfo.h" +#include "libpq/pqformat.h" +#include "miscadmin.h" +#include "port/pg_bitutils.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/rangetypes.h" +#include "utils/multirangetypes.h" +#include "utils/array.h" +#include "utils/memutils.h" + +/* fn_extra cache entry for one of the range I/O functions */ +typedef struct MultirangeIOData +{ + TypeCacheEntry *typcache; /* multirange type's typcache entry */ + FmgrInfo typioproc; /* range type's I/O proc */ + Oid typioparam; /* range type's I/O parameter */ +} MultirangeIOData; + +typedef enum +{ + MULTIRANGE_BEFORE_RANGE, + MULTIRANGE_IN_RANGE, + MULTIRANGE_IN_RANGE_ESCAPED, + MULTIRANGE_IN_RANGE_QUOTED, + MULTIRANGE_IN_RANGE_QUOTED_ESCAPED, + MULTIRANGE_AFTER_RANGE, + MULTIRANGE_FINISHED, +} MultirangeParseState; + +/* + * Macros for accessing past MultirangeType parts of multirange: items, flags + * and boundaries. + */ +#define MultirangeGetItemsPtr(mr) ((uint32 *) ((Pointer) (mr) + \ + sizeof(MultirangeType))) +#define MultirangeGetFlagsPtr(mr) ((uint8 *) ((Pointer) (mr) + \ + sizeof(MultirangeType) + ((mr)->rangeCount - 1) * sizeof(uint32))) +#define MultirangeGetBoundariesPtr(mr, align) ((Pointer) (mr) + \ + att_align_nominal(sizeof(MultirangeType) + \ + ((mr)->rangeCount - 1) * sizeof(uint32) + \ + (mr)->rangeCount * sizeof(uint8), (align))) + +#define MULTIRANGE_ITEM_OFF_BIT 0x80000000 +#define MULTIRANGE_ITEM_GET_OFFLEN(item) ((item) & 0x7FFFFFFF) +#define MULTIRANGE_ITEM_HAS_OFF(item) ((item) & MULTIRANGE_ITEM_OFF_BIT) +#define MULTIRANGE_ITEM_OFFSET_STRIDE 4 + +typedef int (*multirange_bsearch_comparison) (TypeCacheEntry *typcache, + RangeBound *lower, + RangeBound *upper, + void *key, + bool *match); + +static MultirangeIOData *get_multirange_io_data(FunctionCallInfo fcinfo, + Oid mltrngtypid, + IOFuncSelector func); +static int32 multirange_canonicalize(TypeCacheEntry *rangetyp, + int32 input_range_count, + RangeType **ranges); + +/* + *---------------------------------------------------------- + * I/O FUNCTIONS + *---------------------------------------------------------- + */ + +/* + * Converts string to multirange. + * + * We expect curly brackets to bound the list, with zero or more ranges + * separated by commas. We accept whitespace anywhere: before/after our + * brackets and around the commas. Ranges can be the empty literal or some + * stuff inside parens/brackets. Mostly we delegate parsing the individual + * range contents to range_in, but we have to detect quoting and + * backslash-escaping which can happen for range bounds. Backslashes can + * escape something inside or outside a quoted string, and a quoted string + * can escape quote marks with either backslashes or double double-quotes. 
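+ *
+ * For example (illustrative literals over an int range type): '{}',
+ * '{empty}' and ' { [1,3) , [5,7) } ' are all accepted, the last one
+ * producing a two-range multirange.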
+ */ +Datum +multirange_in(PG_FUNCTION_ARGS) +{ + char *input_str = PG_GETARG_CSTRING(0); + Oid mltrngtypoid = PG_GETARG_OID(1); + Oid typmod = PG_GETARG_INT32(2); + Node *escontext = fcinfo->context; + TypeCacheEntry *rangetyp; + int32 ranges_seen = 0; + int32 range_count = 0; + int32 range_capacity = 8; + RangeType *range; + RangeType **ranges = palloc(range_capacity * sizeof(RangeType *)); + MultirangeIOData *cache; + MultirangeType *ret; + MultirangeParseState parse_state; + const char *ptr = input_str; + const char *range_str_begin = NULL; + int32 range_str_len; + char *range_str; + Datum range_datum; + + cache = get_multirange_io_data(fcinfo, mltrngtypoid, IOFunc_input); + rangetyp = cache->typcache->rngtype; + + /* consume whitespace */ + while (*ptr != '\0' && isspace((unsigned char) *ptr)) + ptr++; + + if (*ptr == '{') + ptr++; + else + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed multirange literal: \"%s\"", + input_str), + errdetail("Missing left brace."))); + + /* consume ranges */ + parse_state = MULTIRANGE_BEFORE_RANGE; + for (; parse_state != MULTIRANGE_FINISHED; ptr++) + { + char ch = *ptr; + + if (ch == '\0') + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed multirange literal: \"%s\"", + input_str), + errdetail("Unexpected end of input."))); + + /* skip whitespace */ + if (isspace((unsigned char) ch)) + continue; + + switch (parse_state) + { + case MULTIRANGE_BEFORE_RANGE: + if (ch == '[' || ch == '(') + { + range_str_begin = ptr; + parse_state = MULTIRANGE_IN_RANGE; + } + else if (ch == '}' && ranges_seen == 0) + parse_state = MULTIRANGE_FINISHED; + else if (pg_strncasecmp(ptr, RANGE_EMPTY_LITERAL, + strlen(RANGE_EMPTY_LITERAL)) == 0) + { + ranges_seen++; + /* nothing to do with an empty range */ + ptr += strlen(RANGE_EMPTY_LITERAL) - 1; + parse_state = MULTIRANGE_AFTER_RANGE; + } + else + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed multirange literal: \"%s\"", + input_str), + errdetail("Expected range start."))); + break; + case MULTIRANGE_IN_RANGE: + if (ch == ']' || ch == ')') + { + range_str_len = ptr - range_str_begin + 1; + range_str = pnstrdup(range_str_begin, range_str_len); + if (range_capacity == range_count) + { + range_capacity *= 2; + ranges = (RangeType **) + repalloc(ranges, range_capacity * sizeof(RangeType *)); + } + ranges_seen++; + if (!InputFunctionCallSafe(&cache->typioproc, + range_str, + cache->typioparam, + typmod, + escontext, + &range_datum)) + PG_RETURN_NULL(); + range = DatumGetRangeTypeP(range_datum); + if (!RangeIsEmpty(range)) + ranges[range_count++] = range; + parse_state = MULTIRANGE_AFTER_RANGE; + } + else + { + if (ch == '"') + parse_state = MULTIRANGE_IN_RANGE_QUOTED; + else if (ch == '\\') + parse_state = MULTIRANGE_IN_RANGE_ESCAPED; + + /* + * We will include this character into range_str once we + * find the end of the range value. + */ + } + break; + case MULTIRANGE_IN_RANGE_ESCAPED: + + /* + * We will include this character into range_str once we find + * the end of the range value. 
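+ * In particular, a backslash-escaped ')' or ']' does not terminate the
+ * range here; the escaped character is passed through to range_in as-is.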
+ */ + parse_state = MULTIRANGE_IN_RANGE; + break; + case MULTIRANGE_IN_RANGE_QUOTED: + if (ch == '"') + if (*(ptr + 1) == '"') + { + /* two quote marks means an escaped quote mark */ + ptr++; + } + else + parse_state = MULTIRANGE_IN_RANGE; + else if (ch == '\\') + parse_state = MULTIRANGE_IN_RANGE_QUOTED_ESCAPED; + + /* + * We will include this character into range_str once we find + * the end of the range value. + */ + break; + case MULTIRANGE_AFTER_RANGE: + if (ch == ',') + parse_state = MULTIRANGE_BEFORE_RANGE; + else if (ch == '}') + parse_state = MULTIRANGE_FINISHED; + else + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed multirange literal: \"%s\"", + input_str), + errdetail("Expected comma or end of multirange."))); + break; + case MULTIRANGE_IN_RANGE_QUOTED_ESCAPED: + + /* + * We will include this character into range_str once we find + * the end of the range value. + */ + parse_state = MULTIRANGE_IN_RANGE_QUOTED; + break; + default: + elog(ERROR, "unknown parse state: %d", parse_state); + } + } + + /* consume whitespace */ + while (*ptr != '\0' && isspace((unsigned char) *ptr)) + ptr++; + + if (*ptr != '\0') + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed multirange literal: \"%s\"", + input_str), + errdetail("Junk after closing right brace."))); + + ret = make_multirange(mltrngtypoid, rangetyp, range_count, ranges); + PG_RETURN_MULTIRANGE_P(ret); +} + +Datum +multirange_out(PG_FUNCTION_ARGS) +{ + MultirangeType *multirange = PG_GETARG_MULTIRANGE_P(0); + Oid mltrngtypoid = MultirangeTypeGetOid(multirange); + MultirangeIOData *cache; + StringInfoData buf; + RangeType *range; + char *rangeStr; + int32 range_count; + int32 i; + RangeType **ranges; + + cache = get_multirange_io_data(fcinfo, mltrngtypoid, IOFunc_output); + + initStringInfo(&buf); + + appendStringInfoChar(&buf, '{'); + + multirange_deserialize(cache->typcache->rngtype, multirange, &range_count, &ranges); + for (i = 0; i < range_count; i++) + { + if (i > 0) + appendStringInfoChar(&buf, ','); + range = ranges[i]; + rangeStr = OutputFunctionCall(&cache->typioproc, RangeTypePGetDatum(range)); + appendStringInfoString(&buf, rangeStr); + } + + appendStringInfoChar(&buf, '}'); + + PG_RETURN_CSTRING(buf.data); +} + +/* + * Binary representation: First a int32-sized count of ranges, followed by + * ranges in their native binary representation. 
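+ * Each range is prefixed with its length in bytes as an int32; this is
+ * the same layout that multirange_send() below produces.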
+ */ +Datum +multirange_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + Oid mltrngtypoid = PG_GETARG_OID(1); + int32 typmod = PG_GETARG_INT32(2); + MultirangeIOData *cache; + uint32 range_count; + RangeType **ranges; + MultirangeType *ret; + StringInfoData tmpbuf; + + cache = get_multirange_io_data(fcinfo, mltrngtypoid, IOFunc_receive); + + range_count = pq_getmsgint(buf, 4); + ranges = palloc(range_count * sizeof(RangeType *)); + + initStringInfo(&tmpbuf); + for (int i = 0; i < range_count; i++) + { + uint32 range_len = pq_getmsgint(buf, 4); + const char *range_data = pq_getmsgbytes(buf, range_len); + + resetStringInfo(&tmpbuf); + appendBinaryStringInfo(&tmpbuf, range_data, range_len); + + ranges[i] = DatumGetRangeTypeP(ReceiveFunctionCall(&cache->typioproc, + &tmpbuf, + cache->typioparam, + typmod)); + } + pfree(tmpbuf.data); + + pq_getmsgend(buf); + + ret = make_multirange(mltrngtypoid, cache->typcache->rngtype, + range_count, ranges); + PG_RETURN_MULTIRANGE_P(ret); +} + +Datum +multirange_send(PG_FUNCTION_ARGS) +{ + MultirangeType *multirange = PG_GETARG_MULTIRANGE_P(0); + Oid mltrngtypoid = MultirangeTypeGetOid(multirange); + StringInfo buf = makeStringInfo(); + RangeType **ranges; + int32 range_count; + MultirangeIOData *cache; + + cache = get_multirange_io_data(fcinfo, mltrngtypoid, IOFunc_send); + + /* construct output */ + pq_begintypsend(buf); + + pq_sendint32(buf, multirange->rangeCount); + + multirange_deserialize(cache->typcache->rngtype, multirange, &range_count, &ranges); + for (int i = 0; i < range_count; i++) + { + Datum range; + + range = RangeTypePGetDatum(ranges[i]); + range = PointerGetDatum(SendFunctionCall(&cache->typioproc, range)); + + pq_sendint32(buf, VARSIZE(range) - VARHDRSZ); + pq_sendbytes(buf, VARDATA(range), VARSIZE(range) - VARHDRSZ); + } + + PG_RETURN_BYTEA_P(pq_endtypsend(buf)); +} + +/* + * get_multirange_io_data: get cached information needed for multirange type I/O + * + * The multirange I/O functions need a bit more cached info than other multirange + * functions, so they store a MultirangeIOData struct in fn_extra, not just a + * pointer to a type cache entry. 
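+ *
+ * The cache is keyed on the multirange type OID, so it is rebuilt if the
+ * same FmgrInfo is later used for a different multirange type.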
+ */ +static MultirangeIOData * +get_multirange_io_data(FunctionCallInfo fcinfo, Oid mltrngtypid, IOFuncSelector func) +{ + MultirangeIOData *cache = (MultirangeIOData *) fcinfo->flinfo->fn_extra; + + if (cache == NULL || cache->typcache->type_id != mltrngtypid) + { + Oid typiofunc; + int16 typlen; + bool typbyval; + char typalign; + char typdelim; + + cache = (MultirangeIOData *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(MultirangeIOData)); + cache->typcache = lookup_type_cache(mltrngtypid, TYPECACHE_MULTIRANGE_INFO); + if (cache->typcache->rngtype == NULL) + elog(ERROR, "type %u is not a multirange type", mltrngtypid); + + /* get_type_io_data does more than we need, but is convenient */ + get_type_io_data(cache->typcache->rngtype->type_id, + func, + &typlen, + &typbyval, + &typalign, + &typdelim, + &cache->typioparam, + &typiofunc); + + if (!OidIsValid(typiofunc)) + { + /* this could only happen for receive or send */ + if (func == IOFunc_receive) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("no binary input function available for type %s", + format_type_be(cache->typcache->rngtype->type_id)))); + else + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("no binary output function available for type %s", + format_type_be(cache->typcache->rngtype->type_id)))); + } + fmgr_info_cxt(typiofunc, &cache->typioproc, + fcinfo->flinfo->fn_mcxt); + + fcinfo->flinfo->fn_extra = (void *) cache; + } + + return cache; +} + +/* + * Converts a list of arbitrary ranges into a list that is sorted and merged. + * Changes the contents of `ranges`. + * + * Returns the number of slots actually used, which may be less than + * input_range_count but never more. + * + * We assume that no input ranges are null, but empties are okay. + */ +static int32 +multirange_canonicalize(TypeCacheEntry *rangetyp, int32 input_range_count, + RangeType **ranges) +{ + RangeType *lastRange = NULL; + RangeType *currentRange; + int32 i; + int32 output_range_count = 0; + + /* Sort the ranges so we can find the ones that overlap/meet. */ + qsort_arg(ranges, input_range_count, sizeof(RangeType *), range_compare, + rangetyp); + + /* Now merge where possible: */ + for (i = 0; i < input_range_count; i++) + { + currentRange = ranges[i]; + if (RangeIsEmpty(currentRange)) + continue; + + if (lastRange == NULL) + { + ranges[output_range_count++] = lastRange = currentRange; + continue; + } + + /* + * range_adjacent_internal gives true if *either* A meets B or B meets + * A, which is not quite want we want, but we rely on the sorting + * above to rule out B meets A ever happening. + */ + if (range_adjacent_internal(rangetyp, lastRange, currentRange)) + { + /* The two ranges touch (without overlap), so merge them: */ + ranges[output_range_count - 1] = lastRange = + range_union_internal(rangetyp, lastRange, currentRange, false); + } + else if (range_before_internal(rangetyp, lastRange, currentRange)) + { + /* There's a gap, so make a new entry: */ + lastRange = ranges[output_range_count] = currentRange; + output_range_count++; + } + else + { + /* They must overlap, so merge them: */ + ranges[output_range_count - 1] = lastRange = + range_union_internal(rangetyp, lastRange, currentRange, true); + } + } + + return output_range_count; +} + +/* + *---------------------------------------------------------- + * SUPPORT FUNCTIONS + * + * These functions aren't in pg_proc, but are useful for + * defining new generic multirange functions in C. 
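+ *
+ * A caller typically looks roughly like this (illustrative sketch):
+ *
+ *     MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0);
+ *     TypeCacheEntry *typcache =
+ *         multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+ *
+ * after which typcache->rngtype can be handed to the range-level
+ * support routines.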
+ *---------------------------------------------------------- + */ + +/* + * multirange_get_typcache: get cached information about a multirange type + * + * This is for use by multirange-related functions that follow the convention + * of using the fn_extra field as a pointer to the type cache entry for + * the multirange type. Functions that need to cache more information than + * that must fend for themselves. + */ +TypeCacheEntry * +multirange_get_typcache(FunctionCallInfo fcinfo, Oid mltrngtypid) +{ + TypeCacheEntry *typcache = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; + + if (typcache == NULL || + typcache->type_id != mltrngtypid) + { + typcache = lookup_type_cache(mltrngtypid, TYPECACHE_MULTIRANGE_INFO); + if (typcache->rngtype == NULL) + elog(ERROR, "type %u is not a multirange type", mltrngtypid); + fcinfo->flinfo->fn_extra = (void *) typcache; + } + + return typcache; +} + + +/* + * Estimate size occupied by serialized multirange. + */ +static Size +multirange_size_estimate(TypeCacheEntry *rangetyp, int32 range_count, + RangeType **ranges) +{ + char elemalign = rangetyp->rngelemtype->typalign; + Size size; + int32 i; + + /* + * Count space for MultirangeType struct, items and flags. + */ + size = att_align_nominal(sizeof(MultirangeType) + + Max(range_count - 1, 0) * sizeof(uint32) + + range_count * sizeof(uint8), elemalign); + + /* Count space for range bounds */ + for (i = 0; i < range_count; i++) + size += att_align_nominal(VARSIZE(ranges[i]) - + sizeof(RangeType) - + sizeof(char), elemalign); + + return size; +} + +/* + * Write multirange data into pre-allocated space. + */ +static void +write_multirange_data(MultirangeType *multirange, TypeCacheEntry *rangetyp, + int32 range_count, RangeType **ranges) +{ + uint32 *items; + uint32 prev_offset = 0; + uint8 *flags; + int32 i; + Pointer begin, + ptr; + char elemalign = rangetyp->rngelemtype->typalign; + + items = MultirangeGetItemsPtr(multirange); + flags = MultirangeGetFlagsPtr(multirange); + ptr = begin = MultirangeGetBoundariesPtr(multirange, elemalign); + for (i = 0; i < range_count; i++) + { + uint32 len; + + if (i > 0) + { + /* + * Every range, except the first one, has an item. Every + * MULTIRANGE_ITEM_OFFSET_STRIDE item contains an offset, others + * contain lengths. + */ + items[i - 1] = ptr - begin; + if ((i % MULTIRANGE_ITEM_OFFSET_STRIDE) != 0) + items[i - 1] -= prev_offset; + else + items[i - 1] |= MULTIRANGE_ITEM_OFF_BIT; + prev_offset = ptr - begin; + } + flags[i] = *((Pointer) ranges[i] + VARSIZE(ranges[i]) - sizeof(char)); + len = VARSIZE(ranges[i]) - sizeof(RangeType) - sizeof(char); + memcpy(ptr, (Pointer) (ranges[i] + 1), len); + ptr += att_align_nominal(len, elemalign); + } +} + + +/* + * This serializes the multirange from a list of non-null ranges. It also + * sorts the ranges and merges any that touch. The ranges should already be + * detoasted, and there should be no NULLs. This should be used by most + * callers. + * + * Note that we may change the `ranges` parameter (the pointers, but not + * any already-existing RangeType contents). + */ +MultirangeType * +make_multirange(Oid mltrngtypoid, TypeCacheEntry *rangetyp, int32 range_count, + RangeType **ranges) +{ + MultirangeType *multirange; + Size size; + + /* Sort and merge input ranges. 
*/ + range_count = multirange_canonicalize(rangetyp, range_count, ranges); + + /* Note: zero-fill is required here, just as in heap tuples */ + size = multirange_size_estimate(rangetyp, range_count, ranges); + multirange = palloc0(size); + SET_VARSIZE(multirange, size); + + /* Now fill in the datum */ + multirange->multirangetypid = mltrngtypoid; + multirange->rangeCount = range_count; + + write_multirange_data(multirange, rangetyp, range_count, ranges); + + return multirange; +} + +/* + * Get offset of bounds values of the i'th range in the multirange. + */ +static uint32 +multirange_get_bounds_offset(const MultirangeType *multirange, int32 i) +{ + uint32 *items = MultirangeGetItemsPtr(multirange); + uint32 offset = 0; + + /* + * Summarize lengths till we meet an offset. + */ + while (i > 0) + { + offset += MULTIRANGE_ITEM_GET_OFFLEN(items[i - 1]); + if (MULTIRANGE_ITEM_HAS_OFF(items[i - 1])) + break; + i--; + } + return offset; +} + +/* + * Fetch the i'th range from the multirange. + */ +RangeType * +multirange_get_range(TypeCacheEntry *rangetyp, + const MultirangeType *multirange, int i) +{ + uint32 offset; + uint8 flags; + Pointer begin, + ptr; + int16 typlen = rangetyp->rngelemtype->typlen; + char typalign = rangetyp->rngelemtype->typalign; + uint32 len; + RangeType *range; + + Assert(i < multirange->rangeCount); + + offset = multirange_get_bounds_offset(multirange, i); + flags = MultirangeGetFlagsPtr(multirange)[i]; + ptr = begin = MultirangeGetBoundariesPtr(multirange, typalign) + offset; + + /* + * Calculate the size of bound values. In principle, we could get offset + * of the next range bound values and calculate accordingly. But range + * bound values are aligned, so we have to walk the values to get the + * exact size. + */ + if (RANGE_HAS_LBOUND(flags)) + ptr = (Pointer) att_addlength_pointer(ptr, typlen, ptr); + if (RANGE_HAS_UBOUND(flags)) + { + ptr = (Pointer) att_align_pointer(ptr, typalign, typlen, ptr); + ptr = (Pointer) att_addlength_pointer(ptr, typlen, ptr); + } + len = (ptr - begin) + sizeof(RangeType) + sizeof(uint8); + + range = palloc0(len); + SET_VARSIZE(range, len); + range->rangetypid = rangetyp->type_id; + + memcpy(range + 1, begin, ptr - begin); + *((uint8 *) (range + 1) + (ptr - begin)) = flags; + + return range; +} + +/* + * Fetch bounds from the i'th range of the multirange. This is the shortcut for + * doing the same thing as multirange_get_range() + range_deserialize(), but + * performing fewer operations. 
+ */ +void +multirange_get_bounds(TypeCacheEntry *rangetyp, + const MultirangeType *multirange, + uint32 i, RangeBound *lower, RangeBound *upper) +{ + uint32 offset; + uint8 flags; + Pointer ptr; + int16 typlen = rangetyp->rngelemtype->typlen; + char typalign = rangetyp->rngelemtype->typalign; + bool typbyval = rangetyp->rngelemtype->typbyval; + Datum lbound; + Datum ubound; + + Assert(i < multirange->rangeCount); + + offset = multirange_get_bounds_offset(multirange, i); + flags = MultirangeGetFlagsPtr(multirange)[i]; + ptr = MultirangeGetBoundariesPtr(multirange, typalign) + offset; + + /* multirange can't contain empty ranges */ + Assert((flags & RANGE_EMPTY) == 0); + + /* fetch lower bound, if any */ + if (RANGE_HAS_LBOUND(flags)) + { + /* att_align_pointer cannot be necessary here */ + lbound = fetch_att(ptr, typbyval, typlen); + ptr = (Pointer) att_addlength_pointer(ptr, typlen, ptr); + } + else + lbound = (Datum) 0; + + /* fetch upper bound, if any */ + if (RANGE_HAS_UBOUND(flags)) + { + ptr = (Pointer) att_align_pointer(ptr, typalign, typlen, ptr); + ubound = fetch_att(ptr, typbyval, typlen); + /* no need for att_addlength_pointer */ + } + else + ubound = (Datum) 0; + + /* emit results */ + lower->val = lbound; + lower->infinite = (flags & RANGE_LB_INF) != 0; + lower->inclusive = (flags & RANGE_LB_INC) != 0; + lower->lower = true; + + upper->val = ubound; + upper->infinite = (flags & RANGE_UB_INF) != 0; + upper->inclusive = (flags & RANGE_UB_INC) != 0; + upper->lower = false; +} + +/* + * Construct union range from the multirange. + */ +RangeType * +multirange_get_union_range(TypeCacheEntry *rangetyp, + const MultirangeType *mr) +{ + RangeBound lower, + upper, + tmp; + + if (MultirangeIsEmpty(mr)) + return make_empty_range(rangetyp); + + multirange_get_bounds(rangetyp, mr, 0, &lower, &tmp); + multirange_get_bounds(rangetyp, mr, mr->rangeCount - 1, &tmp, &upper); + + return make_range(rangetyp, &lower, &upper, false, NULL); +} + + +/* + * multirange_deserialize: deconstruct a multirange value + * + * NB: the given multirange object must be fully detoasted; it cannot have a + * short varlena header. + */ +void +multirange_deserialize(TypeCacheEntry *rangetyp, + const MultirangeType *multirange, int32 *range_count, + RangeType ***ranges) +{ + *range_count = multirange->rangeCount; + + /* Convert each ShortRangeType into a RangeType */ + if (*range_count > 0) + { + int i; + + *ranges = palloc(*range_count * sizeof(RangeType *)); + for (i = 0; i < *range_count; i++) + (*ranges)[i] = multirange_get_range(rangetyp, multirange, i); + } + else + { + *ranges = NULL; + } +} + +MultirangeType * +make_empty_multirange(Oid mltrngtypoid, TypeCacheEntry *rangetyp) +{ + return make_multirange(mltrngtypoid, rangetyp, 0, NULL); +} + +/* + * Similar to range_overlaps_internal(), but takes range bounds instead of + * ranges as arguments. + */ +static bool +range_bounds_overlaps(TypeCacheEntry *typcache, + RangeBound *lower1, RangeBound *upper1, + RangeBound *lower2, RangeBound *upper2) +{ + if (range_cmp_bounds(typcache, lower1, lower2) >= 0 && + range_cmp_bounds(typcache, lower1, upper2) <= 0) + return true; + + if (range_cmp_bounds(typcache, lower2, lower1) >= 0 && + range_cmp_bounds(typcache, lower2, upper1) <= 0) + return true; + + return false; +} + +/* + * Similar to range_contains_internal(), but takes range bounds instead of + * ranges as arguments. 
+ */ +static bool +range_bounds_contains(TypeCacheEntry *typcache, + RangeBound *lower1, RangeBound *upper1, + RangeBound *lower2, RangeBound *upper2) +{ + if (range_cmp_bounds(typcache, lower1, lower2) <= 0 && + range_cmp_bounds(typcache, upper1, upper2) >= 0) + return true; + + return false; +} + +/* + * Check if the given key matches any range in multirange using binary search. + * If the required range isn't found, that counts as a mismatch. When the + * required range is found, the comparison function can still report this as + * either match or mismatch. For instance, if we search for containment, we can + * found a range, which is overlapping but not containing the key range, and + * that would count as a mismatch. + */ +static bool +multirange_bsearch_match(TypeCacheEntry *typcache, const MultirangeType *mr, + void *key, multirange_bsearch_comparison cmp_func) +{ + uint32 l, + u, + idx; + int comparison; + bool match = false; + + l = 0; + u = mr->rangeCount; + while (l < u) + { + RangeBound lower, + upper; + + idx = (l + u) / 2; + multirange_get_bounds(typcache, mr, idx, &lower, &upper); + comparison = (*cmp_func) (typcache, &lower, &upper, key, &match); + + if (comparison < 0) + u = idx; + else if (comparison > 0) + l = idx + 1; + else + return match; + } + + return false; +} + +/* + *---------------------------------------------------------- + * GENERIC FUNCTIONS + *---------------------------------------------------------- + */ + +/* + * Construct multirange value from zero or more ranges. Since this is a + * variadic function we get passed an array. The array must contain ranges + * that match our return value, and there must be no NULLs. + */ +Datum +multirange_constructor2(PG_FUNCTION_ARGS) +{ + Oid mltrngtypid = get_fn_expr_rettype(fcinfo->flinfo); + Oid rngtypid; + TypeCacheEntry *typcache; + TypeCacheEntry *rangetyp; + ArrayType *rangeArray; + int range_count; + Datum *elements; + bool *nulls; + RangeType **ranges; + int dims; + int i; + + typcache = multirange_get_typcache(fcinfo, mltrngtypid); + rangetyp = typcache->rngtype; + + /* + * A no-arg invocation should call multirange_constructor0 instead, but + * returning an empty range is what that does. + */ + + if (PG_NARGS() == 0) + PG_RETURN_MULTIRANGE_P(make_multirange(mltrngtypid, rangetyp, 0, NULL)); + + /* + * This check should be guaranteed by our signature, but let's do it just + * in case. 
+ */ + + if (PG_ARGISNULL(0)) + elog(ERROR, + "multirange values cannot contain null members"); + + rangeArray = PG_GETARG_ARRAYTYPE_P(0); + + dims = ARR_NDIM(rangeArray); + if (dims > 1) + ereport(ERROR, + (errcode(ERRCODE_CARDINALITY_VIOLATION), + errmsg("multiranges cannot be constructed from multidimensional arrays"))); + + rngtypid = ARR_ELEMTYPE(rangeArray); + if (rngtypid != rangetyp->type_id) + elog(ERROR, "type %u does not match constructor type", rngtypid); + + /* + * Be careful: we can still be called with zero ranges, like this: + * `int4multirange(variadic '{}'::int4range[]) + */ + if (dims == 0) + { + range_count = 0; + ranges = NULL; + } + else + { + deconstruct_array(rangeArray, rngtypid, rangetyp->typlen, rangetyp->typbyval, + rangetyp->typalign, &elements, &nulls, &range_count); + + ranges = palloc0(range_count * sizeof(RangeType *)); + for (i = 0; i < range_count; i++) + { + if (nulls[i]) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("multirange values cannot contain null members"))); + + /* make_multirange will do its own copy */ + ranges[i] = DatumGetRangeTypeP(elements[i]); + } + } + + PG_RETURN_MULTIRANGE_P(make_multirange(mltrngtypid, rangetyp, range_count, ranges)); +} + +/* + * Construct multirange value from a single range. It'd be nice if we could + * just use multirange_constructor2 for this case, but we need a non-variadic + * single-arg function to let us define a CAST from a range to its multirange. + */ +Datum +multirange_constructor1(PG_FUNCTION_ARGS) +{ + Oid mltrngtypid = get_fn_expr_rettype(fcinfo->flinfo); + Oid rngtypid; + TypeCacheEntry *typcache; + TypeCacheEntry *rangetyp; + RangeType *range; + + typcache = multirange_get_typcache(fcinfo, mltrngtypid); + rangetyp = typcache->rngtype; + + /* + * This check should be guaranteed by our signature, but let's do it just + * in case. + */ + + if (PG_ARGISNULL(0)) + elog(ERROR, + "multirange values cannot contain null members"); + + range = PG_GETARG_RANGE_P(0); + + /* Make sure the range type matches. */ + rngtypid = RangeTypeGetOid(range); + if (rngtypid != rangetyp->type_id) + elog(ERROR, "type %u does not match constructor type", rngtypid); + + PG_RETURN_MULTIRANGE_P(make_multirange(mltrngtypid, rangetyp, 1, &range)); +} + +/* + * Constructor just like multirange_constructor1, but opr_sanity gets angry + * if the same internal function handles multiple functions with different arg + * counts. 
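+ *
+ * At the SQL level the three constructors appear as, for example
+ * (illustrative): int4multirange(), int4multirange(int4range(1,4)) and
+ * int4multirange(int4range(1,4), int4range(7,10)).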
+ */ +Datum +multirange_constructor0(PG_FUNCTION_ARGS) +{ + Oid mltrngtypid; + TypeCacheEntry *typcache; + TypeCacheEntry *rangetyp; + + /* This should always be called without arguments */ + if (PG_NARGS() != 0) + elog(ERROR, + "niladic multirange constructor must not receive arguments"); + + mltrngtypid = get_fn_expr_rettype(fcinfo->flinfo); + typcache = multirange_get_typcache(fcinfo, mltrngtypid); + rangetyp = typcache->rngtype; + + PG_RETURN_MULTIRANGE_P(make_multirange(mltrngtypid, rangetyp, 0, NULL)); +} + + +/* multirange, multirange -> multirange type functions */ + +/* multirange union */ +Datum +multirange_union(PG_FUNCTION_ARGS) +{ + MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0); + MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1); + TypeCacheEntry *typcache; + int32 range_count1; + int32 range_count2; + int32 range_count3; + RangeType **ranges1; + RangeType **ranges2; + RangeType **ranges3; + + if (MultirangeIsEmpty(mr1)) + PG_RETURN_MULTIRANGE_P(mr2); + if (MultirangeIsEmpty(mr2)) + PG_RETURN_MULTIRANGE_P(mr1); + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1)); + + multirange_deserialize(typcache->rngtype, mr1, &range_count1, &ranges1); + multirange_deserialize(typcache->rngtype, mr2, &range_count2, &ranges2); + + range_count3 = range_count1 + range_count2; + ranges3 = palloc0(range_count3 * sizeof(RangeType *)); + memcpy(ranges3, ranges1, range_count1 * sizeof(RangeType *)); + memcpy(ranges3 + range_count1, ranges2, range_count2 * sizeof(RangeType *)); + PG_RETURN_MULTIRANGE_P(make_multirange(typcache->type_id, typcache->rngtype, + range_count3, ranges3)); +} + +/* multirange minus */ +Datum +multirange_minus(PG_FUNCTION_ARGS) +{ + MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0); + MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1); + Oid mltrngtypoid = MultirangeTypeGetOid(mr1); + TypeCacheEntry *typcache; + TypeCacheEntry *rangetyp; + int32 range_count1; + int32 range_count2; + RangeType **ranges1; + RangeType **ranges2; + + typcache = multirange_get_typcache(fcinfo, mltrngtypoid); + rangetyp = typcache->rngtype; + + if (MultirangeIsEmpty(mr1) || MultirangeIsEmpty(mr2)) + PG_RETURN_MULTIRANGE_P(mr1); + + multirange_deserialize(typcache->rngtype, mr1, &range_count1, &ranges1); + multirange_deserialize(typcache->rngtype, mr2, &range_count2, &ranges2); + + PG_RETURN_MULTIRANGE_P(multirange_minus_internal(mltrngtypoid, + rangetyp, + range_count1, + ranges1, + range_count2, + ranges2)); +} + +MultirangeType * +multirange_minus_internal(Oid mltrngtypoid, TypeCacheEntry *rangetyp, + int32 range_count1, RangeType **ranges1, + int32 range_count2, RangeType **ranges2) +{ + RangeType *r1; + RangeType *r2; + RangeType **ranges3; + int32 range_count3; + int32 i1; + int32 i2; + + /* + * Worst case: every range in ranges1 makes a different cut to some range + * in ranges2. + */ + ranges3 = palloc0((range_count1 + range_count2) * sizeof(RangeType *)); + range_count3 = 0; + + /* + * For each range in mr1, keep subtracting until it's gone or the ranges + * in mr2 have passed it. After a subtraction we assign what's left back + * to r1. The parallel progress through mr1 and mr2 is similar to + * multirange_overlaps_multirange_internal. + */ + r2 = ranges2[0]; + for (i1 = 0, i2 = 0; i1 < range_count1; i1++) + { + r1 = ranges1[i1]; + + /* Discard r2s while r2 << r1 */ + while (r2 != NULL && range_before_internal(rangetyp, r2, r1)) + { + r2 = ++i2 >= range_count2 ? 
NULL : ranges2[i2]; + } + + while (r2 != NULL) + { + if (range_split_internal(rangetyp, r1, r2, &ranges3[range_count3], &r1)) + { + /* + * If r2 takes a bite out of the middle of r1, we need two + * outputs + */ + range_count3++; + r2 = ++i2 >= range_count2 ? NULL : ranges2[i2]; + } + else if (range_overlaps_internal(rangetyp, r1, r2)) + { + /* + * If r2 overlaps r1, replace r1 with r1 - r2. + */ + r1 = range_minus_internal(rangetyp, r1, r2); + + /* + * If r2 goes past r1, then we need to stay with it, in case + * it hits future r1s. Otherwise we need to keep r1, in case + * future r2s hit it. Since we already subtracted, there's no + * point in using the overright/overleft calls. + */ + if (RangeIsEmpty(r1) || range_before_internal(rangetyp, r1, r2)) + break; + else + r2 = ++i2 >= range_count2 ? NULL : ranges2[i2]; + } + else + { + /* + * This and all future r2s are past r1, so keep them. Also + * assign whatever is left of r1 to the result. + */ + break; + } + } + + /* + * Nothing else can remove anything from r1, so keep it. Even if r1 is + * empty here, make_multirange will remove it. + */ + ranges3[range_count3++] = r1; + } + + return make_multirange(mltrngtypoid, rangetyp, range_count3, ranges3); +} + +/* multirange intersection */ +Datum +multirange_intersect(PG_FUNCTION_ARGS) +{ + MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0); + MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1); + Oid mltrngtypoid = MultirangeTypeGetOid(mr1); + TypeCacheEntry *typcache; + TypeCacheEntry *rangetyp; + int32 range_count1; + int32 range_count2; + RangeType **ranges1; + RangeType **ranges2; + + typcache = multirange_get_typcache(fcinfo, mltrngtypoid); + rangetyp = typcache->rngtype; + + if (MultirangeIsEmpty(mr1) || MultirangeIsEmpty(mr2)) + PG_RETURN_MULTIRANGE_P(make_empty_multirange(mltrngtypoid, rangetyp)); + + multirange_deserialize(rangetyp, mr1, &range_count1, &ranges1); + multirange_deserialize(rangetyp, mr2, &range_count2, &ranges2); + + PG_RETURN_MULTIRANGE_P(multirange_intersect_internal(mltrngtypoid, + rangetyp, + range_count1, + ranges1, + range_count2, + ranges2)); +} + +MultirangeType * +multirange_intersect_internal(Oid mltrngtypoid, TypeCacheEntry *rangetyp, + int32 range_count1, RangeType **ranges1, + int32 range_count2, RangeType **ranges2) +{ + RangeType *r1; + RangeType *r2; + RangeType **ranges3; + int32 range_count3; + int32 i1; + int32 i2; + + if (range_count1 == 0 || range_count2 == 0) + return make_multirange(mltrngtypoid, rangetyp, 0, NULL); + + /*----------------------------------------------- + * Worst case is a stitching pattern like this: + * + * mr1: --- --- --- --- + * mr2: --- --- --- + * mr3: - - - - - - + * + * That seems to be range_count1 + range_count2 - 1, + * but one extra won't hurt. + *----------------------------------------------- + */ + ranges3 = palloc0((range_count1 + range_count2) * sizeof(RangeType *)); + range_count3 = 0; + + /* + * For each range in mr1, keep intersecting until the ranges in mr2 have + * passed it. The parallel progress through mr1 and mr2 is similar to + * multirange_minus_multirange_internal, but we don't have to assign back + * to r1. + */ + r2 = ranges2[0]; + for (i1 = 0, i2 = 0; i1 < range_count1; i1++) + { + r1 = ranges1[i1]; + + /* Discard r2s while r2 << r1 */ + while (r2 != NULL && range_before_internal(rangetyp, r2, r1)) + { + r2 = ++i2 >= range_count2 ? 
NULL : ranges2[i2]; + } + + while (r2 != NULL) + { + if (range_overlaps_internal(rangetyp, r1, r2)) + { + /* Keep the overlapping part */ + ranges3[range_count3++] = range_intersect_internal(rangetyp, r1, r2); + + /* If we "used up" all of r2, go to the next one... */ + if (range_overleft_internal(rangetyp, r2, r1)) + r2 = ++i2 >= range_count2 ? NULL : ranges2[i2]; + + /* ...otherwise go to the next r1 */ + else + break; + } + else + /* We're past r1, so move to the next one */ + break; + } + + /* If we're out of r2s, there can be no more intersections */ + if (r2 == NULL) + break; + } + + return make_multirange(mltrngtypoid, rangetyp, range_count3, ranges3); +} + +/* + * range_agg_transfn: combine adjacent/overlapping ranges. + * + * All we do here is gather the input ranges into an array + * so that the finalfn can sort and combine them. + */ +Datum +range_agg_transfn(PG_FUNCTION_ARGS) +{ + MemoryContext aggContext; + Oid rngtypoid; + ArrayBuildState *state; + + if (!AggCheckCallContext(fcinfo, &aggContext)) + elog(ERROR, "range_agg_transfn called in non-aggregate context"); + + rngtypoid = get_fn_expr_argtype(fcinfo->flinfo, 1); + if (!type_is_range(rngtypoid)) + elog(ERROR, "range_agg must be called with a range"); + + if (PG_ARGISNULL(0)) + state = initArrayResult(rngtypoid, aggContext, false); + else + state = (ArrayBuildState *) PG_GETARG_POINTER(0); + + /* skip NULLs */ + if (!PG_ARGISNULL(1)) + accumArrayResult(state, PG_GETARG_DATUM(1), false, rngtypoid, aggContext); + + PG_RETURN_POINTER(state); +} + +/* + * range_agg_finalfn: use our internal array to merge touching ranges. + * + * Shared by range_agg_finalfn(anyrange) and + * multirange_agg_finalfn(anymultirange). + */ +Datum +range_agg_finalfn(PG_FUNCTION_ARGS) +{ + MemoryContext aggContext; + Oid mltrngtypoid; + TypeCacheEntry *typcache; + ArrayBuildState *state; + int32 range_count; + RangeType **ranges; + int i; + + if (!AggCheckCallContext(fcinfo, &aggContext)) + elog(ERROR, "range_agg_finalfn called in non-aggregate context"); + + state = PG_ARGISNULL(0) ? NULL : (ArrayBuildState *) PG_GETARG_POINTER(0); + if (state == NULL) + /* This shouldn't be possible, but just in case.... */ + PG_RETURN_NULL(); + + /* Also return NULL if we had zero inputs, like other aggregates */ + range_count = state->nelems; + if (range_count == 0) + PG_RETURN_NULL(); + + mltrngtypoid = get_fn_expr_rettype(fcinfo->flinfo); + typcache = multirange_get_typcache(fcinfo, mltrngtypoid); + + ranges = palloc0(range_count * sizeof(RangeType *)); + for (i = 0; i < range_count; i++) + ranges[i] = DatumGetRangeTypeP(state->dvalues[i]); + + PG_RETURN_MULTIRANGE_P(make_multirange(mltrngtypoid, typcache->rngtype, range_count, ranges)); +} + +/* + * multirange_agg_transfn: combine adjacent/overlapping multiranges. + * + * All we do here is gather the input multiranges' ranges into an array so + * that the finalfn can sort and combine them. 
+ */ +Datum +multirange_agg_transfn(PG_FUNCTION_ARGS) +{ + MemoryContext aggContext; + Oid mltrngtypoid; + TypeCacheEntry *typcache; + TypeCacheEntry *rngtypcache; + ArrayBuildState *state; + + if (!AggCheckCallContext(fcinfo, &aggContext)) + elog(ERROR, "multirange_agg_transfn called in non-aggregate context"); + + mltrngtypoid = get_fn_expr_argtype(fcinfo->flinfo, 1); + if (!type_is_multirange(mltrngtypoid)) + elog(ERROR, "range_agg must be called with a multirange"); + + typcache = multirange_get_typcache(fcinfo, mltrngtypoid); + rngtypcache = typcache->rngtype; + + if (PG_ARGISNULL(0)) + state = initArrayResult(rngtypcache->type_id, aggContext, false); + else + state = (ArrayBuildState *) PG_GETARG_POINTER(0); + + /* skip NULLs */ + if (!PG_ARGISNULL(1)) + { + MultirangeType *current; + int32 range_count; + RangeType **ranges; + + current = PG_GETARG_MULTIRANGE_P(1); + multirange_deserialize(rngtypcache, current, &range_count, &ranges); + if (range_count == 0) + { + /* + * Add an empty range so we get an empty result (not a null + * result). + */ + accumArrayResult(state, + RangeTypePGetDatum(make_empty_range(rngtypcache)), + false, rngtypcache->type_id, aggContext); + } + else + { + for (int32 i = 0; i < range_count; i++) + accumArrayResult(state, RangeTypePGetDatum(ranges[i]), false, rngtypcache->type_id, aggContext); + } + } + + PG_RETURN_POINTER(state); +} + +Datum +multirange_intersect_agg_transfn(PG_FUNCTION_ARGS) +{ + MemoryContext aggContext; + Oid mltrngtypoid; + TypeCacheEntry *typcache; + MultirangeType *result; + MultirangeType *current; + int32 range_count1; + int32 range_count2; + RangeType **ranges1; + RangeType **ranges2; + + if (!AggCheckCallContext(fcinfo, &aggContext)) + elog(ERROR, "multirange_intersect_agg_transfn called in non-aggregate context"); + + mltrngtypoid = get_fn_expr_argtype(fcinfo->flinfo, 1); + if (!type_is_multirange(mltrngtypoid)) + elog(ERROR, "range_intersect_agg must be called with a multirange"); + + typcache = multirange_get_typcache(fcinfo, mltrngtypoid); + + /* strictness ensures these are non-null */ + result = PG_GETARG_MULTIRANGE_P(0); + current = PG_GETARG_MULTIRANGE_P(1); + + multirange_deserialize(typcache->rngtype, result, &range_count1, &ranges1); + multirange_deserialize(typcache->rngtype, current, &range_count2, &ranges2); + + result = multirange_intersect_internal(mltrngtypoid, + typcache->rngtype, + range_count1, + ranges1, + range_count2, + ranges2); + PG_RETURN_MULTIRANGE_P(result); +} + + +/* multirange -> element type functions */ + +/* extract lower bound value */ +Datum +multirange_lower(PG_FUNCTION_ARGS) +{ + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0); + TypeCacheEntry *typcache; + RangeBound lower; + RangeBound upper; + + if (MultirangeIsEmpty(mr)) + PG_RETURN_NULL(); + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + + multirange_get_bounds(typcache->rngtype, mr, 0, + &lower, &upper); + + if (!lower.infinite) + PG_RETURN_DATUM(lower.val); + else + PG_RETURN_NULL(); +} + +/* extract upper bound value */ +Datum +multirange_upper(PG_FUNCTION_ARGS) +{ + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0); + TypeCacheEntry *typcache; + RangeBound lower; + RangeBound upper; + + if (MultirangeIsEmpty(mr)) + PG_RETURN_NULL(); + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + + multirange_get_bounds(typcache->rngtype, mr, mr->rangeCount - 1, + &lower, &upper); + + if (!upper.infinite) + PG_RETURN_DATUM(upper.val); + else + PG_RETURN_NULL(); +} + + +/* multirange -> bool functions 
*/ + +/* is multirange empty? */ +Datum +multirange_empty(PG_FUNCTION_ARGS) +{ + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0); + + PG_RETURN_BOOL(MultirangeIsEmpty(mr)); +} + +/* is lower bound inclusive? */ +Datum +multirange_lower_inc(PG_FUNCTION_ARGS) +{ + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0); + TypeCacheEntry *typcache; + RangeBound lower; + RangeBound upper; + + if (MultirangeIsEmpty(mr)) + PG_RETURN_BOOL(false); + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + multirange_get_bounds(typcache->rngtype, mr, 0, + &lower, &upper); + + PG_RETURN_BOOL(lower.inclusive); +} + +/* is upper bound inclusive? */ +Datum +multirange_upper_inc(PG_FUNCTION_ARGS) +{ + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0); + TypeCacheEntry *typcache; + RangeBound lower; + RangeBound upper; + + if (MultirangeIsEmpty(mr)) + PG_RETURN_BOOL(false); + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + multirange_get_bounds(typcache->rngtype, mr, mr->rangeCount - 1, + &lower, &upper); + + PG_RETURN_BOOL(upper.inclusive); +} + +/* is lower bound infinite? */ +Datum +multirange_lower_inf(PG_FUNCTION_ARGS) +{ + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0); + TypeCacheEntry *typcache; + RangeBound lower; + RangeBound upper; + + if (MultirangeIsEmpty(mr)) + PG_RETURN_BOOL(false); + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + multirange_get_bounds(typcache->rngtype, mr, 0, + &lower, &upper); + + PG_RETURN_BOOL(lower.infinite); +} + +/* is upper bound infinite? */ +Datum +multirange_upper_inf(PG_FUNCTION_ARGS) +{ + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0); + TypeCacheEntry *typcache; + RangeBound lower; + RangeBound upper; + + if (MultirangeIsEmpty(mr)) + PG_RETURN_BOOL(false); + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + multirange_get_bounds(typcache->rngtype, mr, mr->rangeCount - 1, + &lower, &upper); + + PG_RETURN_BOOL(upper.infinite); +} + + + +/* multirange, element -> bool functions */ + +/* contains? */ +Datum +multirange_contains_elem(PG_FUNCTION_ARGS) +{ + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0); + Datum val = PG_GETARG_DATUM(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + + PG_RETURN_BOOL(multirange_contains_elem_internal(typcache->rngtype, mr, val)); +} + +/* contained by? */ +Datum +elem_contained_by_multirange(PG_FUNCTION_ARGS) +{ + Datum val = PG_GETARG_DATUM(0); + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + + PG_RETURN_BOOL(multirange_contains_elem_internal(typcache->rngtype, mr, val)); +} + +/* + * Comparison function for checking if any range of multirange contains given + * key element using binary search. 
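+ *
+ * Returns -1 if the element sorts below this range, 1 if it sorts above
+ * it, and 0 (with *match set) if the element falls inside it; see the
+ * multirange_bsearch_match() convention above.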
+ */ +static int +multirange_elem_bsearch_comparison(TypeCacheEntry *typcache, + RangeBound *lower, RangeBound *upper, + void *key, bool *match) +{ + Datum val = *((Datum *) key); + int cmp; + + if (!lower->infinite) + { + cmp = DatumGetInt32(FunctionCall2Coll(&typcache->rng_cmp_proc_finfo, + typcache->rng_collation, + lower->val, val)); + if (cmp > 0 || (cmp == 0 && !lower->inclusive)) + return -1; + } + + if (!upper->infinite) + { + cmp = DatumGetInt32(FunctionCall2Coll(&typcache->rng_cmp_proc_finfo, + typcache->rng_collation, + upper->val, val)); + if (cmp < 0 || (cmp == 0 && !upper->inclusive)) + return 1; + } + + *match = true; + return 0; +} + +/* + * Test whether multirange mr contains a specific element value. + */ +bool +multirange_contains_elem_internal(TypeCacheEntry *rangetyp, + const MultirangeType *mr, Datum val) +{ + if (MultirangeIsEmpty(mr)) + return false; + + return multirange_bsearch_match(rangetyp, mr, &val, + multirange_elem_bsearch_comparison); +} + +/* multirange, range -> bool functions */ + +/* contains? */ +Datum +multirange_contains_range(PG_FUNCTION_ARGS) +{ + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0); + RangeType *r = PG_GETARG_RANGE_P(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + + PG_RETURN_BOOL(multirange_contains_range_internal(typcache->rngtype, mr, r)); +} + +Datum +range_contains_multirange(PG_FUNCTION_ARGS) +{ + RangeType *r = PG_GETARG_RANGE_P(0); + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + + PG_RETURN_BOOL(range_contains_multirange_internal(typcache->rngtype, r, mr)); +} + +/* contained by? */ +Datum +range_contained_by_multirange(PG_FUNCTION_ARGS) +{ + RangeType *r = PG_GETARG_RANGE_P(0); + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + + PG_RETURN_BOOL(multirange_contains_range_internal(typcache->rngtype, mr, r)); +} + +Datum +multirange_contained_by_range(PG_FUNCTION_ARGS) +{ + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0); + RangeType *r = PG_GETARG_RANGE_P(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + + PG_RETURN_BOOL(range_contains_multirange_internal(typcache->rngtype, r, mr)); +} + +/* + * Comparison function for checking if any range of multirange contains given + * key range using binary search. + */ +static int +multirange_range_contains_bsearch_comparison(TypeCacheEntry *typcache, + RangeBound *lower, RangeBound *upper, + void *key, bool *match) +{ + RangeBound *keyLower = (RangeBound *) key; + RangeBound *keyUpper = (RangeBound *) key + 1; + + /* Check if key range is strictly in the left or in the right */ + if (range_cmp_bounds(typcache, keyUpper, lower) < 0) + return -1; + if (range_cmp_bounds(typcache, keyLower, upper) > 0) + return 1; + + /* + * At this point we found overlapping range. But we have to check if it + * really contains the key range. Anyway, we have to stop our search + * here, because multirange contains only non-overlapping ranges. + */ + *match = range_bounds_contains(typcache, lower, upper, keyLower, keyUpper); + + return 0; +} + +/* + * Test whether multirange mr contains a specific range r. 
+ */ +bool +multirange_contains_range_internal(TypeCacheEntry *rangetyp, + const MultirangeType *mr, + const RangeType *r) +{ + RangeBound bounds[2]; + bool empty; + + /* + * Every multirange contains an infinite number of empty ranges, even an + * empty one. + */ + if (RangeIsEmpty(r)) + return true; + + if (MultirangeIsEmpty(mr)) + return false; + + range_deserialize(rangetyp, r, &bounds[0], &bounds[1], &empty); + Assert(!empty); + + return multirange_bsearch_match(rangetyp, mr, bounds, + multirange_range_contains_bsearch_comparison); +} + +/* + * Test whether range r contains a multirange mr. + */ +bool +range_contains_multirange_internal(TypeCacheEntry *rangetyp, + const RangeType *r, + const MultirangeType *mr) +{ + RangeBound lower1, + upper1, + lower2, + upper2, + tmp; + bool empty; + + /* + * Every range contains an infinite number of empty multiranges, even an + * empty one. + */ + if (MultirangeIsEmpty(mr)) + return true; + + if (RangeIsEmpty(r)) + return false; + + /* Range contains multirange iff it contains its union range. */ + range_deserialize(rangetyp, r, &lower1, &upper1, &empty); + Assert(!empty); + multirange_get_bounds(rangetyp, mr, 0, &lower2, &tmp); + multirange_get_bounds(rangetyp, mr, mr->rangeCount - 1, &tmp, &upper2); + + return range_bounds_contains(rangetyp, &lower1, &upper1, &lower2, &upper2); +} + + +/* multirange, multirange -> bool functions */ + +/* equality (internal version) */ +bool +multirange_eq_internal(TypeCacheEntry *rangetyp, + const MultirangeType *mr1, + const MultirangeType *mr2) +{ + int32 range_count_1; + int32 range_count_2; + int32 i; + RangeBound lower1, + upper1, + lower2, + upper2; + + /* Different types should be prevented by ANYMULTIRANGE matching rules */ + if (MultirangeTypeGetOid(mr1) != MultirangeTypeGetOid(mr2)) + elog(ERROR, "multirange types do not match"); + + range_count_1 = mr1->rangeCount; + range_count_2 = mr2->rangeCount; + + if (range_count_1 != range_count_2) + return false; + + for (i = 0; i < range_count_1; i++) + { + multirange_get_bounds(rangetyp, mr1, i, &lower1, &upper1); + multirange_get_bounds(rangetyp, mr2, i, &lower2, &upper2); + + if (range_cmp_bounds(rangetyp, &lower1, &lower2) != 0 || + range_cmp_bounds(rangetyp, &upper1, &upper2) != 0) + return false; + } + + return true; +} + +/* equality */ +Datum +multirange_eq(PG_FUNCTION_ARGS) +{ + MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0); + MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1)); + + PG_RETURN_BOOL(multirange_eq_internal(typcache->rngtype, mr1, mr2)); +} + +/* inequality (internal version) */ +bool +multirange_ne_internal(TypeCacheEntry *rangetyp, + const MultirangeType *mr1, + const MultirangeType *mr2) +{ + return (!multirange_eq_internal(rangetyp, mr1, mr2)); +} + +/* inequality */ +Datum +multirange_ne(PG_FUNCTION_ARGS) +{ + MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0); + MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1)); + + PG_RETURN_BOOL(multirange_ne_internal(typcache->rngtype, mr1, mr2)); +} + +/* overlaps? 
*/ +Datum +range_overlaps_multirange(PG_FUNCTION_ARGS) +{ + RangeType *r = PG_GETARG_RANGE_P(0); + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + + PG_RETURN_BOOL(range_overlaps_multirange_internal(typcache->rngtype, r, mr)); +} + +Datum +multirange_overlaps_range(PG_FUNCTION_ARGS) +{ + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0); + RangeType *r = PG_GETARG_RANGE_P(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + + PG_RETURN_BOOL(range_overlaps_multirange_internal(typcache->rngtype, r, mr)); +} + +Datum +multirange_overlaps_multirange(PG_FUNCTION_ARGS) +{ + MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0); + MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1)); + + PG_RETURN_BOOL(multirange_overlaps_multirange_internal(typcache->rngtype, mr1, mr2)); +} + +/* + * Comparison function for checking if any range of multirange overlaps given + * key range using binary search. + */ +static int +multirange_range_overlaps_bsearch_comparison(TypeCacheEntry *typcache, + RangeBound *lower, RangeBound *upper, + void *key, bool *match) +{ + RangeBound *keyLower = (RangeBound *) key; + RangeBound *keyUpper = (RangeBound *) key + 1; + + if (range_cmp_bounds(typcache, keyUpper, lower) < 0) + return -1; + if (range_cmp_bounds(typcache, keyLower, upper) > 0) + return 1; + + *match = true; + return 0; +} + +bool +range_overlaps_multirange_internal(TypeCacheEntry *rangetyp, + const RangeType *r, + const MultirangeType *mr) +{ + RangeBound bounds[2]; + bool empty; + + /* + * Empties never overlap, even with empties. (This seems strange since + * they *do* contain each other, but we want to follow how ranges work.) + */ + if (RangeIsEmpty(r) || MultirangeIsEmpty(mr)) + return false; + + range_deserialize(rangetyp, r, &bounds[0], &bounds[1], &empty); + Assert(!empty); + + return multirange_bsearch_match(rangetyp, mr, bounds, + multirange_range_overlaps_bsearch_comparison); +} + +bool +multirange_overlaps_multirange_internal(TypeCacheEntry *rangetyp, + const MultirangeType *mr1, + const MultirangeType *mr2) +{ + int32 range_count1; + int32 range_count2; + int32 i1; + int32 i2; + RangeBound lower1, + upper1, + lower2, + upper2; + + /* + * Empties never overlap, even with empties. (This seems strange since + * they *do* contain each other, but we want to follow how ranges work.) + */ + if (MultirangeIsEmpty(mr1) || MultirangeIsEmpty(mr2)) + return false; + + range_count1 = mr1->rangeCount; + range_count2 = mr2->rangeCount; + + /* + * Every range in mr1 gets a chance to overlap with the ranges in mr2, but + * we can use their ordering to avoid O(n^2). This is similar to + * range_overlaps_multirange where r1 : r2 :: mrr : r, but there if we + * don't find an overlap with r we're done, and here if we don't find an + * overlap with r2 we try the next r2. + */ + i1 = 0; + multirange_get_bounds(rangetyp, mr1, i1, &lower1, &upper1); + for (i1 = 0, i2 = 0; i2 < range_count2; i2++) + { + multirange_get_bounds(rangetyp, mr2, i2, &lower2, &upper2); + + /* Discard r1s while r1 << r2 */ + while (range_cmp_bounds(rangetyp, &upper1, &lower2) < 0) + { + if (++i1 >= range_count1) + return false; + multirange_get_bounds(rangetyp, mr1, i1, &lower1, &upper1); + } + + /* + * If r1 && r2, we're done, otherwise we failed to find an overlap for + * r2, so go to the next one. 
+ */ + if (range_bounds_overlaps(rangetyp, &lower1, &upper1, &lower2, &upper2)) + return true; + } + + /* We looked through all of mr2 without finding an overlap */ + return false; +} + +/* does not extend to right of? */ +bool +range_overleft_multirange_internal(TypeCacheEntry *rangetyp, + const RangeType *r, + const MultirangeType *mr) +{ + RangeBound lower1, + upper1, + lower2, + upper2; + bool empty; + + if (RangeIsEmpty(r) || MultirangeIsEmpty(mr)) + PG_RETURN_BOOL(false); + + + range_deserialize(rangetyp, r, &lower1, &upper1, &empty); + Assert(!empty); + multirange_get_bounds(rangetyp, mr, mr->rangeCount - 1, + &lower2, &upper2); + + PG_RETURN_BOOL(range_cmp_bounds(rangetyp, &upper1, &upper2) <= 0); +} + +Datum +range_overleft_multirange(PG_FUNCTION_ARGS) +{ + RangeType *r = PG_GETARG_RANGE_P(0); + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + + PG_RETURN_BOOL(range_overleft_multirange_internal(typcache->rngtype, r, mr)); +} + +Datum +multirange_overleft_range(PG_FUNCTION_ARGS) +{ + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0); + RangeType *r = PG_GETARG_RANGE_P(1); + TypeCacheEntry *typcache; + RangeBound lower1, + upper1, + lower2, + upper2; + bool empty; + + if (MultirangeIsEmpty(mr) || RangeIsEmpty(r)) + PG_RETURN_BOOL(false); + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + + multirange_get_bounds(typcache->rngtype, mr, mr->rangeCount - 1, + &lower1, &upper1); + range_deserialize(typcache->rngtype, r, &lower2, &upper2, &empty); + Assert(!empty); + + PG_RETURN_BOOL(range_cmp_bounds(typcache->rngtype, &upper1, &upper2) <= 0); +} + +Datum +multirange_overleft_multirange(PG_FUNCTION_ARGS) +{ + MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0); + MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1); + TypeCacheEntry *typcache; + RangeBound lower1, + upper1, + lower2, + upper2; + + if (MultirangeIsEmpty(mr1) || MultirangeIsEmpty(mr2)) + PG_RETURN_BOOL(false); + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1)); + + multirange_get_bounds(typcache->rngtype, mr1, mr1->rangeCount - 1, + &lower1, &upper1); + multirange_get_bounds(typcache->rngtype, mr2, mr2->rangeCount - 1, + &lower2, &upper2); + + PG_RETURN_BOOL(range_cmp_bounds(typcache->rngtype, &upper1, &upper2) <= 0); +} + +/* does not extend to left of? 
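+ *
+ * This is the &> operator family: a non-empty left operand qualifies when
+ * its overall lower bound is not below the right operand's lower bound,
+ * e.g. '{[3,5)}'::int4multirange &> int4range(1, 4) is true, while
+ * '{[0,5)}'::int4multirange &> int4range(1, 4) is false.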
*/ +bool +range_overright_multirange_internal(TypeCacheEntry *rangetyp, + const RangeType *r, + const MultirangeType *mr) +{ + RangeBound lower1, + upper1, + lower2, + upper2; + bool empty; + + if (RangeIsEmpty(r) || MultirangeIsEmpty(mr)) + PG_RETURN_BOOL(false); + + range_deserialize(rangetyp, r, &lower1, &upper1, &empty); + Assert(!empty); + multirange_get_bounds(rangetyp, mr, 0, &lower2, &upper2); + + return (range_cmp_bounds(rangetyp, &lower1, &lower2) >= 0); +} + +Datum +range_overright_multirange(PG_FUNCTION_ARGS) +{ + RangeType *r = PG_GETARG_RANGE_P(0); + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + + PG_RETURN_BOOL(range_overright_multirange_internal(typcache->rngtype, r, mr)); +} + +Datum +multirange_overright_range(PG_FUNCTION_ARGS) +{ + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0); + RangeType *r = PG_GETARG_RANGE_P(1); + TypeCacheEntry *typcache; + RangeBound lower1, + upper1, + lower2, + upper2; + bool empty; + + if (MultirangeIsEmpty(mr) || RangeIsEmpty(r)) + PG_RETURN_BOOL(false); + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + + multirange_get_bounds(typcache->rngtype, mr, 0, &lower1, &upper1); + range_deserialize(typcache->rngtype, r, &lower2, &upper2, &empty); + Assert(!empty); + + PG_RETURN_BOOL(range_cmp_bounds(typcache->rngtype, &lower1, &lower2) >= 0); +} + +Datum +multirange_overright_multirange(PG_FUNCTION_ARGS) +{ + MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0); + MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1); + TypeCacheEntry *typcache; + RangeBound lower1, + upper1, + lower2, + upper2; + + if (MultirangeIsEmpty(mr1) || MultirangeIsEmpty(mr2)) + PG_RETURN_BOOL(false); + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1)); + + multirange_get_bounds(typcache->rngtype, mr1, 0, &lower1, &upper1); + multirange_get_bounds(typcache->rngtype, mr2, 0, &lower2, &upper2); + + PG_RETURN_BOOL(range_cmp_bounds(typcache->rngtype, &lower1, &lower2) >= 0); +} + +/* contains? */ +Datum +multirange_contains_multirange(PG_FUNCTION_ARGS) +{ + MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0); + MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1)); + + PG_RETURN_BOOL(multirange_contains_multirange_internal(typcache->rngtype, mr1, mr2)); +} + +/* contained by? */ +Datum +multirange_contained_by_multirange(PG_FUNCTION_ARGS) +{ + MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0); + MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1)); + + PG_RETURN_BOOL(multirange_contains_multirange_internal(typcache->rngtype, mr2, mr1)); +} + +/* + * Test whether multirange mr1 contains every range from another multirange mr2. + */ +bool +multirange_contains_multirange_internal(TypeCacheEntry *rangetyp, + const MultirangeType *mr1, + const MultirangeType *mr2) +{ + int32 range_count1 = mr1->rangeCount; + int32 range_count2 = mr2->rangeCount; + int i1, + i2; + RangeBound lower1, + upper1, + lower2, + upper2; + + /* + * We follow the same logic for empties as ranges: - an empty multirange + * contains an empty range/multirange. - an empty multirange can't contain + * any other range/multirange. - an empty multirange is contained by any + * other range/multirange. 
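+ *
+ * For non-empty inputs, e.g. {[1,4), [7,9)} @> {[2,3), [8,9)} is true,
+ * while {[1,4)} @> {[2,5)} is false, because [2,5) is not covered by any
+ * single range of the container.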
+ */ + + if (range_count2 == 0) + return true; + if (range_count1 == 0) + return false; + + /* + * Every range in mr2 must be contained by some range in mr1. To avoid + * O(n^2) we walk through both ranges in tandem. + */ + i1 = 0; + multirange_get_bounds(rangetyp, mr1, i1, &lower1, &upper1); + for (i2 = 0; i2 < range_count2; i2++) + { + multirange_get_bounds(rangetyp, mr2, i2, &lower2, &upper2); + + /* Discard r1s while r1 << r2 */ + while (range_cmp_bounds(rangetyp, &upper1, &lower2) < 0) + { + if (++i1 >= range_count1) + return false; + multirange_get_bounds(rangetyp, mr1, i1, &lower1, &upper1); + } + + /* + * If r1 @> r2, go to the next r2, otherwise return false (since every + * r1[n] and r1[n+1] must have a gap). Note this will give weird + * answers if you don't canonicalize, e.g. with a custom + * int2multirange {[1,1], [2,2]} there is a "gap". But that is + * consistent with other range operators, e.g. '[1,1]'::int2range -|- + * '[2,2]'::int2range is false. + */ + if (!range_bounds_contains(rangetyp, &lower1, &upper1, + &lower2, &upper2)) + return false; + } + + /* All ranges in mr2 are satisfied */ + return true; +} + +/* strictly left of? */ +Datum +range_before_multirange(PG_FUNCTION_ARGS) +{ + RangeType *r = PG_GETARG_RANGE_P(0); + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + + PG_RETURN_BOOL(range_before_multirange_internal(typcache->rngtype, r, mr)); +} + +Datum +multirange_before_range(PG_FUNCTION_ARGS) +{ + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0); + RangeType *r = PG_GETARG_RANGE_P(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + + PG_RETURN_BOOL(range_after_multirange_internal(typcache->rngtype, r, mr)); +} + +Datum +multirange_before_multirange(PG_FUNCTION_ARGS) +{ + MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0); + MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1)); + + PG_RETURN_BOOL(multirange_before_multirange_internal(typcache->rngtype, mr1, mr2)); +} + +/* strictly right of? */ +Datum +range_after_multirange(PG_FUNCTION_ARGS) +{ + RangeType *r = PG_GETARG_RANGE_P(0); + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + + PG_RETURN_BOOL(range_after_multirange_internal(typcache->rngtype, r, mr)); +} + +Datum +multirange_after_range(PG_FUNCTION_ARGS) +{ + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0); + RangeType *r = PG_GETARG_RANGE_P(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + + PG_RETURN_BOOL(range_before_multirange_internal(typcache->rngtype, r, mr)); +} + +Datum +multirange_after_multirange(PG_FUNCTION_ARGS) +{ + MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0); + MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1)); + + PG_RETURN_BOOL(multirange_before_multirange_internal(typcache->rngtype, mr2, mr1)); +} + +/* strictly left of? 
(internal version) */ +bool +range_before_multirange_internal(TypeCacheEntry *rangetyp, + const RangeType *r, + const MultirangeType *mr) +{ + RangeBound lower1, + upper1, + lower2, + upper2; + bool empty; + + if (RangeIsEmpty(r) || MultirangeIsEmpty(mr)) + return false; + + range_deserialize(rangetyp, r, &lower1, &upper1, &empty); + Assert(!empty); + + multirange_get_bounds(rangetyp, mr, 0, &lower2, &upper2); + + return (range_cmp_bounds(rangetyp, &upper1, &lower2) < 0); +} + +bool +multirange_before_multirange_internal(TypeCacheEntry *rangetyp, + const MultirangeType *mr1, + const MultirangeType *mr2) +{ + RangeBound lower1, + upper1, + lower2, + upper2; + + if (MultirangeIsEmpty(mr1) || MultirangeIsEmpty(mr2)) + return false; + + multirange_get_bounds(rangetyp, mr1, mr1->rangeCount - 1, + &lower1, &upper1); + multirange_get_bounds(rangetyp, mr2, 0, + &lower2, &upper2); + + return (range_cmp_bounds(rangetyp, &upper1, &lower2) < 0); +} + +/* strictly right of? (internal version) */ +bool +range_after_multirange_internal(TypeCacheEntry *rangetyp, + const RangeType *r, + const MultirangeType *mr) +{ + RangeBound lower1, + upper1, + lower2, + upper2; + bool empty; + int32 range_count; + + if (RangeIsEmpty(r) || MultirangeIsEmpty(mr)) + return false; + + range_deserialize(rangetyp, r, &lower1, &upper1, &empty); + Assert(!empty); + + range_count = mr->rangeCount; + multirange_get_bounds(rangetyp, mr, range_count - 1, + &lower2, &upper2); + + return (range_cmp_bounds(rangetyp, &lower1, &upper2) > 0); +} + +bool +range_adjacent_multirange_internal(TypeCacheEntry *rangetyp, + const RangeType *r, + const MultirangeType *mr) +{ + RangeBound lower1, + upper1, + lower2, + upper2; + bool empty; + int32 range_count; + + if (RangeIsEmpty(r) || MultirangeIsEmpty(mr)) + return false; + + range_deserialize(rangetyp, r, &lower1, &upper1, &empty); + Assert(!empty); + + range_count = mr->rangeCount; + multirange_get_bounds(rangetyp, mr, 0, + &lower2, &upper2); + + if (bounds_adjacent(rangetyp, upper1, lower2)) + return true; + + if (range_count > 1) + multirange_get_bounds(rangetyp, mr, range_count - 1, + &lower2, &upper2); + + if (bounds_adjacent(rangetyp, upper2, lower1)) + return true; + + return false; +} + +/* adjacent to? 
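+ *
+ * SQL-callable wrappers for the -|- operator.  Two values are adjacent when
+ * an end of one meets an end of the other with no gap and no overlap, e.g.
+ *
+ *   SELECT int4range(1, 5) -|- '{[5,9)}'::int4multirange;             -- true
+ *   SELECT '{[1,5)}'::int4multirange -|- '{[6,9)}'::int4multirange;   -- false
+ *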
*/ +Datum +range_adjacent_multirange(PG_FUNCTION_ARGS) +{ + RangeType *r = PG_GETARG_RANGE_P(0); + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + + PG_RETURN_BOOL(range_adjacent_multirange_internal(typcache->rngtype, r, mr)); +} + +Datum +multirange_adjacent_range(PG_FUNCTION_ARGS) +{ + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0); + RangeType *r = PG_GETARG_RANGE_P(1); + TypeCacheEntry *typcache; + + if (RangeIsEmpty(r) || MultirangeIsEmpty(mr)) + return false; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + + PG_RETURN_BOOL(range_adjacent_multirange_internal(typcache->rngtype, r, mr)); +} + +Datum +multirange_adjacent_multirange(PG_FUNCTION_ARGS) +{ + MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0); + MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1); + TypeCacheEntry *typcache; + int32 range_count1; + int32 range_count2; + RangeBound lower1, + upper1, + lower2, + upper2; + + if (MultirangeIsEmpty(mr1) || MultirangeIsEmpty(mr2)) + return false; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1)); + + range_count1 = mr1->rangeCount; + range_count2 = mr2->rangeCount; + multirange_get_bounds(typcache->rngtype, mr1, range_count1 - 1, + &lower1, &upper1); + multirange_get_bounds(typcache->rngtype, mr2, 0, + &lower2, &upper2); + if (bounds_adjacent(typcache->rngtype, upper1, lower2)) + PG_RETURN_BOOL(true); + + if (range_count1 > 1) + multirange_get_bounds(typcache->rngtype, mr1, 0, + &lower1, &upper1); + if (range_count2 > 1) + multirange_get_bounds(typcache->rngtype, mr2, range_count2 - 1, + &lower2, &upper2); + if (bounds_adjacent(typcache->rngtype, upper2, lower1)) + PG_RETURN_BOOL(true); + PG_RETURN_BOOL(false); +} + +/* Btree support */ + +/* btree comparator */ +Datum +multirange_cmp(PG_FUNCTION_ARGS) +{ + MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0); + MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1); + int32 range_count_1; + int32 range_count_2; + int32 range_count_max; + int32 i; + TypeCacheEntry *typcache; + int cmp = 0; /* If both are empty we'll use this. */ + + /* Different types should be prevented by ANYMULTIRANGE matching rules */ + if (MultirangeTypeGetOid(mr1) != MultirangeTypeGetOid(mr2)) + elog(ERROR, "multirange types do not match"); + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1)); + + range_count_1 = mr1->rangeCount; + range_count_2 = mr2->rangeCount; + + /* Loop over source data */ + range_count_max = Max(range_count_1, range_count_2); + for (i = 0; i < range_count_max; i++) + { + RangeBound lower1, + upper1, + lower2, + upper2; + + /* + * If one multirange is shorter, it's as if it had empty ranges at the + * end to extend its length. An empty range compares earlier than any + * other range, so the shorter multirange comes before the longer. + * This is the same behavior as in other types, e.g. in strings 'aaa' + * < 'aaaaaa'. 
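+ * For instance, '{[1,2)}'::int4multirange < '{[1,2),[3,4)}'::int4multirange,
+ * since the first ranges compare equal and the left-hand value runs out of
+ * ranges first.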
+ */ + if (i >= range_count_1) + { + cmp = -1; + break; + } + if (i >= range_count_2) + { + cmp = 1; + break; + } + + multirange_get_bounds(typcache->rngtype, mr1, i, &lower1, &upper1); + multirange_get_bounds(typcache->rngtype, mr2, i, &lower2, &upper2); + + cmp = range_cmp_bounds(typcache->rngtype, &lower1, &lower2); + if (cmp == 0) + cmp = range_cmp_bounds(typcache->rngtype, &upper1, &upper2); + if (cmp != 0) + break; + } + + PG_FREE_IF_COPY(mr1, 0); + PG_FREE_IF_COPY(mr2, 1); + + PG_RETURN_INT32(cmp); +} + +/* inequality operators using the multirange_cmp function */ +Datum +multirange_lt(PG_FUNCTION_ARGS) +{ + int cmp = multirange_cmp(fcinfo); + + PG_RETURN_BOOL(cmp < 0); +} + +Datum +multirange_le(PG_FUNCTION_ARGS) +{ + int cmp = multirange_cmp(fcinfo); + + PG_RETURN_BOOL(cmp <= 0); +} + +Datum +multirange_ge(PG_FUNCTION_ARGS) +{ + int cmp = multirange_cmp(fcinfo); + + PG_RETURN_BOOL(cmp >= 0); +} + +Datum +multirange_gt(PG_FUNCTION_ARGS) +{ + int cmp = multirange_cmp(fcinfo); + + PG_RETURN_BOOL(cmp > 0); +} + +/* multirange -> range functions */ + +/* Find the smallest range that includes everything in the multirange */ +Datum +range_merge_from_multirange(PG_FUNCTION_ARGS) +{ + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0); + Oid mltrngtypoid = MultirangeTypeGetOid(mr); + TypeCacheEntry *typcache; + RangeType *result; + + typcache = multirange_get_typcache(fcinfo, mltrngtypoid); + + if (MultirangeIsEmpty(mr)) + { + result = make_empty_range(typcache->rngtype); + } + else if (mr->rangeCount == 1) + { + result = multirange_get_range(typcache->rngtype, mr, 0); + } + else + { + RangeBound firstLower, + firstUpper, + lastLower, + lastUpper; + + multirange_get_bounds(typcache->rngtype, mr, 0, + &firstLower, &firstUpper); + multirange_get_bounds(typcache->rngtype, mr, mr->rangeCount - 1, + &lastLower, &lastUpper); + + result = make_range(typcache->rngtype, &firstLower, &lastUpper, + false, NULL); + } + + PG_RETURN_RANGE_P(result); +} + +/* Turn multirange into a set of ranges */ +Datum +multirange_unnest(PG_FUNCTION_ARGS) +{ + typedef struct + { + MultirangeType *mr; + TypeCacheEntry *typcache; + int index; + } multirange_unnest_fctx; + + FuncCallContext *funcctx; + multirange_unnest_fctx *fctx; + MemoryContext oldcontext; + + /* stuff done only on the first call of the function */ + if (SRF_IS_FIRSTCALL()) + { + MultirangeType *mr; + + /* create a function context for cross-call persistence */ + funcctx = SRF_FIRSTCALL_INIT(); + + /* + * switch to memory context appropriate for multiple function calls + */ + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* + * Get the multirange value and detoast if needed. We can't do this + * earlier because if we have to detoast, we want the detoasted copy + * to be in multi_call_memory_ctx, so it will go away when we're done + * and not before. (If no detoast happens, we assume the originally + * passed multirange will stick around till then.) 
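+ *
+ * At the SQL level this function implements unnest(anymultirange); e.g.
+ * unnest('{[1,3),[5,7)}'::int4multirange) produces the two rows [1,3)
+ * and [5,7), one per stored range, in order.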
+ */ + mr = PG_GETARG_MULTIRANGE_P(0); + + /* allocate memory for user context */ + fctx = (multirange_unnest_fctx *) palloc(sizeof(multirange_unnest_fctx)); + + /* initialize state */ + fctx->mr = mr; + fctx->index = 0; + fctx->typcache = lookup_type_cache(MultirangeTypeGetOid(mr), + TYPECACHE_MULTIRANGE_INFO); + + funcctx->user_fctx = fctx; + MemoryContextSwitchTo(oldcontext); + } + + /* stuff done on every call of the function */ + funcctx = SRF_PERCALL_SETUP(); + fctx = funcctx->user_fctx; + + if (fctx->index < fctx->mr->rangeCount) + { + RangeType *range; + + range = multirange_get_range(fctx->typcache->rngtype, + fctx->mr, + fctx->index); + fctx->index++; + + SRF_RETURN_NEXT(funcctx, RangeTypePGetDatum(range)); + } + else + { + /* do when there is no more left */ + SRF_RETURN_DONE(funcctx); + } +} + +/* Hash support */ + +/* hash a multirange value */ +Datum +hash_multirange(PG_FUNCTION_ARGS) +{ + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0); + uint32 result = 1; + TypeCacheEntry *typcache, + *scache; + int32 range_count, + i; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + scache = typcache->rngtype->rngelemtype; + if (!OidIsValid(scache->hash_proc_finfo.fn_oid)) + { + scache = lookup_type_cache(scache->type_id, + TYPECACHE_HASH_PROC_FINFO); + if (!OidIsValid(scache->hash_proc_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify a hash function for type %s", + format_type_be(scache->type_id)))); + } + + range_count = mr->rangeCount; + for (i = 0; i < range_count; i++) + { + RangeBound lower, + upper; + uint8 flags = MultirangeGetFlagsPtr(mr)[i]; + uint32 lower_hash; + uint32 upper_hash; + uint32 range_hash; + + multirange_get_bounds(typcache->rngtype, mr, i, &lower, &upper); + + if (RANGE_HAS_LBOUND(flags)) + lower_hash = DatumGetUInt32(FunctionCall1Coll(&scache->hash_proc_finfo, + typcache->rngtype->rng_collation, + lower.val)); + else + lower_hash = 0; + + if (RANGE_HAS_UBOUND(flags)) + upper_hash = DatumGetUInt32(FunctionCall1Coll(&scache->hash_proc_finfo, + typcache->rngtype->rng_collation, + upper.val)); + else + upper_hash = 0; + + /* Merge hashes of flags and bounds */ + range_hash = hash_uint32((uint32) flags); + range_hash ^= lower_hash; + range_hash = pg_rotate_left32(range_hash, 1); + range_hash ^= upper_hash; + + /* + * Use the same approach as hash_array to combine the individual + * elements' hash values: + */ + result = (result << 5) - result + range_hash; + } + + PG_FREE_IF_COPY(mr, 0); + + PG_RETURN_UINT32(result); +} + +/* + * Returns 64-bit value by hashing a value to a 64-bit value, with a seed. + * Otherwise, similar to hash_multirange. 
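+ *
+ * As in hash_multirange, the per-range hashes are folded into the result as
+ * result = result * 31 + range_hash (written "(result << 5) - result +
+ * range_hash"), the same scheme hash_array uses; the differences here are
+ * the 64-bit width and the extra seed argument.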
+ */ +Datum +hash_multirange_extended(PG_FUNCTION_ARGS) +{ + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0); + Datum seed = PG_GETARG_DATUM(1); + uint64 result = 1; + TypeCacheEntry *typcache, + *scache; + int32 range_count, + i; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + scache = typcache->rngtype->rngelemtype; + if (!OidIsValid(scache->hash_extended_proc_finfo.fn_oid)) + { + scache = lookup_type_cache(scache->type_id, + TYPECACHE_HASH_EXTENDED_PROC_FINFO); + if (!OidIsValid(scache->hash_extended_proc_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify a hash function for type %s", + format_type_be(scache->type_id)))); + } + + range_count = mr->rangeCount; + for (i = 0; i < range_count; i++) + { + RangeBound lower, + upper; + uint8 flags = MultirangeGetFlagsPtr(mr)[i]; + uint64 lower_hash; + uint64 upper_hash; + uint64 range_hash; + + multirange_get_bounds(typcache->rngtype, mr, i, &lower, &upper); + + if (RANGE_HAS_LBOUND(flags)) + lower_hash = DatumGetUInt64(FunctionCall2Coll(&scache->hash_extended_proc_finfo, + typcache->rngtype->rng_collation, + lower.val, + seed)); + else + lower_hash = 0; + + if (RANGE_HAS_UBOUND(flags)) + upper_hash = DatumGetUInt64(FunctionCall2Coll(&scache->hash_extended_proc_finfo, + typcache->rngtype->rng_collation, + upper.val, + seed)); + else + upper_hash = 0; + + /* Merge hashes of flags and bounds */ + range_hash = DatumGetUInt64(hash_uint32_extended((uint32) flags, + DatumGetInt64(seed))); + range_hash ^= lower_hash; + range_hash = ROTATE_HIGH_AND_LOW_32BITS(range_hash); + range_hash ^= upper_hash; + + /* + * Use the same approach as hash_array to combine the individual + * elements' hash values: + */ + result = (result << 5) - result + range_hash; + } + + PG_FREE_IF_COPY(mr, 0); + + PG_RETURN_UINT64(result); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/multirangetypes_selfuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/multirangetypes_selfuncs.c new file mode 100644 index 00000000000..cefc4710fd4 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/multirangetypes_selfuncs.c @@ -0,0 +1,1337 @@ +/*------------------------------------------------------------------------- + * + * multirangetypes_selfuncs.c + * Functions for selectivity estimation of multirange operators + * + * Estimates are based on histograms of lower and upper bounds, and the + * fraction of empty multiranges. 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/multirangetypes_selfuncs.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <math.h> + +#include "access/htup_details.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_statistic.h" +#include "catalog/pg_type.h" +#include "utils/float.h" +#include "utils/fmgrprotos.h" +#include "utils/lsyscache.h" +#include "utils/rangetypes.h" +#include "utils/multirangetypes.h" +#include "utils/selfuncs.h" +#include "utils/typcache.h" + +static double calc_multirangesel(TypeCacheEntry *typcache, + VariableStatData *vardata, + const MultirangeType *constval, Oid operator); +static double default_multirange_selectivity(Oid operator); +static double calc_hist_selectivity(TypeCacheEntry *typcache, + VariableStatData *vardata, + const MultirangeType *constval, + Oid operator); +static double calc_hist_selectivity_scalar(TypeCacheEntry *typcache, + const RangeBound *constbound, + const RangeBound *hist, + int hist_nvalues, bool equal); +static int rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value, + const RangeBound *hist, int hist_length, bool equal); +static float8 get_position(TypeCacheEntry *typcache, const RangeBound *value, + const RangeBound *hist1, const RangeBound *hist2); +static float8 get_len_position(double value, double hist1, double hist2); +static float8 get_distance(TypeCacheEntry *typcache, const RangeBound *bound1, + const RangeBound *bound2); +static int length_hist_bsearch(Datum *length_hist_values, + int length_hist_nvalues, double value, + bool equal); +static double calc_length_hist_frac(Datum *length_hist_values, + int length_hist_nvalues, double length1, + double length2, bool equal); +static double calc_hist_selectivity_contained(TypeCacheEntry *typcache, + const RangeBound *lower, + RangeBound *upper, + const RangeBound *hist_lower, + int hist_nvalues, + Datum *length_hist_values, + int length_hist_nvalues); +static double calc_hist_selectivity_contains(TypeCacheEntry *typcache, + const RangeBound *lower, + const RangeBound *upper, + const RangeBound *hist_lower, + int hist_nvalues, + Datum *length_hist_values, + int length_hist_nvalues); + +/* + * Returns a default selectivity estimate for given operator, when we don't + * have statistics or cannot use them for some reason. + */ +static double +default_multirange_selectivity(Oid operator) +{ + switch (operator) + { + case OID_MULTIRANGE_OVERLAPS_MULTIRANGE_OP: + case OID_MULTIRANGE_OVERLAPS_RANGE_OP: + case OID_RANGE_OVERLAPS_MULTIRANGE_OP: + return 0.01; + + case OID_RANGE_CONTAINS_MULTIRANGE_OP: + case OID_RANGE_MULTIRANGE_CONTAINED_OP: + case OID_MULTIRANGE_CONTAINS_RANGE_OP: + case OID_MULTIRANGE_CONTAINS_MULTIRANGE_OP: + case OID_MULTIRANGE_RANGE_CONTAINED_OP: + case OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP: + return 0.005; + + case OID_MULTIRANGE_CONTAINS_ELEM_OP: + case OID_MULTIRANGE_ELEM_CONTAINED_OP: + + /* + * "multirange @> elem" is more or less identical to a scalar + * inequality "A >= b AND A <= c". 
+ */ + return DEFAULT_MULTIRANGE_INEQ_SEL; + + case OID_MULTIRANGE_LESS_OP: + case OID_MULTIRANGE_LESS_EQUAL_OP: + case OID_MULTIRANGE_GREATER_OP: + case OID_MULTIRANGE_GREATER_EQUAL_OP: + case OID_MULTIRANGE_LEFT_RANGE_OP: + case OID_MULTIRANGE_LEFT_MULTIRANGE_OP: + case OID_RANGE_LEFT_MULTIRANGE_OP: + case OID_MULTIRANGE_RIGHT_RANGE_OP: + case OID_MULTIRANGE_RIGHT_MULTIRANGE_OP: + case OID_RANGE_RIGHT_MULTIRANGE_OP: + case OID_MULTIRANGE_OVERLAPS_LEFT_RANGE_OP: + case OID_RANGE_OVERLAPS_LEFT_MULTIRANGE_OP: + case OID_MULTIRANGE_OVERLAPS_LEFT_MULTIRANGE_OP: + case OID_MULTIRANGE_OVERLAPS_RIGHT_RANGE_OP: + case OID_RANGE_OVERLAPS_RIGHT_MULTIRANGE_OP: + case OID_MULTIRANGE_OVERLAPS_RIGHT_MULTIRANGE_OP: + /* these are similar to regular scalar inequalities */ + return DEFAULT_INEQ_SEL; + + default: + + /* + * all multirange operators should be handled above, but just in + * case + */ + return 0.01; + } +} + +/* + * multirangesel -- restriction selectivity for multirange operators + */ +Datum +multirangesel(PG_FUNCTION_ARGS) +{ + PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); + Oid operator = PG_GETARG_OID(1); + List *args = (List *) PG_GETARG_POINTER(2); + int varRelid = PG_GETARG_INT32(3); + VariableStatData vardata; + Node *other; + bool varonleft; + Selectivity selec; + TypeCacheEntry *typcache = NULL; + MultirangeType *constmultirange = NULL; + RangeType *constrange = NULL; + + /* + * If expression is not (variable op something) or (something op + * variable), then punt and return a default estimate. + */ + if (!get_restriction_variable(root, args, varRelid, + &vardata, &other, &varonleft)) + PG_RETURN_FLOAT8(default_multirange_selectivity(operator)); + + /* + * Can't do anything useful if the something is not a constant, either. + */ + if (!IsA(other, Const)) + { + ReleaseVariableStats(vardata); + PG_RETURN_FLOAT8(default_multirange_selectivity(operator)); + } + + /* + * All the multirange operators are strict, so we can cope with a NULL + * constant right away. + */ + if (((Const *) other)->constisnull) + { + ReleaseVariableStats(vardata); + PG_RETURN_FLOAT8(0.0); + } + + /* + * If var is on the right, commute the operator, so that we can assume the + * var is on the left in what follows. + */ + if (!varonleft) + { + /* we have other Op var, commute to make var Op other */ + operator = get_commutator(operator); + if (!operator) + { + /* Use default selectivity (should we raise an error instead?) */ + ReleaseVariableStats(vardata); + PG_RETURN_FLOAT8(default_multirange_selectivity(operator)); + } + } + + /* + * OK, there's a Var and a Const we're dealing with here. We need the + * Const to be of same multirange type as the column, else we can't do + * anything useful. (Such cases will likely fail at runtime, but here we'd + * rather just return a default estimate.) + * + * If the operator is "multirange @> element", the constant should be of + * the element type of the multirange column. Convert it to a multirange + * that includes only that single point, so that we don't need special + * handling for that in what follows. 
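+ *
+ * For example, when estimating "col @> 42" for an int4multirange column,
+ * the constant 42 is wrapped as the single-point multirange {[42,42]}
+ * before the histogram machinery runs.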
+ */ + if (operator == OID_MULTIRANGE_CONTAINS_ELEM_OP) + { + typcache = multirange_get_typcache(fcinfo, vardata.vartype); + + if (((Const *) other)->consttype == typcache->rngtype->rngelemtype->type_id) + { + RangeBound lower, + upper; + + lower.inclusive = true; + lower.val = ((Const *) other)->constvalue; + lower.infinite = false; + lower.lower = true; + upper.inclusive = true; + upper.val = ((Const *) other)->constvalue; + upper.infinite = false; + upper.lower = false; + constrange = range_serialize(typcache->rngtype, &lower, &upper, + false, NULL); + constmultirange = make_multirange(typcache->type_id, typcache->rngtype, + 1, &constrange); + } + } + else if (operator == OID_RANGE_MULTIRANGE_CONTAINED_OP || + operator == OID_MULTIRANGE_CONTAINS_RANGE_OP || + operator == OID_MULTIRANGE_OVERLAPS_RANGE_OP || + operator == OID_MULTIRANGE_OVERLAPS_LEFT_RANGE_OP || + operator == OID_MULTIRANGE_OVERLAPS_RIGHT_RANGE_OP || + operator == OID_MULTIRANGE_LEFT_RANGE_OP || + operator == OID_MULTIRANGE_RIGHT_RANGE_OP) + { + /* + * Promote a range in "multirange OP range" just like we do an element + * in "multirange OP element". + */ + typcache = multirange_get_typcache(fcinfo, vardata.vartype); + if (((Const *) other)->consttype == typcache->rngtype->type_id) + { + constrange = DatumGetRangeTypeP(((Const *) other)->constvalue); + constmultirange = make_multirange(typcache->type_id, typcache->rngtype, + 1, &constrange); + } + } + else if (operator == OID_RANGE_OVERLAPS_MULTIRANGE_OP || + operator == OID_RANGE_OVERLAPS_LEFT_MULTIRANGE_OP || + operator == OID_RANGE_OVERLAPS_RIGHT_MULTIRANGE_OP || + operator == OID_RANGE_LEFT_MULTIRANGE_OP || + operator == OID_RANGE_RIGHT_MULTIRANGE_OP || + operator == OID_RANGE_CONTAINS_MULTIRANGE_OP || + operator == OID_MULTIRANGE_ELEM_CONTAINED_OP || + operator == OID_MULTIRANGE_RANGE_CONTAINED_OP) + { + /* + * Here, the Var is the elem/range, not the multirange. For now we + * just punt and return the default estimate. In future we could + * disassemble the multirange constant to do something more + * intelligent. + */ + } + else if (((Const *) other)->consttype == vardata.vartype) + { + /* Both sides are the same multirange type */ + typcache = multirange_get_typcache(fcinfo, vardata.vartype); + + constmultirange = DatumGetMultirangeTypeP(((Const *) other)->constvalue); + } + + /* + * If we got a valid constant on one side of the operator, proceed to + * estimate using statistics. Otherwise punt and return a default constant + * estimate. Note that calc_multirangesel need not handle + * OID_MULTIRANGE_*_CONTAINED_OP. + */ + if (constmultirange) + selec = calc_multirangesel(typcache, &vardata, constmultirange, operator); + else + selec = default_multirange_selectivity(operator); + + ReleaseVariableStats(vardata); + + CLAMP_PROBABILITY(selec); + + PG_RETURN_FLOAT8((float8) selec); +} + +static double +calc_multirangesel(TypeCacheEntry *typcache, VariableStatData *vardata, + const MultirangeType *constval, Oid operator) +{ + double hist_selec; + double selec; + float4 empty_frac, + null_frac; + + /* + * First look up the fraction of NULLs and empty multiranges from + * pg_statistic. 
+ */ + if (HeapTupleIsValid(vardata->statsTuple)) + { + Form_pg_statistic stats; + AttStatsSlot sslot; + + stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple); + null_frac = stats->stanullfrac; + + /* Try to get fraction of empty multiranges */ + if (get_attstatsslot(&sslot, vardata->statsTuple, + STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM, + InvalidOid, + ATTSTATSSLOT_NUMBERS)) + { + if (sslot.nnumbers != 1) + elog(ERROR, "invalid empty fraction statistic"); /* shouldn't happen */ + empty_frac = sslot.numbers[0]; + free_attstatsslot(&sslot); + } + else + { + /* No empty fraction statistic. Assume no empty ranges. */ + empty_frac = 0.0; + } + } + else + { + /* + * No stats are available. Follow through the calculations below + * anyway, assuming no NULLs and no empty multiranges. This still + * allows us to give a better-than-nothing estimate based on whether + * the constant is an empty multirange or not. + */ + null_frac = 0.0; + empty_frac = 0.0; + } + + if (MultirangeIsEmpty(constval)) + { + /* + * An empty multirange matches all multiranges, all empty multiranges, + * or nothing, depending on the operator + */ + switch (operator) + { + /* these return false if either argument is empty */ + case OID_MULTIRANGE_OVERLAPS_RANGE_OP: + case OID_MULTIRANGE_OVERLAPS_MULTIRANGE_OP: + case OID_MULTIRANGE_OVERLAPS_LEFT_RANGE_OP: + case OID_MULTIRANGE_OVERLAPS_LEFT_MULTIRANGE_OP: + case OID_MULTIRANGE_OVERLAPS_RIGHT_RANGE_OP: + case OID_MULTIRANGE_OVERLAPS_RIGHT_MULTIRANGE_OP: + case OID_MULTIRANGE_LEFT_RANGE_OP: + case OID_MULTIRANGE_LEFT_MULTIRANGE_OP: + case OID_MULTIRANGE_RIGHT_RANGE_OP: + case OID_MULTIRANGE_RIGHT_MULTIRANGE_OP: + /* nothing is less than an empty multirange */ + case OID_MULTIRANGE_LESS_OP: + selec = 0.0; + break; + + /* + * only empty multiranges can be contained by an empty + * multirange + */ + case OID_RANGE_MULTIRANGE_CONTAINED_OP: + case OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP: + /* only empty ranges are <= an empty multirange */ + case OID_MULTIRANGE_LESS_EQUAL_OP: + selec = empty_frac; + break; + + /* everything contains an empty multirange */ + case OID_MULTIRANGE_CONTAINS_RANGE_OP: + case OID_MULTIRANGE_CONTAINS_MULTIRANGE_OP: + /* everything is >= an empty multirange */ + case OID_MULTIRANGE_GREATER_EQUAL_OP: + selec = 1.0; + break; + + /* all non-empty multiranges are > an empty multirange */ + case OID_MULTIRANGE_GREATER_OP: + selec = 1.0 - empty_frac; + break; + + /* an element cannot be empty */ + case OID_MULTIRANGE_CONTAINS_ELEM_OP: + + /* filtered out by multirangesel() */ + case OID_RANGE_OVERLAPS_MULTIRANGE_OP: + case OID_RANGE_OVERLAPS_LEFT_MULTIRANGE_OP: + case OID_RANGE_OVERLAPS_RIGHT_MULTIRANGE_OP: + case OID_RANGE_LEFT_MULTIRANGE_OP: + case OID_RANGE_RIGHT_MULTIRANGE_OP: + case OID_RANGE_CONTAINS_MULTIRANGE_OP: + case OID_MULTIRANGE_ELEM_CONTAINED_OP: + case OID_MULTIRANGE_RANGE_CONTAINED_OP: + + default: + elog(ERROR, "unexpected operator %u", operator); + selec = 0.0; /* keep compiler quiet */ + break; + } + } + else + { + /* + * Calculate selectivity using bound histograms. If that fails for + * some reason, e.g no histogram in pg_statistic, use the default + * constant estimate for the fraction of non-empty values. This is + * still somewhat better than just returning the default estimate, + * because this still takes into account the fraction of empty and + * NULL tuples, if we had statistics for them. 
+ */ + hist_selec = calc_hist_selectivity(typcache, vardata, constval, + operator); + if (hist_selec < 0.0) + hist_selec = default_multirange_selectivity(operator); + + /* + * Now merge the results for the empty multiranges and histogram + * calculations, realizing that the histogram covers only the + * non-null, non-empty values. + */ + if (operator == OID_RANGE_MULTIRANGE_CONTAINED_OP || + operator == OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP) + { + /* empty is contained by anything non-empty */ + selec = (1.0 - empty_frac) * hist_selec + empty_frac; + } + else + { + /* with any other operator, empty Op non-empty matches nothing */ + selec = (1.0 - empty_frac) * hist_selec; + } + } + + /* all multirange operators are strict */ + selec *= (1.0 - null_frac); + + /* result should be in range, but make sure... */ + CLAMP_PROBABILITY(selec); + + return selec; +} + +/* + * Calculate multirange operator selectivity using histograms of multirange bounds. + * + * This estimate is for the portion of values that are not empty and not + * NULL. + */ +static double +calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata, + const MultirangeType *constval, Oid operator) +{ + TypeCacheEntry *rng_typcache = typcache->rngtype; + AttStatsSlot hslot; + AttStatsSlot lslot; + int nhist; + RangeBound *hist_lower; + RangeBound *hist_upper; + int i; + RangeBound const_lower; + RangeBound const_upper; + RangeBound tmp; + double hist_selec; + + /* Can't use the histogram with insecure multirange support functions */ + if (!statistic_proc_security_check(vardata, + rng_typcache->rng_cmp_proc_finfo.fn_oid)) + return -1; + if (OidIsValid(rng_typcache->rng_subdiff_finfo.fn_oid) && + !statistic_proc_security_check(vardata, + rng_typcache->rng_subdiff_finfo.fn_oid)) + return -1; + + /* Try to get histogram of ranges */ + if (!(HeapTupleIsValid(vardata->statsTuple) && + get_attstatsslot(&hslot, vardata->statsTuple, + STATISTIC_KIND_BOUNDS_HISTOGRAM, InvalidOid, + ATTSTATSSLOT_VALUES))) + return -1.0; + + /* check that it's a histogram, not just a dummy entry */ + if (hslot.nvalues < 2) + { + free_attstatsslot(&hslot); + return -1.0; + } + + /* + * Convert histogram of ranges into histograms of its lower and upper + * bounds. + */ + nhist = hslot.nvalues; + hist_lower = (RangeBound *) palloc(sizeof(RangeBound) * nhist); + hist_upper = (RangeBound *) palloc(sizeof(RangeBound) * nhist); + for (i = 0; i < nhist; i++) + { + bool empty; + + range_deserialize(rng_typcache, DatumGetRangeTypeP(hslot.values[i]), + &hist_lower[i], &hist_upper[i], &empty); + /* The histogram should not contain any empty ranges */ + if (empty) + elog(ERROR, "bounds histogram contains an empty range"); + } + + /* @> and @< also need a histogram of range lengths */ + if (operator == OID_MULTIRANGE_CONTAINS_RANGE_OP || + operator == OID_MULTIRANGE_CONTAINS_MULTIRANGE_OP || + operator == OID_MULTIRANGE_RANGE_CONTAINED_OP || + operator == OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP) + { + if (!(HeapTupleIsValid(vardata->statsTuple) && + get_attstatsslot(&lslot, vardata->statsTuple, + STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM, + InvalidOid, + ATTSTATSSLOT_VALUES))) + { + free_attstatsslot(&hslot); + return -1.0; + } + + /* check that it's a histogram, not just a dummy entry */ + if (lslot.nvalues < 2) + { + free_attstatsslot(&lslot); + free_attstatsslot(&hslot); + return -1.0; + } + } + else + memset(&lslot, 0, sizeof(lslot)); + + /* Extract the bounds of the constant value. 
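+ *
+ * Only the overall lower bound of the first range and the overall upper
+ * bound of the last range are used below, so for histogram purposes the
+ * constant multirange is effectively approximated by its smallest covering
+ * range (compare range_merge_from_multirange in multirangetypes.c).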
*/ + Assert(constval->rangeCount > 0); + multirange_get_bounds(rng_typcache, constval, 0, + &const_lower, &tmp); + multirange_get_bounds(rng_typcache, constval, constval->rangeCount - 1, + &tmp, &const_upper); + + /* + * Calculate selectivity comparing the lower or upper bound of the + * constant with the histogram of lower or upper bounds. + */ + switch (operator) + { + case OID_MULTIRANGE_LESS_OP: + + /* + * The regular b-tree comparison operators (<, <=, >, >=) compare + * the lower bounds first, and the upper bounds for values with + * equal lower bounds. Estimate that by comparing the lower bounds + * only. This gives a fairly accurate estimate assuming there + * aren't many rows with a lower bound equal to the constant's + * lower bound. + */ + hist_selec = + calc_hist_selectivity_scalar(rng_typcache, &const_lower, + hist_lower, nhist, false); + break; + + case OID_MULTIRANGE_LESS_EQUAL_OP: + hist_selec = + calc_hist_selectivity_scalar(rng_typcache, &const_lower, + hist_lower, nhist, true); + break; + + case OID_MULTIRANGE_GREATER_OP: + hist_selec = + 1 - calc_hist_selectivity_scalar(rng_typcache, &const_lower, + hist_lower, nhist, false); + break; + + case OID_MULTIRANGE_GREATER_EQUAL_OP: + hist_selec = + 1 - calc_hist_selectivity_scalar(rng_typcache, &const_lower, + hist_lower, nhist, true); + break; + + case OID_MULTIRANGE_LEFT_RANGE_OP: + case OID_MULTIRANGE_LEFT_MULTIRANGE_OP: + /* var << const when upper(var) < lower(const) */ + hist_selec = + calc_hist_selectivity_scalar(rng_typcache, &const_lower, + hist_upper, nhist, false); + break; + + case OID_MULTIRANGE_RIGHT_RANGE_OP: + case OID_MULTIRANGE_RIGHT_MULTIRANGE_OP: + /* var >> const when lower(var) > upper(const) */ + hist_selec = + 1 - calc_hist_selectivity_scalar(rng_typcache, &const_upper, + hist_lower, nhist, true); + break; + + case OID_MULTIRANGE_OVERLAPS_RIGHT_RANGE_OP: + case OID_MULTIRANGE_OVERLAPS_RIGHT_MULTIRANGE_OP: + /* compare lower bounds */ + hist_selec = + 1 - calc_hist_selectivity_scalar(rng_typcache, &const_lower, + hist_lower, nhist, false); + break; + + case OID_MULTIRANGE_OVERLAPS_LEFT_RANGE_OP: + case OID_MULTIRANGE_OVERLAPS_LEFT_MULTIRANGE_OP: + /* compare upper bounds */ + hist_selec = + calc_hist_selectivity_scalar(rng_typcache, &const_upper, + hist_upper, nhist, true); + break; + + case OID_MULTIRANGE_OVERLAPS_RANGE_OP: + case OID_MULTIRANGE_OVERLAPS_MULTIRANGE_OP: + case OID_MULTIRANGE_CONTAINS_ELEM_OP: + + /* + * A && B <=> NOT (A << B OR A >> B). + * + * Since A << B and A >> B are mutually exclusive events we can + * sum their probabilities to find probability of (A << B OR A >> + * B). + * + * "multirange @> elem" is equivalent to "multirange && + * {[elem,elem]}". The caller already constructed the singular + * range from the element constant, so just treat it the same as + * &&. + */ + hist_selec = + calc_hist_selectivity_scalar(rng_typcache, + &const_lower, hist_upper, + nhist, false); + hist_selec += + (1.0 - calc_hist_selectivity_scalar(rng_typcache, + &const_upper, hist_lower, + nhist, true)); + hist_selec = 1.0 - hist_selec; + break; + + case OID_MULTIRANGE_CONTAINS_RANGE_OP: + case OID_MULTIRANGE_CONTAINS_MULTIRANGE_OP: + hist_selec = + calc_hist_selectivity_contains(rng_typcache, &const_lower, + &const_upper, hist_lower, nhist, + lslot.values, lslot.nvalues); + break; + + case OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP: + case OID_RANGE_MULTIRANGE_CONTAINED_OP: + if (const_lower.infinite) + { + /* + * Lower bound no longer matters. 
Just estimate the fraction + * with an upper bound <= const upper bound + */ + hist_selec = + calc_hist_selectivity_scalar(rng_typcache, &const_upper, + hist_upper, nhist, true); + } + else if (const_upper.infinite) + { + hist_selec = + 1.0 - calc_hist_selectivity_scalar(rng_typcache, &const_lower, + hist_lower, nhist, false); + } + else + { + hist_selec = + calc_hist_selectivity_contained(rng_typcache, &const_lower, + &const_upper, hist_lower, nhist, + lslot.values, lslot.nvalues); + } + break; + + /* filtered out by multirangesel() */ + case OID_RANGE_OVERLAPS_MULTIRANGE_OP: + case OID_RANGE_OVERLAPS_LEFT_MULTIRANGE_OP: + case OID_RANGE_OVERLAPS_RIGHT_MULTIRANGE_OP: + case OID_RANGE_LEFT_MULTIRANGE_OP: + case OID_RANGE_RIGHT_MULTIRANGE_OP: + case OID_RANGE_CONTAINS_MULTIRANGE_OP: + case OID_MULTIRANGE_ELEM_CONTAINED_OP: + case OID_MULTIRANGE_RANGE_CONTAINED_OP: + + default: + elog(ERROR, "unknown multirange operator %u", operator); + hist_selec = -1.0; /* keep compiler quiet */ + break; + } + + free_attstatsslot(&lslot); + free_attstatsslot(&hslot); + + return hist_selec; +} + + +/* + * Look up the fraction of values less than (or equal, if 'equal' argument + * is true) a given const in a histogram of range bounds. + */ +static double +calc_hist_selectivity_scalar(TypeCacheEntry *typcache, const RangeBound *constbound, + const RangeBound *hist, int hist_nvalues, bool equal) +{ + Selectivity selec; + int index; + + /* + * Find the histogram bin the given constant falls into. Estimate + * selectivity as the number of preceding whole bins. + */ + index = rbound_bsearch(typcache, constbound, hist, hist_nvalues, equal); + selec = (Selectivity) (Max(index, 0)) / (Selectivity) (hist_nvalues - 1); + + /* Adjust using linear interpolation within the bin */ + if (index >= 0 && index < hist_nvalues - 1) + selec += get_position(typcache, constbound, &hist[index], + &hist[index + 1]) / (Selectivity) (hist_nvalues - 1); + + return selec; +} + +/* + * Binary search on an array of range bounds. Returns greatest index of range + * bound in array which is less(less or equal) than given range bound. If all + * range bounds in array are greater or equal(greater) than given range bound, + * return -1. When "equal" flag is set conditions in brackets are used. + * + * This function is used in scalar operator selectivity estimation. Another + * goal of this function is to find a histogram bin where to stop + * interpolation of portion of bounds which are less than or equal to given bound. + */ +static int +rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist, + int hist_length, bool equal) +{ + int lower = -1, + upper = hist_length - 1, + cmp, + middle; + + while (lower < upper) + { + middle = (lower + upper + 1) / 2; + cmp = range_cmp_bounds(typcache, &hist[middle], value); + + if (cmp < 0 || (equal && cmp == 0)) + lower = middle; + else + upper = middle - 1; + } + return lower; +} + + +/* + * Binary search on length histogram. Returns greatest index of range length in + * histogram which is less than (less than or equal) the given length value. If + * all lengths in the histogram are greater than (greater than or equal) the + * given length, returns -1. 
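+ *
+ * For example, with length histogram values {1, 2, 4, 8} and value 3.0
+ * (equal = false), the result is index 1, i.e. the bin [2, 4) containing
+ * the value; any value below the first histogram entry yields -1.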
+ */ +static int +length_hist_bsearch(Datum *length_hist_values, int length_hist_nvalues, + double value, bool equal) +{ + int lower = -1, + upper = length_hist_nvalues - 1, + middle; + + while (lower < upper) + { + double middleval; + + middle = (lower + upper + 1) / 2; + + middleval = DatumGetFloat8(length_hist_values[middle]); + if (middleval < value || (equal && middleval <= value)) + lower = middle; + else + upper = middle - 1; + } + return lower; +} + +/* + * Get relative position of value in histogram bin in [0,1] range. + */ +static float8 +get_position(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist1, + const RangeBound *hist2) +{ + bool has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid); + float8 position; + + if (!hist1->infinite && !hist2->infinite) + { + float8 bin_width; + + /* + * Both bounds are finite. Assuming the subtype's comparison function + * works sanely, the value must be finite, too, because it lies + * somewhere between the bounds. If it doesn't, arbitrarily return + * 0.5. + */ + if (value->infinite) + return 0.5; + + /* Can't interpolate without subdiff function */ + if (!has_subdiff) + return 0.5; + + /* Calculate relative position using subdiff function. */ + bin_width = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo, + typcache->rng_collation, + hist2->val, + hist1->val)); + if (isnan(bin_width) || bin_width <= 0.0) + return 0.5; /* punt for NaN or zero-width bin */ + + position = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo, + typcache->rng_collation, + value->val, + hist1->val)) + / bin_width; + + if (isnan(position)) + return 0.5; /* punt for NaN from subdiff, Inf/Inf, etc */ + + /* Relative position must be in [0,1] range */ + position = Max(position, 0.0); + position = Min(position, 1.0); + return position; + } + else if (hist1->infinite && !hist2->infinite) + { + /* + * Lower bin boundary is -infinite, upper is finite. If the value is + * -infinite, return 0.0 to indicate it's equal to the lower bound. + * Otherwise return 1.0 to indicate it's infinitely far from the lower + * bound. + */ + return ((value->infinite && value->lower) ? 0.0 : 1.0); + } + else if (!hist1->infinite && hist2->infinite) + { + /* same as above, but in reverse */ + return ((value->infinite && !value->lower) ? 1.0 : 0.0); + } + else + { + /* + * If both bin boundaries are infinite, they should be equal to each + * other, and the value should also be infinite and equal to both + * bounds. (But don't Assert that, to avoid crashing if a user creates + * a datatype with a broken comparison function). + * + * Assume the value to lie in the middle of the infinite bounds. + */ + return 0.5; + } +} + + +/* + * Get relative position of value in a length histogram bin in [0,1] range. + */ +static double +get_len_position(double value, double hist1, double hist2) +{ + if (!isinf(hist1) && !isinf(hist2)) + { + /* + * Both bounds are finite. The value should be finite too, because it + * lies somewhere between the bounds. If it doesn't, just return + * something. + */ + if (isinf(value)) + return 0.5; + + return 1.0 - (hist2 - value) / (hist2 - hist1); + } + else if (isinf(hist1) && !isinf(hist2)) + { + /* + * Lower bin boundary is -infinite, upper is finite. Return 1.0 to + * indicate the value is infinitely far from the lower bound. 
+ */ + return 1.0; + } + else if (isinf(hist1) && isinf(hist2)) + { + /* same as above, but in reverse */ + return 0.0; + } + else + { + /* + * If both bin boundaries are infinite, they should be equal to each + * other, and the value should also be infinite and equal to both + * bounds. (But don't Assert that, to avoid crashing unnecessarily if + * the caller messes up) + * + * Assume the value to lie in the middle of the infinite bounds. + */ + return 0.5; + } +} + +/* + * Measure distance between two range bounds. + */ +static float8 +get_distance(TypeCacheEntry *typcache, const RangeBound *bound1, const RangeBound *bound2) +{ + bool has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid); + + if (!bound1->infinite && !bound2->infinite) + { + /* + * Neither bound is infinite, use subdiff function or return default + * value of 1.0 if no subdiff is available. + */ + if (has_subdiff) + { + float8 res; + + res = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo, + typcache->rng_collation, + bound2->val, + bound1->val)); + /* Reject possible NaN result, also negative result */ + if (isnan(res) || res < 0.0) + return 1.0; + else + return res; + } + else + return 1.0; + } + else if (bound1->infinite && bound2->infinite) + { + /* Both bounds are infinite */ + if (bound1->lower == bound2->lower) + return 0.0; + else + return get_float8_infinity(); + } + else + { + /* One bound is infinite, the other is not */ + return get_float8_infinity(); + } +} + +/* + * Calculate the average of function P(x), in the interval [length1, length2], + * where P(x) is the fraction of tuples with length < x (or length <= x if + * 'equal' is true). + */ +static double +calc_length_hist_frac(Datum *length_hist_values, int length_hist_nvalues, + double length1, double length2, bool equal) +{ + double frac; + double A, + B, + PA, + PB; + double pos; + int i; + double area; + + Assert(length2 >= length1); + + if (length2 < 0.0) + return 0.0; /* shouldn't happen, but doesn't hurt to check */ + + /* All lengths in the table are <= infinite. */ + if (isinf(length2) && equal) + return 1.0; + + /*---------- + * The average of a function between A and B can be calculated by the + * formula: + * + * B + * 1 / + * ------- | P(x)dx + * B - A / + * A + * + * The geometrical interpretation of the integral is the area under the + * graph of P(x). P(x) is defined by the length histogram. We calculate + * the area in a piecewise fashion, iterating through the length histogram + * bins. Each bin is a trapezoid: + * + * P(x2) + * /| + * / | + * P(x1)/ | + * | | + * | | + * ---+---+-- + * x1 x2 + * + * where x1 and x2 are the boundaries of the current histogram, and P(x1) + * and P(x1) are the cumulative fraction of tuples at the boundaries. + * + * The area of each trapezoid is 1/2 * (P(x2) + P(x1)) * (x2 - x1) + * + * The first bin contains the lower bound passed by the caller, so we + * use linear interpolation between the previous and next histogram bin + * boundary to calculate P(x1). Likewise for the last bin: we use linear + * interpolation to calculate P(x2). 
For the bins in between, x1 and x2 + * lie on histogram bin boundaries, so P(x1) and P(x2) are simply: + * P(x1) = (bin index) / (number of bins) + * P(x2) = (bin index + 1 / (number of bins) + */ + + /* First bin, the one that contains lower bound */ + i = length_hist_bsearch(length_hist_values, length_hist_nvalues, length1, equal); + if (i >= length_hist_nvalues - 1) + return 1.0; + + if (i < 0) + { + i = 0; + pos = 0.0; + } + else + { + /* interpolate length1's position in the bin */ + pos = get_len_position(length1, + DatumGetFloat8(length_hist_values[i]), + DatumGetFloat8(length_hist_values[i + 1])); + } + PB = (((double) i) + pos) / (double) (length_hist_nvalues - 1); + B = length1; + + /* + * In the degenerate case that length1 == length2, simply return + * P(length1). This is not merely an optimization: if length1 == length2, + * we'd divide by zero later on. + */ + if (length2 == length1) + return PB; + + /* + * Loop through all the bins, until we hit the last bin, the one that + * contains the upper bound. (if lower and upper bounds are in the same + * bin, this falls out immediately) + */ + area = 0.0; + for (; i < length_hist_nvalues - 1; i++) + { + double bin_upper = DatumGetFloat8(length_hist_values[i + 1]); + + /* check if we've reached the last bin */ + if (!(bin_upper < length2 || (equal && bin_upper <= length2))) + break; + + /* the upper bound of previous bin is the lower bound of this bin */ + A = B; + PA = PB; + + B = bin_upper; + PB = (double) i / (double) (length_hist_nvalues - 1); + + /* + * Add the area of this trapezoid to the total. The point of the + * if-check is to avoid NaN, in the corner case that PA == PB == 0, + * and B - A == Inf. The area of a zero-height trapezoid (PA == PB == + * 0) is zero, regardless of the width (B - A). + */ + if (PA > 0 || PB > 0) + area += 0.5 * (PB + PA) * (B - A); + } + + /* Last bin */ + A = B; + PA = PB; + + B = length2; /* last bin ends at the query upper bound */ + if (i >= length_hist_nvalues - 1) + pos = 0.0; + else + { + if (DatumGetFloat8(length_hist_values[i]) == DatumGetFloat8(length_hist_values[i + 1])) + pos = 0.0; + else + pos = get_len_position(length2, + DatumGetFloat8(length_hist_values[i]), + DatumGetFloat8(length_hist_values[i + 1])); + } + PB = (((double) i) + pos) / (double) (length_hist_nvalues - 1); + + if (PA > 0 || PB > 0) + area += 0.5 * (PB + PA) * (B - A); + + /* + * Ok, we have calculated the area, ie. the integral. Divide by width to + * get the requested average. + * + * Avoid NaN arising from infinite / infinite. This happens at least if + * length2 is infinite. It's not clear what the correct value would be in + * that case, so 0.5 seems as good as any value. + */ + if (isinf(area) && isinf(length2)) + frac = 0.5; + else + frac = area / (length2 - length1); + + return frac; +} + +/* + * Calculate selectivity of "var <@ const" operator, ie. estimate the fraction + * of multiranges that fall within the constant lower and upper bounds. This uses + * the histograms of range lower bounds and range lengths, on the assumption + * that the range lengths are independent of the lower bounds. + * + * The caller has already checked that constant lower and upper bounds are + * finite. 
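+ *
+ * The estimate walks the lower-bound histogram from the bin holding the
+ * constant's upper bound back to the bin holding its lower bound, and for
+ * each bin weights the bin's width by the fraction of the length histogram
+ * that is short enough to keep such a value inside the constant bounds.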
+ */ +static double +calc_hist_selectivity_contained(TypeCacheEntry *typcache, + const RangeBound *lower, RangeBound *upper, + const RangeBound *hist_lower, int hist_nvalues, + Datum *length_hist_values, int length_hist_nvalues) +{ + int i, + upper_index; + float8 prev_dist; + double bin_width; + double upper_bin_width; + double sum_frac; + + /* + * Begin by finding the bin containing the upper bound, in the lower bound + * histogram. Any range with a lower bound > constant upper bound can't + * match, ie. there are no matches in bins greater than upper_index. + */ + upper->inclusive = !upper->inclusive; + upper->lower = true; + upper_index = rbound_bsearch(typcache, upper, hist_lower, hist_nvalues, + false); + + /* + * If the upper bound value is below the histogram's lower limit, there + * are no matches. + */ + if (upper_index < 0) + return 0.0; + + /* + * If the upper bound value is at or beyond the histogram's upper limit, + * start our loop at the last actual bin, as though the upper bound were + * within that bin; get_position will clamp its result to 1.0 anyway. + * (This corresponds to assuming that the data population above the + * histogram's upper limit is empty, exactly like what we just assumed for + * the lower limit.) + */ + upper_index = Min(upper_index, hist_nvalues - 2); + + /* + * Calculate upper_bin_width, ie. the fraction of the (upper_index, + * upper_index + 1) bin which is greater than upper bound of query range + * using linear interpolation of subdiff function. + */ + upper_bin_width = get_position(typcache, upper, + &hist_lower[upper_index], + &hist_lower[upper_index + 1]); + + /* + * In the loop, dist and prev_dist are the distance of the "current" bin's + * lower and upper bounds from the constant upper bound. + * + * bin_width represents the width of the current bin. Normally it is 1.0, + * meaning a full width bin, but can be less in the corner cases: start + * and end of the loop. We start with bin_width = upper_bin_width, because + * we begin at the bin containing the upper bound. + */ + prev_dist = 0.0; + bin_width = upper_bin_width; + + sum_frac = 0.0; + for (i = upper_index; i >= 0; i--) + { + double dist; + double length_hist_frac; + bool final_bin = false; + + /* + * dist -- distance from upper bound of query range to lower bound of + * the current bin in the lower bound histogram. Or to the lower bound + * of the constant range, if this is the final bin, containing the + * constant lower bound. + */ + if (range_cmp_bounds(typcache, &hist_lower[i], lower) < 0) + { + dist = get_distance(typcache, lower, upper); + + /* + * Subtract from bin_width the portion of this bin that we want to + * ignore. + */ + bin_width -= get_position(typcache, lower, &hist_lower[i], + &hist_lower[i + 1]); + if (bin_width < 0.0) + bin_width = 0.0; + final_bin = true; + } + else + dist = get_distance(typcache, &hist_lower[i], upper); + + /* + * Estimate the fraction of tuples in this bin that are narrow enough + * to not exceed the distance to the upper bound of the query range. + */ + length_hist_frac = calc_length_hist_frac(length_hist_values, + length_hist_nvalues, + prev_dist, dist, true); + + /* + * Add the fraction of tuples in this bin, with a suitable length, to + * the total. + */ + sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1); + + if (final_bin) + break; + + bin_width = 1.0; + prev_dist = dist; + } + + return sum_frac; +} + +/* + * Calculate selectivity of "var @> const" operator, ie. 
estimate the fraction + * of multiranges that contain the constant lower and upper bounds. This uses + * the histograms of range lower bounds and range lengths, on the assumption + * that the range lengths are independent of the lower bounds. + */ +static double +calc_hist_selectivity_contains(TypeCacheEntry *typcache, + const RangeBound *lower, const RangeBound *upper, + const RangeBound *hist_lower, int hist_nvalues, + Datum *length_hist_values, int length_hist_nvalues) +{ + int i, + lower_index; + double bin_width, + lower_bin_width; + double sum_frac; + float8 prev_dist; + + /* Find the bin containing the lower bound of query range. */ + lower_index = rbound_bsearch(typcache, lower, hist_lower, hist_nvalues, + true); + + /* + * If the lower bound value is below the histogram's lower limit, there + * are no matches. + */ + if (lower_index < 0) + return 0.0; + + /* + * If the lower bound value is at or beyond the histogram's upper limit, + * start our loop at the last actual bin, as though the upper bound were + * within that bin; get_position will clamp its result to 1.0 anyway. + * (This corresponds to assuming that the data population above the + * histogram's upper limit is empty, exactly like what we just assumed for + * the lower limit.) + */ + lower_index = Min(lower_index, hist_nvalues - 2); + + /* + * Calculate lower_bin_width, ie. the fraction of the of (lower_index, + * lower_index + 1) bin which is greater than lower bound of query range + * using linear interpolation of subdiff function. + */ + lower_bin_width = get_position(typcache, lower, &hist_lower[lower_index], + &hist_lower[lower_index + 1]); + + /* + * Loop through all the lower bound bins, smaller than the query lower + * bound. In the loop, dist and prev_dist are the distance of the + * "current" bin's lower and upper bounds from the constant upper bound. + * We begin from query lower bound, and walk backwards, so the first bin's + * upper bound is the query lower bound, and its distance to the query + * upper bound is the length of the query range. + * + * bin_width represents the width of the current bin. Normally it is 1.0, + * meaning a full width bin, except for the first bin, which is only + * counted up to the constant lower bound. + */ + prev_dist = get_distance(typcache, lower, upper); + sum_frac = 0.0; + bin_width = lower_bin_width; + for (i = lower_index; i >= 0; i--) + { + float8 dist; + double length_hist_frac; + + /* + * dist -- distance from upper bound of query range to current value + * of lower bound histogram or lower bound of query range (if we've + * reach it). + */ + dist = get_distance(typcache, &hist_lower[i], upper); + + /* + * Get average fraction of length histogram which covers intervals + * longer than (or equal to) distance to upper bound of query range. + */ + length_hist_frac = + 1.0 - calc_length_hist_frac(length_hist_values, + length_hist_nvalues, + prev_dist, dist, false); + + sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1); + + bin_width = 1.0; + prev_dist = dist; + } + + return sum_frac; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/name.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/name.c new file mode 100644 index 00000000000..c136eabdbc9 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/name.c @@ -0,0 +1,355 @@ +/*------------------------------------------------------------------------- + * + * name.c + * Functions for the built-in type "name". 
+ * + * name replaces char16 and is carefully implemented so that it + * is a string of physical length NAMEDATALEN. + * DO NOT use hard-coded constants anywhere + * always use NAMEDATALEN as the symbolic constant! - jolly 8/21/95 + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/name.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "catalog/namespace.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_type.h" +#include "libpq/pqformat.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/varlena.h" + + +/***************************************************************************** + * USER I/O ROUTINES (none) * + *****************************************************************************/ + + +/* + * namein - converts cstring to internal representation + * + * Note: + * [Old] Currently if strlen(s) < NAMEDATALEN, the extra chars are nulls + * Now, always NULL terminated + */ +Datum +namein(PG_FUNCTION_ARGS) +{ + char *s = PG_GETARG_CSTRING(0); + Name result; + int len; + + len = strlen(s); + + /* Truncate oversize input */ + if (len >= NAMEDATALEN) + len = pg_mbcliplen(s, len, NAMEDATALEN - 1); + + /* We use palloc0 here to ensure result is zero-padded */ + result = (Name) palloc0(NAMEDATALEN); + memcpy(NameStr(*result), s, len); + + PG_RETURN_NAME(result); +} + +/* + * nameout - converts internal representation to cstring + */ +Datum +nameout(PG_FUNCTION_ARGS) +{ + Name s = PG_GETARG_NAME(0); + + PG_RETURN_CSTRING(pstrdup(NameStr(*s))); +} + +/* + * namerecv - converts external binary format to name + */ +Datum +namerecv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + Name result; + char *str; + int nbytes; + + str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); + if (nbytes >= NAMEDATALEN) + ereport(ERROR, + (errcode(ERRCODE_NAME_TOO_LONG), + errmsg("identifier too long"), + errdetail("Identifier must be less than %d characters.", + NAMEDATALEN))); + result = (NameData *) palloc0(NAMEDATALEN); + memcpy(result, str, nbytes); + pfree(str); + PG_RETURN_NAME(result); +} + +/* + * namesend - converts name to binary format + */ +Datum +namesend(PG_FUNCTION_ARGS) +{ + Name s = PG_GETARG_NAME(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendtext(&buf, NameStr(*s), strlen(NameStr(*s))); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + + +/***************************************************************************** + * COMPARISON/SORTING ROUTINES * + *****************************************************************************/ + +/* + * nameeq - returns 1 iff arguments are equal + * namene - returns 1 iff arguments are not equal + * namelt - returns 1 iff a < b + * namele - returns 1 iff a <= b + * namegt - returns 1 iff a > b + * namege - returns 1 iff a >= b + * + * Note that the use of strncmp with NAMEDATALEN limit is mostly historical; + * strcmp would do as well, because we do not allow NAME values that don't + * have a '\0' terminator. Whatever might be past the terminator is not + * considered relevant to comparisons. 
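To make the fixed-width layout concrete, here is a minimal standalone sketch of the zero-padded "name" representation (NAMEDATALEN hard-coded to 64 as in a default build; the real namein clips oversize multibyte input with pg_mbcliplen, which this toy skips):

#include <stdio.h>
#include <string.h>

#define NAMEDATALEN 64

typedef struct
{
    char    data[NAMEDATALEN];
} NameData;

static void
toy_namein(NameData *result, const char *s)
{
    size_t  len = strlen(s);

    if (len >= NAMEDATALEN)
        len = NAMEDATALEN - 1;              /* truncate oversize input */
    memset(result->data, 0, NAMEDATALEN);   /* zero-pad, like palloc0 */
    memcpy(result->data, s, len);
}

int
main(void)
{
    NameData    n;

    toy_namein(&n, "pg_catalog");
    /* both print 0: past the terminator everything is zeroed, so the
     * length-limited and plain comparisons agree */
    printf("%d %d\n",
           strncmp(n.data, "pg_catalog", NAMEDATALEN),
           strcmp(n.data, "pg_catalog"));
    return 0;
}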
+ */ +static int +namecmp(Name arg1, Name arg2, Oid collid) +{ + /* Fast path for common case used in system catalogs */ + if (collid == C_COLLATION_OID) + return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN); + + /* Else rely on the varstr infrastructure */ + return varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)), + NameStr(*arg2), strlen(NameStr(*arg2)), + collid); +} + +Datum +nameeq(PG_FUNCTION_ARGS) +{ + Name arg1 = PG_GETARG_NAME(0); + Name arg2 = PG_GETARG_NAME(1); + + PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) == 0); +} + +Datum +namene(PG_FUNCTION_ARGS) +{ + Name arg1 = PG_GETARG_NAME(0); + Name arg2 = PG_GETARG_NAME(1); + + PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) != 0); +} + +Datum +namelt(PG_FUNCTION_ARGS) +{ + Name arg1 = PG_GETARG_NAME(0); + Name arg2 = PG_GETARG_NAME(1); + + PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) < 0); +} + +Datum +namele(PG_FUNCTION_ARGS) +{ + Name arg1 = PG_GETARG_NAME(0); + Name arg2 = PG_GETARG_NAME(1); + + PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) <= 0); +} + +Datum +namegt(PG_FUNCTION_ARGS) +{ + Name arg1 = PG_GETARG_NAME(0); + Name arg2 = PG_GETARG_NAME(1); + + PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) > 0); +} + +Datum +namege(PG_FUNCTION_ARGS) +{ + Name arg1 = PG_GETARG_NAME(0); + Name arg2 = PG_GETARG_NAME(1); + + PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) >= 0); +} + +Datum +btnamecmp(PG_FUNCTION_ARGS) +{ + Name arg1 = PG_GETARG_NAME(0); + Name arg2 = PG_GETARG_NAME(1); + + PG_RETURN_INT32(namecmp(arg1, arg2, PG_GET_COLLATION())); +} + +Datum +btnamesortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + Oid collid = ssup->ssup_collation; + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); + + /* Use generic string SortSupport */ + varstr_sortsupport(ssup, NAMEOID, collid); + + MemoryContextSwitchTo(oldcontext); + + PG_RETURN_VOID(); +} + + +/***************************************************************************** + * MISCELLANEOUS PUBLIC ROUTINES * + *****************************************************************************/ + +void +namestrcpy(Name name, const char *str) +{ + /* NB: We need to zero-pad the destination. */ + strncpy(NameStr(*name), str, NAMEDATALEN); + NameStr(*name)[NAMEDATALEN - 1] = '\0'; +} + +/* + * Compare a NAME to a C string + * + * Assumes C collation always; be careful when using this for + * anything but equality checks! + */ +int +namestrcmp(Name name, const char *str) +{ + if (!name && !str) + return 0; + if (!name) + return -1; /* NULL < anything */ + if (!str) + return 1; /* NULL < anything */ + return strncmp(NameStr(*name), str, NAMEDATALEN); +} + + +/* + * SQL-functions CURRENT_USER, SESSION_USER + */ +Datum +current_user(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(DirectFunctionCall1(namein, CStringGetDatum(GetUserNameFromId(GetUserId(), false)))); +} + +Datum +session_user(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(DirectFunctionCall1(namein, CStringGetDatum(GetUserNameFromId(GetSessionUserId(), false)))); +} + + +/* + * SQL-functions CURRENT_SCHEMA, CURRENT_SCHEMAS + */ +Datum +current_schema(PG_FUNCTION_ARGS) +{ + List *search_path = fetch_search_path(false); + char *nspname; + + if (search_path == NIL) + PG_RETURN_NULL(); + nspname = get_namespace_name(linitial_oid(search_path)); + list_free(search_path); + if (!nspname) + PG_RETURN_NULL(); /* recently-deleted namespace? 
*/ + PG_RETURN_DATUM(DirectFunctionCall1(namein, CStringGetDatum(nspname))); +} + +Datum +current_schemas(PG_FUNCTION_ARGS) +{ + List *search_path = fetch_search_path(PG_GETARG_BOOL(0)); + ListCell *l; + Datum *names; + int i; + ArrayType *array; + + names = (Datum *) palloc(list_length(search_path) * sizeof(Datum)); + i = 0; + foreach(l, search_path) + { + char *nspname; + + nspname = get_namespace_name(lfirst_oid(l)); + if (nspname) /* watch out for deleted namespace */ + { + names[i] = DirectFunctionCall1(namein, CStringGetDatum(nspname)); + i++; + } + } + list_free(search_path); + + array = construct_array_builtin(names, i, NAMEOID); + + PG_RETURN_POINTER(array); +} + +/* + * SQL-function nameconcatoid(name, oid) returns name + * + * This is used in the information_schema to produce specific_name columns, + * which are supposed to be unique per schema. We achieve that (in an ugly + * way) by appending the object's OID. The result is the same as + * ($1::text || '_' || $2::text)::name + * except that, if it would not fit in NAMEDATALEN, we make it do so by + * truncating the name input (not the oid). + */ +Datum +nameconcatoid(PG_FUNCTION_ARGS) +{ + Name nam = PG_GETARG_NAME(0); + Oid oid = PG_GETARG_OID(1); + Name result; + char suffix[20]; + int suflen; + int namlen; + + suflen = snprintf(suffix, sizeof(suffix), "_%u", oid); + namlen = strlen(NameStr(*nam)); + + /* Truncate oversize input by truncating name part, not suffix */ + if (namlen + suflen >= NAMEDATALEN) + namlen = pg_mbcliplen(NameStr(*nam), namlen, NAMEDATALEN - 1 - suflen); + + /* We use palloc0 here to ensure result is zero-padded */ + result = (Name) palloc0(NAMEDATALEN); + memcpy(NameStr(*result), NameStr(*nam), namlen); + memcpy(NameStr(*result) + namlen, suffix, suflen); + + PG_RETURN_NAME(result); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/network.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/network.c new file mode 100644 index 00000000000..640fc37dc83 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/network.c @@ -0,0 +1,2104 @@ +/* + * PostgreSQL type definitions for the INET and CIDR types. + * + * src/backend/utils/adt/network.c + * + * Jon Postel RIP 16 Oct 1998 + */ + +#include "postgres.h" + +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +#include "access/stratnum.h" +#include "catalog/pg_opfamily.h" +#include "catalog/pg_type.h" +#include "common/hashfn.h" +#include "common/ip.h" +#include "lib/hyperloglog.h" +#include "libpq/libpq-be.h" +#include "libpq/pqformat.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/guc.h" +#include "utils/inet.h" +#include "utils/lsyscache.h" +#include "utils/sortsupport.h" + + +/* + * An IPv4 netmask size is a value in the range of 0 - 32, which is + * represented with 6 bits in inet/cidr abbreviated keys where possible. + * + * An IPv4 inet/cidr abbreviated key can use up to 25 bits for subnet + * component. 
+ */ +#define ABBREV_BITS_INET4_NETMASK_SIZE 6 +#define ABBREV_BITS_INET4_SUBNET 25 + +/* sortsupport for inet/cidr */ +typedef struct +{ + int64 input_count; /* number of non-null values seen */ + bool estimating; /* true if estimating cardinality */ + + hyperLogLogState abbr_card; /* cardinality estimator */ +} network_sortsupport_state; + +static int32 network_cmp_internal(inet *a1, inet *a2); +static int network_fast_cmp(Datum x, Datum y, SortSupport ssup); +static bool network_abbrev_abort(int memtupcount, SortSupport ssup); +static Datum network_abbrev_convert(Datum original, SortSupport ssup); +static List *match_network_function(Node *leftop, + Node *rightop, + int indexarg, + Oid funcid, + Oid opfamily); +static List *match_network_subset(Node *leftop, + Node *rightop, + bool is_eq, + Oid opfamily); +static bool addressOK(unsigned char *a, int bits, int family); +static inet *internal_inetpl(inet *ip, int64 addend); + + +/* + * Common INET/CIDR input routine + */ +static inet * +network_in(char *src, bool is_cidr, Node *escontext) +{ + int bits; + inet *dst; + + dst = (inet *) palloc0(sizeof(inet)); + + /* + * First, check to see if this is an IPv6 or IPv4 address. IPv6 addresses + * will have a : somewhere in them (several, in fact) so if there is one + * present, assume it's V6, otherwise assume it's V4. + */ + + if (strchr(src, ':') != NULL) + ip_family(dst) = PGSQL_AF_INET6; + else + ip_family(dst) = PGSQL_AF_INET; + + bits = pg_inet_net_pton(ip_family(dst), src, ip_addr(dst), + is_cidr ? ip_addrsize(dst) : -1); + if ((bits < 0) || (bits > ip_maxbits(dst))) + ereturn(escontext, NULL, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + /* translator: first %s is inet or cidr */ + errmsg("invalid input syntax for type %s: \"%s\"", + is_cidr ? "cidr" : "inet", src))); + + /* + * Error check: CIDR values must not have any bits set beyond the masklen. 
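The rule enforced here is that a cidr value may not have host bits set to the right of its masklen. A small standalone check of that rule on a hypothetical 4-byte IPv4 address (a simplified cousin of addressOK further below):

#include <stdbool.h>
#include <stdio.h>

static bool
toy_cidr_ok(const unsigned char a[4], int bits)
{
    for (int i = 0; i < 32; i++)
    {
        /* any set bit at or past the masklen makes the value invalid */
        if (i >= bits && (a[i / 8] & (0x80 >> (i % 8))))
            return false;
    }
    return true;
}

int
main(void)
{
    unsigned char net[4] = {192, 168, 1, 0};
    unsigned char host[4] = {192, 168, 1, 5};

    /* prints "1 0": 192.168.1.0/24 is a valid cidr, 192.168.1.5/24 is not */
    printf("%d %d\n", toy_cidr_ok(net, 24), toy_cidr_ok(host, 24));
    return 0;
}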
+ */ + if (is_cidr) + { + if (!addressOK(ip_addr(dst), bits, ip_family(dst))) + ereturn(escontext, NULL, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid cidr value: \"%s\"", src), + errdetail("Value has bits set to right of mask."))); + } + + ip_bits(dst) = bits; + SET_INET_VARSIZE(dst); + + return dst; +} + +Datum +inet_in(PG_FUNCTION_ARGS) +{ + char *src = PG_GETARG_CSTRING(0); + + PG_RETURN_INET_P(network_in(src, false, fcinfo->context)); +} + +Datum +cidr_in(PG_FUNCTION_ARGS) +{ + char *src = PG_GETARG_CSTRING(0); + + PG_RETURN_INET_P(network_in(src, true, fcinfo->context)); +} + + +/* + * Common INET/CIDR output routine + */ +static char * +network_out(inet *src, bool is_cidr) +{ + char tmp[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255/128")]; + char *dst; + int len; + + dst = pg_inet_net_ntop(ip_family(src), ip_addr(src), ip_bits(src), + tmp, sizeof(tmp)); + if (dst == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("could not format inet value: %m"))); + + /* For CIDR, add /n if not present */ + if (is_cidr && strchr(tmp, '/') == NULL) + { + len = strlen(tmp); + snprintf(tmp + len, sizeof(tmp) - len, "/%u", ip_bits(src)); + } + + return pstrdup(tmp); +} + +Datum +inet_out(PG_FUNCTION_ARGS) +{ + inet *src = PG_GETARG_INET_PP(0); + + PG_RETURN_CSTRING(network_out(src, false)); +} + +Datum +cidr_out(PG_FUNCTION_ARGS) +{ + inet *src = PG_GETARG_INET_PP(0); + + PG_RETURN_CSTRING(network_out(src, true)); +} + + +/* + * network_recv - converts external binary format to inet + * + * The external representation is (one byte apiece for) + * family, bits, is_cidr, address length, address in network byte order. + * + * Presence of is_cidr is largely for historical reasons, though it might + * allow some code-sharing on the client side. We send it correctly on + * output, but ignore the value on input. + */ +static inet * +network_recv(StringInfo buf, bool is_cidr) +{ + inet *addr; + char *addrptr; + int bits; + int nb, + i; + + /* make sure any unused bits in a CIDR value are zeroed */ + addr = (inet *) palloc0(sizeof(inet)); + + ip_family(addr) = pq_getmsgbyte(buf); + if (ip_family(addr) != PGSQL_AF_INET && + ip_family(addr) != PGSQL_AF_INET6) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + /* translator: %s is inet or cidr */ + errmsg("invalid address family in external \"%s\" value", + is_cidr ? "cidr" : "inet"))); + bits = pq_getmsgbyte(buf); + if (bits < 0 || bits > ip_maxbits(addr)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + /* translator: %s is inet or cidr */ + errmsg("invalid bits in external \"%s\" value", + is_cidr ? "cidr" : "inet"))); + ip_bits(addr) = bits; + i = pq_getmsgbyte(buf); /* ignore is_cidr */ + nb = pq_getmsgbyte(buf); + if (nb != ip_addrsize(addr)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + /* translator: %s is inet or cidr */ + errmsg("invalid length in external \"%s\" value", + is_cidr ? "cidr" : "inet"))); + + addrptr = (char *) ip_addr(addr); + for (i = 0; i < nb; i++) + addrptr[i] = pq_getmsgbyte(buf); + + /* + * Error check: CIDR values must not have any bits set beyond the masklen. 
+ */ + if (is_cidr) + { + if (!addressOK(ip_addr(addr), bits, ip_family(addr))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("invalid external \"cidr\" value"), + errdetail("Value has bits set to right of mask."))); + } + + SET_INET_VARSIZE(addr); + + return addr; +} + +Datum +inet_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + + PG_RETURN_INET_P(network_recv(buf, false)); +} + +Datum +cidr_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + + PG_RETURN_INET_P(network_recv(buf, true)); +} + + +/* + * network_send - converts inet to binary format + */ +static bytea * +network_send(inet *addr, bool is_cidr) +{ + StringInfoData buf; + char *addrptr; + int nb, + i; + + pq_begintypsend(&buf); + pq_sendbyte(&buf, ip_family(addr)); + pq_sendbyte(&buf, ip_bits(addr)); + pq_sendbyte(&buf, is_cidr); + nb = ip_addrsize(addr); + if (nb < 0) + nb = 0; + pq_sendbyte(&buf, nb); + addrptr = (char *) ip_addr(addr); + for (i = 0; i < nb; i++) + pq_sendbyte(&buf, addrptr[i]); + return pq_endtypsend(&buf); +} + +Datum +inet_send(PG_FUNCTION_ARGS) +{ + inet *addr = PG_GETARG_INET_PP(0); + + PG_RETURN_BYTEA_P(network_send(addr, false)); +} + +Datum +cidr_send(PG_FUNCTION_ARGS) +{ + inet *addr = PG_GETARG_INET_PP(0); + + PG_RETURN_BYTEA_P(network_send(addr, true)); +} + + +Datum +inet_to_cidr(PG_FUNCTION_ARGS) +{ + inet *src = PG_GETARG_INET_PP(0); + int bits; + + bits = ip_bits(src); + + /* safety check */ + if ((bits < 0) || (bits > ip_maxbits(src))) + elog(ERROR, "invalid inet bit length: %d", bits); + + PG_RETURN_INET_P(cidr_set_masklen_internal(src, bits)); +} + +Datum +inet_set_masklen(PG_FUNCTION_ARGS) +{ + inet *src = PG_GETARG_INET_PP(0); + int bits = PG_GETARG_INT32(1); + inet *dst; + + if (bits == -1) + bits = ip_maxbits(src); + + if ((bits < 0) || (bits > ip_maxbits(src))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid mask length: %d", bits))); + + /* clone the original data */ + dst = (inet *) palloc(VARSIZE_ANY(src)); + memcpy(dst, src, VARSIZE_ANY(src)); + + ip_bits(dst) = bits; + + PG_RETURN_INET_P(dst); +} + +Datum +cidr_set_masklen(PG_FUNCTION_ARGS) +{ + inet *src = PG_GETARG_INET_PP(0); + int bits = PG_GETARG_INT32(1); + + if (bits == -1) + bits = ip_maxbits(src); + + if ((bits < 0) || (bits > ip_maxbits(src))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid mask length: %d", bits))); + + PG_RETURN_INET_P(cidr_set_masklen_internal(src, bits)); +} + +/* + * Copy src and set mask length to 'bits' (which must be valid for the family) + */ +inet * +cidr_set_masklen_internal(const inet *src, int bits) +{ + inet *dst = (inet *) palloc0(sizeof(inet)); + + ip_family(dst) = ip_family(src); + ip_bits(dst) = bits; + + if (bits > 0) + { + Assert(bits <= ip_maxbits(dst)); + + /* Clone appropriate bytes of the address, leaving the rest 0 */ + memcpy(ip_addr(dst), ip_addr(src), (bits + 7) / 8); + + /* Clear any unwanted bits in the last partial byte */ + if (bits % 8) + ip_addr(dst)[bits / 8] &= ~(0xFF >> (bits % 8)); + } + + /* Set varlena header correctly */ + SET_INET_VARSIZE(dst); + + return dst; +} + +/* + * Basic comparison function for sorting and inet/cidr comparisons. + * + * Comparison is first on the common bits of the network part, then on + * the length of the network part, and then on the whole unmasked address. + * The effect is that the network part is the major sort key, and for + * equal network parts we sort on the host part. 
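Under these rules, for example, the following values sort in strictly increasing order: 192.168.0.0/24 < 192.168.0.0/25 < 192.168.0.128/25 < 192.168.1.0/24. The first pair ties on the 24 common network bits and is decided by the shorter masklen; the second pair differs within the first 25 network bits; the last pair already differs within the first 24 network bits.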
Note this is only sane + * for CIDR if address bits to the right of the mask are guaranteed zero; + * otherwise logically-equal CIDRs might compare different. + */ + +static int32 +network_cmp_internal(inet *a1, inet *a2) +{ + if (ip_family(a1) == ip_family(a2)) + { + int order; + + order = bitncmp(ip_addr(a1), ip_addr(a2), + Min(ip_bits(a1), ip_bits(a2))); + if (order != 0) + return order; + order = ((int) ip_bits(a1)) - ((int) ip_bits(a2)); + if (order != 0) + return order; + return bitncmp(ip_addr(a1), ip_addr(a2), ip_maxbits(a1)); + } + + return ip_family(a1) - ip_family(a2); +} + +Datum +network_cmp(PG_FUNCTION_ARGS) +{ + inet *a1 = PG_GETARG_INET_PP(0); + inet *a2 = PG_GETARG_INET_PP(1); + + PG_RETURN_INT32(network_cmp_internal(a1, a2)); +} + +/* + * SortSupport strategy routine + */ +Datum +network_sortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + + ssup->comparator = network_fast_cmp; + ssup->ssup_extra = NULL; + + if (ssup->abbreviate) + { + network_sortsupport_state *uss; + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); + + uss = palloc(sizeof(network_sortsupport_state)); + uss->input_count = 0; + uss->estimating = true; + initHyperLogLog(&uss->abbr_card, 10); + + ssup->ssup_extra = uss; + + ssup->comparator = ssup_datum_unsigned_cmp; + ssup->abbrev_converter = network_abbrev_convert; + ssup->abbrev_abort = network_abbrev_abort; + ssup->abbrev_full_comparator = network_fast_cmp; + + MemoryContextSwitchTo(oldcontext); + } + + PG_RETURN_VOID(); +} + +/* + * SortSupport comparison func + */ +static int +network_fast_cmp(Datum x, Datum y, SortSupport ssup) +{ + inet *arg1 = DatumGetInetPP(x); + inet *arg2 = DatumGetInetPP(y); + + return network_cmp_internal(arg1, arg2); +} + +/* + * Callback for estimating effectiveness of abbreviated key optimization. + * + * We pay no attention to the cardinality of the non-abbreviated data, because + * there is no equality fast-path within authoritative inet comparator. + */ +static bool +network_abbrev_abort(int memtupcount, SortSupport ssup) +{ + network_sortsupport_state *uss = ssup->ssup_extra; + double abbr_card; + + if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating) + return false; + + abbr_card = estimateHyperLogLog(&uss->abbr_card); + + /* + * If we have >100k distinct values, then even if we were sorting many + * billion rows we'd likely still break even, and the penalty of undoing + * that many rows of abbrevs would probably not be worth it. At this point + * we stop counting because we know that we're now fully committed. + */ + if (abbr_card > 100000.0) + { +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "network_abbrev: estimation ends at cardinality %f" + " after " INT64_FORMAT " values (%d rows)", + abbr_card, uss->input_count, memtupcount); +#endif + uss->estimating = false; + return false; + } + + /* + * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row + * fudge factor allows us to abort earlier on genuinely pathological data + * where we've had exactly one abbreviated value in the first 2k + * (non-null) rows. 
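Concretely, with the constants used above, the abort check only fires once at least 10000 non-null values have been seen, and it abandons abbreviation when the estimated key cardinality is below input_count / 2000 + 0.5, e.g. fewer than 5.5 distinct abbreviated keys after 10000 inputs; conversely, crossing 100000 distinct keys commits to abbreviation for good.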
+ */ + if (abbr_card < uss->input_count / 2000.0 + 0.5) + { +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "network_abbrev: aborting abbreviation at cardinality %f" + " below threshold %f after " INT64_FORMAT " values (%d rows)", + abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count, + memtupcount); +#endif + return true; + } + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "network_abbrev: cardinality %f after " INT64_FORMAT + " values (%d rows)", abbr_card, uss->input_count, memtupcount); +#endif + + return false; +} + +/* + * SortSupport conversion routine. Converts original inet/cidr representation + * to abbreviated key representation that works with simple 3-way unsigned int + * comparisons. The network_cmp_internal() rules for sorting inet/cidr datums + * are followed by abbreviated comparisons by an encoding scheme that + * conditions keys through careful use of padding. + * + * Some background: inet values have three major components (take for example + * the address 1.2.3.4/24): + * + * * A network, or netmasked bits (1.2.3.0). + * * A netmask size (/24). + * * A subnet, or bits outside of the netmask (0.0.0.4). + * + * cidr values are the same except that with only the first two components -- + * all their subnet bits *must* be zero (1.2.3.0/24). + * + * IPv4 and IPv6 are identical in this makeup, with the difference being that + * IPv4 addresses have a maximum of 32 bits compared to IPv6's 64 bits, so in + * IPv6 each part may be larger. + * + * inet/cidr types compare using these sorting rules. If inequality is detected + * at any step, comparison is finished. If any rule is a tie, the algorithm + * drops through to the next to break it: + * + * 1. IPv4 always appears before IPv6. + * 2. Network bits are compared. + * 3. Netmask size is compared. + * 4. All bits are compared (having made it here, we know that both + * netmasked bits and netmask size are equal, so we're in effect only + * comparing subnet bits). + * + * When generating abbreviated keys for SortSupport, we pack as much as we can + * into a datum while ensuring that when comparing those keys as integers, + * these rules will be respected. Exact contents depend on IP family and datum + * size. + * + * IPv4 + * ---- + * + * 4 byte datums: + * + * Start with 1 bit for the IP family (IPv4 or IPv6; this bit is present in + * every case below) followed by all but 1 of the netmasked bits. + * + * +----------+---------------------+ + * | 1 bit IP | 31 bits network | (1 bit network + * | family | (truncated) | omitted) + * +----------+---------------------+ + * + * 8 byte datums: + * + * We have space to store all netmasked bits, followed by the netmask size, + * followed by 25 bits of the subnet (25 bits is usually more than enough in + * practice). cidr datums always have all-zero subnet bits. 
+ * + * +----------+-----------------------+--------------+--------------------+ + * | 1 bit IP | 32 bits network | 6 bits | 25 bits subnet | + * | family | (full) | network size | (truncated) | + * +----------+-----------------------+--------------+--------------------+ + * + * IPv6 + * ---- + * + * 4 byte datums: + * + * +----------+---------------------+ + * | 1 bit IP | 31 bits network | (up to 97 bits + * | family | (truncated) | network omitted) + * +----------+---------------------+ + * + * 8 byte datums: + * + * +----------+---------------------------------+ + * | 1 bit IP | 63 bits network | (up to 65 bits + * | family | (truncated) | network omitted) + * +----------+---------------------------------+ + */ +static Datum +network_abbrev_convert(Datum original, SortSupport ssup) +{ + network_sortsupport_state *uss = ssup->ssup_extra; + inet *authoritative = DatumGetInetPP(original); + Datum res, + ipaddr_datum, + subnet_bitmask, + network; + int subnet_size; + + Assert(ip_family(authoritative) == PGSQL_AF_INET || + ip_family(authoritative) == PGSQL_AF_INET6); + + /* + * Get an unsigned integer representation of the IP address by taking its + * first 4 or 8 bytes. Always take all 4 bytes of an IPv4 address. Take + * the first 8 bytes of an IPv6 address with an 8 byte datum and 4 bytes + * otherwise. + * + * We're consuming an array of unsigned char, so byteswap on little endian + * systems (an inet's ipaddr field stores the most significant byte + * first). + */ + if (ip_family(authoritative) == PGSQL_AF_INET) + { + uint32 ipaddr_datum32; + + memcpy(&ipaddr_datum32, ip_addr(authoritative), sizeof(uint32)); + + /* Must byteswap on little-endian machines */ +#ifndef WORDS_BIGENDIAN + ipaddr_datum = pg_bswap32(ipaddr_datum32); +#else + ipaddr_datum = ipaddr_datum32; +#endif + + /* Initialize result without setting ipfamily bit */ + res = (Datum) 0; + } + else + { + memcpy(&ipaddr_datum, ip_addr(authoritative), sizeof(Datum)); + + /* Must byteswap on little-endian machines */ + ipaddr_datum = DatumBigEndianToNative(ipaddr_datum); + + /* Initialize result with ipfamily (most significant) bit set */ + res = ((Datum) 1) << (SIZEOF_DATUM * BITS_PER_BYTE - 1); + } + + /* + * ipaddr_datum must be "split": high order bits go in "network" component + * of abbreviated key (often with zeroed bits at the end due to masking), + * while low order bits go in "subnet" component when there is space for + * one. This is often accomplished by generating a temp datum subnet + * bitmask, which we may reuse later when generating the subnet bits + * themselves. (Note that subnet bits are only used with IPv4 datums on + * platforms where datum is 8 bytes.) + * + * The number of bits in subnet is used to generate a datum subnet + * bitmask. For example, with a /24 IPv4 datum there are 8 subnet bits + * (since 32 - 24 is 8), so the final subnet bitmask is B'1111 1111'. We + * need explicit handling for cases where the ipaddr bits cannot all fit + * in a datum, though (otherwise we'd incorrectly mask the network + * component with IPv6 values). 
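As a standalone sketch of the 8-byte IPv4 packing just described, here is the abbreviated key that would be produced for a hypothetical value 1.2.3.4/24 (the shift widths mirror ABBREV_BITS_INET4_NETMASK_SIZE and ABBREV_BITS_INET4_SUBNET; byte swapping and the SIZEOF_DATUM checks are left out):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    uint64_t    ipaddr = 0x01020304;    /* 1.2.3.4, most significant byte first */
    unsigned    bits = 24;              /* netmask length */
    uint64_t    subnet_mask = (1ULL << (32 - bits)) - 1;

    uint64_t    network = (ipaddr & ~subnet_mask) << (6 + 25);
    uint64_t    size = (uint64_t) bits << 25;
    uint64_t    subnet = ipaddr & subnet_mask;  /* 8 subnet bits, fits in 25 */
    uint64_t    key = network | size | subnet;  /* family bit stays 0 for IPv4 */

    /* prints 0081018030000004 */
    printf("%016llx\n", (unsigned long long) key);
    return 0;
}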
+ */ + subnet_size = ip_maxbits(authoritative) - ip_bits(authoritative); + Assert(subnet_size >= 0); + /* subnet size must work with prefix ipaddr cases */ + subnet_size %= SIZEOF_DATUM * BITS_PER_BYTE; + if (ip_bits(authoritative) == 0) + { + /* Fit as many ipaddr bits as possible into subnet */ + subnet_bitmask = ((Datum) 0) - 1; + network = 0; + } + else if (ip_bits(authoritative) < SIZEOF_DATUM * BITS_PER_BYTE) + { + /* Split ipaddr bits between network and subnet */ + subnet_bitmask = (((Datum) 1) << subnet_size) - 1; + network = ipaddr_datum & ~subnet_bitmask; + } + else + { + /* Fit as many ipaddr bits as possible into network */ + subnet_bitmask = 0; + network = ipaddr_datum; + } + +#if SIZEOF_DATUM == 8 + if (ip_family(authoritative) == PGSQL_AF_INET) + { + /* + * IPv4 with 8 byte datums: keep all 32 netmasked bits, netmask size, + * and most significant 25 subnet bits + */ + Datum netmask_size = (Datum) ip_bits(authoritative); + Datum subnet; + + /* + * Shift left 31 bits: 6 bits netmask size + 25 subnet bits. + * + * We don't make any distinction between network bits that are zero + * due to masking and "true"/non-masked zero bits. An abbreviated + * comparison that is resolved by comparing a non-masked and non-zero + * bit to a masked/zeroed bit is effectively resolved based on + * ip_bits(), even though the comparison won't reach the netmask_size + * bits. + */ + network <<= (ABBREV_BITS_INET4_NETMASK_SIZE + + ABBREV_BITS_INET4_SUBNET); + + /* Shift size to make room for subnet bits at the end */ + netmask_size <<= ABBREV_BITS_INET4_SUBNET; + + /* Extract subnet bits without shifting them */ + subnet = ipaddr_datum & subnet_bitmask; + + /* + * If we have more than 25 subnet bits, we can't fit everything. Shift + * subnet down to avoid clobbering bits that are only supposed to be + * used for netmask_size. + * + * Discarding the least significant subnet bits like this is correct + * because abbreviated comparisons that are resolved at the subnet + * level must have had equal netmask_size/ip_bits() values in order to + * get that far. + */ + if (subnet_size > ABBREV_BITS_INET4_SUBNET) + subnet >>= subnet_size - ABBREV_BITS_INET4_SUBNET; + + /* + * Assemble the final abbreviated key without clobbering the ipfamily + * bit that must remain a zero. + */ + res |= network | netmask_size | subnet; + } + else +#endif + { + /* + * 4 byte datums, or IPv6 with 8 byte datums: Use as many of the + * netmasked bits as will fit in final abbreviated key. Avoid + * clobbering the ipfamily bit that was set earlier. + */ + res |= network >> 1; + } + + uss->input_count += 1; + + /* Hash abbreviated key */ + if (uss->estimating) + { + uint32 tmp; + +#if SIZEOF_DATUM == 8 + tmp = (uint32) res ^ (uint32) ((uint64) res >> 32); +#else /* SIZEOF_DATUM != 8 */ + tmp = (uint32) res; +#endif + + addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); + } + + return res; +} + +/* + * Boolean ordering tests. 
+ */ +Datum +network_lt(PG_FUNCTION_ARGS) +{ + inet *a1 = PG_GETARG_INET_PP(0); + inet *a2 = PG_GETARG_INET_PP(1); + + PG_RETURN_BOOL(network_cmp_internal(a1, a2) < 0); +} + +Datum +network_le(PG_FUNCTION_ARGS) +{ + inet *a1 = PG_GETARG_INET_PP(0); + inet *a2 = PG_GETARG_INET_PP(1); + + PG_RETURN_BOOL(network_cmp_internal(a1, a2) <= 0); +} + +Datum +network_eq(PG_FUNCTION_ARGS) +{ + inet *a1 = PG_GETARG_INET_PP(0); + inet *a2 = PG_GETARG_INET_PP(1); + + PG_RETURN_BOOL(network_cmp_internal(a1, a2) == 0); +} + +Datum +network_ge(PG_FUNCTION_ARGS) +{ + inet *a1 = PG_GETARG_INET_PP(0); + inet *a2 = PG_GETARG_INET_PP(1); + + PG_RETURN_BOOL(network_cmp_internal(a1, a2) >= 0); +} + +Datum +network_gt(PG_FUNCTION_ARGS) +{ + inet *a1 = PG_GETARG_INET_PP(0); + inet *a2 = PG_GETARG_INET_PP(1); + + PG_RETURN_BOOL(network_cmp_internal(a1, a2) > 0); +} + +Datum +network_ne(PG_FUNCTION_ARGS) +{ + inet *a1 = PG_GETARG_INET_PP(0); + inet *a2 = PG_GETARG_INET_PP(1); + + PG_RETURN_BOOL(network_cmp_internal(a1, a2) != 0); +} + +/* + * MIN/MAX support functions. + */ +Datum +network_smaller(PG_FUNCTION_ARGS) +{ + inet *a1 = PG_GETARG_INET_PP(0); + inet *a2 = PG_GETARG_INET_PP(1); + + if (network_cmp_internal(a1, a2) < 0) + PG_RETURN_INET_P(a1); + else + PG_RETURN_INET_P(a2); +} + +Datum +network_larger(PG_FUNCTION_ARGS) +{ + inet *a1 = PG_GETARG_INET_PP(0); + inet *a2 = PG_GETARG_INET_PP(1); + + if (network_cmp_internal(a1, a2) > 0) + PG_RETURN_INET_P(a1); + else + PG_RETURN_INET_P(a2); +} + +/* + * Support function for hash indexes on inet/cidr. + */ +Datum +hashinet(PG_FUNCTION_ARGS) +{ + inet *addr = PG_GETARG_INET_PP(0); + int addrsize = ip_addrsize(addr); + + /* XXX this assumes there are no pad bytes in the data structure */ + return hash_any((unsigned char *) VARDATA_ANY(addr), addrsize + 2); +} + +Datum +hashinetextended(PG_FUNCTION_ARGS) +{ + inet *addr = PG_GETARG_INET_PP(0); + int addrsize = ip_addrsize(addr); + + return hash_any_extended((unsigned char *) VARDATA_ANY(addr), addrsize + 2, + PG_GETARG_INT64(1)); +} + +/* + * Boolean network-inclusion tests. 
+ */ +Datum +network_sub(PG_FUNCTION_ARGS) +{ + inet *a1 = PG_GETARG_INET_PP(0); + inet *a2 = PG_GETARG_INET_PP(1); + + if (ip_family(a1) == ip_family(a2)) + { + PG_RETURN_BOOL(ip_bits(a1) > ip_bits(a2) && + bitncmp(ip_addr(a1), ip_addr(a2), ip_bits(a2)) == 0); + } + + PG_RETURN_BOOL(false); +} + +Datum +network_subeq(PG_FUNCTION_ARGS) +{ + inet *a1 = PG_GETARG_INET_PP(0); + inet *a2 = PG_GETARG_INET_PP(1); + + if (ip_family(a1) == ip_family(a2)) + { + PG_RETURN_BOOL(ip_bits(a1) >= ip_bits(a2) && + bitncmp(ip_addr(a1), ip_addr(a2), ip_bits(a2)) == 0); + } + + PG_RETURN_BOOL(false); +} + +Datum +network_sup(PG_FUNCTION_ARGS) +{ + inet *a1 = PG_GETARG_INET_PP(0); + inet *a2 = PG_GETARG_INET_PP(1); + + if (ip_family(a1) == ip_family(a2)) + { + PG_RETURN_BOOL(ip_bits(a1) < ip_bits(a2) && + bitncmp(ip_addr(a1), ip_addr(a2), ip_bits(a1)) == 0); + } + + PG_RETURN_BOOL(false); +} + +Datum +network_supeq(PG_FUNCTION_ARGS) +{ + inet *a1 = PG_GETARG_INET_PP(0); + inet *a2 = PG_GETARG_INET_PP(1); + + if (ip_family(a1) == ip_family(a2)) + { + PG_RETURN_BOOL(ip_bits(a1) <= ip_bits(a2) && + bitncmp(ip_addr(a1), ip_addr(a2), ip_bits(a1)) == 0); + } + + PG_RETURN_BOOL(false); +} + +Datum +network_overlap(PG_FUNCTION_ARGS) +{ + inet *a1 = PG_GETARG_INET_PP(0); + inet *a2 = PG_GETARG_INET_PP(1); + + if (ip_family(a1) == ip_family(a2)) + { + PG_RETURN_BOOL(bitncmp(ip_addr(a1), ip_addr(a2), + Min(ip_bits(a1), ip_bits(a2))) == 0); + } + + PG_RETURN_BOOL(false); +} + +/* + * Planner support function for network subset/superset operators + */ +Datum +network_subset_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestIndexCondition)) + { + /* Try to convert operator/function call to index conditions */ + SupportRequestIndexCondition *req = (SupportRequestIndexCondition *) rawreq; + + if (is_opclause(req->node)) + { + OpExpr *clause = (OpExpr *) req->node; + + Assert(list_length(clause->args) == 2); + ret = (Node *) + match_network_function((Node *) linitial(clause->args), + (Node *) lsecond(clause->args), + req->indexarg, + req->funcid, + req->opfamily); + } + else if (is_funcclause(req->node)) /* be paranoid */ + { + FuncExpr *clause = (FuncExpr *) req->node; + + Assert(list_length(clause->args) == 2); + ret = (Node *) + match_network_function((Node *) linitial(clause->args), + (Node *) lsecond(clause->args), + req->indexarg, + req->funcid, + req->opfamily); + } + } + + PG_RETURN_POINTER(ret); +} + +/* + * match_network_function + * Try to generate an indexqual for a network subset/superset function. + * + * This layer is just concerned with identifying the function and swapping + * the arguments if necessary. 
+ */ +static List * +match_network_function(Node *leftop, + Node *rightop, + int indexarg, + Oid funcid, + Oid opfamily) +{ + switch (funcid) + { + case F_NETWORK_SUB: + /* indexkey must be on the left */ + if (indexarg != 0) + return NIL; + return match_network_subset(leftop, rightop, false, opfamily); + + case F_NETWORK_SUBEQ: + /* indexkey must be on the left */ + if (indexarg != 0) + return NIL; + return match_network_subset(leftop, rightop, true, opfamily); + + case F_NETWORK_SUP: + /* indexkey must be on the right */ + if (indexarg != 1) + return NIL; + return match_network_subset(rightop, leftop, false, opfamily); + + case F_NETWORK_SUPEQ: + /* indexkey must be on the right */ + if (indexarg != 1) + return NIL; + return match_network_subset(rightop, leftop, true, opfamily); + + default: + + /* + * We'd only get here if somebody attached this support function + * to an unexpected function. Maybe we should complain, but for + * now, do nothing. + */ + return NIL; + } +} + +/* + * match_network_subset + * Try to generate an indexqual for a network subset function. + */ +static List * +match_network_subset(Node *leftop, + Node *rightop, + bool is_eq, + Oid opfamily) +{ + List *result; + Datum rightopval; + Oid datatype = INETOID; + Oid opr1oid; + Oid opr2oid; + Datum opr1right; + Datum opr2right; + Expr *expr; + + /* + * Can't do anything with a non-constant or NULL comparison value. + * + * Note that since we restrict ourselves to cases with a hard constant on + * the RHS, it's a-fortiori a pseudoconstant, and we don't need to worry + * about verifying that. + */ + if (!IsA(rightop, Const) || + ((Const *) rightop)->constisnull) + return NIL; + rightopval = ((Const *) rightop)->constvalue; + + /* + * Must check that index's opfamily supports the operators we will want to + * apply. + * + * We insist on the opfamily being the specific one we expect, else we'd + * do the wrong thing if someone were to make a reverse-sort opfamily with + * the same operators. + */ + if (opfamily != NETWORK_BTREE_FAM_OID) + return NIL; + + /* + * create clause "key >= network_scan_first( rightopval )", or ">" if the + * operator disallows equality. + * + * Note: seeing that this function supports only fixed values for opfamily + * and datatype, we could just hard-wire the operator OIDs instead of + * looking them up. But for now it seems better to be general. 
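For example, assuming a btree index on an inet column ip, a clause such as ip << '10.1.0.0/16' (a proper subset, so equality on the lower endpoint is disallowed) would be rewritten into the indexable pair ip > network_scan_first('10.1.0.0/16') AND ip <= network_scan_last('10.1.0.0/16'), i.e. a scan between 10.1.0.0/16 and 10.1.255.255/32, while ip <<= '10.1.0.0/16' would use >= for the first clause instead.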
+ */ + if (is_eq) + { + opr1oid = get_opfamily_member(opfamily, datatype, datatype, + BTGreaterEqualStrategyNumber); + if (opr1oid == InvalidOid) + elog(ERROR, "no >= operator for opfamily %u", opfamily); + } + else + { + opr1oid = get_opfamily_member(opfamily, datatype, datatype, + BTGreaterStrategyNumber); + if (opr1oid == InvalidOid) + elog(ERROR, "no > operator for opfamily %u", opfamily); + } + + opr1right = network_scan_first(rightopval); + + expr = make_opclause(opr1oid, BOOLOID, false, + (Expr *) leftop, + (Expr *) makeConst(datatype, -1, + InvalidOid, /* not collatable */ + -1, opr1right, + false, false), + InvalidOid, InvalidOid); + result = list_make1(expr); + + /* create clause "key <= network_scan_last( rightopval )" */ + + opr2oid = get_opfamily_member(opfamily, datatype, datatype, + BTLessEqualStrategyNumber); + if (opr2oid == InvalidOid) + elog(ERROR, "no <= operator for opfamily %u", opfamily); + + opr2right = network_scan_last(rightopval); + + expr = make_opclause(opr2oid, BOOLOID, false, + (Expr *) leftop, + (Expr *) makeConst(datatype, -1, + InvalidOid, /* not collatable */ + -1, opr2right, + false, false), + InvalidOid, InvalidOid); + result = lappend(result, expr); + + return result; +} + + +/* + * Extract data from a network datatype. + */ +Datum +network_host(PG_FUNCTION_ARGS) +{ + inet *ip = PG_GETARG_INET_PP(0); + char *ptr; + char tmp[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255/128")]; + + /* force display of max bits, regardless of masklen... */ + if (pg_inet_net_ntop(ip_family(ip), ip_addr(ip), ip_maxbits(ip), + tmp, sizeof(tmp)) == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("could not format inet value: %m"))); + + /* Suppress /n if present (shouldn't happen now) */ + if ((ptr = strchr(tmp, '/')) != NULL) + *ptr = '\0'; + + PG_RETURN_TEXT_P(cstring_to_text(tmp)); +} + +/* + * network_show implements the inet and cidr casts to text. This is not + * quite the same behavior as network_out, hence we can't drop it in favor + * of CoerceViaIO. 
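The visible difference is small but real: under inet's usual display convention a host address such as 192.168.1.5 is shown without its /32 suffix, whereas this cast-to-text path always appends the masklen and yields 192.168.1.5/32.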
+ */ +Datum +network_show(PG_FUNCTION_ARGS) +{ + inet *ip = PG_GETARG_INET_PP(0); + int len; + char tmp[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255/128")]; + + if (pg_inet_net_ntop(ip_family(ip), ip_addr(ip), ip_maxbits(ip), + tmp, sizeof(tmp)) == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("could not format inet value: %m"))); + + /* Add /n if not present (which it won't be) */ + if (strchr(tmp, '/') == NULL) + { + len = strlen(tmp); + snprintf(tmp + len, sizeof(tmp) - len, "/%u", ip_bits(ip)); + } + + PG_RETURN_TEXT_P(cstring_to_text(tmp)); +} + +Datum +inet_abbrev(PG_FUNCTION_ARGS) +{ + inet *ip = PG_GETARG_INET_PP(0); + char *dst; + char tmp[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255/128")]; + + dst = pg_inet_net_ntop(ip_family(ip), ip_addr(ip), + ip_bits(ip), tmp, sizeof(tmp)); + + if (dst == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("could not format inet value: %m"))); + + PG_RETURN_TEXT_P(cstring_to_text(tmp)); +} + +Datum +cidr_abbrev(PG_FUNCTION_ARGS) +{ + inet *ip = PG_GETARG_INET_PP(0); + char *dst; + char tmp[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255/128")]; + + dst = pg_inet_cidr_ntop(ip_family(ip), ip_addr(ip), + ip_bits(ip), tmp, sizeof(tmp)); + + if (dst == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("could not format cidr value: %m"))); + + PG_RETURN_TEXT_P(cstring_to_text(tmp)); +} + +Datum +network_masklen(PG_FUNCTION_ARGS) +{ + inet *ip = PG_GETARG_INET_PP(0); + + PG_RETURN_INT32(ip_bits(ip)); +} + +Datum +network_family(PG_FUNCTION_ARGS) +{ + inet *ip = PG_GETARG_INET_PP(0); + + switch (ip_family(ip)) + { + case PGSQL_AF_INET: + PG_RETURN_INT32(4); + break; + case PGSQL_AF_INET6: + PG_RETURN_INT32(6); + break; + default: + PG_RETURN_INT32(0); + break; + } +} + +Datum +network_broadcast(PG_FUNCTION_ARGS) +{ + inet *ip = PG_GETARG_INET_PP(0); + inet *dst; + int byte; + int bits; + int maxbytes; + unsigned char mask; + unsigned char *a, + *b; + + /* make sure any unused bits are zeroed */ + dst = (inet *) palloc0(sizeof(inet)); + + maxbytes = ip_addrsize(ip); + bits = ip_bits(ip); + a = ip_addr(ip); + b = ip_addr(dst); + + for (byte = 0; byte < maxbytes; byte++) + { + if (bits >= 8) + { + mask = 0x00; + bits -= 8; + } + else if (bits == 0) + mask = 0xff; + else + { + mask = 0xff >> bits; + bits = 0; + } + + b[byte] = a[byte] | mask; + } + + ip_family(dst) = ip_family(ip); + ip_bits(dst) = ip_bits(ip); + SET_INET_VARSIZE(dst); + + PG_RETURN_INET_P(dst); +} + +Datum +network_network(PG_FUNCTION_ARGS) +{ + inet *ip = PG_GETARG_INET_PP(0); + inet *dst; + int byte; + int bits; + unsigned char mask; + unsigned char *a, + *b; + + /* make sure any unused bits are zeroed */ + dst = (inet *) palloc0(sizeof(inet)); + + bits = ip_bits(ip); + a = ip_addr(ip); + b = ip_addr(dst); + + byte = 0; + + while (bits) + { + if (bits >= 8) + { + mask = 0xff; + bits -= 8; + } + else + { + mask = 0xff << (8 - bits); + bits = 0; + } + + b[byte] = a[byte] & mask; + byte++; + } + + ip_family(dst) = ip_family(ip); + ip_bits(dst) = ip_bits(ip); + SET_INET_VARSIZE(dst); + + PG_RETURN_INET_P(dst); +} + +Datum +network_netmask(PG_FUNCTION_ARGS) +{ + inet *ip = PG_GETARG_INET_PP(0); + inet *dst; + int byte; + int bits; + unsigned char mask; + unsigned char *b; + + /* make sure any unused bits are zeroed */ + dst = (inet *) palloc0(sizeof(inet)); + + bits = ip_bits(ip); + b = ip_addr(dst); + + byte = 0; + + while (bits) + { + if (bits >= 8) + { + mask = 
0xff; + bits -= 8; + } + else + { + mask = 0xff << (8 - bits); + bits = 0; + } + + b[byte] = mask; + byte++; + } + + ip_family(dst) = ip_family(ip); + ip_bits(dst) = ip_maxbits(ip); + SET_INET_VARSIZE(dst); + + PG_RETURN_INET_P(dst); +} + +Datum +network_hostmask(PG_FUNCTION_ARGS) +{ + inet *ip = PG_GETARG_INET_PP(0); + inet *dst; + int byte; + int bits; + int maxbytes; + unsigned char mask; + unsigned char *b; + + /* make sure any unused bits are zeroed */ + dst = (inet *) palloc0(sizeof(inet)); + + maxbytes = ip_addrsize(ip); + bits = ip_maxbits(ip) - ip_bits(ip); + b = ip_addr(dst); + + byte = maxbytes - 1; + + while (bits) + { + if (bits >= 8) + { + mask = 0xff; + bits -= 8; + } + else + { + mask = 0xff >> (8 - bits); + bits = 0; + } + + b[byte] = mask; + byte--; + } + + ip_family(dst) = ip_family(ip); + ip_bits(dst) = ip_maxbits(ip); + SET_INET_VARSIZE(dst); + + PG_RETURN_INET_P(dst); +} + +/* + * Returns true if the addresses are from the same family, or false. Used to + * check that we can create a network which contains both of the networks. + */ +Datum +inet_same_family(PG_FUNCTION_ARGS) +{ + inet *a1 = PG_GETARG_INET_PP(0); + inet *a2 = PG_GETARG_INET_PP(1); + + PG_RETURN_BOOL(ip_family(a1) == ip_family(a2)); +} + +/* + * Returns the smallest CIDR which contains both of the inputs. + */ +Datum +inet_merge(PG_FUNCTION_ARGS) +{ + inet *a1 = PG_GETARG_INET_PP(0), + *a2 = PG_GETARG_INET_PP(1); + int commonbits; + + if (ip_family(a1) != ip_family(a2)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot merge addresses from different families"))); + + commonbits = bitncommon(ip_addr(a1), ip_addr(a2), + Min(ip_bits(a1), ip_bits(a2))); + + PG_RETURN_INET_P(cidr_set_masklen_internal(a1, commonbits)); +} + +/* + * Convert a value of a network datatype to an approximate scalar value. + * This is used for estimating selectivities of inequality operators + * involving network types. + * + * On failure (e.g., unsupported typid), set *failure to true; + * otherwise, that variable is not changed. + */ +double +convert_network_to_scalar(Datum value, Oid typid, bool *failure) +{ + switch (typid) + { + case INETOID: + case CIDROID: + { + inet *ip = DatumGetInetPP(value); + int len; + double res; + int i; + + /* + * Note that we don't use the full address for IPv6. + */ + if (ip_family(ip) == PGSQL_AF_INET) + len = 4; + else + len = 5; + + res = ip_family(ip); + for (i = 0; i < len; i++) + { + res *= 256; + res += ip_addr(ip)[i]; + } + return res; + } + case MACADDROID: + { + macaddr *mac = DatumGetMacaddrP(value); + double res; + + res = (mac->a << 16) | (mac->b << 8) | (mac->c); + res *= 256 * 256 * 256; + res += (mac->d << 16) | (mac->e << 8) | (mac->f); + return res; + } + case MACADDR8OID: + { + macaddr8 *mac = DatumGetMacaddr8P(value); + double res; + + res = (mac->a << 24) | (mac->b << 16) | (mac->c << 8) | (mac->d); + res *= ((double) 256) * 256 * 256 * 256; + res += (mac->e << 24) | (mac->f << 16) | (mac->g << 8) | (mac->h); + return res; + } + } + + *failure = true; + return 0; +} + +/* + * int + * bitncmp(l, r, n) + * compare bit masks l and r, for n bits. + * return: + * <0, >0, or 0 in the libc tradition. + * note: + * network byte order assumed. this means 192.5.5.240/28 has + * 0x11110000 in its fourth octet. 
+ * author: + * Paul Vixie (ISC), June 1996 + */ +int +bitncmp(const unsigned char *l, const unsigned char *r, int n) +{ + unsigned int lb, + rb; + int x, + b; + + b = n / 8; + x = memcmp(l, r, b); + if (x || (n % 8) == 0) + return x; + + lb = l[b]; + rb = r[b]; + for (b = n % 8; b > 0; b--) + { + if (IS_HIGHBIT_SET(lb) != IS_HIGHBIT_SET(rb)) + { + if (IS_HIGHBIT_SET(lb)) + return 1; + return -1; + } + lb <<= 1; + rb <<= 1; + } + return 0; +} + +/* + * bitncommon: compare bit masks l and r, for up to n bits. + * + * Returns the number of leading bits that match (0 to n). + */ +int +bitncommon(const unsigned char *l, const unsigned char *r, int n) +{ + int byte, + nbits; + + /* number of bits to examine in last byte */ + nbits = n % 8; + + /* check whole bytes */ + for (byte = 0; byte < n / 8; byte++) + { + if (l[byte] != r[byte]) + { + /* at least one bit in the last byte is not common */ + nbits = 7; + break; + } + } + + /* check bits in last partial byte */ + if (nbits != 0) + { + /* calculate diff of first non-matching bytes */ + unsigned int diff = l[byte] ^ r[byte]; + + /* compare the bits from the most to the least */ + while ((diff >> (8 - nbits)) != 0) + nbits--; + } + + return (8 * byte) + nbits; +} + + +/* + * Verify a CIDR address is OK (doesn't have bits set past the masklen) + */ +static bool +addressOK(unsigned char *a, int bits, int family) +{ + int byte; + int nbits; + int maxbits; + int maxbytes; + unsigned char mask; + + if (family == PGSQL_AF_INET) + { + maxbits = 32; + maxbytes = 4; + } + else + { + maxbits = 128; + maxbytes = 16; + } + Assert(bits <= maxbits); + + if (bits == maxbits) + return true; + + byte = bits / 8; + + nbits = bits % 8; + mask = 0xff; + if (bits != 0) + mask >>= nbits; + + while (byte < maxbytes) + { + if ((a[byte] & mask) != 0) + return false; + mask = 0xff; + byte++; + } + + return true; +} + + +/* + * These functions are used by planner to generate indexscan limits + * for clauses a << b and a <<= b + */ + +/* return the minimal value for an IP on a given network */ +Datum +network_scan_first(Datum in) +{ + return DirectFunctionCall1(network_network, in); +} + +/* + * return "last" IP on a given network. It's the broadcast address, + * however, masklen has to be set to its max bits, since + * 192.168.0.255/24 is considered less than 192.168.0.255/32 + * + * inet_set_masklen() hacked to max out the masklength to 128 for IPv6 + * and 32 for IPv4 when given '-1' as argument. 
+ */ +Datum +network_scan_last(Datum in) +{ + return DirectFunctionCall2(inet_set_masklen, + DirectFunctionCall1(network_broadcast, in), + Int32GetDatum(-1)); +} + + +/* + * IP address that the client is connecting from (NULL if Unix socket) + */ +Datum +inet_client_addr(PG_FUNCTION_ARGS) +{ + Port *port = MyProcPort; + char remote_host[NI_MAXHOST]; + int ret; + + if (port == NULL) + PG_RETURN_NULL(); + + switch (port->raddr.addr.ss_family) + { + case AF_INET: + case AF_INET6: + break; + default: + PG_RETURN_NULL(); + } + + remote_host[0] = '\0'; + + ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen, + remote_host, sizeof(remote_host), + NULL, 0, + NI_NUMERICHOST | NI_NUMERICSERV); + if (ret != 0) + PG_RETURN_NULL(); + + clean_ipv6_addr(port->raddr.addr.ss_family, remote_host); + + PG_RETURN_INET_P(network_in(remote_host, false, NULL)); +} + + +/* + * port that the client is connecting from (NULL if Unix socket) + */ +Datum +inet_client_port(PG_FUNCTION_ARGS) +{ + Port *port = MyProcPort; + char remote_port[NI_MAXSERV]; + int ret; + + if (port == NULL) + PG_RETURN_NULL(); + + switch (port->raddr.addr.ss_family) + { + case AF_INET: + case AF_INET6: + break; + default: + PG_RETURN_NULL(); + } + + remote_port[0] = '\0'; + + ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen, + NULL, 0, + remote_port, sizeof(remote_port), + NI_NUMERICHOST | NI_NUMERICSERV); + if (ret != 0) + PG_RETURN_NULL(); + + PG_RETURN_DATUM(DirectFunctionCall1(int4in, CStringGetDatum(remote_port))); +} + + +/* + * IP address that the server accepted the connection on (NULL if Unix socket) + */ +Datum +inet_server_addr(PG_FUNCTION_ARGS) +{ + Port *port = MyProcPort; + char local_host[NI_MAXHOST]; + int ret; + + if (port == NULL) + PG_RETURN_NULL(); + + switch (port->laddr.addr.ss_family) + { + case AF_INET: + case AF_INET6: + break; + default: + PG_RETURN_NULL(); + } + + local_host[0] = '\0'; + + ret = pg_getnameinfo_all(&port->laddr.addr, port->laddr.salen, + local_host, sizeof(local_host), + NULL, 0, + NI_NUMERICHOST | NI_NUMERICSERV); + if (ret != 0) + PG_RETURN_NULL(); + + clean_ipv6_addr(port->laddr.addr.ss_family, local_host); + + PG_RETURN_INET_P(network_in(local_host, false, NULL)); +} + + +/* + * port that the server accepted the connection on (NULL if Unix socket) + */ +Datum +inet_server_port(PG_FUNCTION_ARGS) +{ + Port *port = MyProcPort; + char local_port[NI_MAXSERV]; + int ret; + + if (port == NULL) + PG_RETURN_NULL(); + + switch (port->laddr.addr.ss_family) + { + case AF_INET: + case AF_INET6: + break; + default: + PG_RETURN_NULL(); + } + + local_port[0] = '\0'; + + ret = pg_getnameinfo_all(&port->laddr.addr, port->laddr.salen, + NULL, 0, + local_port, sizeof(local_port), + NI_NUMERICHOST | NI_NUMERICSERV); + if (ret != 0) + PG_RETURN_NULL(); + + PG_RETURN_DATUM(DirectFunctionCall1(int4in, CStringGetDatum(local_port))); +} + + +Datum +inetnot(PG_FUNCTION_ARGS) +{ + inet *ip = PG_GETARG_INET_PP(0); + inet *dst; + + dst = (inet *) palloc0(sizeof(inet)); + + { + int nb = ip_addrsize(ip); + unsigned char *pip = ip_addr(ip); + unsigned char *pdst = ip_addr(dst); + + while (--nb >= 0) + pdst[nb] = ~pip[nb]; + } + ip_bits(dst) = ip_bits(ip); + + ip_family(dst) = ip_family(ip); + SET_INET_VARSIZE(dst); + + PG_RETURN_INET_P(dst); +} + + +Datum +inetand(PG_FUNCTION_ARGS) +{ + inet *ip = PG_GETARG_INET_PP(0); + inet *ip2 = PG_GETARG_INET_PP(1); + inet *dst; + + dst = (inet *) palloc0(sizeof(inet)); + + if (ip_family(ip) != ip_family(ip2)) + ereport(ERROR, + 
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot AND inet values of different sizes"))); + else + { + int nb = ip_addrsize(ip); + unsigned char *pip = ip_addr(ip); + unsigned char *pip2 = ip_addr(ip2); + unsigned char *pdst = ip_addr(dst); + + while (--nb >= 0) + pdst[nb] = pip[nb] & pip2[nb]; + } + ip_bits(dst) = Max(ip_bits(ip), ip_bits(ip2)); + + ip_family(dst) = ip_family(ip); + SET_INET_VARSIZE(dst); + + PG_RETURN_INET_P(dst); +} + + +Datum +inetor(PG_FUNCTION_ARGS) +{ + inet *ip = PG_GETARG_INET_PP(0); + inet *ip2 = PG_GETARG_INET_PP(1); + inet *dst; + + dst = (inet *) palloc0(sizeof(inet)); + + if (ip_family(ip) != ip_family(ip2)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot OR inet values of different sizes"))); + else + { + int nb = ip_addrsize(ip); + unsigned char *pip = ip_addr(ip); + unsigned char *pip2 = ip_addr(ip2); + unsigned char *pdst = ip_addr(dst); + + while (--nb >= 0) + pdst[nb] = pip[nb] | pip2[nb]; + } + ip_bits(dst) = Max(ip_bits(ip), ip_bits(ip2)); + + ip_family(dst) = ip_family(ip); + SET_INET_VARSIZE(dst); + + PG_RETURN_INET_P(dst); +} + + +static inet * +internal_inetpl(inet *ip, int64 addend) +{ + inet *dst; + + dst = (inet *) palloc0(sizeof(inet)); + + { + int nb = ip_addrsize(ip); + unsigned char *pip = ip_addr(ip); + unsigned char *pdst = ip_addr(dst); + int carry = 0; + + while (--nb >= 0) + { + carry = pip[nb] + (int) (addend & 0xFF) + carry; + pdst[nb] = (unsigned char) (carry & 0xFF); + carry >>= 8; + + /* + * We have to be careful about right-shifting addend because + * right-shift isn't portable for negative values, while simply + * dividing by 256 doesn't work (the standard rounding is in the + * wrong direction, besides which there may be machines out there + * that round the wrong way). So, explicitly clear the low-order + * byte to remove any doubt about the correct result of the + * division, and then divide rather than shift. + */ + addend &= ~((int64) 0xFF); + addend /= 0x100; + } + + /* + * At this point we should have addend and carry both zero if original + * addend was >= 0, or addend -1 and carry 1 if original addend was < + * 0. Anything else means overflow. + */ + if (!((addend == 0 && carry == 0) || + (addend == -1 && carry == 1))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("result is out of range"))); + } + + ip_bits(dst) = ip_bits(ip); + ip_family(dst) = ip_family(ip); + SET_INET_VARSIZE(dst); + + return dst; +} + + +Datum +inetpl(PG_FUNCTION_ARGS) +{ + inet *ip = PG_GETARG_INET_PP(0); + int64 addend = PG_GETARG_INT64(1); + + PG_RETURN_INET_P(internal_inetpl(ip, addend)); +} + + +Datum +inetmi_int8(PG_FUNCTION_ARGS) +{ + inet *ip = PG_GETARG_INET_PP(0); + int64 addend = PG_GETARG_INT64(1); + + PG_RETURN_INET_P(internal_inetpl(ip, -addend)); +} + + +Datum +inetmi(PG_FUNCTION_ARGS) +{ + inet *ip = PG_GETARG_INET_PP(0); + inet *ip2 = PG_GETARG_INET_PP(1); + int64 res = 0; + + if (ip_family(ip) != ip_family(ip2)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot subtract inet values of different sizes"))); + else + { + /* + * We form the difference using the traditional complement, increment, + * and add rule, with the increment part being handled by starting the + * carry off at 1. If you don't think integer arithmetic is done in + * two's complement, too bad. 
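A standalone 4-byte toy of the complement-and-add subtraction just described (positive result assumed; the real code below also sign-extends narrow results and checks for overflow):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    unsigned char   a[4] = {10, 0, 0, 10};  /* 10.0.0.10 */
    unsigned char   b[4] = {10, 0, 0, 3};   /* 10.0.0.3 */
    int64_t         res = 0;
    int             carry = 1;              /* the "+1" of two's complement */

    for (int nb = 3, byte = 0; nb >= 0; nb--, byte++)
    {
        carry = a[nb] + (~b[nb] & 0xFF) + carry;
        res |= (int64_t) (carry & 0xFF) << (byte * 8);
        carry >>= 8;
    }
    /* prints 7, i.e. 10.0.0.10 - 10.0.0.3 */
    printf("%lld\n", (long long) res);
    return 0;
}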
+ */ + int nb = ip_addrsize(ip); + int byte = 0; + unsigned char *pip = ip_addr(ip); + unsigned char *pip2 = ip_addr(ip2); + int carry = 1; + + while (--nb >= 0) + { + int lobyte; + + carry = pip[nb] + (~pip2[nb] & 0xFF) + carry; + lobyte = carry & 0xFF; + if (byte < sizeof(int64)) + { + res |= ((int64) lobyte) << (byte * 8); + } + else + { + /* + * Input wider than int64: check for overflow. All bytes to + * the left of what will fit should be 0 or 0xFF, depending on + * sign of the now-complete result. + */ + if ((res < 0) ? (lobyte != 0xFF) : (lobyte != 0)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("result is out of range"))); + } + carry >>= 8; + byte++; + } + + /* + * If input is narrower than int64, overflow is not possible, but we + * have to do proper sign extension. + */ + if (carry == 0 && byte < sizeof(int64)) + res |= ((uint64) (int64) -1) << (byte * 8); + } + + PG_RETURN_INT64(res); +} + + +/* + * clean_ipv6_addr --- remove any '%zone' part from an IPv6 address string + * + * XXX This should go away someday! + * + * This is a kluge needed because we don't yet support zones in stored inet + * values. Since the result of getnameinfo() might include a zone spec, + * call this to remove it anywhere we want to feed getnameinfo's output to + * network_in. Beats failing entirely. + * + * An alternative approach would be to let network_in ignore %-parts for + * itself, but that would mean we'd silently drop zone specs in user input, + * which seems not such a good idea. + */ +void +clean_ipv6_addr(int addr_family, char *addr) +{ + if (addr_family == AF_INET6) + { + char *pct = strchr(addr, '%'); + + if (pct) + *pct = '\0'; + } +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/network_gist.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/network_gist.c new file mode 100644 index 00000000000..32cde28f00e --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/network_gist.c @@ -0,0 +1,810 @@ +/*------------------------------------------------------------------------- + * + * network_gist.c + * GiST support for network types. + * + * The key thing to understand about this code is the definition of the + * "union" of a set of INET/CIDR values. It works like this: + * 1. If the values are not all of the same IP address family, the "union" + * is a dummy value with family number zero, minbits zero, commonbits zero, + * address all zeroes. Otherwise: + * 2. The union has the common IP address family number. + * 3. The union's minbits value is the smallest netmask length ("ip_bits") + * of all the input values. + * 4. Let C be the number of leading address bits that are in common among + * all the input values (C ranges from 0 to ip_maxbits for the family). + * 5. The union's commonbits value is C. + * 6. The union's address value is the same as the common prefix for its + * first C bits, and is zeroes to the right of that. The physical width + * of the address value is ip_maxbits for the address family. + * + * In a leaf index entry (representing a single key), commonbits is equal to + * ip_maxbits for the address family, minbits is the same as the represented + * value's ip_bits, and the address is equal to the represented address. + * Although it may appear that we're wasting a byte by storing the union + * format and not just the represented INET/CIDR value in leaf keys, the + * extra byte is actually "free" because of alignment considerations. 
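+ *
+ * For example (addresses chosen arbitrarily): the union of 192.168.1.0/24
+ * and 192.168.2.0/26 is an IPv4 key with minbits = 24 (the smaller
+ * netmask), commonbits = 22 (the addresses agree on their first 22 bits),
+ * and address 192.168.0.0 (the 22 common bits followed by zeroes).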
+ * + * Note that this design tracks minbits and commonbits independently; in any + * given union value, either might be smaller than the other. This does not + * help us much when descending the tree, because of the way inet comparison + * is defined: at non-leaf nodes we can't compare more than minbits bits + * even if we know them. However, it greatly improves the quality of split + * decisions. Preliminary testing suggests that searches are as much as + * twice as fast as for a simpler design in which a single field doubles as + * the common prefix length and the minimum ip_bits value. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/network_gist.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <sys/socket.h> + +#include "access/gist.h" +#include "access/stratnum.h" +#include "utils/builtins.h" +#include "utils/inet.h" +#include "varatt.h" + +/* + * Operator strategy numbers used in the GiST inet_ops opclass + */ +#define INETSTRAT_OVERLAPS RTOverlapStrategyNumber +#define INETSTRAT_EQ RTEqualStrategyNumber +#define INETSTRAT_NE RTNotEqualStrategyNumber +#define INETSTRAT_LT RTLessStrategyNumber +#define INETSTRAT_LE RTLessEqualStrategyNumber +#define INETSTRAT_GT RTGreaterStrategyNumber +#define INETSTRAT_GE RTGreaterEqualStrategyNumber +#define INETSTRAT_SUB RTSubStrategyNumber +#define INETSTRAT_SUBEQ RTSubEqualStrategyNumber +#define INETSTRAT_SUP RTSuperStrategyNumber +#define INETSTRAT_SUPEQ RTSuperEqualStrategyNumber + + +/* + * Representation of a GiST INET/CIDR index key. This is not identical to + * INET/CIDR because we need to keep track of the length of the common address + * prefix as well as the minimum netmask length. However, as long as it + * follows varlena header rules, the core GiST code won't know the difference. + * For simplicity we always use 1-byte-header varlena format. + */ +typedef struct GistInetKey +{ + uint8 va_header; /* varlena header --- don't touch directly */ + unsigned char family; /* PGSQL_AF_INET, PGSQL_AF_INET6, or zero */ + unsigned char minbits; /* minimum number of bits in netmask */ + unsigned char commonbits; /* number of common prefix bits in addresses */ + unsigned char ipaddr[16]; /* up to 128 bits of common address */ +} GistInetKey; + +#define DatumGetInetKeyP(X) ((GistInetKey *) DatumGetPointer(X)) +#define InetKeyPGetDatum(X) PointerGetDatum(X) + +/* + * Access macros; not really exciting, but we use these for notational + * consistency with access to INET/CIDR values. Note that family-zero values + * are stored with 4 bytes of address, not 16. + */ +#define gk_ip_family(gkptr) ((gkptr)->family) +#define gk_ip_minbits(gkptr) ((gkptr)->minbits) +#define gk_ip_commonbits(gkptr) ((gkptr)->commonbits) +#define gk_ip_addr(gkptr) ((gkptr)->ipaddr) +#define ip_family_maxbits(fam) ((fam) == PGSQL_AF_INET6 ? 128 : 32) + +/* These require that the family field has been set: */ +#define gk_ip_addrsize(gkptr) \ + (gk_ip_family(gkptr) == PGSQL_AF_INET6 ? 
16 : 4) +#define gk_ip_maxbits(gkptr) \ + ip_family_maxbits(gk_ip_family(gkptr)) +#define SET_GK_VARSIZE(dst) \ + SET_VARSIZE_SHORT(dst, offsetof(GistInetKey, ipaddr) + gk_ip_addrsize(dst)) + + +/* + * The GiST query consistency check + */ +Datum +inet_gist_consistent(PG_FUNCTION_ARGS) +{ + GISTENTRY *ent = (GISTENTRY *) PG_GETARG_POINTER(0); + inet *query = PG_GETARG_INET_PP(1); + StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); + + /* Oid subtype = PG_GETARG_OID(3); */ + bool *recheck = (bool *) PG_GETARG_POINTER(4); + GistInetKey *key = DatumGetInetKeyP(ent->key); + int minbits, + order; + + /* All operators served by this function are exact. */ + *recheck = false; + + /* + * Check 0: different families + * + * If key represents multiple address families, its children could match + * anything. This can only happen on an inner index page. + */ + if (gk_ip_family(key) == 0) + { + Assert(!GIST_LEAF(ent)); + PG_RETURN_BOOL(true); + } + + /* + * Check 1: different families + * + * Matching families do not help any of the strategies. + */ + if (gk_ip_family(key) != ip_family(query)) + { + switch (strategy) + { + case INETSTRAT_LT: + case INETSTRAT_LE: + if (gk_ip_family(key) < ip_family(query)) + PG_RETURN_BOOL(true); + break; + + case INETSTRAT_GE: + case INETSTRAT_GT: + if (gk_ip_family(key) > ip_family(query)) + PG_RETURN_BOOL(true); + break; + + case INETSTRAT_NE: + PG_RETURN_BOOL(true); + } + /* For all other cases, we can be sure there is no match */ + PG_RETURN_BOOL(false); + } + + /* + * Check 2: network bit count + * + * Network bit count (ip_bits) helps to check leaves for sub network and + * sup network operators. At non-leaf nodes, we know every child value + * has ip_bits >= gk_ip_minbits(key), so we can avoid descending in some + * cases too. + */ + switch (strategy) + { + case INETSTRAT_SUB: + if (GIST_LEAF(ent) && gk_ip_minbits(key) <= ip_bits(query)) + PG_RETURN_BOOL(false); + break; + + case INETSTRAT_SUBEQ: + if (GIST_LEAF(ent) && gk_ip_minbits(key) < ip_bits(query)) + PG_RETURN_BOOL(false); + break; + + case INETSTRAT_SUPEQ: + case INETSTRAT_EQ: + if (gk_ip_minbits(key) > ip_bits(query)) + PG_RETURN_BOOL(false); + break; + + case INETSTRAT_SUP: + if (gk_ip_minbits(key) >= ip_bits(query)) + PG_RETURN_BOOL(false); + break; + } + + /* + * Check 3: common network bits + * + * Compare available common prefix bits to the query, but not beyond + * either the query's netmask or the minimum netmask among the represented + * values. If these bits don't match the query, we have our answer (and + * may or may not need to descend, depending on the operator). If they do + * match, and we are not at a leaf, we descend in all cases. + * + * Note this is the final check for operators that only consider the + * network part of the address. 
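+ *
+ * For instance (illustrative numbers): a key with commonbits = 20 and
+ * minbits = 24 compared against a /16 query uses only the first
+ * Min(20, 24, 16) = 16 bits here.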
+ */ + minbits = Min(gk_ip_commonbits(key), gk_ip_minbits(key)); + minbits = Min(minbits, ip_bits(query)); + + order = bitncmp(gk_ip_addr(key), ip_addr(query), minbits); + + switch (strategy) + { + case INETSTRAT_SUB: + case INETSTRAT_SUBEQ: + case INETSTRAT_OVERLAPS: + case INETSTRAT_SUPEQ: + case INETSTRAT_SUP: + PG_RETURN_BOOL(order == 0); + + case INETSTRAT_LT: + case INETSTRAT_LE: + if (order > 0) + PG_RETURN_BOOL(false); + if (order < 0 || !GIST_LEAF(ent)) + PG_RETURN_BOOL(true); + break; + + case INETSTRAT_EQ: + if (order != 0) + PG_RETURN_BOOL(false); + if (!GIST_LEAF(ent)) + PG_RETURN_BOOL(true); + break; + + case INETSTRAT_GE: + case INETSTRAT_GT: + if (order < 0) + PG_RETURN_BOOL(false); + if (order > 0 || !GIST_LEAF(ent)) + PG_RETURN_BOOL(true); + break; + + case INETSTRAT_NE: + if (order != 0 || !GIST_LEAF(ent)) + PG_RETURN_BOOL(true); + break; + } + + /* + * Remaining checks are only for leaves and basic comparison strategies. + * See network_cmp_internal() in network.c for the implementation we need + * to match. Note that in a leaf key, commonbits should equal the address + * length, so we compared the whole network parts above. + */ + Assert(GIST_LEAF(ent)); + + /* + * Check 4: network bit count + * + * Next step is to compare netmask widths. + */ + switch (strategy) + { + case INETSTRAT_LT: + case INETSTRAT_LE: + if (gk_ip_minbits(key) < ip_bits(query)) + PG_RETURN_BOOL(true); + if (gk_ip_minbits(key) > ip_bits(query)) + PG_RETURN_BOOL(false); + break; + + case INETSTRAT_EQ: + if (gk_ip_minbits(key) != ip_bits(query)) + PG_RETURN_BOOL(false); + break; + + case INETSTRAT_GE: + case INETSTRAT_GT: + if (gk_ip_minbits(key) > ip_bits(query)) + PG_RETURN_BOOL(true); + if (gk_ip_minbits(key) < ip_bits(query)) + PG_RETURN_BOOL(false); + break; + + case INETSTRAT_NE: + if (gk_ip_minbits(key) != ip_bits(query)) + PG_RETURN_BOOL(true); + break; + } + + /* + * Check 5: whole address + * + * Netmask bit counts are the same, so check all the address bits. + */ + order = bitncmp(gk_ip_addr(key), ip_addr(query), gk_ip_maxbits(key)); + + switch (strategy) + { + case INETSTRAT_LT: + PG_RETURN_BOOL(order < 0); + + case INETSTRAT_LE: + PG_RETURN_BOOL(order <= 0); + + case INETSTRAT_EQ: + PG_RETURN_BOOL(order == 0); + + case INETSTRAT_GE: + PG_RETURN_BOOL(order >= 0); + + case INETSTRAT_GT: + PG_RETURN_BOOL(order > 0); + + case INETSTRAT_NE: + PG_RETURN_BOOL(order != 0); + } + + elog(ERROR, "unknown strategy for inet GiST"); + PG_RETURN_BOOL(false); /* keep compiler quiet */ +} + +/* + * Calculate parameters of the union of some GistInetKeys. + * + * Examine the keys in elements m..n inclusive of the GISTENTRY array, + * and compute these output parameters: + * *minfamily_p = minimum IP address family number + * *maxfamily_p = maximum IP address family number + * *minbits_p = minimum netmask width + * *commonbits_p = number of leading bits in common among the addresses + * + * minbits and commonbits are forced to zero if there's more than one + * address family. + */ +static void +calc_inet_union_params(GISTENTRY *ent, + int m, int n, + int *minfamily_p, + int *maxfamily_p, + int *minbits_p, + int *commonbits_p) +{ + int minfamily, + maxfamily, + minbits, + commonbits; + unsigned char *addr; + GistInetKey *tmp; + int i; + + /* Must be at least one key. */ + Assert(m <= n); + + /* Initialize variables using the first key. 
*/ + tmp = DatumGetInetKeyP(ent[m].key); + minfamily = maxfamily = gk_ip_family(tmp); + minbits = gk_ip_minbits(tmp); + commonbits = gk_ip_commonbits(tmp); + addr = gk_ip_addr(tmp); + + /* Scan remaining keys. */ + for (i = m + 1; i <= n; i++) + { + tmp = DatumGetInetKeyP(ent[i].key); + + /* Determine range of family numbers */ + if (minfamily > gk_ip_family(tmp)) + minfamily = gk_ip_family(tmp); + if (maxfamily < gk_ip_family(tmp)) + maxfamily = gk_ip_family(tmp); + + /* Find minimum minbits */ + if (minbits > gk_ip_minbits(tmp)) + minbits = gk_ip_minbits(tmp); + + /* Find minimum number of bits in common */ + if (commonbits > gk_ip_commonbits(tmp)) + commonbits = gk_ip_commonbits(tmp); + if (commonbits > 0) + commonbits = bitncommon(addr, gk_ip_addr(tmp), commonbits); + } + + /* Force minbits/commonbits to zero if more than one family. */ + if (minfamily != maxfamily) + minbits = commonbits = 0; + + *minfamily_p = minfamily; + *maxfamily_p = maxfamily; + *minbits_p = minbits; + *commonbits_p = commonbits; +} + +/* + * Same as above, but the GISTENTRY elements to examine are those with + * indices listed in the offsets[] array. + */ +static void +calc_inet_union_params_indexed(GISTENTRY *ent, + OffsetNumber *offsets, int noffsets, + int *minfamily_p, + int *maxfamily_p, + int *minbits_p, + int *commonbits_p) +{ + int minfamily, + maxfamily, + minbits, + commonbits; + unsigned char *addr; + GistInetKey *tmp; + int i; + + /* Must be at least one key. */ + Assert(noffsets > 0); + + /* Initialize variables using the first key. */ + tmp = DatumGetInetKeyP(ent[offsets[0]].key); + minfamily = maxfamily = gk_ip_family(tmp); + minbits = gk_ip_minbits(tmp); + commonbits = gk_ip_commonbits(tmp); + addr = gk_ip_addr(tmp); + + /* Scan remaining keys. */ + for (i = 1; i < noffsets; i++) + { + tmp = DatumGetInetKeyP(ent[offsets[i]].key); + + /* Determine range of family numbers */ + if (minfamily > gk_ip_family(tmp)) + minfamily = gk_ip_family(tmp); + if (maxfamily < gk_ip_family(tmp)) + maxfamily = gk_ip_family(tmp); + + /* Find minimum minbits */ + if (minbits > gk_ip_minbits(tmp)) + minbits = gk_ip_minbits(tmp); + + /* Find minimum number of bits in common */ + if (commonbits > gk_ip_commonbits(tmp)) + commonbits = gk_ip_commonbits(tmp); + if (commonbits > 0) + commonbits = bitncommon(addr, gk_ip_addr(tmp), commonbits); + } + + /* Force minbits/commonbits to zero if more than one family. */ + if (minfamily != maxfamily) + minbits = commonbits = 0; + + *minfamily_p = minfamily; + *maxfamily_p = maxfamily; + *minbits_p = minbits; + *commonbits_p = commonbits; +} + +/* + * Construct a GistInetKey representing a union value. + * + * Inputs are the family/minbits/commonbits values to use, plus a pointer to + * the address field of one of the union inputs. (Since we're going to copy + * just the bits-in-common, it doesn't matter which one.) + */ +static GistInetKey * +build_inet_union_key(int family, int minbits, int commonbits, + unsigned char *addr) +{ + GistInetKey *result; + + /* Make sure any unused bits are zeroed. */ + result = (GistInetKey *) palloc0(sizeof(GistInetKey)); + + gk_ip_family(result) = family; + gk_ip_minbits(result) = minbits; + gk_ip_commonbits(result) = commonbits; + + /* Clone appropriate bytes of the address. */ + if (commonbits > 0) + memcpy(gk_ip_addr(result), addr, (commonbits + 7) / 8); + + /* Clean any unwanted bits in the last partial byte. */ + if (commonbits % 8 != 0) + gk_ip_addr(result)[commonbits / 8] &= ~(0xFF >> (commonbits % 8)); + + /* Set varlena header correctly. 
*/ + SET_GK_VARSIZE(result); + + return result; +} + + +/* + * The GiST union function + * + * See comments at head of file for the definition of the union. + */ +Datum +inet_gist_union(PG_FUNCTION_ARGS) +{ + GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); + GISTENTRY *ent = entryvec->vector; + int minfamily, + maxfamily, + minbits, + commonbits; + unsigned char *addr; + GistInetKey *tmp, + *result; + + /* Determine parameters of the union. */ + calc_inet_union_params(ent, 0, entryvec->n - 1, + &minfamily, &maxfamily, + &minbits, &commonbits); + + /* If more than one family, emit family number zero. */ + if (minfamily != maxfamily) + minfamily = 0; + + /* Initialize address using the first key. */ + tmp = DatumGetInetKeyP(ent[0].key); + addr = gk_ip_addr(tmp); + + /* Construct the union value. */ + result = build_inet_union_key(minfamily, minbits, commonbits, addr); + + PG_RETURN_POINTER(result); +} + +/* + * The GiST compress function + * + * Convert an inet value to GistInetKey. + */ +Datum +inet_gist_compress(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + GISTENTRY *retval; + + if (entry->leafkey) + { + retval = palloc(sizeof(GISTENTRY)); + if (DatumGetPointer(entry->key) != NULL) + { + inet *in = DatumGetInetPP(entry->key); + GistInetKey *r; + + r = (GistInetKey *) palloc0(sizeof(GistInetKey)); + + gk_ip_family(r) = ip_family(in); + gk_ip_minbits(r) = ip_bits(in); + gk_ip_commonbits(r) = gk_ip_maxbits(r); + memcpy(gk_ip_addr(r), ip_addr(in), gk_ip_addrsize(r)); + SET_GK_VARSIZE(r); + + gistentryinit(*retval, PointerGetDatum(r), + entry->rel, entry->page, + entry->offset, false); + } + else + { + gistentryinit(*retval, (Datum) 0, + entry->rel, entry->page, + entry->offset, false); + } + } + else + retval = entry; + PG_RETURN_POINTER(retval); +} + +/* + * We do not need a decompress function, because the other GiST inet + * support functions work with the GistInetKey representation. + */ + +/* + * The GiST fetch function + * + * Reconstruct the original inet datum from a GistInetKey. + */ +Datum +inet_gist_fetch(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + GistInetKey *key = DatumGetInetKeyP(entry->key); + GISTENTRY *retval; + inet *dst; + + dst = (inet *) palloc0(sizeof(inet)); + + ip_family(dst) = gk_ip_family(key); + ip_bits(dst) = gk_ip_minbits(key); + memcpy(ip_addr(dst), gk_ip_addr(key), ip_addrsize(dst)); + SET_INET_VARSIZE(dst); + + retval = palloc(sizeof(GISTENTRY)); + gistentryinit(*retval, InetPGetDatum(dst), entry->rel, entry->page, + entry->offset, false); + + PG_RETURN_POINTER(retval); +} + +/* + * The GiST page split penalty function + * + * Charge a large penalty if address family doesn't match, or a somewhat + * smaller one if the new value would degrade the union's minbits + * (minimum netmask width). Otherwise, penalty is inverse of the + * new number of common address bits. 
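+ *
+ * For example (numbers are illustrative): merging a new leaf that shares
+ * all 16 bits of a union's 16-bit common prefix gives a penalty of
+ * 1/16 = 0.0625; a new value whose netmask is shorter than the union's
+ * minbits gets the fixed penalty 3, and a different address family gets 4.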
+ */ +Datum +inet_gist_penalty(PG_FUNCTION_ARGS) +{ + GISTENTRY *origent = (GISTENTRY *) PG_GETARG_POINTER(0); + GISTENTRY *newent = (GISTENTRY *) PG_GETARG_POINTER(1); + float *penalty = (float *) PG_GETARG_POINTER(2); + GistInetKey *orig = DatumGetInetKeyP(origent->key), + *new = DatumGetInetKeyP(newent->key); + int commonbits; + + if (gk_ip_family(orig) == gk_ip_family(new)) + { + if (gk_ip_minbits(orig) <= gk_ip_minbits(new)) + { + commonbits = bitncommon(gk_ip_addr(orig), gk_ip_addr(new), + Min(gk_ip_commonbits(orig), + gk_ip_commonbits(new))); + if (commonbits > 0) + *penalty = 1.0f / commonbits; + else + *penalty = 2; + } + else + *penalty = 3; + } + else + *penalty = 4; + + PG_RETURN_POINTER(penalty); +} + +/* + * The GiST PickSplit method + * + * There are two ways to split. First one is to split by address families, + * if there are multiple families appearing in the input. + * + * The second and more common way is to split by addresses. To achieve this, + * determine the number of leading bits shared by all the keys, then split on + * the next bit. (We don't currently consider the netmask widths while doing + * this; should we?) If we fail to get a nontrivial split that way, split + * 50-50. + */ +Datum +inet_gist_picksplit(PG_FUNCTION_ARGS) +{ + GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); + GIST_SPLITVEC *splitvec = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); + GISTENTRY *ent = entryvec->vector; + int minfamily, + maxfamily, + minbits, + commonbits; + unsigned char *addr; + GistInetKey *tmp, + *left_union, + *right_union; + int maxoff, + nbytes; + OffsetNumber i, + *left, + *right; + + maxoff = entryvec->n - 1; + nbytes = (maxoff + 1) * sizeof(OffsetNumber); + + left = (OffsetNumber *) palloc(nbytes); + right = (OffsetNumber *) palloc(nbytes); + + splitvec->spl_left = left; + splitvec->spl_right = right; + + splitvec->spl_nleft = 0; + splitvec->spl_nright = 0; + + /* Determine parameters of the union of all the inputs. */ + calc_inet_union_params(ent, FirstOffsetNumber, maxoff, + &minfamily, &maxfamily, + &minbits, &commonbits); + + if (minfamily != maxfamily) + { + /* Multiple families, so split by family. */ + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + /* + * If there's more than 2 families, all but maxfamily go into the + * left union. This could only happen if the inputs include some + * IPv4, some IPv6, and some already-multiple-family unions. + */ + tmp = DatumGetInetKeyP(ent[i].key); + if (gk_ip_family(tmp) != maxfamily) + left[splitvec->spl_nleft++] = i; + else + right[splitvec->spl_nright++] = i; + } + } + else + { + /* + * Split on the next bit after the common bits. If that yields a + * trivial split, try the next bit position to the right. Repeat till + * success; or if we run out of bits, do an arbitrary 50-50 split. + */ + int maxbits = ip_family_maxbits(minfamily); + + while (commonbits < maxbits) + { + /* Split using the commonbits'th bit position. */ + int bitbyte = commonbits / 8; + int bitmask = 0x80 >> (commonbits % 8); + + splitvec->spl_nleft = splitvec->spl_nright = 0; + + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + tmp = DatumGetInetKeyP(ent[i].key); + addr = gk_ip_addr(tmp); + if ((addr[bitbyte] & bitmask) == 0) + left[splitvec->spl_nleft++] = i; + else + right[splitvec->spl_nright++] = i; + } + + if (splitvec->spl_nleft > 0 && splitvec->spl_nright > 0) + break; /* success */ + commonbits++; + } + + if (commonbits >= maxbits) + { + /* Failed ... do a 50-50 split. 
*/ + splitvec->spl_nleft = splitvec->spl_nright = 0; + + for (i = FirstOffsetNumber; i <= maxoff / 2; i = OffsetNumberNext(i)) + { + left[splitvec->spl_nleft++] = i; + } + for (; i <= maxoff; i = OffsetNumberNext(i)) + { + right[splitvec->spl_nright++] = i; + } + } + } + + /* + * Compute the union value for each side from scratch. In most cases we + * could approximate the union values with what we already know, but this + * ensures that each side has minbits and commonbits set as high as + * possible. + */ + calc_inet_union_params_indexed(ent, left, splitvec->spl_nleft, + &minfamily, &maxfamily, + &minbits, &commonbits); + if (minfamily != maxfamily) + minfamily = 0; + tmp = DatumGetInetKeyP(ent[left[0]].key); + addr = gk_ip_addr(tmp); + left_union = build_inet_union_key(minfamily, minbits, commonbits, addr); + splitvec->spl_ldatum = PointerGetDatum(left_union); + + calc_inet_union_params_indexed(ent, right, splitvec->spl_nright, + &minfamily, &maxfamily, + &minbits, &commonbits); + if (minfamily != maxfamily) + minfamily = 0; + tmp = DatumGetInetKeyP(ent[right[0]].key); + addr = gk_ip_addr(tmp); + right_union = build_inet_union_key(minfamily, minbits, commonbits, addr); + splitvec->spl_rdatum = PointerGetDatum(right_union); + + PG_RETURN_POINTER(splitvec); +} + +/* + * The GiST equality function + */ +Datum +inet_gist_same(PG_FUNCTION_ARGS) +{ + GistInetKey *left = DatumGetInetKeyP(PG_GETARG_DATUM(0)); + GistInetKey *right = DatumGetInetKeyP(PG_GETARG_DATUM(1)); + bool *result = (bool *) PG_GETARG_POINTER(2); + + *result = (gk_ip_family(left) == gk_ip_family(right) && + gk_ip_minbits(left) == gk_ip_minbits(right) && + gk_ip_commonbits(left) == gk_ip_commonbits(right) && + memcmp(gk_ip_addr(left), gk_ip_addr(right), + gk_ip_addrsize(left)) == 0); + + PG_RETURN_POINTER(result); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/network_selfuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/network_selfuncs.c new file mode 100644 index 00000000000..315985215c3 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/network_selfuncs.c @@ -0,0 +1,972 @@ +/*------------------------------------------------------------------------- + * + * network_selfuncs.c + * Functions for selectivity estimation of inet/cidr operators + * + * This module provides estimators for the subnet inclusion and overlap + * operators. Estimates are based on null fraction, most common values, + * and histogram of inet/cidr columns. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/network_selfuncs.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <math.h> + +#include "access/htup_details.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_statistic.h" +#include "utils/builtins.h" +#include "utils/inet.h" +#include "utils/lsyscache.h" +#include "utils/selfuncs.h" + + +/* Default selectivity for the inet overlap operator */ +#define DEFAULT_OVERLAP_SEL 0.01 + +/* Default selectivity for the various inclusion operators */ +#define DEFAULT_INCLUSION_SEL 0.005 + +/* Default selectivity for specified operator */ +#define DEFAULT_SEL(operator) \ + ((operator) == OID_INET_OVERLAP_OP ? 
\ + DEFAULT_OVERLAP_SEL : DEFAULT_INCLUSION_SEL) + +/* Maximum number of items to consider in join selectivity calculations */ +#define MAX_CONSIDERED_ELEMS 1024 + +static Selectivity networkjoinsel_inner(Oid operator, + VariableStatData *vardata1, VariableStatData *vardata2); +static Selectivity networkjoinsel_semi(Oid operator, + VariableStatData *vardata1, VariableStatData *vardata2); +static Selectivity mcv_population(float4 *mcv_numbers, int mcv_nvalues); +static Selectivity inet_hist_value_sel(Datum *values, int nvalues, + Datum constvalue, int opr_codenum); +static Selectivity inet_mcv_join_sel(Datum *mcv1_values, + float4 *mcv1_numbers, int mcv1_nvalues, Datum *mcv2_values, + float4 *mcv2_numbers, int mcv2_nvalues, Oid operator); +static Selectivity inet_mcv_hist_sel(Datum *mcv_values, float4 *mcv_numbers, + int mcv_nvalues, Datum *hist_values, int hist_nvalues, + int opr_codenum); +static Selectivity inet_hist_inclusion_join_sel(Datum *hist1_values, + int hist1_nvalues, + Datum *hist2_values, int hist2_nvalues, + int opr_codenum); +static Selectivity inet_semi_join_sel(Datum lhs_value, + bool mcv_exists, Datum *mcv_values, int mcv_nvalues, + bool hist_exists, Datum *hist_values, int hist_nvalues, + double hist_weight, + FmgrInfo *proc, int opr_codenum); +static int inet_opr_codenum(Oid operator); +static int inet_inclusion_cmp(inet *left, inet *right, int opr_codenum); +static int inet_masklen_inclusion_cmp(inet *left, inet *right, + int opr_codenum); +static int inet_hist_match_divider(inet *boundary, inet *query, + int opr_codenum); + +/* + * Selectivity estimation for the subnet inclusion/overlap operators + */ +Datum +networksel(PG_FUNCTION_ARGS) +{ + PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); + Oid operator = PG_GETARG_OID(1); + List *args = (List *) PG_GETARG_POINTER(2); + int varRelid = PG_GETARG_INT32(3); + VariableStatData vardata; + Node *other; + bool varonleft; + Selectivity selec, + mcv_selec, + non_mcv_selec; + Datum constvalue; + Form_pg_statistic stats; + AttStatsSlot hslot; + double sumcommon, + nullfrac; + FmgrInfo proc; + + /* + * If expression is not (variable op something) or (something op + * variable), then punt and return a default estimate. + */ + if (!get_restriction_variable(root, args, varRelid, + &vardata, &other, &varonleft)) + PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); + + /* + * Can't do anything useful if the something is not a constant, either. + */ + if (!IsA(other, Const)) + { + ReleaseVariableStats(vardata); + PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); + } + + /* All of the operators handled here are strict. */ + if (((Const *) other)->constisnull) + { + ReleaseVariableStats(vardata); + PG_RETURN_FLOAT8(0.0); + } + constvalue = ((Const *) other)->constvalue; + + /* Otherwise, we need stats in order to produce a non-default estimate. */ + if (!HeapTupleIsValid(vardata.statsTuple)) + { + ReleaseVariableStats(vardata); + PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); + } + + stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); + nullfrac = stats->stanullfrac; + + /* + * If we have most-common-values info, add up the fractions of the MCV + * entries that satisfy MCV OP CONST. These fractions contribute directly + * to the result selectivity. Also add up the total fraction represented + * by MCV entries. 
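+ *
+ * Example with made-up numbers: if the MCV entries satisfying the clause
+ * sum to 0.05, the whole MCV list covers 0.20 of the rows, and the null
+ * fraction is 0.10, a histogram estimate of 0.01 for the remainder yields
+ * selec = 0.05 + (1.0 - 0.10 - 0.20) * 0.01 = 0.057.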
+ */ + fmgr_info(get_opcode(operator), &proc); + mcv_selec = mcv_selectivity(&vardata, &proc, InvalidOid, + constvalue, varonleft, + &sumcommon); + + /* + * If we have a histogram, use it to estimate the proportion of the + * non-MCV population that satisfies the clause. If we don't, apply the + * default selectivity to that population. + */ + if (get_attstatsslot(&hslot, vardata.statsTuple, + STATISTIC_KIND_HISTOGRAM, InvalidOid, + ATTSTATSSLOT_VALUES)) + { + int opr_codenum = inet_opr_codenum(operator); + + /* Commute if needed, so we can consider histogram to be on the left */ + if (!varonleft) + opr_codenum = -opr_codenum; + non_mcv_selec = inet_hist_value_sel(hslot.values, hslot.nvalues, + constvalue, opr_codenum); + + free_attstatsslot(&hslot); + } + else + non_mcv_selec = DEFAULT_SEL(operator); + + /* Combine selectivities for MCV and non-MCV populations */ + selec = mcv_selec + (1.0 - nullfrac - sumcommon) * non_mcv_selec; + + /* Result should be in range, but make sure... */ + CLAMP_PROBABILITY(selec); + + ReleaseVariableStats(vardata); + + PG_RETURN_FLOAT8(selec); +} + +/* + * Join selectivity estimation for the subnet inclusion/overlap operators + * + * This function has the same structure as eqjoinsel() in selfuncs.c. + * + * Throughout networkjoinsel and its subroutines, we have a performance issue + * in that the amount of work to be done is O(N^2) in the length of the MCV + * and histogram arrays. To keep the runtime from getting out of hand when + * large statistics targets have been set, we arbitrarily limit the number of + * values considered to 1024 (MAX_CONSIDERED_ELEMS). For the MCV arrays, this + * is easy: just consider at most the first N elements. (Since the MCVs are + * sorted by decreasing frequency, this correctly gets us the first N MCVs.) + * For the histogram arrays, we decimate; that is consider only every k'th + * element, where k is chosen so that no more than MAX_CONSIDERED_ELEMS + * elements are considered. This should still give us a good random sample of + * the non-MCV population. Decimation is done on-the-fly in the loops that + * iterate over the histogram arrays. + */ +Datum +networkjoinsel(PG_FUNCTION_ARGS) +{ + PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); + Oid operator = PG_GETARG_OID(1); + List *args = (List *) PG_GETARG_POINTER(2); +#ifdef NOT_USED + JoinType jointype = (JoinType) PG_GETARG_INT16(3); +#endif + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4); + double selec; + VariableStatData vardata1; + VariableStatData vardata2; + bool join_is_reversed; + + get_join_variables(root, args, sjinfo, + &vardata1, &vardata2, &join_is_reversed); + + switch (sjinfo->jointype) + { + case JOIN_INNER: + case JOIN_LEFT: + case JOIN_FULL: + + /* + * Selectivity for left/full join is not exactly the same as inner + * join, but we neglect the difference, as eqjoinsel does. + */ + selec = networkjoinsel_inner(operator, &vardata1, &vardata2); + break; + case JOIN_SEMI: + case JOIN_ANTI: + /* Here, it's important that we pass the outer var on the left. 
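+ * (When join_is_reversed is true, the operator's commutator is looked up
+ * below so that the outer variable still ends up on the left.)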
*/ + if (!join_is_reversed) + selec = networkjoinsel_semi(operator, &vardata1, &vardata2); + else + selec = networkjoinsel_semi(get_commutator(operator), + &vardata2, &vardata1); + break; + default: + /* other values not expected here */ + elog(ERROR, "unrecognized join type: %d", + (int) sjinfo->jointype); + selec = 0; /* keep compiler quiet */ + break; + } + + ReleaseVariableStats(vardata1); + ReleaseVariableStats(vardata2); + + CLAMP_PROBABILITY(selec); + + PG_RETURN_FLOAT8((float8) selec); +} + +/* + * Inner join selectivity estimation for subnet inclusion/overlap operators + * + * Calculates MCV vs MCV, MCV vs histogram and histogram vs histogram + * selectivity for join using the subnet inclusion operators. Unlike the + * join selectivity function for the equality operator, eqjoinsel_inner(), + * one to one matching of the values is not enough. Network inclusion + * operators are likely to match many to many, so we must check all pairs. + * (Note: it might be possible to exploit understanding of the histogram's + * btree ordering to reduce the work needed, but we don't currently try.) + * Also, MCV vs histogram selectivity is not neglected as in eqjoinsel_inner(). + */ +static Selectivity +networkjoinsel_inner(Oid operator, + VariableStatData *vardata1, VariableStatData *vardata2) +{ + Form_pg_statistic stats; + double nullfrac1 = 0.0, + nullfrac2 = 0.0; + Selectivity selec = 0.0, + sumcommon1 = 0.0, + sumcommon2 = 0.0; + bool mcv1_exists = false, + mcv2_exists = false, + hist1_exists = false, + hist2_exists = false; + int opr_codenum; + int mcv1_length = 0, + mcv2_length = 0; + AttStatsSlot mcv1_slot; + AttStatsSlot mcv2_slot; + AttStatsSlot hist1_slot; + AttStatsSlot hist2_slot; + + if (HeapTupleIsValid(vardata1->statsTuple)) + { + stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple); + nullfrac1 = stats->stanullfrac; + + mcv1_exists = get_attstatsslot(&mcv1_slot, vardata1->statsTuple, + STATISTIC_KIND_MCV, InvalidOid, + ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS); + hist1_exists = get_attstatsslot(&hist1_slot, vardata1->statsTuple, + STATISTIC_KIND_HISTOGRAM, InvalidOid, + ATTSTATSSLOT_VALUES); + /* Arbitrarily limit number of MCVs considered */ + mcv1_length = Min(mcv1_slot.nvalues, MAX_CONSIDERED_ELEMS); + if (mcv1_exists) + sumcommon1 = mcv_population(mcv1_slot.numbers, mcv1_length); + } + else + { + memset(&mcv1_slot, 0, sizeof(mcv1_slot)); + memset(&hist1_slot, 0, sizeof(hist1_slot)); + } + + if (HeapTupleIsValid(vardata2->statsTuple)) + { + stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple); + nullfrac2 = stats->stanullfrac; + + mcv2_exists = get_attstatsslot(&mcv2_slot, vardata2->statsTuple, + STATISTIC_KIND_MCV, InvalidOid, + ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS); + hist2_exists = get_attstatsslot(&hist2_slot, vardata2->statsTuple, + STATISTIC_KIND_HISTOGRAM, InvalidOid, + ATTSTATSSLOT_VALUES); + /* Arbitrarily limit number of MCVs considered */ + mcv2_length = Min(mcv2_slot.nvalues, MAX_CONSIDERED_ELEMS); + if (mcv2_exists) + sumcommon2 = mcv_population(mcv2_slot.numbers, mcv2_length); + } + else + { + memset(&mcv2_slot, 0, sizeof(mcv2_slot)); + memset(&hist2_slot, 0, sizeof(hist2_slot)); + } + + opr_codenum = inet_opr_codenum(operator); + + /* + * Calculate selectivity for MCV vs MCV matches. 
+ */ + if (mcv1_exists && mcv2_exists) + selec += inet_mcv_join_sel(mcv1_slot.values, mcv1_slot.numbers, + mcv1_length, + mcv2_slot.values, mcv2_slot.numbers, + mcv2_length, + operator); + + /* + * Add in selectivities for MCV vs histogram matches, scaling according to + * the fractions of the populations represented by the histograms. Note + * that the second case needs to commute the operator. + */ + if (mcv1_exists && hist2_exists) + selec += (1.0 - nullfrac2 - sumcommon2) * + inet_mcv_hist_sel(mcv1_slot.values, mcv1_slot.numbers, mcv1_length, + hist2_slot.values, hist2_slot.nvalues, + opr_codenum); + if (mcv2_exists && hist1_exists) + selec += (1.0 - nullfrac1 - sumcommon1) * + inet_mcv_hist_sel(mcv2_slot.values, mcv2_slot.numbers, mcv2_length, + hist1_slot.values, hist1_slot.nvalues, + -opr_codenum); + + /* + * Add in selectivity for histogram vs histogram matches, again scaling + * appropriately. + */ + if (hist1_exists && hist2_exists) + selec += (1.0 - nullfrac1 - sumcommon1) * + (1.0 - nullfrac2 - sumcommon2) * + inet_hist_inclusion_join_sel(hist1_slot.values, hist1_slot.nvalues, + hist2_slot.values, hist2_slot.nvalues, + opr_codenum); + + /* + * If useful statistics are not available then use the default estimate. + * We can apply null fractions if known, though. + */ + if ((!mcv1_exists && !hist1_exists) || (!mcv2_exists && !hist2_exists)) + selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator); + + /* Release stats. */ + free_attstatsslot(&mcv1_slot); + free_attstatsslot(&mcv2_slot); + free_attstatsslot(&hist1_slot); + free_attstatsslot(&hist2_slot); + + return selec; +} + +/* + * Semi join selectivity estimation for subnet inclusion/overlap operators + * + * Calculates MCV vs MCV, MCV vs histogram, histogram vs MCV, and histogram vs + * histogram selectivity for semi/anti join cases. 
+ */ +static Selectivity +networkjoinsel_semi(Oid operator, + VariableStatData *vardata1, VariableStatData *vardata2) +{ + Form_pg_statistic stats; + Selectivity selec = 0.0, + sumcommon1 = 0.0, + sumcommon2 = 0.0; + double nullfrac1 = 0.0, + nullfrac2 = 0.0, + hist2_weight = 0.0; + bool mcv1_exists = false, + mcv2_exists = false, + hist1_exists = false, + hist2_exists = false; + int opr_codenum; + FmgrInfo proc; + int i, + mcv1_length = 0, + mcv2_length = 0; + AttStatsSlot mcv1_slot; + AttStatsSlot mcv2_slot; + AttStatsSlot hist1_slot; + AttStatsSlot hist2_slot; + + if (HeapTupleIsValid(vardata1->statsTuple)) + { + stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple); + nullfrac1 = stats->stanullfrac; + + mcv1_exists = get_attstatsslot(&mcv1_slot, vardata1->statsTuple, + STATISTIC_KIND_MCV, InvalidOid, + ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS); + hist1_exists = get_attstatsslot(&hist1_slot, vardata1->statsTuple, + STATISTIC_KIND_HISTOGRAM, InvalidOid, + ATTSTATSSLOT_VALUES); + /* Arbitrarily limit number of MCVs considered */ + mcv1_length = Min(mcv1_slot.nvalues, MAX_CONSIDERED_ELEMS); + if (mcv1_exists) + sumcommon1 = mcv_population(mcv1_slot.numbers, mcv1_length); + } + else + { + memset(&mcv1_slot, 0, sizeof(mcv1_slot)); + memset(&hist1_slot, 0, sizeof(hist1_slot)); + } + + if (HeapTupleIsValid(vardata2->statsTuple)) + { + stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple); + nullfrac2 = stats->stanullfrac; + + mcv2_exists = get_attstatsslot(&mcv2_slot, vardata2->statsTuple, + STATISTIC_KIND_MCV, InvalidOid, + ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS); + hist2_exists = get_attstatsslot(&hist2_slot, vardata2->statsTuple, + STATISTIC_KIND_HISTOGRAM, InvalidOid, + ATTSTATSSLOT_VALUES); + /* Arbitrarily limit number of MCVs considered */ + mcv2_length = Min(mcv2_slot.nvalues, MAX_CONSIDERED_ELEMS); + if (mcv2_exists) + sumcommon2 = mcv_population(mcv2_slot.numbers, mcv2_length); + } + else + { + memset(&mcv2_slot, 0, sizeof(mcv2_slot)); + memset(&hist2_slot, 0, sizeof(hist2_slot)); + } + + opr_codenum = inet_opr_codenum(operator); + fmgr_info(get_opcode(operator), &proc); + + /* Estimate number of input rows represented by RHS histogram. */ + if (hist2_exists && vardata2->rel) + hist2_weight = (1.0 - nullfrac2 - sumcommon2) * vardata2->rel->rows; + + /* + * Consider each element of the LHS MCV list, matching it to whatever RHS + * stats we have. Scale according to the known frequency of the MCV. + */ + if (mcv1_exists && (mcv2_exists || hist2_exists)) + { + for (i = 0; i < mcv1_length; i++) + { + selec += mcv1_slot.numbers[i] * + inet_semi_join_sel(mcv1_slot.values[i], + mcv2_exists, mcv2_slot.values, mcv2_length, + hist2_exists, + hist2_slot.values, hist2_slot.nvalues, + hist2_weight, + &proc, opr_codenum); + } + } + + /* + * Consider each element of the LHS histogram, except for the first and + * last elements, which we exclude on the grounds that they're outliers + * and thus not very representative. Scale on the assumption that each + * such histogram element represents an equal share of the LHS histogram + * population (which is a bit bogus, because the members of its bucket may + * not all act the same with respect to the join clause, but it's hard to + * do better). + * + * If there are too many histogram elements, decimate to limit runtime. 
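+ *
+ * Decimation example (illustrative): with 3000 histogram entries the step
+ * becomes k = (3000 - 3) / MAX_CONSIDERED_ELEMS + 1 = 3, so roughly every
+ * third interior entry is sampled, keeping the count at or below 1024.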
+ */ + if (hist1_exists && hist1_slot.nvalues > 2 && (mcv2_exists || hist2_exists)) + { + double hist_selec_sum = 0.0; + int k, + n; + + k = (hist1_slot.nvalues - 3) / MAX_CONSIDERED_ELEMS + 1; + + n = 0; + for (i = 1; i < hist1_slot.nvalues - 1; i += k) + { + hist_selec_sum += + inet_semi_join_sel(hist1_slot.values[i], + mcv2_exists, mcv2_slot.values, mcv2_length, + hist2_exists, + hist2_slot.values, hist2_slot.nvalues, + hist2_weight, + &proc, opr_codenum); + n++; + } + + selec += (1.0 - nullfrac1 - sumcommon1) * hist_selec_sum / n; + } + + /* + * If useful statistics are not available then use the default estimate. + * We can apply null fractions if known, though. + */ + if ((!mcv1_exists && !hist1_exists) || (!mcv2_exists && !hist2_exists)) + selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator); + + /* Release stats. */ + free_attstatsslot(&mcv1_slot); + free_attstatsslot(&mcv2_slot); + free_attstatsslot(&hist1_slot); + free_attstatsslot(&hist2_slot); + + return selec; +} + +/* + * Compute the fraction of a relation's population that is represented + * by the MCV list. + */ +static Selectivity +mcv_population(float4 *mcv_numbers, int mcv_nvalues) +{ + Selectivity sumcommon = 0.0; + int i; + + for (i = 0; i < mcv_nvalues; i++) + { + sumcommon += mcv_numbers[i]; + } + + return sumcommon; +} + +/* + * Inet histogram vs single value selectivity estimation + * + * Estimate the fraction of the histogram population that satisfies + * "value OPR CONST". (The result needs to be scaled to reflect the + * proportion of the total population represented by the histogram.) + * + * The histogram is originally for the inet btree comparison operators. + * Only the common bits of the network part and the length of the network part + * (masklen) are interesting for the subnet inclusion operators. Fortunately, + * btree comparison treats the network part as the major sort key. Even so, + * the length of the network part would not really be significant in the + * histogram. This would lead to big mistakes for data sets with uneven + * masklen distribution. To reduce this problem, comparisons with the left + * and the right sides of the buckets are used together. + * + * Histogram bucket matches are calculated in two forms. If the constant + * matches both bucket endpoints the bucket is considered as fully matched. + * The second form is to match the bucket partially; we recognize this when + * the constant matches just one endpoint, or the two endpoints fall on + * opposite sides of the constant. (Note that when the constant matches an + * interior histogram element, it gets credit for partial matches to the + * buckets on both sides, while a match to a histogram endpoint gets credit + * for only one partial match. This is desirable.) + * + * The divider in the partial bucket match is imagined as the distance + * between the decisive bits and the common bits of the addresses. It will + * be used as a power of two as it is the natural scale for the IP network + * inclusion. This partial bucket match divider calculation is an empirical + * formula and subject to change with more experiment. + * + * For a partial match, we try to calculate dividers for both of the + * boundaries. If the address family of a boundary value does not match the + * constant or comparison of the length of the network parts is not correct + * for the operator, the divider for that boundary will not be taken into + * account. 
If both of the dividers are valid, the greater one will be used + * to minimize the mistake in buckets that have disparate masklens. This + * calculation is unfair when dividers can be calculated for both of the + * boundaries but they are far from each other; but it is not a common + * situation as the boundaries are expected to share most of their significant + * bits of their masklens. The mistake would be greater, if we would use the + * minimum instead of the maximum, and we don't know a sensible way to combine + * them. + * + * For partial match in buckets that have different address families on the + * left and right sides, only the boundary with the same address family is + * taken into consideration. This can cause more mistakes for these buckets + * if the masklens of their boundaries are also disparate. But this can only + * happen in one bucket, since only two address families exist. It seems a + * better option than not considering these buckets at all. + */ +static Selectivity +inet_hist_value_sel(Datum *values, int nvalues, Datum constvalue, + int opr_codenum) +{ + Selectivity match = 0.0; + inet *query, + *left, + *right; + int i, + k, + n; + int left_order, + right_order, + left_divider, + right_divider; + + /* guard against zero-divide below */ + if (nvalues <= 1) + return 0.0; + + /* if there are too many histogram elements, decimate to limit runtime */ + k = (nvalues - 2) / MAX_CONSIDERED_ELEMS + 1; + + query = DatumGetInetPP(constvalue); + + /* "left" is the left boundary value of the current bucket ... */ + left = DatumGetInetPP(values[0]); + left_order = inet_inclusion_cmp(left, query, opr_codenum); + + n = 0; + for (i = k; i < nvalues; i += k) + { + /* ... and "right" is the right boundary value */ + right = DatumGetInetPP(values[i]); + right_order = inet_inclusion_cmp(right, query, opr_codenum); + + if (left_order == 0 && right_order == 0) + { + /* The whole bucket matches, since both endpoints do. */ + match += 1.0; + } + else if ((left_order <= 0 && right_order >= 0) || + (left_order >= 0 && right_order <= 0)) + { + /* Partial bucket match. */ + left_divider = inet_hist_match_divider(left, query, opr_codenum); + right_divider = inet_hist_match_divider(right, query, opr_codenum); + + if (left_divider >= 0 || right_divider >= 0) + match += 1.0 / pow(2.0, Max(left_divider, right_divider)); + } + + /* Shift the variables. */ + left = right; + left_order = right_order; + + /* Count the number of buckets considered. */ + n++; + } + + return match / n; +} + +/* + * Inet MCV vs MCV join selectivity estimation + * + * We simply add up the fractions of the populations that satisfy the clause. + * The result is exact and does not need to be scaled further. + */ +static Selectivity +inet_mcv_join_sel(Datum *mcv1_values, float4 *mcv1_numbers, int mcv1_nvalues, + Datum *mcv2_values, float4 *mcv2_numbers, int mcv2_nvalues, + Oid operator) +{ + Selectivity selec = 0.0; + FmgrInfo proc; + int i, + j; + + fmgr_info(get_opcode(operator), &proc); + + for (i = 0; i < mcv1_nvalues; i++) + { + for (j = 0; j < mcv2_nvalues; j++) + if (DatumGetBool(FunctionCall2(&proc, + mcv1_values[i], + mcv2_values[j]))) + selec += mcv1_numbers[i] * mcv2_numbers[j]; + } + return selec; +} + +/* + * Inet MCV vs histogram join selectivity estimation + * + * For each MCV on the lefthand side, estimate the fraction of the righthand's + * histogram population that satisfies the join clause, and add those up, + * scaling by the MCV's frequency. 
The result still needs to be scaled + * according to the fraction of the righthand's population represented by + * the histogram. + */ +static Selectivity +inet_mcv_hist_sel(Datum *mcv_values, float4 *mcv_numbers, int mcv_nvalues, + Datum *hist_values, int hist_nvalues, + int opr_codenum) +{ + Selectivity selec = 0.0; + int i; + + /* + * We'll call inet_hist_value_selec with the histogram on the left, so we + * must commute the operator. + */ + opr_codenum = -opr_codenum; + + for (i = 0; i < mcv_nvalues; i++) + { + selec += mcv_numbers[i] * + inet_hist_value_sel(hist_values, hist_nvalues, mcv_values[i], + opr_codenum); + } + return selec; +} + +/* + * Inet histogram vs histogram join selectivity estimation + * + * Here, we take all values listed in the second histogram (except for the + * first and last elements, which are excluded on the grounds of possibly + * not being very representative) and treat them as a uniform sample of + * the non-MCV population for that relation. For each one, we apply + * inet_hist_value_selec to see what fraction of the first histogram + * it matches. + * + * We could alternatively do this the other way around using the operator's + * commutator. XXX would it be worthwhile to do it both ways and take the + * average? That would at least avoid non-commutative estimation results. + */ +static Selectivity +inet_hist_inclusion_join_sel(Datum *hist1_values, int hist1_nvalues, + Datum *hist2_values, int hist2_nvalues, + int opr_codenum) +{ + double match = 0.0; + int i, + k, + n; + + if (hist2_nvalues <= 2) + return 0.0; /* no interior histogram elements */ + + /* if there are too many histogram elements, decimate to limit runtime */ + k = (hist2_nvalues - 3) / MAX_CONSIDERED_ELEMS + 1; + + n = 0; + for (i = 1; i < hist2_nvalues - 1; i += k) + { + match += inet_hist_value_sel(hist1_values, hist1_nvalues, + hist2_values[i], opr_codenum); + n++; + } + + return match / n; +} + +/* + * Inet semi join selectivity estimation for one value + * + * The function calculates the probability that there is at least one row + * in the RHS table that satisfies the "lhs_value op column" condition. + * It is used in semi join estimation to check a sample from the left hand + * side table. + * + * The MCV and histogram from the right hand side table should be provided as + * arguments with the lhs_value from the left hand side table for the join. + * hist_weight is the total number of rows represented by the histogram. + * For example, if the table has 1000 rows, and 10% of the rows are in the MCV + * list, and another 10% are NULLs, hist_weight would be 800. + * + * First, the lhs_value will be matched to the most common values. If it + * matches any of them, 1.0 will be returned, because then there is surely + * a match. + * + * Otherwise, the histogram will be used to estimate the number of rows in + * the second table that match the condition. If the estimate is greater + * than 1.0, 1.0 will be returned, because it means there is a greater chance + * that the lhs_value will match more than one row in the table. If it is + * between 0.0 and 1.0, it will be returned as the probability. 
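+ *
+ * Illustrative numbers: with hist_weight = 800 and a histogram fraction of
+ * 0.0005 matching the condition, the estimate is Min(1.0, 800 * 0.0005) =
+ * 0.4, while a fraction of 0.005 would be clamped to 1.0.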
+ */ +static Selectivity +inet_semi_join_sel(Datum lhs_value, + bool mcv_exists, Datum *mcv_values, int mcv_nvalues, + bool hist_exists, Datum *hist_values, int hist_nvalues, + double hist_weight, + FmgrInfo *proc, int opr_codenum) +{ + if (mcv_exists) + { + int i; + + for (i = 0; i < mcv_nvalues; i++) + { + if (DatumGetBool(FunctionCall2(proc, + lhs_value, + mcv_values[i]))) + return 1.0; + } + } + + if (hist_exists && hist_weight > 0) + { + Selectivity hist_selec; + + /* Commute operator, since we're passing lhs_value on the right */ + hist_selec = inet_hist_value_sel(hist_values, hist_nvalues, + lhs_value, -opr_codenum); + + if (hist_selec > 0) + return Min(1.0, hist_weight * hist_selec); + } + + return 0.0; +} + +/* + * Assign useful code numbers for the subnet inclusion/overlap operators + * + * Only inet_masklen_inclusion_cmp() and inet_hist_match_divider() depend + * on the exact codes assigned here; but many other places in this file + * know that they can negate a code to obtain the code for the commutator + * operator. + */ +static int +inet_opr_codenum(Oid operator) +{ + switch (operator) + { + case OID_INET_SUP_OP: + return -2; + case OID_INET_SUPEQ_OP: + return -1; + case OID_INET_OVERLAP_OP: + return 0; + case OID_INET_SUBEQ_OP: + return 1; + case OID_INET_SUB_OP: + return 2; + default: + elog(ERROR, "unrecognized operator %u for inet selectivity", + operator); + } + return 0; /* unreached, but keep compiler quiet */ +} + +/* + * Comparison function for the subnet inclusion/overlap operators + * + * If the comparison is okay for the specified inclusion operator, the return + * value will be 0. Otherwise the return value will be less than or greater + * than 0 as appropriate for the operator. + * + * Comparison is compatible with the basic comparison function for the inet + * type. See network_cmp_internal() in network.c for the original. Basic + * comparison operators are implemented with the network_cmp_internal() + * function. It is possible to implement the subnet inclusion operators with + * this function. + * + * Comparison is first on the common bits of the network part, then on the + * length of the network part (masklen) as in the network_cmp_internal() + * function. Only the first part is in this function. The second part is + * separated to another function for reusability. The difference between the + * second part and the original network_cmp_internal() is that the inclusion + * operator is considered while comparing the lengths of the network parts. + * See the inet_masklen_inclusion_cmp() function below. + */ +static int +inet_inclusion_cmp(inet *left, inet *right, int opr_codenum) +{ + if (ip_family(left) == ip_family(right)) + { + int order; + + order = bitncmp(ip_addr(left), ip_addr(right), + Min(ip_bits(left), ip_bits(right))); + if (order != 0) + return order; + + return inet_masklen_inclusion_cmp(left, right, opr_codenum); + } + + return ip_family(left) - ip_family(right); +} + +/* + * Masklen comparison function for the subnet inclusion/overlap operators + * + * Compares the lengths of the network parts of the inputs. If the comparison + * is okay for the specified inclusion operator, the return value will be 0. + * Otherwise the return value will be less than or greater than 0 as + * appropriate for the operator. + */ +static int +inet_masklen_inclusion_cmp(inet *left, inet *right, int opr_codenum) +{ + int order; + + order = (int) ip_bits(left) - (int) ip_bits(right); + + /* + * Return 0 if the operator would accept this combination of masklens. 
+ * Note that opr_codenum zero (overlaps) will accept all cases. + */ + if ((order > 0 && opr_codenum >= 0) || + (order == 0 && opr_codenum >= -1 && opr_codenum <= 1) || + (order < 0 && opr_codenum <= 0)) + return 0; + + /* + * Otherwise, return a negative value for sup/supeq (notionally, the RHS + * needs to have a larger masklen than it has, which would make it sort + * later), or a positive value for sub/subeq (vice versa). + */ + return opr_codenum; +} + +/* + * Inet histogram partial match divider calculation + * + * First the families and the lengths of the network parts are compared using + * the subnet inclusion operator. If those are acceptable for the operator, + * the divider will be calculated using the masklens and the common bits of + * the addresses. -1 will be returned if it cannot be calculated. + * + * See commentary for inet_hist_value_sel() for some rationale for this. + */ +static int +inet_hist_match_divider(inet *boundary, inet *query, int opr_codenum) +{ + if (ip_family(boundary) == ip_family(query) && + inet_masklen_inclusion_cmp(boundary, query, opr_codenum) == 0) + { + int min_bits, + decisive_bits; + + min_bits = Min(ip_bits(boundary), ip_bits(query)); + + /* + * Set decisive_bits to the masklen of the one that should contain the + * other according to the operator. + */ + if (opr_codenum < 0) + decisive_bits = ip_bits(boundary); + else if (opr_codenum > 0) + decisive_bits = ip_bits(query); + else + decisive_bits = min_bits; + + /* + * Now return the number of non-common decisive bits. (This will be + * zero if the boundary and query in fact match, else positive.) + */ + if (min_bits > 0) + return decisive_bits - bitncommon(ip_addr(boundary), + ip_addr(query), + min_bits); + return decisive_bits; + } + + return -1; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/network_spgist.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/network_spgist.c new file mode 100644 index 00000000000..5d3697306c0 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/network_spgist.c @@ -0,0 +1,712 @@ +/*------------------------------------------------------------------------- + * + * network_spgist.c + * SP-GiST support for network types. + * + * We split inet index entries first by address family (IPv4 or IPv6). + * If the entries below a given inner tuple are all of the same family, + * we identify their common prefix and split by the next bit of the address, + * and by whether their masklens exceed the length of the common prefix. + * + * An inner tuple that has both IPv4 and IPv6 children has a null prefix + * and exactly two nodes, the first being for IPv4 and the second for IPv6. + * + * Otherwise, the prefix is a CIDR value representing the common prefix, + * and there are exactly four nodes. Node numbers 0 and 1 are for addresses + * with the same masklen as the prefix, while node numbers 2 and 3 are for + * addresses with larger masklen. (We do not allow a tuple to contain + * entries with masklen smaller than its prefix's.) Node numbers 0 and 1 + * are distinguished by the next bit of the address after the common prefix, + * and likewise for node numbers 2 and 3. If there are no more bits in + * the address family, everything goes into node 0 (which will probably + * lead to creating an allTheSame tuple). 
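+ *
+ * For example (values chosen arbitrarily): under an inner tuple whose
+ * prefix is 10.1.0.0/16, the entry 10.1.128.0/16 has the same masklen and
+ * a 1 as its next address bit, so it goes to node 1, while 10.1.0.0/24 has
+ * a larger masklen and a 0 as its next bit, so it goes to node 2.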
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/adt/network_spgist.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <sys/socket.h> + +#include "access/spgist.h" +#include "catalog/pg_type.h" +#include "utils/builtins.h" +#include "utils/inet.h" +#include "varatt.h" + + +static int inet_spg_node_number(const inet *val, int commonbits); +static int inet_spg_consistent_bitmap(const inet *prefix, int nkeys, + ScanKey scankeys, bool leaf); + +/* + * The SP-GiST configuration function + */ +Datum +inet_spg_config(PG_FUNCTION_ARGS) +{ + /* spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0); */ + spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1); + + cfg->prefixType = CIDROID; + cfg->labelType = VOIDOID; + cfg->canReturnData = true; + cfg->longValuesOK = false; + + PG_RETURN_VOID(); +} + +/* + * The SP-GiST choose function + */ +Datum +inet_spg_choose(PG_FUNCTION_ARGS) +{ + spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0); + spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1); + inet *val = DatumGetInetPP(in->datum), + *prefix; + int commonbits; + + /* + * If we're looking at a tuple that splits by address family, choose the + * appropriate subnode. + */ + if (!in->hasPrefix) + { + /* allTheSame isn't possible for such a tuple */ + Assert(!in->allTheSame); + Assert(in->nNodes == 2); + + out->resultType = spgMatchNode; + out->result.matchNode.nodeN = (ip_family(val) == PGSQL_AF_INET) ? 0 : 1; + out->result.matchNode.restDatum = InetPGetDatum(val); + + PG_RETURN_VOID(); + } + + /* Else it must split by prefix */ + Assert(in->nNodes == 4 || in->allTheSame); + + prefix = DatumGetInetPP(in->prefixDatum); + commonbits = ip_bits(prefix); + + /* + * We cannot put addresses from different families under the same inner + * node, so we have to split if the new value's family is different. + */ + if (ip_family(val) != ip_family(prefix)) + { + /* Set up 2-node tuple */ + out->resultType = spgSplitTuple; + out->result.splitTuple.prefixHasPrefix = false; + out->result.splitTuple.prefixNNodes = 2; + out->result.splitTuple.prefixNodeLabels = NULL; + + /* Identify which node the existing data goes into */ + out->result.splitTuple.childNodeN = + (ip_family(prefix) == PGSQL_AF_INET) ? 0 : 1; + + out->result.splitTuple.postfixHasPrefix = true; + out->result.splitTuple.postfixPrefixDatum = InetPGetDatum(prefix); + + PG_RETURN_VOID(); + } + + /* + * If the new value does not match the existing prefix, we have to split. 
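+ * (Editor's illustration, not in the upstream source: if the existing
+ * prefix is 10.1.0.0/16 and the incoming value is 10.2.0.0/24, the two
+ * agree on only their first 14 bits, so the code below re-labels the split
+ * tuple with the prefix 10.0.0.0/14; the old 10.1.0.0/16 prefix then
+ * descends into node 2 of the new tuple, since its next address bit is 0
+ * and its masklen exceeds 14.)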
+ */ + if (ip_bits(val) < commonbits || + bitncmp(ip_addr(prefix), ip_addr(val), commonbits) != 0) + { + /* Determine new prefix length for the split tuple */ + commonbits = bitncommon(ip_addr(prefix), ip_addr(val), + Min(ip_bits(val), commonbits)); + + /* Set up 4-node tuple */ + out->resultType = spgSplitTuple; + out->result.splitTuple.prefixHasPrefix = true; + out->result.splitTuple.prefixPrefixDatum = + InetPGetDatum(cidr_set_masklen_internal(val, commonbits)); + out->result.splitTuple.prefixNNodes = 4; + out->result.splitTuple.prefixNodeLabels = NULL; + + /* Identify which node the existing data goes into */ + out->result.splitTuple.childNodeN = + inet_spg_node_number(prefix, commonbits); + + out->result.splitTuple.postfixHasPrefix = true; + out->result.splitTuple.postfixPrefixDatum = InetPGetDatum(prefix); + + PG_RETURN_VOID(); + } + + /* + * All OK, choose the node to descend into. (If this tuple is marked + * allTheSame, the core code will ignore our choice of nodeN; but we need + * not account for that case explicitly here.) + */ + out->resultType = spgMatchNode; + out->result.matchNode.nodeN = inet_spg_node_number(val, commonbits); + out->result.matchNode.restDatum = InetPGetDatum(val); + + PG_RETURN_VOID(); +} + +/* + * The GiST PickSplit method + */ +Datum +inet_spg_picksplit(PG_FUNCTION_ARGS) +{ + spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0); + spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1); + inet *prefix, + *tmp; + int i, + commonbits; + bool differentFamilies = false; + + /* Initialize the prefix with the first item */ + prefix = DatumGetInetPP(in->datums[0]); + commonbits = ip_bits(prefix); + + /* Examine remaining items to discover minimum common prefix length */ + for (i = 1; i < in->nTuples; i++) + { + tmp = DatumGetInetPP(in->datums[i]); + + if (ip_family(tmp) != ip_family(prefix)) + { + differentFamilies = true; + break; + } + + if (ip_bits(tmp) < commonbits) + commonbits = ip_bits(tmp); + commonbits = bitncommon(ip_addr(prefix), ip_addr(tmp), commonbits); + if (commonbits == 0) + break; + } + + /* Don't need labels; allocate output arrays */ + out->nodeLabels = NULL; + out->mapTuplesToNodes = (int *) palloc(sizeof(int) * in->nTuples); + out->leafTupleDatums = (Datum *) palloc(sizeof(Datum) * in->nTuples); + + if (differentFamilies) + { + /* Set up 2-node tuple */ + out->hasPrefix = false; + out->nNodes = 2; + + for (i = 0; i < in->nTuples; i++) + { + tmp = DatumGetInetPP(in->datums[i]); + out->mapTuplesToNodes[i] = + (ip_family(tmp) == PGSQL_AF_INET) ? 
0 : 1; + out->leafTupleDatums[i] = InetPGetDatum(tmp); + } + } + else + { + /* Set up 4-node tuple */ + out->hasPrefix = true; + out->prefixDatum = + InetPGetDatum(cidr_set_masklen_internal(prefix, commonbits)); + out->nNodes = 4; + + for (i = 0; i < in->nTuples; i++) + { + tmp = DatumGetInetPP(in->datums[i]); + out->mapTuplesToNodes[i] = inet_spg_node_number(tmp, commonbits); + out->leafTupleDatums[i] = InetPGetDatum(tmp); + } + } + + PG_RETURN_VOID(); +} + +/* + * The SP-GiST query consistency check for inner tuples + */ +Datum +inet_spg_inner_consistent(PG_FUNCTION_ARGS) +{ + spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0); + spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1); + int i; + int which; + + if (!in->hasPrefix) + { + Assert(!in->allTheSame); + Assert(in->nNodes == 2); + + /* Identify which child nodes need to be visited */ + which = 1 | (1 << 1); + + for (i = 0; i < in->nkeys; i++) + { + StrategyNumber strategy = in->scankeys[i].sk_strategy; + inet *argument = DatumGetInetPP(in->scankeys[i].sk_argument); + + switch (strategy) + { + case RTLessStrategyNumber: + case RTLessEqualStrategyNumber: + if (ip_family(argument) == PGSQL_AF_INET) + which &= 1; + break; + + case RTGreaterEqualStrategyNumber: + case RTGreaterStrategyNumber: + if (ip_family(argument) == PGSQL_AF_INET6) + which &= (1 << 1); + break; + + case RTNotEqualStrategyNumber: + break; + + default: + /* all other ops can only match addrs of same family */ + if (ip_family(argument) == PGSQL_AF_INET) + which &= 1; + else + which &= (1 << 1); + break; + } + } + } + else if (!in->allTheSame) + { + Assert(in->nNodes == 4); + + /* Identify which child nodes need to be visited */ + which = inet_spg_consistent_bitmap(DatumGetInetPP(in->prefixDatum), + in->nkeys, in->scankeys, false); + } + else + { + /* Must visit all nodes; we assume there are less than 32 of 'em */ + which = ~0; + } + + out->nNodes = 0; + + if (which) + { + out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes); + + for (i = 0; i < in->nNodes; i++) + { + if (which & (1 << i)) + { + out->nodeNumbers[out->nNodes] = i; + out->nNodes++; + } + } + } + + PG_RETURN_VOID(); +} + +/* + * The SP-GiST query consistency check for leaf tuples + */ +Datum +inet_spg_leaf_consistent(PG_FUNCTION_ARGS) +{ + spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0); + spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1); + inet *leaf = DatumGetInetPP(in->leafDatum); + + /* All tests are exact. */ + out->recheck = false; + + /* Leaf is what it is... */ + out->leafValue = InetPGetDatum(leaf); + + /* Use common code to apply the tests. */ + PG_RETURN_BOOL(inet_spg_consistent_bitmap(leaf, in->nkeys, in->scankeys, + true)); +} + +/* + * Calculate node number (within a 4-node, single-family inner index tuple) + * + * The value must have the same family as the node's prefix, and + * commonbits is the mask length of the prefix. We use even or odd + * nodes according to the next address bit after the commonbits, + * and low or high nodes according to whether the value's mask length + * is larger than commonbits. 
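+ * (Editor's illustration, not in the upstream source: with commonbits = 8
+ * under a 10.0.0.0/8 prefix, 10.1.2.3/8 maps to node 0 (next bit 0,
+ * masklen equal to 8), 10.200.0.0/8 to node 1 (next bit 1), 10.1.0.0/16
+ * to node 2 (next bit 0, masklen greater than 8), and 10.200.0.0/16 to
+ * node 3.)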
+ */ +static int +inet_spg_node_number(const inet *val, int commonbits) +{ + int nodeN = 0; + + if (commonbits < ip_maxbits(val) && + ip_addr(val)[commonbits / 8] & (1 << (7 - commonbits % 8))) + nodeN |= 1; + if (commonbits < ip_bits(val)) + nodeN |= 2; + + return nodeN; +} + +/* + * Calculate bitmap of node numbers that are consistent with the query + * + * This can be used either at a 4-way inner tuple, or at a leaf tuple. + * In the latter case, we should return a boolean result (0 or 1) + * not a bitmap. + * + * This definition is pretty odd, but the inner and leaf consistency checks + * are mostly common and it seems best to keep them in one function. + */ +static int +inet_spg_consistent_bitmap(const inet *prefix, int nkeys, ScanKey scankeys, + bool leaf) +{ + int bitmap; + int commonbits, + i; + + /* Initialize result to allow visiting all children */ + if (leaf) + bitmap = 1; + else + bitmap = 1 | (1 << 1) | (1 << 2) | (1 << 3); + + commonbits = ip_bits(prefix); + + for (i = 0; i < nkeys; i++) + { + inet *argument = DatumGetInetPP(scankeys[i].sk_argument); + StrategyNumber strategy = scankeys[i].sk_strategy; + int order; + + /* + * Check 0: different families + * + * Matching families do not help any of the strategies. + */ + if (ip_family(argument) != ip_family(prefix)) + { + switch (strategy) + { + case RTLessStrategyNumber: + case RTLessEqualStrategyNumber: + if (ip_family(argument) < ip_family(prefix)) + bitmap = 0; + break; + + case RTGreaterEqualStrategyNumber: + case RTGreaterStrategyNumber: + if (ip_family(argument) > ip_family(prefix)) + bitmap = 0; + break; + + case RTNotEqualStrategyNumber: + break; + + default: + /* For all other cases, we can be sure there is no match */ + bitmap = 0; + break; + } + + if (!bitmap) + break; + + /* Other checks make no sense with different families. */ + continue; + } + + /* + * Check 1: network bit count + * + * Network bit count (ip_bits) helps to check leaves for sub network + * and sup network operators. At non-leaf nodes, we know every child + * value has greater ip_bits, so we can avoid descending in some cases + * too. + * + * This check is less expensive than checking the address bits, so we + * are doing this before, but it has to be done after for the basic + * comparison strategies, because ip_bits only affect their results + * when the common network bits are the same. + */ + switch (strategy) + { + case RTSubStrategyNumber: + if (commonbits <= ip_bits(argument)) + bitmap &= (1 << 2) | (1 << 3); + break; + + case RTSubEqualStrategyNumber: + if (commonbits < ip_bits(argument)) + bitmap &= (1 << 2) | (1 << 3); + break; + + case RTSuperStrategyNumber: + if (commonbits == ip_bits(argument) - 1) + bitmap &= 1 | (1 << 1); + else if (commonbits >= ip_bits(argument)) + bitmap = 0; + break; + + case RTSuperEqualStrategyNumber: + if (commonbits == ip_bits(argument)) + bitmap &= 1 | (1 << 1); + else if (commonbits > ip_bits(argument)) + bitmap = 0; + break; + + case RTEqualStrategyNumber: + if (commonbits < ip_bits(argument)) + bitmap &= (1 << 2) | (1 << 3); + else if (commonbits == ip_bits(argument)) + bitmap &= 1 | (1 << 1); + else + bitmap = 0; + break; + } + + if (!bitmap) + break; + + /* + * Check 2: common network bits + * + * Compare available common prefix bits to the query, but not beyond + * either the query's netmask or the minimum netmask among the + * represented values. If these bits don't match the query, we can + * eliminate some cases. 
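+ * (Editor's illustration, not in the upstream source: under an inner tuple
+ * whose prefix is 10.0.0.0/8, a query such as "x && 11.0.0.0/8" or
+ * "x = 11.5.0.0/16" already differs from the prefix within the first 8
+ * bits, so the default branch below clears the bitmap and the whole
+ * subtree is skipped; only the ordering operators and <> can survive such
+ * a mismatch.)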
+ */ + order = bitncmp(ip_addr(prefix), ip_addr(argument), + Min(commonbits, ip_bits(argument))); + + if (order != 0) + { + switch (strategy) + { + case RTLessStrategyNumber: + case RTLessEqualStrategyNumber: + if (order > 0) + bitmap = 0; + break; + + case RTGreaterEqualStrategyNumber: + case RTGreaterStrategyNumber: + if (order < 0) + bitmap = 0; + break; + + case RTNotEqualStrategyNumber: + break; + + default: + /* For all other cases, we can be sure there is no match */ + bitmap = 0; + break; + } + + if (!bitmap) + break; + + /* + * Remaining checks make no sense when common bits don't match. + */ + continue; + } + + /* + * Check 3: next network bit + * + * We can filter out branch 2 or 3 using the next network bit of the + * argument, if it is available. + * + * This check matters for the performance of the search. The results + * would be correct without it. + */ + if (bitmap & ((1 << 2) | (1 << 3)) && + commonbits < ip_bits(argument)) + { + int nextbit; + + nextbit = ip_addr(argument)[commonbits / 8] & + (1 << (7 - commonbits % 8)); + + switch (strategy) + { + case RTLessStrategyNumber: + case RTLessEqualStrategyNumber: + if (!nextbit) + bitmap &= 1 | (1 << 1) | (1 << 2); + break; + + case RTGreaterEqualStrategyNumber: + case RTGreaterStrategyNumber: + if (nextbit) + bitmap &= 1 | (1 << 1) | (1 << 3); + break; + + case RTNotEqualStrategyNumber: + break; + + default: + if (!nextbit) + bitmap &= 1 | (1 << 1) | (1 << 2); + else + bitmap &= 1 | (1 << 1) | (1 << 3); + break; + } + + if (!bitmap) + break; + } + + /* + * Remaining checks are only for the basic comparison strategies. This + * test relies on the strategy number ordering defined in stratnum.h. + */ + if (strategy < RTEqualStrategyNumber || + strategy > RTGreaterEqualStrategyNumber) + continue; + + /* + * Check 4: network bit count + * + * At this point, we know that the common network bits of the prefix + * and the argument are the same, so we can go forward and check the + * ip_bits. + */ + switch (strategy) + { + case RTLessStrategyNumber: + case RTLessEqualStrategyNumber: + if (commonbits == ip_bits(argument)) + bitmap &= 1 | (1 << 1); + else if (commonbits > ip_bits(argument)) + bitmap = 0; + break; + + case RTGreaterEqualStrategyNumber: + case RTGreaterStrategyNumber: + if (commonbits < ip_bits(argument)) + bitmap &= (1 << 2) | (1 << 3); + break; + } + + if (!bitmap) + break; + + /* Remaining checks don't make sense with different ip_bits. */ + if (commonbits != ip_bits(argument)) + continue; + + /* + * Check 5: next host bit + * + * We can filter out branch 0 or 1 using the next host bit of the + * argument, if it is available. + * + * This check matters for the performance of the search. The results + * would be correct without it. There is no point in running it for + * leafs as we have to check the whole address on the next step. 
+ */ + if (!leaf && bitmap & (1 | (1 << 1)) && + commonbits < ip_maxbits(argument)) + { + int nextbit; + + nextbit = ip_addr(argument)[commonbits / 8] & + (1 << (7 - commonbits % 8)); + + switch (strategy) + { + case RTLessStrategyNumber: + case RTLessEqualStrategyNumber: + if (!nextbit) + bitmap &= 1 | (1 << 2) | (1 << 3); + break; + + case RTGreaterEqualStrategyNumber: + case RTGreaterStrategyNumber: + if (nextbit) + bitmap &= (1 << 1) | (1 << 2) | (1 << 3); + break; + + case RTNotEqualStrategyNumber: + break; + + default: + if (!nextbit) + bitmap &= 1 | (1 << 2) | (1 << 3); + else + bitmap &= (1 << 1) | (1 << 2) | (1 << 3); + break; + } + + if (!bitmap) + break; + } + + /* + * Check 6: whole address + * + * This is the last check for correctness of the basic comparison + * strategies. It's only appropriate at leaf entries. + */ + if (leaf) + { + /* Redo ordering comparison using all address bits */ + order = bitncmp(ip_addr(prefix), ip_addr(argument), + ip_maxbits(prefix)); + + switch (strategy) + { + case RTLessStrategyNumber: + if (order >= 0) + bitmap = 0; + break; + + case RTLessEqualStrategyNumber: + if (order > 0) + bitmap = 0; + break; + + case RTEqualStrategyNumber: + if (order != 0) + bitmap = 0; + break; + + case RTGreaterEqualStrategyNumber: + if (order < 0) + bitmap = 0; + break; + + case RTGreaterStrategyNumber: + if (order <= 0) + bitmap = 0; + break; + + case RTNotEqualStrategyNumber: + if (order == 0) + bitmap = 0; + break; + } + + if (!bitmap) + break; + } + } + + return bitmap; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/numeric.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/numeric.c new file mode 100644 index 00000000000..3c3184f15b5 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/numeric.c @@ -0,0 +1,12025 @@ +/*------------------------------------------------------------------------- + * + * numeric.c + * An exact numeric data type for the Postgres database system + * + * Original coding 1998, Jan Wieck. Heavily revised 2003, Tom Lane. + * + * Many of the algorithmic ideas are borrowed from David M. Smith's "FM" + * multiple-precision math library, most recently published as Algorithm + * 786: Multiple-Precision Complex Arithmetic and Functions, ACM + * Transactions on Mathematical Software, Vol. 24, No. 4, December 1998, + * pages 359-367. + * + * Copyright (c) 1998-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/adt/numeric.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <ctype.h> +#include <float.h> +#include <limits.h> +#include <math.h> + +#include "catalog/pg_type.h" +#include "common/hashfn.h" +#include "common/int.h" +#include "funcapi.h" +#include "lib/hyperloglog.h" +#include "libpq/pqformat.h" +#include "miscadmin.h" +#include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/float.h" +#include "utils/guc.h" +#include "utils/numeric.h" +#include "utils/pg_lsn.h" +#include "utils/sortsupport.h" + +/* ---------- + * Uncomment the following to enable compilation of dump_numeric() + * and dump_var() and to get a dump of any result produced by make_result(). + * ---------- +#define NUMERIC_DEBUG + */ + + +/* ---------- + * Local data types + * + * Numeric values are represented in a base-NBASE floating point format. + * Each "digit" ranges from 0 to NBASE-1. 
The type NumericDigit is signed + * and wide enough to store a digit. We assume that NBASE*NBASE can fit in + * an int. Although the purely calculational routines could handle any even + * NBASE that's less than sqrt(INT_MAX), in practice we are only interested + * in NBASE a power of ten, so that I/O conversions and decimal rounding + * are easy. Also, it's actually more efficient if NBASE is rather less than + * sqrt(INT_MAX), so that there is "headroom" for mul_var and div_var_fast to + * postpone processing carries. + * + * Values of NBASE other than 10000 are considered of historical interest only + * and are no longer supported in any sense; no mechanism exists for the client + * to discover the base, so every client supporting binary mode expects the + * base-10000 format. If you plan to change this, also note the numeric + * abbreviation code, which assumes NBASE=10000. + * ---------- + */ + +#if 0 +#define NBASE 10 +#define HALF_NBASE 5 +#define DEC_DIGITS 1 /* decimal digits per NBASE digit */ +#define MUL_GUARD_DIGITS 4 /* these are measured in NBASE digits */ +#define DIV_GUARD_DIGITS 8 + +typedef signed char NumericDigit; +#endif + +#if 0 +#define NBASE 100 +#define HALF_NBASE 50 +#define DEC_DIGITS 2 /* decimal digits per NBASE digit */ +#define MUL_GUARD_DIGITS 3 /* these are measured in NBASE digits */ +#define DIV_GUARD_DIGITS 6 + +typedef signed char NumericDigit; +#endif + +#if 1 +#define NBASE 10000 +#define HALF_NBASE 5000 +#define DEC_DIGITS 4 /* decimal digits per NBASE digit */ +#define MUL_GUARD_DIGITS 2 /* these are measured in NBASE digits */ +#define DIV_GUARD_DIGITS 4 + +typedef int16 NumericDigit; +#endif + +/* + * The Numeric type as stored on disk. + * + * If the high bits of the first word of a NumericChoice (n_header, or + * n_short.n_header, or n_long.n_sign_dscale) are NUMERIC_SHORT, then the + * numeric follows the NumericShort format; if they are NUMERIC_POS or + * NUMERIC_NEG, it follows the NumericLong format. If they are NUMERIC_SPECIAL, + * the value is a NaN or Infinity. We currently always store SPECIAL values + * using just two bytes (i.e. only n_header), but previous releases used only + * the NumericLong format, so we might find 4-byte NaNs (though not infinities) + * on disk if a database has been migrated using pg_upgrade. In either case, + * the low-order bits of a special value's header are reserved and currently + * should always be set to zero. + * + * In the NumericShort format, the remaining 14 bits of the header word + * (n_short.n_header) are allocated as follows: 1 for sign (positive or + * negative), 6 for dynamic scale, and 7 for weight. In practice, most + * commonly-encountered values can be represented this way. + * + * In the NumericLong format, the remaining 14 bits of the header word + * (n_long.n_sign_dscale) represent the display scale; and the weight is + * stored separately in n_weight. + * + * NOTE: by convention, values in the packed form have been stripped of + * all leading and trailing zero digits (where a "digit" is of base NBASE). + * In particular, if the value is zero, there will be no digits at all! + * The weight is arbitrary in that case, but we normally set it to zero. 
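+ *
+ * (Editor's illustration, not in the upstream source: with NBASE = 10000
+ * and DEC_DIGITS = 4, the value 1234567.89 is stored as the digits
+ * {123, 4567, 8900} with weight 1, meaning the first digit is scaled by
+ * 10000^1, and dscale 2; the value zero is stored with no digits at all.)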
+ */ + +struct NumericShort +{ + uint16 n_header; /* Sign + display scale + weight */ + NumericDigit n_data[FLEXIBLE_ARRAY_MEMBER]; /* Digits */ +}; + +struct NumericLong +{ + uint16 n_sign_dscale; /* Sign + display scale */ + int16 n_weight; /* Weight of 1st digit */ + NumericDigit n_data[FLEXIBLE_ARRAY_MEMBER]; /* Digits */ +}; + +union NumericChoice +{ + uint16 n_header; /* Header word */ + struct NumericLong n_long; /* Long form (4-byte header) */ + struct NumericShort n_short; /* Short form (2-byte header) */ +}; + +struct NumericData +{ + int32 vl_len_; /* varlena header (do not touch directly!) */ + union NumericChoice choice; /* choice of format */ +}; + + +/* + * Interpretation of high bits. + */ + +#define NUMERIC_SIGN_MASK 0xC000 +#define NUMERIC_POS 0x0000 +#define NUMERIC_NEG 0x4000 +#define NUMERIC_SHORT 0x8000 +#define NUMERIC_SPECIAL 0xC000 + +#define NUMERIC_FLAGBITS(n) ((n)->choice.n_header & NUMERIC_SIGN_MASK) +#define NUMERIC_IS_SHORT(n) (NUMERIC_FLAGBITS(n) == NUMERIC_SHORT) +#define NUMERIC_IS_SPECIAL(n) (NUMERIC_FLAGBITS(n) == NUMERIC_SPECIAL) + +#define NUMERIC_HDRSZ (VARHDRSZ + sizeof(uint16) + sizeof(int16)) +#define NUMERIC_HDRSZ_SHORT (VARHDRSZ + sizeof(uint16)) + +/* + * If the flag bits are NUMERIC_SHORT or NUMERIC_SPECIAL, we want the short + * header; otherwise, we want the long one. Instead of testing against each + * value, we can just look at the high bit, for a slight efficiency gain. + */ +#define NUMERIC_HEADER_IS_SHORT(n) (((n)->choice.n_header & 0x8000) != 0) +#define NUMERIC_HEADER_SIZE(n) \ + (VARHDRSZ + sizeof(uint16) + \ + (NUMERIC_HEADER_IS_SHORT(n) ? 0 : sizeof(int16))) + +/* + * Definitions for special values (NaN, positive infinity, negative infinity). + * + * The two bits after the NUMERIC_SPECIAL bits are 00 for NaN, 01 for positive + * infinity, 11 for negative infinity. (This makes the sign bit match where + * it is in a short-format value, though we make no use of that at present.) + * We could mask off the remaining bits before testing the active bits, but + * currently those bits must be zeroes, so masking would just add cycles. + */ +#define NUMERIC_EXT_SIGN_MASK 0xF000 /* high bits plus NaN/Inf flag bits */ +#define NUMERIC_NAN 0xC000 +#define NUMERIC_PINF 0xD000 +#define NUMERIC_NINF 0xF000 +#define NUMERIC_INF_SIGN_MASK 0x2000 + +#define NUMERIC_EXT_FLAGBITS(n) ((n)->choice.n_header & NUMERIC_EXT_SIGN_MASK) +#define NUMERIC_IS_NAN(n) ((n)->choice.n_header == NUMERIC_NAN) +#define NUMERIC_IS_PINF(n) ((n)->choice.n_header == NUMERIC_PINF) +#define NUMERIC_IS_NINF(n) ((n)->choice.n_header == NUMERIC_NINF) +#define NUMERIC_IS_INF(n) \ + (((n)->choice.n_header & ~NUMERIC_INF_SIGN_MASK) == NUMERIC_PINF) + +/* + * Short format definitions. + */ + +#define NUMERIC_SHORT_SIGN_MASK 0x2000 +#define NUMERIC_SHORT_DSCALE_MASK 0x1F80 +#define NUMERIC_SHORT_DSCALE_SHIFT 7 +#define NUMERIC_SHORT_DSCALE_MAX \ + (NUMERIC_SHORT_DSCALE_MASK >> NUMERIC_SHORT_DSCALE_SHIFT) +#define NUMERIC_SHORT_WEIGHT_SIGN_MASK 0x0040 +#define NUMERIC_SHORT_WEIGHT_MASK 0x003F +#define NUMERIC_SHORT_WEIGHT_MAX NUMERIC_SHORT_WEIGHT_MASK +#define NUMERIC_SHORT_WEIGHT_MIN (-(NUMERIC_SHORT_WEIGHT_MASK+1)) + +/* + * Extract sign, display scale, weight. These macros extract field values + * suitable for the NumericVar format from the Numeric (on-disk) format. + * + * Note that we don't trouble to ensure that dscale and weight read as zero + * for an infinity; however, that doesn't matter since we never convert + * "special" numerics to NumericVar form. 
Only the constants defined below + * (const_nan, etc) ever represent a non-finite value as a NumericVar. + */ + +#define NUMERIC_DSCALE_MASK 0x3FFF +#define NUMERIC_DSCALE_MAX NUMERIC_DSCALE_MASK + +#define NUMERIC_SIGN(n) \ + (NUMERIC_IS_SHORT(n) ? \ + (((n)->choice.n_short.n_header & NUMERIC_SHORT_SIGN_MASK) ? \ + NUMERIC_NEG : NUMERIC_POS) : \ + (NUMERIC_IS_SPECIAL(n) ? \ + NUMERIC_EXT_FLAGBITS(n) : NUMERIC_FLAGBITS(n))) +#define NUMERIC_DSCALE(n) (NUMERIC_HEADER_IS_SHORT((n)) ? \ + ((n)->choice.n_short.n_header & NUMERIC_SHORT_DSCALE_MASK) \ + >> NUMERIC_SHORT_DSCALE_SHIFT \ + : ((n)->choice.n_long.n_sign_dscale & NUMERIC_DSCALE_MASK)) +#define NUMERIC_WEIGHT(n) (NUMERIC_HEADER_IS_SHORT((n)) ? \ + (((n)->choice.n_short.n_header & NUMERIC_SHORT_WEIGHT_SIGN_MASK ? \ + ~NUMERIC_SHORT_WEIGHT_MASK : 0) \ + | ((n)->choice.n_short.n_header & NUMERIC_SHORT_WEIGHT_MASK)) \ + : ((n)->choice.n_long.n_weight)) + +/* ---------- + * NumericVar is the format we use for arithmetic. The digit-array part + * is the same as the NumericData storage format, but the header is more + * complex. + * + * The value represented by a NumericVar is determined by the sign, weight, + * ndigits, and digits[] array. If it is a "special" value (NaN or Inf) + * then only the sign field matters; ndigits should be zero, and the weight + * and dscale fields are ignored. + * + * Note: the first digit of a NumericVar's value is assumed to be multiplied + * by NBASE ** weight. Another way to say it is that there are weight+1 + * digits before the decimal point. It is possible to have weight < 0. + * + * buf points at the physical start of the palloc'd digit buffer for the + * NumericVar. digits points at the first digit in actual use (the one + * with the specified weight). We normally leave an unused digit or two + * (preset to zeroes) between buf and digits, so that there is room to store + * a carry out of the top digit without reallocating space. We just need to + * decrement digits (and increment weight) to make room for the carry digit. + * (There is no such extra space in a numeric value stored in the database, + * only in a NumericVar in memory.) + * + * If buf is NULL then the digit buffer isn't actually palloc'd and should + * not be freed --- see the constants below for an example. + * + * dscale, or display scale, is the nominal precision expressed as number + * of digits after the decimal point (it must always be >= 0 at present). + * dscale may be more than the number of physically stored fractional digits, + * implying that we have suppressed storage of significant trailing zeroes. + * It should never be less than the number of stored digits, since that would + * imply hiding digits that are present. NOTE that dscale is always expressed + * in *decimal* digits, and so it may correspond to a fractional number of + * base-NBASE digits --- divide by DEC_DIGITS to convert to NBASE digits. + * + * rscale, or result scale, is the target precision for a computation. + * Like dscale it is expressed as number of *decimal* digits after the decimal + * point, and is always >= 0 at present. + * Note that rscale is not stored in variables --- it's figured on-the-fly + * from the dscales of the inputs. + * + * While we consistently use "weight" to refer to the base-NBASE weight of + * a numeric value, it is convenient in some scale-related calculations to + * make use of the base-10 weight (ie, the approximate log10 of the value). + * To avoid confusion, such a decimal-units weight is called a "dweight". 
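+ * For instance (editor's illustration, not in the upstream source), the
+ * value 1234567.89 has weight 1 in base-NBASE terms but a dweight of 6,
+ * since floor(log10(1234567.89)) = 6.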
+ * + * NB: All the variable-level functions are written in a style that makes it + * possible to give one and the same variable as argument and destination. + * This is feasible because the digit buffer is separate from the variable. + * ---------- + */ +typedef struct NumericVar +{ + int ndigits; /* # of digits in digits[] - can be 0! */ + int weight; /* weight of first digit */ + int sign; /* NUMERIC_POS, _NEG, _NAN, _PINF, or _NINF */ + int dscale; /* display scale */ + NumericDigit *buf; /* start of palloc'd space for digits[] */ + NumericDigit *digits; /* base-NBASE digits */ +} NumericVar; + + +/* ---------- + * Data for generate_series + * ---------- + */ +typedef struct +{ + NumericVar current; + NumericVar stop; + NumericVar step; +} generate_series_numeric_fctx; + + +/* ---------- + * Sort support. + * ---------- + */ +typedef struct +{ + void *buf; /* buffer for short varlenas */ + int64 input_count; /* number of non-null values seen */ + bool estimating; /* true if estimating cardinality */ + + hyperLogLogState abbr_card; /* cardinality estimator */ +} NumericSortSupport; + + +/* ---------- + * Fast sum accumulator. + * + * NumericSumAccum is used to implement SUM(), and other standard aggregates + * that track the sum of input values. It uses 32-bit integers to store the + * digits, instead of the normal 16-bit integers (with NBASE=10000). This + * way, we can safely accumulate up to NBASE - 1 values without propagating + * carry, before risking overflow of any of the digits. 'num_uncarried' + * tracks how many values have been accumulated without propagating carry. + * + * Positive and negative values are accumulated separately, in 'pos_digits' + * and 'neg_digits'. This is simpler and faster than deciding whether to add + * or subtract from the current value, for each new value (see sub_var() for + * the logic we avoid by doing this). Both buffers are of same size, and + * have the same weight and scale. In accum_sum_final(), the positive and + * negative sums are added together to produce the final result. + * + * When a new value has a larger ndigits or weight than the accumulator + * currently does, the accumulator is enlarged to accommodate the new value. + * We normally have one zero digit reserved for carry propagation, and that + * is indicated by the 'have_carry_space' flag. When accum_sum_carry() uses + * up the reserved digit, it clears the 'have_carry_space' flag. The next + * call to accum_sum_add() will enlarge the buffer, to make room for the + * extra digit, and set the flag again. + * + * To initialize a new accumulator, simply reset all fields to zeros. + * + * The accumulator does not handle NaNs. + * ---------- + */ +typedef struct NumericSumAccum +{ + int ndigits; + int weight; + int dscale; + int num_uncarried; + bool have_carry_space; + int32 *pos_digits; + int32 *neg_digits; +} NumericSumAccum; + + +/* + * We define our own macros for packing and unpacking abbreviated-key + * representations for numeric values in order to avoid depending on + * USE_FLOAT8_BYVAL. The type of abbreviation we use is based only on + * the size of a datum, not the argument-passing convention for float8. + * + * The range of abbreviations for finite values is from +PG_INT64/32_MAX + * to -PG_INT64/32_MAX. NaN has the abbreviation PG_INT64/32_MIN, and we + * define the sort ordering to make that work out properly (see further + * comments below). PINF and NINF share the abbreviations of the largest + * and smallest finite abbreviation classes. 
+ */ +#define NUMERIC_ABBREV_BITS (SIZEOF_DATUM * BITS_PER_BYTE) +#if SIZEOF_DATUM == 8 +#define NumericAbbrevGetDatum(X) ((Datum) (X)) +#define DatumGetNumericAbbrev(X) ((int64) (X)) +#define NUMERIC_ABBREV_NAN NumericAbbrevGetDatum(PG_INT64_MIN) +#define NUMERIC_ABBREV_PINF NumericAbbrevGetDatum(-PG_INT64_MAX) +#define NUMERIC_ABBREV_NINF NumericAbbrevGetDatum(PG_INT64_MAX) +#else +#define NumericAbbrevGetDatum(X) ((Datum) (X)) +#define DatumGetNumericAbbrev(X) ((int32) (X)) +#define NUMERIC_ABBREV_NAN NumericAbbrevGetDatum(PG_INT32_MIN) +#define NUMERIC_ABBREV_PINF NumericAbbrevGetDatum(-PG_INT32_MAX) +#define NUMERIC_ABBREV_NINF NumericAbbrevGetDatum(PG_INT32_MAX) +#endif + + +/* ---------- + * Some preinitialized constants + * ---------- + */ +static const NumericDigit const_zero_data[1] = {0}; +static const NumericVar const_zero = +{0, 0, NUMERIC_POS, 0, NULL, (NumericDigit *) const_zero_data}; + +static const NumericDigit const_one_data[1] = {1}; +static const NumericVar const_one = +{1, 0, NUMERIC_POS, 0, NULL, (NumericDigit *) const_one_data}; + +static const NumericVar const_minus_one = +{1, 0, NUMERIC_NEG, 0, NULL, (NumericDigit *) const_one_data}; + +static const NumericDigit const_two_data[1] = {2}; +static const NumericVar const_two = +{1, 0, NUMERIC_POS, 0, NULL, (NumericDigit *) const_two_data}; + +#if DEC_DIGITS == 4 +static const NumericDigit const_zero_point_nine_data[1] = {9000}; +#elif DEC_DIGITS == 2 +static const NumericDigit const_zero_point_nine_data[1] = {90}; +#elif DEC_DIGITS == 1 +static const NumericDigit const_zero_point_nine_data[1] = {9}; +#endif +static const NumericVar const_zero_point_nine = +{1, -1, NUMERIC_POS, 1, NULL, (NumericDigit *) const_zero_point_nine_data}; + +#if DEC_DIGITS == 4 +static const NumericDigit const_one_point_one_data[2] = {1, 1000}; +#elif DEC_DIGITS == 2 +static const NumericDigit const_one_point_one_data[2] = {1, 10}; +#elif DEC_DIGITS == 1 +static const NumericDigit const_one_point_one_data[2] = {1, 1}; +#endif +static const NumericVar const_one_point_one = +{2, 0, NUMERIC_POS, 1, NULL, (NumericDigit *) const_one_point_one_data}; + +static const NumericVar const_nan = +{0, 0, NUMERIC_NAN, 0, NULL, NULL}; + +static const NumericVar const_pinf = +{0, 0, NUMERIC_PINF, 0, NULL, NULL}; + +static const NumericVar const_ninf = +{0, 0, NUMERIC_NINF, 0, NULL, NULL}; + +#if DEC_DIGITS == 4 +static const int round_powers[4] = {0, 1000, 100, 10}; +#endif + + +/* ---------- + * Local functions + * ---------- + */ + +#ifdef NUMERIC_DEBUG +static void dump_numeric(const char *str, Numeric num); +static void dump_var(const char *str, NumericVar *var); +#else +#define dump_numeric(s,n) +#define dump_var(s,v) +#endif + +#define digitbuf_alloc(ndigits) \ + ((NumericDigit *) palloc((ndigits) * sizeof(NumericDigit))) +#define digitbuf_free(buf) \ + do { \ + if ((buf) != NULL) \ + pfree(buf); \ + } while (0) + +#define init_var(v) memset(v, 0, sizeof(NumericVar)) + +#define NUMERIC_DIGITS(num) (NUMERIC_HEADER_IS_SHORT(num) ? 
\ + (num)->choice.n_short.n_data : (num)->choice.n_long.n_data) +#define NUMERIC_NDIGITS(num) \ + ((VARSIZE(num) - NUMERIC_HEADER_SIZE(num)) / sizeof(NumericDigit)) +#define NUMERIC_CAN_BE_SHORT(scale,weight) \ + ((scale) <= NUMERIC_SHORT_DSCALE_MAX && \ + (weight) <= NUMERIC_SHORT_WEIGHT_MAX && \ + (weight) >= NUMERIC_SHORT_WEIGHT_MIN) + +static void alloc_var(NumericVar *var, int ndigits); +static void free_var(NumericVar *var); +static void zero_var(NumericVar *var); + +static bool set_var_from_str(const char *str, const char *cp, + NumericVar *dest, const char **endptr, + Node *escontext); +static bool set_var_from_non_decimal_integer_str(const char *str, + const char *cp, int sign, + int base, NumericVar *dest, + const char **endptr, + Node *escontext); +static void set_var_from_num(Numeric num, NumericVar *dest); +static void init_var_from_num(Numeric num, NumericVar *dest); +static void set_var_from_var(const NumericVar *value, NumericVar *dest); +static char *get_str_from_var(const NumericVar *var); +static char *get_str_from_var_sci(const NumericVar *var, int rscale); + +static void numericvar_serialize(StringInfo buf, const NumericVar *var); +static void numericvar_deserialize(StringInfo buf, NumericVar *var); + +static Numeric duplicate_numeric(Numeric num); +static Numeric make_result(const NumericVar *var); +static Numeric make_result_opt_error(const NumericVar *var, bool *have_error); + +static bool apply_typmod(NumericVar *var, int32 typmod, Node *escontext); +static bool apply_typmod_special(Numeric num, int32 typmod, Node *escontext); + +static bool numericvar_to_int32(const NumericVar *var, int32 *result); +static bool numericvar_to_int64(const NumericVar *var, int64 *result); +static void int64_to_numericvar(int64 val, NumericVar *var); +static bool numericvar_to_uint64(const NumericVar *var, uint64 *result); +#ifdef HAVE_INT128 +static bool numericvar_to_int128(const NumericVar *var, int128 *result); +static void int128_to_numericvar(int128 val, NumericVar *var); +#endif +static double numericvar_to_double_no_overflow(const NumericVar *var); + +static Datum numeric_abbrev_convert(Datum original_datum, SortSupport ssup); +static bool numeric_abbrev_abort(int memtupcount, SortSupport ssup); +static int numeric_fast_cmp(Datum x, Datum y, SortSupport ssup); +static int numeric_cmp_abbrev(Datum x, Datum y, SortSupport ssup); + +static Datum numeric_abbrev_convert_var(const NumericVar *var, + NumericSortSupport *nss); + +static int cmp_numerics(Numeric num1, Numeric num2); +static int cmp_var(const NumericVar *var1, const NumericVar *var2); +static int cmp_var_common(const NumericDigit *var1digits, int var1ndigits, + int var1weight, int var1sign, + const NumericDigit *var2digits, int var2ndigits, + int var2weight, int var2sign); +static void add_var(const NumericVar *var1, const NumericVar *var2, + NumericVar *result); +static void sub_var(const NumericVar *var1, const NumericVar *var2, + NumericVar *result); +static void mul_var(const NumericVar *var1, const NumericVar *var2, + NumericVar *result, + int rscale); +static void div_var(const NumericVar *var1, const NumericVar *var2, + NumericVar *result, + int rscale, bool round); +static void div_var_fast(const NumericVar *var1, const NumericVar *var2, + NumericVar *result, int rscale, bool round); +static void div_var_int(const NumericVar *var, int ival, int ival_weight, + NumericVar *result, int rscale, bool round); +#ifdef HAVE_INT128 +static void div_var_int64(const NumericVar *var, int64 ival, int ival_weight, + 
NumericVar *result, int rscale, bool round); +#endif +static int select_div_scale(const NumericVar *var1, const NumericVar *var2); +static void mod_var(const NumericVar *var1, const NumericVar *var2, + NumericVar *result); +static void div_mod_var(const NumericVar *var1, const NumericVar *var2, + NumericVar *quot, NumericVar *rem); +static void ceil_var(const NumericVar *var, NumericVar *result); +static void floor_var(const NumericVar *var, NumericVar *result); + +static void gcd_var(const NumericVar *var1, const NumericVar *var2, + NumericVar *result); +static void sqrt_var(const NumericVar *arg, NumericVar *result, int rscale); +static void exp_var(const NumericVar *arg, NumericVar *result, int rscale); +static int estimate_ln_dweight(const NumericVar *var); +static void ln_var(const NumericVar *arg, NumericVar *result, int rscale); +static void log_var(const NumericVar *base, const NumericVar *num, + NumericVar *result); +static void power_var(const NumericVar *base, const NumericVar *exp, + NumericVar *result); +static void power_var_int(const NumericVar *base, int exp, int exp_dscale, + NumericVar *result); +static void power_ten_int(int exp, NumericVar *result); + +static int cmp_abs(const NumericVar *var1, const NumericVar *var2); +static int cmp_abs_common(const NumericDigit *var1digits, int var1ndigits, + int var1weight, + const NumericDigit *var2digits, int var2ndigits, + int var2weight); +static void add_abs(const NumericVar *var1, const NumericVar *var2, + NumericVar *result); +static void sub_abs(const NumericVar *var1, const NumericVar *var2, + NumericVar *result); +static void round_var(NumericVar *var, int rscale); +static void trunc_var(NumericVar *var, int rscale); +static void strip_var(NumericVar *var); +static void compute_bucket(Numeric operand, Numeric bound1, Numeric bound2, + const NumericVar *count_var, bool reversed_bounds, + NumericVar *result_var); + +static void accum_sum_add(NumericSumAccum *accum, const NumericVar *val); +static void accum_sum_rescale(NumericSumAccum *accum, const NumericVar *val); +static void accum_sum_carry(NumericSumAccum *accum); +static void accum_sum_reset(NumericSumAccum *accum); +static void accum_sum_final(NumericSumAccum *accum, NumericVar *result); +static void accum_sum_copy(NumericSumAccum *dst, NumericSumAccum *src); +static void accum_sum_combine(NumericSumAccum *accum, NumericSumAccum *accum2); + + +/* ---------------------------------------------------------------------- + * + * Input-, output- and rounding-functions + * + * ---------------------------------------------------------------------- + */ + + +/* + * numeric_in() - + * + * Input function for numeric data type + */ +Datum +numeric_in(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); +#ifdef NOT_USED + Oid typelem = PG_GETARG_OID(1); +#endif + int32 typmod = PG_GETARG_INT32(2); + Node *escontext = fcinfo->context; + Numeric res; + const char *cp; + const char *numstart; + int sign; + + /* Skip leading spaces */ + cp = str; + while (*cp) + { + if (!isspace((unsigned char) *cp)) + break; + cp++; + } + + /* + * Process the number's sign. This duplicates logic in set_var_from_str(), + * but it's worth doing here, since it simplifies the handling of + * infinities and non-decimal integers. + */ + numstart = cp; + sign = NUMERIC_POS; + + if (*cp == '+') + cp++; + else if (*cp == '-') + { + sign = NUMERIC_NEG; + cp++; + } + + /* + * Check for NaN and infinities. We recognize the same strings allowed by + * float8in(). 
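+ * (Editor's illustration, not in the upstream source: the accepted
+ * spellings include 'NaN', 'Infinity', '-Infinity', 'inf' and '+inf',
+ * matched case-insensitively; a signed NaN such as '-NaN' is rejected.)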
+ * + * Since all other legal inputs have a digit or a decimal point after the + * sign, we need only check for NaN/infinity if that's not the case. + */ + if (!isdigit((unsigned char) *cp) && *cp != '.') + { + /* + * The number must be NaN or infinity; anything else can only be a + * syntax error. Note that NaN mustn't have a sign. + */ + if (pg_strncasecmp(numstart, "NaN", 3) == 0) + { + res = make_result(&const_nan); + cp = numstart + 3; + } + else if (pg_strncasecmp(cp, "Infinity", 8) == 0) + { + res = make_result(sign == NUMERIC_POS ? &const_pinf : &const_ninf); + cp += 8; + } + else if (pg_strncasecmp(cp, "inf", 3) == 0) + { + res = make_result(sign == NUMERIC_POS ? &const_pinf : &const_ninf); + cp += 3; + } + else + goto invalid_syntax; + + /* + * Check for trailing junk; there should be nothing left but spaces. + * + * We intentionally do this check before applying the typmod because + * we would like to throw any trailing-junk syntax error before any + * semantic error resulting from apply_typmod_special(). + */ + while (*cp) + { + if (!isspace((unsigned char) *cp)) + goto invalid_syntax; + cp++; + } + + if (!apply_typmod_special(res, typmod, escontext)) + PG_RETURN_NULL(); + } + else + { + /* + * We have a normal numeric value, which may be a non-decimal integer + * or a regular decimal number. + */ + NumericVar value; + int base; + bool have_error; + + init_var(&value); + + /* + * Determine the number's base by looking for a non-decimal prefix + * indicator ("0x", "0o", or "0b"). + */ + if (cp[0] == '0') + { + switch (cp[1]) + { + case 'x': + case 'X': + base = 16; + break; + case 'o': + case 'O': + base = 8; + break; + case 'b': + case 'B': + base = 2; + break; + default: + base = 10; + } + } + else + base = 10; + + /* Parse the rest of the number and apply the sign */ + if (base == 10) + { + if (!set_var_from_str(str, cp, &value, &cp, escontext)) + PG_RETURN_NULL(); + value.sign = sign; + } + else + { + if (!set_var_from_non_decimal_integer_str(str, cp + 2, sign, base, + &value, &cp, escontext)) + PG_RETURN_NULL(); + } + + /* + * Should be nothing left but spaces. As above, throw any typmod error + * after finishing syntax check. + */ + while (*cp) + { + if (!isspace((unsigned char) *cp)) + goto invalid_syntax; + cp++; + } + + if (!apply_typmod(&value, typmod, escontext)) + PG_RETURN_NULL(); + + res = make_result_opt_error(&value, &have_error); + + if (have_error) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value overflows numeric format"))); + + free_var(&value); + } + + PG_RETURN_NUMERIC(res); + +invalid_syntax: + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "numeric", str))); +} + + +/* + * numeric_out() - + * + * Output function for numeric data type + */ +Datum +numeric_out(PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + NumericVar x; + char *str; + + /* + * Handle NaN and infinities + */ + if (NUMERIC_IS_SPECIAL(num)) + { + if (NUMERIC_IS_PINF(num)) + PG_RETURN_CSTRING(pstrdup("Infinity")); + else if (NUMERIC_IS_NINF(num)) + PG_RETURN_CSTRING(pstrdup("-Infinity")); + else + PG_RETURN_CSTRING(pstrdup("NaN")); + } + + /* + * Get the number in the variable format. + */ + init_var_from_num(num, &x); + + str = get_str_from_var(&x); + + PG_RETURN_CSTRING(str); +} + +/* + * numeric_is_nan() - + * + * Is Numeric value a NaN? 
+ */ +bool +numeric_is_nan(Numeric num) +{ + return NUMERIC_IS_NAN(num); +} + +/* + * numeric_is_inf() - + * + * Is Numeric value an infinity? + */ +bool +numeric_is_inf(Numeric num) +{ + return NUMERIC_IS_INF(num); +} + +/* + * numeric_is_integral() - + * + * Is Numeric value integral? + */ +static bool +numeric_is_integral(Numeric num) +{ + NumericVar arg; + + /* Reject NaN, but infinities are considered integral */ + if (NUMERIC_IS_SPECIAL(num)) + { + if (NUMERIC_IS_NAN(num)) + return false; + return true; + } + + /* Integral if there are no digits to the right of the decimal point */ + init_var_from_num(num, &arg); + + return (arg.ndigits == 0 || arg.ndigits <= arg.weight + 1); +} + +/* + * make_numeric_typmod() - + * + * Pack numeric precision and scale values into a typmod. The upper 16 bits + * are used for the precision (though actually not all these bits are needed, + * since the maximum allowed precision is 1000). The lower 16 bits are for + * the scale, but since the scale is constrained to the range [-1000, 1000], + * we use just the lower 11 of those 16 bits, and leave the remaining 5 bits + * unset, for possible future use. + * + * For purely historical reasons VARHDRSZ is then added to the result, thus + * the unused space in the upper 16 bits is not all as freely available as it + * might seem. (We can't let the result overflow to a negative int32, as + * other parts of the system would interpret that as not-a-valid-typmod.) + */ +static inline int32 +make_numeric_typmod(int precision, int scale) +{ + return ((precision << 16) | (scale & 0x7ff)) + VARHDRSZ; +} + +/* + * Because of the offset, valid numeric typmods are at least VARHDRSZ + */ +static inline bool +is_valid_numeric_typmod(int32 typmod) +{ + return typmod >= (int32) VARHDRSZ; +} + +/* + * numeric_typmod_precision() - + * + * Extract the precision from a numeric typmod --- see make_numeric_typmod(). + */ +static inline int +numeric_typmod_precision(int32 typmod) +{ + return ((typmod - VARHDRSZ) >> 16) & 0xffff; +} + +/* + * numeric_typmod_scale() - + * + * Extract the scale from a numeric typmod --- see make_numeric_typmod(). + * + * Note that the scale may be negative, so we must do sign extension when + * unpacking it. We do this using the bit hack (x^1024)-1024, which sign + * extends an 11-bit two's complement number x. + */ +static inline int +numeric_typmod_scale(int32 typmod) +{ + return (((typmod - VARHDRSZ) & 0x7ff) ^ 1024) - 1024; +} + +/* + * numeric_maximum_size() - + * + * Maximum size of a numeric with given typmod, or -1 if unlimited/unknown. + */ +int32 +numeric_maximum_size(int32 typmod) +{ + int precision; + int numeric_digits; + + if (!is_valid_numeric_typmod(typmod)) + return -1; + + /* precision (ie, max # of digits) is in upper bits of typmod */ + precision = numeric_typmod_precision(typmod); + + /* + * This formula computes the maximum number of NumericDigits we could need + * in order to store the specified number of decimal digits. Because the + * weight is stored as a number of NumericDigits rather than a number of + * decimal digits, it's possible that the first NumericDigit will contain + * only a single decimal digit. Thus, the first two decimal digits can + * require two NumericDigits to store, but it isn't until we reach + * DEC_DIGITS + 2 decimal digits that we potentially need a third + * NumericDigit. 
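+ * (Editor's illustration, not in the upstream source: with DEC_DIGITS = 4,
+ * a column declared numeric(5,2) gives numeric_digits = (5 + 6) / 4 = 2,
+ * so the value reported below is NUMERIC_HDRSZ + 2 * sizeof(NumericDigit)
+ * = 8 + 4 = 12 bytes.)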
+ */ + numeric_digits = (precision + 2 * (DEC_DIGITS - 1)) / DEC_DIGITS; + + /* + * In most cases, the size of a numeric will be smaller than the value + * computed below, because the varlena header will typically get toasted + * down to a single byte before being stored on disk, and it may also be + * possible to use a short numeric header. But our job here is to compute + * the worst case. + */ + return NUMERIC_HDRSZ + (numeric_digits * sizeof(NumericDigit)); +} + +/* + * numeric_out_sci() - + * + * Output function for numeric data type in scientific notation. + */ +char * +numeric_out_sci(Numeric num, int scale) +{ + NumericVar x; + char *str; + + /* + * Handle NaN and infinities + */ + if (NUMERIC_IS_SPECIAL(num)) + { + if (NUMERIC_IS_PINF(num)) + return pstrdup("Infinity"); + else if (NUMERIC_IS_NINF(num)) + return pstrdup("-Infinity"); + else + return pstrdup("NaN"); + } + + init_var_from_num(num, &x); + + str = get_str_from_var_sci(&x, scale); + + return str; +} + +/* + * numeric_normalize() - + * + * Output function for numeric data type, suppressing insignificant trailing + * zeroes and then any trailing decimal point. The intent of this is to + * produce strings that are equal if and only if the input numeric values + * compare equal. + */ +char * +numeric_normalize(Numeric num) +{ + NumericVar x; + char *str; + int last; + + /* + * Handle NaN and infinities + */ + if (NUMERIC_IS_SPECIAL(num)) + { + if (NUMERIC_IS_PINF(num)) + return pstrdup("Infinity"); + else if (NUMERIC_IS_NINF(num)) + return pstrdup("-Infinity"); + else + return pstrdup("NaN"); + } + + init_var_from_num(num, &x); + + str = get_str_from_var(&x); + + /* If there's no decimal point, there's certainly nothing to remove. */ + if (strchr(str, '.') != NULL) + { + /* + * Back up over trailing fractional zeroes. Since there is a decimal + * point, this loop will terminate safely. + */ + last = strlen(str) - 1; + while (str[last] == '0') + last--; + + /* We want to get rid of the decimal point too, if it's now last. */ + if (str[last] == '.') + last--; + + /* Delete whatever we backed up over. */ + str[last + 1] = '\0'; + } + + return str; +} + +/* + * numeric_recv - converts external binary format to numeric + * + * External format is a sequence of int16's: + * ndigits, weight, sign, dscale, NumericDigits. + */ +Datum +numeric_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + +#ifdef NOT_USED + Oid typelem = PG_GETARG_OID(1); +#endif + int32 typmod = PG_GETARG_INT32(2); + NumericVar value; + Numeric res; + int len, + i; + + init_var(&value); + + len = (uint16) pq_getmsgint(buf, sizeof(uint16)); + + alloc_var(&value, len); + + value.weight = (int16) pq_getmsgint(buf, sizeof(int16)); + /* we allow any int16 for weight --- OK? 
*/ + + value.sign = (uint16) pq_getmsgint(buf, sizeof(uint16)); + if (!(value.sign == NUMERIC_POS || + value.sign == NUMERIC_NEG || + value.sign == NUMERIC_NAN || + value.sign == NUMERIC_PINF || + value.sign == NUMERIC_NINF)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("invalid sign in external \"numeric\" value"))); + + value.dscale = (uint16) pq_getmsgint(buf, sizeof(uint16)); + if ((value.dscale & NUMERIC_DSCALE_MASK) != value.dscale) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("invalid scale in external \"numeric\" value"))); + + for (i = 0; i < len; i++) + { + NumericDigit d = pq_getmsgint(buf, sizeof(NumericDigit)); + + if (d < 0 || d >= NBASE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("invalid digit in external \"numeric\" value"))); + value.digits[i] = d; + } + + /* + * If the given dscale would hide any digits, truncate those digits away. + * We could alternatively throw an error, but that would take a bunch of + * extra code (about as much as trunc_var involves), and it might cause + * client compatibility issues. Be careful not to apply trunc_var to + * special values, as it could do the wrong thing; we don't need it + * anyway, since make_result will ignore all but the sign field. + * + * After doing that, be sure to check the typmod restriction. + */ + if (value.sign == NUMERIC_POS || + value.sign == NUMERIC_NEG) + { + trunc_var(&value, value.dscale); + + (void) apply_typmod(&value, typmod, NULL); + + res = make_result(&value); + } + else + { + /* apply_typmod_special wants us to make the Numeric first */ + res = make_result(&value); + + (void) apply_typmod_special(res, typmod, NULL); + } + + free_var(&value); + + PG_RETURN_NUMERIC(res); +} + +/* + * numeric_send - converts numeric to binary format + */ +Datum +numeric_send(PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + NumericVar x; + StringInfoData buf; + int i; + + init_var_from_num(num, &x); + + pq_begintypsend(&buf); + + pq_sendint16(&buf, x.ndigits); + pq_sendint16(&buf, x.weight); + pq_sendint16(&buf, x.sign); + pq_sendint16(&buf, x.dscale); + for (i = 0; i < x.ndigits; i++) + pq_sendint16(&buf, x.digits[i]); + + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + + +/* + * numeric_support() + * + * Planner support function for the numeric() length coercion function. + * + * Flatten calls that solely represent increases in allowable precision. + * Scale changes mutate every datum, so they are unoptimizable. Some values, + * e.g. 1E-1001, can only fit into an unconstrained numeric, so a change from + * an unconstrained numeric to any constrained numeric is also unoptimizable. 
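+ * (Editor's illustration, not in the upstream source: a length-coercion
+ * call applied to an expression already typed numeric(10,2) can be
+ * discarded when the target is numeric(12,2) or unconstrained numeric,
+ * but not when the target is numeric(12,3), which changes the scale, nor
+ * when the source is unconstrained numeric and the target is constrained.)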
+ */ +Datum +numeric_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestSimplify)) + { + SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq; + FuncExpr *expr = req->fcall; + Node *typmod; + + Assert(list_length(expr->args) >= 2); + + typmod = (Node *) lsecond(expr->args); + + if (IsA(typmod, Const) && !((Const *) typmod)->constisnull) + { + Node *source = (Node *) linitial(expr->args); + int32 old_typmod = exprTypmod(source); + int32 new_typmod = DatumGetInt32(((Const *) typmod)->constvalue); + int32 old_scale = numeric_typmod_scale(old_typmod); + int32 new_scale = numeric_typmod_scale(new_typmod); + int32 old_precision = numeric_typmod_precision(old_typmod); + int32 new_precision = numeric_typmod_precision(new_typmod); + + /* + * If new_typmod is invalid, the destination is unconstrained; + * that's always OK. If old_typmod is valid, the source is + * constrained, and we're OK if the scale is unchanged and the + * precision is not decreasing. See further notes in function + * header comment. + */ + if (!is_valid_numeric_typmod(new_typmod) || + (is_valid_numeric_typmod(old_typmod) && + new_scale == old_scale && new_precision >= old_precision)) + ret = relabel_to_typmod(source, new_typmod); + } + } + + PG_RETURN_POINTER(ret); +} + +/* + * numeric() - + * + * This is a special function called by the Postgres database system + * before a value is stored in a tuple's attribute. The precision and + * scale of the attribute have to be applied on the value. + */ +Datum +numeric (PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + int32 typmod = PG_GETARG_INT32(1); + Numeric new; + int precision; + int scale; + int ddigits; + int maxdigits; + int dscale; + NumericVar var; + + /* + * Handle NaN and infinities: if apply_typmod_special doesn't complain, + * just return a copy of the input. + */ + if (NUMERIC_IS_SPECIAL(num)) + { + (void) apply_typmod_special(num, typmod, NULL); + PG_RETURN_NUMERIC(duplicate_numeric(num)); + } + + /* + * If the value isn't a valid type modifier, simply return a copy of the + * input value + */ + if (!is_valid_numeric_typmod(typmod)) + PG_RETURN_NUMERIC(duplicate_numeric(num)); + + /* + * Get the precision and scale out of the typmod value + */ + precision = numeric_typmod_precision(typmod); + scale = numeric_typmod_scale(typmod); + maxdigits = precision - scale; + + /* The target display scale is non-negative */ + dscale = Max(scale, 0); + + /* + * If the number is certainly in bounds and due to the target scale no + * rounding could be necessary, just make a copy of the input and modify + * its scale fields, unless the larger scale forces us to abandon the + * short representation. (Note we assume the existing dscale is + * honest...) + */ + ddigits = (NUMERIC_WEIGHT(num) + 1) * DEC_DIGITS; + if (ddigits <= maxdigits && scale >= NUMERIC_DSCALE(num) + && (NUMERIC_CAN_BE_SHORT(dscale, NUMERIC_WEIGHT(num)) + || !NUMERIC_IS_SHORT(num))) + { + new = duplicate_numeric(num); + if (NUMERIC_IS_SHORT(num)) + new->choice.n_short.n_header = + (num->choice.n_short.n_header & ~NUMERIC_SHORT_DSCALE_MASK) + | (dscale << NUMERIC_SHORT_DSCALE_SHIFT); + else + new->choice.n_long.n_sign_dscale = NUMERIC_SIGN(new) | + ((uint16) dscale & NUMERIC_DSCALE_MASK); + PG_RETURN_NUMERIC(new); + } + + /* + * We really need to fiddle with things - unpack the number into a + * variable and let apply_typmod() do it. 
+ */ + init_var(&var); + + set_var_from_num(num, &var); + (void) apply_typmod(&var, typmod, NULL); + new = make_result(&var); + + free_var(&var); + + PG_RETURN_NUMERIC(new); +} + +Datum +numerictypmodin(PG_FUNCTION_ARGS) +{ + ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0); + int32 *tl; + int n; + int32 typmod; + + tl = ArrayGetIntegerTypmods(ta, &n); + + if (n == 2) + { + if (tl[0] < 1 || tl[0] > NUMERIC_MAX_PRECISION) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("NUMERIC precision %d must be between 1 and %d", + tl[0], NUMERIC_MAX_PRECISION))); + if (tl[1] < NUMERIC_MIN_SCALE || tl[1] > NUMERIC_MAX_SCALE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("NUMERIC scale %d must be between %d and %d", + tl[1], NUMERIC_MIN_SCALE, NUMERIC_MAX_SCALE))); + typmod = make_numeric_typmod(tl[0], tl[1]); + } + else if (n == 1) + { + if (tl[0] < 1 || tl[0] > NUMERIC_MAX_PRECISION) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("NUMERIC precision %d must be between 1 and %d", + tl[0], NUMERIC_MAX_PRECISION))); + /* scale defaults to zero */ + typmod = make_numeric_typmod(tl[0], 0); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid NUMERIC type modifier"))); + typmod = 0; /* keep compiler quiet */ + } + + PG_RETURN_INT32(typmod); +} + +Datum +numerictypmodout(PG_FUNCTION_ARGS) +{ + int32 typmod = PG_GETARG_INT32(0); + char *res = (char *) palloc(64); + + if (is_valid_numeric_typmod(typmod)) + snprintf(res, 64, "(%d,%d)", + numeric_typmod_precision(typmod), + numeric_typmod_scale(typmod)); + else + *res = '\0'; + + PG_RETURN_CSTRING(res); +} + + +/* ---------------------------------------------------------------------- + * + * Sign manipulation, rounding and the like + * + * ---------------------------------------------------------------------- + */ + +Datum +numeric_abs(PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + Numeric res; + + /* + * Do it the easy way directly on the packed format + */ + res = duplicate_numeric(num); + + if (NUMERIC_IS_SHORT(num)) + res->choice.n_short.n_header = + num->choice.n_short.n_header & ~NUMERIC_SHORT_SIGN_MASK; + else if (NUMERIC_IS_SPECIAL(num)) + { + /* This changes -Inf to Inf, and doesn't affect NaN */ + res->choice.n_short.n_header = + num->choice.n_short.n_header & ~NUMERIC_INF_SIGN_MASK; + } + else + res->choice.n_long.n_sign_dscale = NUMERIC_POS | NUMERIC_DSCALE(num); + + PG_RETURN_NUMERIC(res); +} + + +Datum +numeric_uminus(PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + Numeric res; + + /* + * Do it the easy way directly on the packed format + */ + res = duplicate_numeric(num); + + if (NUMERIC_IS_SPECIAL(num)) + { + /* Flip the sign, if it's Inf or -Inf */ + if (!NUMERIC_IS_NAN(num)) + res->choice.n_short.n_header = + num->choice.n_short.n_header ^ NUMERIC_INF_SIGN_MASK; + } + + /* + * The packed format is known to be totally zero digit trimmed always. So + * once we've eliminated specials, we can identify a zero by the fact that + * there are no digits at all. Do nothing to a zero. 
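+ *
+ * (Hence negating 0::numeric simply returns 0; numeric has no minus zero.)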
+ */ + else if (NUMERIC_NDIGITS(num) != 0) + { + /* Else, flip the sign */ + if (NUMERIC_IS_SHORT(num)) + res->choice.n_short.n_header = + num->choice.n_short.n_header ^ NUMERIC_SHORT_SIGN_MASK; + else if (NUMERIC_SIGN(num) == NUMERIC_POS) + res->choice.n_long.n_sign_dscale = + NUMERIC_NEG | NUMERIC_DSCALE(num); + else + res->choice.n_long.n_sign_dscale = + NUMERIC_POS | NUMERIC_DSCALE(num); + } + + PG_RETURN_NUMERIC(res); +} + + +Datum +numeric_uplus(PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + + PG_RETURN_NUMERIC(duplicate_numeric(num)); +} + + +/* + * numeric_sign_internal() - + * + * Returns -1 if the argument is less than 0, 0 if the argument is equal + * to 0, and 1 if the argument is greater than zero. Caller must have + * taken care of the NaN case, but we can handle infinities here. + */ +static int +numeric_sign_internal(Numeric num) +{ + if (NUMERIC_IS_SPECIAL(num)) + { + Assert(!NUMERIC_IS_NAN(num)); + /* Must be Inf or -Inf */ + if (NUMERIC_IS_PINF(num)) + return 1; + else + return -1; + } + + /* + * The packed format is known to be totally zero digit trimmed always. So + * once we've eliminated specials, we can identify a zero by the fact that + * there are no digits at all. + */ + else if (NUMERIC_NDIGITS(num) == 0) + return 0; + else if (NUMERIC_SIGN(num) == NUMERIC_NEG) + return -1; + else + return 1; +} + +/* + * numeric_sign() - + * + * returns -1 if the argument is less than 0, 0 if the argument is equal + * to 0, and 1 if the argument is greater than zero. + */ +Datum +numeric_sign(PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + + /* + * Handle NaN (infinities can be handled normally) + */ + if (NUMERIC_IS_NAN(num)) + PG_RETURN_NUMERIC(make_result(&const_nan)); + + switch (numeric_sign_internal(num)) + { + case 0: + PG_RETURN_NUMERIC(make_result(&const_zero)); + case 1: + PG_RETURN_NUMERIC(make_result(&const_one)); + case -1: + PG_RETURN_NUMERIC(make_result(&const_minus_one)); + } + + Assert(false); + return (Datum) 0; +} + + +/* + * numeric_round() - + * + * Round a value to have 'scale' digits after the decimal point. + * We allow negative 'scale', implying rounding before the decimal + * point --- Oracle interprets rounding that way. + */ +Datum +numeric_round(PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + int32 scale = PG_GETARG_INT32(1); + Numeric res; + NumericVar arg; + + /* + * Handle NaN and infinities + */ + if (NUMERIC_IS_SPECIAL(num)) + PG_RETURN_NUMERIC(duplicate_numeric(num)); + + /* + * Limit the scale value to avoid possible overflow in calculations + */ + scale = Max(scale, -NUMERIC_MAX_RESULT_SCALE); + scale = Min(scale, NUMERIC_MAX_RESULT_SCALE); + + /* + * Unpack the argument and round it at the proper digit position + */ + init_var(&arg); + set_var_from_num(num, &arg); + + round_var(&arg, scale); + + /* We don't allow negative output dscale */ + if (scale < 0) + arg.dscale = 0; + + /* + * Return the rounded result + */ + res = make_result(&arg); + + free_var(&arg); + PG_RETURN_NUMERIC(res); +} + + +/* + * numeric_trunc() - + * + * Truncate a value to have 'scale' digits after the decimal point. + * We allow negative 'scale', implying a truncation before the decimal + * point --- Oracle interprets truncation that way. 
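+ *
+ * For illustration: trunc(123.456, 2) yields 123.45, and trunc(123.456, -1)
+ * truncates before the decimal point, yielding 120.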
+ */ +Datum +numeric_trunc(PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + int32 scale = PG_GETARG_INT32(1); + Numeric res; + NumericVar arg; + + /* + * Handle NaN and infinities + */ + if (NUMERIC_IS_SPECIAL(num)) + PG_RETURN_NUMERIC(duplicate_numeric(num)); + + /* + * Limit the scale value to avoid possible overflow in calculations + */ + scale = Max(scale, -NUMERIC_MAX_RESULT_SCALE); + scale = Min(scale, NUMERIC_MAX_RESULT_SCALE); + + /* + * Unpack the argument and truncate it at the proper digit position + */ + init_var(&arg); + set_var_from_num(num, &arg); + + trunc_var(&arg, scale); + + /* We don't allow negative output dscale */ + if (scale < 0) + arg.dscale = 0; + + /* + * Return the truncated result + */ + res = make_result(&arg); + + free_var(&arg); + PG_RETURN_NUMERIC(res); +} + + +/* + * numeric_ceil() - + * + * Return the smallest integer greater than or equal to the argument + */ +Datum +numeric_ceil(PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + Numeric res; + NumericVar result; + + /* + * Handle NaN and infinities + */ + if (NUMERIC_IS_SPECIAL(num)) + PG_RETURN_NUMERIC(duplicate_numeric(num)); + + init_var_from_num(num, &result); + ceil_var(&result, &result); + + res = make_result(&result); + free_var(&result); + + PG_RETURN_NUMERIC(res); +} + + +/* + * numeric_floor() - + * + * Return the largest integer equal to or less than the argument + */ +Datum +numeric_floor(PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + Numeric res; + NumericVar result; + + /* + * Handle NaN and infinities + */ + if (NUMERIC_IS_SPECIAL(num)) + PG_RETURN_NUMERIC(duplicate_numeric(num)); + + init_var_from_num(num, &result); + floor_var(&result, &result); + + res = make_result(&result); + free_var(&result); + + PG_RETURN_NUMERIC(res); +} + + +/* + * generate_series_numeric() - + * + * Generate series of numeric. 
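+ *
+ * For illustration: generate_series(1.0, 2.0, 0.5) produces 1.0, 1.5 and
+ * 2.0; with a negative step the series counts down from start to stop.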
+ */ +Datum +generate_series_numeric(PG_FUNCTION_ARGS) +{ + return generate_series_step_numeric(fcinfo); +} + +Datum +generate_series_step_numeric(PG_FUNCTION_ARGS) +{ + generate_series_numeric_fctx *fctx; + FuncCallContext *funcctx; + MemoryContext oldcontext; + + if (SRF_IS_FIRSTCALL()) + { + Numeric start_num = PG_GETARG_NUMERIC(0); + Numeric stop_num = PG_GETARG_NUMERIC(1); + NumericVar steploc = const_one; + + /* Reject NaN and infinities in start and stop values */ + if (NUMERIC_IS_SPECIAL(start_num)) + { + if (NUMERIC_IS_NAN(start_num)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("start value cannot be NaN"))); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("start value cannot be infinity"))); + } + if (NUMERIC_IS_SPECIAL(stop_num)) + { + if (NUMERIC_IS_NAN(stop_num)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("stop value cannot be NaN"))); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("stop value cannot be infinity"))); + } + + /* see if we were given an explicit step size */ + if (PG_NARGS() == 3) + { + Numeric step_num = PG_GETARG_NUMERIC(2); + + if (NUMERIC_IS_SPECIAL(step_num)) + { + if (NUMERIC_IS_NAN(step_num)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("step size cannot be NaN"))); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("step size cannot be infinity"))); + } + + init_var_from_num(step_num, &steploc); + + if (cmp_var(&steploc, &const_zero) == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("step size cannot equal zero"))); + } + + /* create a function context for cross-call persistence */ + funcctx = SRF_FIRSTCALL_INIT(); + + /* + * Switch to memory context appropriate for multiple function calls. + */ + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* allocate memory for user context */ + fctx = (generate_series_numeric_fctx *) + palloc(sizeof(generate_series_numeric_fctx)); + + /* + * Use fctx to keep state from call to call. Seed current with the + * original start value. We must copy the start_num and stop_num + * values rather than pointing to them, since we may have detoasted + * them in the per-call context. + */ + init_var(&fctx->current); + init_var(&fctx->stop); + init_var(&fctx->step); + + set_var_from_num(start_num, &fctx->current); + set_var_from_num(stop_num, &fctx->stop); + set_var_from_var(&steploc, &fctx->step); + + funcctx->user_fctx = fctx; + MemoryContextSwitchTo(oldcontext); + } + + /* stuff done on every call of the function */ + funcctx = SRF_PERCALL_SETUP(); + + /* + * Get the saved state and use current state as the result of this + * iteration. 
+ */ + fctx = funcctx->user_fctx; + + if ((fctx->step.sign == NUMERIC_POS && + cmp_var(&fctx->current, &fctx->stop) <= 0) || + (fctx->step.sign == NUMERIC_NEG && + cmp_var(&fctx->current, &fctx->stop) >= 0)) + { + Numeric result = make_result(&fctx->current); + + /* switch to memory context appropriate for iteration calculation */ + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* increment current in preparation for next iteration */ + add_var(&fctx->current, &fctx->step, &fctx->current); + MemoryContextSwitchTo(oldcontext); + + /* do when there is more left to send */ + SRF_RETURN_NEXT(funcctx, NumericGetDatum(result)); + } + else + /* do when there is no more left */ + SRF_RETURN_DONE(funcctx); +} + + +/* + * Implements the numeric version of the width_bucket() function + * defined by SQL2003. See also width_bucket_float8(). + * + * 'bound1' and 'bound2' are the lower and upper bounds of the + * histogram's range, respectively. 'count' is the number of buckets + * in the histogram. width_bucket() returns an integer indicating the + * bucket number that 'operand' belongs to in an equiwidth histogram + * with the specified characteristics. An operand smaller than the + * lower bound is assigned to bucket 0. An operand greater than the + * upper bound is assigned to an additional bucket (with number + * count+1). We don't allow "NaN" for any of the numeric arguments. + */ +Datum +width_bucket_numeric(PG_FUNCTION_ARGS) +{ + Numeric operand = PG_GETARG_NUMERIC(0); + Numeric bound1 = PG_GETARG_NUMERIC(1); + Numeric bound2 = PG_GETARG_NUMERIC(2); + int32 count = PG_GETARG_INT32(3); + NumericVar count_var; + NumericVar result_var; + int32 result; + + if (count <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), + errmsg("count must be greater than zero"))); + + if (NUMERIC_IS_SPECIAL(operand) || + NUMERIC_IS_SPECIAL(bound1) || + NUMERIC_IS_SPECIAL(bound2)) + { + if (NUMERIC_IS_NAN(operand) || + NUMERIC_IS_NAN(bound1) || + NUMERIC_IS_NAN(bound2)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), + errmsg("operand, lower bound, and upper bound cannot be NaN"))); + /* We allow "operand" to be infinite; cmp_numerics will cope */ + if (NUMERIC_IS_INF(bound1) || NUMERIC_IS_INF(bound2)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), + errmsg("lower and upper bounds must be finite"))); + } + + init_var(&result_var); + init_var(&count_var); + + /* Convert 'count' to a numeric, for ease of use later */ + int64_to_numericvar((int64) count, &count_var); + + switch (cmp_numerics(bound1, bound2)) + { + case 0: + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), + errmsg("lower bound cannot equal upper bound"))); + break; + + /* bound1 < bound2 */ + case -1: + if (cmp_numerics(operand, bound1) < 0) + set_var_from_var(&const_zero, &result_var); + else if (cmp_numerics(operand, bound2) >= 0) + add_var(&count_var, &const_one, &result_var); + else + compute_bucket(operand, bound1, bound2, &count_var, false, + &result_var); + break; + + /* bound1 > bound2 */ + case 1: + if (cmp_numerics(operand, bound1) > 0) + set_var_from_var(&const_zero, &result_var); + else if (cmp_numerics(operand, bound2) <= 0) + add_var(&count_var, &const_one, &result_var); + else + compute_bucket(operand, bound1, bound2, &count_var, true, + &result_var); + break; + } + + /* if result exceeds the range of a legal int4, we ereport here */ + if (!numericvar_to_int32(&result_var, &result)) + 
ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + + free_var(&count_var); + free_var(&result_var); + + PG_RETURN_INT32(result); +} + +/* + * 'operand' is inside the bucket range, so determine the correct + * bucket for it to go. The calculations performed by this function + * are derived directly from the SQL2003 spec. Note however that we + * multiply by count before dividing, to avoid unnecessary roundoff error. + */ +static void +compute_bucket(Numeric operand, Numeric bound1, Numeric bound2, + const NumericVar *count_var, bool reversed_bounds, + NumericVar *result_var) +{ + NumericVar bound1_var; + NumericVar bound2_var; + NumericVar operand_var; + + init_var_from_num(bound1, &bound1_var); + init_var_from_num(bound2, &bound2_var); + init_var_from_num(operand, &operand_var); + + if (!reversed_bounds) + { + sub_var(&operand_var, &bound1_var, &operand_var); + sub_var(&bound2_var, &bound1_var, &bound2_var); + } + else + { + sub_var(&bound1_var, &operand_var, &operand_var); + sub_var(&bound1_var, &bound2_var, &bound2_var); + } + + mul_var(&operand_var, count_var, &operand_var, + operand_var.dscale + count_var->dscale); + div_var(&operand_var, &bound2_var, result_var, + select_div_scale(&operand_var, &bound2_var), true); + + /* + * Roundoff in the division could give us a quotient exactly equal to + * "count", which is too large. Clamp so that we do not emit a result + * larger than "count". + */ + if (cmp_var(result_var, count_var) >= 0) + set_var_from_var(count_var, result_var); + else + { + add_var(result_var, &const_one, result_var); + floor_var(result_var, result_var); + } + + free_var(&bound1_var); + free_var(&bound2_var); + free_var(&operand_var); +} + +/* ---------------------------------------------------------------------- + * + * Comparison functions + * + * Note: btree indexes need these routines not to leak memory; therefore, + * be careful to free working copies of toasted datums. Most places don't + * need to be so careful. + * + * Sort support: + * + * We implement the sortsupport strategy routine in order to get the benefit of + * abbreviation. The ordinary numeric comparison can be quite slow as a result + * of palloc/pfree cycles (due to detoasting packed values for alignment); + * while this could be worked on itself, the abbreviation strategy gives more + * speedup in many common cases. + * + * Two different representations are used for the abbreviated form, one in + * int32 and one in int64, whichever fits into a by-value Datum. In both cases + * the representation is negated relative to the original value, because we use + * the largest negative value for NaN, which sorts higher than other values. We + * convert the absolute value of the numeric to a 31-bit or 63-bit positive + * value, and then negate it if the original number was positive. + * + * We abort the abbreviation process if the abbreviation cardinality is below + * 0.01% of the row count (1 per 10k non-null rows). The actual break-even + * point is somewhat below that, perhaps 1 per 30k (at 1 per 100k there's a + * very small penalty), but we don't want to build up too many abbreviated + * values before first testing for abort, so we take the slightly pessimistic + * number. We make no attempt to estimate the cardinality of the real values, + * since it plays no part in the cost model here (if the abbreviation is equal, + * the cost of comparing equal and unequal underlying values is comparable). 
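+ * (For example, after one million non-null inputs abbreviation is abandoned
+ * unless roughly 100 or more distinct abbreviated values have been seen.)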
+ * We discontinue even checking for abort (saving us the hashing overhead) if + * the estimated cardinality gets to 100k; that would be enough to support many + * billions of rows while doing no worse than breaking even. + * + * ---------------------------------------------------------------------- + */ + +/* + * Sort support strategy routine. + */ +Datum +numeric_sortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + + ssup->comparator = numeric_fast_cmp; + + if (ssup->abbreviate) + { + NumericSortSupport *nss; + MemoryContext oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); + + nss = palloc(sizeof(NumericSortSupport)); + + /* + * palloc a buffer for handling unaligned packed values in addition to + * the support struct + */ + nss->buf = palloc(VARATT_SHORT_MAX + VARHDRSZ + 1); + + nss->input_count = 0; + nss->estimating = true; + initHyperLogLog(&nss->abbr_card, 10); + + ssup->ssup_extra = nss; + + ssup->abbrev_full_comparator = ssup->comparator; + ssup->comparator = numeric_cmp_abbrev; + ssup->abbrev_converter = numeric_abbrev_convert; + ssup->abbrev_abort = numeric_abbrev_abort; + + MemoryContextSwitchTo(oldcontext); + } + + PG_RETURN_VOID(); +} + +/* + * Abbreviate a numeric datum, handling NaNs and detoasting + * (must not leak memory!) + */ +static Datum +numeric_abbrev_convert(Datum original_datum, SortSupport ssup) +{ + NumericSortSupport *nss = ssup->ssup_extra; + void *original_varatt = PG_DETOAST_DATUM_PACKED(original_datum); + Numeric value; + Datum result; + + nss->input_count += 1; + + /* + * This is to handle packed datums without needing a palloc/pfree cycle; + * we keep and reuse a buffer large enough to handle any short datum. + */ + if (VARATT_IS_SHORT(original_varatt)) + { + void *buf = nss->buf; + Size sz = VARSIZE_SHORT(original_varatt) - VARHDRSZ_SHORT; + + Assert(sz <= VARATT_SHORT_MAX - VARHDRSZ_SHORT); + + SET_VARSIZE(buf, VARHDRSZ + sz); + memcpy(VARDATA(buf), VARDATA_SHORT(original_varatt), sz); + + value = (Numeric) buf; + } + else + value = (Numeric) original_varatt; + + if (NUMERIC_IS_SPECIAL(value)) + { + if (NUMERIC_IS_PINF(value)) + result = NUMERIC_ABBREV_PINF; + else if (NUMERIC_IS_NINF(value)) + result = NUMERIC_ABBREV_NINF; + else + result = NUMERIC_ABBREV_NAN; + } + else + { + NumericVar var; + + init_var_from_num(value, &var); + + result = numeric_abbrev_convert_var(&var, nss); + } + + /* should happen only for external/compressed toasts */ + if ((Pointer) original_varatt != DatumGetPointer(original_datum)) + pfree(original_varatt); + + return result; +} + +/* + * Consider whether to abort abbreviation. + * + * We pay no attention to the cardinality of the non-abbreviated data. There is + * no reason to do so: unlike text, we have no fast check for equal values, so + * we pay the full overhead whenever the abbreviations are equal regardless of + * whether the underlying values are also equal. + */ +static bool +numeric_abbrev_abort(int memtupcount, SortSupport ssup) +{ + NumericSortSupport *nss = ssup->ssup_extra; + double abbr_card; + + if (memtupcount < 10000 || nss->input_count < 10000 || !nss->estimating) + return false; + + abbr_card = estimateHyperLogLog(&nss->abbr_card); + + /* + * If we have >100k distinct values, then even if we were sorting many + * billion rows we'd likely still break even, and the penalty of undoing + * that many rows of abbrevs would probably not be worth it. Stop even + * counting at that point. 
+ */ + if (abbr_card > 100000.0) + { +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "numeric_abbrev: estimation ends at cardinality %f" + " after " INT64_FORMAT " values (%d rows)", + abbr_card, nss->input_count, memtupcount); +#endif + nss->estimating = false; + return false; + } + + /* + * Target minimum cardinality is 1 per ~10k of non-null inputs. (The + * break even point is somewhere between one per 100k rows, where + * abbreviation has a very slight penalty, and 1 per 10k where it wins by + * a measurable percentage.) We use the relatively pessimistic 10k + * threshold, and add a 0.5 row fudge factor, because it allows us to + * abort earlier on genuinely pathological data where we've had exactly + * one abbreviated value in the first 10k (non-null) rows. + */ + if (abbr_card < nss->input_count / 10000.0 + 0.5) + { +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "numeric_abbrev: aborting abbreviation at cardinality %f" + " below threshold %f after " INT64_FORMAT " values (%d rows)", + abbr_card, nss->input_count / 10000.0 + 0.5, + nss->input_count, memtupcount); +#endif + return true; + } + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "numeric_abbrev: cardinality %f" + " after " INT64_FORMAT " values (%d rows)", + abbr_card, nss->input_count, memtupcount); +#endif + + return false; +} + +/* + * Non-fmgr interface to the comparison routine to allow sortsupport to elide + * the fmgr call. The saving here is small given how slow numeric comparisons + * are, but it is a required part of the sort support API when abbreviations + * are performed. + * + * Two palloc/pfree cycles could be saved here by using persistent buffers for + * aligning short-varlena inputs, but this has not so far been considered to + * be worth the effort. + */ +static int +numeric_fast_cmp(Datum x, Datum y, SortSupport ssup) +{ + Numeric nx = DatumGetNumeric(x); + Numeric ny = DatumGetNumeric(y); + int result; + + result = cmp_numerics(nx, ny); + + if ((Pointer) nx != DatumGetPointer(x)) + pfree(nx); + if ((Pointer) ny != DatumGetPointer(y)) + pfree(ny); + + return result; +} + +/* + * Compare abbreviations of values. (Abbreviations may be equal where the true + * values differ, but if the abbreviations differ, they must reflect the + * ordering of the true values.) + */ +static int +numeric_cmp_abbrev(Datum x, Datum y, SortSupport ssup) +{ + /* + * NOTE WELL: this is intentionally backwards, because the abbreviation is + * negated relative to the original value, to handle NaN/infinity cases. + */ + if (DatumGetNumericAbbrev(x) < DatumGetNumericAbbrev(y)) + return 1; + if (DatumGetNumericAbbrev(x) > DatumGetNumericAbbrev(y)) + return -1; + return 0; +} + +/* + * Abbreviate a NumericVar according to the available bit size. + * + * The 31-bit value is constructed as: + * + * 0 + 7bits digit weight + 24 bits digit value + * + * where the digit weight is in single decimal digits, not digit words, and + * stored in excess-44 representation[1]. The 24-bit digit value is the 7 most + * significant decimal digits of the value converted to binary. Values whose + * weights would fall outside the representable range are rounded off to zero + * (which is also used to represent actual zeros) or to 0x7FFFFFFF (which + * otherwise cannot occur). 
Abbreviation therefore fails to gain any advantage + * where values are outside the range 10^-44 to 10^83, which is not considered + * to be a serious limitation, or when values are of the same magnitude and + * equal in the first 7 decimal digits, which is considered to be an + * unavoidable limitation given the available bits. (Stealing three more bits + * to compare another digit would narrow the range of representable weights by + * a factor of 8, which starts to look like a real limiting factor.) + * + * (The value 44 for the excess is essentially arbitrary) + * + * The 63-bit value is constructed as: + * + * 0 + 7bits weight + 4 x 14-bit packed digit words + * + * The weight in this case is again stored in excess-44, but this time it is + * the original weight in digit words (i.e. powers of 10000). The first four + * digit words of the value (if present; trailing zeros are assumed as needed) + * are packed into 14 bits each to form the rest of the value. Again, + * out-of-range values are rounded off to 0 or 0x7FFFFFFFFFFFFFFF. The + * representable range in this case is 10^-176 to 10^332, which is considered + * to be good enough for all practical purposes, and comparison of 4 words + * means that at least 13 decimal digits are compared, which is considered to + * be a reasonable compromise between effectiveness and efficiency in computing + * the abbreviation. + * + * (The value 44 for the excess is even more arbitrary here, it was chosen just + * to match the value used in the 31-bit case) + * + * [1] - Excess-k representation means that the value is offset by adding 'k' + * and then treated as unsigned, so the smallest representable value is stored + * with all bits zero. This allows simple comparisons to work on the composite + * value. + */ + +#if NUMERIC_ABBREV_BITS == 64 + +static Datum +numeric_abbrev_convert_var(const NumericVar *var, NumericSortSupport *nss) +{ + int ndigits = var->ndigits; + int weight = var->weight; + int64 result; + + if (ndigits == 0 || weight < -44) + { + result = 0; + } + else if (weight > 83) + { + result = PG_INT64_MAX; + } + else + { + result = ((int64) (weight + 44) << 56); + + switch (ndigits) + { + default: + result |= ((int64) var->digits[3]); + /* FALLTHROUGH */ + case 3: + result |= ((int64) var->digits[2]) << 14; + /* FALLTHROUGH */ + case 2: + result |= ((int64) var->digits[1]) << 28; + /* FALLTHROUGH */ + case 1: + result |= ((int64) var->digits[0]) << 42; + break; + } + } + + /* the abbrev is negated relative to the original */ + if (var->sign == NUMERIC_POS) + result = -result; + + if (nss->estimating) + { + uint32 tmp = ((uint32) result + ^ (uint32) ((uint64) result >> 32)); + + addHyperLogLog(&nss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); + } + + return NumericAbbrevGetDatum(result); +} + +#endif /* NUMERIC_ABBREV_BITS == 64 */ + +#if NUMERIC_ABBREV_BITS == 32 + +static Datum +numeric_abbrev_convert_var(const NumericVar *var, NumericSortSupport *nss) +{ + int ndigits = var->ndigits; + int weight = var->weight; + int32 result; + + if (ndigits == 0 || weight < -11) + { + result = 0; + } + else if (weight > 20) + { + result = PG_INT32_MAX; + } + else + { + NumericDigit nxt1 = (ndigits > 1) ? var->digits[1] : 0; + + weight = (weight + 11) * 4; + + result = var->digits[0]; + + /* + * "result" now has 1 to 4 nonzero decimal digits. 
We pack in more + * digits to make 7 in total (largest we can fit in 24 bits) + */ + + if (result > 999) + { + /* already have 4 digits, add 3 more */ + result = (result * 1000) + (nxt1 / 10); + weight += 3; + } + else if (result > 99) + { + /* already have 3 digits, add 4 more */ + result = (result * 10000) + nxt1; + weight += 2; + } + else if (result > 9) + { + NumericDigit nxt2 = (ndigits > 2) ? var->digits[2] : 0; + + /* already have 2 digits, add 5 more */ + result = (result * 100000) + (nxt1 * 10) + (nxt2 / 1000); + weight += 1; + } + else + { + NumericDigit nxt2 = (ndigits > 2) ? var->digits[2] : 0; + + /* already have 1 digit, add 6 more */ + result = (result * 1000000) + (nxt1 * 100) + (nxt2 / 100); + } + + result = result | (weight << 24); + } + + /* the abbrev is negated relative to the original */ + if (var->sign == NUMERIC_POS) + result = -result; + + if (nss->estimating) + { + uint32 tmp = (uint32) result; + + addHyperLogLog(&nss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); + } + + return NumericAbbrevGetDatum(result); +} + +#endif /* NUMERIC_ABBREV_BITS == 32 */ + +/* + * Ordinary (non-sortsupport) comparisons follow. + */ + +Datum +numeric_cmp(PG_FUNCTION_ARGS) +{ + Numeric num1 = PG_GETARG_NUMERIC(0); + Numeric num2 = PG_GETARG_NUMERIC(1); + int result; + + result = cmp_numerics(num1, num2); + + PG_FREE_IF_COPY(num1, 0); + PG_FREE_IF_COPY(num2, 1); + + PG_RETURN_INT32(result); +} + + +Datum +numeric_eq(PG_FUNCTION_ARGS) +{ + Numeric num1 = PG_GETARG_NUMERIC(0); + Numeric num2 = PG_GETARG_NUMERIC(1); + bool result; + + result = cmp_numerics(num1, num2) == 0; + + PG_FREE_IF_COPY(num1, 0); + PG_FREE_IF_COPY(num2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +numeric_ne(PG_FUNCTION_ARGS) +{ + Numeric num1 = PG_GETARG_NUMERIC(0); + Numeric num2 = PG_GETARG_NUMERIC(1); + bool result; + + result = cmp_numerics(num1, num2) != 0; + + PG_FREE_IF_COPY(num1, 0); + PG_FREE_IF_COPY(num2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +numeric_gt(PG_FUNCTION_ARGS) +{ + Numeric num1 = PG_GETARG_NUMERIC(0); + Numeric num2 = PG_GETARG_NUMERIC(1); + bool result; + + result = cmp_numerics(num1, num2) > 0; + + PG_FREE_IF_COPY(num1, 0); + PG_FREE_IF_COPY(num2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +numeric_ge(PG_FUNCTION_ARGS) +{ + Numeric num1 = PG_GETARG_NUMERIC(0); + Numeric num2 = PG_GETARG_NUMERIC(1); + bool result; + + result = cmp_numerics(num1, num2) >= 0; + + PG_FREE_IF_COPY(num1, 0); + PG_FREE_IF_COPY(num2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +numeric_lt(PG_FUNCTION_ARGS) +{ + Numeric num1 = PG_GETARG_NUMERIC(0); + Numeric num2 = PG_GETARG_NUMERIC(1); + bool result; + + result = cmp_numerics(num1, num2) < 0; + + PG_FREE_IF_COPY(num1, 0); + PG_FREE_IF_COPY(num2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +numeric_le(PG_FUNCTION_ARGS) +{ + Numeric num1 = PG_GETARG_NUMERIC(0); + Numeric num2 = PG_GETARG_NUMERIC(1); + bool result; + + result = cmp_numerics(num1, num2) <= 0; + + PG_FREE_IF_COPY(num1, 0); + PG_FREE_IF_COPY(num2, 1); + + PG_RETURN_BOOL(result); +} + +static int +cmp_numerics(Numeric num1, Numeric num2) +{ + int result; + + /* + * We consider all NANs to be equal and larger than any non-NAN (including + * Infinity). This is somewhat arbitrary; the important thing is to have + * a consistent sort order. 
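+ *
+ * (Thus, for example, 'NaN'::numeric > 'Infinity'::numeric is true, and all
+ * NaNs compare equal to one another.)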
+ */ + if (NUMERIC_IS_SPECIAL(num1)) + { + if (NUMERIC_IS_NAN(num1)) + { + if (NUMERIC_IS_NAN(num2)) + result = 0; /* NAN = NAN */ + else + result = 1; /* NAN > non-NAN */ + } + else if (NUMERIC_IS_PINF(num1)) + { + if (NUMERIC_IS_NAN(num2)) + result = -1; /* PINF < NAN */ + else if (NUMERIC_IS_PINF(num2)) + result = 0; /* PINF = PINF */ + else + result = 1; /* PINF > anything else */ + } + else /* num1 must be NINF */ + { + if (NUMERIC_IS_NINF(num2)) + result = 0; /* NINF = NINF */ + else + result = -1; /* NINF < anything else */ + } + } + else if (NUMERIC_IS_SPECIAL(num2)) + { + if (NUMERIC_IS_NINF(num2)) + result = 1; /* normal > NINF */ + else + result = -1; /* normal < NAN or PINF */ + } + else + { + result = cmp_var_common(NUMERIC_DIGITS(num1), NUMERIC_NDIGITS(num1), + NUMERIC_WEIGHT(num1), NUMERIC_SIGN(num1), + NUMERIC_DIGITS(num2), NUMERIC_NDIGITS(num2), + NUMERIC_WEIGHT(num2), NUMERIC_SIGN(num2)); + } + + return result; +} + +/* + * in_range support function for numeric. + */ +Datum +in_range_numeric_numeric(PG_FUNCTION_ARGS) +{ + Numeric val = PG_GETARG_NUMERIC(0); + Numeric base = PG_GETARG_NUMERIC(1); + Numeric offset = PG_GETARG_NUMERIC(2); + bool sub = PG_GETARG_BOOL(3); + bool less = PG_GETARG_BOOL(4); + bool result; + + /* + * Reject negative (including -Inf) or NaN offset. Negative is per spec, + * and NaN is because appropriate semantics for that seem non-obvious. + */ + if (NUMERIC_IS_NAN(offset) || + NUMERIC_IS_NINF(offset) || + NUMERIC_SIGN(offset) == NUMERIC_NEG) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PRECEDING_OR_FOLLOWING_SIZE), + errmsg("invalid preceding or following size in window function"))); + + /* + * Deal with cases where val and/or base is NaN, following the rule that + * NaN sorts after non-NaN (cf cmp_numerics). The offset cannot affect + * the conclusion. + */ + if (NUMERIC_IS_NAN(val)) + { + if (NUMERIC_IS_NAN(base)) + result = true; /* NAN = NAN */ + else + result = !less; /* NAN > non-NAN */ + } + else if (NUMERIC_IS_NAN(base)) + { + result = less; /* non-NAN < NAN */ + } + + /* + * Deal with infinite offset (necessarily +Inf, at this point). + */ + else if (NUMERIC_IS_SPECIAL(offset)) + { + Assert(NUMERIC_IS_PINF(offset)); + if (sub ? NUMERIC_IS_PINF(base) : NUMERIC_IS_NINF(base)) + { + /* + * base +/- offset would produce NaN, so return true for any val + * (see in_range_float8_float8() for reasoning). + */ + result = true; + } + else if (sub) + { + /* base - offset must be -inf */ + if (less) + result = NUMERIC_IS_NINF(val); /* only -inf is <= sum */ + else + result = true; /* any val is >= sum */ + } + else + { + /* base + offset must be +inf */ + if (less) + result = true; /* any val is <= sum */ + else + result = NUMERIC_IS_PINF(val); /* only +inf is >= sum */ + } + } + + /* + * Deal with cases where val and/or base is infinite. The offset, being + * now known finite, cannot affect the conclusion. + */ + else if (NUMERIC_IS_SPECIAL(val)) + { + if (NUMERIC_IS_PINF(val)) + { + if (NUMERIC_IS_PINF(base)) + result = true; /* PINF = PINF */ + else + result = !less; /* PINF > any other non-NAN */ + } + else /* val must be NINF */ + { + if (NUMERIC_IS_NINF(base)) + result = true; /* NINF = NINF */ + else + result = less; /* NINF < anything else */ + } + } + else if (NUMERIC_IS_SPECIAL(base)) + { + if (NUMERIC_IS_NINF(base)) + result = !less; /* normal > NINF */ + else + result = less; /* normal < PINF */ + } + else + { + /* + * Otherwise go ahead and compute base +/- offset. 
While it's + * possible for this to overflow the numeric format, it's unlikely + * enough that we don't take measures to prevent it. + */ + NumericVar valv; + NumericVar basev; + NumericVar offsetv; + NumericVar sum; + + init_var_from_num(val, &valv); + init_var_from_num(base, &basev); + init_var_from_num(offset, &offsetv); + init_var(&sum); + + if (sub) + sub_var(&basev, &offsetv, &sum); + else + add_var(&basev, &offsetv, &sum); + + if (less) + result = (cmp_var(&valv, &sum) <= 0); + else + result = (cmp_var(&valv, &sum) >= 0); + + free_var(&sum); + } + + PG_FREE_IF_COPY(val, 0); + PG_FREE_IF_COPY(base, 1); + PG_FREE_IF_COPY(offset, 2); + + PG_RETURN_BOOL(result); +} + +Datum +hash_numeric(PG_FUNCTION_ARGS) +{ + Numeric key = PG_GETARG_NUMERIC(0); + Datum digit_hash; + Datum result; + int weight; + int start_offset; + int end_offset; + int i; + int hash_len; + NumericDigit *digits; + + /* If it's NaN or infinity, don't try to hash the rest of the fields */ + if (NUMERIC_IS_SPECIAL(key)) + PG_RETURN_UINT32(0); + + weight = NUMERIC_WEIGHT(key); + start_offset = 0; + end_offset = 0; + + /* + * Omit any leading or trailing zeros from the input to the hash. The + * numeric implementation *should* guarantee that leading and trailing + * zeros are suppressed, but we're paranoid. Note that we measure the + * starting and ending offsets in units of NumericDigits, not bytes. + */ + digits = NUMERIC_DIGITS(key); + for (i = 0; i < NUMERIC_NDIGITS(key); i++) + { + if (digits[i] != (NumericDigit) 0) + break; + + start_offset++; + + /* + * The weight is effectively the # of digits before the decimal point, + * so decrement it for each leading zero we skip. + */ + weight--; + } + + /* + * If there are no non-zero digits, then the value of the number is zero, + * regardless of any other fields. + */ + if (NUMERIC_NDIGITS(key) == start_offset) + PG_RETURN_UINT32(-1); + + for (i = NUMERIC_NDIGITS(key) - 1; i >= 0; i--) + { + if (digits[i] != (NumericDigit) 0) + break; + + end_offset++; + } + + /* If we get here, there should be at least one non-zero digit */ + Assert(start_offset + end_offset < NUMERIC_NDIGITS(key)); + + /* + * Note that we don't hash on the Numeric's scale, since two numerics can + * compare equal but have different scales. We also don't hash on the + * sign, although we could: since a sign difference implies inequality, + * this shouldn't affect correctness. + */ + hash_len = NUMERIC_NDIGITS(key) - start_offset - end_offset; + digit_hash = hash_any((unsigned char *) (NUMERIC_DIGITS(key) + start_offset), + hash_len * sizeof(NumericDigit)); + + /* Mix in the weight, via XOR */ + result = digit_hash ^ weight; + + PG_RETURN_DATUM(result); +} + +/* + * Returns 64-bit value by hashing a value to a 64-bit value, with a seed. + * Otherwise, similar to hash_numeric. 
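+ *
+ * (As in hash_numeric, the display scale and any leading or trailing zero
+ * digits are ignored, so values that compare equal, such as 1.0 and 1.000,
+ * hash to the same value.)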
+ */ +Datum +hash_numeric_extended(PG_FUNCTION_ARGS) +{ + Numeric key = PG_GETARG_NUMERIC(0); + uint64 seed = PG_GETARG_INT64(1); + Datum digit_hash; + Datum result; + int weight; + int start_offset; + int end_offset; + int i; + int hash_len; + NumericDigit *digits; + + /* If it's NaN or infinity, don't try to hash the rest of the fields */ + if (NUMERIC_IS_SPECIAL(key)) + PG_RETURN_UINT64(seed); + + weight = NUMERIC_WEIGHT(key); + start_offset = 0; + end_offset = 0; + + digits = NUMERIC_DIGITS(key); + for (i = 0; i < NUMERIC_NDIGITS(key); i++) + { + if (digits[i] != (NumericDigit) 0) + break; + + start_offset++; + + weight--; + } + + if (NUMERIC_NDIGITS(key) == start_offset) + PG_RETURN_UINT64(seed - 1); + + for (i = NUMERIC_NDIGITS(key) - 1; i >= 0; i--) + { + if (digits[i] != (NumericDigit) 0) + break; + + end_offset++; + } + + Assert(start_offset + end_offset < NUMERIC_NDIGITS(key)); + + hash_len = NUMERIC_NDIGITS(key) - start_offset - end_offset; + digit_hash = hash_any_extended((unsigned char *) (NUMERIC_DIGITS(key) + + start_offset), + hash_len * sizeof(NumericDigit), + seed); + + result = UInt64GetDatum(DatumGetUInt64(digit_hash) ^ weight); + + PG_RETURN_DATUM(result); +} + + +/* ---------------------------------------------------------------------- + * + * Basic arithmetic functions + * + * ---------------------------------------------------------------------- + */ + + +/* + * numeric_add() - + * + * Add two numerics + */ +Datum +numeric_add(PG_FUNCTION_ARGS) +{ + Numeric num1 = PG_GETARG_NUMERIC(0); + Numeric num2 = PG_GETARG_NUMERIC(1); + Numeric res; + + res = numeric_add_opt_error(num1, num2, NULL); + + PG_RETURN_NUMERIC(res); +} + +/* + * numeric_add_opt_error() - + * + * Internal version of numeric_add(). If "*have_error" flag is provided, + * on error it's set to true, NULL returned. This is helpful when caller + * need to handle errors by itself. + */ +Numeric +numeric_add_opt_error(Numeric num1, Numeric num2, bool *have_error) +{ + NumericVar arg1; + NumericVar arg2; + NumericVar result; + Numeric res; + + /* + * Handle NaN and infinities + */ + if (NUMERIC_IS_SPECIAL(num1) || NUMERIC_IS_SPECIAL(num2)) + { + if (NUMERIC_IS_NAN(num1) || NUMERIC_IS_NAN(num2)) + return make_result(&const_nan); + if (NUMERIC_IS_PINF(num1)) + { + if (NUMERIC_IS_NINF(num2)) + return make_result(&const_nan); /* Inf + -Inf */ + else + return make_result(&const_pinf); + } + if (NUMERIC_IS_NINF(num1)) + { + if (NUMERIC_IS_PINF(num2)) + return make_result(&const_nan); /* -Inf + Inf */ + else + return make_result(&const_ninf); + } + /* by here, num1 must be finite, so num2 is not */ + if (NUMERIC_IS_PINF(num2)) + return make_result(&const_pinf); + Assert(NUMERIC_IS_NINF(num2)); + return make_result(&const_ninf); + } + + /* + * Unpack the values, let add_var() compute the result and return it. + */ + init_var_from_num(num1, &arg1); + init_var_from_num(num2, &arg2); + + init_var(&result); + add_var(&arg1, &arg2, &result); + + res = make_result_opt_error(&result, have_error); + + free_var(&result); + + return res; +} + + +/* + * numeric_sub() - + * + * Subtract one numeric from another + */ +Datum +numeric_sub(PG_FUNCTION_ARGS) +{ + Numeric num1 = PG_GETARG_NUMERIC(0); + Numeric num2 = PG_GETARG_NUMERIC(1); + Numeric res; + + res = numeric_sub_opt_error(num1, num2, NULL); + + PG_RETURN_NUMERIC(res); +} + + +/* + * numeric_sub_opt_error() - + * + * Internal version of numeric_sub(). If "*have_error" flag is provided, + * on error it's set to true, NULL returned. 
This is helpful when caller + * need to handle errors by itself. + */ +Numeric +numeric_sub_opt_error(Numeric num1, Numeric num2, bool *have_error) +{ + NumericVar arg1; + NumericVar arg2; + NumericVar result; + Numeric res; + + /* + * Handle NaN and infinities + */ + if (NUMERIC_IS_SPECIAL(num1) || NUMERIC_IS_SPECIAL(num2)) + { + if (NUMERIC_IS_NAN(num1) || NUMERIC_IS_NAN(num2)) + return make_result(&const_nan); + if (NUMERIC_IS_PINF(num1)) + { + if (NUMERIC_IS_PINF(num2)) + return make_result(&const_nan); /* Inf - Inf */ + else + return make_result(&const_pinf); + } + if (NUMERIC_IS_NINF(num1)) + { + if (NUMERIC_IS_NINF(num2)) + return make_result(&const_nan); /* -Inf - -Inf */ + else + return make_result(&const_ninf); + } + /* by here, num1 must be finite, so num2 is not */ + if (NUMERIC_IS_PINF(num2)) + return make_result(&const_ninf); + Assert(NUMERIC_IS_NINF(num2)); + return make_result(&const_pinf); + } + + /* + * Unpack the values, let sub_var() compute the result and return it. + */ + init_var_from_num(num1, &arg1); + init_var_from_num(num2, &arg2); + + init_var(&result); + sub_var(&arg1, &arg2, &result); + + res = make_result_opt_error(&result, have_error); + + free_var(&result); + + return res; +} + + +/* + * numeric_mul() - + * + * Calculate the product of two numerics + */ +Datum +numeric_mul(PG_FUNCTION_ARGS) +{ + Numeric num1 = PG_GETARG_NUMERIC(0); + Numeric num2 = PG_GETARG_NUMERIC(1); + Numeric res; + + res = numeric_mul_opt_error(num1, num2, NULL); + + PG_RETURN_NUMERIC(res); +} + + +/* + * numeric_mul_opt_error() - + * + * Internal version of numeric_mul(). If "*have_error" flag is provided, + * on error it's set to true, NULL returned. This is helpful when caller + * need to handle errors by itself. + */ +Numeric +numeric_mul_opt_error(Numeric num1, Numeric num2, bool *have_error) +{ + NumericVar arg1; + NumericVar arg2; + NumericVar result; + Numeric res; + + /* + * Handle NaN and infinities + */ + if (NUMERIC_IS_SPECIAL(num1) || NUMERIC_IS_SPECIAL(num2)) + { + if (NUMERIC_IS_NAN(num1) || NUMERIC_IS_NAN(num2)) + return make_result(&const_nan); + if (NUMERIC_IS_PINF(num1)) + { + switch (numeric_sign_internal(num2)) + { + case 0: + return make_result(&const_nan); /* Inf * 0 */ + case 1: + return make_result(&const_pinf); + case -1: + return make_result(&const_ninf); + } + Assert(false); + } + if (NUMERIC_IS_NINF(num1)) + { + switch (numeric_sign_internal(num2)) + { + case 0: + return make_result(&const_nan); /* -Inf * 0 */ + case 1: + return make_result(&const_ninf); + case -1: + return make_result(&const_pinf); + } + Assert(false); + } + /* by here, num1 must be finite, so num2 is not */ + if (NUMERIC_IS_PINF(num2)) + { + switch (numeric_sign_internal(num1)) + { + case 0: + return make_result(&const_nan); /* 0 * Inf */ + case 1: + return make_result(&const_pinf); + case -1: + return make_result(&const_ninf); + } + Assert(false); + } + Assert(NUMERIC_IS_NINF(num2)); + switch (numeric_sign_internal(num1)) + { + case 0: + return make_result(&const_nan); /* 0 * -Inf */ + case 1: + return make_result(&const_ninf); + case -1: + return make_result(&const_pinf); + } + Assert(false); + } + + /* + * Unpack the values, let mul_var() compute the result and return it. + * Unlike add_var() and sub_var(), mul_var() will round its result. In the + * case of numeric_mul(), which is invoked for the * operator on numerics, + * we request exact representation for the product (rscale = sum(dscale of + * arg1, dscale of arg2)). 
If the exact result has more digits after the + * decimal point than can be stored in a numeric, we round it. Rounding + * after computing the exact result ensures that the final result is + * correctly rounded (rounding in mul_var() using a truncated product + * would not guarantee this). + */ + init_var_from_num(num1, &arg1); + init_var_from_num(num2, &arg2); + + init_var(&result); + mul_var(&arg1, &arg2, &result, arg1.dscale + arg2.dscale); + + if (result.dscale > NUMERIC_DSCALE_MAX) + round_var(&result, NUMERIC_DSCALE_MAX); + + res = make_result_opt_error(&result, have_error); + + free_var(&result); + + return res; +} + + +/* + * numeric_div() - + * + * Divide one numeric into another + */ +Datum +numeric_div(PG_FUNCTION_ARGS) +{ + Numeric num1 = PG_GETARG_NUMERIC(0); + Numeric num2 = PG_GETARG_NUMERIC(1); + Numeric res; + + res = numeric_div_opt_error(num1, num2, NULL); + + PG_RETURN_NUMERIC(res); +} + + +/* + * numeric_div_opt_error() - + * + * Internal version of numeric_div(). If "*have_error" flag is provided, + * on error it's set to true, NULL returned. This is helpful when caller + * need to handle errors by itself. + */ +Numeric +numeric_div_opt_error(Numeric num1, Numeric num2, bool *have_error) +{ + NumericVar arg1; + NumericVar arg2; + NumericVar result; + Numeric res; + int rscale; + + if (have_error) + *have_error = false; + + /* + * Handle NaN and infinities + */ + if (NUMERIC_IS_SPECIAL(num1) || NUMERIC_IS_SPECIAL(num2)) + { + if (NUMERIC_IS_NAN(num1) || NUMERIC_IS_NAN(num2)) + return make_result(&const_nan); + if (NUMERIC_IS_PINF(num1)) + { + if (NUMERIC_IS_SPECIAL(num2)) + return make_result(&const_nan); /* Inf / [-]Inf */ + switch (numeric_sign_internal(num2)) + { + case 0: + if (have_error) + { + *have_error = true; + return NULL; + } + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + break; + case 1: + return make_result(&const_pinf); + case -1: + return make_result(&const_ninf); + } + Assert(false); + } + if (NUMERIC_IS_NINF(num1)) + { + if (NUMERIC_IS_SPECIAL(num2)) + return make_result(&const_nan); /* -Inf / [-]Inf */ + switch (numeric_sign_internal(num2)) + { + case 0: + if (have_error) + { + *have_error = true; + return NULL; + } + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + break; + case 1: + return make_result(&const_ninf); + case -1: + return make_result(&const_pinf); + } + Assert(false); + } + /* by here, num1 must be finite, so num2 is not */ + + /* + * POSIX would have us return zero or minus zero if num1 is zero, and + * otherwise throw an underflow error. But the numeric type doesn't + * really do underflow, so let's just return zero. 
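+ *
+ * (So, for example, dividing a finite numeric by 'Infinity' yields 0 rather
+ * than raising an underflow error.)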
+ */ + return make_result(&const_zero); + } + + /* + * Unpack the arguments + */ + init_var_from_num(num1, &arg1); + init_var_from_num(num2, &arg2); + + init_var(&result); + + /* + * Select scale for division result + */ + rscale = select_div_scale(&arg1, &arg2); + + /* + * If "have_error" is provided, check for division by zero here + */ + if (have_error && (arg2.ndigits == 0 || arg2.digits[0] == 0)) + { + *have_error = true; + return NULL; + } + + /* + * Do the divide and return the result + */ + div_var(&arg1, &arg2, &result, rscale, true); + + res = make_result_opt_error(&result, have_error); + + free_var(&result); + + return res; +} + + +/* + * numeric_div_trunc() - + * + * Divide one numeric into another, truncating the result to an integer + */ +Datum +numeric_div_trunc(PG_FUNCTION_ARGS) +{ + Numeric num1 = PG_GETARG_NUMERIC(0); + Numeric num2 = PG_GETARG_NUMERIC(1); + NumericVar arg1; + NumericVar arg2; + NumericVar result; + Numeric res; + + /* + * Handle NaN and infinities + */ + if (NUMERIC_IS_SPECIAL(num1) || NUMERIC_IS_SPECIAL(num2)) + { + if (NUMERIC_IS_NAN(num1) || NUMERIC_IS_NAN(num2)) + PG_RETURN_NUMERIC(make_result(&const_nan)); + if (NUMERIC_IS_PINF(num1)) + { + if (NUMERIC_IS_SPECIAL(num2)) + PG_RETURN_NUMERIC(make_result(&const_nan)); /* Inf / [-]Inf */ + switch (numeric_sign_internal(num2)) + { + case 0: + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + break; + case 1: + PG_RETURN_NUMERIC(make_result(&const_pinf)); + case -1: + PG_RETURN_NUMERIC(make_result(&const_ninf)); + } + Assert(false); + } + if (NUMERIC_IS_NINF(num1)) + { + if (NUMERIC_IS_SPECIAL(num2)) + PG_RETURN_NUMERIC(make_result(&const_nan)); /* -Inf / [-]Inf */ + switch (numeric_sign_internal(num2)) + { + case 0: + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + break; + case 1: + PG_RETURN_NUMERIC(make_result(&const_ninf)); + case -1: + PG_RETURN_NUMERIC(make_result(&const_pinf)); + } + Assert(false); + } + /* by here, num1 must be finite, so num2 is not */ + + /* + * POSIX would have us return zero or minus zero if num1 is zero, and + * otherwise throw an underflow error. But the numeric type doesn't + * really do underflow, so let's just return zero. + */ + PG_RETURN_NUMERIC(make_result(&const_zero)); + } + + /* + * Unpack the arguments + */ + init_var_from_num(num1, &arg1); + init_var_from_num(num2, &arg2); + + init_var(&result); + + /* + * Do the divide and return the result + */ + div_var(&arg1, &arg2, &result, 0, false); + + res = make_result(&result); + + free_var(&result); + + PG_RETURN_NUMERIC(res); +} + + +/* + * numeric_mod() - + * + * Calculate the modulo of two numerics + */ +Datum +numeric_mod(PG_FUNCTION_ARGS) +{ + Numeric num1 = PG_GETARG_NUMERIC(0); + Numeric num2 = PG_GETARG_NUMERIC(1); + Numeric res; + + res = numeric_mod_opt_error(num1, num2, NULL); + + PG_RETURN_NUMERIC(res); +} + + +/* + * numeric_mod_opt_error() - + * + * Internal version of numeric_mod(). If "*have_error" flag is provided, + * on error it's set to true, NULL returned. This is helpful when caller + * need to handle errors by itself. + */ +Numeric +numeric_mod_opt_error(Numeric num1, Numeric num2, bool *have_error) +{ + Numeric res; + NumericVar arg1; + NumericVar arg2; + NumericVar result; + + if (have_error) + *have_error = false; + + /* + * Handle NaN and infinities. We follow POSIX fmod() on this, except that + * POSIX treats x-is-infinite and y-is-zero identically, raising EDOM and + * returning NaN. 
We choose to throw error only for y-is-zero. + */ + if (NUMERIC_IS_SPECIAL(num1) || NUMERIC_IS_SPECIAL(num2)) + { + if (NUMERIC_IS_NAN(num1) || NUMERIC_IS_NAN(num2)) + return make_result(&const_nan); + if (NUMERIC_IS_INF(num1)) + { + if (numeric_sign_internal(num2) == 0) + { + if (have_error) + { + *have_error = true; + return NULL; + } + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + } + /* Inf % any nonzero = NaN */ + return make_result(&const_nan); + } + /* num2 must be [-]Inf; result is num1 regardless of sign of num2 */ + return duplicate_numeric(num1); + } + + init_var_from_num(num1, &arg1); + init_var_from_num(num2, &arg2); + + init_var(&result); + + /* + * If "have_error" is provided, check for division by zero here + */ + if (have_error && (arg2.ndigits == 0 || arg2.digits[0] == 0)) + { + *have_error = true; + return NULL; + } + + mod_var(&arg1, &arg2, &result); + + res = make_result_opt_error(&result, NULL); + + free_var(&result); + + return res; +} + + +/* + * numeric_inc() - + * + * Increment a number by one + */ +Datum +numeric_inc(PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + NumericVar arg; + Numeric res; + + /* + * Handle NaN and infinities + */ + if (NUMERIC_IS_SPECIAL(num)) + PG_RETURN_NUMERIC(duplicate_numeric(num)); + + /* + * Compute the result and return it + */ + init_var_from_num(num, &arg); + + add_var(&arg, &const_one, &arg); + + res = make_result(&arg); + + free_var(&arg); + + PG_RETURN_NUMERIC(res); +} + + +/* + * numeric_smaller() - + * + * Return the smaller of two numbers + */ +Datum +numeric_smaller(PG_FUNCTION_ARGS) +{ + Numeric num1 = PG_GETARG_NUMERIC(0); + Numeric num2 = PG_GETARG_NUMERIC(1); + + /* + * Use cmp_numerics so that this will agree with the comparison operators, + * particularly as regards comparisons involving NaN. + */ + if (cmp_numerics(num1, num2) < 0) + PG_RETURN_NUMERIC(num1); + else + PG_RETURN_NUMERIC(num2); +} + + +/* + * numeric_larger() - + * + * Return the larger of two numbers + */ +Datum +numeric_larger(PG_FUNCTION_ARGS) +{ + Numeric num1 = PG_GETARG_NUMERIC(0); + Numeric num2 = PG_GETARG_NUMERIC(1); + + /* + * Use cmp_numerics so that this will agree with the comparison operators, + * particularly as regards comparisons involving NaN. + */ + if (cmp_numerics(num1, num2) > 0) + PG_RETURN_NUMERIC(num1); + else + PG_RETURN_NUMERIC(num2); +} + + +/* ---------------------------------------------------------------------- + * + * Advanced math functions + * + * ---------------------------------------------------------------------- + */ + +/* + * numeric_gcd() - + * + * Calculate the greatest common divisor of two numerics + */ +Datum +numeric_gcd(PG_FUNCTION_ARGS) +{ + Numeric num1 = PG_GETARG_NUMERIC(0); + Numeric num2 = PG_GETARG_NUMERIC(1); + NumericVar arg1; + NumericVar arg2; + NumericVar result; + Numeric res; + + /* + * Handle NaN and infinities: we consider the result to be NaN in all such + * cases. 
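+ *
+ * (For illustration: gcd(1071::numeric, 462::numeric) is 21, while any NaN
+ * or infinite argument makes the result NaN.)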
+ */ + if (NUMERIC_IS_SPECIAL(num1) || NUMERIC_IS_SPECIAL(num2)) + PG_RETURN_NUMERIC(make_result(&const_nan)); + + /* + * Unpack the arguments + */ + init_var_from_num(num1, &arg1); + init_var_from_num(num2, &arg2); + + init_var(&result); + + /* + * Find the GCD and return the result + */ + gcd_var(&arg1, &arg2, &result); + + res = make_result(&result); + + free_var(&result); + + PG_RETURN_NUMERIC(res); +} + + +/* + * numeric_lcm() - + * + * Calculate the least common multiple of two numerics + */ +Datum +numeric_lcm(PG_FUNCTION_ARGS) +{ + Numeric num1 = PG_GETARG_NUMERIC(0); + Numeric num2 = PG_GETARG_NUMERIC(1); + NumericVar arg1; + NumericVar arg2; + NumericVar result; + Numeric res; + + /* + * Handle NaN and infinities: we consider the result to be NaN in all such + * cases. + */ + if (NUMERIC_IS_SPECIAL(num1) || NUMERIC_IS_SPECIAL(num2)) + PG_RETURN_NUMERIC(make_result(&const_nan)); + + /* + * Unpack the arguments + */ + init_var_from_num(num1, &arg1); + init_var_from_num(num2, &arg2); + + init_var(&result); + + /* + * Compute the result using lcm(x, y) = abs(x / gcd(x, y) * y), returning + * zero if either input is zero. + * + * Note that the division is guaranteed to be exact, returning an integer + * result, so the LCM is an integral multiple of both x and y. A display + * scale of Min(x.dscale, y.dscale) would be sufficient to represent it, + * but as with other numeric functions, we choose to return a result whose + * display scale is no smaller than either input. + */ + if (arg1.ndigits == 0 || arg2.ndigits == 0) + set_var_from_var(&const_zero, &result); + else + { + gcd_var(&arg1, &arg2, &result); + div_var(&arg1, &result, &result, 0, false); + mul_var(&arg2, &result, &result, arg2.dscale); + result.sign = NUMERIC_POS; + } + + result.dscale = Max(arg1.dscale, arg2.dscale); + + res = make_result(&result); + + free_var(&result); + + PG_RETURN_NUMERIC(res); +} + + +/* + * numeric_fac() + * + * Compute factorial + */ +Datum +numeric_fac(PG_FUNCTION_ARGS) +{ + int64 num = PG_GETARG_INT64(0); + Numeric res; + NumericVar fact; + NumericVar result; + + if (num < 0) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("factorial of a negative number is undefined"))); + if (num <= 1) + { + res = make_result(&const_one); + PG_RETURN_NUMERIC(res); + } + /* Fail immediately if the result would overflow */ + if (num > 32177) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value overflows numeric format"))); + + init_var(&fact); + init_var(&result); + + int64_to_numericvar(num, &result); + + for (num = num - 1; num > 1; num--) + { + /* this loop can take awhile, so allow it to be interrupted */ + CHECK_FOR_INTERRUPTS(); + + int64_to_numericvar(num, &fact); + + mul_var(&result, &fact, &result, 0); + } + + res = make_result(&result); + + free_var(&fact); + free_var(&result); + + PG_RETURN_NUMERIC(res); +} + + +/* + * numeric_sqrt() - + * + * Compute the square root of a numeric. 
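+ *
+ * For illustration: sqrt(2::numeric) is computed to at least 16 significant
+ * digits, giving 1.414213562373095; a negative argument raises an error.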
+ */ +Datum +numeric_sqrt(PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + Numeric res; + NumericVar arg; + NumericVar result; + int sweight; + int rscale; + + /* + * Handle NaN and infinities + */ + if (NUMERIC_IS_SPECIAL(num)) + { + /* error should match that in sqrt_var() */ + if (NUMERIC_IS_NINF(num)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_POWER_FUNCTION), + errmsg("cannot take square root of a negative number"))); + /* For NAN or PINF, just duplicate the input */ + PG_RETURN_NUMERIC(duplicate_numeric(num)); + } + + /* + * Unpack the argument and determine the result scale. We choose a scale + * to give at least NUMERIC_MIN_SIG_DIGITS significant digits; but in any + * case not less than the input's dscale. + */ + init_var_from_num(num, &arg); + + init_var(&result); + + /* + * Assume the input was normalized, so arg.weight is accurate. The result + * then has at least sweight = floor(arg.weight * DEC_DIGITS / 2 + 1) + * digits before the decimal point. When DEC_DIGITS is even, we can save + * a few cycles, since the division is exact and there is no need to round + * towards negative infinity. + */ +#if DEC_DIGITS == ((DEC_DIGITS / 2) * 2) + sweight = arg.weight * DEC_DIGITS / 2 + 1; +#else + if (arg.weight >= 0) + sweight = arg.weight * DEC_DIGITS / 2 + 1; + else + sweight = 1 - (1 - arg.weight * DEC_DIGITS) / 2; +#endif + + rscale = NUMERIC_MIN_SIG_DIGITS - sweight; + rscale = Max(rscale, arg.dscale); + rscale = Max(rscale, NUMERIC_MIN_DISPLAY_SCALE); + rscale = Min(rscale, NUMERIC_MAX_DISPLAY_SCALE); + + /* + * Let sqrt_var() do the calculation and return the result. + */ + sqrt_var(&arg, &result, rscale); + + res = make_result(&result); + + free_var(&result); + + PG_RETURN_NUMERIC(res); +} + + +/* + * numeric_exp() - + * + * Raise e to the power of x + */ +Datum +numeric_exp(PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + Numeric res; + NumericVar arg; + NumericVar result; + int rscale; + double val; + + /* + * Handle NaN and infinities + */ + if (NUMERIC_IS_SPECIAL(num)) + { + /* Per POSIX, exp(-Inf) is zero */ + if (NUMERIC_IS_NINF(num)) + PG_RETURN_NUMERIC(make_result(&const_zero)); + /* For NAN or PINF, just duplicate the input */ + PG_RETURN_NUMERIC(duplicate_numeric(num)); + } + + /* + * Unpack the argument and determine the result scale. We choose a scale + * to give at least NUMERIC_MIN_SIG_DIGITS significant digits; but in any + * case not less than the input's dscale. + */ + init_var_from_num(num, &arg); + + init_var(&result); + + /* convert input to float8, ignoring overflow */ + val = numericvar_to_double_no_overflow(&arg); + + /* + * log10(result) = num * log10(e), so this is approximately the decimal + * weight of the result: + */ + val *= 0.434294481903252; + + /* limit to something that won't cause integer overflow */ + val = Max(val, -NUMERIC_MAX_RESULT_SCALE); + val = Min(val, NUMERIC_MAX_RESULT_SCALE); + + rscale = NUMERIC_MIN_SIG_DIGITS - (int) val; + rscale = Max(rscale, arg.dscale); + rscale = Max(rscale, NUMERIC_MIN_DISPLAY_SCALE); + rscale = Min(rscale, NUMERIC_MAX_DISPLAY_SCALE); + + /* + * Let exp_var() do the calculation and return the result. 
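+ *
+ * (Illustrative note, not part of the upstream comment: with the scale
+ * chosen above, exp(1::numeric) comes out as 2.7182818284590452, i.e. 16
+ * digits after the decimal point.)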
+ */ + exp_var(&arg, &result, rscale); + + res = make_result(&result); + + free_var(&result); + + PG_RETURN_NUMERIC(res); +} + + +/* + * numeric_ln() - + * + * Compute the natural logarithm of x + */ +Datum +numeric_ln(PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + Numeric res; + NumericVar arg; + NumericVar result; + int ln_dweight; + int rscale; + + /* + * Handle NaN and infinities + */ + if (NUMERIC_IS_SPECIAL(num)) + { + if (NUMERIC_IS_NINF(num)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_LOG), + errmsg("cannot take logarithm of a negative number"))); + /* For NAN or PINF, just duplicate the input */ + PG_RETURN_NUMERIC(duplicate_numeric(num)); + } + + init_var_from_num(num, &arg); + init_var(&result); + + /* Estimated dweight of logarithm */ + ln_dweight = estimate_ln_dweight(&arg); + + rscale = NUMERIC_MIN_SIG_DIGITS - ln_dweight; + rscale = Max(rscale, arg.dscale); + rscale = Max(rscale, NUMERIC_MIN_DISPLAY_SCALE); + rscale = Min(rscale, NUMERIC_MAX_DISPLAY_SCALE); + + ln_var(&arg, &result, rscale); + + res = make_result(&result); + + free_var(&result); + + PG_RETURN_NUMERIC(res); +} + + +/* + * numeric_log() - + * + * Compute the logarithm of x in a given base + */ +Datum +numeric_log(PG_FUNCTION_ARGS) +{ + Numeric num1 = PG_GETARG_NUMERIC(0); + Numeric num2 = PG_GETARG_NUMERIC(1); + Numeric res; + NumericVar arg1; + NumericVar arg2; + NumericVar result; + + /* + * Handle NaN and infinities + */ + if (NUMERIC_IS_SPECIAL(num1) || NUMERIC_IS_SPECIAL(num2)) + { + int sign1, + sign2; + + if (NUMERIC_IS_NAN(num1) || NUMERIC_IS_NAN(num2)) + PG_RETURN_NUMERIC(make_result(&const_nan)); + /* fail on negative inputs including -Inf, as log_var would */ + sign1 = numeric_sign_internal(num1); + sign2 = numeric_sign_internal(num2); + if (sign1 < 0 || sign2 < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_LOG), + errmsg("cannot take logarithm of a negative number"))); + /* fail on zero inputs, as log_var would */ + if (sign1 == 0 || sign2 == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_LOG), + errmsg("cannot take logarithm of zero"))); + if (NUMERIC_IS_PINF(num1)) + { + /* log(Inf, Inf) reduces to Inf/Inf, so it's NaN */ + if (NUMERIC_IS_PINF(num2)) + PG_RETURN_NUMERIC(make_result(&const_nan)); + /* log(Inf, finite-positive) is zero (we don't throw underflow) */ + PG_RETURN_NUMERIC(make_result(&const_zero)); + } + Assert(NUMERIC_IS_PINF(num2)); + /* log(finite-positive, Inf) is Inf */ + PG_RETURN_NUMERIC(make_result(&const_pinf)); + } + + /* + * Initialize things + */ + init_var_from_num(num1, &arg1); + init_var_from_num(num2, &arg2); + init_var(&result); + + /* + * Call log_var() to compute and return the result; note it handles scale + * selection itself. + */ + log_var(&arg1, &arg2, &result); + + res = make_result(&result); + + free_var(&result); + + PG_RETURN_NUMERIC(res); +} + + +/* + * numeric_power() - + * + * Raise x to the power of y + */ +Datum +numeric_power(PG_FUNCTION_ARGS) +{ + Numeric num1 = PG_GETARG_NUMERIC(0); + Numeric num2 = PG_GETARG_NUMERIC(1); + Numeric res; + NumericVar arg1; + NumericVar arg2; + NumericVar result; + int sign1, + sign2; + + /* + * Handle NaN and infinities + */ + if (NUMERIC_IS_SPECIAL(num1) || NUMERIC_IS_SPECIAL(num2)) + { + /* + * We follow the POSIX spec for pow(3), which says that NaN ^ 0 = 1, + * and 1 ^ NaN = 1, while all other cases with NaN inputs yield NaN + * (with no error). 
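+ *
+ * (Illustrative note, not part of the upstream comment: concretely, the
+ * code below returns 1 for NaN ^ 0 and for 1 ^ NaN, and NaN for any other
+ * combination involving a NaN input.)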
+ */ + if (NUMERIC_IS_NAN(num1)) + { + if (!NUMERIC_IS_SPECIAL(num2)) + { + init_var_from_num(num2, &arg2); + if (cmp_var(&arg2, &const_zero) == 0) + PG_RETURN_NUMERIC(make_result(&const_one)); + } + PG_RETURN_NUMERIC(make_result(&const_nan)); + } + if (NUMERIC_IS_NAN(num2)) + { + if (!NUMERIC_IS_SPECIAL(num1)) + { + init_var_from_num(num1, &arg1); + if (cmp_var(&arg1, &const_one) == 0) + PG_RETURN_NUMERIC(make_result(&const_one)); + } + PG_RETURN_NUMERIC(make_result(&const_nan)); + } + /* At least one input is infinite, but error rules still apply */ + sign1 = numeric_sign_internal(num1); + sign2 = numeric_sign_internal(num2); + if (sign1 == 0 && sign2 < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_POWER_FUNCTION), + errmsg("zero raised to a negative power is undefined"))); + if (sign1 < 0 && !numeric_is_integral(num2)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_POWER_FUNCTION), + errmsg("a negative number raised to a non-integer power yields a complex result"))); + + /* + * POSIX gives this series of rules for pow(3) with infinite inputs: + * + * For any value of y, if x is +1, 1.0 shall be returned. + */ + if (!NUMERIC_IS_SPECIAL(num1)) + { + init_var_from_num(num1, &arg1); + if (cmp_var(&arg1, &const_one) == 0) + PG_RETURN_NUMERIC(make_result(&const_one)); + } + + /* + * For any value of x, if y is [-]0, 1.0 shall be returned. + */ + if (sign2 == 0) + PG_RETURN_NUMERIC(make_result(&const_one)); + + /* + * For any odd integer value of y > 0, if x is [-]0, [-]0 shall be + * returned. For y > 0 and not an odd integer, if x is [-]0, +0 shall + * be returned. (Since we don't deal in minus zero, we need not + * distinguish these two cases.) + */ + if (sign1 == 0 && sign2 > 0) + PG_RETURN_NUMERIC(make_result(&const_zero)); + + /* + * If x is -1, and y is [-]Inf, 1.0 shall be returned. + * + * For |x| < 1, if y is -Inf, +Inf shall be returned. + * + * For |x| > 1, if y is -Inf, +0 shall be returned. + * + * For |x| < 1, if y is +Inf, +0 shall be returned. + * + * For |x| > 1, if y is +Inf, +Inf shall be returned. + */ + if (NUMERIC_IS_INF(num2)) + { + bool abs_x_gt_one; + + if (NUMERIC_IS_SPECIAL(num1)) + abs_x_gt_one = true; /* x is either Inf or -Inf */ + else + { + init_var_from_num(num1, &arg1); + if (cmp_var(&arg1, &const_minus_one) == 0) + PG_RETURN_NUMERIC(make_result(&const_one)); + arg1.sign = NUMERIC_POS; /* now arg1 = abs(x) */ + abs_x_gt_one = (cmp_var(&arg1, &const_one) > 0); + } + if (abs_x_gt_one == (sign2 > 0)) + PG_RETURN_NUMERIC(make_result(&const_pinf)); + else + PG_RETURN_NUMERIC(make_result(&const_zero)); + } + + /* + * For y < 0, if x is +Inf, +0 shall be returned. + * + * For y > 0, if x is +Inf, +Inf shall be returned. + */ + if (NUMERIC_IS_PINF(num1)) + { + if (sign2 > 0) + PG_RETURN_NUMERIC(make_result(&const_pinf)); + else + PG_RETURN_NUMERIC(make_result(&const_zero)); + } + + Assert(NUMERIC_IS_NINF(num1)); + + /* + * For y an odd integer < 0, if x is -Inf, -0 shall be returned. For + * y < 0 and not an odd integer, if x is -Inf, +0 shall be returned. + * (Again, we need not distinguish these two cases.) + */ + if (sign2 < 0) + PG_RETURN_NUMERIC(make_result(&const_zero)); + + /* + * For y an odd integer > 0, if x is -Inf, -Inf shall be returned. For + * y > 0 and not an odd integer, if x is -Inf, +Inf shall be returned. 
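+ *
+ * (Descriptive note, not part of the upstream comment: the test below treats
+ * y as an odd integer when it has no digits after the decimal point
+ * (ndigits == weight + 1) and its last base-NBASE digit is odd; since NBASE
+ * is even, every higher-order digit contributes an even amount, so the
+ * parity of that last digit is the parity of the whole number.)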
+ */ + init_var_from_num(num2, &arg2); + if (arg2.ndigits > 0 && arg2.ndigits == arg2.weight + 1 && + (arg2.digits[arg2.ndigits - 1] & 1)) + PG_RETURN_NUMERIC(make_result(&const_ninf)); + else + PG_RETURN_NUMERIC(make_result(&const_pinf)); + } + + /* + * The SQL spec requires that we emit a particular SQLSTATE error code for + * certain error conditions. Specifically, we don't return a + * divide-by-zero error code for 0 ^ -1. Raising a negative number to a + * non-integer power must produce the same error code, but that case is + * handled in power_var(). + */ + sign1 = numeric_sign_internal(num1); + sign2 = numeric_sign_internal(num2); + + if (sign1 == 0 && sign2 < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_POWER_FUNCTION), + errmsg("zero raised to a negative power is undefined"))); + + /* + * Initialize things + */ + init_var(&result); + init_var_from_num(num1, &arg1); + init_var_from_num(num2, &arg2); + + /* + * Call power_var() to compute and return the result; note it handles + * scale selection itself. + */ + power_var(&arg1, &arg2, &result); + + res = make_result(&result); + + free_var(&result); + + PG_RETURN_NUMERIC(res); +} + +/* + * numeric_scale() - + * + * Returns the scale, i.e. the count of decimal digits in the fractional part + */ +Datum +numeric_scale(PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + + if (NUMERIC_IS_SPECIAL(num)) + PG_RETURN_NULL(); + + PG_RETURN_INT32(NUMERIC_DSCALE(num)); +} + +/* + * Calculate minimum scale for value. + */ +static int +get_min_scale(NumericVar *var) +{ + int min_scale; + int last_digit_pos; + + /* + * Ordinarily, the input value will be "stripped" so that the last + * NumericDigit is nonzero. But we don't want to get into an infinite + * loop if it isn't, so explicitly find the last nonzero digit. + */ + last_digit_pos = var->ndigits - 1; + while (last_digit_pos >= 0 && + var->digits[last_digit_pos] == 0) + last_digit_pos--; + + if (last_digit_pos >= 0) + { + /* compute min_scale assuming that last ndigit has no zeroes */ + min_scale = (last_digit_pos - var->weight) * DEC_DIGITS; + + /* + * We could get a negative result if there are no digits after the + * decimal point. In this case the min_scale must be zero. + */ + if (min_scale > 0) + { + /* + * Reduce min_scale if trailing digit(s) in last NumericDigit are + * zero. + */ + NumericDigit last_digit = var->digits[last_digit_pos]; + + while (last_digit % 10 == 0) + { + min_scale--; + last_digit /= 10; + } + } + else + min_scale = 0; + } + else + min_scale = 0; /* result if input is zero */ + + return min_scale; +} + +/* + * Returns minimum scale required to represent supplied value without loss. + */ +Datum +numeric_min_scale(PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + NumericVar arg; + int min_scale; + + if (NUMERIC_IS_SPECIAL(num)) + PG_RETURN_NULL(); + + init_var_from_num(num, &arg); + min_scale = get_min_scale(&arg); + free_var(&arg); + + PG_RETURN_INT32(min_scale); +} + +/* + * Reduce scale of numeric value to represent supplied value without loss. 
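+ *
+ * (Illustrative note, not part of the upstream comment: for example,
+ * trim_scale(8.4100) returns 8.41.)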
+ */ +Datum +numeric_trim_scale(PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + Numeric res; + NumericVar result; + + if (NUMERIC_IS_SPECIAL(num)) + PG_RETURN_NUMERIC(duplicate_numeric(num)); + + init_var_from_num(num, &result); + result.dscale = get_min_scale(&result); + res = make_result(&result); + free_var(&result); + + PG_RETURN_NUMERIC(res); +} + + +/* ---------------------------------------------------------------------- + * + * Type conversion functions + * + * ---------------------------------------------------------------------- + */ + +Numeric +int64_to_numeric(int64 val) +{ + Numeric res; + NumericVar result; + + init_var(&result); + + int64_to_numericvar(val, &result); + + res = make_result(&result); + + free_var(&result); + + return res; +} + +/* + * Convert val1/(10**log10val2) to numeric. This is much faster than normal + * numeric division. + */ +Numeric +int64_div_fast_to_numeric(int64 val1, int log10val2) +{ + Numeric res; + NumericVar result; + int rscale; + int w; + int m; + + init_var(&result); + + /* result scale */ + rscale = log10val2 < 0 ? 0 : log10val2; + + /* how much to decrease the weight by */ + w = log10val2 / DEC_DIGITS; + /* how much is left to divide by */ + m = log10val2 % DEC_DIGITS; + if (m < 0) + { + m += DEC_DIGITS; + w--; + } + + /* + * If there is anything left to divide by (10^m with 0 < m < DEC_DIGITS), + * multiply the dividend by 10^(DEC_DIGITS - m), and shift the weight by + * one more. + */ + if (m > 0) + { +#if DEC_DIGITS == 4 + static const int pow10[] = {1, 10, 100, 1000}; +#elif DEC_DIGITS == 2 + static const int pow10[] = {1, 10}; +#elif DEC_DIGITS == 1 + static const int pow10[] = {1}; +#else +#error unsupported NBASE +#endif + int64 factor = pow10[DEC_DIGITS - m]; + int64 new_val1; + + StaticAssertDecl(lengthof(pow10) == DEC_DIGITS, "mismatch with DEC_DIGITS"); + + if (unlikely(pg_mul_s64_overflow(val1, factor, &new_val1))) + { +#ifdef HAVE_INT128 + /* do the multiplication using 128-bit integers */ + int128 tmp; + + tmp = (int128) val1 * (int128) factor; + + int128_to_numericvar(tmp, &result); +#else + /* do the multiplication using numerics */ + NumericVar tmp; + + init_var(&tmp); + + int64_to_numericvar(val1, &result); + int64_to_numericvar(factor, &tmp); + mul_var(&result, &tmp, &result, 0); + + free_var(&tmp); +#endif + } + else + int64_to_numericvar(new_val1, &result); + + w++; + } + else + int64_to_numericvar(val1, &result); + + result.weight -= w; + result.dscale = rscale; + + res = make_result(&result); + + free_var(&result); + + return res; +} + +Datum +int4_numeric(PG_FUNCTION_ARGS) +{ + int32 val = PG_GETARG_INT32(0); + + PG_RETURN_NUMERIC(int64_to_numeric(val)); +} + +int32 +numeric_int4_opt_error(Numeric num, bool *have_error) +{ + NumericVar x; + int32 result; + + if (have_error) + *have_error = false; + + if (NUMERIC_IS_SPECIAL(num)) + { + if (have_error) + { + *have_error = true; + return 0; + } + else + { + if (NUMERIC_IS_NAN(num)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot convert NaN to %s", "integer"))); + else + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot convert infinity to %s", "integer"))); + } + } + + /* Convert to variable format, then convert to int4 */ + init_var_from_num(num, &x); + + if (!numericvar_to_int32(&x, &result)) + { + if (have_error) + { + *have_error = true; + return 0; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + } + } + + return result; +} + 
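+/*
+ * numeric_int4() -
+ *
+ * Cast numeric to int4. (Descriptive comment added here, not present in
+ * the upstream source: NaN, infinity, and out-of-range inputs raise an
+ * error, because numeric_int4_opt_error() is called with have_error set
+ * to NULL.)
+ */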
+Datum +numeric_int4(PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + + PG_RETURN_INT32(numeric_int4_opt_error(num, NULL)); +} + +/* + * Given a NumericVar, convert it to an int32. If the NumericVar + * exceeds the range of an int32, false is returned, otherwise true is returned. + * The input NumericVar is *not* free'd. + */ +static bool +numericvar_to_int32(const NumericVar *var, int32 *result) +{ + int64 val; + + if (!numericvar_to_int64(var, &val)) + return false; + + if (unlikely(val < PG_INT32_MIN) || unlikely(val > PG_INT32_MAX)) + return false; + + /* Down-convert to int4 */ + *result = (int32) val; + + return true; +} + +Datum +int8_numeric(PG_FUNCTION_ARGS) +{ + int64 val = PG_GETARG_INT64(0); + + PG_RETURN_NUMERIC(int64_to_numeric(val)); +} + + +Datum +numeric_int8(PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + NumericVar x; + int64 result; + + if (NUMERIC_IS_SPECIAL(num)) + { + if (NUMERIC_IS_NAN(num)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot convert NaN to %s", "bigint"))); + else + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot convert infinity to %s", "bigint"))); + } + + /* Convert to variable format and thence to int8 */ + init_var_from_num(num, &x); + + if (!numericvar_to_int64(&x, &result)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + + PG_RETURN_INT64(result); +} + + +Datum +int2_numeric(PG_FUNCTION_ARGS) +{ + int16 val = PG_GETARG_INT16(0); + + PG_RETURN_NUMERIC(int64_to_numeric(val)); +} + + +Datum +numeric_int2(PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + NumericVar x; + int64 val; + int16 result; + + if (NUMERIC_IS_SPECIAL(num)) + { + if (NUMERIC_IS_NAN(num)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot convert NaN to %s", "smallint"))); + else + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot convert infinity to %s", "smallint"))); + } + + /* Convert to variable format and thence to int8 */ + init_var_from_num(num, &x); + + if (!numericvar_to_int64(&x, &val)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("smallint out of range"))); + + if (unlikely(val < PG_INT16_MIN) || unlikely(val > PG_INT16_MAX)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("smallint out of range"))); + + /* Down-convert to int2 */ + result = (int16) val; + + PG_RETURN_INT16(result); +} + + +Datum +float8_numeric(PG_FUNCTION_ARGS) +{ + float8 val = PG_GETARG_FLOAT8(0); + Numeric res; + NumericVar result; + char buf[DBL_DIG + 100]; + const char *endptr; + + if (isnan(val)) + PG_RETURN_NUMERIC(make_result(&const_nan)); + + if (isinf(val)) + { + if (val < 0) + PG_RETURN_NUMERIC(make_result(&const_ninf)); + else + PG_RETURN_NUMERIC(make_result(&const_pinf)); + } + + snprintf(buf, sizeof(buf), "%.*g", DBL_DIG, val); + + init_var(&result); + + /* Assume we need not worry about leading/trailing spaces */ + (void) set_var_from_str(buf, buf, &result, &endptr, NULL); + + res = make_result(&result); + + free_var(&result); + + PG_RETURN_NUMERIC(res); +} + + +Datum +numeric_float8(PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + char *tmp; + Datum result; + + if (NUMERIC_IS_SPECIAL(num)) + { + if (NUMERIC_IS_PINF(num)) + PG_RETURN_FLOAT8(get_float8_infinity()); + else if (NUMERIC_IS_NINF(num)) + PG_RETURN_FLOAT8(-get_float8_infinity()); + else + PG_RETURN_FLOAT8(get_float8_nan()); + } + + tmp = 
DatumGetCString(DirectFunctionCall1(numeric_out, + NumericGetDatum(num))); + + result = DirectFunctionCall1(float8in, CStringGetDatum(tmp)); + + pfree(tmp); + + PG_RETURN_DATUM(result); +} + + +/* + * Convert numeric to float8; if out of range, return +/- HUGE_VAL + * + * (internal helper function, not directly callable from SQL) + */ +Datum +numeric_float8_no_overflow(PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + double val; + + if (NUMERIC_IS_SPECIAL(num)) + { + if (NUMERIC_IS_PINF(num)) + val = HUGE_VAL; + else if (NUMERIC_IS_NINF(num)) + val = -HUGE_VAL; + else + val = get_float8_nan(); + } + else + { + NumericVar x; + + init_var_from_num(num, &x); + val = numericvar_to_double_no_overflow(&x); + } + + PG_RETURN_FLOAT8(val); +} + +Datum +float4_numeric(PG_FUNCTION_ARGS) +{ + float4 val = PG_GETARG_FLOAT4(0); + Numeric res; + NumericVar result; + char buf[FLT_DIG + 100]; + const char *endptr; + + if (isnan(val)) + PG_RETURN_NUMERIC(make_result(&const_nan)); + + if (isinf(val)) + { + if (val < 0) + PG_RETURN_NUMERIC(make_result(&const_ninf)); + else + PG_RETURN_NUMERIC(make_result(&const_pinf)); + } + + snprintf(buf, sizeof(buf), "%.*g", FLT_DIG, val); + + init_var(&result); + + /* Assume we need not worry about leading/trailing spaces */ + (void) set_var_from_str(buf, buf, &result, &endptr, NULL); + + res = make_result(&result); + + free_var(&result); + + PG_RETURN_NUMERIC(res); +} + + +Datum +numeric_float4(PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + char *tmp; + Datum result; + + if (NUMERIC_IS_SPECIAL(num)) + { + if (NUMERIC_IS_PINF(num)) + PG_RETURN_FLOAT4(get_float4_infinity()); + else if (NUMERIC_IS_NINF(num)) + PG_RETURN_FLOAT4(-get_float4_infinity()); + else + PG_RETURN_FLOAT4(get_float4_nan()); + } + + tmp = DatumGetCString(DirectFunctionCall1(numeric_out, + NumericGetDatum(num))); + + result = DirectFunctionCall1(float4in, CStringGetDatum(tmp)); + + pfree(tmp); + + PG_RETURN_DATUM(result); +} + + +Datum +numeric_pg_lsn(PG_FUNCTION_ARGS) +{ + Numeric num = PG_GETARG_NUMERIC(0); + NumericVar x; + XLogRecPtr result; + + if (NUMERIC_IS_SPECIAL(num)) + { + if (NUMERIC_IS_NAN(num)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot convert NaN to %s", "pg_lsn"))); + else + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot convert infinity to %s", "pg_lsn"))); + } + + /* Convert to variable format and thence to pg_lsn */ + init_var_from_num(num, &x); + + if (!numericvar_to_uint64(&x, (uint64 *) &result)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("pg_lsn out of range"))); + + PG_RETURN_LSN(result); +} + + +/* ---------------------------------------------------------------------- + * + * Aggregate functions + * + * The transition datatype for all these aggregates is declared as INTERNAL. + * Actually, it's a pointer to a NumericAggState allocated in the aggregate + * context. The digit buffers for the NumericVars will be there too. + * + * On platforms which support 128-bit integers some aggregates instead use a + * 128-bit integer based transition datatype to speed up calculations. 
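+ *
+ * (Descriptive note, not part of the upstream comment: the 128-bit path is
+ * the Int128AggState / PolyNumAggState machinery defined below; it backs
+ * int2_accum(), int4_accum() and int8_avg_accum() when HAVE_INT128 is
+ * defined, while int8_accum() always uses NumericAggState.)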
+ * + * ---------------------------------------------------------------------- + */ + +typedef struct NumericAggState +{ + bool calcSumX2; /* if true, calculate sumX2 */ + MemoryContext agg_context; /* context we're calculating in */ + int64 N; /* count of processed numbers */ + NumericSumAccum sumX; /* sum of processed numbers */ + NumericSumAccum sumX2; /* sum of squares of processed numbers */ + int maxScale; /* maximum scale seen so far */ + int64 maxScaleCount; /* number of values seen with maximum scale */ + /* These counts are *not* included in N! Use NA_TOTAL_COUNT() as needed */ + int64 NaNcount; /* count of NaN values */ + int64 pInfcount; /* count of +Inf values */ + int64 nInfcount; /* count of -Inf values */ +} NumericAggState; + +#define NA_TOTAL_COUNT(na) \ + ((na)->N + (na)->NaNcount + (na)->pInfcount + (na)->nInfcount) + +/* + * Prepare state data for a numeric aggregate function that needs to compute + * sum, count and optionally sum of squares of the input. + */ +static NumericAggState * +makeNumericAggState(FunctionCallInfo fcinfo, bool calcSumX2) +{ + NumericAggState *state; + MemoryContext agg_context; + MemoryContext old_context; + + if (!AggCheckCallContext(fcinfo, &agg_context)) + elog(ERROR, "aggregate function called in non-aggregate context"); + + old_context = MemoryContextSwitchTo(agg_context); + + state = (NumericAggState *) palloc0(sizeof(NumericAggState)); + state->calcSumX2 = calcSumX2; + state->agg_context = agg_context; + + MemoryContextSwitchTo(old_context); + + return state; +} + +/* + * Like makeNumericAggState(), but allocate the state in the current memory + * context. + */ +static NumericAggState * +makeNumericAggStateCurrentContext(bool calcSumX2) +{ + NumericAggState *state; + + state = (NumericAggState *) palloc0(sizeof(NumericAggState)); + state->calcSumX2 = calcSumX2; + state->agg_context = CurrentMemoryContext; + + return state; +} + +/* + * Accumulate a new input value for numeric aggregate functions. + */ +static void +do_numeric_accum(NumericAggState *state, Numeric newval) +{ + NumericVar X; + NumericVar X2; + MemoryContext old_context; + + /* Count NaN/infinity inputs separately from all else */ + if (NUMERIC_IS_SPECIAL(newval)) + { + if (NUMERIC_IS_PINF(newval)) + state->pInfcount++; + else if (NUMERIC_IS_NINF(newval)) + state->nInfcount++; + else + state->NaNcount++; + return; + } + + /* load processed number in short-lived context */ + init_var_from_num(newval, &X); + + /* + * Track the highest input dscale that we've seen, to support inverse + * transitions (see do_numeric_discard). + */ + if (X.dscale > state->maxScale) + { + state->maxScale = X.dscale; + state->maxScaleCount = 1; + } + else if (X.dscale == state->maxScale) + state->maxScaleCount++; + + /* if we need X^2, calculate that in short-lived context */ + if (state->calcSumX2) + { + init_var(&X2); + mul_var(&X, &X, &X2, X.dscale * 2); + } + + /* The rest of this needs to work in the aggregate context */ + old_context = MemoryContextSwitchTo(state->agg_context); + + state->N++; + + /* Accumulate sums */ + accum_sum_add(&(state->sumX), &X); + + if (state->calcSumX2) + accum_sum_add(&(state->sumX2), &X2); + + MemoryContextSwitchTo(old_context); +} + +/* + * Attempt to remove an input value from the aggregated state. + * + * If the value cannot be removed then the function will return false; the + * possible reasons for failing are described below. + * + * If we aggregate the values 1.01 and 2 then the result will be 3.01. 
+ * If we are then asked to un-aggregate the 1.01 then we must fail as we + * won't be able to tell what the new aggregated value's dscale should be. + * We don't want to return 2.00 (dscale = 2), since the sum's dscale would + * have been zero if we'd really aggregated only 2. + * + * Note: alternatively, we could count the number of inputs with each possible + * dscale (up to some sane limit). Not yet clear if it's worth the trouble. + */ +static bool +do_numeric_discard(NumericAggState *state, Numeric newval) +{ + NumericVar X; + NumericVar X2; + MemoryContext old_context; + + /* Count NaN/infinity inputs separately from all else */ + if (NUMERIC_IS_SPECIAL(newval)) + { + if (NUMERIC_IS_PINF(newval)) + state->pInfcount--; + else if (NUMERIC_IS_NINF(newval)) + state->nInfcount--; + else + state->NaNcount--; + return true; + } + + /* load processed number in short-lived context */ + init_var_from_num(newval, &X); + + /* + * state->sumX's dscale is the maximum dscale of any of the inputs. + * Removing the last input with that dscale would require us to recompute + * the maximum dscale of the *remaining* inputs, which we cannot do unless + * no more non-NaN inputs remain at all. So we report a failure instead, + * and force the aggregation to be redone from scratch. + */ + if (X.dscale == state->maxScale) + { + if (state->maxScaleCount > 1 || state->maxScale == 0) + { + /* + * Some remaining inputs have same dscale, or dscale hasn't gotten + * above zero anyway + */ + state->maxScaleCount--; + } + else if (state->N == 1) + { + /* No remaining non-NaN inputs at all, so reset maxScale */ + state->maxScale = 0; + state->maxScaleCount = 0; + } + else + { + /* Correct new maxScale is uncertain, must fail */ + return false; + } + } + + /* if we need X^2, calculate that in short-lived context */ + if (state->calcSumX2) + { + init_var(&X2); + mul_var(&X, &X, &X2, X.dscale * 2); + } + + /* The rest of this needs to work in the aggregate context */ + old_context = MemoryContextSwitchTo(state->agg_context); + + if (state->N-- > 1) + { + /* Negate X, to subtract it from the sum */ + X.sign = (X.sign == NUMERIC_POS ? NUMERIC_NEG : NUMERIC_POS); + accum_sum_add(&(state->sumX), &X); + + if (state->calcSumX2) + { + /* Negate X^2. X^2 is always positive */ + X2.sign = NUMERIC_NEG; + accum_sum_add(&(state->sumX2), &X2); + } + } + else + { + /* Zero the sums */ + Assert(state->N == 0); + + accum_sum_reset(&state->sumX); + if (state->calcSumX2) + accum_sum_reset(&state->sumX2); + } + + MemoryContextSwitchTo(old_context); + + return true; +} + +/* + * Generic transition function for numeric aggregates that require sumX2. + */ +Datum +numeric_accum(PG_FUNCTION_ARGS) +{ + NumericAggState *state; + + state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0); + + /* Create the state data on the first call */ + if (state == NULL) + state = makeNumericAggState(fcinfo, true); + + if (!PG_ARGISNULL(1)) + do_numeric_accum(state, PG_GETARG_NUMERIC(1)); + + PG_RETURN_POINTER(state); +} + +/* + * Generic combine function for numeric aggregates which require sumX2 + */ +Datum +numeric_combine(PG_FUNCTION_ARGS) +{ + NumericAggState *state1; + NumericAggState *state2; + MemoryContext agg_context; + MemoryContext old_context; + + if (!AggCheckCallContext(fcinfo, &agg_context)) + elog(ERROR, "aggregate function called in non-aggregate context"); + + state1 = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0); + state2 = PG_ARGISNULL(1) ? 
NULL : (NumericAggState *) PG_GETARG_POINTER(1); + + if (state2 == NULL) + PG_RETURN_POINTER(state1); + + /* manually copy all fields from state2 to state1 */ + if (state1 == NULL) + { + old_context = MemoryContextSwitchTo(agg_context); + + state1 = makeNumericAggStateCurrentContext(true); + state1->N = state2->N; + state1->NaNcount = state2->NaNcount; + state1->pInfcount = state2->pInfcount; + state1->nInfcount = state2->nInfcount; + state1->maxScale = state2->maxScale; + state1->maxScaleCount = state2->maxScaleCount; + + accum_sum_copy(&state1->sumX, &state2->sumX); + accum_sum_copy(&state1->sumX2, &state2->sumX2); + + MemoryContextSwitchTo(old_context); + + PG_RETURN_POINTER(state1); + } + + state1->N += state2->N; + state1->NaNcount += state2->NaNcount; + state1->pInfcount += state2->pInfcount; + state1->nInfcount += state2->nInfcount; + + if (state2->N > 0) + { + /* + * These are currently only needed for moving aggregates, but let's do + * the right thing anyway... + */ + if (state2->maxScale > state1->maxScale) + { + state1->maxScale = state2->maxScale; + state1->maxScaleCount = state2->maxScaleCount; + } + else if (state2->maxScale == state1->maxScale) + state1->maxScaleCount += state2->maxScaleCount; + + /* The rest of this needs to work in the aggregate context */ + old_context = MemoryContextSwitchTo(agg_context); + + /* Accumulate sums */ + accum_sum_combine(&state1->sumX, &state2->sumX); + accum_sum_combine(&state1->sumX2, &state2->sumX2); + + MemoryContextSwitchTo(old_context); + } + PG_RETURN_POINTER(state1); +} + +/* + * Generic transition function for numeric aggregates that don't require sumX2. + */ +Datum +numeric_avg_accum(PG_FUNCTION_ARGS) +{ + NumericAggState *state; + + state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0); + + /* Create the state data on the first call */ + if (state == NULL) + state = makeNumericAggState(fcinfo, false); + + if (!PG_ARGISNULL(1)) + do_numeric_accum(state, PG_GETARG_NUMERIC(1)); + + PG_RETURN_POINTER(state); +} + +/* + * Combine function for numeric aggregates which don't require sumX2 + */ +Datum +numeric_avg_combine(PG_FUNCTION_ARGS) +{ + NumericAggState *state1; + NumericAggState *state2; + MemoryContext agg_context; + MemoryContext old_context; + + if (!AggCheckCallContext(fcinfo, &agg_context)) + elog(ERROR, "aggregate function called in non-aggregate context"); + + state1 = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0); + state2 = PG_ARGISNULL(1) ? NULL : (NumericAggState *) PG_GETARG_POINTER(1); + + if (state2 == NULL) + PG_RETURN_POINTER(state1); + + /* manually copy all fields from state2 to state1 */ + if (state1 == NULL) + { + old_context = MemoryContextSwitchTo(agg_context); + + state1 = makeNumericAggStateCurrentContext(false); + state1->N = state2->N; + state1->NaNcount = state2->NaNcount; + state1->pInfcount = state2->pInfcount; + state1->nInfcount = state2->nInfcount; + state1->maxScale = state2->maxScale; + state1->maxScaleCount = state2->maxScaleCount; + + accum_sum_copy(&state1->sumX, &state2->sumX); + + MemoryContextSwitchTo(old_context); + + PG_RETURN_POINTER(state1); + } + + state1->N += state2->N; + state1->NaNcount += state2->NaNcount; + state1->pInfcount += state2->pInfcount; + state1->nInfcount += state2->nInfcount; + + if (state2->N > 0) + { + /* + * These are currently only needed for moving aggregates, but let's do + * the right thing anyway... 
+ */ + if (state2->maxScale > state1->maxScale) + { + state1->maxScale = state2->maxScale; + state1->maxScaleCount = state2->maxScaleCount; + } + else if (state2->maxScale == state1->maxScale) + state1->maxScaleCount += state2->maxScaleCount; + + /* The rest of this needs to work in the aggregate context */ + old_context = MemoryContextSwitchTo(agg_context); + + /* Accumulate sums */ + accum_sum_combine(&state1->sumX, &state2->sumX); + + MemoryContextSwitchTo(old_context); + } + PG_RETURN_POINTER(state1); +} + +/* + * numeric_avg_serialize + * Serialize NumericAggState for numeric aggregates that don't require + * sumX2. + */ +Datum +numeric_avg_serialize(PG_FUNCTION_ARGS) +{ + NumericAggState *state; + StringInfoData buf; + bytea *result; + NumericVar tmp_var; + + /* Ensure we disallow calling when not in aggregate context */ + if (!AggCheckCallContext(fcinfo, NULL)) + elog(ERROR, "aggregate function called in non-aggregate context"); + + state = (NumericAggState *) PG_GETARG_POINTER(0); + + init_var(&tmp_var); + + pq_begintypsend(&buf); + + /* N */ + pq_sendint64(&buf, state->N); + + /* sumX */ + accum_sum_final(&state->sumX, &tmp_var); + numericvar_serialize(&buf, &tmp_var); + + /* maxScale */ + pq_sendint32(&buf, state->maxScale); + + /* maxScaleCount */ + pq_sendint64(&buf, state->maxScaleCount); + + /* NaNcount */ + pq_sendint64(&buf, state->NaNcount); + + /* pInfcount */ + pq_sendint64(&buf, state->pInfcount); + + /* nInfcount */ + pq_sendint64(&buf, state->nInfcount); + + result = pq_endtypsend(&buf); + + free_var(&tmp_var); + + PG_RETURN_BYTEA_P(result); +} + +/* + * numeric_avg_deserialize + * Deserialize bytea into NumericAggState for numeric aggregates that + * don't require sumX2. + */ +Datum +numeric_avg_deserialize(PG_FUNCTION_ARGS) +{ + bytea *sstate; + NumericAggState *result; + StringInfoData buf; + NumericVar tmp_var; + + if (!AggCheckCallContext(fcinfo, NULL)) + elog(ERROR, "aggregate function called in non-aggregate context"); + + sstate = PG_GETARG_BYTEA_PP(0); + + init_var(&tmp_var); + + /* + * Copy the bytea into a StringInfo so that we can "receive" it using the + * standard recv-function infrastructure. + */ + initStringInfo(&buf); + appendBinaryStringInfo(&buf, + VARDATA_ANY(sstate), VARSIZE_ANY_EXHDR(sstate)); + + result = makeNumericAggStateCurrentContext(false); + + /* N */ + result->N = pq_getmsgint64(&buf); + + /* sumX */ + numericvar_deserialize(&buf, &tmp_var); + accum_sum_add(&(result->sumX), &tmp_var); + + /* maxScale */ + result->maxScale = pq_getmsgint(&buf, 4); + + /* maxScaleCount */ + result->maxScaleCount = pq_getmsgint64(&buf); + + /* NaNcount */ + result->NaNcount = pq_getmsgint64(&buf); + + /* pInfcount */ + result->pInfcount = pq_getmsgint64(&buf); + + /* nInfcount */ + result->nInfcount = pq_getmsgint64(&buf); + + pq_getmsgend(&buf); + pfree(buf.data); + + free_var(&tmp_var); + + PG_RETURN_POINTER(result); +} + +/* + * numeric_serialize + * Serialization function for NumericAggState for numeric aggregates that + * require sumX2. 
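+ *
+ * (Descriptive note, not part of the upstream comment: the payload written
+ * below is N, sumX, sumX2, maxScale, maxScaleCount, NaNcount, pInfcount and
+ * nInfcount, in that order, with the two sums finalized into NumericVars and
+ * sent via numericvar_serialize().)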
+ */ +Datum +numeric_serialize(PG_FUNCTION_ARGS) +{ + NumericAggState *state; + StringInfoData buf; + bytea *result; + NumericVar tmp_var; + + /* Ensure we disallow calling when not in aggregate context */ + if (!AggCheckCallContext(fcinfo, NULL)) + elog(ERROR, "aggregate function called in non-aggregate context"); + + state = (NumericAggState *) PG_GETARG_POINTER(0); + + init_var(&tmp_var); + + pq_begintypsend(&buf); + + /* N */ + pq_sendint64(&buf, state->N); + + /* sumX */ + accum_sum_final(&state->sumX, &tmp_var); + numericvar_serialize(&buf, &tmp_var); + + /* sumX2 */ + accum_sum_final(&state->sumX2, &tmp_var); + numericvar_serialize(&buf, &tmp_var); + + /* maxScale */ + pq_sendint32(&buf, state->maxScale); + + /* maxScaleCount */ + pq_sendint64(&buf, state->maxScaleCount); + + /* NaNcount */ + pq_sendint64(&buf, state->NaNcount); + + /* pInfcount */ + pq_sendint64(&buf, state->pInfcount); + + /* nInfcount */ + pq_sendint64(&buf, state->nInfcount); + + result = pq_endtypsend(&buf); + + free_var(&tmp_var); + + PG_RETURN_BYTEA_P(result); +} + +/* + * numeric_deserialize + * Deserialization function for NumericAggState for numeric aggregates that + * require sumX2. + */ +Datum +numeric_deserialize(PG_FUNCTION_ARGS) +{ + bytea *sstate; + NumericAggState *result; + StringInfoData buf; + NumericVar tmp_var; + + if (!AggCheckCallContext(fcinfo, NULL)) + elog(ERROR, "aggregate function called in non-aggregate context"); + + sstate = PG_GETARG_BYTEA_PP(0); + + init_var(&tmp_var); + + /* + * Copy the bytea into a StringInfo so that we can "receive" it using the + * standard recv-function infrastructure. + */ + initStringInfo(&buf); + appendBinaryStringInfo(&buf, + VARDATA_ANY(sstate), VARSIZE_ANY_EXHDR(sstate)); + + result = makeNumericAggStateCurrentContext(false); + + /* N */ + result->N = pq_getmsgint64(&buf); + + /* sumX */ + numericvar_deserialize(&buf, &tmp_var); + accum_sum_add(&(result->sumX), &tmp_var); + + /* sumX2 */ + numericvar_deserialize(&buf, &tmp_var); + accum_sum_add(&(result->sumX2), &tmp_var); + + /* maxScale */ + result->maxScale = pq_getmsgint(&buf, 4); + + /* maxScaleCount */ + result->maxScaleCount = pq_getmsgint64(&buf); + + /* NaNcount */ + result->NaNcount = pq_getmsgint64(&buf); + + /* pInfcount */ + result->pInfcount = pq_getmsgint64(&buf); + + /* nInfcount */ + result->nInfcount = pq_getmsgint64(&buf); + + pq_getmsgend(&buf); + pfree(buf.data); + + free_var(&tmp_var); + + PG_RETURN_POINTER(result); +} + +/* + * Generic inverse transition function for numeric aggregates + * (with or without requirement for X^2). + */ +Datum +numeric_accum_inv(PG_FUNCTION_ARGS) +{ + NumericAggState *state; + + state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0); + + /* Should not get here with no state */ + if (state == NULL) + elog(ERROR, "numeric_accum_inv called with NULL state"); + + if (!PG_ARGISNULL(1)) + { + /* If we fail to perform the inverse transition, return NULL */ + if (!do_numeric_discard(state, PG_GETARG_NUMERIC(1))) + PG_RETURN_NULL(); + } + + PG_RETURN_POINTER(state); +} + + +/* + * Integer data types in general use Numeric accumulators to share code + * and avoid risk of overflow. + * + * However for performance reasons optimized special-purpose accumulator + * routines are used when possible. + * + * On platforms with 128-bit integer support, the 128-bit routines will be + * used when sum(X) or sum(X*X) fit into 128-bit. 
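+ *
+ * (Illustrative arithmetic, not part of the upstream comment: the largest
+ * int4 square is below 2^62, so an int128 sum of squares has headroom for
+ * roughly 2^65 worst-case inputs before it could overflow.)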
+ * + * For 16 and 32 bit inputs, the N and sum(X) fit into 64-bit so the 64-bit + * accumulators will be used for SUM and AVG of these data types. + */ + +#ifdef HAVE_INT128 +typedef struct Int128AggState +{ + bool calcSumX2; /* if true, calculate sumX2 */ + int64 N; /* count of processed numbers */ + int128 sumX; /* sum of processed numbers */ + int128 sumX2; /* sum of squares of processed numbers */ +} Int128AggState; + +/* + * Prepare state data for a 128-bit aggregate function that needs to compute + * sum, count and optionally sum of squares of the input. + */ +static Int128AggState * +makeInt128AggState(FunctionCallInfo fcinfo, bool calcSumX2) +{ + Int128AggState *state; + MemoryContext agg_context; + MemoryContext old_context; + + if (!AggCheckCallContext(fcinfo, &agg_context)) + elog(ERROR, "aggregate function called in non-aggregate context"); + + old_context = MemoryContextSwitchTo(agg_context); + + state = (Int128AggState *) palloc0(sizeof(Int128AggState)); + state->calcSumX2 = calcSumX2; + + MemoryContextSwitchTo(old_context); + + return state; +} + +/* + * Like makeInt128AggState(), but allocate the state in the current memory + * context. + */ +static Int128AggState * +makeInt128AggStateCurrentContext(bool calcSumX2) +{ + Int128AggState *state; + + state = (Int128AggState *) palloc0(sizeof(Int128AggState)); + state->calcSumX2 = calcSumX2; + + return state; +} + +/* + * Accumulate a new input value for 128-bit aggregate functions. + */ +static void +do_int128_accum(Int128AggState *state, int128 newval) +{ + if (state->calcSumX2) + state->sumX2 += newval * newval; + + state->sumX += newval; + state->N++; +} + +/* + * Remove an input value from the aggregated state. + */ +static void +do_int128_discard(Int128AggState *state, int128 newval) +{ + if (state->calcSumX2) + state->sumX2 -= newval * newval; + + state->sumX -= newval; + state->N--; +} + +typedef Int128AggState PolyNumAggState; +#define makePolyNumAggState makeInt128AggState +#define makePolyNumAggStateCurrentContext makeInt128AggStateCurrentContext +#else +typedef NumericAggState PolyNumAggState; +#define makePolyNumAggState makeNumericAggState +#define makePolyNumAggStateCurrentContext makeNumericAggStateCurrentContext +#endif + +Datum +int2_accum(PG_FUNCTION_ARGS) +{ + PolyNumAggState *state; + + state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + + /* Create the state data on the first call */ + if (state == NULL) + state = makePolyNumAggState(fcinfo, true); + + if (!PG_ARGISNULL(1)) + { +#ifdef HAVE_INT128 + do_int128_accum(state, (int128) PG_GETARG_INT16(1)); +#else + do_numeric_accum(state, int64_to_numeric(PG_GETARG_INT16(1))); +#endif + } + + PG_RETURN_POINTER(state); +} + +Datum +int4_accum(PG_FUNCTION_ARGS) +{ + PolyNumAggState *state; + + state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + + /* Create the state data on the first call */ + if (state == NULL) + state = makePolyNumAggState(fcinfo, true); + + if (!PG_ARGISNULL(1)) + { +#ifdef HAVE_INT128 + do_int128_accum(state, (int128) PG_GETARG_INT32(1)); +#else + do_numeric_accum(state, int64_to_numeric(PG_GETARG_INT32(1))); +#endif + } + + PG_RETURN_POINTER(state); +} + +Datum +int8_accum(PG_FUNCTION_ARGS) +{ + NumericAggState *state; + + state = PG_ARGISNULL(0) ? 
NULL : (NumericAggState *) PG_GETARG_POINTER(0); + + /* Create the state data on the first call */ + if (state == NULL) + state = makeNumericAggState(fcinfo, true); + + if (!PG_ARGISNULL(1)) + do_numeric_accum(state, int64_to_numeric(PG_GETARG_INT64(1))); + + PG_RETURN_POINTER(state); +} + +/* + * Combine function for numeric aggregates which require sumX2 + */ +Datum +numeric_poly_combine(PG_FUNCTION_ARGS) +{ + PolyNumAggState *state1; + PolyNumAggState *state2; + MemoryContext agg_context; + MemoryContext old_context; + + if (!AggCheckCallContext(fcinfo, &agg_context)) + elog(ERROR, "aggregate function called in non-aggregate context"); + + state1 = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + state2 = PG_ARGISNULL(1) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(1); + + if (state2 == NULL) + PG_RETURN_POINTER(state1); + + /* manually copy all fields from state2 to state1 */ + if (state1 == NULL) + { + old_context = MemoryContextSwitchTo(agg_context); + + state1 = makePolyNumAggState(fcinfo, true); + state1->N = state2->N; + +#ifdef HAVE_INT128 + state1->sumX = state2->sumX; + state1->sumX2 = state2->sumX2; +#else + accum_sum_copy(&state1->sumX, &state2->sumX); + accum_sum_copy(&state1->sumX2, &state2->sumX2); +#endif + + MemoryContextSwitchTo(old_context); + + PG_RETURN_POINTER(state1); + } + + if (state2->N > 0) + { + state1->N += state2->N; + +#ifdef HAVE_INT128 + state1->sumX += state2->sumX; + state1->sumX2 += state2->sumX2; +#else + /* The rest of this needs to work in the aggregate context */ + old_context = MemoryContextSwitchTo(agg_context); + + /* Accumulate sums */ + accum_sum_combine(&state1->sumX, &state2->sumX); + accum_sum_combine(&state1->sumX2, &state2->sumX2); + + MemoryContextSwitchTo(old_context); +#endif + + } + PG_RETURN_POINTER(state1); +} + +/* + * numeric_poly_serialize + * Serialize PolyNumAggState into bytea for aggregate functions which + * require sumX2. + */ +Datum +numeric_poly_serialize(PG_FUNCTION_ARGS) +{ + PolyNumAggState *state; + StringInfoData buf; + bytea *result; + NumericVar tmp_var; + + /* Ensure we disallow calling when not in aggregate context */ + if (!AggCheckCallContext(fcinfo, NULL)) + elog(ERROR, "aggregate function called in non-aggregate context"); + + state = (PolyNumAggState *) PG_GETARG_POINTER(0); + + /* + * If the platform supports int128 then sumX and sumX2 will be a 128 bit + * integer type. Here we'll convert that into a numeric type so that the + * combine state is in the same format for both int128 enabled machines + * and machines which don't support that type. The logic here is that one + * day we might like to send these over to another server for further + * processing and we want a standard format to work with. + */ + + init_var(&tmp_var); + + pq_begintypsend(&buf); + + /* N */ + pq_sendint64(&buf, state->N); + + /* sumX */ +#ifdef HAVE_INT128 + int128_to_numericvar(state->sumX, &tmp_var); +#else + accum_sum_final(&state->sumX, &tmp_var); +#endif + numericvar_serialize(&buf, &tmp_var); + + /* sumX2 */ +#ifdef HAVE_INT128 + int128_to_numericvar(state->sumX2, &tmp_var); +#else + accum_sum_final(&state->sumX2, &tmp_var); +#endif + numericvar_serialize(&buf, &tmp_var); + + result = pq_endtypsend(&buf); + + free_var(&tmp_var); + + PG_RETURN_BYTEA_P(result); +} + +/* + * numeric_poly_deserialize + * Deserialize PolyNumAggState from bytea for aggregate functions which + * require sumX2. 
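+ *
+ * (Descriptive note, not part of the upstream comment: the sums arrive as
+ * numerics, mirroring numeric_poly_serialize() above, and are converted back
+ * into int128 accumulators when HAVE_INT128 is defined.)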
+ */ +Datum +numeric_poly_deserialize(PG_FUNCTION_ARGS) +{ + bytea *sstate; + PolyNumAggState *result; + StringInfoData buf; + NumericVar tmp_var; + + if (!AggCheckCallContext(fcinfo, NULL)) + elog(ERROR, "aggregate function called in non-aggregate context"); + + sstate = PG_GETARG_BYTEA_PP(0); + + init_var(&tmp_var); + + /* + * Copy the bytea into a StringInfo so that we can "receive" it using the + * standard recv-function infrastructure. + */ + initStringInfo(&buf); + appendBinaryStringInfo(&buf, + VARDATA_ANY(sstate), VARSIZE_ANY_EXHDR(sstate)); + + result = makePolyNumAggStateCurrentContext(false); + + /* N */ + result->N = pq_getmsgint64(&buf); + + /* sumX */ + numericvar_deserialize(&buf, &tmp_var); +#ifdef HAVE_INT128 + numericvar_to_int128(&tmp_var, &result->sumX); +#else + accum_sum_add(&result->sumX, &tmp_var); +#endif + + /* sumX2 */ + numericvar_deserialize(&buf, &tmp_var); +#ifdef HAVE_INT128 + numericvar_to_int128(&tmp_var, &result->sumX2); +#else + accum_sum_add(&result->sumX2, &tmp_var); +#endif + + pq_getmsgend(&buf); + pfree(buf.data); + + free_var(&tmp_var); + + PG_RETURN_POINTER(result); +} + +/* + * Transition function for int8 input when we don't need sumX2. + */ +Datum +int8_avg_accum(PG_FUNCTION_ARGS) +{ + PolyNumAggState *state; + + state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + + /* Create the state data on the first call */ + if (state == NULL) + state = makePolyNumAggState(fcinfo, false); + + if (!PG_ARGISNULL(1)) + { +#ifdef HAVE_INT128 + do_int128_accum(state, (int128) PG_GETARG_INT64(1)); +#else + do_numeric_accum(state, int64_to_numeric(PG_GETARG_INT64(1))); +#endif + } + + PG_RETURN_POINTER(state); +} + +/* + * Combine function for PolyNumAggState for aggregates which don't require + * sumX2 + */ +Datum +int8_avg_combine(PG_FUNCTION_ARGS) +{ + PolyNumAggState *state1; + PolyNumAggState *state2; + MemoryContext agg_context; + MemoryContext old_context; + + if (!AggCheckCallContext(fcinfo, &agg_context)) + elog(ERROR, "aggregate function called in non-aggregate context"); + + state1 = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + state2 = PG_ARGISNULL(1) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(1); + + if (state2 == NULL) + PG_RETURN_POINTER(state1); + + /* manually copy all fields from state2 to state1 */ + if (state1 == NULL) + { + old_context = MemoryContextSwitchTo(agg_context); + + state1 = makePolyNumAggState(fcinfo, false); + state1->N = state2->N; + +#ifdef HAVE_INT128 + state1->sumX = state2->sumX; +#else + accum_sum_copy(&state1->sumX, &state2->sumX); +#endif + MemoryContextSwitchTo(old_context); + + PG_RETURN_POINTER(state1); + } + + if (state2->N > 0) + { + state1->N += state2->N; + +#ifdef HAVE_INT128 + state1->sumX += state2->sumX; +#else + /* The rest of this needs to work in the aggregate context */ + old_context = MemoryContextSwitchTo(agg_context); + + /* Accumulate sums */ + accum_sum_combine(&state1->sumX, &state2->sumX); + + MemoryContextSwitchTo(old_context); +#endif + + } + PG_RETURN_POINTER(state1); +} + +/* + * int8_avg_serialize + * Serialize PolyNumAggState into bytea using the standard + * recv-function infrastructure. 
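+ *
+ * (Descriptive note, not part of the upstream comment: the payload is just N
+ * followed by sumX, with sumX sent as a numeric so the on-the-wire format is
+ * the same whether or not the platform has int128 support.)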
+ */ +Datum +int8_avg_serialize(PG_FUNCTION_ARGS) +{ + PolyNumAggState *state; + StringInfoData buf; + bytea *result; + NumericVar tmp_var; + + /* Ensure we disallow calling when not in aggregate context */ + if (!AggCheckCallContext(fcinfo, NULL)) + elog(ERROR, "aggregate function called in non-aggregate context"); + + state = (PolyNumAggState *) PG_GETARG_POINTER(0); + + /* + * If the platform supports int128 then sumX will be a 128 integer type. + * Here we'll convert that into a numeric type so that the combine state + * is in the same format for both int128 enabled machines and machines + * which don't support that type. The logic here is that one day we might + * like to send these over to another server for further processing and we + * want a standard format to work with. + */ + + init_var(&tmp_var); + + pq_begintypsend(&buf); + + /* N */ + pq_sendint64(&buf, state->N); + + /* sumX */ +#ifdef HAVE_INT128 + int128_to_numericvar(state->sumX, &tmp_var); +#else + accum_sum_final(&state->sumX, &tmp_var); +#endif + numericvar_serialize(&buf, &tmp_var); + + result = pq_endtypsend(&buf); + + free_var(&tmp_var); + + PG_RETURN_BYTEA_P(result); +} + +/* + * int8_avg_deserialize + * Deserialize bytea back into PolyNumAggState. + */ +Datum +int8_avg_deserialize(PG_FUNCTION_ARGS) +{ + bytea *sstate; + PolyNumAggState *result; + StringInfoData buf; + NumericVar tmp_var; + + if (!AggCheckCallContext(fcinfo, NULL)) + elog(ERROR, "aggregate function called in non-aggregate context"); + + sstate = PG_GETARG_BYTEA_PP(0); + + init_var(&tmp_var); + + /* + * Copy the bytea into a StringInfo so that we can "receive" it using the + * standard recv-function infrastructure. + */ + initStringInfo(&buf); + appendBinaryStringInfo(&buf, + VARDATA_ANY(sstate), VARSIZE_ANY_EXHDR(sstate)); + + result = makePolyNumAggStateCurrentContext(false); + + /* N */ + result->N = pq_getmsgint64(&buf); + + /* sumX */ + numericvar_deserialize(&buf, &tmp_var); +#ifdef HAVE_INT128 + numericvar_to_int128(&tmp_var, &result->sumX); +#else + accum_sum_add(&result->sumX, &tmp_var); +#endif + + pq_getmsgend(&buf); + pfree(buf.data); + + free_var(&tmp_var); + + PG_RETURN_POINTER(result); +} + +/* + * Inverse transition functions to go with the above. + */ + +Datum +int2_accum_inv(PG_FUNCTION_ARGS) +{ + PolyNumAggState *state; + + state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + + /* Should not get here with no state */ + if (state == NULL) + elog(ERROR, "int2_accum_inv called with NULL state"); + + if (!PG_ARGISNULL(1)) + { +#ifdef HAVE_INT128 + do_int128_discard(state, (int128) PG_GETARG_INT16(1)); +#else + /* Should never fail, all inputs have dscale 0 */ + if (!do_numeric_discard(state, int64_to_numeric(PG_GETARG_INT16(1)))) + elog(ERROR, "do_numeric_discard failed unexpectedly"); +#endif + } + + PG_RETURN_POINTER(state); +} + +Datum +int4_accum_inv(PG_FUNCTION_ARGS) +{ + PolyNumAggState *state; + + state = PG_ARGISNULL(0) ? 
NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + + /* Should not get here with no state */ + if (state == NULL) + elog(ERROR, "int4_accum_inv called with NULL state"); + + if (!PG_ARGISNULL(1)) + { +#ifdef HAVE_INT128 + do_int128_discard(state, (int128) PG_GETARG_INT32(1)); +#else + /* Should never fail, all inputs have dscale 0 */ + if (!do_numeric_discard(state, int64_to_numeric(PG_GETARG_INT32(1)))) + elog(ERROR, "do_numeric_discard failed unexpectedly"); +#endif + } + + PG_RETURN_POINTER(state); +} + +Datum +int8_accum_inv(PG_FUNCTION_ARGS) +{ + NumericAggState *state; + + state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0); + + /* Should not get here with no state */ + if (state == NULL) + elog(ERROR, "int8_accum_inv called with NULL state"); + + if (!PG_ARGISNULL(1)) + { + /* Should never fail, all inputs have dscale 0 */ + if (!do_numeric_discard(state, int64_to_numeric(PG_GETARG_INT64(1)))) + elog(ERROR, "do_numeric_discard failed unexpectedly"); + } + + PG_RETURN_POINTER(state); +} + +Datum +int8_avg_accum_inv(PG_FUNCTION_ARGS) +{ + PolyNumAggState *state; + + state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + + /* Should not get here with no state */ + if (state == NULL) + elog(ERROR, "int8_avg_accum_inv called with NULL state"); + + if (!PG_ARGISNULL(1)) + { +#ifdef HAVE_INT128 + do_int128_discard(state, (int128) PG_GETARG_INT64(1)); +#else + /* Should never fail, all inputs have dscale 0 */ + if (!do_numeric_discard(state, int64_to_numeric(PG_GETARG_INT64(1)))) + elog(ERROR, "do_numeric_discard failed unexpectedly"); +#endif + } + + PG_RETURN_POINTER(state); +} + +Datum +numeric_poly_sum(PG_FUNCTION_ARGS) +{ +#ifdef HAVE_INT128 + PolyNumAggState *state; + Numeric res; + NumericVar result; + + state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + + /* If there were no non-null inputs, return NULL */ + if (state == NULL || state->N == 0) + PG_RETURN_NULL(); + + init_var(&result); + + int128_to_numericvar(state->sumX, &result); + + res = make_result(&result); + + free_var(&result); + + PG_RETURN_NUMERIC(res); +#else + return numeric_sum(fcinfo); +#endif +} + +Datum +numeric_poly_avg(PG_FUNCTION_ARGS) +{ +#ifdef HAVE_INT128 + PolyNumAggState *state; + NumericVar result; + Datum countd, + sumd; + + state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + + /* If there were no non-null inputs, return NULL */ + if (state == NULL || state->N == 0) + PG_RETURN_NULL(); + + init_var(&result); + + int128_to_numericvar(state->sumX, &result); + + countd = NumericGetDatum(int64_to_numeric(state->N)); + sumd = NumericGetDatum(make_result(&result)); + + free_var(&result); + + PG_RETURN_DATUM(DirectFunctionCall2(numeric_div, sumd, countd)); +#else + return numeric_avg(fcinfo); +#endif +} + +Datum +numeric_avg(PG_FUNCTION_ARGS) +{ + NumericAggState *state; + Datum N_datum; + Datum sumX_datum; + NumericVar sumX_var; + + state = PG_ARGISNULL(0) ? 
NULL : (NumericAggState *) PG_GETARG_POINTER(0); + + /* If there were no non-null inputs, return NULL */ + if (state == NULL || NA_TOTAL_COUNT(state) == 0) + PG_RETURN_NULL(); + + if (state->NaNcount > 0) /* there was at least one NaN input */ + PG_RETURN_NUMERIC(make_result(&const_nan)); + + /* adding plus and minus infinities gives NaN */ + if (state->pInfcount > 0 && state->nInfcount > 0) + PG_RETURN_NUMERIC(make_result(&const_nan)); + if (state->pInfcount > 0) + PG_RETURN_NUMERIC(make_result(&const_pinf)); + if (state->nInfcount > 0) + PG_RETURN_NUMERIC(make_result(&const_ninf)); + + N_datum = NumericGetDatum(int64_to_numeric(state->N)); + + init_var(&sumX_var); + accum_sum_final(&state->sumX, &sumX_var); + sumX_datum = NumericGetDatum(make_result(&sumX_var)); + free_var(&sumX_var); + + PG_RETURN_DATUM(DirectFunctionCall2(numeric_div, sumX_datum, N_datum)); +} + +Datum +numeric_sum(PG_FUNCTION_ARGS) +{ + NumericAggState *state; + NumericVar sumX_var; + Numeric result; + + state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0); + + /* If there were no non-null inputs, return NULL */ + if (state == NULL || NA_TOTAL_COUNT(state) == 0) + PG_RETURN_NULL(); + + if (state->NaNcount > 0) /* there was at least one NaN input */ + PG_RETURN_NUMERIC(make_result(&const_nan)); + + /* adding plus and minus infinities gives NaN */ + if (state->pInfcount > 0 && state->nInfcount > 0) + PG_RETURN_NUMERIC(make_result(&const_nan)); + if (state->pInfcount > 0) + PG_RETURN_NUMERIC(make_result(&const_pinf)); + if (state->nInfcount > 0) + PG_RETURN_NUMERIC(make_result(&const_ninf)); + + init_var(&sumX_var); + accum_sum_final(&state->sumX, &sumX_var); + result = make_result(&sumX_var); + free_var(&sumX_var); + + PG_RETURN_NUMERIC(result); +} + +/* + * Workhorse routine for the standard deviance and variance + * aggregates. 'state' is aggregate's transition state. + * 'variance' specifies whether we should calculate the + * variance or the standard deviation. 'sample' indicates whether the + * caller is interested in the sample or the population + * variance/stddev. + * + * If appropriate variance statistic is undefined for the input, + * *is_null is set to true and NULL is returned. + */ +static Numeric +numeric_stddev_internal(NumericAggState *state, + bool variance, bool sample, + bool *is_null) +{ + Numeric res; + NumericVar vN, + vsumX, + vsumX2, + vNminus1; + int64 totCount; + int rscale; + + /* + * Sample stddev and variance are undefined when N <= 1; population stddev + * is undefined when N == 0. Return NULL in either case (note that NaNs + * and infinities count as normal inputs for this purpose). + */ + if (state == NULL || (totCount = NA_TOTAL_COUNT(state)) == 0) + { + *is_null = true; + return NULL; + } + + if (sample && totCount <= 1) + { + *is_null = true; + return NULL; + } + + *is_null = false; + + /* + * Deal with NaN and infinity cases. By analogy to the behavior of the + * float8 functions, any infinity input produces NaN output. 
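+ *
+ * (Descriptive note, not part of the upstream comment: the finite-input path
+ * below forms N * sumX2 - sumX * sumX and divides it by N * (N - 1) in the
+ * sample case or by N * N in the population case, taking the square root
+ * afterwards when stddev rather than variance was requested.)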
+ */ + if (state->NaNcount > 0 || state->pInfcount > 0 || state->nInfcount > 0) + return make_result(&const_nan); + + /* OK, normal calculation applies */ + init_var(&vN); + init_var(&vsumX); + init_var(&vsumX2); + + int64_to_numericvar(state->N, &vN); + accum_sum_final(&(state->sumX), &vsumX); + accum_sum_final(&(state->sumX2), &vsumX2); + + init_var(&vNminus1); + sub_var(&vN, &const_one, &vNminus1); + + /* compute rscale for mul_var calls */ + rscale = vsumX.dscale * 2; + + mul_var(&vsumX, &vsumX, &vsumX, rscale); /* vsumX = sumX * sumX */ + mul_var(&vN, &vsumX2, &vsumX2, rscale); /* vsumX2 = N * sumX2 */ + sub_var(&vsumX2, &vsumX, &vsumX2); /* N * sumX2 - sumX * sumX */ + + if (cmp_var(&vsumX2, &const_zero) <= 0) + { + /* Watch out for roundoff error producing a negative numerator */ + res = make_result(&const_zero); + } + else + { + if (sample) + mul_var(&vN, &vNminus1, &vNminus1, 0); /* N * (N - 1) */ + else + mul_var(&vN, &vN, &vNminus1, 0); /* N * N */ + rscale = select_div_scale(&vsumX2, &vNminus1); + div_var(&vsumX2, &vNminus1, &vsumX, rscale, true); /* variance */ + if (!variance) + sqrt_var(&vsumX, &vsumX, rscale); /* stddev */ + + res = make_result(&vsumX); + } + + free_var(&vNminus1); + free_var(&vsumX); + free_var(&vsumX2); + + return res; +} + +Datum +numeric_var_samp(PG_FUNCTION_ARGS) +{ + NumericAggState *state; + Numeric res; + bool is_null; + + state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0); + + res = numeric_stddev_internal(state, true, true, &is_null); + + if (is_null) + PG_RETURN_NULL(); + else + PG_RETURN_NUMERIC(res); +} + +Datum +numeric_stddev_samp(PG_FUNCTION_ARGS) +{ + NumericAggState *state; + Numeric res; + bool is_null; + + state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0); + + res = numeric_stddev_internal(state, false, true, &is_null); + + if (is_null) + PG_RETURN_NULL(); + else + PG_RETURN_NUMERIC(res); +} + +Datum +numeric_var_pop(PG_FUNCTION_ARGS) +{ + NumericAggState *state; + Numeric res; + bool is_null; + + state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0); + + res = numeric_stddev_internal(state, true, false, &is_null); + + if (is_null) + PG_RETURN_NULL(); + else + PG_RETURN_NUMERIC(res); +} + +Datum +numeric_stddev_pop(PG_FUNCTION_ARGS) +{ + NumericAggState *state; + Numeric res; + bool is_null; + + state = PG_ARGISNULL(0) ? 
NULL : (NumericAggState *) PG_GETARG_POINTER(0); + + res = numeric_stddev_internal(state, false, false, &is_null); + + if (is_null) + PG_RETURN_NULL(); + else + PG_RETURN_NUMERIC(res); +} + +#ifdef HAVE_INT128 +static Numeric +numeric_poly_stddev_internal(Int128AggState *state, + bool variance, bool sample, + bool *is_null) +{ + NumericAggState numstate; + Numeric res; + + /* Initialize an empty agg state */ + memset(&numstate, 0, sizeof(NumericAggState)); + + if (state) + { + NumericVar tmp_var; + + numstate.N = state->N; + + init_var(&tmp_var); + + int128_to_numericvar(state->sumX, &tmp_var); + accum_sum_add(&numstate.sumX, &tmp_var); + + int128_to_numericvar(state->sumX2, &tmp_var); + accum_sum_add(&numstate.sumX2, &tmp_var); + + free_var(&tmp_var); + } + + res = numeric_stddev_internal(&numstate, variance, sample, is_null); + + if (numstate.sumX.ndigits > 0) + { + pfree(numstate.sumX.pos_digits); + pfree(numstate.sumX.neg_digits); + } + if (numstate.sumX2.ndigits > 0) + { + pfree(numstate.sumX2.pos_digits); + pfree(numstate.sumX2.neg_digits); + } + + return res; +} +#endif + +Datum +numeric_poly_var_samp(PG_FUNCTION_ARGS) +{ +#ifdef HAVE_INT128 + PolyNumAggState *state; + Numeric res; + bool is_null; + + state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + + res = numeric_poly_stddev_internal(state, true, true, &is_null); + + if (is_null) + PG_RETURN_NULL(); + else + PG_RETURN_NUMERIC(res); +#else + return numeric_var_samp(fcinfo); +#endif +} + +Datum +numeric_poly_stddev_samp(PG_FUNCTION_ARGS) +{ +#ifdef HAVE_INT128 + PolyNumAggState *state; + Numeric res; + bool is_null; + + state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + + res = numeric_poly_stddev_internal(state, false, true, &is_null); + + if (is_null) + PG_RETURN_NULL(); + else + PG_RETURN_NUMERIC(res); +#else + return numeric_stddev_samp(fcinfo); +#endif +} + +Datum +numeric_poly_var_pop(PG_FUNCTION_ARGS) +{ +#ifdef HAVE_INT128 + PolyNumAggState *state; + Numeric res; + bool is_null; + + state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + + res = numeric_poly_stddev_internal(state, true, false, &is_null); + + if (is_null) + PG_RETURN_NULL(); + else + PG_RETURN_NUMERIC(res); +#else + return numeric_var_pop(fcinfo); +#endif +} + +Datum +numeric_poly_stddev_pop(PG_FUNCTION_ARGS) +{ +#ifdef HAVE_INT128 + PolyNumAggState *state; + Numeric res; + bool is_null; + + state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + + res = numeric_poly_stddev_internal(state, false, false, &is_null); + + if (is_null) + PG_RETURN_NULL(); + else + PG_RETURN_NUMERIC(res); +#else + return numeric_stddev_pop(fcinfo); +#endif +} + +/* + * SUM transition functions for integer datatypes. + * + * To avoid overflow, we use accumulators wider than the input datatype. + * A Numeric accumulator is needed for int8 input; for int4 and int2 + * inputs, we use int8 accumulators which should be sufficient for practical + * purposes. (The latter two therefore don't really belong in this file, + * but we keep them here anyway.) + * + * Because SQL defines the SUM() of no values to be NULL, not zero, + * the initial condition of the transition data value needs to be NULL. This + * means we can't rely on ExecAgg to automatically insert the first non-null + * data value into the transition data: it doesn't know how to do the type + * conversion. The upshot is that these routines have to be marked non-strict + * and handle substitution of the first non-null input themselves. 
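+ * (That is why each of the functions below checks PG_ARGISNULL(0) and, on seeing the first non-null input, returns that value directly as the new running sum.)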
+ * + * Note: these functions are used only in plain aggregation mode. + * In moving-aggregate mode, we use intX_avg_accum and intX_avg_accum_inv. + */ + +Datum +int2_sum(PG_FUNCTION_ARGS) +{ + int64 newval; + + if (PG_ARGISNULL(0)) + { + /* No non-null input seen so far... */ + if (PG_ARGISNULL(1)) + PG_RETURN_NULL(); /* still no non-null */ + /* This is the first non-null input. */ + newval = (int64) PG_GETARG_INT16(1); + PG_RETURN_INT64(newval); + } + + /* + * If we're invoked as an aggregate, we can cheat and modify our first + * parameter in-place to avoid palloc overhead. If not, we need to return + * the new value of the transition variable. (If int8 is pass-by-value, + * then of course this is useless as well as incorrect, so just ifdef it + * out.) + */ +#ifndef USE_FLOAT8_BYVAL /* controls int8 too */ + if (AggCheckCallContext(fcinfo, NULL)) + { + int64 *oldsum = (int64 *) PG_GETARG_POINTER(0); + + /* Leave the running sum unchanged if the new input is null */ + if (!PG_ARGISNULL(1)) + *oldsum = *oldsum + (int64) PG_GETARG_INT16(1); + + PG_RETURN_POINTER(oldsum); + } + else +#endif + { + int64 oldsum = PG_GETARG_INT64(0); + + /* Leave sum unchanged if new input is null. */ + if (PG_ARGISNULL(1)) + PG_RETURN_INT64(oldsum); + + /* OK to do the addition. */ + newval = oldsum + (int64) PG_GETARG_INT16(1); + + PG_RETURN_INT64(newval); + } +} + +Datum +int4_sum(PG_FUNCTION_ARGS) +{ + int64 newval; + + if (PG_ARGISNULL(0)) + { + /* No non-null input seen so far... */ + if (PG_ARGISNULL(1)) + PG_RETURN_NULL(); /* still no non-null */ + /* This is the first non-null input. */ + newval = (int64) PG_GETARG_INT32(1); + PG_RETURN_INT64(newval); + } + + /* + * If we're invoked as an aggregate, we can cheat and modify our first + * parameter in-place to avoid palloc overhead. If not, we need to return + * the new value of the transition variable. (If int8 is pass-by-value, + * then of course this is useless as well as incorrect, so just ifdef it + * out.) + */ +#ifndef USE_FLOAT8_BYVAL /* controls int8 too */ + if (AggCheckCallContext(fcinfo, NULL)) + { + int64 *oldsum = (int64 *) PG_GETARG_POINTER(0); + + /* Leave the running sum unchanged if the new input is null */ + if (!PG_ARGISNULL(1)) + *oldsum = *oldsum + (int64) PG_GETARG_INT32(1); + + PG_RETURN_POINTER(oldsum); + } + else +#endif + { + int64 oldsum = PG_GETARG_INT64(0); + + /* Leave sum unchanged if new input is null. */ + if (PG_ARGISNULL(1)) + PG_RETURN_INT64(oldsum); + + /* OK to do the addition. */ + newval = oldsum + (int64) PG_GETARG_INT32(1); + + PG_RETURN_INT64(newval); + } +} + +/* + * Note: this function is obsolete; it's no longer used for SUM(int8). + */ +Datum +int8_sum(PG_FUNCTION_ARGS) +{ + Numeric oldsum; + + if (PG_ARGISNULL(0)) + { + /* No non-null input seen so far... */ + if (PG_ARGISNULL(1)) + PG_RETURN_NULL(); /* still no non-null */ + /* This is the first non-null input. */ + PG_RETURN_NUMERIC(int64_to_numeric(PG_GETARG_INT64(1))); + } + + /* + * Note that we cannot special-case the aggregate case here, as we do for + * int2_sum and int4_sum: numeric is of variable size, so we cannot modify + * our first parameter in-place. + */ + + oldsum = PG_GETARG_NUMERIC(0); + + /* Leave sum unchanged if new input is null. */ + if (PG_ARGISNULL(1)) + PG_RETURN_NUMERIC(oldsum); + + /* OK to do the addition. */ + PG_RETURN_DATUM(DirectFunctionCall2(numeric_add, + NumericGetDatum(oldsum), + NumericGetDatum(int64_to_numeric(PG_GETARG_INT64(1))))); +} + + +/* + * Routines for avg(int2) and avg(int4).
The transition datatype + * is a two-element int8 array, holding count and sum. + * + * These functions are also used for sum(int2) and sum(int4) when + * operating in moving-aggregate mode, since for correct inverse transitions + * we need to count the inputs. + */ + +typedef struct Int8TransTypeData +{ + int64 count; + int64 sum; +} Int8TransTypeData; + +Datum +int2_avg_accum(PG_FUNCTION_ARGS) +{ + ArrayType *transarray; + int16 newval = PG_GETARG_INT16(1); + Int8TransTypeData *transdata; + + /* + * If we're invoked as an aggregate, we can cheat and modify our first + * parameter in-place to reduce palloc overhead. Otherwise we need to make + * a copy of it before scribbling on it. + */ + if (AggCheckCallContext(fcinfo, NULL)) + transarray = PG_GETARG_ARRAYTYPE_P(0); + else + transarray = PG_GETARG_ARRAYTYPE_P_COPY(0); + + if (ARR_HASNULL(transarray) || + ARR_SIZE(transarray) != ARR_OVERHEAD_NONULLS(1) + sizeof(Int8TransTypeData)) + elog(ERROR, "expected 2-element int8 array"); + + transdata = (Int8TransTypeData *) ARR_DATA_PTR(transarray); + transdata->count++; + transdata->sum += newval; + + PG_RETURN_ARRAYTYPE_P(transarray); +} + +Datum +int4_avg_accum(PG_FUNCTION_ARGS) +{ + ArrayType *transarray; + int32 newval = PG_GETARG_INT32(1); + Int8TransTypeData *transdata; + + /* + * If we're invoked as an aggregate, we can cheat and modify our first + * parameter in-place to reduce palloc overhead. Otherwise we need to make + * a copy of it before scribbling on it. + */ + if (AggCheckCallContext(fcinfo, NULL)) + transarray = PG_GETARG_ARRAYTYPE_P(0); + else + transarray = PG_GETARG_ARRAYTYPE_P_COPY(0); + + if (ARR_HASNULL(transarray) || + ARR_SIZE(transarray) != ARR_OVERHEAD_NONULLS(1) + sizeof(Int8TransTypeData)) + elog(ERROR, "expected 2-element int8 array"); + + transdata = (Int8TransTypeData *) ARR_DATA_PTR(transarray); + transdata->count++; + transdata->sum += newval; + + PG_RETURN_ARRAYTYPE_P(transarray); +} + +Datum +int4_avg_combine(PG_FUNCTION_ARGS) +{ + ArrayType *transarray1; + ArrayType *transarray2; + Int8TransTypeData *state1; + Int8TransTypeData *state2; + + if (!AggCheckCallContext(fcinfo, NULL)) + elog(ERROR, "aggregate function called in non-aggregate context"); + + transarray1 = PG_GETARG_ARRAYTYPE_P(0); + transarray2 = PG_GETARG_ARRAYTYPE_P(1); + + if (ARR_HASNULL(transarray1) || + ARR_SIZE(transarray1) != ARR_OVERHEAD_NONULLS(1) + sizeof(Int8TransTypeData)) + elog(ERROR, "expected 2-element int8 array"); + + if (ARR_HASNULL(transarray2) || + ARR_SIZE(transarray2) != ARR_OVERHEAD_NONULLS(1) + sizeof(Int8TransTypeData)) + elog(ERROR, "expected 2-element int8 array"); + + state1 = (Int8TransTypeData *) ARR_DATA_PTR(transarray1); + state2 = (Int8TransTypeData *) ARR_DATA_PTR(transarray2); + + state1->count += state2->count; + state1->sum += state2->sum; + + PG_RETURN_ARRAYTYPE_P(transarray1); +} + +Datum +int2_avg_accum_inv(PG_FUNCTION_ARGS) +{ + ArrayType *transarray; + int16 newval = PG_GETARG_INT16(1); + Int8TransTypeData *transdata; + + /* + * If we're invoked as an aggregate, we can cheat and modify our first + * parameter in-place to reduce palloc overhead. Otherwise we need to make + * a copy of it before scribbling on it. 
+ */ + if (AggCheckCallContext(fcinfo, NULL)) + transarray = PG_GETARG_ARRAYTYPE_P(0); + else + transarray = PG_GETARG_ARRAYTYPE_P_COPY(0); + + if (ARR_HASNULL(transarray) || + ARR_SIZE(transarray) != ARR_OVERHEAD_NONULLS(1) + sizeof(Int8TransTypeData)) + elog(ERROR, "expected 2-element int8 array"); + + transdata = (Int8TransTypeData *) ARR_DATA_PTR(transarray); + transdata->count--; + transdata->sum -= newval; + + PG_RETURN_ARRAYTYPE_P(transarray); +} + +Datum +int4_avg_accum_inv(PG_FUNCTION_ARGS) +{ + ArrayType *transarray; + int32 newval = PG_GETARG_INT32(1); + Int8TransTypeData *transdata; + + /* + * If we're invoked as an aggregate, we can cheat and modify our first + * parameter in-place to reduce palloc overhead. Otherwise we need to make + * a copy of it before scribbling on it. + */ + if (AggCheckCallContext(fcinfo, NULL)) + transarray = PG_GETARG_ARRAYTYPE_P(0); + else + transarray = PG_GETARG_ARRAYTYPE_P_COPY(0); + + if (ARR_HASNULL(transarray) || + ARR_SIZE(transarray) != ARR_OVERHEAD_NONULLS(1) + sizeof(Int8TransTypeData)) + elog(ERROR, "expected 2-element int8 array"); + + transdata = (Int8TransTypeData *) ARR_DATA_PTR(transarray); + transdata->count--; + transdata->sum -= newval; + + PG_RETURN_ARRAYTYPE_P(transarray); +} + +Datum +int8_avg(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + Int8TransTypeData *transdata; + Datum countd, + sumd; + + if (ARR_HASNULL(transarray) || + ARR_SIZE(transarray) != ARR_OVERHEAD_NONULLS(1) + sizeof(Int8TransTypeData)) + elog(ERROR, "expected 2-element int8 array"); + transdata = (Int8TransTypeData *) ARR_DATA_PTR(transarray); + + /* SQL defines AVG of no values to be NULL */ + if (transdata->count == 0) + PG_RETURN_NULL(); + + countd = NumericGetDatum(int64_to_numeric(transdata->count)); + sumd = NumericGetDatum(int64_to_numeric(transdata->sum)); + + PG_RETURN_DATUM(DirectFunctionCall2(numeric_div, sumd, countd)); +} + +/* + * SUM(int2) and SUM(int4) both return int8, so we can use this + * final function for both. 
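+ * The count field of the transition array is consulted only to detect the no-input case, which must yield NULL; otherwise the accumulated sum is returned as-is.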
+ */ +Datum +int2int4_sum(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + Int8TransTypeData *transdata; + + if (ARR_HASNULL(transarray) || + ARR_SIZE(transarray) != ARR_OVERHEAD_NONULLS(1) + sizeof(Int8TransTypeData)) + elog(ERROR, "expected 2-element int8 array"); + transdata = (Int8TransTypeData *) ARR_DATA_PTR(transarray); + + /* SQL defines SUM of no values to be NULL */ + if (transdata->count == 0) + PG_RETURN_NULL(); + + PG_RETURN_DATUM(Int64GetDatumFast(transdata->sum)); +} + + +/* ---------------------------------------------------------------------- + * + * Debug support + * + * ---------------------------------------------------------------------- + */ + +#ifdef NUMERIC_DEBUG + +/* + * dump_numeric() - Dump a value in the db storage format for debugging + */ +static void +dump_numeric(const char *str, Numeric num) +{ + NumericDigit *digits = NUMERIC_DIGITS(num); + int ndigits; + int i; + + ndigits = NUMERIC_NDIGITS(num); + + printf("%s: NUMERIC w=%d d=%d ", str, + NUMERIC_WEIGHT(num), NUMERIC_DSCALE(num)); + switch (NUMERIC_SIGN(num)) + { + case NUMERIC_POS: + printf("POS"); + break; + case NUMERIC_NEG: + printf("NEG"); + break; + case NUMERIC_NAN: + printf("NaN"); + break; + case NUMERIC_PINF: + printf("Infinity"); + break; + case NUMERIC_NINF: + printf("-Infinity"); + break; + default: + printf("SIGN=0x%x", NUMERIC_SIGN(num)); + break; + } + + for (i = 0; i < ndigits; i++) + printf(" %0*d", DEC_DIGITS, digits[i]); + printf("\n"); +} + + +/* + * dump_var() - Dump a value in the variable format for debugging + */ +static void +dump_var(const char *str, NumericVar *var) +{ + int i; + + printf("%s: VAR w=%d d=%d ", str, var->weight, var->dscale); + switch (var->sign) + { + case NUMERIC_POS: + printf("POS"); + break; + case NUMERIC_NEG: + printf("NEG"); + break; + case NUMERIC_NAN: + printf("NaN"); + break; + case NUMERIC_PINF: + printf("Infinity"); + break; + case NUMERIC_NINF: + printf("-Infinity"); + break; + default: + printf("SIGN=0x%x", var->sign); + break; + } + + for (i = 0; i < var->ndigits; i++) + printf(" %0*d", DEC_DIGITS, var->digits[i]); + + printf("\n"); +} +#endif /* NUMERIC_DEBUG */ + + +/* ---------------------------------------------------------------------- + * + * Local functions follow + * + * In general, these do not support "special" (NaN or infinity) inputs; + * callers should handle those possibilities first. + * (There are one or two exceptions, noted in their header comments.) + * + * ---------------------------------------------------------------------- + */ + + +/* + * alloc_var() - + * + * Allocate a digit buffer of ndigits digits (plus a spare digit for rounding) + */ +static void +alloc_var(NumericVar *var, int ndigits) +{ + digitbuf_free(var->buf); + var->buf = digitbuf_alloc(ndigits + 1); + var->buf[0] = 0; /* spare digit for rounding */ + var->digits = var->buf + 1; + var->ndigits = ndigits; +} + + +/* + * free_var() - + * + * Return the digit buffer of a variable to the free pool + */ +static void +free_var(NumericVar *var) +{ + digitbuf_free(var->buf); + var->buf = NULL; + var->digits = NULL; + var->sign = NUMERIC_NAN; +} + + +/* + * zero_var() - + * + * Set a variable to ZERO. + * Note: its dscale is not touched. + */ +static void +zero_var(NumericVar *var) +{ + digitbuf_free(var->buf); + var->buf = NULL; + var->digits = NULL; + var->ndigits = 0; + var->weight = 0; /* by convention; doesn't really matter */ + var->sign = NUMERIC_POS; /* anything but NAN... 
*/ +} + + +/* + * set_var_from_str() + * + * Parse a string and put the number into a variable + * + * This function does not handle leading or trailing spaces. It returns + * the end+1 position parsed into *endptr, so that caller can check for + * trailing spaces/garbage if deemed necessary. + * + * cp is the place to actually start parsing; str is what to use in error + * reports. (Typically cp would be the same except advanced over spaces.) + * + * Returns true on success, false on failure (if escontext points to an + * ErrorSaveContext; otherwise errors are thrown). + */ +static bool +set_var_from_str(const char *str, const char *cp, + NumericVar *dest, const char **endptr, + Node *escontext) +{ + bool have_dp = false; + int i; + unsigned char *decdigits; + int sign = NUMERIC_POS; + int dweight = -1; + int ddigits; + int dscale = 0; + int weight; + int ndigits; + int offset; + NumericDigit *digits; + + /* + * We first parse the string to extract decimal digits and determine the + * correct decimal weight. Then convert to NBASE representation. + */ + switch (*cp) + { + case '+': + sign = NUMERIC_POS; + cp++; + break; + + case '-': + sign = NUMERIC_NEG; + cp++; + break; + } + + if (*cp == '.') + { + have_dp = true; + cp++; + } + + if (!isdigit((unsigned char) *cp)) + goto invalid_syntax; + + decdigits = (unsigned char *) palloc(strlen(cp) + DEC_DIGITS * 2); + + /* leading padding for digit alignment later */ + memset(decdigits, 0, DEC_DIGITS); + i = DEC_DIGITS; + + while (*cp) + { + if (isdigit((unsigned char) *cp)) + { + decdigits[i++] = *cp++ - '0'; + if (!have_dp) + dweight++; + else + dscale++; + } + else if (*cp == '.') + { + if (have_dp) + goto invalid_syntax; + have_dp = true; + cp++; + /* decimal point must not be followed by underscore */ + if (*cp == '_') + goto invalid_syntax; + } + else if (*cp == '_') + { + /* underscore must be followed by more digits */ + cp++; + if (!isdigit((unsigned char) *cp)) + goto invalid_syntax; + } + else + break; + } + + ddigits = i - DEC_DIGITS; + /* trailing padding for digit alignment later */ + memset(decdigits + i, 0, DEC_DIGITS - 1); + + /* Handle exponent, if any */ + if (*cp == 'e' || *cp == 'E') + { + int64 exponent = 0; + bool neg = false; + + /* + * At this point, dweight and dscale can't be more than about + * INT_MAX/2 due to the MaxAllocSize limit on string length, so + * constraining the exponent similarly should be enough to prevent + * integer overflow in this function. If the value is too large to + * fit in storage format, make_result() will complain about it later; + * for consistency use the same ereport errcode/text as make_result(). + */ + + /* exponent sign */ + cp++; + if (*cp == '+') + cp++; + else if (*cp == '-') + { + neg = true; + cp++; + } + + /* exponent digits */ + if (!isdigit((unsigned char) *cp)) + goto invalid_syntax; + + while (*cp) + { + if (isdigit((unsigned char) *cp)) + { + exponent = exponent * 10 + (*cp++ - '0'); + if (exponent > PG_INT32_MAX / 2) + goto out_of_range; + } + else if (*cp == '_') + { + /* underscore must be followed by more digits */ + cp++; + if (!isdigit((unsigned char) *cp)) + goto invalid_syntax; + } + else + break; + } + + if (neg) + exponent = -exponent; + + dweight += (int) exponent; + dscale -= (int) exponent; + if (dscale < 0) + dscale = 0; + } + + /* + * Okay, convert pure-decimal representation to base NBASE. First we need + * to determine the converted weight and ndigits. 
offset is the number of + * decimal zeroes to insert before the first given digit to have a + * correctly aligned first NBASE digit. + */ + if (dweight >= 0) + weight = (dweight + 1 + DEC_DIGITS - 1) / DEC_DIGITS - 1; + else + weight = -((-dweight - 1) / DEC_DIGITS + 1); + offset = (weight + 1) * DEC_DIGITS - (dweight + 1); + ndigits = (ddigits + offset + DEC_DIGITS - 1) / DEC_DIGITS; + + alloc_var(dest, ndigits); + dest->sign = sign; + dest->weight = weight; + dest->dscale = dscale; + + i = DEC_DIGITS - offset; + digits = dest->digits; + + while (ndigits-- > 0) + { +#if DEC_DIGITS == 4 + *digits++ = ((decdigits[i] * 10 + decdigits[i + 1]) * 10 + + decdigits[i + 2]) * 10 + decdigits[i + 3]; +#elif DEC_DIGITS == 2 + *digits++ = decdigits[i] * 10 + decdigits[i + 1]; +#elif DEC_DIGITS == 1 + *digits++ = decdigits[i]; +#else +#error unsupported NBASE +#endif + i += DEC_DIGITS; + } + + pfree(decdigits); + + /* Strip any leading/trailing zeroes, and normalize weight if zero */ + strip_var(dest); + + /* Return end+1 position for caller */ + *endptr = cp; + + return true; + +out_of_range: + ereturn(escontext, false, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value overflows numeric format"))); + +invalid_syntax: + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "numeric", str))); +} + + +/* + * Return the numeric value of a single hex digit. + */ +static inline int +xdigit_value(char dig) +{ + return dig >= '0' && dig <= '9' ? dig - '0' : + dig >= 'a' && dig <= 'f' ? dig - 'a' + 10 : + dig >= 'A' && dig <= 'F' ? dig - 'A' + 10 : -1; +} + +/* + * set_var_from_non_decimal_integer_str() + * + * Parse a string containing a non-decimal integer + * + * This function does not handle leading or trailing spaces. It returns + * the end+1 position parsed into *endptr, so that caller can check for + * trailing spaces/garbage if deemed necessary. + * + * cp is the place to actually start parsing; str is what to use in error + * reports. The number's sign and base prefix indicator (e.g., "0x") are + * assumed to have already been parsed, so cp should point to the number's + * first digit in the base specified. + * + * base is expected to be 2, 8 or 16. + * + * Returns true on success, false on failure (if escontext points to an + * ErrorSaveContext; otherwise errors are thrown). + */ +static bool +set_var_from_non_decimal_integer_str(const char *str, const char *cp, int sign, + int base, NumericVar *dest, + const char **endptr, Node *escontext) +{ + const char *firstdigit = cp; + int64 tmp; + int64 mul; + NumericVar tmp_var; + + init_var(&tmp_var); + + zero_var(dest); + + /* + * Process input digits in groups that fit in int64. Here "tmp" is the + * value of the digits in the group, and "mul" is base^n, where n is the + * number of digits in the group. Thus tmp < mul, and we must start a new + * group when mul * base threatens to overflow PG_INT64_MAX. 
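+ * For hexadecimal input, for example, this means a group is folded into "dest" roughly every 15 digits, just before "mul" would overflow an int64.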
+ */ + tmp = 0; + mul = 1; + + if (base == 16) + { + while (*cp) + { + if (isxdigit((unsigned char) *cp)) + { + if (mul > PG_INT64_MAX / 16) + { + /* Add the contribution from this group of digits */ + int64_to_numericvar(mul, &tmp_var); + mul_var(dest, &tmp_var, dest, 0); + int64_to_numericvar(tmp, &tmp_var); + add_var(dest, &tmp_var, dest); + + /* Result will overflow if weight overflows int16 */ + if (dest->weight > SHRT_MAX) + goto out_of_range; + + /* Begin a new group */ + tmp = 0; + mul = 1; + } + + tmp = tmp * 16 + xdigit_value(*cp++); + mul = mul * 16; + } + else if (*cp == '_') + { + /* Underscore must be followed by more digits */ + cp++; + if (!isxdigit((unsigned char) *cp)) + goto invalid_syntax; + } + else + break; + } + } + else if (base == 8) + { + while (*cp) + { + if (*cp >= '0' && *cp <= '7') + { + if (mul > PG_INT64_MAX / 8) + { + /* Add the contribution from this group of digits */ + int64_to_numericvar(mul, &tmp_var); + mul_var(dest, &tmp_var, dest, 0); + int64_to_numericvar(tmp, &tmp_var); + add_var(dest, &tmp_var, dest); + + /* Result will overflow if weight overflows int16 */ + if (dest->weight > SHRT_MAX) + goto out_of_range; + + /* Begin a new group */ + tmp = 0; + mul = 1; + } + + tmp = tmp * 8 + (*cp++ - '0'); + mul = mul * 8; + } + else if (*cp == '_') + { + /* Underscore must be followed by more digits */ + cp++; + if (*cp < '0' || *cp > '7') + goto invalid_syntax; + } + else + break; + } + } + else if (base == 2) + { + while (*cp) + { + if (*cp >= '0' && *cp <= '1') + { + if (mul > PG_INT64_MAX / 2) + { + /* Add the contribution from this group of digits */ + int64_to_numericvar(mul, &tmp_var); + mul_var(dest, &tmp_var, dest, 0); + int64_to_numericvar(tmp, &tmp_var); + add_var(dest, &tmp_var, dest); + + /* Result will overflow if weight overflows int16 */ + if (dest->weight > SHRT_MAX) + goto out_of_range; + + /* Begin a new group */ + tmp = 0; + mul = 1; + } + + tmp = tmp * 2 + (*cp++ - '0'); + mul = mul * 2; + } + else if (*cp == '_') + { + /* Underscore must be followed by more digits */ + cp++; + if (*cp < '0' || *cp > '1') + goto invalid_syntax; + } + else + break; + } + } + else + /* Should never happen; treat as invalid input */ + goto invalid_syntax; + + /* Check that we got at least one digit */ + if (unlikely(cp == firstdigit)) + goto invalid_syntax; + + /* Add the contribution from the final group of digits */ + int64_to_numericvar(mul, &tmp_var); + mul_var(dest, &tmp_var, dest, 0); + int64_to_numericvar(tmp, &tmp_var); + add_var(dest, &tmp_var, dest); + + if (dest->weight > SHRT_MAX) + goto out_of_range; + + dest->sign = sign; + + free_var(&tmp_var); + + /* Return end+1 position for caller */ + *endptr = cp; + + return true; + +out_of_range: + ereturn(escontext, false, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value overflows numeric format"))); + +invalid_syntax: + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "numeric", str))); +} + + +/* + * set_var_from_num() - + * + * Convert the packed db format into a variable + */ +static void +set_var_from_num(Numeric num, NumericVar *dest) +{ + int ndigits; + + ndigits = NUMERIC_NDIGITS(num); + + alloc_var(dest, ndigits); + + dest->weight = NUMERIC_WEIGHT(num); + dest->sign = NUMERIC_SIGN(num); + dest->dscale = NUMERIC_DSCALE(num); + + memcpy(dest->digits, NUMERIC_DIGITS(num), ndigits * sizeof(NumericDigit)); +} + + +/* + * init_var_from_num() - + * + * Initialize a variable from packed db format. 
The digits array is not + * copied, which saves some cycles when the resulting var is not modified. + * Also, there's no need to call free_var(), as long as you don't assign any + * other value to it (with set_var_* functions, or by using the var as the + * destination of a function like add_var()) + * + * CAUTION: Do not modify the digits buffer of a var initialized with this + * function, e.g by calling round_var() or trunc_var(), as the changes will + * propagate to the original Numeric! It's OK to use it as the destination + * argument of one of the calculational functions, though. + */ +static void +init_var_from_num(Numeric num, NumericVar *dest) +{ + dest->ndigits = NUMERIC_NDIGITS(num); + dest->weight = NUMERIC_WEIGHT(num); + dest->sign = NUMERIC_SIGN(num); + dest->dscale = NUMERIC_DSCALE(num); + dest->digits = NUMERIC_DIGITS(num); + dest->buf = NULL; /* digits array is not palloc'd */ +} + + +/* + * set_var_from_var() - + * + * Copy one variable into another + */ +static void +set_var_from_var(const NumericVar *value, NumericVar *dest) +{ + NumericDigit *newbuf; + + newbuf = digitbuf_alloc(value->ndigits + 1); + newbuf[0] = 0; /* spare digit for rounding */ + if (value->ndigits > 0) /* else value->digits might be null */ + memcpy(newbuf + 1, value->digits, + value->ndigits * sizeof(NumericDigit)); + + digitbuf_free(dest->buf); + + memmove(dest, value, sizeof(NumericVar)); + dest->buf = newbuf; + dest->digits = newbuf + 1; +} + + +/* + * get_str_from_var() - + * + * Convert a var to text representation (guts of numeric_out). + * The var is displayed to the number of digits indicated by its dscale. + * Returns a palloc'd string. + */ +static char * +get_str_from_var(const NumericVar *var) +{ + int dscale; + char *str; + char *cp; + char *endcp; + int i; + int d; + NumericDigit dig; + +#if DEC_DIGITS > 1 + NumericDigit d1; +#endif + + dscale = var->dscale; + + /* + * Allocate space for the result. + * + * i is set to the # of decimal digits before decimal point. dscale is the + * # of decimal digits we will print after decimal point. We may generate + * as many as DEC_DIGITS-1 excess digits at the end, and in addition we + * need room for sign, decimal point, null terminator. + */ + i = (var->weight + 1) * DEC_DIGITS; + if (i <= 0) + i = 1; + + str = palloc(i + dscale + DEC_DIGITS + 2); + cp = str; + + /* + * Output a dash for negative values + */ + if (var->sign == NUMERIC_NEG) + *cp++ = '-'; + + /* + * Output all digits before the decimal point + */ + if (var->weight < 0) + { + d = var->weight + 1; + *cp++ = '0'; + } + else + { + for (d = 0; d <= var->weight; d++) + { + dig = (d < var->ndigits) ? var->digits[d] : 0; + /* In the first digit, suppress extra leading decimal zeroes */ +#if DEC_DIGITS == 4 + { + bool putit = (d > 0); + + d1 = dig / 1000; + dig -= d1 * 1000; + putit |= (d1 > 0); + if (putit) + *cp++ = d1 + '0'; + d1 = dig / 100; + dig -= d1 * 100; + putit |= (d1 > 0); + if (putit) + *cp++ = d1 + '0'; + d1 = dig / 10; + dig -= d1 * 10; + putit |= (d1 > 0); + if (putit) + *cp++ = d1 + '0'; + *cp++ = dig + '0'; + } +#elif DEC_DIGITS == 2 + d1 = dig / 10; + dig -= d1 * 10; + if (d1 > 0 || d > 0) + *cp++ = d1 + '0'; + *cp++ = dig + '0'; +#elif DEC_DIGITS == 1 + *cp++ = dig + '0'; +#else +#error unsupported NBASE +#endif + } + } + + /* + * If requested, output a decimal point and all the digits that follow it. + * We initially put out a multiple of DEC_DIGITS digits, then truncate if + * needed. 
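+ * For instance, with DEC_DIGITS = 4 and dscale = 5 we emit two full NBASE digits (8 decimal places) and then cut the string back to 5.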
+ */ + if (dscale > 0) + { + *cp++ = '.'; + endcp = cp + dscale; + for (i = 0; i < dscale; d++, i += DEC_DIGITS) + { + dig = (d >= 0 && d < var->ndigits) ? var->digits[d] : 0; +#if DEC_DIGITS == 4 + d1 = dig / 1000; + dig -= d1 * 1000; + *cp++ = d1 + '0'; + d1 = dig / 100; + dig -= d1 * 100; + *cp++ = d1 + '0'; + d1 = dig / 10; + dig -= d1 * 10; + *cp++ = d1 + '0'; + *cp++ = dig + '0'; +#elif DEC_DIGITS == 2 + d1 = dig / 10; + dig -= d1 * 10; + *cp++ = d1 + '0'; + *cp++ = dig + '0'; +#elif DEC_DIGITS == 1 + *cp++ = dig + '0'; +#else +#error unsupported NBASE +#endif + } + cp = endcp; + } + + /* + * terminate the string and return it + */ + *cp = '\0'; + return str; +} + +/* + * get_str_from_var_sci() - + * + * Convert a var to a normalised scientific notation text representation. + * This function does the heavy lifting for numeric_out_sci(). + * + * This notation has the general form a * 10^b, where a is known as the + * "significand" and b is known as the "exponent". + * + * Because we can't do superscript in ASCII (and because we want to copy + * printf's behaviour) we display the exponent using E notation, with a + * minimum of two exponent digits. + * + * For example, the value 1234 could be output as 1.2e+03. + * + * We assume that the exponent can fit into an int32. + * + * rscale is the number of decimal digits desired after the decimal point in + * the output, negative values will be treated as meaning zero. + * + * Returns a palloc'd string. + */ +static char * +get_str_from_var_sci(const NumericVar *var, int rscale) +{ + int32 exponent; + NumericVar tmp_var; + size_t len; + char *str; + char *sig_out; + + if (rscale < 0) + rscale = 0; + + /* + * Determine the exponent of this number in normalised form. + * + * This is the exponent required to represent the number with only one + * significant digit before the decimal place. + */ + if (var->ndigits > 0) + { + exponent = (var->weight + 1) * DEC_DIGITS; + + /* + * Compensate for leading decimal zeroes in the first numeric digit by + * decrementing the exponent. + */ + exponent -= DEC_DIGITS - (int) log10(var->digits[0]); + } + else + { + /* + * If var has no digits, then it must be zero. + * + * Zero doesn't technically have a meaningful exponent in normalised + * notation, but we just display the exponent as zero for consistency + * of output. + */ + exponent = 0; + } + + /* + * Divide var by 10^exponent to get the significand, rounding to rscale + * decimal digits in the process. + */ + init_var(&tmp_var); + + power_ten_int(exponent, &tmp_var); + div_var(var, &tmp_var, &tmp_var, rscale, true); + sig_out = get_str_from_var(&tmp_var); + + free_var(&tmp_var); + + /* + * Allocate space for the result. + * + * In addition to the significand, we need room for the exponent + * decoration ("e"), the sign of the exponent, up to 10 digits for the + * exponent itself, and of course the null terminator. + */ + len = strlen(sig_out) + 13; + str = palloc(len); + snprintf(str, len, "%se%+03d", sig_out, exponent); + + pfree(sig_out); + + return str; +} + + +/* + * numericvar_serialize - serialize NumericVar to binary format + * + * At variable level, no checks are performed on the weight or dscale, allowing + * us to pass around intermediate values with higher precision than supported + * by the numeric type. Note: this is incompatible with numeric_send/recv(), + * which use 16-bit integers for these fields. 
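+ * (The digits themselves are still transferred as 16-bit values; only the header fields are widened to 32 bits.)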
+ */ +static void +numericvar_serialize(StringInfo buf, const NumericVar *var) +{ + int i; + + pq_sendint32(buf, var->ndigits); + pq_sendint32(buf, var->weight); + pq_sendint32(buf, var->sign); + pq_sendint32(buf, var->dscale); + for (i = 0; i < var->ndigits; i++) + pq_sendint16(buf, var->digits[i]); +} + +/* + * numericvar_deserialize - deserialize binary format to NumericVar + */ +static void +numericvar_deserialize(StringInfo buf, NumericVar *var) +{ + int len, + i; + + len = pq_getmsgint(buf, sizeof(int32)); + + alloc_var(var, len); /* sets var->ndigits */ + + var->weight = pq_getmsgint(buf, sizeof(int32)); + var->sign = pq_getmsgint(buf, sizeof(int32)); + var->dscale = pq_getmsgint(buf, sizeof(int32)); + for (i = 0; i < len; i++) + var->digits[i] = pq_getmsgint(buf, sizeof(int16)); +} + + +/* + * duplicate_numeric() - copy a packed-format Numeric + * + * This will handle NaN and Infinity cases. + */ +static Numeric +duplicate_numeric(Numeric num) +{ + Numeric res; + + res = (Numeric) palloc(VARSIZE(num)); + memcpy(res, num, VARSIZE(num)); + return res; +} + +/* + * make_result_opt_error() - + * + * Create the packed db numeric format in palloc()'d memory from + * a variable. This will handle NaN and Infinity cases. + * + * If "have_error" isn't NULL, on overflow *have_error is set to true and + * NULL is returned. This is helpful when caller needs to handle errors. + */ +static Numeric +make_result_opt_error(const NumericVar *var, bool *have_error) +{ + Numeric result; + NumericDigit *digits = var->digits; + int weight = var->weight; + int sign = var->sign; + int n; + Size len; + + if (have_error) + *have_error = false; + + if ((sign & NUMERIC_SIGN_MASK) == NUMERIC_SPECIAL) + { + /* + * Verify valid special value. This could be just an Assert, perhaps, + * but it seems worthwhile to expend a few cycles to ensure that we + * never write any nonzero reserved bits to disk. + */ + if (!(sign == NUMERIC_NAN || + sign == NUMERIC_PINF || + sign == NUMERIC_NINF)) + elog(ERROR, "invalid numeric sign value 0x%x", sign); + + result = (Numeric) palloc(NUMERIC_HDRSZ_SHORT); + + SET_VARSIZE(result, NUMERIC_HDRSZ_SHORT); + result->choice.n_header = sign; + /* the header word is all we need */ + + dump_numeric("make_result()", result); + return result; + } + + n = var->ndigits; + + /* truncate leading zeroes */ + while (n > 0 && *digits == 0) + { + digits++; + weight--; + n--; + } + /* truncate trailing zeroes */ + while (n > 0 && digits[n - 1] == 0) + n--; + + /* If zero result, force to weight=0 and positive sign */ + if (n == 0) + { + weight = 0; + sign = NUMERIC_POS; + } + + /* Build the result */ + if (NUMERIC_CAN_BE_SHORT(var->dscale, weight)) + { + len = NUMERIC_HDRSZ_SHORT + n * sizeof(NumericDigit); + result = (Numeric) palloc(len); + SET_VARSIZE(result, len); + result->choice.n_short.n_header = + (sign == NUMERIC_NEG ? (NUMERIC_SHORT | NUMERIC_SHORT_SIGN_MASK) + : NUMERIC_SHORT) + | (var->dscale << NUMERIC_SHORT_DSCALE_SHIFT) + | (weight < 0 ? 
NUMERIC_SHORT_WEIGHT_SIGN_MASK : 0) + | (weight & NUMERIC_SHORT_WEIGHT_MASK); + } + else + { + len = NUMERIC_HDRSZ + n * sizeof(NumericDigit); + result = (Numeric) palloc(len); + SET_VARSIZE(result, len); + result->choice.n_long.n_sign_dscale = + sign | (var->dscale & NUMERIC_DSCALE_MASK); + result->choice.n_long.n_weight = weight; + } + + Assert(NUMERIC_NDIGITS(result) == n); + if (n > 0) + memcpy(NUMERIC_DIGITS(result), digits, n * sizeof(NumericDigit)); + + /* Check for overflow of int16 fields */ + if (NUMERIC_WEIGHT(result) != weight || + NUMERIC_DSCALE(result) != var->dscale) + { + if (have_error) + { + *have_error = true; + return NULL; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value overflows numeric format"))); + } + } + + dump_numeric("make_result()", result); + return result; +} + + +/* + * make_result() - + * + * An interface to make_result_opt_error() without "have_error" argument. + */ +static Numeric +make_result(const NumericVar *var) +{ + return make_result_opt_error(var, NULL); +} + + +/* + * apply_typmod() - + * + * Do bounds checking and rounding according to the specified typmod. + * Note that this is only applied to normal finite values. + * + * Returns true on success, false on failure (if escontext points to an + * ErrorSaveContext; otherwise errors are thrown). + */ +static bool +apply_typmod(NumericVar *var, int32 typmod, Node *escontext) +{ + int precision; + int scale; + int maxdigits; + int ddigits; + int i; + + /* Do nothing if we have an invalid typmod */ + if (!is_valid_numeric_typmod(typmod)) + return true; + + precision = numeric_typmod_precision(typmod); + scale = numeric_typmod_scale(typmod); + maxdigits = precision - scale; + + /* Round to target scale (and set var->dscale) */ + round_var(var, scale); + + /* but don't allow var->dscale to be negative */ + if (var->dscale < 0) + var->dscale = 0; + + /* + * Check for overflow - note we can't do this before rounding, because + * rounding could raise the weight. Also note that the var's weight could + * be inflated by leading zeroes, which will be stripped before storage + * but perhaps might not have been yet. In any case, we must recognize a + * true zero, whose weight doesn't mean anything. + */ + ddigits = (var->weight + 1) * DEC_DIGITS; + if (ddigits > maxdigits) + { + /* Determine true weight; and check for all-zero result */ + for (i = 0; i < var->ndigits; i++) + { + NumericDigit dig = var->digits[i]; + + if (dig) + { + /* Adjust for any high-order decimal zero digits */ +#if DEC_DIGITS == 4 + if (dig < 10) + ddigits -= 3; + else if (dig < 100) + ddigits -= 2; + else if (dig < 1000) + ddigits -= 1; +#elif DEC_DIGITS == 2 + if (dig < 10) + ddigits -= 1; +#elif DEC_DIGITS == 1 + /* no adjustment */ +#else +#error unsupported NBASE +#endif + if (ddigits > maxdigits) + ereturn(escontext, false, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("numeric field overflow"), + errdetail("A field with precision %d, scale %d must round to an absolute value less than %s%d.", + precision, scale, + /* Display 10^0 as 1 */ + maxdigits ? "10^" : "", + maxdigits ? maxdigits : 1 + ))); + break; + } + ddigits -= DEC_DIGITS; + } + } + + return true; +} + +/* + * apply_typmod_special() - + * + * Do bounds checking according to the specified typmod, for an Inf or NaN. + * For convenience of most callers, the value is presented in packed form. 
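+ * For example, a column declared numeric(5,2) will accept NaN under this rule but reject Infinity.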
+ * + * Returns true on success, false on failure (if escontext points to an + * ErrorSaveContext; otherwise errors are thrown). + */ +static bool +apply_typmod_special(Numeric num, int32 typmod, Node *escontext) +{ + int precision; + int scale; + + Assert(NUMERIC_IS_SPECIAL(num)); /* caller error if not */ + + /* + * NaN is allowed regardless of the typmod; that's rather dubious perhaps, + * but it's a longstanding behavior. Inf is rejected if we have any + * typmod restriction, since an infinity shouldn't be claimed to fit in + * any finite number of digits. + */ + if (NUMERIC_IS_NAN(num)) + return true; + + /* Do nothing if we have a default typmod (-1) */ + if (!is_valid_numeric_typmod(typmod)) + return true; + + precision = numeric_typmod_precision(typmod); + scale = numeric_typmod_scale(typmod); + + ereturn(escontext, false, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("numeric field overflow"), + errdetail("A field with precision %d, scale %d cannot hold an infinite value.", + precision, scale))); +} + + +/* + * Convert numeric to int8, rounding if needed. + * + * If overflow, return false (no error is raised). Return true if okay. + */ +static bool +numericvar_to_int64(const NumericVar *var, int64 *result) +{ + NumericDigit *digits; + int ndigits; + int weight; + int i; + int64 val; + bool neg; + NumericVar rounded; + + /* Round to nearest integer */ + init_var(&rounded); + set_var_from_var(var, &rounded); + round_var(&rounded, 0); + + /* Check for zero input */ + strip_var(&rounded); + ndigits = rounded.ndigits; + if (ndigits == 0) + { + *result = 0; + free_var(&rounded); + return true; + } + + /* + * For input like 10000000000, we must treat stripped digits as real. So + * the loop assumes there are weight+1 digits before the decimal point. + */ + weight = rounded.weight; + Assert(weight >= 0 && ndigits <= weight + 1); + + /* + * Construct the result. To avoid issues with converting a value + * corresponding to INT64_MIN (which can't be represented as a positive 64 + * bit two's complement integer), accumulate value as a negative number. + */ + digits = rounded.digits; + neg = (rounded.sign == NUMERIC_NEG); + val = -digits[0]; + for (i = 1; i <= weight; i++) + { + if (unlikely(pg_mul_s64_overflow(val, NBASE, &val))) + { + free_var(&rounded); + return false; + } + + if (i < ndigits) + { + if (unlikely(pg_sub_s64_overflow(val, digits[i], &val))) + { + free_var(&rounded); + return false; + } + } + } + + free_var(&rounded); + + if (!neg) + { + if (unlikely(val == PG_INT64_MIN)) + return false; + val = -val; + } + *result = val; + + return true; +} + +/* + * Convert int8 value to numeric. + */ +static void +int64_to_numericvar(int64 val, NumericVar *var) +{ + uint64 uval, + newuval; + NumericDigit *ptr; + int ndigits; + + /* int64 can require at most 19 decimal digits; add one for safety */ + alloc_var(var, 20 / DEC_DIGITS); + if (val < 0) + { + var->sign = NUMERIC_NEG; + uval = -val; + } + else + { + var->sign = NUMERIC_POS; + uval = val; + } + var->dscale = 0; + if (val == 0) + { + var->ndigits = 0; + var->weight = 0; + return; + } + ptr = var->digits + var->ndigits; + ndigits = 0; + do + { + ptr--; + ndigits++; + newuval = uval / NBASE; + *ptr = uval - newuval * NBASE; + uval = newuval; + } while (uval); + var->digits = ptr; + var->ndigits = ndigits; + var->weight = ndigits - 1; +} + +/* + * Convert numeric to uint64, rounding if needed. + * + * If overflow, return false (no error is raised). Return true if okay. 
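+ * Negative inputs are likewise reported as failure, since the target type is unsigned.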
+ */ +static bool +numericvar_to_uint64(const NumericVar *var, uint64 *result) +{ + NumericDigit *digits; + int ndigits; + int weight; + int i; + uint64 val; + NumericVar rounded; + + /* Round to nearest integer */ + init_var(&rounded); + set_var_from_var(var, &rounded); + round_var(&rounded, 0); + + /* Check for zero input */ + strip_var(&rounded); + ndigits = rounded.ndigits; + if (ndigits == 0) + { + *result = 0; + free_var(&rounded); + return true; + } + + /* Check for negative input */ + if (rounded.sign == NUMERIC_NEG) + { + free_var(&rounded); + return false; + } + + /* + * For input like 10000000000, we must treat stripped digits as real. So + * the loop assumes there are weight+1 digits before the decimal point. + */ + weight = rounded.weight; + Assert(weight >= 0 && ndigits <= weight + 1); + + /* Construct the result */ + digits = rounded.digits; + val = digits[0]; + for (i = 1; i <= weight; i++) + { + if (unlikely(pg_mul_u64_overflow(val, NBASE, &val))) + { + free_var(&rounded); + return false; + } + + if (i < ndigits) + { + if (unlikely(pg_add_u64_overflow(val, digits[i], &val))) + { + free_var(&rounded); + return false; + } + } + } + + free_var(&rounded); + + *result = val; + + return true; +} + +#ifdef HAVE_INT128 +/* + * Convert numeric to int128, rounding if needed. + * + * If overflow, return false (no error is raised). Return true if okay. + */ +static bool +numericvar_to_int128(const NumericVar *var, int128 *result) +{ + NumericDigit *digits; + int ndigits; + int weight; + int i; + int128 val, + oldval; + bool neg; + NumericVar rounded; + + /* Round to nearest integer */ + init_var(&rounded); + set_var_from_var(var, &rounded); + round_var(&rounded, 0); + + /* Check for zero input */ + strip_var(&rounded); + ndigits = rounded.ndigits; + if (ndigits == 0) + { + *result = 0; + free_var(&rounded); + return true; + } + + /* + * For input like 10000000000, we must treat stripped digits as real. So + * the loop assumes there are weight+1 digits before the decimal point. + */ + weight = rounded.weight; + Assert(weight >= 0 && ndigits <= weight + 1); + + /* Construct the result */ + digits = rounded.digits; + neg = (rounded.sign == NUMERIC_NEG); + val = digits[0]; + for (i = 1; i <= weight; i++) + { + oldval = val; + val *= NBASE; + if (i < ndigits) + val += digits[i]; + + /* + * The overflow check is a bit tricky because we want to accept + * INT128_MIN, which will overflow the positive accumulator. We can + * detect this case easily though because INT128_MIN is the only + * nonzero value for which -val == val (on a two's complement machine, + * anyway). + */ + if ((val / NBASE) != oldval) /* possible overflow? */ + { + if (!neg || (-val) != val || val == 0 || oldval < 0) + { + free_var(&rounded); + return false; + } + } + } + + free_var(&rounded); + + *result = neg ? -val : val; + return true; +} + +/* + * Convert 128 bit integer to numeric. 
+ */ +static void +int128_to_numericvar(int128 val, NumericVar *var) +{ + uint128 uval, + newuval; + NumericDigit *ptr; + int ndigits; + + /* int128 can require at most 39 decimal digits; add one for safety */ + alloc_var(var, 40 / DEC_DIGITS); + if (val < 0) + { + var->sign = NUMERIC_NEG; + uval = -val; + } + else + { + var->sign = NUMERIC_POS; + uval = val; + } + var->dscale = 0; + if (val == 0) + { + var->ndigits = 0; + var->weight = 0; + return; + } + ptr = var->digits + var->ndigits; + ndigits = 0; + do + { + ptr--; + ndigits++; + newuval = uval / NBASE; + *ptr = uval - newuval * NBASE; + uval = newuval; + } while (uval); + var->digits = ptr; + var->ndigits = ndigits; + var->weight = ndigits - 1; +} +#endif + +/* + * Convert a NumericVar to float8; if out of range, return +/- HUGE_VAL + */ +static double +numericvar_to_double_no_overflow(const NumericVar *var) +{ + char *tmp; + double val; + char *endptr; + + tmp = get_str_from_var(var); + + /* unlike float8in, we ignore ERANGE from strtod */ + val = strtod(tmp, &endptr); + if (*endptr != '\0') + { + /* shouldn't happen ... */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "double precision", tmp))); + } + + pfree(tmp); + + return val; +} + + +/* + * cmp_var() - + * + * Compare two values on variable level. We assume zeroes have been + * truncated to no digits. + */ +static int +cmp_var(const NumericVar *var1, const NumericVar *var2) +{ + return cmp_var_common(var1->digits, var1->ndigits, + var1->weight, var1->sign, + var2->digits, var2->ndigits, + var2->weight, var2->sign); +} + +/* + * cmp_var_common() - + * + * Main routine of cmp_var(). This function can be used by both + * NumericVar and Numeric. + */ +static int +cmp_var_common(const NumericDigit *var1digits, int var1ndigits, + int var1weight, int var1sign, + const NumericDigit *var2digits, int var2ndigits, + int var2weight, int var2sign) +{ + if (var1ndigits == 0) + { + if (var2ndigits == 0) + return 0; + if (var2sign == NUMERIC_NEG) + return 1; + return -1; + } + if (var2ndigits == 0) + { + if (var1sign == NUMERIC_POS) + return 1; + return -1; + } + + if (var1sign == NUMERIC_POS) + { + if (var2sign == NUMERIC_NEG) + return 1; + return cmp_abs_common(var1digits, var1ndigits, var1weight, + var2digits, var2ndigits, var2weight); + } + + if (var2sign == NUMERIC_POS) + return -1; + + return cmp_abs_common(var2digits, var2ndigits, var2weight, + var1digits, var1ndigits, var1weight); +} + + +/* + * add_var() - + * + * Full version of add functionality on variable level (handling signs). + * result might point to one of the operands too without danger. 
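+ * All four sign combinations reduce to add_abs() or sub_abs() on the absolute values, with the result's sign fixed up afterwards.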
+ */ +static void +add_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result) +{ + /* + * Decide on the signs of the two variables what to do + */ + if (var1->sign == NUMERIC_POS) + { + if (var2->sign == NUMERIC_POS) + { + /* + * Both are positive result = +(ABS(var1) + ABS(var2)) + */ + add_abs(var1, var2, result); + result->sign = NUMERIC_POS; + } + else + { + /* + * var1 is positive, var2 is negative Must compare absolute values + */ + switch (cmp_abs(var1, var2)) + { + case 0: + /* ---------- + * ABS(var1) == ABS(var2) + * result = ZERO + * ---------- + */ + zero_var(result); + result->dscale = Max(var1->dscale, var2->dscale); + break; + + case 1: + /* ---------- + * ABS(var1) > ABS(var2) + * result = +(ABS(var1) - ABS(var2)) + * ---------- + */ + sub_abs(var1, var2, result); + result->sign = NUMERIC_POS; + break; + + case -1: + /* ---------- + * ABS(var1) < ABS(var2) + * result = -(ABS(var2) - ABS(var1)) + * ---------- + */ + sub_abs(var2, var1, result); + result->sign = NUMERIC_NEG; + break; + } + } + } + else + { + if (var2->sign == NUMERIC_POS) + { + /* ---------- + * var1 is negative, var2 is positive + * Must compare absolute values + * ---------- + */ + switch (cmp_abs(var1, var2)) + { + case 0: + /* ---------- + * ABS(var1) == ABS(var2) + * result = ZERO + * ---------- + */ + zero_var(result); + result->dscale = Max(var1->dscale, var2->dscale); + break; + + case 1: + /* ---------- + * ABS(var1) > ABS(var2) + * result = -(ABS(var1) - ABS(var2)) + * ---------- + */ + sub_abs(var1, var2, result); + result->sign = NUMERIC_NEG; + break; + + case -1: + /* ---------- + * ABS(var1) < ABS(var2) + * result = +(ABS(var2) - ABS(var1)) + * ---------- + */ + sub_abs(var2, var1, result); + result->sign = NUMERIC_POS; + break; + } + } + else + { + /* ---------- + * Both are negative + * result = -(ABS(var1) + ABS(var2)) + * ---------- + */ + add_abs(var1, var2, result); + result->sign = NUMERIC_NEG; + } + } +} + + +/* + * sub_var() - + * + * Full version of sub functionality on variable level (handling signs). + * result might point to one of the operands too without danger. 
+ */ +static void +sub_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result) +{ + /* + * Decide on the signs of the two variables what to do + */ + if (var1->sign == NUMERIC_POS) + { + if (var2->sign == NUMERIC_NEG) + { + /* ---------- + * var1 is positive, var2 is negative + * result = +(ABS(var1) + ABS(var2)) + * ---------- + */ + add_abs(var1, var2, result); + result->sign = NUMERIC_POS; + } + else + { + /* ---------- + * Both are positive + * Must compare absolute values + * ---------- + */ + switch (cmp_abs(var1, var2)) + { + case 0: + /* ---------- + * ABS(var1) == ABS(var2) + * result = ZERO + * ---------- + */ + zero_var(result); + result->dscale = Max(var1->dscale, var2->dscale); + break; + + case 1: + /* ---------- + * ABS(var1) > ABS(var2) + * result = +(ABS(var1) - ABS(var2)) + * ---------- + */ + sub_abs(var1, var2, result); + result->sign = NUMERIC_POS; + break; + + case -1: + /* ---------- + * ABS(var1) < ABS(var2) + * result = -(ABS(var2) - ABS(var1)) + * ---------- + */ + sub_abs(var2, var1, result); + result->sign = NUMERIC_NEG; + break; + } + } + } + else + { + if (var2->sign == NUMERIC_NEG) + { + /* ---------- + * Both are negative + * Must compare absolute values + * ---------- + */ + switch (cmp_abs(var1, var2)) + { + case 0: + /* ---------- + * ABS(var1) == ABS(var2) + * result = ZERO + * ---------- + */ + zero_var(result); + result->dscale = Max(var1->dscale, var2->dscale); + break; + + case 1: + /* ---------- + * ABS(var1) > ABS(var2) + * result = -(ABS(var1) - ABS(var2)) + * ---------- + */ + sub_abs(var1, var2, result); + result->sign = NUMERIC_NEG; + break; + + case -1: + /* ---------- + * ABS(var1) < ABS(var2) + * result = +(ABS(var2) - ABS(var1)) + * ---------- + */ + sub_abs(var2, var1, result); + result->sign = NUMERIC_POS; + break; + } + } + else + { + /* ---------- + * var1 is negative, var2 is positive + * result = -(ABS(var1) + ABS(var2)) + * ---------- + */ + add_abs(var1, var2, result); + result->sign = NUMERIC_NEG; + } + } +} + + +/* + * mul_var() - + * + * Multiplication on variable level. Product of var1 * var2 is stored + * in result. Result is rounded to no more than rscale fractional digits. + */ +static void +mul_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result, + int rscale) +{ + int res_ndigits; + int res_sign; + int res_weight; + int maxdigits; + int *dig; + int carry; + int maxdig; + int newdig; + int var1ndigits; + int var2ndigits; + NumericDigit *var1digits; + NumericDigit *var2digits; + NumericDigit *res_digits; + int i, + i1, + i2; + + /* + * Arrange for var1 to be the shorter of the two numbers. This improves + * performance because the inner multiplication loop is much simpler than + * the outer loop, so it's better to have a smaller number of iterations + * of the outer loop. This also reduces the number of times that the + * accumulator array needs to be normalized. 
+ */ + if (var1->ndigits > var2->ndigits) + { + const NumericVar *tmp = var1; + + var1 = var2; + var2 = tmp; + } + + /* copy these values into local vars for speed in inner loop */ + var1ndigits = var1->ndigits; + var2ndigits = var2->ndigits; + var1digits = var1->digits; + var2digits = var2->digits; + + if (var1ndigits == 0 || var2ndigits == 0) + { + /* one or both inputs is zero; so is result */ + zero_var(result); + result->dscale = rscale; + return; + } + + /* Determine result sign and (maximum possible) weight */ + if (var1->sign == var2->sign) + res_sign = NUMERIC_POS; + else + res_sign = NUMERIC_NEG; + res_weight = var1->weight + var2->weight + 2; + + /* + * Determine the number of result digits to compute. If the exact result + * would have more than rscale fractional digits, truncate the computation + * with MUL_GUARD_DIGITS guard digits, i.e., ignore input digits that + * would only contribute to the right of that. (This will give the exact + * rounded-to-rscale answer unless carries out of the ignored positions + * would have propagated through more than MUL_GUARD_DIGITS digits.) + * + * Note: an exact computation could not produce more than var1ndigits + + * var2ndigits digits, but we allocate one extra output digit in case + * rscale-driven rounding produces a carry out of the highest exact digit. + */ + res_ndigits = var1ndigits + var2ndigits + 1; + maxdigits = res_weight + 1 + (rscale + DEC_DIGITS - 1) / DEC_DIGITS + + MUL_GUARD_DIGITS; + res_ndigits = Min(res_ndigits, maxdigits); + + if (res_ndigits < 3) + { + /* All input digits will be ignored; so result is zero */ + zero_var(result); + result->dscale = rscale; + return; + } + + /* + * We do the arithmetic in an array "dig[]" of signed int's. Since + * INT_MAX is noticeably larger than NBASE*NBASE, this gives us headroom + * to avoid normalizing carries immediately. + * + * maxdig tracks the maximum possible value of any dig[] entry; when this + * threatens to exceed INT_MAX, we take the time to propagate carries. + * Furthermore, we need to ensure that overflow doesn't occur during the + * carry propagation passes either. The carry values could be as much as + * INT_MAX/NBASE, so really we must normalize when digits threaten to + * exceed INT_MAX - INT_MAX/NBASE. + * + * To avoid overflow in maxdig itself, it actually represents the max + * possible value divided by NBASE-1, ie, at the top of the loop it is + * known that no dig[] entry exceeds maxdig * (NBASE-1). + */ + dig = (int *) palloc0(res_ndigits * sizeof(int)); + maxdig = 0; + + /* + * The least significant digits of var1 should be ignored if they don't + * contribute directly to the first res_ndigits digits of the result that + * we are computing. + * + * Digit i1 of var1 and digit i2 of var2 are multiplied and added to digit + * i1+i2+2 of the accumulator array, so we need only consider digits of + * var1 for which i1 <= res_ndigits - 3. + */ + for (i1 = Min(var1ndigits - 1, res_ndigits - 3); i1 >= 0; i1--) + { + NumericDigit var1digit = var1digits[i1]; + + if (var1digit == 0) + continue; + + /* Time to normalize? 
*/ + maxdig += var1digit; + if (maxdig > (INT_MAX - INT_MAX / NBASE) / (NBASE - 1)) + { + /* Yes, do it */ + carry = 0; + for (i = res_ndigits - 1; i >= 0; i--) + { + newdig = dig[i] + carry; + if (newdig >= NBASE) + { + carry = newdig / NBASE; + newdig -= carry * NBASE; + } + else + carry = 0; + dig[i] = newdig; + } + Assert(carry == 0); + /* Reset maxdig to indicate new worst-case */ + maxdig = 1 + var1digit; + } + + /* + * Add the appropriate multiple of var2 into the accumulator. + * + * As above, digits of var2 can be ignored if they don't contribute, + * so we only include digits for which i1+i2+2 < res_ndigits. + * + * This inner loop is the performance bottleneck for multiplication, + * so we want to keep it simple enough so that it can be + * auto-vectorized. Accordingly, process the digits left-to-right + * even though schoolbook multiplication would suggest right-to-left. + * Since we aren't propagating carries in this loop, the order does + * not matter. + */ + { + int i2limit = Min(var2ndigits, res_ndigits - i1 - 2); + int *dig_i1_2 = &dig[i1 + 2]; + + for (i2 = 0; i2 < i2limit; i2++) + dig_i1_2[i2] += var1digit * var2digits[i2]; + } + } + + /* + * Now we do a final carry propagation pass to normalize the result, which + * we combine with storing the result digits into the output. Note that + * this is still done at full precision w/guard digits. + */ + alloc_var(result, res_ndigits); + res_digits = result->digits; + carry = 0; + for (i = res_ndigits - 1; i >= 0; i--) + { + newdig = dig[i] + carry; + if (newdig >= NBASE) + { + carry = newdig / NBASE; + newdig -= carry * NBASE; + } + else + carry = 0; + res_digits[i] = newdig; + } + Assert(carry == 0); + + pfree(dig); + + /* + * Finally, round the result to the requested precision. + */ + result->weight = res_weight; + result->sign = res_sign; + + /* Round to target rscale (and set result->dscale) */ + round_var(result, rscale); + + /* Strip leading and trailing zeroes */ + strip_var(result); +} + + +/* + * div_var() - + * + * Division on variable level. Quotient of var1 / var2 is stored in result. + * The quotient is figured to exactly rscale fractional digits. + * If round is true, it is rounded at the rscale'th digit; if false, it + * is truncated (towards zero) at that digit. + */ +static void +div_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result, + int rscale, bool round) +{ + int div_ndigits; + int res_ndigits; + int res_sign; + int res_weight; + int carry; + int borrow; + int divisor1; + int divisor2; + NumericDigit *dividend; + NumericDigit *divisor; + NumericDigit *res_digits; + int i; + int j; + + /* copy these values into local vars for speed in inner loop */ + int var1ndigits = var1->ndigits; + int var2ndigits = var2->ndigits; + + /* + * First of all division by zero check; we must not be handed an + * unnormalized divisor. + */ + if (var2ndigits == 0 || var2->digits[0] == 0) + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + + /* + * If the divisor has just one or two digits, delegate to div_var_int(), + * which uses fast short division. + * + * Similarly, on platforms with 128-bit integer support, delegate to + * div_var_int64() for divisors with three or four digits. 
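[Illustrative aside, not part of the patch] mul_var() above accumulates raw digit products in int accumulators and only propagates carries when an entry could approach INT_MAX. A simplified standalone sketch of that deferred-carry technique on MSD-first base-10000 digit arrays; sk_mul() and SK_NBASE are hypothetical names, and rounding, guard digits, weights and signs are all omitted.

/* Sketch: schoolbook multiplication with deferred carry propagation. */
#include <limits.h>
#include <stdio.h>

#define SK_NBASE 10000

/* res receives rn = an + bn digits; all arrays are MSD-first */
static void
sk_mul(const int *a, int an, const int *b, int bn, int *res, int rn)
{
    int         acc[64] = {0};  /* accumulator, big enough for this sketch */
    long long   maxacc = 0;     /* worst-case value of any acc[] entry */
    int         carry;
    int         i, j, k, v;

    for (i = an - 1; i >= 0; i--)
    {
        if (a[i] == 0)
            continue;

        /* propagate carries only when an entry could approach INT_MAX */
        maxacc += (long long) a[i] * (SK_NBASE - 1);
        if (maxacc > INT_MAX - INT_MAX / SK_NBASE)
        {
            carry = 0;
            for (k = rn - 1; k >= 0; k--)
            {
                v = acc[k] + carry;
                carry = v / SK_NBASE;
                acc[k] = v - carry * SK_NBASE;
            }
            /* entries are now < NBASE, plus whatever a[i]*b adds next */
            maxacc = (long long) (a[i] + 1) * (SK_NBASE - 1);
        }

        /* add a[i] * b into the accumulator, aligned at position i + j + 1 */
        for (j = bn - 1; j >= 0; j--)
            acc[i + j + 1] += a[i] * b[j];
    }

    /* final carry propagation, storing into the result digits */
    carry = 0;
    for (k = rn - 1; k >= 0; k--)
    {
        v = acc[k] + carry;
        carry = v / SK_NBASE;
        res[k] = v - carry * SK_NBASE;
    }
}

int
main(void)
{
    int         a[] = {1, 2345};    /* 12345 in base 10000 */
    int         b[] = {6789};
    int         res[3];

    sk_mul(a, 2, b, 1, res, 3);
    /* 12345 * 6789 = 83810205, i.e. digits 0 8381 0205 */
    printf("%d %04d %04d\n", res[0], res[1], res[2]);
    return 0;
}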
+ */ + if (var2ndigits <= 2) + { + int idivisor; + int idivisor_weight; + + idivisor = var2->digits[0]; + idivisor_weight = var2->weight; + if (var2ndigits == 2) + { + idivisor = idivisor * NBASE + var2->digits[1]; + idivisor_weight--; + } + if (var2->sign == NUMERIC_NEG) + idivisor = -idivisor; + + div_var_int(var1, idivisor, idivisor_weight, result, rscale, round); + return; + } +#ifdef HAVE_INT128 + if (var2ndigits <= 4) + { + int64 idivisor; + int idivisor_weight; + + idivisor = var2->digits[0]; + idivisor_weight = var2->weight; + for (i = 1; i < var2ndigits; i++) + { + idivisor = idivisor * NBASE + var2->digits[i]; + idivisor_weight--; + } + if (var2->sign == NUMERIC_NEG) + idivisor = -idivisor; + + div_var_int64(var1, idivisor, idivisor_weight, result, rscale, round); + return; + } +#endif + + /* + * Otherwise, perform full long division. + */ + + /* Result zero check */ + if (var1ndigits == 0) + { + zero_var(result); + result->dscale = rscale; + return; + } + + /* + * Determine the result sign, weight and number of digits to calculate. + * The weight figured here is correct if the emitted quotient has no + * leading zero digits; otherwise strip_var() will fix things up. + */ + if (var1->sign == var2->sign) + res_sign = NUMERIC_POS; + else + res_sign = NUMERIC_NEG; + res_weight = var1->weight - var2->weight; + /* The number of accurate result digits we need to produce: */ + res_ndigits = res_weight + 1 + (rscale + DEC_DIGITS - 1) / DEC_DIGITS; + /* ... but always at least 1 */ + res_ndigits = Max(res_ndigits, 1); + /* If rounding needed, figure one more digit to ensure correct result */ + if (round) + res_ndigits++; + + /* + * The working dividend normally requires res_ndigits + var2ndigits + * digits, but make it at least var1ndigits so we can load all of var1 + * into it. (There will be an additional digit dividend[0] in the + * dividend space, but for consistency with Knuth's notation we don't + * count that in div_ndigits.) + */ + div_ndigits = res_ndigits + var2ndigits; + div_ndigits = Max(div_ndigits, var1ndigits); + + /* + * We need a workspace with room for the working dividend (div_ndigits+1 + * digits) plus room for the possibly-normalized divisor (var2ndigits + * digits). It is convenient also to have a zero at divisor[0] with the + * actual divisor data in divisor[1 .. var2ndigits]. Transferring the + * digits into the workspace also allows us to realloc the result (which + * might be the same as either input var) before we begin the main loop. + * Note that we use palloc0 to ensure that divisor[0], dividend[0], and + * any additional dividend positions beyond var1ndigits, start out 0. + */ + dividend = (NumericDigit *) + palloc0((div_ndigits + var2ndigits + 2) * sizeof(NumericDigit)); + divisor = dividend + (div_ndigits + 1); + memcpy(dividend + 1, var1->digits, var1ndigits * sizeof(NumericDigit)); + memcpy(divisor + 1, var2->digits, var2ndigits * sizeof(NumericDigit)); + + /* + * Now we can realloc the result to hold the generated quotient digits. + */ + alloc_var(result, res_ndigits); + res_digits = result->digits; + + /* + * The full multiple-place algorithm is taken from Knuth volume 2, + * Algorithm 4.3.1D. + * + * We need the first divisor digit to be >= NBASE/2. If it isn't, make it + * so by scaling up both the divisor and dividend by the factor "d". (The + * reason for allocating dividend[0] above is to leave room for possible + * carry here.) 
+ */ + if (divisor[1] < HALF_NBASE) + { + int d = NBASE / (divisor[1] + 1); + + carry = 0; + for (i = var2ndigits; i > 0; i--) + { + carry += divisor[i] * d; + divisor[i] = carry % NBASE; + carry = carry / NBASE; + } + Assert(carry == 0); + carry = 0; + /* at this point only var1ndigits of dividend can be nonzero */ + for (i = var1ndigits; i >= 0; i--) + { + carry += dividend[i] * d; + dividend[i] = carry % NBASE; + carry = carry / NBASE; + } + Assert(carry == 0); + Assert(divisor[1] >= HALF_NBASE); + } + /* First 2 divisor digits are used repeatedly in main loop */ + divisor1 = divisor[1]; + divisor2 = divisor[2]; + + /* + * Begin the main loop. Each iteration of this loop produces the j'th + * quotient digit by dividing dividend[j .. j + var2ndigits] by the + * divisor; this is essentially the same as the common manual procedure + * for long division. + */ + for (j = 0; j < res_ndigits; j++) + { + /* Estimate quotient digit from the first two dividend digits */ + int next2digits = dividend[j] * NBASE + dividend[j + 1]; + int qhat; + + /* + * If next2digits are 0, then quotient digit must be 0 and there's no + * need to adjust the working dividend. It's worth testing here to + * fall out ASAP when processing trailing zeroes in a dividend. + */ + if (next2digits == 0) + { + res_digits[j] = 0; + continue; + } + + if (dividend[j] == divisor1) + qhat = NBASE - 1; + else + qhat = next2digits / divisor1; + + /* + * Adjust quotient digit if it's too large. Knuth proves that after + * this step, the quotient digit will be either correct or just one + * too large. (Note: it's OK to use dividend[j+2] here because we + * know the divisor length is at least 2.) + */ + while (divisor2 * qhat > + (next2digits - qhat * divisor1) * NBASE + dividend[j + 2]) + qhat--; + + /* As above, need do nothing more when quotient digit is 0 */ + if (qhat > 0) + { + NumericDigit *dividend_j = ÷nd[j]; + + /* + * Multiply the divisor by qhat, and subtract that from the + * working dividend. The multiplication and subtraction are + * folded together here, noting that qhat <= NBASE (since it might + * be one too large), and so the intermediate result "tmp_result" + * is in the range [-NBASE^2, NBASE - 1], and "borrow" is in the + * range [0, NBASE]. + */ + borrow = 0; + for (i = var2ndigits; i >= 0; i--) + { + int tmp_result; + + tmp_result = dividend_j[i] - borrow - divisor[i] * qhat; + borrow = (NBASE - 1 - tmp_result) / NBASE; + dividend_j[i] = tmp_result + borrow * NBASE; + } + + /* + * If we got a borrow out of the top dividend digit, then indeed + * qhat was one too large. Fix it, and add back the divisor to + * correct the working dividend. (Knuth proves that this will + * occur only about 3/NBASE of the time; hence, it's a good idea + * to test this code with small NBASE to be sure this section gets + * exercised.) + */ + if (borrow) + { + qhat--; + carry = 0; + for (i = var2ndigits; i >= 0; i--) + { + carry += dividend_j[i] + divisor[i]; + if (carry >= NBASE) + { + dividend_j[i] = carry - NBASE; + carry = 1; + } + else + { + dividend_j[i] = carry; + carry = 0; + } + } + /* A carry should occur here to cancel the borrow above */ + Assert(carry == 1); + } + } + + /* And we're done with this quotient digit */ + res_digits[j] = qhat; + } + + pfree(dividend); + + /* + * Finally, round or truncate the result to the requested precision. 
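[Illustrative aside, not part of the patch] The main loop above estimates each quotient digit from the first two dividend digits and then lowers it, at most a couple of times, using the second divisor digit. A standalone sketch of just that estimate-and-adjust step, assuming the divisor has already been normalized so its leading digit is at least NBASE/2; sk_qhat() is a hypothetical name.

/* Sketch: Knuth Algorithm D quotient-digit estimate with correction. */
#include <stdio.h>

#define SK_NBASE 10000

/*
 * Estimate the quotient digit for dividend digits u0,u1,u2 divided by a
 * divisor whose two leading digits are d1,d2 (with d1 >= NBASE/2).  Knuth
 * shows the returned estimate is then either exact or one too large.
 */
static int
sk_qhat(int u0, int u1, int u2, int d1, int d2)
{
    int         next2 = u0 * SK_NBASE + u1;
    int         qhat;

    if (u0 == d1)
        qhat = SK_NBASE - 1;
    else
        qhat = next2 / d1;

    /* lower qhat while the three-digit test proves it is too large */
    while ((long long) d2 * qhat >
           (long long) (next2 - qhat * d1) * SK_NBASE + u2)
        qhat--;

    return qhat;
}

int
main(void)
{
    /* leading quotient digit estimate for 8765 4321 0000 / 9876 5432 ... */
    printf("%d\n", sk_qhat(8765, 4321, 0, 9876, 5432));
    return 0;
}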
+ */ + result->weight = res_weight; + result->sign = res_sign; + + /* Round or truncate to target rscale (and set result->dscale) */ + if (round) + round_var(result, rscale); + else + trunc_var(result, rscale); + + /* Strip leading and trailing zeroes */ + strip_var(result); +} + + +/* + * div_var_fast() - + * + * This has the same API as div_var, but is implemented using the division + * algorithm from the "FM" library, rather than Knuth's schoolbook-division + * approach. This is significantly faster but can produce inaccurate + * results, because it sometimes has to propagate rounding to the left, + * and so we can never be entirely sure that we know the requested digits + * exactly. We compute DIV_GUARD_DIGITS extra digits, but there is + * no certainty that that's enough. We use this only in the transcendental + * function calculation routines, where everything is approximate anyway. + * + * Although we provide a "round" argument for consistency with div_var, + * it is unwise to use this function with round=false. In truncation mode + * it is possible to get a result with no significant digits, for example + * with rscale=0 we might compute 0.99999... and truncate that to 0 when + * the correct answer is 1. + */ +static void +div_var_fast(const NumericVar *var1, const NumericVar *var2, + NumericVar *result, int rscale, bool round) +{ + int div_ndigits; + int load_ndigits; + int res_sign; + int res_weight; + int *div; + int qdigit; + int carry; + int maxdiv; + int newdig; + NumericDigit *res_digits; + double fdividend, + fdivisor, + fdivisorinverse, + fquotient; + int qi; + int i; + + /* copy these values into local vars for speed in inner loop */ + int var1ndigits = var1->ndigits; + int var2ndigits = var2->ndigits; + NumericDigit *var1digits = var1->digits; + NumericDigit *var2digits = var2->digits; + + /* + * First of all division by zero check; we must not be handed an + * unnormalized divisor. + */ + if (var2ndigits == 0 || var2digits[0] == 0) + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + + /* + * If the divisor has just one or two digits, delegate to div_var_int(), + * which uses fast short division. + * + * Similarly, on platforms with 128-bit integer support, delegate to + * div_var_int64() for divisors with three or four digits. + */ + if (var2ndigits <= 2) + { + int idivisor; + int idivisor_weight; + + idivisor = var2->digits[0]; + idivisor_weight = var2->weight; + if (var2ndigits == 2) + { + idivisor = idivisor * NBASE + var2->digits[1]; + idivisor_weight--; + } + if (var2->sign == NUMERIC_NEG) + idivisor = -idivisor; + + div_var_int(var1, idivisor, idivisor_weight, result, rscale, round); + return; + } +#ifdef HAVE_INT128 + if (var2ndigits <= 4) + { + int64 idivisor; + int idivisor_weight; + + idivisor = var2->digits[0]; + idivisor_weight = var2->weight; + for (i = 1; i < var2ndigits; i++) + { + idivisor = idivisor * NBASE + var2->digits[i]; + idivisor_weight--; + } + if (var2->sign == NUMERIC_NEG) + idivisor = -idivisor; + + div_var_int64(var1, idivisor, idivisor_weight, result, rscale, round); + return; + } +#endif + + /* + * Otherwise, perform full long division. 
+ */ + + /* Result zero check */ + if (var1ndigits == 0) + { + zero_var(result); + result->dscale = rscale; + return; + } + + /* + * Determine the result sign, weight and number of digits to calculate + */ + if (var1->sign == var2->sign) + res_sign = NUMERIC_POS; + else + res_sign = NUMERIC_NEG; + res_weight = var1->weight - var2->weight + 1; + /* The number of accurate result digits we need to produce: */ + div_ndigits = res_weight + 1 + (rscale + DEC_DIGITS - 1) / DEC_DIGITS; + /* Add guard digits for roundoff error */ + div_ndigits += DIV_GUARD_DIGITS; + if (div_ndigits < DIV_GUARD_DIGITS) + div_ndigits = DIV_GUARD_DIGITS; + + /* + * We do the arithmetic in an array "div[]" of signed int's. Since + * INT_MAX is noticeably larger than NBASE*NBASE, this gives us headroom + * to avoid normalizing carries immediately. + * + * We start with div[] containing one zero digit followed by the + * dividend's digits (plus appended zeroes to reach the desired precision + * including guard digits). Each step of the main loop computes an + * (approximate) quotient digit and stores it into div[], removing one + * position of dividend space. A final pass of carry propagation takes + * care of any mistaken quotient digits. + * + * Note that div[] doesn't necessarily contain all of the digits from the + * dividend --- the desired precision plus guard digits might be less than + * the dividend's precision. This happens, for example, in the square + * root algorithm, where we typically divide a 2N-digit number by an + * N-digit number, and only require a result with N digits of precision. + */ + div = (int *) palloc0((div_ndigits + 1) * sizeof(int)); + load_ndigits = Min(div_ndigits, var1ndigits); + for (i = 0; i < load_ndigits; i++) + div[i + 1] = var1digits[i]; + + /* + * We estimate each quotient digit using floating-point arithmetic, taking + * the first four digits of the (current) dividend and divisor. This must + * be float to avoid overflow. The quotient digits will generally be off + * by no more than one from the exact answer. + */ + fdivisor = (double) var2digits[0]; + for (i = 1; i < 4; i++) + { + fdivisor *= NBASE; + if (i < var2ndigits) + fdivisor += (double) var2digits[i]; + } + fdivisorinverse = 1.0 / fdivisor; + + /* + * maxdiv tracks the maximum possible absolute value of any div[] entry; + * when this threatens to exceed INT_MAX, we take the time to propagate + * carries. Furthermore, we need to ensure that overflow doesn't occur + * during the carry propagation passes either. The carry values may have + * an absolute value as high as INT_MAX/NBASE + 1, so really we must + * normalize when digits threaten to exceed INT_MAX - INT_MAX/NBASE - 1. + * + * To avoid overflow in maxdiv itself, it represents the max absolute + * value divided by NBASE-1, ie, at the top of the loop it is known that + * no div[] entry has an absolute value exceeding maxdiv * (NBASE-1). + * + * Actually, though, that holds good only for div[] entries after div[qi]; + * the adjustment done at the bottom of the loop may cause div[qi + 1] to + * exceed the maxdiv limit, so that div[qi] in the next iteration is + * beyond the limit. This does not cause problems, as explained below. 
+ */ + maxdiv = 1; + + /* + * Outer loop computes next quotient digit, which will go into div[qi] + */ + for (qi = 0; qi < div_ndigits; qi++) + { + /* Approximate the current dividend value */ + fdividend = (double) div[qi]; + for (i = 1; i < 4; i++) + { + fdividend *= NBASE; + if (qi + i <= div_ndigits) + fdividend += (double) div[qi + i]; + } + /* Compute the (approximate) quotient digit */ + fquotient = fdividend * fdivisorinverse; + qdigit = (fquotient >= 0.0) ? ((int) fquotient) : + (((int) fquotient) - 1); /* truncate towards -infinity */ + + if (qdigit != 0) + { + /* Do we need to normalize now? */ + maxdiv += abs(qdigit); + if (maxdiv > (INT_MAX - INT_MAX / NBASE - 1) / (NBASE - 1)) + { + /* + * Yes, do it. Note that if var2ndigits is much smaller than + * div_ndigits, we can save a significant amount of effort + * here by noting that we only need to normalise those div[] + * entries touched where prior iterations subtracted multiples + * of the divisor. + */ + carry = 0; + for (i = Min(qi + var2ndigits - 2, div_ndigits); i > qi; i--) + { + newdig = div[i] + carry; + if (newdig < 0) + { + carry = -((-newdig - 1) / NBASE) - 1; + newdig -= carry * NBASE; + } + else if (newdig >= NBASE) + { + carry = newdig / NBASE; + newdig -= carry * NBASE; + } + else + carry = 0; + div[i] = newdig; + } + newdig = div[qi] + carry; + div[qi] = newdig; + + /* + * All the div[] digits except possibly div[qi] are now in the + * range 0..NBASE-1. We do not need to consider div[qi] in + * the maxdiv value anymore, so we can reset maxdiv to 1. + */ + maxdiv = 1; + + /* + * Recompute the quotient digit since new info may have + * propagated into the top four dividend digits + */ + fdividend = (double) div[qi]; + for (i = 1; i < 4; i++) + { + fdividend *= NBASE; + if (qi + i <= div_ndigits) + fdividend += (double) div[qi + i]; + } + /* Compute the (approximate) quotient digit */ + fquotient = fdividend * fdivisorinverse; + qdigit = (fquotient >= 0.0) ? ((int) fquotient) : + (((int) fquotient) - 1); /* truncate towards -infinity */ + maxdiv += abs(qdigit); + } + + /* + * Subtract off the appropriate multiple of the divisor. + * + * The digits beyond div[qi] cannot overflow, because we know they + * will fall within the maxdiv limit. As for div[qi] itself, note + * that qdigit is approximately trunc(div[qi] / vardigits[0]), + * which would make the new value simply div[qi] mod vardigits[0]. + * The lower-order terms in qdigit can change this result by not + * more than about twice INT_MAX/NBASE, so overflow is impossible. + * + * This inner loop is the performance bottleneck for division, so + * code it in the same way as the inner loop of mul_var() so that + * it can be auto-vectorized. We cast qdigit to NumericDigit + * before multiplying to allow the compiler to generate more + * efficient code (using 16-bit multiplication), which is safe + * since we know that the quotient digit is off by at most one, so + * there is no overflow risk. + */ + if (qdigit != 0) + { + int istop = Min(var2ndigits, div_ndigits - qi + 1); + int *div_qi = &div[qi]; + + for (i = 0; i < istop; i++) + div_qi[i] -= ((NumericDigit) qdigit) * var2digits[i]; + } + } + + /* + * The dividend digit we are about to replace might still be nonzero. + * Fold it into the next digit position. + * + * There is no risk of overflow here, although proving that requires + * some care. 
Much as with the argument for div[qi] not overflowing, + * if we consider the first two terms in the numerator and denominator + * of qdigit, we can see that the final value of div[qi + 1] will be + * approximately a remainder mod (vardigits[0]*NBASE + vardigits[1]). + * Accounting for the lower-order terms is a bit complicated but ends + * up adding not much more than INT_MAX/NBASE to the possible range. + * Thus, div[qi + 1] cannot overflow here, and in its role as div[qi] + * in the next loop iteration, it can't be large enough to cause + * overflow in the carry propagation step (if any), either. + * + * But having said that: div[qi] can be more than INT_MAX/NBASE, as + * noted above, which means that the product div[qi] * NBASE *can* + * overflow. When that happens, adding it to div[qi + 1] will always + * cause a canceling overflow so that the end result is correct. We + * could avoid the intermediate overflow by doing the multiplication + * and addition in int64 arithmetic, but so far there appears no need. + */ + div[qi + 1] += div[qi] * NBASE; + + div[qi] = qdigit; + } + + /* + * Approximate and store the last quotient digit (div[div_ndigits]) + */ + fdividend = (double) div[qi]; + for (i = 1; i < 4; i++) + fdividend *= NBASE; + fquotient = fdividend * fdivisorinverse; + qdigit = (fquotient >= 0.0) ? ((int) fquotient) : + (((int) fquotient) - 1); /* truncate towards -infinity */ + div[qi] = qdigit; + + /* + * Because the quotient digits might be off by one, some of them might be + * -1 or NBASE at this point. The represented value is correct in a + * mathematical sense, but it doesn't look right. We do a final carry + * propagation pass to normalize the digits, which we combine with storing + * the result digits into the output. Note that this is still done at + * full precision w/guard digits. + */ + alloc_var(result, div_ndigits + 1); + res_digits = result->digits; + carry = 0; + for (i = div_ndigits; i >= 0; i--) + { + newdig = div[i] + carry; + if (newdig < 0) + { + carry = -((-newdig - 1) / NBASE) - 1; + newdig -= carry * NBASE; + } + else if (newdig >= NBASE) + { + carry = newdig / NBASE; + newdig -= carry * NBASE; + } + else + carry = 0; + res_digits[i] = newdig; + } + Assert(carry == 0); + + pfree(div); + + /* + * Finally, round the result to the requested precision. + */ + result->weight = res_weight; + result->sign = res_sign; + + /* Round to target rscale (and set result->dscale) */ + if (round) + round_var(result, rscale); + else + trunc_var(result, rscale); + + /* Strip leading and trailing zeroes */ + strip_var(result); +} + + +/* + * div_var_int() - + * + * Divide a numeric variable by a 32-bit integer with the specified weight. + * The quotient var / (ival * NBASE^ival_weight) is stored in result. + */ +static void +div_var_int(const NumericVar *var, int ival, int ival_weight, + NumericVar *result, int rscale, bool round) +{ + NumericDigit *var_digits = var->digits; + int var_ndigits = var->ndigits; + int res_sign; + int res_weight; + int res_ndigits; + NumericDigit *res_buf; + NumericDigit *res_digits; + uint32 divisor; + int i; + + /* Guard against division by zero */ + if (ival == 0) + ereport(ERROR, + errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero")); + + /* Result zero check */ + if (var_ndigits == 0) + { + zero_var(result); + result->dscale = rscale; + return; + } + + /* + * Determine the result sign, weight and number of digits to calculate. 
+ * The weight figured here is correct if the emitted quotient has no + * leading zero digits; otherwise strip_var() will fix things up. + */ + if (var->sign == NUMERIC_POS) + res_sign = ival > 0 ? NUMERIC_POS : NUMERIC_NEG; + else + res_sign = ival > 0 ? NUMERIC_NEG : NUMERIC_POS; + res_weight = var->weight - ival_weight; + /* The number of accurate result digits we need to produce: */ + res_ndigits = res_weight + 1 + (rscale + DEC_DIGITS - 1) / DEC_DIGITS; + /* ... but always at least 1 */ + res_ndigits = Max(res_ndigits, 1); + /* If rounding needed, figure one more digit to ensure correct result */ + if (round) + res_ndigits++; + + res_buf = digitbuf_alloc(res_ndigits + 1); + res_buf[0] = 0; /* spare digit for later rounding */ + res_digits = res_buf + 1; + + /* + * Now compute the quotient digits. This is the short division algorithm + * described in Knuth volume 2, section 4.3.1 exercise 16, except that we + * allow the divisor to exceed the internal base. + * + * In this algorithm, the carry from one digit to the next is at most + * divisor - 1. Therefore, while processing the next digit, carry may + * become as large as divisor * NBASE - 1, and so it requires a 64-bit + * integer if this exceeds UINT_MAX. + */ + divisor = abs(ival); + + if (divisor <= UINT_MAX / NBASE) + { + /* carry cannot overflow 32 bits */ + uint32 carry = 0; + + for (i = 0; i < res_ndigits; i++) + { + carry = carry * NBASE + (i < var_ndigits ? var_digits[i] : 0); + res_digits[i] = (NumericDigit) (carry / divisor); + carry = carry % divisor; + } + } + else + { + /* carry may exceed 32 bits */ + uint64 carry = 0; + + for (i = 0; i < res_ndigits; i++) + { + carry = carry * NBASE + (i < var_ndigits ? var_digits[i] : 0); + res_digits[i] = (NumericDigit) (carry / divisor); + carry = carry % divisor; + } + } + + /* Store the quotient in result */ + digitbuf_free(result->buf); + result->ndigits = res_ndigits; + result->buf = res_buf; + result->digits = res_digits; + result->weight = res_weight; + result->sign = res_sign; + + /* Round or truncate to target rscale (and set result->dscale) */ + if (round) + round_var(result, rscale); + else + trunc_var(result, rscale); + + /* Strip leading/trailing zeroes */ + strip_var(result); +} + + +#ifdef HAVE_INT128 +/* + * div_var_int64() - + * + * Divide a numeric variable by a 64-bit integer with the specified weight. + * The quotient var / (ival * NBASE^ival_weight) is stored in result. + * + * This duplicates the logic in div_var_int(), so any changes made there + * should be made here too. + */ +static void +div_var_int64(const NumericVar *var, int64 ival, int ival_weight, + NumericVar *result, int rscale, bool round) +{ + NumericDigit *var_digits = var->digits; + int var_ndigits = var->ndigits; + int res_sign; + int res_weight; + int res_ndigits; + NumericDigit *res_buf; + NumericDigit *res_digits; + uint64 divisor; + int i; + + /* Guard against division by zero */ + if (ival == 0) + ereport(ERROR, + errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero")); + + /* Result zero check */ + if (var_ndigits == 0) + { + zero_var(result); + result->dscale = rscale; + return; + } + + /* + * Determine the result sign, weight and number of digits to calculate. + * The weight figured here is correct if the emitted quotient has no + * leading zero digits; otherwise strip_var() will fix things up. + */ + if (var->sign == NUMERIC_POS) + res_sign = ival > 0 ? NUMERIC_POS : NUMERIC_NEG; + else + res_sign = ival > 0 ? 
NUMERIC_NEG : NUMERIC_POS; + res_weight = var->weight - ival_weight; + /* The number of accurate result digits we need to produce: */ + res_ndigits = res_weight + 1 + (rscale + DEC_DIGITS - 1) / DEC_DIGITS; + /* ... but always at least 1 */ + res_ndigits = Max(res_ndigits, 1); + /* If rounding needed, figure one more digit to ensure correct result */ + if (round) + res_ndigits++; + + res_buf = digitbuf_alloc(res_ndigits + 1); + res_buf[0] = 0; /* spare digit for later rounding */ + res_digits = res_buf + 1; + + /* + * Now compute the quotient digits. This is the short division algorithm + * described in Knuth volume 2, section 4.3.1 exercise 16, except that we + * allow the divisor to exceed the internal base. + * + * In this algorithm, the carry from one digit to the next is at most + * divisor - 1. Therefore, while processing the next digit, carry may + * become as large as divisor * NBASE - 1, and so it requires a 128-bit + * integer if this exceeds PG_UINT64_MAX. + */ + divisor = i64abs(ival); + + if (divisor <= PG_UINT64_MAX / NBASE) + { + /* carry cannot overflow 64 bits */ + uint64 carry = 0; + + for (i = 0; i < res_ndigits; i++) + { + carry = carry * NBASE + (i < var_ndigits ? var_digits[i] : 0); + res_digits[i] = (NumericDigit) (carry / divisor); + carry = carry % divisor; + } + } + else + { + /* carry may exceed 64 bits */ + uint128 carry = 0; + + for (i = 0; i < res_ndigits; i++) + { + carry = carry * NBASE + (i < var_ndigits ? var_digits[i] : 0); + res_digits[i] = (NumericDigit) (carry / divisor); + carry = carry % divisor; + } + } + + /* Store the quotient in result */ + digitbuf_free(result->buf); + result->ndigits = res_ndigits; + result->buf = res_buf; + result->digits = res_digits; + result->weight = res_weight; + result->sign = res_sign; + + /* Round or truncate to target rscale (and set result->dscale) */ + if (round) + round_var(result, rscale); + else + trunc_var(result, rscale); + + /* Strip leading/trailing zeroes */ + strip_var(result); +} +#endif + + +/* + * Default scale selection for division + * + * Returns the appropriate result scale for the division result. + */ +static int +select_div_scale(const NumericVar *var1, const NumericVar *var2) +{ + int weight1, + weight2, + qweight, + i; + NumericDigit firstdigit1, + firstdigit2; + int rscale; + + /* + * The result scale of a division isn't specified in any SQL standard. For + * PostgreSQL we select a result scale that will give at least + * NUMERIC_MIN_SIG_DIGITS significant digits, so that numeric gives a + * result no less accurate than float8; but use a scale not less than + * either input's display scale. + */ + + /* Get the actual (normalized) weight and first digit of each input */ + + weight1 = 0; /* values to use if var1 is zero */ + firstdigit1 = 0; + for (i = 0; i < var1->ndigits; i++) + { + firstdigit1 = var1->digits[i]; + if (firstdigit1 != 0) + { + weight1 = var1->weight - i; + break; + } + } + + weight2 = 0; /* values to use if var2 is zero */ + firstdigit2 = 0; + for (i = 0; i < var2->ndigits; i++) + { + firstdigit2 = var2->digits[i]; + if (firstdigit2 != 0) + { + weight2 = var2->weight - i; + break; + } + } + + /* + * Estimate weight of quotient. If the two first digits are equal, we + * can't be sure, but assume that var1 is less than var2. 
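[Illustrative aside, not part of the patch] div_var_int()/div_var_int64() above are plain short division: the running remainder is carried from digit to digit, and a wider carry type is used because the carry can reach divisor * NBASE - 1. A standalone sketch on an MSD-first base-10000 digit array with a 32-bit divisor; sk_short_div() is a hypothetical name, and sign, weight and rounding handling are omitted.

/* Sketch: short division by a machine-word divisor. */
#include <stdio.h>
#include <stdint.h>

#define SK_NBASE 10000

/* quot receives ndigits quotient digits; the final remainder is returned */
static uint32_t
sk_short_div(const int *num, int ndigits, uint32_t divisor, int *quot)
{
    uint64_t    carry = 0;      /* running remainder, always < divisor */
    int         i;

    for (i = 0; i < ndigits; i++)
    {
        /* carry can reach divisor * NBASE - 1, hence the 64-bit type */
        carry = carry * SK_NBASE + num[i];
        quot[i] = (int) (carry / divisor);
        carry = carry % divisor;
    }
    return (uint32_t) carry;
}

int
main(void)
{
    int         num[] = {1, 2345, 6789};    /* 123456789 */
    int         quot[3];
    uint32_t    rem = sk_short_div(num, 3, 1234, quot);

    /* 123456789 / 1234 = 100046 remainder 25, i.e. digits 0 0010 0046 */
    printf("%d %04d %04d rem %u\n", quot[0], quot[1], quot[2], (unsigned) rem);
    return 0;
}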
+ */ + qweight = weight1 - weight2; + if (firstdigit1 <= firstdigit2) + qweight--; + + /* Select result scale */ + rscale = NUMERIC_MIN_SIG_DIGITS - qweight * DEC_DIGITS; + rscale = Max(rscale, var1->dscale); + rscale = Max(rscale, var2->dscale); + rscale = Max(rscale, NUMERIC_MIN_DISPLAY_SCALE); + rscale = Min(rscale, NUMERIC_MAX_DISPLAY_SCALE); + + return rscale; +} + + +/* + * mod_var() - + * + * Calculate the modulo of two numerics at variable level + */ +static void +mod_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result) +{ + NumericVar tmp; + + init_var(&tmp); + + /* --------- + * We do this using the equation + * mod(x,y) = x - trunc(x/y)*y + * div_var can be persuaded to give us trunc(x/y) directly. + * ---------- + */ + div_var(var1, var2, &tmp, 0, false); + + mul_var(var2, &tmp, &tmp, var2->dscale); + + sub_var(var1, &tmp, result); + + free_var(&tmp); +} + + +/* + * div_mod_var() - + * + * Calculate the truncated integer quotient and numeric remainder of two + * numeric variables. The remainder is precise to var2's dscale. + */ +static void +div_mod_var(const NumericVar *var1, const NumericVar *var2, + NumericVar *quot, NumericVar *rem) +{ + NumericVar q; + NumericVar r; + + init_var(&q); + init_var(&r); + + /* + * Use div_var_fast() to get an initial estimate for the integer quotient. + * This might be inaccurate (per the warning in div_var_fast's comments), + * but we can correct it below. + */ + div_var_fast(var1, var2, &q, 0, false); + + /* Compute initial estimate of remainder using the quotient estimate. */ + mul_var(var2, &q, &r, var2->dscale); + sub_var(var1, &r, &r); + + /* + * Adjust the results if necessary --- the remainder should have the same + * sign as var1, and its absolute value should be less than the absolute + * value of var2. 
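[Illustrative aside, not part of the patch] mod_var() above relies on the identity mod(x,y) = x - trunc(x/y)*y, so the remainder always takes the sign of the dividend. A tiny sketch of the same identity on 64-bit integers, where C's / already truncates toward zero; sk_mod() is a hypothetical name.

/* Sketch: remainder via the truncated-quotient identity used by mod_var(). */
#include <stdio.h>
#include <stdint.h>

static int64_t
sk_mod(int64_t x, int64_t y)
{
    return x - (x / y) * y;     /* x / y truncates toward zero */
}

int
main(void)
{
    printf("%lld %lld\n",
           (long long) sk_mod(7, -3),   /* 1: sign follows the dividend */
           (long long) sk_mod(-7, 3));  /* -1 */
    return 0;
}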
+ */ + while (r.ndigits != 0 && r.sign != var1->sign) + { + /* The absolute value of the quotient is too large */ + if (var1->sign == var2->sign) + { + sub_var(&q, &const_one, &q); + add_var(&r, var2, &r); + } + else + { + add_var(&q, &const_one, &q); + sub_var(&r, var2, &r); + } + } + + while (cmp_abs(&r, var2) >= 0) + { + /* The absolute value of the quotient is too small */ + if (var1->sign == var2->sign) + { + add_var(&q, &const_one, &q); + sub_var(&r, var2, &r); + } + else + { + sub_var(&q, &const_one, &q); + add_var(&r, var2, &r); + } + } + + set_var_from_var(&q, quot); + set_var_from_var(&r, rem); + + free_var(&q); + free_var(&r); +} + + +/* + * ceil_var() - + * + * Return the smallest integer greater than or equal to the argument + * on variable level + */ +static void +ceil_var(const NumericVar *var, NumericVar *result) +{ + NumericVar tmp; + + init_var(&tmp); + set_var_from_var(var, &tmp); + + trunc_var(&tmp, 0); + + if (var->sign == NUMERIC_POS && cmp_var(var, &tmp) != 0) + add_var(&tmp, &const_one, &tmp); + + set_var_from_var(&tmp, result); + free_var(&tmp); +} + + +/* + * floor_var() - + * + * Return the largest integer equal to or less than the argument + * on variable level + */ +static void +floor_var(const NumericVar *var, NumericVar *result) +{ + NumericVar tmp; + + init_var(&tmp); + set_var_from_var(var, &tmp); + + trunc_var(&tmp, 0); + + if (var->sign == NUMERIC_NEG && cmp_var(var, &tmp) != 0) + sub_var(&tmp, &const_one, &tmp); + + set_var_from_var(&tmp, result); + free_var(&tmp); +} + + +/* + * gcd_var() - + * + * Calculate the greatest common divisor of two numerics at variable level + */ +static void +gcd_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result) +{ + int res_dscale; + int cmp; + NumericVar tmp_arg; + NumericVar mod; + + res_dscale = Max(var1->dscale, var2->dscale); + + /* + * Arrange for var1 to be the number with the greater absolute value. + * + * This would happen automatically in the loop below, but avoids an + * expensive modulo operation. + */ + cmp = cmp_abs(var1, var2); + if (cmp < 0) + { + const NumericVar *tmp = var1; + + var1 = var2; + var2 = tmp; + } + + /* + * Also avoid the taking the modulo if the inputs have the same absolute + * value, or if the smaller input is zero. + */ + if (cmp == 0 || var2->ndigits == 0) + { + set_var_from_var(var1, result); + result->sign = NUMERIC_POS; + result->dscale = res_dscale; + return; + } + + init_var(&tmp_arg); + init_var(&mod); + + /* Use the Euclidean algorithm to find the GCD */ + set_var_from_var(var1, &tmp_arg); + set_var_from_var(var2, result); + + for (;;) + { + /* this loop can take a while, so allow it to be interrupted */ + CHECK_FOR_INTERRUPTS(); + + mod_var(&tmp_arg, result, &mod); + if (mod.ndigits == 0) + break; + set_var_from_var(result, &tmp_arg); + set_var_from_var(&mod, result); + } + result->sign = NUMERIC_POS; + result->dscale = res_dscale; + + free_var(&tmp_arg); + free_var(&mod); +} + + +/* + * sqrt_var() - + * + * Compute the square root of x using the Karatsuba Square Root algorithm. + * NOTE: we allow rscale < 0 here, implying rounding before the decimal + * point. 
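[Illustrative aside, not part of the patch] gcd_var() above is the classic Euclidean algorithm with a non-negative result and the larger operand placed first. A standalone sketch on 64-bit integers; sk_gcd() is a hypothetical name, and the dscale bookkeeping is omitted.

/* Sketch: Euclidean GCD with a non-negative result. */
#include <stdio.h>
#include <stdint.h>

static int64_t
sk_gcd(int64_t a, int64_t b)
{
    /* work with absolute values; gcd(a, 0) = |a| */
    if (a < 0)
        a = -a;
    if (b < 0)
        b = -b;

    while (b != 0)
    {
        int64_t     r = a % b;

        a = b;
        b = r;
    }
    return a;
}

int
main(void)
{
    printf("%lld\n", (long long) sk_gcd(-462, 1071));   /* 21 */
    return 0;
}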
+ */ +static void +sqrt_var(const NumericVar *arg, NumericVar *result, int rscale) +{ + int stat; + int res_weight; + int res_ndigits; + int src_ndigits; + int step; + int ndigits[32]; + int blen; + int64 arg_int64; + int src_idx; + int64 s_int64; + int64 r_int64; + NumericVar s_var; + NumericVar r_var; + NumericVar a0_var; + NumericVar a1_var; + NumericVar q_var; + NumericVar u_var; + + stat = cmp_var(arg, &const_zero); + if (stat == 0) + { + zero_var(result); + result->dscale = rscale; + return; + } + + /* + * SQL2003 defines sqrt() in terms of power, so we need to emit the right + * SQLSTATE error code if the operand is negative. + */ + if (stat < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_POWER_FUNCTION), + errmsg("cannot take square root of a negative number"))); + + init_var(&s_var); + init_var(&r_var); + init_var(&a0_var); + init_var(&a1_var); + init_var(&q_var); + init_var(&u_var); + + /* + * The result weight is half the input weight, rounded towards minus + * infinity --- res_weight = floor(arg->weight / 2). + */ + if (arg->weight >= 0) + res_weight = arg->weight / 2; + else + res_weight = -((-arg->weight - 1) / 2 + 1); + + /* + * Number of NBASE digits to compute. To ensure correct rounding, compute + * at least 1 extra decimal digit. We explicitly allow rscale to be + * negative here, but must always compute at least 1 NBASE digit. Thus + * res_ndigits = res_weight + 1 + ceil((rscale + 1) / DEC_DIGITS) or 1. + */ + if (rscale + 1 >= 0) + res_ndigits = res_weight + 1 + (rscale + DEC_DIGITS) / DEC_DIGITS; + else + res_ndigits = res_weight + 1 - (-rscale - 1) / DEC_DIGITS; + res_ndigits = Max(res_ndigits, 1); + + /* + * Number of source NBASE digits logically required to produce a result + * with this precision --- every digit before the decimal point, plus 2 + * for each result digit after the decimal point (or minus 2 for each + * result digit we round before the decimal point). + */ + src_ndigits = arg->weight + 1 + (res_ndigits - res_weight - 1) * 2; + src_ndigits = Max(src_ndigits, 1); + + /* ---------- + * From this point on, we treat the input and the result as integers and + * compute the integer square root and remainder using the Karatsuba + * Square Root algorithm, which may be written recursively as follows: + * + * SqrtRem(n = a3*b^3 + a2*b^2 + a1*b + a0): + * [ for some base b, and coefficients a0,a1,a2,a3 chosen so that + * 0 <= a0,a1,a2 < b and a3 >= b/4 ] + * Let (s,r) = SqrtRem(a3*b + a2) + * Let (q,u) = DivRem(r*b + a1, 2*s) + * Let s = s*b + q + * Let r = u*b + a0 - q^2 + * If r < 0 Then + * Let r = r + s + * Let s = s - 1 + * Let r = r + s + * Return (s,r) + * + * See "Karatsuba Square Root", Paul Zimmermann, INRIA Research Report + * RR-3805, November 1999. At the time of writing this was available + * on the net at <https://hal.inria.fr/inria-00072854>. + * + * The way to read the assumption "n = a3*b^3 + a2*b^2 + a1*b + a0" is + * "choose a base b such that n requires at least four base-b digits to + * express; then those digits are a3,a2,a1,a0, with a3 possibly larger + * than b". For optimal performance, b should have approximately a + * quarter the number of digits in the input, so that the outer square + * root computes roughly twice as many digits as the inner one. For + * simplicity, we choose b = NBASE^blen, an integer power of NBASE. + * + * We implement the algorithm iteratively rather than recursively, to + * allow the working variables to be reused. 
With this approach, each + * digit of the input is read precisely once --- src_idx tracks the number + * of input digits used so far. + * + * The array ndigits[] holds the number of NBASE digits of the input that + * will have been used at the end of each iteration, which roughly doubles + * each time. Note that the array elements are stored in reverse order, + * so if the final iteration requires src_ndigits = 37 input digits, the + * array will contain [37,19,11,7,5,3], and we would start by computing + * the square root of the 3 most significant NBASE digits. + * + * In each iteration, we choose blen to be the largest integer for which + * the input number has a3 >= b/4, when written in the form above. In + * general, this means blen = src_ndigits / 4 (truncated), but if + * src_ndigits is a multiple of 4, that might lead to the coefficient a3 + * being less than b/4 (if the first input digit is less than NBASE/4), in + * which case we choose blen = src_ndigits / 4 - 1. The number of digits + * in the inner square root is then src_ndigits - 2*blen. So, for + * example, if we have src_ndigits = 26 initially, the array ndigits[] + * will be either [26,14,8,4] or [26,14,8,6,4], depending on the size of + * the first input digit. + * + * Additionally, we can put an upper bound on the number of steps required + * as follows --- suppose that the number of source digits is an n-bit + * number in the range [2^(n-1), 2^n-1], then blen will be in the range + * [2^(n-3)-1, 2^(n-2)-1] and the number of digits in the inner square + * root will be in the range [2^(n-2), 2^(n-1)+1]. In the next step, blen + * will be in the range [2^(n-4)-1, 2^(n-3)] and the number of digits in + * the next inner square root will be in the range [2^(n-3), 2^(n-2)+1]. + * This pattern repeats, and in the worst case the array ndigits[] will + * contain [2^n-1, 2^(n-1)+1, 2^(n-2)+1, ... 9, 5, 3], and the computation + * will require n steps. Therefore, since all digit array sizes are + * signed 32-bit integers, the number of steps required is guaranteed to + * be less than 32. + * ---------- + */ + step = 0; + while ((ndigits[step] = src_ndigits) > 4) + { + /* Choose b so that a3 >= b/4, as described above */ + blen = src_ndigits / 4; + if (blen * 4 == src_ndigits && arg->digits[0] < NBASE / 4) + blen--; + + /* Number of digits in the next step (inner square root) */ + src_ndigits -= 2 * blen; + step++; + } + + /* + * First iteration (innermost square root and remainder): + * + * Here src_ndigits <= 4, and the input fits in an int64. Its square root + * has at most 9 decimal digits, so estimate it using double precision + * arithmetic, which will in fact almost certainly return the correct + * result with no further correction required. + */ + arg_int64 = arg->digits[0]; + for (src_idx = 1; src_idx < src_ndigits; src_idx++) + { + arg_int64 *= NBASE; + if (src_idx < arg->ndigits) + arg_int64 += arg->digits[src_idx]; + } + + s_int64 = (int64) sqrt((double) arg_int64); + r_int64 = arg_int64 - s_int64 * s_int64; + + /* + * Use Newton's method to correct the result, if necessary. + * + * This uses integer division with truncation to compute the truncated + * integer square root by iterating using the formula x -> (x + n/x) / 2. + * This is known to converge to isqrt(n), unless n+1 is a perfect square. + * If n+1 is a perfect square, the sequence will oscillate between the two + * values isqrt(n) and isqrt(n)+1, so we can be assured of convergence by + * checking the remainder. 
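[Illustrative aside, not part of the patch] The innermost step described above estimates the square root with double-precision sqrt() and then corrects it with the Newton iteration until the remainder satisfies 0 <= r <= 2*s, i.e. s is the truncated integer square root. A standalone sketch for a single 64-bit input; sk_isqrt() is a hypothetical name.

/* Sketch: truncated integer square root via a double estimate plus Newton
 * correction.  Assumes n >= 0 and n fits comfortably in int64. */
#include <math.h>
#include <stdio.h>
#include <stdint.h>

static int64_t
sk_isqrt(int64_t n, int64_t *rem)
{
    int64_t     s = (int64_t) sqrt((double) n);
    int64_t     r = n - s * s;

    /* s is correct exactly when 0 <= r <= 2*s, i.e. s^2 <= n < (s+1)^2 */
    while (r < 0 || r > 2 * s)
    {
        s = (s + n / s) / 2;
        r = n - s * s;
    }
    *rem = r;
    return s;
}

int
main(void)
{
    int64_t     r;
    int64_t     s = sk_isqrt(99999999999999LL, &r);

    /* 9999999^2 = 99999980000001, remainder 19999998 */
    printf("%lld rem %lld\n", (long long) s, (long long) r);
    return 0;
}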
+ */ + while (r_int64 < 0 || r_int64 > 2 * s_int64) + { + s_int64 = (s_int64 + arg_int64 / s_int64) / 2; + r_int64 = arg_int64 - s_int64 * s_int64; + } + + /* + * Iterations with src_ndigits <= 8: + * + * The next 1 or 2 iterations compute larger (outer) square roots with + * src_ndigits <= 8, so the result still fits in an int64 (even though the + * input no longer does) and we can continue to compute using int64 + * variables to avoid more expensive numeric computations. + * + * It is fairly easy to see that there is no risk of the intermediate + * values below overflowing 64-bit integers. In the worst case, the + * previous iteration will have computed a 3-digit square root (of a + * 6-digit input less than NBASE^6 / 4), so at the start of this + * iteration, s will be less than NBASE^3 / 2 = 10^12 / 2, and r will be + * less than 10^12. In this case, blen will be 1, so numer will be less + * than 10^17, and denom will be less than 10^12 (and hence u will also be + * less than 10^12). Finally, since q^2 = u*b + a0 - r, we can also be + * sure that q^2 < 10^17. Therefore all these quantities fit comfortably + * in 64-bit integers. + */ + step--; + while (step >= 0 && (src_ndigits = ndigits[step]) <= 8) + { + int b; + int a0; + int a1; + int i; + int64 numer; + int64 denom; + int64 q; + int64 u; + + blen = (src_ndigits - src_idx) / 2; + + /* Extract a1 and a0, and compute b */ + a0 = 0; + a1 = 0; + b = 1; + + for (i = 0; i < blen; i++, src_idx++) + { + b *= NBASE; + a1 *= NBASE; + if (src_idx < arg->ndigits) + a1 += arg->digits[src_idx]; + } + + for (i = 0; i < blen; i++, src_idx++) + { + a0 *= NBASE; + if (src_idx < arg->ndigits) + a0 += arg->digits[src_idx]; + } + + /* Compute (q,u) = DivRem(r*b + a1, 2*s) */ + numer = r_int64 * b + a1; + denom = 2 * s_int64; + q = numer / denom; + u = numer - q * denom; + + /* Compute s = s*b + q and r = u*b + a0 - q^2 */ + s_int64 = s_int64 * b + q; + r_int64 = u * b + a0 - q * q; + + if (r_int64 < 0) + { + /* s is too large by 1; set r += s, s--, r += s */ + r_int64 += s_int64; + s_int64--; + r_int64 += s_int64; + } + + Assert(src_idx == src_ndigits); /* All input digits consumed */ + step--; + } + + /* + * On platforms with 128-bit integer support, we can further delay the + * need to use numeric variables. + */ +#ifdef HAVE_INT128 + if (step >= 0) + { + int128 s_int128; + int128 r_int128; + + s_int128 = s_int64; + r_int128 = r_int64; + + /* + * Iterations with src_ndigits <= 16: + * + * The result fits in an int128 (even though the input doesn't) so we + * use int128 variables to avoid more expensive numeric computations. 
+ */ + while (step >= 0 && (src_ndigits = ndigits[step]) <= 16) + { + int64 b; + int64 a0; + int64 a1; + int64 i; + int128 numer; + int128 denom; + int128 q; + int128 u; + + blen = (src_ndigits - src_idx) / 2; + + /* Extract a1 and a0, and compute b */ + a0 = 0; + a1 = 0; + b = 1; + + for (i = 0; i < blen; i++, src_idx++) + { + b *= NBASE; + a1 *= NBASE; + if (src_idx < arg->ndigits) + a1 += arg->digits[src_idx]; + } + + for (i = 0; i < blen; i++, src_idx++) + { + a0 *= NBASE; + if (src_idx < arg->ndigits) + a0 += arg->digits[src_idx]; + } + + /* Compute (q,u) = DivRem(r*b + a1, 2*s) */ + numer = r_int128 * b + a1; + denom = 2 * s_int128; + q = numer / denom; + u = numer - q * denom; + + /* Compute s = s*b + q and r = u*b + a0 - q^2 */ + s_int128 = s_int128 * b + q; + r_int128 = u * b + a0 - q * q; + + if (r_int128 < 0) + { + /* s is too large by 1; set r += s, s--, r += s */ + r_int128 += s_int128; + s_int128--; + r_int128 += s_int128; + } + + Assert(src_idx == src_ndigits); /* All input digits consumed */ + step--; + } + + /* + * All remaining iterations require numeric variables. Convert the + * integer values to NumericVar and continue. Note that in the final + * iteration we don't need the remainder, so we can save a few cycles + * there by not fully computing it. + */ + int128_to_numericvar(s_int128, &s_var); + if (step >= 0) + int128_to_numericvar(r_int128, &r_var); + } + else + { + int64_to_numericvar(s_int64, &s_var); + /* step < 0, so we certainly don't need r */ + } +#else /* !HAVE_INT128 */ + int64_to_numericvar(s_int64, &s_var); + if (step >= 0) + int64_to_numericvar(r_int64, &r_var); +#endif /* HAVE_INT128 */ + + /* + * The remaining iterations with src_ndigits > 8 (or 16, if have int128) + * use numeric variables. + */ + while (step >= 0) + { + int tmp_len; + + src_ndigits = ndigits[step]; + blen = (src_ndigits - src_idx) / 2; + + /* Extract a1 and a0 */ + if (src_idx < arg->ndigits) + { + tmp_len = Min(blen, arg->ndigits - src_idx); + alloc_var(&a1_var, tmp_len); + memcpy(a1_var.digits, arg->digits + src_idx, + tmp_len * sizeof(NumericDigit)); + a1_var.weight = blen - 1; + a1_var.sign = NUMERIC_POS; + a1_var.dscale = 0; + strip_var(&a1_var); + } + else + { + zero_var(&a1_var); + a1_var.dscale = 0; + } + src_idx += blen; + + if (src_idx < arg->ndigits) + { + tmp_len = Min(blen, arg->ndigits - src_idx); + alloc_var(&a0_var, tmp_len); + memcpy(a0_var.digits, arg->digits + src_idx, + tmp_len * sizeof(NumericDigit)); + a0_var.weight = blen - 1; + a0_var.sign = NUMERIC_POS; + a0_var.dscale = 0; + strip_var(&a0_var); + } + else + { + zero_var(&a0_var); + a0_var.dscale = 0; + } + src_idx += blen; + + /* Compute (q,u) = DivRem(r*b + a1, 2*s) */ + set_var_from_var(&r_var, &q_var); + q_var.weight += blen; + add_var(&q_var, &a1_var, &q_var); + add_var(&s_var, &s_var, &u_var); + div_mod_var(&q_var, &u_var, &q_var, &u_var); + + /* Compute s = s*b + q */ + s_var.weight += blen; + add_var(&s_var, &q_var, &s_var); + + /* + * Compute r = u*b + a0 - q^2. + * + * In the final iteration, we don't actually need r; we just need to + * know whether it is negative, so that we know whether to adjust s. + * So instead of the final subtraction we can just compare. 
+ */ + u_var.weight += blen; + add_var(&u_var, &a0_var, &u_var); + mul_var(&q_var, &q_var, &q_var, 0); + + if (step > 0) + { + /* Need r for later iterations */ + sub_var(&u_var, &q_var, &r_var); + if (r_var.sign == NUMERIC_NEG) + { + /* s is too large by 1; set r += s, s--, r += s */ + add_var(&r_var, &s_var, &r_var); + sub_var(&s_var, &const_one, &s_var); + add_var(&r_var, &s_var, &r_var); + } + } + else + { + /* Don't need r anymore, except to test if s is too large by 1 */ + if (cmp_var(&u_var, &q_var) < 0) + sub_var(&s_var, &const_one, &s_var); + } + + Assert(src_idx == src_ndigits); /* All input digits consumed */ + step--; + } + + /* + * Construct the final result, rounding it to the requested precision. + */ + set_var_from_var(&s_var, result); + result->weight = res_weight; + result->sign = NUMERIC_POS; + + /* Round to target rscale (and set result->dscale) */ + round_var(result, rscale); + + /* Strip leading and trailing zeroes */ + strip_var(result); + + free_var(&s_var); + free_var(&r_var); + free_var(&a0_var); + free_var(&a1_var); + free_var(&q_var); + free_var(&u_var); +} + + +/* + * exp_var() - + * + * Raise e to the power of x, computed to rscale fractional digits + */ +static void +exp_var(const NumericVar *arg, NumericVar *result, int rscale) +{ + NumericVar x; + NumericVar elem; + int ni; + double val; + int dweight; + int ndiv2; + int sig_digits; + int local_rscale; + + init_var(&x); + init_var(&elem); + + set_var_from_var(arg, &x); + + /* + * Estimate the dweight of the result using floating point arithmetic, so + * that we can choose an appropriate local rscale for the calculation. + */ + val = numericvar_to_double_no_overflow(&x); + + /* Guard against overflow/underflow */ + /* If you change this limit, see also power_var()'s limit */ + if (fabs(val) >= NUMERIC_MAX_RESULT_SCALE * 3) + { + if (val > 0) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value overflows numeric format"))); + zero_var(result); + result->dscale = rscale; + return; + } + + /* decimal weight = log10(e^x) = x * log10(e) */ + dweight = (int) (val * 0.434294481903252); + + /* + * Reduce x to the range -0.01 <= x <= 0.01 (approximately) by dividing by + * 2^ndiv2, to improve the convergence rate of the Taylor series. + * + * Note that the overflow check above ensures that fabs(x) < 6000, which + * means that ndiv2 <= 20 here. + */ + if (fabs(val) > 0.01) + { + ndiv2 = 1; + val /= 2; + + while (fabs(val) > 0.01) + { + ndiv2++; + val /= 2; + } + + local_rscale = x.dscale + ndiv2; + div_var_int(&x, 1 << ndiv2, 0, &x, local_rscale, true); + } + else + ndiv2 = 0; + + /* + * Set the scale for the Taylor series expansion. The final result has + * (dweight + rscale + 1) significant digits. In addition, we have to + * raise the Taylor series result to the power 2^ndiv2, which introduces + * an error of up to around log10(2^ndiv2) digits, so work with this many + * extra digits of precision (plus a few more for good measure). + */ + sig_digits = 1 + dweight + rscale + (int) (ndiv2 * 0.301029995663981); + sig_digits = Max(sig_digits, 0) + 8; + + local_rscale = sig_digits - 1; + + /* + * Use the Taylor series + * + * exp(x) = 1 + x + x^2/2! + x^3/3! + ... + * + * Given the limited range of x, this should converge reasonably quickly. + * We run the series until the terms fall below the local_rscale limit. 
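[Illustrative aside, not part of the patch] exp_var() halves the argument until |x| <= 0.01, sums the Taylor series, and then squares the partial result ndiv2 times to undo the reduction. A standalone double-precision sketch of the same scheme; sk_exp() is a hypothetical name, and the numeric-specific rscale management is omitted.

/* Sketch: exp(x) via argument halving, Taylor series, repeated squaring. */
#include <math.h>
#include <stdio.h>

static double
sk_exp(double x)
{
    int         ndiv2 = 0;
    double      sum, term;
    int         n;

    /* reduce to |x| <= 0.01 by dividing by 2^ndiv2 */
    while (fabs(x) > 0.01)
    {
        x /= 2;
        ndiv2++;
    }

    /* exp(x) = 1 + x + x^2/2! + x^3/3! + ... */
    sum = 1 + x;
    term = x;
    for (n = 2; fabs(term) > 1e-18; n++)
    {
        term = term * x / n;
        sum += term;
    }

    /* undo the range reduction: exp(x * 2^k) = exp(x)^(2^k) */
    while (ndiv2-- > 0)
        sum *= sum;

    return sum;
}

int
main(void)
{
    printf("%.9f vs %.9f\n", sk_exp(3.7), exp(3.7));
    return 0;
}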
+ */ + add_var(&const_one, &x, result); + + mul_var(&x, &x, &elem, local_rscale); + ni = 2; + div_var_int(&elem, ni, 0, &elem, local_rscale, true); + + while (elem.ndigits != 0) + { + add_var(result, &elem, result); + + mul_var(&elem, &x, &elem, local_rscale); + ni++; + div_var_int(&elem, ni, 0, &elem, local_rscale, true); + } + + /* + * Compensate for the argument range reduction. Since the weight of the + * result doubles with each multiplication, we can reduce the local rscale + * as we proceed. + */ + while (ndiv2-- > 0) + { + local_rscale = sig_digits - result->weight * 2 * DEC_DIGITS; + local_rscale = Max(local_rscale, NUMERIC_MIN_DISPLAY_SCALE); + mul_var(result, result, result, local_rscale); + } + + /* Round to requested rscale */ + round_var(result, rscale); + + free_var(&x); + free_var(&elem); +} + + +/* + * Estimate the dweight of the most significant decimal digit of the natural + * logarithm of a number. + * + * Essentially, we're approximating log10(abs(ln(var))). This is used to + * determine the appropriate rscale when computing natural logarithms. + * + * Note: many callers call this before range-checking the input. Therefore, + * we must be robust against values that are invalid to apply ln() to. + * We don't wish to throw an error here, so just return zero in such cases. + */ +static int +estimate_ln_dweight(const NumericVar *var) +{ + int ln_dweight; + + /* Caller should fail on ln(negative), but for the moment return zero */ + if (var->sign != NUMERIC_POS) + return 0; + + if (cmp_var(var, &const_zero_point_nine) >= 0 && + cmp_var(var, &const_one_point_one) <= 0) + { + /* + * 0.9 <= var <= 1.1 + * + * ln(var) has a negative weight (possibly very large). To get a + * reasonably accurate result, estimate it using ln(1+x) ~= x. + */ + NumericVar x; + + init_var(&x); + sub_var(var, &const_one, &x); + + if (x.ndigits > 0) + { + /* Use weight of most significant decimal digit of x */ + ln_dweight = x.weight * DEC_DIGITS + (int) log10(x.digits[0]); + } + else + { + /* x = 0. Since ln(1) = 0 exactly, we don't need extra digits */ + ln_dweight = 0; + } + + free_var(&x); + } + else + { + /* + * Estimate the logarithm using the first couple of digits from the + * input number. This will give an accurate result whenever the input + * is not too close to 1. 
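[Illustrative aside, not part of the patch] The estimate described above treats the input as digits * 10^dweight and uses ln(var) ~= ln(digits) + dweight * ln(10), with the ln(1+x) ~= x shortcut when the input is close to 1. A double-based sketch of the same estimate; sk_ln_dweight() is a hypothetical name, and it returns the weight of the leading decimal digit of |ln(x)| via floor(log10(...)) rather than operating on NumericVar digits.

/* Sketch: estimate the decimal weight of ln(x)'s leading digit. */
#include <math.h>
#include <stdio.h>

static int
sk_ln_dweight(double x)
{
    double      ln_x;

    if (x <= 0)
        return 0;               /* callers are expected to reject these */

    if (x >= 0.9 && x <= 1.1)
    {
        /* near 1, ln(x) ~= x - 1, which may be very small */
        if (x == 1.0)
            return 0;
        ln_x = x - 1;
    }
    else
        ln_x = log(x);

    return (int) floor(log10(fabs(ln_x)));
}

int
main(void)
{
    /* ln(1.0001) ~= 1e-4 -> weight -4;  ln(1e30) ~= 69 -> weight 1 */
    printf("%d %d\n", sk_ln_dweight(1.0001), sk_ln_dweight(1e30));
    return 0;
}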
+ */ + if (var->ndigits > 0) + { + int digits; + int dweight; + double ln_var; + + digits = var->digits[0]; + dweight = var->weight * DEC_DIGITS; + + if (var->ndigits > 1) + { + digits = digits * NBASE + var->digits[1]; + dweight -= DEC_DIGITS; + } + + /*---------- + * We have var ~= digits * 10^dweight + * so ln(var) ~= ln(digits) + dweight * ln(10) + *---------- + */ + ln_var = log((double) digits) + dweight * 2.302585092994046; + ln_dweight = (int) log10(fabs(ln_var)); + } + else + { + /* Caller should fail on ln(0), but for the moment return zero */ + ln_dweight = 0; + } + } + + return ln_dweight; +} + + +/* + * ln_var() - + * + * Compute the natural log of x + */ +static void +ln_var(const NumericVar *arg, NumericVar *result, int rscale) +{ + NumericVar x; + NumericVar xx; + int ni; + NumericVar elem; + NumericVar fact; + int nsqrt; + int local_rscale; + int cmp; + + cmp = cmp_var(arg, &const_zero); + if (cmp == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_LOG), + errmsg("cannot take logarithm of zero"))); + else if (cmp < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_LOG), + errmsg("cannot take logarithm of a negative number"))); + + init_var(&x); + init_var(&xx); + init_var(&elem); + init_var(&fact); + + set_var_from_var(arg, &x); + set_var_from_var(&const_two, &fact); + + /* + * Reduce input into range 0.9 < x < 1.1 with repeated sqrt() operations. + * + * The final logarithm will have up to around rscale+6 significant digits. + * Each sqrt() will roughly halve the weight of x, so adjust the local + * rscale as we work so that we keep this many significant digits at each + * step (plus a few more for good measure). + * + * Note that we allow local_rscale < 0 during this input reduction + * process, which implies rounding before the decimal point. sqrt_var() + * explicitly supports this, and it significantly reduces the work + * required to reduce very large inputs to the required range. Once the + * input reduction is complete, x.weight will be 0 and its display scale + * will be non-negative again. + */ + nsqrt = 0; + while (cmp_var(&x, &const_zero_point_nine) <= 0) + { + local_rscale = rscale - x.weight * DEC_DIGITS / 2 + 8; + sqrt_var(&x, &x, local_rscale); + mul_var(&fact, &const_two, &fact, 0); + nsqrt++; + } + while (cmp_var(&x, &const_one_point_one) >= 0) + { + local_rscale = rscale - x.weight * DEC_DIGITS / 2 + 8; + sqrt_var(&x, &x, local_rscale); + mul_var(&fact, &const_two, &fact, 0); + nsqrt++; + } + + /* + * We use the Taylor series for 0.5 * ln((1+z)/(1-z)), + * + * z + z^3/3 + z^5/5 + ... + * + * where z = (x-1)/(x+1) is in the range (approximately) -0.053 .. 0.048 + * due to the above range-reduction of x. + * + * The convergence of this is not as fast as one would like, but is + * tolerable given that z is small. + * + * The Taylor series result will be multiplied by 2^(nsqrt+1), which has a + * decimal weight of (nsqrt+1) * log10(2), so work with this many extra + * digits of precision (plus a few more for good measure). 
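[Illustrative aside, not part of the patch] ln_var() repeatedly takes square roots until 0.9 < x < 1.1, sums z + z^3/3 + z^5/5 + ... with z = (x-1)/(x+1), and then multiplies by 2^(nsqrt+1) to undo the reduction. A double-precision sketch of the same scheme; sk_ln() is a hypothetical name, and the per-step rscale adjustments are omitted.

/* Sketch: ln(x) via sqrt range reduction plus the atanh(z) series. */
#include <math.h>
#include <stdio.h>

static double
sk_ln(double x)
{
    double      fact = 2.0;     /* 2^(nsqrt + 1) */
    double      z, zz, term, sum;
    int         n;

    if (x <= 0)
        return NAN;             /* ln() undefined; callers must check */

    /* reduce into 0.9 < x < 1.1 with repeated sqrt(), doubling fact */
    while (x <= 0.9 || x >= 1.1)
    {
        x = sqrt(x);
        fact *= 2;
    }

    /* 0.5 * ln(x) = atanh(z) = z + z^3/3 + z^5/5 + ... */
    z = (x - 1) / (x + 1);
    zz = z * z;
    sum = z;
    term = z;
    for (n = 3; fabs(term) > 1e-18; n += 2)
    {
        term *= zz;
        sum += term / n;
    }

    return fact * sum;          /* scale back: ln(x^(2^k)) = 2^k * ln(x) */
}

int
main(void)
{
    printf("%.9f vs %.9f\n", sk_ln(123.456), log(123.456));
    return 0;
}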
+ */ + local_rscale = rscale + (int) ((nsqrt + 1) * 0.301029995663981) + 8; + + sub_var(&x, &const_one, result); + add_var(&x, &const_one, &elem); + div_var_fast(result, &elem, result, local_rscale, true); + set_var_from_var(result, &xx); + mul_var(result, result, &x, local_rscale); + + ni = 1; + + for (;;) + { + ni += 2; + mul_var(&xx, &x, &xx, local_rscale); + div_var_int(&xx, ni, 0, &elem, local_rscale, true); + + if (elem.ndigits == 0) + break; + + add_var(result, &elem, result); + + if (elem.weight < (result->weight - local_rscale * 2 / DEC_DIGITS)) + break; + } + + /* Compensate for argument range reduction, round to requested rscale */ + mul_var(result, &fact, result, rscale); + + free_var(&x); + free_var(&xx); + free_var(&elem); + free_var(&fact); +} + + +/* + * log_var() - + * + * Compute the logarithm of num in a given base. + * + * Note: this routine chooses dscale of the result. + */ +static void +log_var(const NumericVar *base, const NumericVar *num, NumericVar *result) +{ + NumericVar ln_base; + NumericVar ln_num; + int ln_base_dweight; + int ln_num_dweight; + int result_dweight; + int rscale; + int ln_base_rscale; + int ln_num_rscale; + + init_var(&ln_base); + init_var(&ln_num); + + /* Estimated dweights of ln(base), ln(num) and the final result */ + ln_base_dweight = estimate_ln_dweight(base); + ln_num_dweight = estimate_ln_dweight(num); + result_dweight = ln_num_dweight - ln_base_dweight; + + /* + * Select the scale of the result so that it will have at least + * NUMERIC_MIN_SIG_DIGITS significant digits and is not less than either + * input's display scale. + */ + rscale = NUMERIC_MIN_SIG_DIGITS - result_dweight; + rscale = Max(rscale, base->dscale); + rscale = Max(rscale, num->dscale); + rscale = Max(rscale, NUMERIC_MIN_DISPLAY_SCALE); + rscale = Min(rscale, NUMERIC_MAX_DISPLAY_SCALE); + + /* + * Set the scales for ln(base) and ln(num) so that they each have more + * significant digits than the final result. + */ + ln_base_rscale = rscale + result_dweight - ln_base_dweight + 8; + ln_base_rscale = Max(ln_base_rscale, NUMERIC_MIN_DISPLAY_SCALE); + + ln_num_rscale = rscale + result_dweight - ln_num_dweight + 8; + ln_num_rscale = Max(ln_num_rscale, NUMERIC_MIN_DISPLAY_SCALE); + + /* Form natural logarithms */ + ln_var(base, &ln_base, ln_base_rscale); + ln_var(num, &ln_num, ln_num_rscale); + + /* Divide and round to the required scale */ + div_var_fast(&ln_num, &ln_base, result, rscale, true); + + free_var(&ln_num); + free_var(&ln_base); +} + + +/* + * power_var() - + * + * Raise base to the power of exp + * + * Note: this routine chooses dscale of the result. + */ +static void +power_var(const NumericVar *base, const NumericVar *exp, NumericVar *result) +{ + int res_sign; + NumericVar abs_base; + NumericVar ln_base; + NumericVar ln_num; + int ln_dweight; + int rscale; + int sig_digits; + int local_rscale; + double val; + + /* If exp can be represented as an integer, use power_var_int */ + if (exp->ndigits == 0 || exp->ndigits <= exp->weight + 1) + { + /* exact integer, but does it fit in int? */ + int64 expval64; + + if (numericvar_to_int64(exp, &expval64)) + { + if (expval64 >= PG_INT32_MIN && expval64 <= PG_INT32_MAX) + { + /* Okay, use power_var_int */ + power_var_int(base, (int) expval64, exp->dscale, result); + return; + } + } + } + + /* + * This avoids log(0) for cases of 0 raised to a non-integer. 0 ^ 0 is + * handled by power_var_int(). 
+ */ + if (cmp_var(base, &const_zero) == 0) + { + set_var_from_var(&const_zero, result); + result->dscale = NUMERIC_MIN_SIG_DIGITS; /* no need to round */ + return; + } + + init_var(&abs_base); + init_var(&ln_base); + init_var(&ln_num); + + /* + * If base is negative, insist that exp be an integer. The result is then + * positive if exp is even and negative if exp is odd. + */ + if (base->sign == NUMERIC_NEG) + { + /* + * Check that exp is an integer. This error code is defined by the + * SQL standard, and matches other errors in numeric_power(). + */ + if (exp->ndigits > 0 && exp->ndigits > exp->weight + 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_POWER_FUNCTION), + errmsg("a negative number raised to a non-integer power yields a complex result"))); + + /* Test if exp is odd or even */ + if (exp->ndigits > 0 && exp->ndigits == exp->weight + 1 && + (exp->digits[exp->ndigits - 1] & 1)) + res_sign = NUMERIC_NEG; + else + res_sign = NUMERIC_POS; + + /* Then work with abs(base) below */ + set_var_from_var(base, &abs_base); + abs_base.sign = NUMERIC_POS; + base = &abs_base; + } + else + res_sign = NUMERIC_POS; + + /*---------- + * Decide on the scale for the ln() calculation. For this we need an + * estimate of the weight of the result, which we obtain by doing an + * initial low-precision calculation of exp * ln(base). + * + * We want result = e ^ (exp * ln(base)) + * so result dweight = log10(result) = exp * ln(base) * log10(e) + * + * We also perform a crude overflow test here so that we can exit early if + * the full-precision result is sure to overflow, and to guard against + * integer overflow when determining the scale for the real calculation. + * exp_var() supports inputs up to NUMERIC_MAX_RESULT_SCALE * 3, so the + * result will overflow if exp * ln(base) >= NUMERIC_MAX_RESULT_SCALE * 3. + * Since the values here are only approximations, we apply a small fuzz + * factor to this overflow test and let exp_var() determine the exact + * overflow threshold so that it is consistent for all inputs. + *---------- + */ + ln_dweight = estimate_ln_dweight(base); + + /* + * Set the scale for the low-precision calculation, computing ln(base) to + * around 8 significant digits. Note that ln_dweight may be as small as + * -SHRT_MAX, so the scale may exceed NUMERIC_MAX_DISPLAY_SCALE here. 
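+ * (For instance, with ln_dweight = -20 this low-precision pass uses
+ * local_rscale = 8 - (-20) = 28.)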
+ */ + local_rscale = 8 - ln_dweight; + local_rscale = Max(local_rscale, NUMERIC_MIN_DISPLAY_SCALE); + + ln_var(base, &ln_base, local_rscale); + + mul_var(&ln_base, exp, &ln_num, local_rscale); + + val = numericvar_to_double_no_overflow(&ln_num); + + /* initial overflow/underflow test with fuzz factor */ + if (fabs(val) > NUMERIC_MAX_RESULT_SCALE * 3.01) + { + if (val > 0) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value overflows numeric format"))); + zero_var(result); + result->dscale = NUMERIC_MAX_DISPLAY_SCALE; + return; + } + + val *= 0.434294481903252; /* approximate decimal result weight */ + + /* choose the result scale */ + rscale = NUMERIC_MIN_SIG_DIGITS - (int) val; + rscale = Max(rscale, base->dscale); + rscale = Max(rscale, exp->dscale); + rscale = Max(rscale, NUMERIC_MIN_DISPLAY_SCALE); + rscale = Min(rscale, NUMERIC_MAX_DISPLAY_SCALE); + + /* significant digits required in the result */ + sig_digits = rscale + (int) val; + sig_digits = Max(sig_digits, 0); + + /* set the scale for the real exp * ln(base) calculation */ + local_rscale = sig_digits - ln_dweight + 8; + local_rscale = Max(local_rscale, NUMERIC_MIN_DISPLAY_SCALE); + + /* and do the real calculation */ + + ln_var(base, &ln_base, local_rscale); + + mul_var(&ln_base, exp, &ln_num, local_rscale); + + exp_var(&ln_num, result, rscale); + + if (res_sign == NUMERIC_NEG && result->ndigits > 0) + result->sign = NUMERIC_NEG; + + free_var(&ln_num); + free_var(&ln_base); + free_var(&abs_base); +} + +/* + * power_var_int() - + * + * Raise base to the power of exp, where exp is an integer. + * + * Note: this routine chooses dscale of the result. + */ +static void +power_var_int(const NumericVar *base, int exp, int exp_dscale, + NumericVar *result) +{ + double f; + int p; + int i; + int rscale; + int sig_digits; + unsigned int mask; + bool neg; + NumericVar base_prod; + int local_rscale; + + /* + * Choose the result scale. For this we need an estimate of the decimal + * weight of the result, which we obtain by approximating using double + * precision arithmetic. + * + * We also perform crude overflow/underflow tests here so that we can exit + * early if the result is sure to overflow/underflow, and to guard against + * integer overflow when choosing the result scale. + */ + if (base->ndigits != 0) + { + /*---------- + * Choose f (double) and p (int) such that base ~= f * 10^p. + * Then log10(result) = log10(base^exp) ~= exp * (log10(f) + p). + *---------- + */ + f = base->digits[0]; + p = base->weight * DEC_DIGITS; + + for (i = 1; i < base->ndigits && i * DEC_DIGITS < 16; i++) + { + f = f * NBASE + base->digits[i]; + p -= DEC_DIGITS; + } + + f = exp * (log10(f) + p); /* approximate decimal result weight */ + } + else + f = 0; /* result is 0 or 1 (weight 0), or error */ + + /* overflow/underflow tests with fuzz factors */ + if (f > (SHRT_MAX + 1) * DEC_DIGITS) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value overflows numeric format"))); + if (f + 1 < -NUMERIC_MAX_DISPLAY_SCALE) + { + zero_var(result); + result->dscale = NUMERIC_MAX_DISPLAY_SCALE; + return; + } + + /* + * Choose the result scale in the same way as power_var(), so it has at + * least NUMERIC_MIN_SIG_DIGITS significant digits and is not less than + * either input's display scale. 
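+ * For example, for 2 ^ 10 the estimate is f ~= 10 * log10(2) ~= 3.01, so
+ * integer inputs end up with rscale = NUMERIC_MIN_SIG_DIGITS - 3.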
+ */ + rscale = NUMERIC_MIN_SIG_DIGITS - (int) f; + rscale = Max(rscale, base->dscale); + rscale = Max(rscale, exp_dscale); + rscale = Max(rscale, NUMERIC_MIN_DISPLAY_SCALE); + rscale = Min(rscale, NUMERIC_MAX_DISPLAY_SCALE); + + /* Handle some common special cases, as well as corner cases */ + switch (exp) + { + case 0: + + /* + * While 0 ^ 0 can be either 1 or indeterminate (error), we treat + * it as 1 because most programming languages do this. SQL:2003 + * also requires a return value of 1. + * https://en.wikipedia.org/wiki/Exponentiation#Zero_to_the_zero_power + */ + set_var_from_var(&const_one, result); + result->dscale = rscale; /* no need to round */ + return; + case 1: + set_var_from_var(base, result); + round_var(result, rscale); + return; + case -1: + div_var(&const_one, base, result, rscale, true); + return; + case 2: + mul_var(base, base, result, rscale); + return; + default: + break; + } + + /* Handle the special case where the base is zero */ + if (base->ndigits == 0) + { + if (exp < 0) + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + zero_var(result); + result->dscale = rscale; + return; + } + + /* + * The general case repeatedly multiplies base according to the bit + * pattern of exp. + * + * The local rscale used for each multiplication is varied to keep a fixed + * number of significant digits, sufficient to give the required result + * scale. + */ + + /* + * Approximate number of significant digits in the result. Note that the + * underflow test above, together with the choice of rscale, ensures that + * this approximation is necessarily > 0. + */ + sig_digits = 1 + rscale + (int) f; + + /* + * The multiplications to produce the result may introduce an error of up + * to around log10(abs(exp)) digits, so work with this many extra digits + * of precision (plus a few more for good measure). + */ + sig_digits += (int) log(fabs((double) exp)) + 8; + + /* + * Now we can proceed with the multiplications. + */ + neg = (exp < 0); + mask = abs(exp); + + init_var(&base_prod); + set_var_from_var(base, &base_prod); + + if (mask & 1) + set_var_from_var(base, result); + else + set_var_from_var(&const_one, result); + + while ((mask >>= 1) > 0) + { + /* + * Do the multiplications using rscales large enough to hold the + * results to the required number of significant digits, but don't + * waste time by exceeding the scales of the numbers themselves. + */ + local_rscale = sig_digits - 2 * base_prod.weight * DEC_DIGITS; + local_rscale = Min(local_rscale, 2 * base_prod.dscale); + local_rscale = Max(local_rscale, NUMERIC_MIN_DISPLAY_SCALE); + + mul_var(&base_prod, &base_prod, &base_prod, local_rscale); + + if (mask & 1) + { + local_rscale = sig_digits - + (base_prod.weight + result->weight) * DEC_DIGITS; + local_rscale = Min(local_rscale, + base_prod.dscale + result->dscale); + local_rscale = Max(local_rscale, NUMERIC_MIN_DISPLAY_SCALE); + + mul_var(&base_prod, result, result, local_rscale); + } + + /* + * When abs(base) > 1, the number of digits to the left of the decimal + * point in base_prod doubles at each iteration, so if exp is large we + * could easily spend large amounts of time and memory space doing the + * multiplications. But once the weight exceeds what will fit in + * int16, the final result is guaranteed to overflow (or underflow, if + * exp < 0), so we can give up before wasting too many cycles. 
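+ * (A weight above SHRT_MAX corresponds to more than about 131000 decimal
+ * digits before the decimal point.)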
+ */ + if (base_prod.weight > SHRT_MAX || result->weight > SHRT_MAX) + { + /* overflow, unless neg, in which case result should be 0 */ + if (!neg) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value overflows numeric format"))); + zero_var(result); + neg = false; + break; + } + } + + free_var(&base_prod); + + /* Compensate for input sign, and round to requested rscale */ + if (neg) + div_var_fast(&const_one, result, result, rscale, true); + else + round_var(result, rscale); +} + +/* + * power_ten_int() - + * + * Raise ten to the power of exp, where exp is an integer. Note that unlike + * power_var_int(), this does no overflow/underflow checking or rounding. + */ +static void +power_ten_int(int exp, NumericVar *result) +{ + /* Construct the result directly, starting from 10^0 = 1 */ + set_var_from_var(&const_one, result); + + /* Scale needed to represent the result exactly */ + result->dscale = exp < 0 ? -exp : 0; + + /* Base-NBASE weight of result and remaining exponent */ + if (exp >= 0) + result->weight = exp / DEC_DIGITS; + else + result->weight = (exp + 1) / DEC_DIGITS - 1; + + exp -= result->weight * DEC_DIGITS; + + /* Final adjustment of the result's single NBASE digit */ + while (exp-- > 0) + result->digits[0] *= 10; +} + + +/* ---------------------------------------------------------------------- + * + * Following are the lowest level functions that operate unsigned + * on the variable level + * + * ---------------------------------------------------------------------- + */ + + +/* ---------- + * cmp_abs() - + * + * Compare the absolute values of var1 and var2 + * Returns: -1 for ABS(var1) < ABS(var2) + * 0 for ABS(var1) == ABS(var2) + * 1 for ABS(var1) > ABS(var2) + * ---------- + */ +static int +cmp_abs(const NumericVar *var1, const NumericVar *var2) +{ + return cmp_abs_common(var1->digits, var1->ndigits, var1->weight, + var2->digits, var2->ndigits, var2->weight); +} + +/* ---------- + * cmp_abs_common() - + * + * Main routine of cmp_abs(). This function can be used by both + * NumericVar and Numeric. + * ---------- + */ +static int +cmp_abs_common(const NumericDigit *var1digits, int var1ndigits, int var1weight, + const NumericDigit *var2digits, int var2ndigits, int var2weight) +{ + int i1 = 0; + int i2 = 0; + + /* Check any digits before the first common digit */ + + while (var1weight > var2weight && i1 < var1ndigits) + { + if (var1digits[i1++] != 0) + return 1; + var1weight--; + } + while (var2weight > var1weight && i2 < var2ndigits) + { + if (var2digits[i2++] != 0) + return -1; + var2weight--; + } + + /* At this point, either w1 == w2 or we've run out of digits */ + + if (var1weight == var2weight) + { + while (i1 < var1ndigits && i2 < var2ndigits) + { + int stat = var1digits[i1++] - var2digits[i2++]; + + if (stat) + { + if (stat > 0) + return 1; + return -1; + } + } + } + + /* + * At this point, we've run out of digits on one side or the other; so any + * remaining nonzero digits imply that side is larger + */ + while (i1 < var1ndigits) + { + if (var1digits[i1++] != 0) + return 1; + } + while (i2 < var2ndigits) + { + if (var2digits[i2++] != 0) + return -1; + } + + return 0; +} + + +/* + * add_abs() - + * + * Add the absolute values of two variables into result. + * result might point to one of the operands without danger. 
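+ * Aliasing is safe because the sum is assembled in a freshly allocated
+ * digit buffer that is only installed into *result at the end.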
+ */ +static void +add_abs(const NumericVar *var1, const NumericVar *var2, NumericVar *result) +{ + NumericDigit *res_buf; + NumericDigit *res_digits; + int res_ndigits; + int res_weight; + int res_rscale, + rscale1, + rscale2; + int res_dscale; + int i, + i1, + i2; + int carry = 0; + + /* copy these values into local vars for speed in inner loop */ + int var1ndigits = var1->ndigits; + int var2ndigits = var2->ndigits; + NumericDigit *var1digits = var1->digits; + NumericDigit *var2digits = var2->digits; + + res_weight = Max(var1->weight, var2->weight) + 1; + + res_dscale = Max(var1->dscale, var2->dscale); + + /* Note: here we are figuring rscale in base-NBASE digits */ + rscale1 = var1->ndigits - var1->weight - 1; + rscale2 = var2->ndigits - var2->weight - 1; + res_rscale = Max(rscale1, rscale2); + + res_ndigits = res_rscale + res_weight + 1; + if (res_ndigits <= 0) + res_ndigits = 1; + + res_buf = digitbuf_alloc(res_ndigits + 1); + res_buf[0] = 0; /* spare digit for later rounding */ + res_digits = res_buf + 1; + + i1 = res_rscale + var1->weight + 1; + i2 = res_rscale + var2->weight + 1; + for (i = res_ndigits - 1; i >= 0; i--) + { + i1--; + i2--; + if (i1 >= 0 && i1 < var1ndigits) + carry += var1digits[i1]; + if (i2 >= 0 && i2 < var2ndigits) + carry += var2digits[i2]; + + if (carry >= NBASE) + { + res_digits[i] = carry - NBASE; + carry = 1; + } + else + { + res_digits[i] = carry; + carry = 0; + } + } + + Assert(carry == 0); /* else we failed to allow for carry out */ + + digitbuf_free(result->buf); + result->ndigits = res_ndigits; + result->buf = res_buf; + result->digits = res_digits; + result->weight = res_weight; + result->dscale = res_dscale; + + /* Remove leading/trailing zeroes */ + strip_var(result); +} + + +/* + * sub_abs() + * + * Subtract the absolute value of var2 from the absolute value of var1 + * and store in result. result might point to one of the operands + * without danger. + * + * ABS(var1) MUST BE GREATER OR EQUAL ABS(var2) !!! 
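+ * (If the caller violates this, the subtraction leaves a nonzero borrow and
+ * the Assert near the end of this function fails.)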
+ */ +static void +sub_abs(const NumericVar *var1, const NumericVar *var2, NumericVar *result) +{ + NumericDigit *res_buf; + NumericDigit *res_digits; + int res_ndigits; + int res_weight; + int res_rscale, + rscale1, + rscale2; + int res_dscale; + int i, + i1, + i2; + int borrow = 0; + + /* copy these values into local vars for speed in inner loop */ + int var1ndigits = var1->ndigits; + int var2ndigits = var2->ndigits; + NumericDigit *var1digits = var1->digits; + NumericDigit *var2digits = var2->digits; + + res_weight = var1->weight; + + res_dscale = Max(var1->dscale, var2->dscale); + + /* Note: here we are figuring rscale in base-NBASE digits */ + rscale1 = var1->ndigits - var1->weight - 1; + rscale2 = var2->ndigits - var2->weight - 1; + res_rscale = Max(rscale1, rscale2); + + res_ndigits = res_rscale + res_weight + 1; + if (res_ndigits <= 0) + res_ndigits = 1; + + res_buf = digitbuf_alloc(res_ndigits + 1); + res_buf[0] = 0; /* spare digit for later rounding */ + res_digits = res_buf + 1; + + i1 = res_rscale + var1->weight + 1; + i2 = res_rscale + var2->weight + 1; + for (i = res_ndigits - 1; i >= 0; i--) + { + i1--; + i2--; + if (i1 >= 0 && i1 < var1ndigits) + borrow += var1digits[i1]; + if (i2 >= 0 && i2 < var2ndigits) + borrow -= var2digits[i2]; + + if (borrow < 0) + { + res_digits[i] = borrow + NBASE; + borrow = -1; + } + else + { + res_digits[i] = borrow; + borrow = 0; + } + } + + Assert(borrow == 0); /* else caller gave us var1 < var2 */ + + digitbuf_free(result->buf); + result->ndigits = res_ndigits; + result->buf = res_buf; + result->digits = res_digits; + result->weight = res_weight; + result->dscale = res_dscale; + + /* Remove leading/trailing zeroes */ + strip_var(result); +} + +/* + * round_var + * + * Round the value of a variable to no more than rscale decimal digits + * after the decimal point. NOTE: we allow rscale < 0 here, implying + * rounding before the decimal point. + */ +static void +round_var(NumericVar *var, int rscale) +{ + NumericDigit *digits = var->digits; + int di; + int ndigits; + int carry; + + var->dscale = rscale; + + /* decimal digits wanted */ + di = (var->weight + 1) * DEC_DIGITS + rscale; + + /* + * If di = 0, the value loses all digits, but could round up to 1 if its + * first extra digit is >= 5. If di < 0 the result must be 0. + */ + if (di < 0) + { + var->ndigits = 0; + var->weight = 0; + var->sign = NUMERIC_POS; + } + else + { + /* NBASE digits wanted */ + ndigits = (di + DEC_DIGITS - 1) / DEC_DIGITS; + + /* 0, or number of decimal digits to keep in last NBASE digit */ + di %= DEC_DIGITS; + + if (ndigits < var->ndigits || + (ndigits == var->ndigits && di > 0)) + { + var->ndigits = ndigits; + +#if DEC_DIGITS == 1 + /* di must be zero */ + carry = (digits[ndigits] >= HALF_NBASE) ? 1 : 0; +#else + if (di == 0) + carry = (digits[ndigits] >= HALF_NBASE) ? 
1 : 0; + else + { + /* Must round within last NBASE digit */ + int extra, + pow10; + +#if DEC_DIGITS == 4 + pow10 = round_powers[di]; +#elif DEC_DIGITS == 2 + pow10 = 10; +#else +#error unsupported NBASE +#endif + extra = digits[--ndigits] % pow10; + digits[ndigits] -= extra; + carry = 0; + if (extra >= pow10 / 2) + { + pow10 += digits[ndigits]; + if (pow10 >= NBASE) + { + pow10 -= NBASE; + carry = 1; + } + digits[ndigits] = pow10; + } + } +#endif + + /* Propagate carry if needed */ + while (carry) + { + carry += digits[--ndigits]; + if (carry >= NBASE) + { + digits[ndigits] = carry - NBASE; + carry = 1; + } + else + { + digits[ndigits] = carry; + carry = 0; + } + } + + if (ndigits < 0) + { + Assert(ndigits == -1); /* better not have added > 1 digit */ + Assert(var->digits > var->buf); + var->digits--; + var->ndigits++; + var->weight++; + } + } + } +} + +/* + * trunc_var + * + * Truncate (towards zero) the value of a variable at rscale decimal digits + * after the decimal point. NOTE: we allow rscale < 0 here, implying + * truncation before the decimal point. + */ +static void +trunc_var(NumericVar *var, int rscale) +{ + int di; + int ndigits; + + var->dscale = rscale; + + /* decimal digits wanted */ + di = (var->weight + 1) * DEC_DIGITS + rscale; + + /* + * If di <= 0, the value loses all digits. + */ + if (di <= 0) + { + var->ndigits = 0; + var->weight = 0; + var->sign = NUMERIC_POS; + } + else + { + /* NBASE digits wanted */ + ndigits = (di + DEC_DIGITS - 1) / DEC_DIGITS; + + if (ndigits <= var->ndigits) + { + var->ndigits = ndigits; + +#if DEC_DIGITS == 1 + /* no within-digit stuff to worry about */ +#else + /* 0, or number of decimal digits to keep in last NBASE digit */ + di %= DEC_DIGITS; + + if (di > 0) + { + /* Must truncate within last NBASE digit */ + NumericDigit *digits = var->digits; + int extra, + pow10; + +#if DEC_DIGITS == 4 + pow10 = round_powers[di]; +#elif DEC_DIGITS == 2 + pow10 = 10; +#else +#error unsupported NBASE +#endif + extra = digits[--ndigits] % pow10; + digits[ndigits] -= extra; + } +#endif + } + } +} + +/* + * strip_var + * + * Strip any leading and trailing zeroes from a numeric variable + */ +static void +strip_var(NumericVar *var) +{ + NumericDigit *digits = var->digits; + int ndigits = var->ndigits; + + /* Strip leading zeroes */ + while (ndigits > 0 && *digits == 0) + { + digits++; + var->weight--; + ndigits--; + } + + /* Strip trailing zeroes */ + while (ndigits > 0 && digits[ndigits - 1] == 0) + ndigits--; + + /* If it's zero, normalize the sign and weight */ + if (ndigits == 0) + { + var->sign = NUMERIC_POS; + var->weight = 0; + } + + var->digits = digits; + var->ndigits = ndigits; +} + + +/* ---------------------------------------------------------------------- + * + * Fast sum accumulator functions + * + * ---------------------------------------------------------------------- + */ + +/* + * Reset the accumulator's value to zero. The buffers to hold the digits + * are not free'd. + */ +static void +accum_sum_reset(NumericSumAccum *accum) +{ + int i; + + accum->dscale = 0; + for (i = 0; i < accum->ndigits; i++) + { + accum->pos_digits[i] = 0; + accum->neg_digits[i] = 0; + } +} + +/* + * Accumulate a new value. + */ +static void +accum_sum_add(NumericSumAccum *accum, const NumericVar *val) +{ + int32 *accum_digits; + int i, + val_i; + int val_ndigits; + NumericDigit *val_digits; + + /* + * If we have accumulated too many values since the last carry + * propagation, do it now, to avoid overflowing. 
(We could allow more + * than NBASE - 1, if we reserved two extra digits, rather than one, for + * carry propagation. But even with NBASE - 1, this needs to be done so + * seldom, that the performance difference is negligible.) + */ + if (accum->num_uncarried == NBASE - 1) + accum_sum_carry(accum); + + /* + * Adjust the weight or scale of the old value, so that it can accommodate + * the new value. + */ + accum_sum_rescale(accum, val); + + /* */ + if (val->sign == NUMERIC_POS) + accum_digits = accum->pos_digits; + else + accum_digits = accum->neg_digits; + + /* copy these values into local vars for speed in loop */ + val_ndigits = val->ndigits; + val_digits = val->digits; + + i = accum->weight - val->weight; + for (val_i = 0; val_i < val_ndigits; val_i++) + { + accum_digits[i] += (int32) val_digits[val_i]; + i++; + } + + accum->num_uncarried++; +} + +/* + * Propagate carries. + */ +static void +accum_sum_carry(NumericSumAccum *accum) +{ + int i; + int ndigits; + int32 *dig; + int32 carry; + int32 newdig = 0; + + /* + * If no new values have been added since last carry propagation, nothing + * to do. + */ + if (accum->num_uncarried == 0) + return; + + /* + * We maintain that the weight of the accumulator is always one larger + * than needed to hold the current value, before carrying, to make sure + * there is enough space for the possible extra digit when carry is + * propagated. We cannot expand the buffer here, unless we require + * callers of accum_sum_final() to switch to the right memory context. + */ + Assert(accum->pos_digits[0] == 0 && accum->neg_digits[0] == 0); + + ndigits = accum->ndigits; + + /* Propagate carry in the positive sum */ + dig = accum->pos_digits; + carry = 0; + for (i = ndigits - 1; i >= 0; i--) + { + newdig = dig[i] + carry; + if (newdig >= NBASE) + { + carry = newdig / NBASE; + newdig -= carry * NBASE; + } + else + carry = 0; + dig[i] = newdig; + } + /* Did we use up the digit reserved for carry propagation? */ + if (newdig > 0) + accum->have_carry_space = false; + + /* And the same for the negative sum */ + dig = accum->neg_digits; + carry = 0; + for (i = ndigits - 1; i >= 0; i--) + { + newdig = dig[i] + carry; + if (newdig >= NBASE) + { + carry = newdig / NBASE; + newdig -= carry * NBASE; + } + else + carry = 0; + dig[i] = newdig; + } + if (newdig > 0) + accum->have_carry_space = false; + + accum->num_uncarried = 0; +} + +/* + * Re-scale accumulator to accommodate new value. + * + * If the new value has more digits than the current digit buffers in the + * accumulator, enlarge the buffers. + */ +static void +accum_sum_rescale(NumericSumAccum *accum, const NumericVar *val) +{ + int old_weight = accum->weight; + int old_ndigits = accum->ndigits; + int accum_ndigits; + int accum_weight; + int accum_rscale; + int val_rscale; + + accum_weight = old_weight; + accum_ndigits = old_ndigits; + + /* + * Does the new value have a larger weight? If so, enlarge the buffers, + * and shift the existing value to the new weight, by adding leading + * zeros. + * + * We enforce that the accumulator always has a weight one larger than + * needed for the inputs, so that we have space for an extra digit at the + * final carry-propagation phase, if necessary. + */ + if (val->weight >= accum_weight) + { + accum_weight = val->weight + 1; + accum_ndigits = accum_ndigits + (accum_weight - old_weight); + } + + /* + * Even though the new value is small, we might've used up the space + * reserved for the carry digit in the last call to accum_sum_carry(). 
If + * so, enlarge to make room for another one. + */ + else if (!accum->have_carry_space) + { + accum_weight++; + accum_ndigits++; + } + + /* Is the new value wider on the right side? */ + accum_rscale = accum_ndigits - accum_weight - 1; + val_rscale = val->ndigits - val->weight - 1; + if (val_rscale > accum_rscale) + accum_ndigits = accum_ndigits + (val_rscale - accum_rscale); + + if (accum_ndigits != old_ndigits || + accum_weight != old_weight) + { + int32 *new_pos_digits; + int32 *new_neg_digits; + int weightdiff; + + weightdiff = accum_weight - old_weight; + + new_pos_digits = palloc0(accum_ndigits * sizeof(int32)); + new_neg_digits = palloc0(accum_ndigits * sizeof(int32)); + + if (accum->pos_digits) + { + memcpy(&new_pos_digits[weightdiff], accum->pos_digits, + old_ndigits * sizeof(int32)); + pfree(accum->pos_digits); + + memcpy(&new_neg_digits[weightdiff], accum->neg_digits, + old_ndigits * sizeof(int32)); + pfree(accum->neg_digits); + } + + accum->pos_digits = new_pos_digits; + accum->neg_digits = new_neg_digits; + + accum->weight = accum_weight; + accum->ndigits = accum_ndigits; + + Assert(accum->pos_digits[0] == 0 && accum->neg_digits[0] == 0); + accum->have_carry_space = true; + } + + if (val->dscale > accum->dscale) + accum->dscale = val->dscale; +} + +/* + * Return the current value of the accumulator. This perform final carry + * propagation, and adds together the positive and negative sums. + * + * Unlike all the other routines, the caller is not required to switch to + * the memory context that holds the accumulator. + */ +static void +accum_sum_final(NumericSumAccum *accum, NumericVar *result) +{ + int i; + NumericVar pos_var; + NumericVar neg_var; + + if (accum->ndigits == 0) + { + set_var_from_var(&const_zero, result); + return; + } + + /* Perform final carry */ + accum_sum_carry(accum); + + /* Create NumericVars representing the positive and negative sums */ + init_var(&pos_var); + init_var(&neg_var); + + pos_var.ndigits = neg_var.ndigits = accum->ndigits; + pos_var.weight = neg_var.weight = accum->weight; + pos_var.dscale = neg_var.dscale = accum->dscale; + pos_var.sign = NUMERIC_POS; + neg_var.sign = NUMERIC_NEG; + + pos_var.buf = pos_var.digits = digitbuf_alloc(accum->ndigits); + neg_var.buf = neg_var.digits = digitbuf_alloc(accum->ndigits); + + for (i = 0; i < accum->ndigits; i++) + { + Assert(accum->pos_digits[i] < NBASE); + pos_var.digits[i] = (int16) accum->pos_digits[i]; + + Assert(accum->neg_digits[i] < NBASE); + neg_var.digits[i] = (int16) accum->neg_digits[i]; + } + + /* And add them together */ + add_var(&pos_var, &neg_var, result); + + /* Remove leading/trailing zeroes */ + strip_var(result); +} + +/* + * Copy an accumulator's state. + * + * 'dst' is assumed to be uninitialized beforehand. No attempt is made at + * freeing old values. + */ +static void +accum_sum_copy(NumericSumAccum *dst, NumericSumAccum *src) +{ + dst->pos_digits = palloc(src->ndigits * sizeof(int32)); + dst->neg_digits = palloc(src->ndigits * sizeof(int32)); + + memcpy(dst->pos_digits, src->pos_digits, src->ndigits * sizeof(int32)); + memcpy(dst->neg_digits, src->neg_digits, src->ndigits * sizeof(int32)); + dst->num_uncarried = src->num_uncarried; + dst->ndigits = src->ndigits; + dst->weight = src->weight; + dst->dscale = src->dscale; +} + +/* + * Add the current value of 'accum2' into 'accum'. 
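+ * The logical value of 'accum2' is unchanged, though its pending carries are
+ * propagated as a side effect of reading it out.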
+ */ +static void +accum_sum_combine(NumericSumAccum *accum, NumericSumAccum *accum2) +{ + NumericVar tmp_var; + + init_var(&tmp_var); + + accum_sum_final(accum2, &tmp_var); + accum_sum_add(accum, &tmp_var); + + free_var(&tmp_var); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/numutils.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/numutils.c new file mode 100644 index 00000000000..d07a5602076 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/numutils.c @@ -0,0 +1,1315 @@ +/*------------------------------------------------------------------------- + * + * numutils.c + * utility functions for I/O of built-in numeric types. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/numutils.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <math.h> +#include <limits.h> +#include <ctype.h> + +#include "common/int.h" +#include "utils/builtins.h" +#include "port/pg_bitutils.h" + +/* + * A table of all two-digit numbers. This is used to speed up decimal digit + * generation by copying pairs of digits into the final output. + */ +static const char DIGIT_TABLE[200] = +"00" "01" "02" "03" "04" "05" "06" "07" "08" "09" +"10" "11" "12" "13" "14" "15" "16" "17" "18" "19" +"20" "21" "22" "23" "24" "25" "26" "27" "28" "29" +"30" "31" "32" "33" "34" "35" "36" "37" "38" "39" +"40" "41" "42" "43" "44" "45" "46" "47" "48" "49" +"50" "51" "52" "53" "54" "55" "56" "57" "58" "59" +"60" "61" "62" "63" "64" "65" "66" "67" "68" "69" +"70" "71" "72" "73" "74" "75" "76" "77" "78" "79" +"80" "81" "82" "83" "84" "85" "86" "87" "88" "89" +"90" "91" "92" "93" "94" "95" "96" "97" "98" "99"; + +/* + * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10 + */ +static inline int +decimalLength32(const uint32 v) +{ + int t; + static const uint32 PowersOfTen[] = { + 1, 10, 100, + 1000, 10000, 100000, + 1000000, 10000000, 100000000, + 1000000000 + }; + + /* + * Compute base-10 logarithm by dividing the base-2 logarithm by a + * good-enough approximation of the base-2 logarithm of 10 + */ + t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096; + return t + (v >= PowersOfTen[t]); +} + +static inline int +decimalLength64(const uint64 v) +{ + int t; + static const uint64 PowersOfTen[] = { + UINT64CONST(1), UINT64CONST(10), + UINT64CONST(100), UINT64CONST(1000), + UINT64CONST(10000), UINT64CONST(100000), + UINT64CONST(1000000), UINT64CONST(10000000), + UINT64CONST(100000000), UINT64CONST(1000000000), + UINT64CONST(10000000000), UINT64CONST(100000000000), + UINT64CONST(1000000000000), UINT64CONST(10000000000000), + UINT64CONST(100000000000000), UINT64CONST(1000000000000000), + UINT64CONST(10000000000000000), UINT64CONST(100000000000000000), + UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000) + }; + + /* + * Compute base-10 logarithm by dividing the base-2 logarithm by a + * good-enough approximation of the base-2 logarithm of 10 + */ + t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096; + return t + (v >= PowersOfTen[t]); +} + +static const int8 hexlookup[128] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 
9, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + +/* + * Convert input string to a signed 16 bit integer. Input strings may be + * expressed in base-10, hexadecimal, octal, or binary format, all of which + * can be prefixed by an optional sign character, either '+' (the default) or + * '-' for negative numbers. Hex strings are recognized by the digits being + * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O + * prefix. The binary representation is recognized by the 0b or 0B prefix. + * + * Allows any number of leading or trailing whitespace characters. Digits may + * optionally be separated by a single underscore character. These can only + * come between digits and not before or after the digits. Underscores have + * no effect on the return value and are supported only to assist in improving + * the human readability of the input strings. + * + * pg_strtoint16() will throw ereport() upon bad input format or overflow; + * while pg_strtoint16_safe() instead returns such complaints in *escontext, + * if it's an ErrorSaveContext. +* + * NB: Accumulate input as an unsigned number, to deal with two's complement + * representation of the most negative number, which can't be represented as a + * signed positive number. + */ +int16 +pg_strtoint16(const char *s) +{ + return pg_strtoint16_safe(s, NULL); +} + +int16 +pg_strtoint16_safe(const char *s, Node *escontext) +{ + const char *ptr = s; + const char *firstdigit; + uint16 tmp = 0; + bool neg = false; + unsigned char digit; + + /* + * The majority of cases are likely to be base-10 digits without any + * underscore separator characters. We'll first try to parse the string + * with the assumption that's the case and only fallback on a slower + * implementation which handles hex, octal and binary strings and + * underscores if the fastpath version cannot parse the string. + */ + + /* leave it up to the slow path to look for leading spaces */ + + if (*ptr == '-') + { + ptr++; + neg = true; + } + + /* a leading '+' is uncommon so leave that for the slow path */ + + /* process the first digit */ + digit = (*ptr - '0'); + + /* + * Exploit unsigned arithmetic to save having to check both the upper and + * lower bounds of the digit. 
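+ * Characters below '0' wrap around to large values, so the single digit < 10
+ * test below rejects them just like characters above '9'.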
+ */ + if (likely(digit < 10)) + { + ptr++; + tmp = digit; + } + else + { + /* we need at least one digit */ + goto slow; + } + + /* process remaining digits */ + for (;;) + { + digit = (*ptr - '0'); + + if (digit >= 10) + break; + + ptr++; + + if (unlikely(tmp > -(PG_INT16_MIN / 10))) + goto out_of_range; + + tmp = tmp * 10 + digit; + } + + /* when the string does not end in a digit, let the slow path handle it */ + if (unlikely(*ptr != '\0')) + goto slow; + + if (neg) + { + /* check the negative equivalent will fit without overflowing */ + if (unlikely(tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1)) + goto out_of_range; + return -((int16) tmp); + } + + if (unlikely(tmp > PG_INT16_MAX)) + goto out_of_range; + + return (int16) tmp; + +slow: + tmp = 0; + ptr = s; + /* no need to reset neg */ + + /* skip leading spaces */ + while (isspace((unsigned char) *ptr)) + ptr++; + + /* handle sign */ + if (*ptr == '-') + { + ptr++; + neg = true; + } + else if (*ptr == '+') + ptr++; + + /* process digits */ + if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X')) + { + firstdigit = ptr += 2; + + for (;;) + { + if (isxdigit((unsigned char) *ptr)) + { + if (unlikely(tmp > -(PG_INT16_MIN / 16))) + goto out_of_range; + + tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++]; + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || !isxdigit((unsigned char) *ptr)) + goto invalid_syntax; + } + else + break; + } + } + else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O')) + { + firstdigit = ptr += 2; + + for (;;) + { + if (*ptr >= '0' && *ptr <= '7') + { + if (unlikely(tmp > -(PG_INT16_MIN / 8))) + goto out_of_range; + + tmp = tmp * 8 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || *ptr < '0' || *ptr > '7') + goto invalid_syntax; + } + else + break; + } + } + else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B')) + { + firstdigit = ptr += 2; + + for (;;) + { + if (*ptr >= '0' && *ptr <= '1') + { + if (unlikely(tmp > -(PG_INT16_MIN / 2))) + goto out_of_range; + + tmp = tmp * 2 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || *ptr < '0' || *ptr > '1') + goto invalid_syntax; + } + else + break; + } + } + else + { + firstdigit = ptr; + + for (;;) + { + if (*ptr >= '0' && *ptr <= '9') + { + if (unlikely(tmp > -(PG_INT16_MIN / 10))) + goto out_of_range; + + tmp = tmp * 10 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore may not be first */ + if (unlikely(ptr == firstdigit)) + goto invalid_syntax; + /* and it must be followed by more digits */ + ptr++; + if (*ptr == '\0' || !isdigit((unsigned char) *ptr)) + goto invalid_syntax; + } + else + break; + } + } + + /* require at least one digit */ + if (unlikely(ptr == firstdigit)) + goto invalid_syntax; + + /* allow trailing whitespace, but not other trailing chars */ + while (isspace((unsigned char) *ptr)) + ptr++; + + if (unlikely(*ptr != '\0')) + goto invalid_syntax; + + if (neg) + { + /* check the negative equivalent will fit without overflowing */ + if (tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1) + goto out_of_range; + return -((int16) tmp); + } + + if (tmp > PG_INT16_MAX) + goto out_of_range; + + return (int16) tmp; + +out_of_range: + ereturn(escontext, 0, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value \"%s\" is out of range for type %s", + s, "smallint"))); + +invalid_syntax: + ereturn(escontext, 0, + 
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "smallint", s))); +} + +/* + * Convert input string to a signed 32 bit integer. Input strings may be + * expressed in base-10, hexadecimal, octal, or binary format, all of which + * can be prefixed by an optional sign character, either '+' (the default) or + * '-' for negative numbers. Hex strings are recognized by the digits being + * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O + * prefix. The binary representation is recognized by the 0b or 0B prefix. + * + * Allows any number of leading or trailing whitespace characters. Digits may + * optionally be separated by a single underscore character. These can only + * come between digits and not before or after the digits. Underscores have + * no effect on the return value and are supported only to assist in improving + * the human readability of the input strings. + * + * pg_strtoint32() will throw ereport() upon bad input format or overflow; + * while pg_strtoint32_safe() instead returns such complaints in *escontext, + * if it's an ErrorSaveContext. + * + * NB: Accumulate input as an unsigned number, to deal with two's complement + * representation of the most negative number, which can't be represented as a + * signed positive number. + */ +int32 +pg_strtoint32(const char *s) +{ + return pg_strtoint32_safe(s, NULL); +} + +int32 +pg_strtoint32_safe(const char *s, Node *escontext) +{ + const char *ptr = s; + const char *firstdigit; + uint32 tmp = 0; + bool neg = false; + unsigned char digit; + + /* + * The majority of cases are likely to be base-10 digits without any + * underscore separator characters. We'll first try to parse the string + * with the assumption that's the case and only fallback on a slower + * implementation which handles hex, octal and binary strings and + * underscores if the fastpath version cannot parse the string. + */ + + /* leave it up to the slow path to look for leading spaces */ + + if (*ptr == '-') + { + ptr++; + neg = true; + } + + /* a leading '+' is uncommon so leave that for the slow path */ + + /* process the first digit */ + digit = (*ptr - '0'); + + /* + * Exploit unsigned arithmetic to save having to check both the upper and + * lower bounds of the digit. 
+ */ + if (likely(digit < 10)) + { + ptr++; + tmp = digit; + } + else + { + /* we need at least one digit */ + goto slow; + } + + /* process remaining digits */ + for (;;) + { + digit = (*ptr - '0'); + + if (digit >= 10) + break; + + ptr++; + + if (unlikely(tmp > -(PG_INT32_MIN / 10))) + goto out_of_range; + + tmp = tmp * 10 + digit; + } + + /* when the string does not end in a digit, let the slow path handle it */ + if (unlikely(*ptr != '\0')) + goto slow; + + if (neg) + { + /* check the negative equivalent will fit without overflowing */ + if (unlikely(tmp > (uint32) (-(PG_INT32_MIN + 1)) + 1)) + goto out_of_range; + return -((int32) tmp); + } + + if (unlikely(tmp > PG_INT32_MAX)) + goto out_of_range; + + return (int32) tmp; + +slow: + tmp = 0; + ptr = s; + /* no need to reset neg */ + + /* skip leading spaces */ + while (isspace((unsigned char) *ptr)) + ptr++; + + /* handle sign */ + if (*ptr == '-') + { + ptr++; + neg = true; + } + else if (*ptr == '+') + ptr++; + + /* process digits */ + if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X')) + { + firstdigit = ptr += 2; + + for (;;) + { + if (isxdigit((unsigned char) *ptr)) + { + if (unlikely(tmp > -(PG_INT32_MIN / 16))) + goto out_of_range; + + tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++]; + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || !isxdigit((unsigned char) *ptr)) + goto invalid_syntax; + } + else + break; + } + } + else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O')) + { + firstdigit = ptr += 2; + + for (;;) + { + if (*ptr >= '0' && *ptr <= '7') + { + if (unlikely(tmp > -(PG_INT32_MIN / 8))) + goto out_of_range; + + tmp = tmp * 8 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || *ptr < '0' || *ptr > '7') + goto invalid_syntax; + } + else + break; + } + } + else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B')) + { + firstdigit = ptr += 2; + + for (;;) + { + if (*ptr >= '0' && *ptr <= '1') + { + if (unlikely(tmp > -(PG_INT32_MIN / 2))) + goto out_of_range; + + tmp = tmp * 2 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || *ptr < '0' || *ptr > '1') + goto invalid_syntax; + } + else + break; + } + } + else + { + firstdigit = ptr; + + for (;;) + { + if (*ptr >= '0' && *ptr <= '9') + { + if (unlikely(tmp > -(PG_INT32_MIN / 10))) + goto out_of_range; + + tmp = tmp * 10 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore may not be first */ + if (unlikely(ptr == firstdigit)) + goto invalid_syntax; + /* and it must be followed by more digits */ + ptr++; + if (*ptr == '\0' || !isdigit((unsigned char) *ptr)) + goto invalid_syntax; + } + else + break; + } + } + + /* require at least one digit */ + if (unlikely(ptr == firstdigit)) + goto invalid_syntax; + + /* allow trailing whitespace, but not other trailing chars */ + while (isspace((unsigned char) *ptr)) + ptr++; + + if (unlikely(*ptr != '\0')) + goto invalid_syntax; + + if (neg) + { + /* check the negative equivalent will fit without overflowing */ + if (tmp > (uint32) (-(PG_INT32_MIN + 1)) + 1) + goto out_of_range; + return -((int32) tmp); + } + + if (tmp > PG_INT32_MAX) + goto out_of_range; + + return (int32) tmp; + +out_of_range: + ereturn(escontext, 0, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value \"%s\" is out of range for type %s", + s, "integer"))); + +invalid_syntax: + ereturn(escontext, 0, + 
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "integer", s))); +} + +/* + * Convert input string to a signed 64 bit integer. Input strings may be + * expressed in base-10, hexadecimal, octal, or binary format, all of which + * can be prefixed by an optional sign character, either '+' (the default) or + * '-' for negative numbers. Hex strings are recognized by the digits being + * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O + * prefix. The binary representation is recognized by the 0b or 0B prefix. + * + * Allows any number of leading or trailing whitespace characters. Digits may + * optionally be separated by a single underscore character. These can only + * come between digits and not before or after the digits. Underscores have + * no effect on the return value and are supported only to assist in improving + * the human readability of the input strings. + * + * pg_strtoint64() will throw ereport() upon bad input format or overflow; + * while pg_strtoint64_safe() instead returns such complaints in *escontext, + * if it's an ErrorSaveContext. + * + * NB: Accumulate input as an unsigned number, to deal with two's complement + * representation of the most negative number, which can't be represented as a + * signed positive number. + */ +int64 +pg_strtoint64(const char *s) +{ + return pg_strtoint64_safe(s, NULL); +} + +int64 +pg_strtoint64_safe(const char *s, Node *escontext) +{ + const char *ptr = s; + const char *firstdigit; + uint64 tmp = 0; + bool neg = false; + unsigned char digit; + + /* + * The majority of cases are likely to be base-10 digits without any + * underscore separator characters. We'll first try to parse the string + * with the assumption that's the case and only fallback on a slower + * implementation which handles hex, octal and binary strings and + * underscores if the fastpath version cannot parse the string. + */ + + /* leave it up to the slow path to look for leading spaces */ + + if (*ptr == '-') + { + ptr++; + neg = true; + } + + /* a leading '+' is uncommon so leave that for the slow path */ + + /* process the first digit */ + digit = (*ptr - '0'); + + /* + * Exploit unsigned arithmetic to save having to check both the upper and + * lower bounds of the digit. 
+ */ + if (likely(digit < 10)) + { + ptr++; + tmp = digit; + } + else + { + /* we need at least one digit */ + goto slow; + } + + /* process remaining digits */ + for (;;) + { + digit = (*ptr - '0'); + + if (digit >= 10) + break; + + ptr++; + + if (unlikely(tmp > -(PG_INT64_MIN / 10))) + goto out_of_range; + + tmp = tmp * 10 + digit; + } + + /* when the string does not end in a digit, let the slow path handle it */ + if (unlikely(*ptr != '\0')) + goto slow; + + if (neg) + { + /* check the negative equivalent will fit without overflowing */ + if (unlikely(tmp > (uint64) (-(PG_INT64_MIN + 1)) + 1)) + goto out_of_range; + return -((int64) tmp); + } + + if (unlikely(tmp > PG_INT64_MAX)) + goto out_of_range; + + return (int64) tmp; + +slow: + tmp = 0; + ptr = s; + /* no need to reset neg */ + + /* skip leading spaces */ + while (isspace((unsigned char) *ptr)) + ptr++; + + /* handle sign */ + if (*ptr == '-') + { + ptr++; + neg = true; + } + else if (*ptr == '+') + ptr++; + + /* process digits */ + if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X')) + { + firstdigit = ptr += 2; + + for (;;) + { + if (isxdigit((unsigned char) *ptr)) + { + if (unlikely(tmp > -(PG_INT64_MIN / 16))) + goto out_of_range; + + tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++]; + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || !isxdigit((unsigned char) *ptr)) + goto invalid_syntax; + } + else + break; + } + } + else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O')) + { + firstdigit = ptr += 2; + + for (;;) + { + if (*ptr >= '0' && *ptr <= '7') + { + if (unlikely(tmp > -(PG_INT64_MIN / 8))) + goto out_of_range; + + tmp = tmp * 8 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || *ptr < '0' || *ptr > '7') + goto invalid_syntax; + } + else + break; + } + } + else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B')) + { + firstdigit = ptr += 2; + + for (;;) + { + if (*ptr >= '0' && *ptr <= '1') + { + if (unlikely(tmp > -(PG_INT64_MIN / 2))) + goto out_of_range; + + tmp = tmp * 2 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || *ptr < '0' || *ptr > '1') + goto invalid_syntax; + } + else + break; + } + } + else + { + firstdigit = ptr; + + for (;;) + { + if (*ptr >= '0' && *ptr <= '9') + { + if (unlikely(tmp > -(PG_INT64_MIN / 10))) + goto out_of_range; + + tmp = tmp * 10 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore may not be first */ + if (unlikely(ptr == firstdigit)) + goto invalid_syntax; + /* and it must be followed by more digits */ + ptr++; + if (*ptr == '\0' || !isdigit((unsigned char) *ptr)) + goto invalid_syntax; + } + else + break; + } + } + + /* require at least one digit */ + if (unlikely(ptr == firstdigit)) + goto invalid_syntax; + + /* allow trailing whitespace, but not other trailing chars */ + while (isspace((unsigned char) *ptr)) + ptr++; + + if (unlikely(*ptr != '\0')) + goto invalid_syntax; + + if (neg) + { + /* check the negative equivalent will fit without overflowing */ + if (tmp > (uint64) (-(PG_INT64_MIN + 1)) + 1) + goto out_of_range; + return -((int64) tmp); + } + + if (tmp > PG_INT64_MAX) + goto out_of_range; + + return (int64) tmp; + +out_of_range: + ereturn(escontext, 0, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value \"%s\" is out of range for type %s", + s, "bigint"))); + +invalid_syntax: + ereturn(escontext, 0, + 
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "bigint", s))); +} + +/* + * Convert input string to an unsigned 32 bit integer. + * + * Allows any number of leading or trailing whitespace characters. + * + * If endloc isn't NULL, store a pointer to the rest of the string there, + * so that caller can parse the rest. Otherwise, it's an error if anything + * but whitespace follows. + * + * typname is what is reported in error messges. + * + * If escontext points to an ErrorSaveContext node, that is filled instead + * of throwing an error; the caller must check SOFT_ERROR_OCCURRED() + * to detect errors. + */ +uint32 +uint32in_subr(const char *s, char **endloc, + const char *typname, Node *escontext) +{ + uint32 result; + unsigned long cvt; + char *endptr; + + errno = 0; + cvt = strtoul(s, &endptr, 0); + + /* + * strtoul() normally only sets ERANGE. On some systems it may also set + * EINVAL, which simply means it couldn't parse the input string. Be sure + * to report that the same way as the standard error indication (that + * endptr == s). + */ + if ((errno && errno != ERANGE) || endptr == s) + ereturn(escontext, 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + typname, s))); + + if (errno == ERANGE) + ereturn(escontext, 0, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value \"%s\" is out of range for type %s", + s, typname))); + + if (endloc) + { + /* caller wants to deal with rest of string */ + *endloc = endptr; + } + else + { + /* allow only whitespace after number */ + while (*endptr && isspace((unsigned char) *endptr)) + endptr++; + if (*endptr) + ereturn(escontext, 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + typname, s))); + } + + result = (uint32) cvt; + + /* + * Cope with possibility that unsigned long is wider than uint32, in which + * case strtoul will not raise an error for some values that are out of + * the range of uint32. + * + * For backwards compatibility, we want to accept inputs that are given + * with a minus sign, so allow the input value if it matches after either + * signed or unsigned extension to long. + * + * To ensure consistent results on 32-bit and 64-bit platforms, make sure + * the error message is the same as if strtoul() had returned ERANGE. + */ +#if PG_UINT32_MAX != ULONG_MAX + if (cvt != (unsigned long) result && + cvt != (unsigned long) ((int) result)) + ereturn(escontext, 0, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value \"%s\" is out of range for type %s", + s, typname))); +#endif + + return result; +} + +/* + * Convert input string to an unsigned 64 bit integer. + * + * Allows any number of leading or trailing whitespace characters. + * + * If endloc isn't NULL, store a pointer to the rest of the string there, + * so that caller can parse the rest. Otherwise, it's an error if anything + * but whitespace follows. + * + * typname is what is reported in error messges. + * + * If escontext points to an ErrorSaveContext node, that is filled instead + * of throwing an error; the caller must check SOFT_ERROR_OCCURRED() + * to detect errors. + */ +uint64 +uint64in_subr(const char *s, char **endloc, + const char *typname, Node *escontext) +{ + uint64 result; + char *endptr; + + errno = 0; + result = strtou64(s, &endptr, 0); + + /* + * strtoul[l] normally only sets ERANGE. 
On some systems it may also set + * EINVAL, which simply means it couldn't parse the input string. Be sure + * to report that the same way as the standard error indication (that + * endptr == s). + */ + if ((errno && errno != ERANGE) || endptr == s) + ereturn(escontext, 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + typname, s))); + + if (errno == ERANGE) + ereturn(escontext, 0, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value \"%s\" is out of range for type %s", + s, typname))); + + if (endloc) + { + /* caller wants to deal with rest of string */ + *endloc = endptr; + } + else + { + /* allow only whitespace after number */ + while (*endptr && isspace((unsigned char) *endptr)) + endptr++; + if (*endptr) + ereturn(escontext, 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + typname, s))); + } + + return result; +} + +/* + * pg_itoa: converts a signed 16-bit integer to its string representation + * and returns strlen(a). + * + * Caller must ensure that 'a' points to enough memory to hold the result + * (at least 7 bytes, counting a leading sign and trailing NUL). + * + * It doesn't seem worth implementing this separately. + */ +int +pg_itoa(int16 i, char *a) +{ + return pg_ltoa((int32) i, a); +} + +/* + * pg_ultoa_n: converts an unsigned 32-bit integer to its string representation, + * not NUL-terminated, and returns the length of that string representation + * + * Caller must ensure that 'a' points to enough memory to hold the result (at + * least 10 bytes) + */ +int +pg_ultoa_n(uint32 value, char *a) +{ + int olength, + i = 0; + + /* Degenerate case */ + if (value == 0) + { + *a = '0'; + return 1; + } + + olength = decimalLength32(value); + + /* Compute the result string. */ + while (value >= 10000) + { + const uint32 c = value - 10000 * (value / 10000); + const uint32 c0 = (c % 100) << 1; + const uint32 c1 = (c / 100) << 1; + + char *pos = a + olength - i; + + value /= 10000; + + memcpy(pos - 2, DIGIT_TABLE + c0, 2); + memcpy(pos - 4, DIGIT_TABLE + c1, 2); + i += 4; + } + if (value >= 100) + { + const uint32 c = (value % 100) << 1; + + char *pos = a + olength - i; + + value /= 100; + + memcpy(pos - 2, DIGIT_TABLE + c, 2); + i += 2; + } + if (value >= 10) + { + const uint32 c = value << 1; + + char *pos = a + olength - i; + + memcpy(pos - 2, DIGIT_TABLE + c, 2); + } + else + { + *a = (char) ('0' + value); + } + + return olength; +} + +/* + * pg_ltoa: converts a signed 32-bit integer to its string representation and + * returns strlen(a). + * + * It is the caller's responsibility to ensure that a is at least 12 bytes long, + * which is enough room to hold a minus sign, a maximally long int32, and the + * above terminating NUL. + */ +int +pg_ltoa(int32 value, char *a) +{ + uint32 uvalue = (uint32) value; + int len = 0; + + if (value < 0) + { + uvalue = (uint32) 0 - uvalue; + a[len++] = '-'; + } + len += pg_ultoa_n(uvalue, a + len); + a[len] = '\0'; + return len; +} + +/* + * Get the decimal representation, not NUL-terminated, and return the length of + * same. Caller must ensure that a points to at least MAXINT8LEN bytes. + */ +int +pg_ulltoa_n(uint64 value, char *a) +{ + int olength, + i = 0; + uint32 value2; + + /* Degenerate case */ + if (value == 0) + { + *a = '0'; + return 1; + } + + olength = decimalLength64(value); + + /* Compute the result string. 
*/ + while (value >= 100000000) + { + const uint64 q = value / 100000000; + uint32 value3 = (uint32) (value - 100000000 * q); + + const uint32 c = value3 % 10000; + const uint32 d = value3 / 10000; + const uint32 c0 = (c % 100) << 1; + const uint32 c1 = (c / 100) << 1; + const uint32 d0 = (d % 100) << 1; + const uint32 d1 = (d / 100) << 1; + + char *pos = a + olength - i; + + value = q; + + memcpy(pos - 2, DIGIT_TABLE + c0, 2); + memcpy(pos - 4, DIGIT_TABLE + c1, 2); + memcpy(pos - 6, DIGIT_TABLE + d0, 2); + memcpy(pos - 8, DIGIT_TABLE + d1, 2); + i += 8; + } + + /* Switch to 32-bit for speed */ + value2 = (uint32) value; + + if (value2 >= 10000) + { + const uint32 c = value2 - 10000 * (value2 / 10000); + const uint32 c0 = (c % 100) << 1; + const uint32 c1 = (c / 100) << 1; + + char *pos = a + olength - i; + + value2 /= 10000; + + memcpy(pos - 2, DIGIT_TABLE + c0, 2); + memcpy(pos - 4, DIGIT_TABLE + c1, 2); + i += 4; + } + if (value2 >= 100) + { + const uint32 c = (value2 % 100) << 1; + char *pos = a + olength - i; + + value2 /= 100; + + memcpy(pos - 2, DIGIT_TABLE + c, 2); + i += 2; + } + if (value2 >= 10) + { + const uint32 c = value2 << 1; + char *pos = a + olength - i; + + memcpy(pos - 2, DIGIT_TABLE + c, 2); + } + else + *a = (char) ('0' + value2); + + return olength; +} + +/* + * pg_lltoa: converts a signed 64-bit integer to its string representation and + * returns strlen(a). + * + * Caller must ensure that 'a' points to enough memory to hold the result + * (at least MAXINT8LEN + 1 bytes, counting a leading sign and trailing NUL). + */ +int +pg_lltoa(int64 value, char *a) +{ + uint64 uvalue = value; + int len = 0; + + if (value < 0) + { + uvalue = (uint64) 0 - uvalue; + a[len++] = '-'; + } + + len += pg_ulltoa_n(uvalue, a + len); + a[len] = '\0'; + return len; +} + + +/* + * pg_ultostr_zeropad + * Converts 'value' into a decimal string representation stored at 'str'. + * 'minwidth' specifies the minimum width of the result; any extra space + * is filled up by prefixing the number with zeros. + * + * Returns the ending address of the string result (the last character written + * plus 1). Note that no NUL terminator is written. + * + * The intended use-case for this function is to build strings that contain + * multiple individual numbers, for example: + * + * str = pg_ultostr_zeropad(str, hours, 2); + * *str++ = ':'; + * str = pg_ultostr_zeropad(str, mins, 2); + * *str++ = ':'; + * str = pg_ultostr_zeropad(str, secs, 2); + * *str = '\0'; + * + * Note: Caller must ensure that 'str' points to enough memory to hold the + * result. + */ +char * +pg_ultostr_zeropad(char *str, uint32 value, int32 minwidth) +{ + int len; + + Assert(minwidth > 0); + + if (value < 100 && minwidth == 2) /* Short cut for common case */ + { + memcpy(str, DIGIT_TABLE + value * 2, 2); + return str + 2; + } + + len = pg_ultoa_n(value, str); + if (len >= minwidth) + return str + len; + + memmove(str + minwidth - len, str, len); + memset(str, '0', minwidth - len); + return str + minwidth; +} + +/* + * pg_ultostr + * Converts 'value' into a decimal string representation stored at 'str'. + * + * Returns the ending address of the string result (the last character written + * plus 1). Note that no NUL terminator is written. 
+ * + * The intended use-case for this function is to build strings that contain + * multiple individual numbers, for example: + * + * str = pg_ultostr(str, a); + * *str++ = ' '; + * str = pg_ultostr(str, b); + * *str = '\0'; + * + * Note: Caller must ensure that 'str' points to enough memory to hold the + * result. + */ +char * +pg_ultostr(char *str, uint32 value) +{ + int len = pg_ultoa_n(value, str); + + return str + len; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/oid.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/oid.c new file mode 100644 index 00000000000..3f7af5b3a06 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/oid.c @@ -0,0 +1,392 @@ +/*------------------------------------------------------------------------- + * + * oid.c + * Functions for the built-in type Oid ... also oidvector. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/oid.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <ctype.h> +#include <limits.h> + +#include "catalog/pg_type.h" +#include "libpq/pqformat.h" +#include "nodes/miscnodes.h" +#include "nodes/value.h" +#include "utils/array.h" +#include "utils/builtins.h" + + +#define OidVectorSize(n) (offsetof(oidvector, values) + (n) * sizeof(Oid)) + + +/***************************************************************************** + * USER I/O ROUTINES * + *****************************************************************************/ + +Datum +oidin(PG_FUNCTION_ARGS) +{ + char *s = PG_GETARG_CSTRING(0); + Oid result; + + result = uint32in_subr(s, NULL, "oid", fcinfo->context); + PG_RETURN_OID(result); +} + +Datum +oidout(PG_FUNCTION_ARGS) +{ + Oid o = PG_GETARG_OID(0); + char *result = (char *) palloc(12); + + snprintf(result, 12, "%u", o); + PG_RETURN_CSTRING(result); +} + +/* + * oidrecv - converts external binary format to oid + */ +Datum +oidrecv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + + PG_RETURN_OID((Oid) pq_getmsgint(buf, sizeof(Oid))); +} + +/* + * oidsend - converts oid to binary format + */ +Datum +oidsend(PG_FUNCTION_ARGS) +{ + Oid arg1 = PG_GETARG_OID(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint32(&buf, arg1); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * construct oidvector given a raw array of Oids + * + * If oids is NULL then caller must fill values[] afterward + */ +oidvector * +buildoidvector(const Oid *oids, int n) +{ + oidvector *result; + + result = (oidvector *) palloc0(OidVectorSize(n)); + + if (n > 0 && oids) + memcpy(result->values, oids, n * sizeof(Oid)); + + /* + * Attach standard array header. For historical reasons, we set the index + * lower bound to 0 not 1. + */ + SET_VARSIZE(result, OidVectorSize(n)); + result->ndim = 1; + result->dataoffset = 0; /* never any nulls */ + result->elemtype = OIDOID; + result->dim1 = n; + result->lbound1 = 0; + + return result; +} + +/* + * oidvectorin - converts "num num ..." 
to internal form + */ +Datum +oidvectorin(PG_FUNCTION_ARGS) +{ + char *oidString = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + oidvector *result; + int nalloc; + int n; + + nalloc = 32; /* arbitrary initial size guess */ + result = (oidvector *) palloc0(OidVectorSize(nalloc)); + + for (n = 0;; n++) + { + while (*oidString && isspace((unsigned char) *oidString)) + oidString++; + if (*oidString == '\0') + break; + + if (n >= nalloc) + { + nalloc *= 2; + result = (oidvector *) repalloc(result, OidVectorSize(nalloc)); + } + + result->values[n] = uint32in_subr(oidString, &oidString, + "oid", escontext); + if (SOFT_ERROR_OCCURRED(escontext)) + PG_RETURN_NULL(); + } + + SET_VARSIZE(result, OidVectorSize(n)); + result->ndim = 1; + result->dataoffset = 0; /* never any nulls */ + result->elemtype = OIDOID; + result->dim1 = n; + result->lbound1 = 0; + + PG_RETURN_POINTER(result); +} + +/* + * oidvectorout - converts internal form to "num num ..." + */ +Datum +oidvectorout(PG_FUNCTION_ARGS) +{ + oidvector *oidArray = (oidvector *) PG_GETARG_POINTER(0); + int num, + nnums = oidArray->dim1; + char *rp; + char *result; + + /* assumes sign, 10 digits, ' ' */ + rp = result = (char *) palloc(nnums * 12 + 1); + for (num = 0; num < nnums; num++) + { + if (num != 0) + *rp++ = ' '; + sprintf(rp, "%u", oidArray->values[num]); + while (*++rp != '\0') + ; + } + *rp = '\0'; + PG_RETURN_CSTRING(result); +} + +/* + * oidvectorrecv - converts external binary format to oidvector + */ +Datum +oidvectorrecv(PG_FUNCTION_ARGS) +{ + LOCAL_FCINFO(locfcinfo, 3); + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + oidvector *result; + + /* + * Normally one would call array_recv() using DirectFunctionCall3, but + * that does not work since array_recv wants to cache some data using + * fcinfo->flinfo->fn_extra. So we need to pass it our own flinfo + * parameter. + */ + InitFunctionCallInfoData(*locfcinfo, fcinfo->flinfo, 3, + InvalidOid, NULL, NULL); + + locfcinfo->args[0].value = PointerGetDatum(buf); + locfcinfo->args[0].isnull = false; + locfcinfo->args[1].value = ObjectIdGetDatum(OIDOID); + locfcinfo->args[1].isnull = false; + locfcinfo->args[2].value = Int32GetDatum(-1); + locfcinfo->args[2].isnull = false; + + result = (oidvector *) DatumGetPointer(array_recv(locfcinfo)); + + Assert(!locfcinfo->isnull); + + /* sanity checks: oidvector must be 1-D, 0-based, no nulls */ + if (ARR_NDIM(result) != 1 || + ARR_HASNULL(result) || + ARR_ELEMTYPE(result) != OIDOID || + ARR_LBOUND(result)[0] != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("invalid oidvector data"))); + + PG_RETURN_POINTER(result); +} + +/* + * oidvectorsend - converts oidvector to binary format + */ +Datum +oidvectorsend(PG_FUNCTION_ARGS) +{ + return array_send(fcinfo); +} + +/* + * oidparse - get OID from ICONST/FCONST node + */ +Oid +oidparse(Node *node) +{ + switch (nodeTag(node)) + { + case T_Integer: + return intVal(node); + case T_Float: + + /* + * Values too large for int4 will be represented as Float + * constants by the lexer. Accept these if they are valid OID + * strings. 
+ */ + return uint32in_subr(castNode(Float, node)->fval, NULL, + "oid", NULL); + default: + elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node)); + } + return InvalidOid; /* keep compiler quiet */ +} + +/* qsort comparison function for Oids */ +int +oid_cmp(const void *p1, const void *p2) +{ + Oid v1 = *((const Oid *) p1); + Oid v2 = *((const Oid *) p2); + + if (v1 < v2) + return -1; + if (v1 > v2) + return 1; + return 0; +} + + +/***************************************************************************** + * PUBLIC ROUTINES * + *****************************************************************************/ + +Datum +oideq(PG_FUNCTION_ARGS) +{ + Oid arg1 = PG_GETARG_OID(0); + Oid arg2 = PG_GETARG_OID(1); + + PG_RETURN_BOOL(arg1 == arg2); +} + +Datum +oidne(PG_FUNCTION_ARGS) +{ + Oid arg1 = PG_GETARG_OID(0); + Oid arg2 = PG_GETARG_OID(1); + + PG_RETURN_BOOL(arg1 != arg2); +} + +Datum +oidlt(PG_FUNCTION_ARGS) +{ + Oid arg1 = PG_GETARG_OID(0); + Oid arg2 = PG_GETARG_OID(1); + + PG_RETURN_BOOL(arg1 < arg2); +} + +Datum +oidle(PG_FUNCTION_ARGS) +{ + Oid arg1 = PG_GETARG_OID(0); + Oid arg2 = PG_GETARG_OID(1); + + PG_RETURN_BOOL(arg1 <= arg2); +} + +Datum +oidge(PG_FUNCTION_ARGS) +{ + Oid arg1 = PG_GETARG_OID(0); + Oid arg2 = PG_GETARG_OID(1); + + PG_RETURN_BOOL(arg1 >= arg2); +} + +Datum +oidgt(PG_FUNCTION_ARGS) +{ + Oid arg1 = PG_GETARG_OID(0); + Oid arg2 = PG_GETARG_OID(1); + + PG_RETURN_BOOL(arg1 > arg2); +} + +Datum +oidlarger(PG_FUNCTION_ARGS) +{ + Oid arg1 = PG_GETARG_OID(0); + Oid arg2 = PG_GETARG_OID(1); + + PG_RETURN_OID((arg1 > arg2) ? arg1 : arg2); +} + +Datum +oidsmaller(PG_FUNCTION_ARGS) +{ + Oid arg1 = PG_GETARG_OID(0); + Oid arg2 = PG_GETARG_OID(1); + + PG_RETURN_OID((arg1 < arg2) ? arg1 : arg2); +} + +Datum +oidvectoreq(PG_FUNCTION_ARGS) +{ + int32 cmp = DatumGetInt32(btoidvectorcmp(fcinfo)); + + PG_RETURN_BOOL(cmp == 0); +} + +Datum +oidvectorne(PG_FUNCTION_ARGS) +{ + int32 cmp = DatumGetInt32(btoidvectorcmp(fcinfo)); + + PG_RETURN_BOOL(cmp != 0); +} + +Datum +oidvectorlt(PG_FUNCTION_ARGS) +{ + int32 cmp = DatumGetInt32(btoidvectorcmp(fcinfo)); + + PG_RETURN_BOOL(cmp < 0); +} + +Datum +oidvectorle(PG_FUNCTION_ARGS) +{ + int32 cmp = DatumGetInt32(btoidvectorcmp(fcinfo)); + + PG_RETURN_BOOL(cmp <= 0); +} + +Datum +oidvectorge(PG_FUNCTION_ARGS) +{ + int32 cmp = DatumGetInt32(btoidvectorcmp(fcinfo)); + + PG_RETURN_BOOL(cmp >= 0); +} + +Datum +oidvectorgt(PG_FUNCTION_ARGS) +{ + int32 cmp = DatumGetInt32(btoidvectorcmp(fcinfo)); + + PG_RETURN_BOOL(cmp > 0); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/oracle_compat.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/oracle_compat.c new file mode 100644 index 00000000000..3b5b794afb3 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/oracle_compat.c @@ -0,0 +1,1157 @@ +/*------------------------------------------------------------------------- + * oracle_compat.c + * Oracle compatible functions. 
+ * + * Copyright (c) 1996-2023, PostgreSQL Global Development Group + * + * Author: Edmund Mergl <E.Mergl@bawue.de> + * Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org> + * + * + * IDENTIFICATION + * src/backend/utils/adt/oracle_compat.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "common/int.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/formatting.h" +#include "utils/memutils.h" +#include "varatt.h" + + +static text *dotrim(const char *string, int stringlen, + const char *set, int setlen, + bool doltrim, bool dortrim); +static bytea *dobyteatrim(bytea *string, bytea *set, + bool doltrim, bool dortrim); + + +/******************************************************************** + * + * lower + * + * Syntax: + * + * text lower(text string) + * + * Purpose: + * + * Returns string, with all letters forced to lowercase. + * + ********************************************************************/ + +Datum +lower(PG_FUNCTION_ARGS) +{ + text *in_string = PG_GETARG_TEXT_PP(0); + char *out_string; + text *result; + + out_string = str_tolower(VARDATA_ANY(in_string), + VARSIZE_ANY_EXHDR(in_string), + PG_GET_COLLATION()); + result = cstring_to_text(out_string); + pfree(out_string); + + PG_RETURN_TEXT_P(result); +} + + +/******************************************************************** + * + * upper + * + * Syntax: + * + * text upper(text string) + * + * Purpose: + * + * Returns string, with all letters forced to uppercase. + * + ********************************************************************/ + +Datum +upper(PG_FUNCTION_ARGS) +{ + text *in_string = PG_GETARG_TEXT_PP(0); + char *out_string; + text *result; + + out_string = str_toupper(VARDATA_ANY(in_string), + VARSIZE_ANY_EXHDR(in_string), + PG_GET_COLLATION()); + result = cstring_to_text(out_string); + pfree(out_string); + + PG_RETURN_TEXT_P(result); +} + + +/******************************************************************** + * + * initcap + * + * Syntax: + * + * text initcap(text string) + * + * Purpose: + * + * Returns string, with first letter of each word in uppercase, all + * other letters in lowercase. A word is defined as a sequence of + * alphanumeric characters, delimited by non-alphanumeric + * characters. + * + ********************************************************************/ + +Datum +initcap(PG_FUNCTION_ARGS) +{ + text *in_string = PG_GETARG_TEXT_PP(0); + char *out_string; + text *result; + + out_string = str_initcap(VARDATA_ANY(in_string), + VARSIZE_ANY_EXHDR(in_string), + PG_GET_COLLATION()); + result = cstring_to_text(out_string); + pfree(out_string); + + PG_RETURN_TEXT_P(result); +} + + +/******************************************************************** + * + * lpad + * + * Syntax: + * + * text lpad(text string1, int4 len, text string2) + * + * Purpose: + * + * Returns string1, left-padded to length len with the sequence of + * characters in string2. If len is less than the length of string1, + * instead truncate (on the right) to len. 
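+ * For example, lpad('hi', 5, 'xy') yields 'xyxhi', while lpad('hello', 3, 'xy') yields 'hel'.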
+ * + ********************************************************************/ + +Datum +lpad(PG_FUNCTION_ARGS) +{ + text *string1 = PG_GETARG_TEXT_PP(0); + int32 len = PG_GETARG_INT32(1); + text *string2 = PG_GETARG_TEXT_PP(2); + text *ret; + char *ptr1, + *ptr2, + *ptr2start, + *ptr2end, + *ptr_ret; + int m, + s1len, + s2len; + int bytelen; + + /* Negative len is silently taken as zero */ + if (len < 0) + len = 0; + + s1len = VARSIZE_ANY_EXHDR(string1); + if (s1len < 0) + s1len = 0; /* shouldn't happen */ + + s2len = VARSIZE_ANY_EXHDR(string2); + if (s2len < 0) + s2len = 0; /* shouldn't happen */ + + s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len); + + if (s1len > len) + s1len = len; /* truncate string1 to len chars */ + + if (s2len <= 0) + len = s1len; /* nothing to pad with, so don't pad */ + + /* compute worst-case output length */ + if (unlikely(pg_mul_s32_overflow(pg_database_encoding_max_length(), len, + &bytelen)) || + unlikely(pg_add_s32_overflow(bytelen, VARHDRSZ, &bytelen)) || + unlikely(!AllocSizeIsValid(bytelen))) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("requested length too large"))); + + ret = (text *) palloc(bytelen); + + m = len - s1len; + + ptr2 = ptr2start = VARDATA_ANY(string2); + ptr2end = ptr2 + s2len; + ptr_ret = VARDATA(ret); + + while (m--) + { + int mlen = pg_mblen(ptr2); + + memcpy(ptr_ret, ptr2, mlen); + ptr_ret += mlen; + ptr2 += mlen; + if (ptr2 == ptr2end) /* wrap around at end of s2 */ + ptr2 = ptr2start; + } + + ptr1 = VARDATA_ANY(string1); + + while (s1len--) + { + int mlen = pg_mblen(ptr1); + + memcpy(ptr_ret, ptr1, mlen); + ptr_ret += mlen; + ptr1 += mlen; + } + + SET_VARSIZE(ret, ptr_ret - (char *) ret); + + PG_RETURN_TEXT_P(ret); +} + + +/******************************************************************** + * + * rpad + * + * Syntax: + * + * text rpad(text string1, int4 len, text string2) + * + * Purpose: + * + * Returns string1, right-padded to length len with the sequence of + * characters in string2. If len is less than the length of string1, + * instead truncate (on the right) to len. 
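+ * For example, rpad('hi', 5, 'xy') yields 'hixyx', while rpad('hello', 3, 'xy') yields 'hel'.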
+ * + ********************************************************************/ + +Datum +rpad(PG_FUNCTION_ARGS) +{ + text *string1 = PG_GETARG_TEXT_PP(0); + int32 len = PG_GETARG_INT32(1); + text *string2 = PG_GETARG_TEXT_PP(2); + text *ret; + char *ptr1, + *ptr2, + *ptr2start, + *ptr2end, + *ptr_ret; + int m, + s1len, + s2len; + int bytelen; + + /* Negative len is silently taken as zero */ + if (len < 0) + len = 0; + + s1len = VARSIZE_ANY_EXHDR(string1); + if (s1len < 0) + s1len = 0; /* shouldn't happen */ + + s2len = VARSIZE_ANY_EXHDR(string2); + if (s2len < 0) + s2len = 0; /* shouldn't happen */ + + s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len); + + if (s1len > len) + s1len = len; /* truncate string1 to len chars */ + + if (s2len <= 0) + len = s1len; /* nothing to pad with, so don't pad */ + + /* compute worst-case output length */ + if (unlikely(pg_mul_s32_overflow(pg_database_encoding_max_length(), len, + &bytelen)) || + unlikely(pg_add_s32_overflow(bytelen, VARHDRSZ, &bytelen)) || + unlikely(!AllocSizeIsValid(bytelen))) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("requested length too large"))); + + ret = (text *) palloc(bytelen); + + m = len - s1len; + + ptr1 = VARDATA_ANY(string1); + ptr_ret = VARDATA(ret); + + while (s1len--) + { + int mlen = pg_mblen(ptr1); + + memcpy(ptr_ret, ptr1, mlen); + ptr_ret += mlen; + ptr1 += mlen; + } + + ptr2 = ptr2start = VARDATA_ANY(string2); + ptr2end = ptr2 + s2len; + + while (m--) + { + int mlen = pg_mblen(ptr2); + + memcpy(ptr_ret, ptr2, mlen); + ptr_ret += mlen; + ptr2 += mlen; + if (ptr2 == ptr2end) /* wrap around at end of s2 */ + ptr2 = ptr2start; + } + + SET_VARSIZE(ret, ptr_ret - (char *) ret); + + PG_RETURN_TEXT_P(ret); +} + + +/******************************************************************** + * + * btrim + * + * Syntax: + * + * text btrim(text string, text set) + * + * Purpose: + * + * Returns string with characters removed from the front and back + * up to the first character not in set. + * + ********************************************************************/ + +Datum +btrim(PG_FUNCTION_ARGS) +{ + text *string = PG_GETARG_TEXT_PP(0); + text *set = PG_GETARG_TEXT_PP(1); + text *ret; + + ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), + VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set), + true, true); + + PG_RETURN_TEXT_P(ret); +} + +/******************************************************************** + * + * btrim1 --- btrim with set fixed as ' ' + * + ********************************************************************/ + +Datum +btrim1(PG_FUNCTION_ARGS) +{ + text *string = PG_GETARG_TEXT_PP(0); + text *ret; + + ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), + " ", 1, + true, true); + + PG_RETURN_TEXT_P(ret); +} + +/* + * Common implementation for btrim, ltrim, rtrim + */ +static text * +dotrim(const char *string, int stringlen, + const char *set, int setlen, + bool doltrim, bool dortrim) +{ + int i; + + /* Nothing to do if either string or set is empty */ + if (stringlen > 0 && setlen > 0) + { + if (pg_database_encoding_max_length() > 1) + { + /* + * In the multibyte-encoding case, build arrays of pointers to + * character starts, so that we can avoid inefficient checks in + * the inner loops. 
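+ * Each trimming pass then compares whole multibyte characters against the
+ * set members by byte length and memcmp().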
+ */ + const char **stringchars; + const char **setchars; + int *stringmblen; + int *setmblen; + int stringnchars; + int setnchars; + int resultndx; + int resultnchars; + const char *p; + int len; + int mblen; + const char *str_pos; + int str_len; + + stringchars = (const char **) palloc(stringlen * sizeof(char *)); + stringmblen = (int *) palloc(stringlen * sizeof(int)); + stringnchars = 0; + p = string; + len = stringlen; + while (len > 0) + { + stringchars[stringnchars] = p; + stringmblen[stringnchars] = mblen = pg_mblen(p); + stringnchars++; + p += mblen; + len -= mblen; + } + + setchars = (const char **) palloc(setlen * sizeof(char *)); + setmblen = (int *) palloc(setlen * sizeof(int)); + setnchars = 0; + p = set; + len = setlen; + while (len > 0) + { + setchars[setnchars] = p; + setmblen[setnchars] = mblen = pg_mblen(p); + setnchars++; + p += mblen; + len -= mblen; + } + + resultndx = 0; /* index in stringchars[] */ + resultnchars = stringnchars; + + if (doltrim) + { + while (resultnchars > 0) + { + str_pos = stringchars[resultndx]; + str_len = stringmblen[resultndx]; + for (i = 0; i < setnchars; i++) + { + if (str_len == setmblen[i] && + memcmp(str_pos, setchars[i], str_len) == 0) + break; + } + if (i >= setnchars) + break; /* no match here */ + string += str_len; + stringlen -= str_len; + resultndx++; + resultnchars--; + } + } + + if (dortrim) + { + while (resultnchars > 0) + { + str_pos = stringchars[resultndx + resultnchars - 1]; + str_len = stringmblen[resultndx + resultnchars - 1]; + for (i = 0; i < setnchars; i++) + { + if (str_len == setmblen[i] && + memcmp(str_pos, setchars[i], str_len) == 0) + break; + } + if (i >= setnchars) + break; /* no match here */ + stringlen -= str_len; + resultnchars--; + } + } + + pfree(stringchars); + pfree(stringmblen); + pfree(setchars); + pfree(setmblen); + } + else + { + /* + * In the single-byte-encoding case, we don't need such overhead. 
+ */ + if (doltrim) + { + while (stringlen > 0) + { + char str_ch = *string; + + for (i = 0; i < setlen; i++) + { + if (str_ch == set[i]) + break; + } + if (i >= setlen) + break; /* no match here */ + string++; + stringlen--; + } + } + + if (dortrim) + { + while (stringlen > 0) + { + char str_ch = string[stringlen - 1]; + + for (i = 0; i < setlen; i++) + { + if (str_ch == set[i]) + break; + } + if (i >= setlen) + break; /* no match here */ + stringlen--; + } + } + } + } + + /* Return selected portion of string */ + return cstring_to_text_with_len(string, stringlen); +} + +/* + * Common implementation for bytea versions of btrim, ltrim, rtrim + */ +bytea * +dobyteatrim(bytea *string, bytea *set, bool doltrim, bool dortrim) +{ + bytea *ret; + char *ptr, + *end, + *ptr2, + *ptr2start, + *end2; + int m, + stringlen, + setlen; + + stringlen = VARSIZE_ANY_EXHDR(string); + setlen = VARSIZE_ANY_EXHDR(set); + + if (stringlen <= 0 || setlen <= 0) + return string; + + m = stringlen; + ptr = VARDATA_ANY(string); + end = ptr + stringlen - 1; + ptr2start = VARDATA_ANY(set); + end2 = ptr2start + setlen - 1; + + if (doltrim) + { + while (m > 0) + { + ptr2 = ptr2start; + while (ptr2 <= end2) + { + if (*ptr == *ptr2) + break; + ++ptr2; + } + if (ptr2 > end2) + break; + ptr++; + m--; + } + } + + if (dortrim) + { + while (m > 0) + { + ptr2 = ptr2start; + while (ptr2 <= end2) + { + if (*end == *ptr2) + break; + ++ptr2; + } + if (ptr2 > end2) + break; + end--; + m--; + } + } + + ret = (bytea *) palloc(VARHDRSZ + m); + SET_VARSIZE(ret, VARHDRSZ + m); + memcpy(VARDATA(ret), ptr, m); + return ret; +} + +/******************************************************************** + * + * byteatrim + * + * Syntax: + * + * bytea byteatrim(bytea string, bytea set) + * + * Purpose: + * + * Returns string with characters removed from the front and back + * up to the first character not in set. + * + * Cloned from btrim and modified as required. + ********************************************************************/ + +Datum +byteatrim(PG_FUNCTION_ARGS) +{ + bytea *string = PG_GETARG_BYTEA_PP(0); + bytea *set = PG_GETARG_BYTEA_PP(1); + bytea *ret; + + ret = dobyteatrim(string, set, true, true); + + PG_RETURN_BYTEA_P(ret); +} + +/******************************************************************** + * + * bytealtrim + * + * Syntax: + * + * bytea bytealtrim(bytea string, bytea set) + * + * Purpose: + * + * Returns string with initial characters removed up to the first + * character not in set. + * + ********************************************************************/ + +Datum +bytealtrim(PG_FUNCTION_ARGS) +{ + bytea *string = PG_GETARG_BYTEA_PP(0); + bytea *set = PG_GETARG_BYTEA_PP(1); + bytea *ret; + + ret = dobyteatrim(string, set, true, false); + + PG_RETURN_BYTEA_P(ret); +} + +/******************************************************************** + * + * byteartrim + * + * Syntax: + * + * bytea byteartrim(bytea string, bytea set) + * + * Purpose: + * + * Returns string with final characters removed after the last + * character not in set. 
+ * + ********************************************************************/ + +Datum +byteartrim(PG_FUNCTION_ARGS) +{ + bytea *string = PG_GETARG_BYTEA_PP(0); + bytea *set = PG_GETARG_BYTEA_PP(1); + bytea *ret; + + ret = dobyteatrim(string, set, false, true); + + PG_RETURN_BYTEA_P(ret); +} + +/******************************************************************** + * + * ltrim + * + * Syntax: + * + * text ltrim(text string, text set) + * + * Purpose: + * + * Returns string with initial characters removed up to the first + * character not in set. + * + ********************************************************************/ + +Datum +ltrim(PG_FUNCTION_ARGS) +{ + text *string = PG_GETARG_TEXT_PP(0); + text *set = PG_GETARG_TEXT_PP(1); + text *ret; + + ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), + VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set), + true, false); + + PG_RETURN_TEXT_P(ret); +} + +/******************************************************************** + * + * ltrim1 --- ltrim with set fixed as ' ' + * + ********************************************************************/ + +Datum +ltrim1(PG_FUNCTION_ARGS) +{ + text *string = PG_GETARG_TEXT_PP(0); + text *ret; + + ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), + " ", 1, + true, false); + + PG_RETURN_TEXT_P(ret); +} + +/******************************************************************** + * + * rtrim + * + * Syntax: + * + * text rtrim(text string, text set) + * + * Purpose: + * + * Returns string with final characters removed after the last + * character not in set. + * + ********************************************************************/ + +Datum +rtrim(PG_FUNCTION_ARGS) +{ + text *string = PG_GETARG_TEXT_PP(0); + text *set = PG_GETARG_TEXT_PP(1); + text *ret; + + ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), + VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set), + false, true); + + PG_RETURN_TEXT_P(ret); +} + +/******************************************************************** + * + * rtrim1 --- rtrim with set fixed as ' ' + * + ********************************************************************/ + +Datum +rtrim1(PG_FUNCTION_ARGS) +{ + text *string = PG_GETARG_TEXT_PP(0); + text *ret; + + ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), + " ", 1, + false, true); + + PG_RETURN_TEXT_P(ret); +} + + +/******************************************************************** + * + * translate + * + * Syntax: + * + * text translate(text string, text from, text to) + * + * Purpose: + * + * Returns string after replacing all occurrences of characters in from + * with the corresponding character in to. If from is longer than to, + * occurrences of the extra characters in from are deleted. + * Improved by Edwin Ramirez <ramirez@doc.mssm.edu>. 
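+ * For example, translate('12345', '143', 'ax') yields 'a2x5'.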
+ * + ********************************************************************/ + +Datum +translate(PG_FUNCTION_ARGS) +{ + text *string = PG_GETARG_TEXT_PP(0); + text *from = PG_GETARG_TEXT_PP(1); + text *to = PG_GETARG_TEXT_PP(2); + text *result; + char *from_ptr, + *to_ptr, + *to_end; + char *source, + *target; + int m, + fromlen, + tolen, + retlen, + i; + int bytelen; + int len; + int source_len; + int from_index; + + m = VARSIZE_ANY_EXHDR(string); + if (m <= 0) + PG_RETURN_TEXT_P(string); + source = VARDATA_ANY(string); + + fromlen = VARSIZE_ANY_EXHDR(from); + from_ptr = VARDATA_ANY(from); + tolen = VARSIZE_ANY_EXHDR(to); + to_ptr = VARDATA_ANY(to); + to_end = to_ptr + tolen; + + /* + * The worst-case expansion is to substitute a max-length character for a + * single-byte character at each position of the string. + */ + if (unlikely(pg_mul_s32_overflow(pg_database_encoding_max_length(), m, + &bytelen)) || + unlikely(pg_add_s32_overflow(bytelen, VARHDRSZ, &bytelen)) || + unlikely(!AllocSizeIsValid(bytelen))) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("requested length too large"))); + + result = (text *) palloc(bytelen); + + target = VARDATA(result); + retlen = 0; + + while (m > 0) + { + source_len = pg_mblen(source); + from_index = 0; + + for (i = 0; i < fromlen; i += len) + { + len = pg_mblen(&from_ptr[i]); + if (len == source_len && + memcmp(source, &from_ptr[i], len) == 0) + break; + + from_index++; + } + if (i < fromlen) + { + /* substitute, or delete if no corresponding "to" character */ + char *p = to_ptr; + + for (i = 0; i < from_index; i++) + { + if (p >= to_end) + break; + p += pg_mblen(p); + } + if (p < to_end) + { + len = pg_mblen(p); + memcpy(target, p, len); + target += len; + retlen += len; + } + } + else + { + /* no match, so copy */ + memcpy(target, source, source_len); + target += source_len; + retlen += source_len; + } + + source += source_len; + m -= source_len; + } + + SET_VARSIZE(result, retlen + VARHDRSZ); + + /* + * The function result is probably much bigger than needed, if we're using + * a multibyte encoding, but it's not worth reallocating it; the result + * probably won't live long anyway. + */ + + PG_RETURN_TEXT_P(result); +} + +/******************************************************************** + * + * ascii + * + * Syntax: + * + * int ascii(text string) + * + * Purpose: + * + * Returns the decimal representation of the first character from + * string. + * If the string is empty we return 0. + * If the database encoding is UTF8, we return the Unicode codepoint. + * If the database encoding is any other multi-byte encoding, we + * return the value of the first byte if it is an ASCII character + * (range 1 .. 127), or raise an error. + * For all other encodings we return the value of the first byte, + * (range 1..255). 
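+ * For example, ascii('x') yields 120.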
+ * + ********************************************************************/ + +Datum +ascii(PG_FUNCTION_ARGS) +{ + text *string = PG_GETARG_TEXT_PP(0); + int encoding = GetDatabaseEncoding(); + unsigned char *data; + + if (VARSIZE_ANY_EXHDR(string) <= 0) + PG_RETURN_INT32(0); + + data = (unsigned char *) VARDATA_ANY(string); + + if (encoding == PG_UTF8 && *data > 127) + { + /* return the code point for Unicode */ + + int result = 0, + tbytes = 0, + i; + + if (*data >= 0xF0) + { + result = *data & 0x07; + tbytes = 3; + } + else if (*data >= 0xE0) + { + result = *data & 0x0F; + tbytes = 2; + } + else + { + Assert(*data > 0xC0); + result = *data & 0x1f; + tbytes = 1; + } + + Assert(tbytes > 0); + + for (i = 1; i <= tbytes; i++) + { + Assert((data[i] & 0xC0) == 0x80); + result = (result << 6) + (data[i] & 0x3f); + } + + PG_RETURN_INT32(result); + } + else + { + if (pg_encoding_max_length(encoding) > 1 && *data > 127) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("requested character too large"))); + + + PG_RETURN_INT32((int32) *data); + } +} + +/******************************************************************** + * + * chr + * + * Syntax: + * + * text chr(int val) + * + * Purpose: + * + * Returns the character having the binary equivalent to val. + * + * For UTF8 we treat the argument as a Unicode code point. + * For other multi-byte encodings we raise an error for arguments + * outside the strict ASCII range (1..127). + * + * It's important that we don't ever return a value that is not valid + * in the database encoding, so that this doesn't become a way for + * invalid data to enter the database. + * + ********************************************************************/ + +Datum +chr (PG_FUNCTION_ARGS) +{ + int32 arg = PG_GETARG_INT32(0); + uint32 cvalue; + text *result; + int encoding = GetDatabaseEncoding(); + + /* + * Error out on arguments that make no sense or that we can't validly + * represent in the encoding. + */ + if (arg < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("character number must be positive"))); + else if (arg == 0) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("null character not permitted"))); + + cvalue = arg; + + if (encoding == PG_UTF8 && cvalue > 127) + { + /* for Unicode we treat the argument as a code point */ + int bytes; + unsigned char *wch; + + /* + * We only allow valid Unicode code points; per RFC3629 that stops at + * U+10FFFF, even though 4-byte UTF8 sequences can hold values up to + * U+1FFFFF. + */ + if (cvalue > 0x0010ffff) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("requested character too large for encoding: %u", + cvalue))); + + if (cvalue > 0xffff) + bytes = 4; + else if (cvalue > 0x07ff) + bytes = 3; + else + bytes = 2; + + result = (text *) palloc(VARHDRSZ + bytes); + SET_VARSIZE(result, VARHDRSZ + bytes); + wch = (unsigned char *) VARDATA(result); + + if (bytes == 2) + { + wch[0] = 0xC0 | ((cvalue >> 6) & 0x1F); + wch[1] = 0x80 | (cvalue & 0x3F); + } + else if (bytes == 3) + { + wch[0] = 0xE0 | ((cvalue >> 12) & 0x0F); + wch[1] = 0x80 | ((cvalue >> 6) & 0x3F); + wch[2] = 0x80 | (cvalue & 0x3F); + } + else + { + wch[0] = 0xF0 | ((cvalue >> 18) & 0x07); + wch[1] = 0x80 | ((cvalue >> 12) & 0x3F); + wch[2] = 0x80 | ((cvalue >> 6) & 0x3F); + wch[3] = 0x80 | (cvalue & 0x3F); + } + + /* + * The preceding range check isn't sufficient, because UTF8 excludes + * Unicode "surrogate pair" codes. Make sure what we created is valid + * UTF8. 
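+ * In particular, code points in the surrogate range U+D800..U+DFFF are
+ * rejected by pg_utf8_islegal().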
+ */ + if (!pg_utf8_islegal(wch, bytes)) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("requested character not valid for encoding: %u", + cvalue))); + } + else + { + bool is_mb; + + is_mb = pg_encoding_max_length(encoding) > 1; + + if ((is_mb && (cvalue > 127)) || (!is_mb && (cvalue > 255))) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("requested character too large for encoding: %u", + cvalue))); + + result = (text *) palloc(VARHDRSZ + 1); + SET_VARSIZE(result, VARHDRSZ + 1); + *VARDATA(result) = (char) cvalue; + } + + PG_RETURN_TEXT_P(result); +} + +/******************************************************************** + * + * repeat + * + * Syntax: + * + * text repeat(text string, int val) + * + * Purpose: + * + * Repeat string by val. + * + ********************************************************************/ + +Datum +repeat(PG_FUNCTION_ARGS) +{ + text *string = PG_GETARG_TEXT_PP(0); + int32 count = PG_GETARG_INT32(1); + text *result; + int slen, + tlen; + int i; + char *cp, + *sp; + + if (count < 0) + count = 0; + + slen = VARSIZE_ANY_EXHDR(string); + + if (unlikely(pg_mul_s32_overflow(count, slen, &tlen)) || + unlikely(pg_add_s32_overflow(tlen, VARHDRSZ, &tlen)) || + unlikely(!AllocSizeIsValid(tlen))) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("requested length too large"))); + + result = (text *) palloc(tlen); + + SET_VARSIZE(result, tlen); + cp = VARDATA(result); + sp = VARDATA_ANY(string); + for (i = 0; i < count; i++) + { + memcpy(cp, sp, slen); + cp += slen; + CHECK_FOR_INTERRUPTS(); + } + + PG_RETURN_TEXT_P(result); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/orderedsetaggs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/orderedsetaggs.c new file mode 100644 index 00000000000..2582a5cf459 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/orderedsetaggs.c @@ -0,0 +1,1432 @@ +/*------------------------------------------------------------------------- + * + * orderedsetaggs.c + * Ordered-set aggregate functions. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/orderedsetaggs.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <math.h> + +#include "catalog/pg_aggregate.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_type.h" +#include "executor/executor.h" +#include "miscadmin.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/optimizer.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/timestamp.h" +#include "utils/tuplesort.h" + + +/* + * Generic support for ordered-set aggregates + * + * The state for an ordered-set aggregate is divided into a per-group struct + * (which is the internal-type transition state datum returned to nodeAgg.c) + * and a per-query struct, which contains data and sub-objects that we can + * create just once per query because they will not change across groups. + * The per-query struct and subsidiary data live in the executor's per-query + * memory context, and go away implicitly at ExecutorEnd(). + * + * These structs are set up during the first call of the transition function. 
+ * Because we allow nodeAgg.c to merge ordered-set aggregates (but not + * hypothetical aggregates) with identical inputs and transition functions, + * this info must not depend on the particular aggregate (ie, particular + * final-function), nor on the direct argument(s) of the aggregate. + */ + +typedef struct OSAPerQueryState +{ + /* Representative Aggref for this aggregate: */ + Aggref *aggref; + /* Memory context containing this struct and other per-query data: */ + MemoryContext qcontext; + /* Context for expression evaluation */ + ExprContext *econtext; + /* Do we expect multiple final-function calls within one group? */ + bool rescan_needed; + + /* These fields are used only when accumulating tuples: */ + + /* Tuple descriptor for tuples inserted into sortstate: */ + TupleDesc tupdesc; + /* Tuple slot we can use for inserting/extracting tuples: */ + TupleTableSlot *tupslot; + /* Per-sort-column sorting information */ + int numSortCols; + AttrNumber *sortColIdx; + Oid *sortOperators; + Oid *eqOperators; + Oid *sortCollations; + bool *sortNullsFirsts; + /* Equality operator call info, created only if needed: */ + ExprState *compareTuple; + + /* These fields are used only when accumulating datums: */ + + /* Info about datatype of datums being sorted: */ + Oid sortColType; + int16 typLen; + bool typByVal; + char typAlign; + /* Info about sort ordering: */ + Oid sortOperator; + Oid eqOperator; + Oid sortCollation; + bool sortNullsFirst; + /* Equality operator call info, created only if needed: */ + FmgrInfo equalfn; +} OSAPerQueryState; + +typedef struct OSAPerGroupState +{ + /* Link to the per-query state for this aggregate: */ + OSAPerQueryState *qstate; + /* Memory context containing per-group data: */ + MemoryContext gcontext; + /* Sort object we're accumulating data in: */ + Tuplesortstate *sortstate; + /* Number of normal rows inserted into sortstate: */ + int64 number_of_rows; + /* Have we already done tuplesort_performsort? */ + bool sort_done; +} OSAPerGroupState; + +static void ordered_set_shutdown(Datum arg); + + +/* + * Set up working state for an ordered-set aggregate + */ +static OSAPerGroupState * +ordered_set_startup(FunctionCallInfo fcinfo, bool use_tuples) +{ + OSAPerGroupState *osastate; + OSAPerQueryState *qstate; + MemoryContext gcontext; + MemoryContext oldcontext; + int tuplesortopt; + + /* + * Check we're called as aggregate (and not a window function), and get + * the Agg node's group-lifespan context (which might change from group to + * group, so we shouldn't cache it in the per-query state). + */ + if (AggCheckCallContext(fcinfo, &gcontext) != AGG_CONTEXT_AGGREGATE) + elog(ERROR, "ordered-set aggregate called in non-aggregate context"); + + /* + * We keep a link to the per-query state in fn_extra; if it's not there, + * create it, and do the per-query setup we need. + */ + qstate = (OSAPerQueryState *) fcinfo->flinfo->fn_extra; + if (qstate == NULL) + { + Aggref *aggref; + MemoryContext qcontext; + List *sortlist; + int numSortCols; + + /* Get the Aggref so we can examine aggregate's arguments */ + aggref = AggGetAggref(fcinfo); + if (!aggref) + elog(ERROR, "ordered-set aggregate called in non-aggregate context"); + if (!AGGKIND_IS_ORDERED_SET(aggref->aggkind)) + elog(ERROR, "ordered-set aggregate support function called for non-ordered-set aggregate"); + + /* + * Prepare per-query structures in the fn_mcxt, which we assume is the + * executor's per-query context; in any case it's the right place to + * keep anything found via fn_extra. 
+ */ + qcontext = fcinfo->flinfo->fn_mcxt; + oldcontext = MemoryContextSwitchTo(qcontext); + + qstate = (OSAPerQueryState *) palloc0(sizeof(OSAPerQueryState)); + qstate->aggref = aggref; + qstate->qcontext = qcontext; + + /* We need to support rescans if the trans state is shared */ + qstate->rescan_needed = AggStateIsShared(fcinfo); + + /* Extract the sort information */ + sortlist = aggref->aggorder; + numSortCols = list_length(sortlist); + + if (use_tuples) + { + bool ishypothetical = (aggref->aggkind == AGGKIND_HYPOTHETICAL); + ListCell *lc; + int i; + + if (ishypothetical) + numSortCols++; /* make space for flag column */ + qstate->numSortCols = numSortCols; + qstate->sortColIdx = (AttrNumber *) palloc(numSortCols * sizeof(AttrNumber)); + qstate->sortOperators = (Oid *) palloc(numSortCols * sizeof(Oid)); + qstate->eqOperators = (Oid *) palloc(numSortCols * sizeof(Oid)); + qstate->sortCollations = (Oid *) palloc(numSortCols * sizeof(Oid)); + qstate->sortNullsFirsts = (bool *) palloc(numSortCols * sizeof(bool)); + + i = 0; + foreach(lc, sortlist) + { + SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc); + TargetEntry *tle = get_sortgroupclause_tle(sortcl, + aggref->args); + + /* the parser should have made sure of this */ + Assert(OidIsValid(sortcl->sortop)); + + qstate->sortColIdx[i] = tle->resno; + qstate->sortOperators[i] = sortcl->sortop; + qstate->eqOperators[i] = sortcl->eqop; + qstate->sortCollations[i] = exprCollation((Node *) tle->expr); + qstate->sortNullsFirsts[i] = sortcl->nulls_first; + i++; + } + + if (ishypothetical) + { + /* Add an integer flag column as the last sort column */ + qstate->sortColIdx[i] = list_length(aggref->args) + 1; + qstate->sortOperators[i] = Int4LessOperator; + qstate->eqOperators[i] = Int4EqualOperator; + qstate->sortCollations[i] = InvalidOid; + qstate->sortNullsFirsts[i] = false; + i++; + } + + Assert(i == numSortCols); + + /* + * Get a tupledesc corresponding to the aggregated inputs + * (including sort expressions) of the agg. 
+ */ + qstate->tupdesc = ExecTypeFromTL(aggref->args); + + /* If we need a flag column, hack the tupledesc to include that */ + if (ishypothetical) + { + TupleDesc newdesc; + int natts = qstate->tupdesc->natts; + + newdesc = CreateTemplateTupleDesc(natts + 1); + for (i = 1; i <= natts; i++) + TupleDescCopyEntry(newdesc, i, qstate->tupdesc, i); + + TupleDescInitEntry(newdesc, + (AttrNumber) ++natts, + "flag", + INT4OID, + -1, + 0); + + FreeTupleDesc(qstate->tupdesc); + qstate->tupdesc = newdesc; + } + + /* Create slot we'll use to store/retrieve rows */ + qstate->tupslot = MakeSingleTupleTableSlot(qstate->tupdesc, + &TTSOpsMinimalTuple); + } + else + { + /* Sort single datums */ + SortGroupClause *sortcl; + TargetEntry *tle; + + if (numSortCols != 1 || aggref->aggkind == AGGKIND_HYPOTHETICAL) + elog(ERROR, "ordered-set aggregate support function does not support multiple aggregated columns"); + + sortcl = (SortGroupClause *) linitial(sortlist); + tle = get_sortgroupclause_tle(sortcl, aggref->args); + + /* the parser should have made sure of this */ + Assert(OidIsValid(sortcl->sortop)); + + /* Save sort ordering info */ + qstate->sortColType = exprType((Node *) tle->expr); + qstate->sortOperator = sortcl->sortop; + qstate->eqOperator = sortcl->eqop; + qstate->sortCollation = exprCollation((Node *) tle->expr); + qstate->sortNullsFirst = sortcl->nulls_first; + + /* Save datatype info */ + get_typlenbyvalalign(qstate->sortColType, + &qstate->typLen, + &qstate->typByVal, + &qstate->typAlign); + } + + fcinfo->flinfo->fn_extra = (void *) qstate; + + MemoryContextSwitchTo(oldcontext); + } + + /* Now build the stuff we need in group-lifespan context */ + oldcontext = MemoryContextSwitchTo(gcontext); + + osastate = (OSAPerGroupState *) palloc(sizeof(OSAPerGroupState)); + osastate->qstate = qstate; + osastate->gcontext = gcontext; + + tuplesortopt = TUPLESORT_NONE; + + if (qstate->rescan_needed) + tuplesortopt |= TUPLESORT_RANDOMACCESS; + + /* + * Initialize tuplesort object. + */ + if (use_tuples) + osastate->sortstate = tuplesort_begin_heap(qstate->tupdesc, + qstate->numSortCols, + qstate->sortColIdx, + qstate->sortOperators, + qstate->sortCollations, + qstate->sortNullsFirsts, + work_mem, + NULL, + tuplesortopt); + else + osastate->sortstate = tuplesort_begin_datum(qstate->sortColType, + qstate->sortOperator, + qstate->sortCollation, + qstate->sortNullsFirst, + work_mem, + NULL, + tuplesortopt); + + osastate->number_of_rows = 0; + osastate->sort_done = false; + + /* Now register a shutdown callback to clean things up at end of group */ + AggRegisterCallback(fcinfo, + ordered_set_shutdown, + PointerGetDatum(osastate)); + + MemoryContextSwitchTo(oldcontext); + + return osastate; +} + +/* + * Clean up when evaluation of an ordered-set aggregate is complete. + * + * We don't need to bother freeing objects in the per-group memory context, + * since that will get reset anyway by nodeAgg.c; nor should we free anything + * in the per-query context, which will get cleared (if this was the last + * group) by ExecutorEnd. But we must take care to release any potential + * non-memory resources. + * + * In the case where we're not expecting multiple finalfn calls, we could + * arguably rely on the finalfn to clean up; but it's easier and more testable + * if we just do it the same way in either case. + */ +static void +ordered_set_shutdown(Datum arg) +{ + OSAPerGroupState *osastate = (OSAPerGroupState *) DatumGetPointer(arg); + + /* Tuplesort object might have temp files. 
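Ending the tuplesort explicitly releases them.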
*/ + if (osastate->sortstate) + tuplesort_end(osastate->sortstate); + osastate->sortstate = NULL; + /* The tupleslot probably can't be holding a pin, but let's be safe. */ + if (osastate->qstate->tupslot) + ExecClearTuple(osastate->qstate->tupslot); +} + + +/* + * Generic transition function for ordered-set aggregates + * with a single input column in which we want to suppress nulls + */ +Datum +ordered_set_transition(PG_FUNCTION_ARGS) +{ + OSAPerGroupState *osastate; + + /* If first call, create the transition state workspace */ + if (PG_ARGISNULL(0)) + osastate = ordered_set_startup(fcinfo, false); + else + osastate = (OSAPerGroupState *) PG_GETARG_POINTER(0); + + /* Load the datum into the tuplesort object, but only if it's not null */ + if (!PG_ARGISNULL(1)) + { + tuplesort_putdatum(osastate->sortstate, PG_GETARG_DATUM(1), false); + osastate->number_of_rows++; + } + + PG_RETURN_POINTER(osastate); +} + +/* + * Generic transition function for ordered-set aggregates + * with (potentially) multiple aggregated input columns + */ +Datum +ordered_set_transition_multi(PG_FUNCTION_ARGS) +{ + OSAPerGroupState *osastate; + TupleTableSlot *slot; + int nargs; + int i; + + /* If first call, create the transition state workspace */ + if (PG_ARGISNULL(0)) + osastate = ordered_set_startup(fcinfo, true); + else + osastate = (OSAPerGroupState *) PG_GETARG_POINTER(0); + + /* Form a tuple from all the other inputs besides the transition value */ + slot = osastate->qstate->tupslot; + ExecClearTuple(slot); + nargs = PG_NARGS() - 1; + for (i = 0; i < nargs; i++) + { + slot->tts_values[i] = PG_GETARG_DATUM(i + 1); + slot->tts_isnull[i] = PG_ARGISNULL(i + 1); + } + if (osastate->qstate->aggref->aggkind == AGGKIND_HYPOTHETICAL) + { + /* Add a zero flag value to mark this row as a normal input row */ + slot->tts_values[i] = Int32GetDatum(0); + slot->tts_isnull[i] = false; + i++; + } + Assert(i == slot->tts_tupleDescriptor->natts); + ExecStoreVirtualTuple(slot); + + /* Load the row into the tuplesort object */ + tuplesort_puttupleslot(osastate->sortstate, slot); + osastate->number_of_rows++; + + PG_RETURN_POINTER(osastate); +} + + +/* + * percentile_disc(float8) within group(anyelement) - discrete percentile + */ +Datum +percentile_disc_final(PG_FUNCTION_ARGS) +{ + OSAPerGroupState *osastate; + double percentile; + Datum val; + bool isnull; + int64 rownum; + + Assert(AggCheckCallContext(fcinfo, NULL) == AGG_CONTEXT_AGGREGATE); + + /* Get and check the percentile argument */ + if (PG_ARGISNULL(1)) + PG_RETURN_NULL(); + + percentile = PG_GETARG_FLOAT8(1); + + if (percentile < 0 || percentile > 1 || isnan(percentile)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("percentile value %g is not between 0 and 1", + percentile))); + + /* If there were no regular rows, the result is NULL */ + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + + osastate = (OSAPerGroupState *) PG_GETARG_POINTER(0); + + /* number_of_rows could be zero if we only saw NULL input values */ + if (osastate->number_of_rows == 0) + PG_RETURN_NULL(); + + /* Finish the sort, or rescan if we already did */ + if (!osastate->sort_done) + { + tuplesort_performsort(osastate->sortstate); + osastate->sort_done = true; + } + else + tuplesort_rescan(osastate->sortstate); + + /*---------- + * We need the smallest K such that (K/N) >= percentile. + * N>0, therefore K >= N*percentile, therefore K = ceil(N*percentile). + * So we skip K-1 rows (if K>0) and return the next row fetched. 
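+ * For example, with N = 10 rows and percentile = 0.25, K = ceil(2.5) = 3,
+ * so we skip two rows and return the third-smallest value.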
+ *---------- + */ + rownum = (int64) ceil(percentile * osastate->number_of_rows); + Assert(rownum <= osastate->number_of_rows); + + if (rownum > 1) + { + if (!tuplesort_skiptuples(osastate->sortstate, rownum - 1, true)) + elog(ERROR, "missing row in percentile_disc"); + } + + if (!tuplesort_getdatum(osastate->sortstate, true, true, &val, &isnull, + NULL)) + elog(ERROR, "missing row in percentile_disc"); + + /* We shouldn't have stored any nulls, but do the right thing anyway */ + if (isnull) + PG_RETURN_NULL(); + else + PG_RETURN_DATUM(val); +} + + +/* + * For percentile_cont, we need a way to interpolate between consecutive + * values. Use a helper function for that, so that we can share the rest + * of the code between types. + */ +typedef Datum (*LerpFunc) (Datum lo, Datum hi, double pct); + +static Datum +float8_lerp(Datum lo, Datum hi, double pct) +{ + double loval = DatumGetFloat8(lo); + double hival = DatumGetFloat8(hi); + + return Float8GetDatum(loval + (pct * (hival - loval))); +} + +static Datum +interval_lerp(Datum lo, Datum hi, double pct) +{ + Datum diff_result = DirectFunctionCall2(interval_mi, hi, lo); + Datum mul_result = DirectFunctionCall2(interval_mul, + diff_result, + Float8GetDatumFast(pct)); + + return DirectFunctionCall2(interval_pl, mul_result, lo); +} + +/* + * Continuous percentile + */ +static Datum +percentile_cont_final_common(FunctionCallInfo fcinfo, + Oid expect_type, + LerpFunc lerpfunc) +{ + OSAPerGroupState *osastate; + double percentile; + int64 first_row = 0; + int64 second_row = 0; + Datum val; + Datum first_val; + Datum second_val; + double proportion; + bool isnull; + + Assert(AggCheckCallContext(fcinfo, NULL) == AGG_CONTEXT_AGGREGATE); + + /* Get and check the percentile argument */ + if (PG_ARGISNULL(1)) + PG_RETURN_NULL(); + + percentile = PG_GETARG_FLOAT8(1); + + if (percentile < 0 || percentile > 1 || isnan(percentile)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("percentile value %g is not between 0 and 1", + percentile))); + + /* If there were no regular rows, the result is NULL */ + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + + osastate = (OSAPerGroupState *) PG_GETARG_POINTER(0); + + /* number_of_rows could be zero if we only saw NULL input values */ + if (osastate->number_of_rows == 0) + PG_RETURN_NULL(); + + Assert(expect_type == osastate->qstate->sortColType); + + /* Finish the sort, or rescan if we already did */ + if (!osastate->sort_done) + { + tuplesort_performsort(osastate->sortstate); + osastate->sort_done = true; + } + else + tuplesort_rescan(osastate->sortstate); + + first_row = floor(percentile * (osastate->number_of_rows - 1)); + second_row = ceil(percentile * (osastate->number_of_rows - 1)); + + Assert(first_row < osastate->number_of_rows); + + if (!tuplesort_skiptuples(osastate->sortstate, first_row, true)) + elog(ERROR, "missing row in percentile_cont"); + + if (!tuplesort_getdatum(osastate->sortstate, true, true, &first_val, + &isnull, NULL)) + elog(ERROR, "missing row in percentile_cont"); + if (isnull) + PG_RETURN_NULL(); + + if (first_row == second_row) + { + val = first_val; + } + else + { + if (!tuplesort_getdatum(osastate->sortstate, true, true, &second_val, + &isnull, NULL)) + elog(ERROR, "missing row in percentile_cont"); + + if (isnull) + PG_RETURN_NULL(); + + proportion = (percentile * (osastate->number_of_rows - 1)) - first_row; + val = lerpfunc(first_val, second_val, proportion); + } + + PG_RETURN_DATUM(val); +} + +/* + * percentile_cont(float8) within group (float8) - continuous 
percentile + */ +Datum +percentile_cont_float8_final(PG_FUNCTION_ARGS) +{ + return percentile_cont_final_common(fcinfo, FLOAT8OID, float8_lerp); +} + +/* + * percentile_cont(float8) within group (interval) - continuous percentile + */ +Datum +percentile_cont_interval_final(PG_FUNCTION_ARGS) +{ + return percentile_cont_final_common(fcinfo, INTERVALOID, interval_lerp); +} + + +/* + * Support code for handling arrays of percentiles + * + * Note: in each pct_info entry, second_row should be equal to or + * exactly one more than first_row. + */ +struct pct_info +{ + int64 first_row; /* first row to sample */ + int64 second_row; /* possible second row to sample */ + double proportion; /* interpolation fraction */ + int idx; /* index of this item in original array */ +}; + +/* + * Sort comparator to sort pct_infos by first_row then second_row + */ +static int +pct_info_cmp(const void *pa, const void *pb) +{ + const struct pct_info *a = (const struct pct_info *) pa; + const struct pct_info *b = (const struct pct_info *) pb; + + if (a->first_row != b->first_row) + return (a->first_row < b->first_row) ? -1 : 1; + if (a->second_row != b->second_row) + return (a->second_row < b->second_row) ? -1 : 1; + return 0; +} + +/* + * Construct array showing which rows to sample for percentiles. + */ +static struct pct_info * +setup_pct_info(int num_percentiles, + Datum *percentiles_datum, + bool *percentiles_null, + int64 rowcount, + bool continuous) +{ + struct pct_info *pct_info; + int i; + + pct_info = (struct pct_info *) palloc(num_percentiles * sizeof(struct pct_info)); + + for (i = 0; i < num_percentiles; i++) + { + pct_info[i].idx = i; + + if (percentiles_null[i]) + { + /* dummy entry for any NULL in array */ + pct_info[i].first_row = 0; + pct_info[i].second_row = 0; + pct_info[i].proportion = 0; + } + else + { + double p = DatumGetFloat8(percentiles_datum[i]); + + if (p < 0 || p > 1 || isnan(p)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("percentile value %g is not between 0 and 1", + p))); + + if (continuous) + { + pct_info[i].first_row = 1 + floor(p * (rowcount - 1)); + pct_info[i].second_row = 1 + ceil(p * (rowcount - 1)); + pct_info[i].proportion = (p * (rowcount - 1)) - floor(p * (rowcount - 1)); + } + else + { + /*---------- + * We need the smallest K such that (K/N) >= percentile. + * N>0, therefore K >= N*percentile, therefore + * K = ceil(N*percentile); but not less than 1. + *---------- + */ + int64 row = (int64) ceil(p * rowcount); + + row = Max(1, row); + pct_info[i].first_row = row; + pct_info[i].second_row = row; + pct_info[i].proportion = 0; + } + } + } + + /* + * The parameter array wasn't necessarily in sorted order, but we need to + * visit the rows in order, so sort by first_row/second_row. 
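+ * The idx field preserves each entry's original array position so that
+ * results can later be stored back in input order.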
+ */ + qsort(pct_info, num_percentiles, sizeof(struct pct_info), pct_info_cmp); + + return pct_info; +} + +/* + * percentile_disc(float8[]) within group (anyelement) - discrete percentiles + */ +Datum +percentile_disc_multi_final(PG_FUNCTION_ARGS) +{ + OSAPerGroupState *osastate; + ArrayType *param; + Datum *percentiles_datum; + bool *percentiles_null; + int num_percentiles; + struct pct_info *pct_info; + Datum *result_datum; + bool *result_isnull; + int64 rownum = 0; + Datum val = (Datum) 0; + bool isnull = true; + int i; + + Assert(AggCheckCallContext(fcinfo, NULL) == AGG_CONTEXT_AGGREGATE); + + /* If there were no regular rows, the result is NULL */ + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + + osastate = (OSAPerGroupState *) PG_GETARG_POINTER(0); + + /* number_of_rows could be zero if we only saw NULL input values */ + if (osastate->number_of_rows == 0) + PG_RETURN_NULL(); + + /* Deconstruct the percentile-array input */ + if (PG_ARGISNULL(1)) + PG_RETURN_NULL(); + param = PG_GETARG_ARRAYTYPE_P(1); + + deconstruct_array_builtin(param, FLOAT8OID, + &percentiles_datum, + &percentiles_null, + &num_percentiles); + + if (num_percentiles == 0) + PG_RETURN_POINTER(construct_empty_array(osastate->qstate->sortColType)); + + pct_info = setup_pct_info(num_percentiles, + percentiles_datum, + percentiles_null, + osastate->number_of_rows, + false); + + result_datum = (Datum *) palloc(num_percentiles * sizeof(Datum)); + result_isnull = (bool *) palloc(num_percentiles * sizeof(bool)); + + /* + * Start by dealing with any nulls in the param array - those are sorted + * to the front on row=0, so set the corresponding result indexes to null + */ + for (i = 0; i < num_percentiles; i++) + { + int idx = pct_info[i].idx; + + if (pct_info[i].first_row > 0) + break; + + result_datum[idx] = (Datum) 0; + result_isnull[idx] = true; + } + + /* + * If there's anything left after doing the nulls, then grind the input + * and extract the needed values + */ + if (i < num_percentiles) + { + /* Finish the sort, or rescan if we already did */ + if (!osastate->sort_done) + { + tuplesort_performsort(osastate->sortstate); + osastate->sort_done = true; + } + else + tuplesort_rescan(osastate->sortstate); + + for (; i < num_percentiles; i++) + { + int64 target_row = pct_info[i].first_row; + int idx = pct_info[i].idx; + + /* Advance to target row, if not already there */ + if (target_row > rownum) + { + if (!tuplesort_skiptuples(osastate->sortstate, target_row - rownum - 1, true)) + elog(ERROR, "missing row in percentile_disc"); + + if (!tuplesort_getdatum(osastate->sortstate, true, true, &val, + &isnull, NULL)) + elog(ERROR, "missing row in percentile_disc"); + + rownum = target_row; + } + + result_datum[idx] = val; + result_isnull[idx] = isnull; + } + } + + /* We make the output array the same shape as the input */ + PG_RETURN_POINTER(construct_md_array(result_datum, result_isnull, + ARR_NDIM(param), + ARR_DIMS(param), + ARR_LBOUND(param), + osastate->qstate->sortColType, + osastate->qstate->typLen, + osastate->qstate->typByVal, + osastate->qstate->typAlign)); +} + +/* + * percentile_cont(float8[]) within group () - continuous percentiles + */ +static Datum +percentile_cont_multi_final_common(FunctionCallInfo fcinfo, + Oid expect_type, + int16 typLen, bool typByVal, char typAlign, + LerpFunc lerpfunc) +{ + OSAPerGroupState *osastate; + ArrayType *param; + Datum *percentiles_datum; + bool *percentiles_null; + int num_percentiles; + struct pct_info *pct_info; + Datum *result_datum; + bool *result_isnull; + int64 rownum = 0; + 
Datum first_val = (Datum) 0; + Datum second_val = (Datum) 0; + bool isnull; + int i; + + Assert(AggCheckCallContext(fcinfo, NULL) == AGG_CONTEXT_AGGREGATE); + + /* If there were no regular rows, the result is NULL */ + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + + osastate = (OSAPerGroupState *) PG_GETARG_POINTER(0); + + /* number_of_rows could be zero if we only saw NULL input values */ + if (osastate->number_of_rows == 0) + PG_RETURN_NULL(); + + Assert(expect_type == osastate->qstate->sortColType); + + /* Deconstruct the percentile-array input */ + if (PG_ARGISNULL(1)) + PG_RETURN_NULL(); + param = PG_GETARG_ARRAYTYPE_P(1); + + deconstruct_array_builtin(param, FLOAT8OID, + &percentiles_datum, + &percentiles_null, + &num_percentiles); + + if (num_percentiles == 0) + PG_RETURN_POINTER(construct_empty_array(osastate->qstate->sortColType)); + + pct_info = setup_pct_info(num_percentiles, + percentiles_datum, + percentiles_null, + osastate->number_of_rows, + true); + + result_datum = (Datum *) palloc(num_percentiles * sizeof(Datum)); + result_isnull = (bool *) palloc(num_percentiles * sizeof(bool)); + + /* + * Start by dealing with any nulls in the param array - those are sorted + * to the front on row=0, so set the corresponding result indexes to null + */ + for (i = 0; i < num_percentiles; i++) + { + int idx = pct_info[i].idx; + + if (pct_info[i].first_row > 0) + break; + + result_datum[idx] = (Datum) 0; + result_isnull[idx] = true; + } + + /* + * If there's anything left after doing the nulls, then grind the input + * and extract the needed values + */ + if (i < num_percentiles) + { + /* Finish the sort, or rescan if we already did */ + if (!osastate->sort_done) + { + tuplesort_performsort(osastate->sortstate); + osastate->sort_done = true; + } + else + tuplesort_rescan(osastate->sortstate); + + for (; i < num_percentiles; i++) + { + int64 first_row = pct_info[i].first_row; + int64 second_row = pct_info[i].second_row; + int idx = pct_info[i].idx; + + /* + * Advance to first_row, if not already there. Note that we might + * already have rownum beyond first_row, in which case first_val + * is already correct. (This occurs when interpolating between + * the same two input rows as for the previous percentile.) + */ + if (first_row > rownum) + { + if (!tuplesort_skiptuples(osastate->sortstate, first_row - rownum - 1, true)) + elog(ERROR, "missing row in percentile_cont"); + + if (!tuplesort_getdatum(osastate->sortstate, true, true, + &first_val, &isnull, NULL) || isnull) + elog(ERROR, "missing row in percentile_cont"); + + rownum = first_row; + /* Always advance second_val to be latest input value */ + second_val = first_val; + } + else if (first_row == rownum) + { + /* + * We are already at the desired row, so we must previously + * have read its value into second_val (and perhaps first_val + * as well, but this assignment is harmless in that case). 
+ */ + first_val = second_val; + } + + /* Fetch second_row if needed */ + if (second_row > rownum) + { + if (!tuplesort_getdatum(osastate->sortstate, true, true, + &second_val, &isnull, NULL) || isnull) + elog(ERROR, "missing row in percentile_cont"); + rownum++; + } + /* We should now certainly be on second_row exactly */ + Assert(second_row == rownum); + + /* Compute appropriate result */ + if (second_row > first_row) + result_datum[idx] = lerpfunc(first_val, second_val, + pct_info[i].proportion); + else + result_datum[idx] = first_val; + + result_isnull[idx] = false; + } + } + + /* We make the output array the same shape as the input */ + PG_RETURN_POINTER(construct_md_array(result_datum, result_isnull, + ARR_NDIM(param), + ARR_DIMS(param), ARR_LBOUND(param), + expect_type, + typLen, + typByVal, + typAlign)); +} + +/* + * percentile_cont(float8[]) within group (float8) - continuous percentiles + */ +Datum +percentile_cont_float8_multi_final(PG_FUNCTION_ARGS) +{ + return percentile_cont_multi_final_common(fcinfo, + FLOAT8OID, + /* hard-wired info on type float8 */ + sizeof(float8), + FLOAT8PASSBYVAL, + TYPALIGN_DOUBLE, + float8_lerp); +} + +/* + * percentile_cont(float8[]) within group (interval) - continuous percentiles + */ +Datum +percentile_cont_interval_multi_final(PG_FUNCTION_ARGS) +{ + return percentile_cont_multi_final_common(fcinfo, + INTERVALOID, + /* hard-wired info on type interval */ + 16, false, TYPALIGN_DOUBLE, + interval_lerp); +} + + +/* + * mode() within group (anyelement) - most common value + */ +Datum +mode_final(PG_FUNCTION_ARGS) +{ + OSAPerGroupState *osastate; + Datum val; + bool isnull; + Datum mode_val = (Datum) 0; + int64 mode_freq = 0; + Datum last_val = (Datum) 0; + int64 last_val_freq = 0; + bool last_val_is_mode = false; + FmgrInfo *equalfn; + Datum abbrev_val = (Datum) 0; + Datum last_abbrev_val = (Datum) 0; + bool shouldfree; + + Assert(AggCheckCallContext(fcinfo, NULL) == AGG_CONTEXT_AGGREGATE); + + /* If there were no regular rows, the result is NULL */ + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + + osastate = (OSAPerGroupState *) PG_GETARG_POINTER(0); + + /* number_of_rows could be zero if we only saw NULL input values */ + if (osastate->number_of_rows == 0) + PG_RETURN_NULL(); + + /* Look up the equality function for the datatype, if we didn't already */ + equalfn = &(osastate->qstate->equalfn); + if (!OidIsValid(equalfn->fn_oid)) + fmgr_info_cxt(get_opcode(osastate->qstate->eqOperator), equalfn, + osastate->qstate->qcontext); + + shouldfree = !(osastate->qstate->typByVal); + + /* Finish the sort, or rescan if we already did */ + if (!osastate->sort_done) + { + tuplesort_performsort(osastate->sortstate); + osastate->sort_done = true; + } + else + tuplesort_rescan(osastate->sortstate); + + /* Scan tuples and count frequencies */ + while (tuplesort_getdatum(osastate->sortstate, true, true, &val, &isnull, + &abbrev_val)) + { + /* we don't expect any nulls, but ignore them if found */ + if (isnull) + continue; + + if (last_val_freq == 0) + { + /* first nonnull value - it's the mode for now */ + mode_val = last_val = val; + mode_freq = last_val_freq = 1; + last_val_is_mode = true; + last_abbrev_val = abbrev_val; + } + else if (abbrev_val == last_abbrev_val && + DatumGetBool(FunctionCall2Coll(equalfn, PG_GET_COLLATION(), val, last_val))) + { + /* value equal to previous value, count it */ + if (last_val_is_mode) + mode_freq++; /* needn't maintain last_val_freq */ + else if (++last_val_freq > mode_freq) + { + /* last_val becomes new mode */ + if (shouldfree) 
+ pfree(DatumGetPointer(mode_val)); + mode_val = last_val; + mode_freq = last_val_freq; + last_val_is_mode = true; + } + if (shouldfree) + pfree(DatumGetPointer(val)); + } + else + { + /* val should replace last_val */ + if (shouldfree && !last_val_is_mode) + pfree(DatumGetPointer(last_val)); + last_val = val; + /* avoid equality function calls by reusing abbreviated keys */ + last_abbrev_val = abbrev_val; + last_val_freq = 1; + last_val_is_mode = false; + } + + CHECK_FOR_INTERRUPTS(); + } + + if (shouldfree && !last_val_is_mode) + pfree(DatumGetPointer(last_val)); + + if (mode_freq) + PG_RETURN_DATUM(mode_val); + else + PG_RETURN_NULL(); +} + + +/* + * Common code to sanity-check args for hypothetical-set functions. No need + * for friendly errors, these can only happen if someone's messing up the + * aggregate definitions. The checks are needed for security, however. + */ +static void +hypothetical_check_argtypes(FunctionCallInfo fcinfo, int nargs, + TupleDesc tupdesc) +{ + int i; + + /* check that we have an int4 flag column */ + if (!tupdesc || + (nargs + 1) != tupdesc->natts || + TupleDescAttr(tupdesc, nargs)->atttypid != INT4OID) + elog(ERROR, "type mismatch in hypothetical-set function"); + + /* check that direct args match in type with aggregated args */ + for (i = 0; i < nargs; i++) + { + Form_pg_attribute attr = TupleDescAttr(tupdesc, i); + + if (get_fn_expr_argtype(fcinfo->flinfo, i + 1) != attr->atttypid) + elog(ERROR, "type mismatch in hypothetical-set function"); + } +} + +/* + * compute rank of hypothetical row + * + * flag should be -1 to sort hypothetical row ahead of its peers, or +1 + * to sort behind. + * total number of regular rows is returned into *number_of_rows. + */ +static int64 +hypothetical_rank_common(FunctionCallInfo fcinfo, int flag, + int64 *number_of_rows) +{ + int nargs = PG_NARGS() - 1; + int64 rank = 1; + OSAPerGroupState *osastate; + TupleTableSlot *slot; + int i; + + Assert(AggCheckCallContext(fcinfo, NULL) == AGG_CONTEXT_AGGREGATE); + + /* If there were no regular rows, the rank is always 1 */ + if (PG_ARGISNULL(0)) + { + *number_of_rows = 0; + return 1; + } + + osastate = (OSAPerGroupState *) PG_GETARG_POINTER(0); + *number_of_rows = osastate->number_of_rows; + + /* Adjust nargs to be the number of direct (or aggregated) args */ + if (nargs % 2 != 0) + elog(ERROR, "wrong number of arguments in hypothetical-set function"); + nargs /= 2; + + hypothetical_check_argtypes(fcinfo, nargs, osastate->qstate->tupdesc); + + /* because we need a hypothetical row, we can't share transition state */ + Assert(!osastate->sort_done); + + /* insert the hypothetical row into the sort */ + slot = osastate->qstate->tupslot; + ExecClearTuple(slot); + for (i = 0; i < nargs; i++) + { + slot->tts_values[i] = PG_GETARG_DATUM(i + 1); + slot->tts_isnull[i] = PG_ARGISNULL(i + 1); + } + slot->tts_values[i] = Int32GetDatum(flag); + slot->tts_isnull[i] = false; + ExecStoreVirtualTuple(slot); + + tuplesort_puttupleslot(osastate->sortstate, slot); + + /* finish the sort */ + tuplesort_performsort(osastate->sortstate); + osastate->sort_done = true; + + /* iterate till we find the hypothetical row */ + while (tuplesort_gettupleslot(osastate->sortstate, true, true, slot, NULL)) + { + bool isnull; + Datum d = slot_getattr(slot, nargs + 1, &isnull); + + if (!isnull && DatumGetInt32(d) != 0) + break; + + rank++; + + CHECK_FOR_INTERRUPTS(); + } + + ExecClearTuple(slot); + + return rank; +} + + +/* + * rank() - rank of hypothetical row + */ +Datum +hypothetical_rank_final(PG_FUNCTION_ARGS) 
+{ + int64 rank; + int64 rowcount; + + rank = hypothetical_rank_common(fcinfo, -1, &rowcount); + + PG_RETURN_INT64(rank); +} + +/* + * percent_rank() - percentile rank of hypothetical row + */ +Datum +hypothetical_percent_rank_final(PG_FUNCTION_ARGS) +{ + int64 rank; + int64 rowcount; + double result_val; + + rank = hypothetical_rank_common(fcinfo, -1, &rowcount); + + if (rowcount == 0) + PG_RETURN_FLOAT8(0); + + result_val = (double) (rank - 1) / (double) (rowcount); + + PG_RETURN_FLOAT8(result_val); +} + +/* + * cume_dist() - cumulative distribution of hypothetical row + */ +Datum +hypothetical_cume_dist_final(PG_FUNCTION_ARGS) +{ + int64 rank; + int64 rowcount; + double result_val; + + rank = hypothetical_rank_common(fcinfo, 1, &rowcount); + + result_val = (double) (rank) / (double) (rowcount + 1); + + PG_RETURN_FLOAT8(result_val); +} + +/* + * dense_rank() - rank of hypothetical row without gaps in ranking + */ +Datum +hypothetical_dense_rank_final(PG_FUNCTION_ARGS) +{ + ExprContext *econtext; + ExprState *compareTuple; + int nargs = PG_NARGS() - 1; + int64 rank = 1; + int64 duplicate_count = 0; + OSAPerGroupState *osastate; + int numDistinctCols; + Datum abbrevVal = (Datum) 0; + Datum abbrevOld = (Datum) 0; + TupleTableSlot *slot; + TupleTableSlot *extraslot; + TupleTableSlot *slot2; + int i; + + Assert(AggCheckCallContext(fcinfo, NULL) == AGG_CONTEXT_AGGREGATE); + + /* If there were no regular rows, the rank is always 1 */ + if (PG_ARGISNULL(0)) + PG_RETURN_INT64(rank); + + osastate = (OSAPerGroupState *) PG_GETARG_POINTER(0); + econtext = osastate->qstate->econtext; + if (!econtext) + { + MemoryContext oldcontext; + + /* Make sure to we create econtext under correct parent context. */ + oldcontext = MemoryContextSwitchTo(osastate->qstate->qcontext); + osastate->qstate->econtext = CreateStandaloneExprContext(); + econtext = osastate->qstate->econtext; + MemoryContextSwitchTo(oldcontext); + } + + /* Adjust nargs to be the number of direct (or aggregated) args */ + if (nargs % 2 != 0) + elog(ERROR, "wrong number of arguments in hypothetical-set function"); + nargs /= 2; + + hypothetical_check_argtypes(fcinfo, nargs, osastate->qstate->tupdesc); + + /* + * When comparing tuples, we can omit the flag column since we will only + * compare rows with flag == 0. 
+ */ + numDistinctCols = osastate->qstate->numSortCols - 1; + + /* Build tuple comparator, if we didn't already */ + compareTuple = osastate->qstate->compareTuple; + if (compareTuple == NULL) + { + AttrNumber *sortColIdx = osastate->qstate->sortColIdx; + MemoryContext oldContext; + + oldContext = MemoryContextSwitchTo(osastate->qstate->qcontext); + compareTuple = execTuplesMatchPrepare(osastate->qstate->tupdesc, + numDistinctCols, + sortColIdx, + osastate->qstate->eqOperators, + osastate->qstate->sortCollations, + NULL); + MemoryContextSwitchTo(oldContext); + osastate->qstate->compareTuple = compareTuple; + } + + /* because we need a hypothetical row, we can't share transition state */ + Assert(!osastate->sort_done); + + /* insert the hypothetical row into the sort */ + slot = osastate->qstate->tupslot; + ExecClearTuple(slot); + for (i = 0; i < nargs; i++) + { + slot->tts_values[i] = PG_GETARG_DATUM(i + 1); + slot->tts_isnull[i] = PG_ARGISNULL(i + 1); + } + slot->tts_values[i] = Int32GetDatum(-1); + slot->tts_isnull[i] = false; + ExecStoreVirtualTuple(slot); + + tuplesort_puttupleslot(osastate->sortstate, slot); + + /* finish the sort */ + tuplesort_performsort(osastate->sortstate); + osastate->sort_done = true; + + /* + * We alternate fetching into tupslot and extraslot so that we have the + * previous row available for comparisons. This is accomplished by + * swapping the slot pointer variables after each row. + */ + extraslot = MakeSingleTupleTableSlot(osastate->qstate->tupdesc, + &TTSOpsMinimalTuple); + slot2 = extraslot; + + /* iterate till we find the hypothetical row */ + while (tuplesort_gettupleslot(osastate->sortstate, true, true, slot, + &abbrevVal)) + { + bool isnull; + Datum d = slot_getattr(slot, nargs + 1, &isnull); + TupleTableSlot *tmpslot; + + if (!isnull && DatumGetInt32(d) != 0) + break; + + /* count non-distinct tuples */ + econtext->ecxt_outertuple = slot; + econtext->ecxt_innertuple = slot2; + + if (!TupIsNull(slot2) && + abbrevVal == abbrevOld && + ExecQualAndReset(compareTuple, econtext)) + duplicate_count++; + + tmpslot = slot2; + slot2 = slot; + slot = tmpslot; + /* avoid ExecQual() calls by reusing abbreviated keys */ + abbrevOld = abbrevVal; + + rank++; + + CHECK_FOR_INTERRUPTS(); + } + + ExecClearTuple(slot); + ExecClearTuple(slot2); + + ExecDropSingleTupleTableSlot(extraslot); + + rank = rank - duplicate_count; + + PG_RETURN_INT64(rank); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/partitionfuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/partitionfuncs.c new file mode 100644 index 00000000000..70e4c1308c9 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/partitionfuncs.c @@ -0,0 +1,239 @@ +/*------------------------------------------------------------------------- + * + * partitionfuncs.c + * Functions for accessing partition-related metadata + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/partitionfuncs.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/htup_details.h" +#include "catalog/partition.h" +#include "catalog/pg_class.h" +#include "catalog/pg_inherits.h" +#include "catalog/pg_type.h" +#include "funcapi.h" +#include "utils/fmgrprotos.h" +#include "utils/lsyscache.h" +#include "utils/syscache.h" + +/* + * Checks if a 
given relation can be part of a partition tree. Returns + * false if the relation cannot be processed, in which case it is up to + * the caller to decide what to do, by either raising an error or doing + * something else. + */ +static bool +check_rel_can_be_partition(Oid relid) +{ + char relkind; + bool relispartition; + + /* Check if relation exists */ + if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(relid))) + return false; + + relkind = get_rel_relkind(relid); + relispartition = get_rel_relispartition(relid); + + /* Only allow relation types that can appear in partition trees. */ + if (!relispartition && !RELKIND_HAS_PARTITIONS(relkind)) + return false; + + return true; +} + +/* + * pg_partition_tree + * + * Produce a view with one row per member of a partition tree, beginning + * from the top-most parent given by the caller. This gives information + * about each partition, its immediate partitioned parent, if it is + * a leaf partition and its level in the hierarchy. + */ +Datum +pg_partition_tree(PG_FUNCTION_ARGS) +{ +#define PG_PARTITION_TREE_COLS 4 + Oid rootrelid = PG_GETARG_OID(0); + FuncCallContext *funcctx; + List *partitions; + + /* stuff done only on the first call of the function */ + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcxt; + TupleDesc tupdesc; + + /* create a function context for cross-call persistence */ + funcctx = SRF_FIRSTCALL_INIT(); + + if (!check_rel_can_be_partition(rootrelid)) + SRF_RETURN_DONE(funcctx); + + /* switch to memory context appropriate for multiple function calls */ + oldcxt = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* + * Find all members of inheritance set. We only need AccessShareLock + * on the children for the partition information lookup. + */ + partitions = find_all_inheritors(rootrelid, AccessShareLock, NULL); + + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + funcctx->tuple_desc = tupdesc; + + /* The only state we need is the partition list */ + funcctx->user_fctx = (void *) partitions; + + MemoryContextSwitchTo(oldcxt); + } + + /* stuff done on every call of the function */ + funcctx = SRF_PERCALL_SETUP(); + partitions = (List *) funcctx->user_fctx; + + if (funcctx->call_cntr < list_length(partitions)) + { + Datum result; + Datum values[PG_PARTITION_TREE_COLS] = {0}; + bool nulls[PG_PARTITION_TREE_COLS] = {0}; + HeapTuple tuple; + Oid parentid = InvalidOid; + Oid relid = list_nth_oid(partitions, funcctx->call_cntr); + char relkind = get_rel_relkind(relid); + int level = 0; + List *ancestors = get_partition_ancestors(relid); + ListCell *lc; + + /* + * Form tuple with appropriate data. 
+ */ + + /* relid */ + values[0] = ObjectIdGetDatum(relid); + + /* parentid */ + if (ancestors != NIL) + parentid = linitial_oid(ancestors); + if (OidIsValid(parentid)) + values[1] = ObjectIdGetDatum(parentid); + else + nulls[1] = true; + + /* isleaf */ + values[2] = BoolGetDatum(!RELKIND_HAS_PARTITIONS(relkind)); + + /* level */ + if (relid != rootrelid) + { + foreach(lc, ancestors) + { + level++; + if (lfirst_oid(lc) == rootrelid) + break; + } + } + values[3] = Int32GetDatum(level); + + tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); + result = HeapTupleGetDatum(tuple); + SRF_RETURN_NEXT(funcctx, result); + } + + /* done when there are no more elements left */ + SRF_RETURN_DONE(funcctx); +} + +/* + * pg_partition_root + * + * Returns the top-most parent of the partition tree to which a given + * relation belongs, or NULL if it's not (or cannot be) part of any + * partition tree. + */ +Datum +pg_partition_root(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + Oid rootrelid; + List *ancestors; + + if (!check_rel_can_be_partition(relid)) + PG_RETURN_NULL(); + + /* fetch the list of ancestors */ + ancestors = get_partition_ancestors(relid); + + /* + * If the input relation is already the top-most parent, just return + * itself. + */ + if (ancestors == NIL) + PG_RETURN_OID(relid); + + rootrelid = llast_oid(ancestors); + list_free(ancestors); + + /* + * "rootrelid" must contain a valid OID, given that the input relation is + * a valid partition tree member as checked above. + */ + Assert(OidIsValid(rootrelid)); + PG_RETURN_OID(rootrelid); +} + +/* + * pg_partition_ancestors + * + * Produces a view with one row per ancestor of the given partition, + * including the input relation itself. + */ +Datum +pg_partition_ancestors(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + FuncCallContext *funcctx; + List *ancestors; + + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcxt; + + funcctx = SRF_FIRSTCALL_INIT(); + + if (!check_rel_can_be_partition(relid)) + SRF_RETURN_DONE(funcctx); + + oldcxt = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + ancestors = get_partition_ancestors(relid); + ancestors = lcons_oid(relid, ancestors); + + /* The only state we need is the ancestors list */ + funcctx->user_fctx = (void *) ancestors; + + MemoryContextSwitchTo(oldcxt); + } + + funcctx = SRF_PERCALL_SETUP(); + ancestors = (List *) funcctx->user_fctx; + + if (funcctx->call_cntr < list_length(ancestors)) + { + Oid resultrel = list_nth_oid(ancestors, funcctx->call_cntr); + + SRF_RETURN_NEXT(funcctx, ObjectIdGetDatum(resultrel)); + } + + SRF_RETURN_DONE(funcctx); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/pg_locale.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/pg_locale.c new file mode 100644 index 00000000000..5b0ff75414d --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/pg_locale.c @@ -0,0 +1,3086 @@ +/*----------------------------------------------------------------------- + * + * PostgreSQL locale utilities + * + * Portions Copyright (c) 2002-2023, PostgreSQL Global Development Group + * + * src/backend/utils/adt/pg_locale.c + * + *----------------------------------------------------------------------- + */ + +/*---------- + * Here is how the locale stuff is handled: LC_COLLATE and LC_CTYPE + * are fixed at CREATE DATABASE time, stored in pg_database, and cannot + * be changed. Thus, the effects of strcoll(), strxfrm(), isupper(), + * toupper(), etc. 
are always in the same fixed locale. + * + * LC_MESSAGES is settable at run time and will take effect + * immediately. + * + * The other categories, LC_MONETARY, LC_NUMERIC, and LC_TIME are also + * settable at run-time. However, we don't actually set those locale + * categories permanently. This would have bizarre effects like no + * longer accepting standard floating-point literals in some locales. + * Instead, we only set these locale categories briefly when needed, + * cache the required information obtained from localeconv() or + * strftime(), and then set the locale categories back to "C". + * The cached information is only used by the formatting functions + * (to_char, etc.) and the money type. For the user, this should all be + * transparent. + * + * !!! NOW HEAR THIS !!! + * + * We've been bitten repeatedly by this bug, so let's try to keep it in + * mind in future: on some platforms, the locale functions return pointers + * to static data that will be overwritten by any later locale function. + * Thus, for example, the obvious-looking sequence + * save = setlocale(category, NULL); + * if (!setlocale(category, value)) + * fail = true; + * setlocale(category, save); + * DOES NOT WORK RELIABLY: on some platforms the second setlocale() call + * will change the memory save is pointing at. To do this sort of thing + * safely, you *must* pstrdup what setlocale returns the first time. + * + * The POSIX locale standard is available here: + * + * http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html + *---------- + */ + + +#include "postgres.h" + +#include <time.h> + +#include "access/htup_details.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_control.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/formatting.h" +#include "utils/guc_hooks.h" +#include "utils/hsearch.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/pg_locale.h" +#include "utils/syscache.h" + +#ifdef USE_ICU +#include <unicode/ucnv.h> +#include <unicode/ustring.h> +#endif + +#ifdef __GLIBC__ +#include <gnu/libc-version.h> +#endif + +#ifdef WIN32 +#include <shlwapi.h> +#endif + +/* Error triggered for locale-sensitive subroutines */ +#define PGLOCALE_SUPPORT_ERROR(provider) \ + elog(ERROR, "unsupported collprovider for %s: %c", __func__, provider) + +/* + * This should be large enough that most strings will fit, but small enough + * that we feel comfortable putting it on the stack + */ +#define TEXTBUFLEN 1024 + +#define MAX_L10N_DATA 80 + + +/* GUC settings */ +__thread char *locale_messages; +__thread char *locale_monetary; +__thread char *locale_numeric; +__thread char *locale_time; + +__thread int icu_validation_level = WARNING; + +/* + * lc_time localization cache. + * + * We use only the first 7 or 12 entries of these arrays. The last array + * element is left as NULL for the convenience of outside code that wants + * to sequentially scan these arrays. + */ +__thread char *localized_abbrev_days[7 + 1]; +__thread char *localized_full_days[7 + 1]; +__thread char *localized_abbrev_months[12 + 1]; +__thread char *localized_full_months[12 + 1]; + +/* is the databases's LC_CTYPE the C locale? 
*/ +__thread bool database_ctype_is_c = false; + +/* indicates whether locale information cache is valid */ +static __thread bool CurrentLocaleConvValid = false; +static __thread bool CurrentLCTimeValid = false; +static __thread struct lconv CurrentLocaleConv; +static __thread bool CurrentLocaleConvAllocated = false; + +/* Cache for collation-related knowledge */ + +typedef struct +{ + Oid collid; /* hash key: pg_collation OID */ + bool collate_is_c; /* is collation's LC_COLLATE C? */ + bool ctype_is_c; /* is collation's LC_CTYPE C? */ + bool flags_valid; /* true if above flags are valid */ + pg_locale_t locale; /* locale_t struct, or 0 if not valid */ +} collation_cache_entry; + +static __thread HTAB *collation_cache = NULL; + + +#if defined(WIN32) && defined(LC_MESSAGES) +static char *IsoLocaleName(const char *); +#endif + +#ifdef USE_ICU +/* + * Converter object for converting between ICU's UChar strings and C strings + * in database encoding. Since the database encoding doesn't change, we only + * need one of these per session. + */ +static __thread UConverter *icu_converter = NULL; + +static UCollator *pg_ucol_open(const char *loc_str); +static void init_icu_converter(void); +static size_t uchar_length(UConverter *converter, + const char *str, int32_t len); +static int32_t uchar_convert(UConverter *converter, + UChar *dest, int32_t destlen, + const char *src, int32_t srclen); +static void icu_set_collation_attributes(UCollator *collator, const char *loc, + UErrorCode *status); +#endif + +/* + * pg_perm_setlocale + * + * This wraps the libc function setlocale(), with two additions. First, when + * changing LC_CTYPE, update gettext's encoding for the current message + * domain. GNU gettext automatically tracks LC_CTYPE on most platforms, but + * not on Windows. Second, if the operation is successful, the corresponding + * LC_XXX environment variable is set to match. By setting the environment + * variable, we ensure that any subsequent use of setlocale(..., "") will + * preserve the settings made through this routine. Of course, LC_ALL must + * also be unset to fully ensure that, but that has to be done elsewhere after + * all the individual LC_XXX variables have been set correctly. (Thank you + * Perl for making this kluge necessary.) + */ +char * +pg_perm_setlocale(int category, const char *locale) +{ + char *result; + const char *envvar; + +#ifndef WIN32 + result = setlocale(category, locale); +#else + + /* + * On Windows, setlocale(LC_MESSAGES) does not work, so just assume that + * the given value is good and set it in the environment variables. We + * must ignore attempts to set to "", which means "keep using the old + * environment value". + */ +#ifdef LC_MESSAGES + if (category == LC_MESSAGES) + { + result = (char *) locale; + if (locale == NULL || locale[0] == '\0') + return result; + } + else +#endif + result = setlocale(category, locale); +#endif /* WIN32 */ + + if (result == NULL) + return result; /* fall out immediately on failure */ + + /* + * Use the right encoding in translated messages. Under ENABLE_NLS, let + * pg_bind_textdomain_codeset() figure it out. Under !ENABLE_NLS, message + * format strings are ASCII, but database-encoding strings may enter the + * message via %s. This makes the overall message encoding equal to the + * database encoding. 
+ */ + if (category == LC_CTYPE) + { + static __thread char save_lc_ctype[LOCALE_NAME_BUFLEN]; + + /* copy setlocale() return value before callee invokes it again */ + strlcpy(save_lc_ctype, result, sizeof(save_lc_ctype)); + result = save_lc_ctype; + +#ifdef ENABLE_NLS + SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL))); +#else + SetMessageEncoding(GetDatabaseEncoding()); +#endif + } + + switch (category) + { + case LC_COLLATE: + envvar = "LC_COLLATE"; + break; + case LC_CTYPE: + envvar = "LC_CTYPE"; + break; +#ifdef LC_MESSAGES + case LC_MESSAGES: + envvar = "LC_MESSAGES"; +#ifdef WIN32 + result = IsoLocaleName(locale); + if (result == NULL) + result = (char *) locale; + elog(DEBUG3, "IsoLocaleName() executed; locale: \"%s\"", result); +#endif /* WIN32 */ + break; +#endif /* LC_MESSAGES */ + case LC_MONETARY: + envvar = "LC_MONETARY"; + break; + case LC_NUMERIC: + envvar = "LC_NUMERIC"; + break; + case LC_TIME: + envvar = "LC_TIME"; + break; + default: + elog(FATAL, "unrecognized LC category: %d", category); + return NULL; /* keep compiler quiet */ + } + + if (setenv(envvar, result, 1) != 0) + return NULL; + + return result; +} + + +/* + * Is the locale name valid for the locale category? + * + * If successful, and canonname isn't NULL, a palloc'd copy of the locale's + * canonical name is stored there. This is especially useful for figuring out + * what locale name "" means (ie, the server environment value). (Actually, + * it seems that on most implementations that's the only thing it's good for; + * we could wish that setlocale gave back a canonically spelled version of + * the locale name, but typically it doesn't.) + */ +bool +check_locale(int category, const char *locale, char **canonname) +{ + char *save; + char *res; + + if (canonname) + *canonname = NULL; /* in case of failure */ + + save = setlocale(category, NULL); + if (!save) + return false; /* won't happen, we hope */ + + /* save may be pointing at a modifiable scratch variable, see above. */ + save = pstrdup(save); + + /* set the locale with setlocale, to see if it accepts it. */ + res = setlocale(category, locale); + + /* save canonical name if requested. */ + if (res && canonname) + *canonname = pstrdup(res); + + /* restore old value. */ + if (!setlocale(category, save)) + elog(WARNING, "failed to restore old locale \"%s\"", save); + pfree(save); + + return (res != NULL); +} + + +/* + * GUC check/assign hooks + * + * For most locale categories, the assign hook doesn't actually set the locale + * permanently, just reset flags so that the next use will cache the + * appropriate values. (See explanation at the top of this file.) + * + * Note: we accept value = "" as selecting the postmaster's environment + * value, whatever it was (so long as the environment setting is legal). + * This will have been locked down by an earlier call to pg_perm_setlocale. 
+ */ +bool +check_locale_monetary(char **newval, void **extra, GucSource source) +{ + return check_locale(LC_MONETARY, *newval, NULL); +} + +void +assign_locale_monetary(const char *newval, void *extra) +{ + CurrentLocaleConvValid = false; +} + +bool +check_locale_numeric(char **newval, void **extra, GucSource source) +{ + return check_locale(LC_NUMERIC, *newval, NULL); +} + +void +assign_locale_numeric(const char *newval, void *extra) +{ + CurrentLocaleConvValid = false; +} + +bool +check_locale_time(char **newval, void **extra, GucSource source) +{ + return check_locale(LC_TIME, *newval, NULL); +} + +void +assign_locale_time(const char *newval, void *extra) +{ + CurrentLCTimeValid = false; +} + +/* + * We allow LC_MESSAGES to actually be set globally. + * + * Note: we normally disallow value = "" because it wouldn't have consistent + * semantics (it'd effectively just use the previous value). However, this + * is the value passed for PGC_S_DEFAULT, so don't complain in that case, + * not even if the attempted setting fails due to invalid environment value. + * The idea there is just to accept the environment setting *if possible* + * during startup, until we can read the proper value from postgresql.conf. + */ +bool +check_locale_messages(char **newval, void **extra, GucSource source) +{ + if (**newval == '\0') + { + if (source == PGC_S_DEFAULT) + return true; + else + return false; + } + + /* + * LC_MESSAGES category does not exist everywhere, but accept it anyway + * + * On Windows, we can't even check the value, so accept blindly + */ +#if defined(LC_MESSAGES) && !defined(WIN32) + return check_locale(LC_MESSAGES, *newval, NULL); +#else + return true; +#endif +} + +void +assign_locale_messages(const char *newval, void *extra) +{ + /* + * LC_MESSAGES category does not exist everywhere, but accept it anyway. + * We ignore failure, as per comment above. + */ +#ifdef LC_MESSAGES + (void) pg_perm_setlocale(LC_MESSAGES, newval); +#endif +} + + +/* + * Frees the malloced content of a struct lconv. (But not the struct + * itself.) It's important that this not throw elog(ERROR). + */ +static void +free_struct_lconv(struct lconv *s) +{ + free(s->decimal_point); + free(s->thousands_sep); + free(s->grouping); + free(s->int_curr_symbol); + free(s->currency_symbol); + free(s->mon_decimal_point); + free(s->mon_thousands_sep); + free(s->mon_grouping); + free(s->positive_sign); + free(s->negative_sign); +} + +void free_current_locale_conv() +{ + if (CurrentLocaleConvAllocated) + { + free_struct_lconv(&CurrentLocaleConv); + CurrentLocaleConvAllocated = false; + } +} +/* + * Check that all fields of a struct lconv (or at least, the ones we care + * about) are non-NULL. The field list must match free_struct_lconv(). + */ +static bool +struct_lconv_is_valid(struct lconv *s) +{ + if (s->decimal_point == NULL) + return false; + if (s->thousands_sep == NULL) + return false; + if (s->grouping == NULL) + return false; + if (s->int_curr_symbol == NULL) + return false; + if (s->currency_symbol == NULL) + return false; + if (s->mon_decimal_point == NULL) + return false; + if (s->mon_thousands_sep == NULL) + return false; + if (s->mon_grouping == NULL) + return false; + if (s->positive_sign == NULL) + return false; + if (s->negative_sign == NULL) + return false; + return true; +} + + +/* + * Convert the strdup'd string at *str from the specified encoding to the + * database encoding. 
+ */ +static void +db_encoding_convert(int encoding, char **str) +{ + char *pstr; + char *mstr; + + /* convert the string to the database encoding */ + pstr = pg_any_to_server(*str, strlen(*str), encoding); + if (pstr == *str) + return; /* no conversion happened */ + + /* need it malloc'd not palloc'd */ + mstr = strdup(pstr); + if (mstr == NULL) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + + /* replace old string */ + free(*str); + *str = mstr; + + pfree(pstr); +} + + +/* + * Return the POSIX lconv struct (contains number/money formatting + * information) with locale information for all categories. + */ +struct lconv * +PGLC_localeconv(void) +{ + struct lconv *extlconv; + struct lconv worklconv; + char *save_lc_monetary; + char *save_lc_numeric; +#ifdef WIN32 + char *save_lc_ctype; +#endif + + /* Did we do it already? */ + if (CurrentLocaleConvValid) + return &CurrentLocaleConv; + + /* Free any already-allocated storage */ + if (CurrentLocaleConvAllocated) + { + free_struct_lconv(&CurrentLocaleConv); + CurrentLocaleConvAllocated = false; + } + + /* + * This is tricky because we really don't want to risk throwing error + * while the locale is set to other than our usual settings. Therefore, + * the process is: collect the usual settings, set locale to special + * setting, copy relevant data into worklconv using strdup(), restore + * normal settings, convert data to desired encoding, and finally stash + * the collected data in CurrentLocaleConv. This makes it safe if we + * throw an error during encoding conversion or run out of memory anywhere + * in the process. All data pointed to by struct lconv members is + * allocated with strdup, to avoid premature elog(ERROR) and to allow + * using a single cleanup routine. + */ + memset(&worklconv, 0, sizeof(worklconv)); + + /* Save prevailing values of monetary and numeric locales */ + save_lc_monetary = setlocale(LC_MONETARY, NULL); + if (!save_lc_monetary) + elog(ERROR, "setlocale(NULL) failed"); + save_lc_monetary = pstrdup(save_lc_monetary); + + save_lc_numeric = setlocale(LC_NUMERIC, NULL); + if (!save_lc_numeric) + elog(ERROR, "setlocale(NULL) failed"); + save_lc_numeric = pstrdup(save_lc_numeric); + +#ifdef WIN32 + + /* + * The POSIX standard explicitly says that it is undefined what happens if + * LC_MONETARY or LC_NUMERIC imply an encoding (codeset) different from + * that implied by LC_CTYPE. In practice, all Unix-ish platforms seem to + * believe that localeconv() should return strings that are encoded in the + * codeset implied by the LC_MONETARY or LC_NUMERIC locale name. Hence, + * once we have successfully collected the localeconv() results, we will + * convert them from that codeset to the desired server encoding. + * + * Windows, of course, resolutely does things its own way; on that + * platform LC_CTYPE has to match LC_MONETARY/LC_NUMERIC to get sane + * results. Hence, we must temporarily set that category as well. 
+ */ + + /* Save prevailing value of ctype locale */ + save_lc_ctype = setlocale(LC_CTYPE, NULL); + if (!save_lc_ctype) + elog(ERROR, "setlocale(NULL) failed"); + save_lc_ctype = pstrdup(save_lc_ctype); + + /* Here begins the critical section where we must not throw error */ + + /* use numeric to set the ctype */ + setlocale(LC_CTYPE, locale_numeric); +#endif + + /* Get formatting information for numeric */ + setlocale(LC_NUMERIC, locale_numeric); + extlconv = localeconv(); + + /* Must copy data now in case setlocale() overwrites it */ + worklconv.decimal_point = strdup(extlconv->decimal_point); + worklconv.thousands_sep = strdup(extlconv->thousands_sep); + worklconv.grouping = strdup(extlconv->grouping); + +#ifdef WIN32 + /* use monetary to set the ctype */ + setlocale(LC_CTYPE, locale_monetary); +#endif + + /* Get formatting information for monetary */ + setlocale(LC_MONETARY, locale_monetary); + extlconv = localeconv(); + + /* Must copy data now in case setlocale() overwrites it */ + worklconv.int_curr_symbol = strdup(extlconv->int_curr_symbol); + worklconv.currency_symbol = strdup(extlconv->currency_symbol); + worklconv.mon_decimal_point = strdup(extlconv->mon_decimal_point); + worklconv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep); + worklconv.mon_grouping = strdup(extlconv->mon_grouping); + worklconv.positive_sign = strdup(extlconv->positive_sign); + worklconv.negative_sign = strdup(extlconv->negative_sign); + /* Copy scalar fields as well */ + worklconv.int_frac_digits = extlconv->int_frac_digits; + worklconv.frac_digits = extlconv->frac_digits; + worklconv.p_cs_precedes = extlconv->p_cs_precedes; + worklconv.p_sep_by_space = extlconv->p_sep_by_space; + worklconv.n_cs_precedes = extlconv->n_cs_precedes; + worklconv.n_sep_by_space = extlconv->n_sep_by_space; + worklconv.p_sign_posn = extlconv->p_sign_posn; + worklconv.n_sign_posn = extlconv->n_sign_posn; + + /* + * Restore the prevailing locale settings; failure to do so is fatal. + * Possibly we could limp along with nondefault LC_MONETARY or LC_NUMERIC, + * but proceeding with the wrong value of LC_CTYPE would certainly be bad + * news; and considering that the prevailing LC_MONETARY and LC_NUMERIC + * are almost certainly "C", there's really no reason that restoring those + * should fail. + */ +#ifdef WIN32 + if (!setlocale(LC_CTYPE, save_lc_ctype)) + elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype); +#endif + if (!setlocale(LC_MONETARY, save_lc_monetary)) + elog(FATAL, "failed to restore LC_MONETARY to \"%s\"", save_lc_monetary); + if (!setlocale(LC_NUMERIC, save_lc_numeric)) + elog(FATAL, "failed to restore LC_NUMERIC to \"%s\"", save_lc_numeric); + + /* + * At this point we've done our best to clean up, and can call functions + * that might possibly throw errors with a clean conscience. But let's + * make sure we don't leak any already-strdup'd fields in worklconv. + */ + PG_TRY(); + { + int encoding; + + /* Release the pstrdup'd locale names */ + pfree(save_lc_monetary); + pfree(save_lc_numeric); +#ifdef WIN32 + pfree(save_lc_ctype); +#endif + + /* If any of the preceding strdup calls failed, complain now. */ + if (!struct_lconv_is_valid(&worklconv)) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + + /* + * Now we must perform encoding conversion from whatever's associated + * with the locales into the database encoding. 
If we can't identify + * the encoding implied by LC_NUMERIC or LC_MONETARY (ie we get -1), + * use PG_SQL_ASCII, which will result in just validating that the + * strings are OK in the database encoding. + */ + encoding = pg_get_encoding_from_locale(locale_numeric, true); + if (encoding < 0) + encoding = PG_SQL_ASCII; + + db_encoding_convert(encoding, &worklconv.decimal_point); + db_encoding_convert(encoding, &worklconv.thousands_sep); + /* grouping is not text and does not require conversion */ + + encoding = pg_get_encoding_from_locale(locale_monetary, true); + if (encoding < 0) + encoding = PG_SQL_ASCII; + + db_encoding_convert(encoding, &worklconv.int_curr_symbol); + db_encoding_convert(encoding, &worklconv.currency_symbol); + db_encoding_convert(encoding, &worklconv.mon_decimal_point); + db_encoding_convert(encoding, &worklconv.mon_thousands_sep); + /* mon_grouping is not text and does not require conversion */ + db_encoding_convert(encoding, &worklconv.positive_sign); + db_encoding_convert(encoding, &worklconv.negative_sign); + } + PG_CATCH(); + { + free_struct_lconv(&worklconv); + PG_RE_THROW(); + } + PG_END_TRY(); + + /* + * Everything is good, so save the results. + */ + CurrentLocaleConv = worklconv; + CurrentLocaleConvAllocated = true; + CurrentLocaleConvValid = true; + return &CurrentLocaleConv; +} + +#ifdef WIN32 +/* + * On Windows, strftime() returns its output in encoding CP_ACP (the default + * operating system codepage for the computer), which is likely different + * from SERVER_ENCODING. This is especially important in Japanese versions + * of Windows which will use SJIS encoding, which we don't support as a + * server encoding. + * + * So, instead of using strftime(), use wcsftime() to return the value in + * wide characters (internally UTF16) and then convert to UTF8, which we + * know how to handle directly. + * + * Note that this only affects the calls to strftime() in this file, which are + * used to get the locale-aware strings. Other parts of the backend use + * pg_strftime(), which isn't locale-aware and does not need to be replaced. + */ +static size_t +strftime_win32(char *dst, size_t dstlen, + const char *format, const struct tm *tm) +{ + size_t len; + wchar_t wformat[8]; /* formats used below need 3 chars */ + wchar_t wbuf[MAX_L10N_DATA]; + + /* + * Get a wchar_t version of the format string. We only actually use + * plain-ASCII formats in this file, so we can say that they're UTF8. + */ + len = MultiByteToWideChar(CP_UTF8, 0, format, -1, + wformat, lengthof(wformat)); + if (len == 0) + elog(ERROR, "could not convert format string from UTF-8: error code %lu", + GetLastError()); + + len = wcsftime(wbuf, MAX_L10N_DATA, wformat, tm); + if (len == 0) + { + /* + * wcsftime failed, possibly because the result would not fit in + * MAX_L10N_DATA. Return 0 with the contents of dst unspecified. + */ + return 0; + } + + len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen - 1, + NULL, NULL); + if (len == 0) + elog(ERROR, "could not convert string to UTF-8: error code %lu", + GetLastError()); + + dst[len] = '\0'; + + return len; +} + +/* redefine strftime() */ +#define strftime(a,b,c,d) strftime_win32(a,b,c,d) +#endif /* WIN32 */ + +/* + * Subroutine for cache_locale_time(). + * Convert the given string from encoding "encoding" to the database + * encoding, and store the result at *dst, replacing any previous value. 
+ */ +static void +cache_single_string(char **dst, const char *src, int encoding) +{ + char *ptr; + char *olddst; + + /* Convert the string to the database encoding, or validate it's OK */ + ptr = pg_any_to_server(src, strlen(src), encoding); + + /* Store the string in long-lived storage, replacing any previous value */ + olddst = *dst; + *dst = MemoryContextStrdup(TopMemoryContext, ptr); + if (olddst) + pfree(olddst); + + /* Might as well clean up any palloc'd conversion result, too */ + if (ptr != src) + pfree(ptr); +} + +/* + * Update the lc_time localization cache variables if needed. + */ +void +cache_locale_time(void) +{ + char buf[(2 * 7 + 2 * 12) * MAX_L10N_DATA]; + char *bufptr; + time_t timenow; + struct tm *timeinfo; + bool strftimefail = false; + int encoding; + int i; + char *save_lc_time; +#ifdef WIN32 + char *save_lc_ctype; +#endif + + /* did we do this already? */ + if (CurrentLCTimeValid) + return; + + elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time); + + /* + * As in PGLC_localeconv(), it's critical that we not throw error while + * libc's locale settings have nondefault values. Hence, we just call + * strftime() within the critical section, and then convert and save its + * results afterwards. + */ + + /* Save prevailing value of time locale */ + save_lc_time = setlocale(LC_TIME, NULL); + if (!save_lc_time) + elog(ERROR, "setlocale(NULL) failed"); + save_lc_time = pstrdup(save_lc_time); + +#ifdef WIN32 + + /* + * On Windows, it appears that wcsftime() internally uses LC_CTYPE, so we + * must set it here. This code looks the same as what PGLC_localeconv() + * does, but the underlying reason is different: this does NOT determine + * the encoding we'll get back from strftime_win32(). + */ + + /* Save prevailing value of ctype locale */ + save_lc_ctype = setlocale(LC_CTYPE, NULL); + if (!save_lc_ctype) + elog(ERROR, "setlocale(NULL) failed"); + save_lc_ctype = pstrdup(save_lc_ctype); + + /* use lc_time to set the ctype */ + setlocale(LC_CTYPE, locale_time); +#endif + + setlocale(LC_TIME, locale_time); + + /* We use times close to current time as data for strftime(). */ + timenow = time(NULL); + timeinfo = localtime(&timenow); + + /* Store the strftime results in MAX_L10N_DATA-sized portions of buf[] */ + bufptr = buf; + + /* + * MAX_L10N_DATA is sufficient buffer space for every known locale, and + * POSIX defines no strftime() errors. (Buffer space exhaustion is not an + * error.) An implementation might report errors (e.g. ENOMEM) by + * returning 0 (or, less plausibly, a negative value) and setting errno. + * Report errno just in case the implementation did that, but clear it in + * advance of the calls so we don't emit a stale, unrelated errno. + */ + errno = 0; + + /* localized days */ + for (i = 0; i < 7; i++) + { + timeinfo->tm_wday = i; + if (strftime(bufptr, MAX_L10N_DATA, "%a", timeinfo) <= 0) + strftimefail = true; + bufptr += MAX_L10N_DATA; + if (strftime(bufptr, MAX_L10N_DATA, "%A", timeinfo) <= 0) + strftimefail = true; + bufptr += MAX_L10N_DATA; + } + + /* localized months */ + for (i = 0; i < 12; i++) + { + timeinfo->tm_mon = i; + timeinfo->tm_mday = 1; /* make sure we don't have invalid date */ + if (strftime(bufptr, MAX_L10N_DATA, "%b", timeinfo) <= 0) + strftimefail = true; + bufptr += MAX_L10N_DATA; + if (strftime(bufptr, MAX_L10N_DATA, "%B", timeinfo) <= 0) + strftimefail = true; + bufptr += MAX_L10N_DATA; + } + + /* + * Restore the prevailing locale settings; as in PGLC_localeconv(), + * failure to do so is fatal. 
+ */ +#ifdef WIN32 + if (!setlocale(LC_CTYPE, save_lc_ctype)) + elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype); +#endif + if (!setlocale(LC_TIME, save_lc_time)) + elog(FATAL, "failed to restore LC_TIME to \"%s\"", save_lc_time); + + /* + * At this point we've done our best to clean up, and can throw errors, or + * call functions that might throw errors, with a clean conscience. + */ + if (strftimefail) + elog(ERROR, "strftime() failed: %m"); + + /* Release the pstrdup'd locale names */ + pfree(save_lc_time); +#ifdef WIN32 + pfree(save_lc_ctype); +#endif + +#ifndef WIN32 + + /* + * As in PGLC_localeconv(), we must convert strftime()'s output from the + * encoding implied by LC_TIME to the database encoding. If we can't + * identify the LC_TIME encoding, just perform encoding validation. + */ + encoding = pg_get_encoding_from_locale(locale_time, true); + if (encoding < 0) + encoding = PG_SQL_ASCII; + +#else + + /* + * On Windows, strftime_win32() always returns UTF8 data, so convert from + * that if necessary. + */ + encoding = PG_UTF8; + +#endif /* WIN32 */ + + bufptr = buf; + + /* localized days */ + for (i = 0; i < 7; i++) + { + cache_single_string(&localized_abbrev_days[i], bufptr, encoding); + bufptr += MAX_L10N_DATA; + cache_single_string(&localized_full_days[i], bufptr, encoding); + bufptr += MAX_L10N_DATA; + } + localized_abbrev_days[7] = NULL; + localized_full_days[7] = NULL; + + /* localized months */ + for (i = 0; i < 12; i++) + { + cache_single_string(&localized_abbrev_months[i], bufptr, encoding); + bufptr += MAX_L10N_DATA; + cache_single_string(&localized_full_months[i], bufptr, encoding); + bufptr += MAX_L10N_DATA; + } + localized_abbrev_months[12] = NULL; + localized_full_months[12] = NULL; + + CurrentLCTimeValid = true; +} + + +#if defined(WIN32) && defined(LC_MESSAGES) +/* + * Convert a Windows setlocale() argument to a Unix-style one. + * + * Regardless of platform, we install message catalogs under a Unix-style + * LL[_CC][.ENCODING][@VARIANT] naming convention. Only LC_MESSAGES settings + * following that style will elicit localized interface strings. + * + * Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C" + * (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>], + * case-insensitive. setlocale() returns the fully-qualified form; for + * example, setlocale("thaI") returns "Thai_Thailand.874". Internally, + * setlocale() and _create_locale() select a "locale identifier"[1] and store + * it in an undocumented _locale_t field. From that LCID, we can retrieve the + * ISO 639 language and the ISO 3166 country. Character encoding does not + * matter, because the server and client encodings govern that. + * + * Windows Vista introduced the "locale name" concept[2], closely following + * RFC 4646. Locale identifiers are now deprecated. Starting with Visual + * Studio 2012, setlocale() accepts locale names in addition to the strings it + * accepted historically. It does not standardize them; setlocale("Th-tH") + * returns "Th-tH". setlocale(category, "") still returns a traditional + * string. Furthermore, msvcr110.dll changed the undocumented _locale_t + * content to carry locale names instead of locale identifiers. 
+ * + * Visual Studio 2015 should still be able to do the same as Visual Studio + * 2012, but the declaration of locale_name is missing in _locale_t, causing + * this code compilation to fail, hence this falls back instead on to + * enumerating all system locales by using EnumSystemLocalesEx to find the + * required locale name. If the input argument is in Unix-style then we can + * get ISO Locale name directly by using GetLocaleInfoEx() with LCType as + * LOCALE_SNAME. + * + * MinGW headers declare _create_locale(), but msvcrt.dll lacks that symbol in + * releases before Windows 8. IsoLocaleName() always fails in a MinGW-built + * postgres.exe, so only Unix-style values of the lc_messages GUC can elicit + * localized messages. In particular, every lc_messages setting that initdb + * can select automatically will yield only C-locale messages. XXX This could + * be fixed by running the fully-qualified locale name through a lookup table. + * + * This function returns a pointer to a static buffer bearing the converted + * name or NULL if conversion fails. + * + * [1] https://docs.microsoft.com/en-us/windows/win32/intl/locale-identifiers + * [2] https://docs.microsoft.com/en-us/windows/win32/intl/locale-names + */ + +#if defined(_MSC_VER) + +/* + * Callback function for EnumSystemLocalesEx() in get_iso_localename(). + * + * This function enumerates all system locales, searching for one that matches + * an input with the format: <Language>[_<Country>], e.g. + * English[_United States] + * + * The input is a three wchar_t array as an LPARAM. The first element is the + * locale_name we want to match, the second element is an allocated buffer + * where the Unix-style locale is copied if a match is found, and the third + * element is the search status, 1 if a match was found, 0 otherwise. + */ +static BOOL CALLBACK +search_locale_enum(LPWSTR pStr, DWORD dwFlags, LPARAM lparam) +{ + wchar_t test_locale[LOCALE_NAME_MAX_LENGTH]; + wchar_t **argv; + + (void) (dwFlags); + + argv = (wchar_t **) lparam; + *argv[2] = (wchar_t) 0; + + memset(test_locale, 0, sizeof(test_locale)); + + /* Get the name of the <Language> in English */ + if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHLANGUAGENAME, + test_locale, LOCALE_NAME_MAX_LENGTH)) + { + /* + * If the enumerated locale does not have a hyphen ("en") OR the + * locale_name input does not have an underscore ("English"), we only + * need to compare the <Language> tags. + */ + if (wcsrchr(pStr, '-') == NULL || wcsrchr(argv[0], '_') == NULL) + { + if (_wcsicmp(argv[0], test_locale) == 0) + { + wcscpy(argv[1], pStr); + *argv[2] = (wchar_t) 1; + return FALSE; + } + } + + /* + * We have to compare a full <Language>_<Country> tag, so we append + * the underscore and name of the country/region in English, e.g. + * "English_United States". + */ + else + { + size_t len; + + wcscat(test_locale, L"_"); + len = wcslen(test_locale); + if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHCOUNTRYNAME, + test_locale + len, + LOCALE_NAME_MAX_LENGTH - len)) + { + if (_wcsicmp(argv[0], test_locale) == 0) + { + wcscpy(argv[1], pStr); + *argv[2] = (wchar_t) 1; + return FALSE; + } + } + } + } + + return TRUE; +} + +/* + * This function converts a Windows locale name to an ISO formatted version + * for Visual Studio 2015 or greater. + * + * Returns NULL, if no valid conversion was found. 
+ */ +static char * +get_iso_localename(const char *winlocname) +{ + wchar_t wc_locale_name[LOCALE_NAME_MAX_LENGTH]; + wchar_t buffer[LOCALE_NAME_MAX_LENGTH]; + static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH]; + char *period; + int len; + int ret_val; + + /* + * Valid locales have the following syntax: + * <Language>[_<Country>[.<CodePage>]] + * + * GetLocaleInfoEx can only take locale name without code-page and for the + * purpose of this API the code-page doesn't matter. + */ + period = strchr(winlocname, '.'); + if (period != NULL) + len = period - winlocname; + else + len = pg_mbstrlen(winlocname); + + memset(wc_locale_name, 0, sizeof(wc_locale_name)); + memset(buffer, 0, sizeof(buffer)); + MultiByteToWideChar(CP_ACP, 0, winlocname, len, wc_locale_name, + LOCALE_NAME_MAX_LENGTH); + + /* + * If the lc_messages is already a Unix-style string, we have a direct + * match with LOCALE_SNAME, e.g. en-US, en_US. + */ + ret_val = GetLocaleInfoEx(wc_locale_name, LOCALE_SNAME, (LPWSTR) &buffer, + LOCALE_NAME_MAX_LENGTH); + if (!ret_val) + { + /* + * Search for a locale in the system that matches language and country + * name. + */ + wchar_t *argv[3]; + + argv[0] = wc_locale_name; + argv[1] = buffer; + argv[2] = (wchar_t *) &ret_val; + EnumSystemLocalesEx(search_locale_enum, LOCALE_WINDOWS, (LPARAM) argv, + NULL); + } + + if (ret_val) + { + size_t rc; + char *hyphen; + + /* Locale names use only ASCII, any conversion locale suffices. */ + rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), NULL); + if (rc == -1 || rc == sizeof(iso_lc_messages)) + return NULL; + + /* + * Since the message catalogs sit on a case-insensitive filesystem, we + * need not standardize letter case here. So long as we do not ship + * message catalogs for which it would matter, we also need not + * translate the script/variant portion, e.g. uz-Cyrl-UZ to + * uz_UZ@cyrillic. Simply replace the hyphen with an underscore. + */ + hyphen = strchr(iso_lc_messages, '-'); + if (hyphen) + *hyphen = '_'; + return iso_lc_messages; + } + + return NULL; +} + +static char * +IsoLocaleName(const char *winlocname) +{ + static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH]; + + if (pg_strcasecmp("c", winlocname) == 0 || + pg_strcasecmp("posix", winlocname) == 0) + { + strcpy(iso_lc_messages, "C"); + return iso_lc_messages; + } + else + return get_iso_localename(winlocname); +} + +#else /* !defined(_MSC_VER) */ + +static char * +IsoLocaleName(const char *winlocname) +{ + return NULL; /* Not supported on MinGW */ +} + +#endif /* defined(_MSC_VER) */ + +#endif /* WIN32 && LC_MESSAGES */ + + +/* + * Cache mechanism for collation information. + * + * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C + * (or POSIX), so we can optimize a few code paths in various places. + * For the built-in C and POSIX collations, we can know that without even + * doing a cache lookup, but we want to support aliases for C/POSIX too. + * For the "default" collation, there are separate static cache variables, + * since consulting the pg_collation catalog doesn't tell us what we need. + * + * Also, if a pg_locale_t has been requested for a collation, we cache that + * for the life of a backend. + * + * Note that some code relies on the flags not reporting false negatives + * (that is, saying it's not C when it is). For example, char2wchar() + * could fail if the locale is C, so str_tolower() shouldn't call it + * in that case. + * + * Note that we currently lack any way to flush the cache. 
Since we don't + * support ALTER COLLATION, this is OK. The worst case is that someone + * drops a collation, and a useless cache entry hangs around in existing + * backends. + */ + +static collation_cache_entry * +lookup_collation_cache(Oid collation, bool set_flags) +{ + collation_cache_entry *cache_entry; + bool found; + + Assert(OidIsValid(collation)); + Assert(collation != DEFAULT_COLLATION_OID); + + if (collation_cache == NULL) + { + /* First time through, initialize the hash table */ + HASHCTL ctl; + + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(collation_cache_entry); + collation_cache = hash_create("Collation cache", 100, &ctl, + HASH_ELEM | HASH_BLOBS); + } + + cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found); + if (!found) + { + /* + * Make sure cache entry is marked invalid, in case we fail before + * setting things. + */ + cache_entry->flags_valid = false; + cache_entry->locale = 0; + } + + if (set_flags && !cache_entry->flags_valid) + { + /* Attempt to set the flags */ + HeapTuple tp; + Form_pg_collation collform; + + tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for collation %u", collation); + collform = (Form_pg_collation) GETSTRUCT(tp); + + if (collform->collprovider == COLLPROVIDER_LIBC) + { + Datum datum; + const char *collcollate; + const char *collctype; + + datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate); + collcollate = TextDatumGetCString(datum); + datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype); + collctype = TextDatumGetCString(datum); + + cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) || + (strcmp(collcollate, "POSIX") == 0)); + cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) || + (strcmp(collctype, "POSIX") == 0)); + } + else + { + cache_entry->collate_is_c = false; + cache_entry->ctype_is_c = false; + } + + cache_entry->flags_valid = true; + + ReleaseSysCache(tp); + } + + return cache_entry; +} + + +/* + * Detect whether collation's LC_COLLATE property is C + */ +bool +lc_collate_is_c(Oid collation) +{ + /* + * If we're asked about "collation 0", return false, so that the code will + * go into the non-C path and report that the collation is bogus. + */ + if (!OidIsValid(collation)) + return false; + + /* + * If we're asked about the default collation, we have to inquire of the C + * library. Cache the result so we only have to compute it once. + */ + if (collation == DEFAULT_COLLATION_OID) + { + static __thread int result = -1; + char *localeptr; + + if (default_locale.provider == COLLPROVIDER_ICU) + return false; + + if (result >= 0) + return (bool) result; + localeptr = setlocale(LC_COLLATE, NULL); + if (!localeptr) + elog(ERROR, "invalid LC_COLLATE setting"); + + if (strcmp(localeptr, "C") == 0) + result = true; + else if (strcmp(localeptr, "POSIX") == 0) + result = true; + else + result = false; + return (bool) result; + } + + /* + * If we're asked about the built-in C/POSIX collations, we know that. + */ + if (collation == C_COLLATION_OID || + collation == POSIX_COLLATION_OID) + return true; + + /* + * Otherwise, we have to consult pg_collation, but we cache that. 
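The collate_is_c/ctype_is_c flags exist so that hot comparison paths can skip locale-aware routines entirely. A minimal caller-side sketch of that fast path (hypothetical helper, not part of the original file; assumes a backend translation unit that already includes postgres.h and utils/pg_locale.h), roughly the shape used by the varlena comparison routines:

/*
 * Hypothetical caller-side helper: compare two nul-terminated strings
 * under a collation, taking the byte-wise fast path when LC_COLLATE is
 * C/POSIX.  Tie-breaking for deterministic collations is omitted here.
 */
static int
compare_with_collation(const char *a, const char *b, Oid collid)
{
	if (lc_collate_is_c(collid))
		return strcmp(a, b);	/* C/POSIX: plain byte comparison */

	return pg_strcoll(a, b, pg_newlocale_from_collation(collid));
}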
+ */ + return (lookup_collation_cache(collation, true))->collate_is_c; +} + +/* + * Detect whether collation's LC_CTYPE property is C + */ +bool +lc_ctype_is_c(Oid collation) +{ + /* + * If we're asked about "collation 0", return false, so that the code will + * go into the non-C path and report that the collation is bogus. + */ + if (!OidIsValid(collation)) + return false; + + /* + * If we're asked about the default collation, we have to inquire of the C + * library. Cache the result so we only have to compute it once. + */ + if (collation == DEFAULT_COLLATION_OID) + { + static __thread int result = -1; + char *localeptr; + + if (default_locale.provider == COLLPROVIDER_ICU) + return false; + + if (result >= 0) + return (bool) result; + localeptr = setlocale(LC_CTYPE, NULL); + if (!localeptr) + elog(ERROR, "invalid LC_CTYPE setting"); + + if (strcmp(localeptr, "C") == 0) + result = true; + else if (strcmp(localeptr, "POSIX") == 0) + result = true; + else + result = false; + return (bool) result; + } + + /* + * If we're asked about the built-in C/POSIX collations, we know that. + */ + if (collation == C_COLLATION_OID || + collation == POSIX_COLLATION_OID) + return true; + + /* + * Otherwise, we have to consult pg_collation, but we cache that. + */ + return (lookup_collation_cache(collation, true))->ctype_is_c; +} + +__thread struct pg_locale_struct default_locale; + +void +make_icu_collator(const char *iculocstr, + const char *icurules, + struct pg_locale_struct *resultp) +{ +#ifdef USE_ICU + UCollator *collator; + + collator = pg_ucol_open(iculocstr); + + /* + * If rules are specified, we extract the rules of the standard collation, + * add our own rules, and make a new collator with the combined rules. + */ + if (icurules) + { + const UChar *default_rules; + UChar *agg_rules; + UChar *my_rules; + UErrorCode status; + int32_t length; + + default_rules = ucol_getRules(collator, &length); + icu_to_uchar(&my_rules, icurules, strlen(icurules)); + + agg_rules = palloc_array(UChar, u_strlen(default_rules) + u_strlen(my_rules) + 1); + u_strcpy(agg_rules, default_rules); + u_strcat(agg_rules, my_rules); + + ucol_close(collator); + + status = U_ZERO_ERROR; + collator = ucol_openRules(agg_rules, u_strlen(agg_rules), + UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH, NULL, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not open collator for locale \"%s\" with rules \"%s\": %s", + iculocstr, icurules, u_errorName(status)))); + } + + /* We will leak this string if the caller errors later :-( */ + resultp->info.icu.locale = MemoryContextStrdup(TopMemoryContext, iculocstr); + resultp->info.icu.ucol = collator; +#else /* not USE_ICU */ + /* could get here if a collation was created by a build with ICU */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ICU is not supported in this build"))); +#endif /* not USE_ICU */ +} + + +/* simple subroutine for reporting errors from newlocale() */ +#ifdef HAVE_LOCALE_T +static void +report_newlocale_failure(const char *localename) +{ + int save_errno; + + /* + * Windows doesn't provide any useful error indication from + * _create_locale(), and BSD-derived platforms don't seem to feel they + * need to set errno either (even though POSIX is pretty clear that + * newlocale should do so). So, if errno hasn't been set, assume ENOENT + * is what to report. + */ + if (errno == 0) + errno = ENOENT; + + /* + * ENOENT means "no such locale", not "no such file", so clarify that + * errno with an errdetail message. 
+ */ + save_errno = errno; /* auxiliary funcs might change errno */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not create locale \"%s\": %m", + localename), + (save_errno == ENOENT ? + errdetail("The operating system could not find any locale data for the locale name \"%s\".", + localename) : 0))); +} +#endif /* HAVE_LOCALE_T */ + +bool +pg_locale_deterministic(pg_locale_t locale) +{ + /* default locale must always be deterministic */ + if (locale == NULL) + return true; + else + return locale->deterministic; +} + +/* + * Create a locale_t from a collation OID. Results are cached for the + * lifetime of the backend. Thus, do not free the result with freelocale(). + * + * As a special optimization, the default/database collation returns 0. + * Callers should then revert to the non-locale_t-enabled code path. + * Also, callers should avoid calling this before going down a C/POSIX + * fastpath, because such a fastpath should work even on platforms without + * locale_t support in the C library. + * + * For simplicity, we always generate COLLATE + CTYPE even though we + * might only need one of them. Since this is called only once per session, + * it shouldn't cost much. + */ +pg_locale_t +pg_newlocale_from_collation(Oid collid) +{ + collation_cache_entry *cache_entry; + + /* Callers must pass a valid OID */ + Assert(OidIsValid(collid)); + + if (collid == DEFAULT_COLLATION_OID) + { + if (default_locale.provider == COLLPROVIDER_ICU) + return &default_locale; + else + return (pg_locale_t) 0; + } + + cache_entry = lookup_collation_cache(collid, false); + + if (cache_entry->locale == 0) + { + /* We haven't computed this yet in this session, so do it */ + HeapTuple tp; + Form_pg_collation collform; + struct pg_locale_struct result; + pg_locale_t resultp; + Datum datum; + bool isnull; + + tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for collation %u", collid); + collform = (Form_pg_collation) GETSTRUCT(tp); + + /* We'll fill in the result struct locally before allocating memory */ + memset(&result, 0, sizeof(result)); + result.provider = collform->collprovider; + result.deterministic = collform->collisdeterministic; + + if (collform->collprovider == COLLPROVIDER_LIBC) + { +#ifdef HAVE_LOCALE_T + const char *collcollate; + const char *collctype pg_attribute_unused(); + locale_t loc; + + datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate); + collcollate = TextDatumGetCString(datum); + datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype); + collctype = TextDatumGetCString(datum); + + if (strcmp(collcollate, collctype) == 0) + { + /* Normal case where they're the same */ + errno = 0; +#ifndef WIN32 + loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate, + NULL); +#else + loc = _create_locale(LC_ALL, collcollate); +#endif + if (!loc) + report_newlocale_failure(collcollate); + } + else + { +#ifndef WIN32 + /* We need two newlocale() steps */ + locale_t loc1; + + errno = 0; + loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL); + if (!loc1) + report_newlocale_failure(collcollate); + errno = 0; + loc = newlocale(LC_CTYPE_MASK, collctype, loc1); + if (!loc) + report_newlocale_failure(collctype); +#else + + /* + * XXX The _create_locale() API doesn't appear to support + * this. Could perhaps be worked around by changing + * pg_locale_t to contain two separate fields. 
+ */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("collations with different collate and ctype values are not supported on this platform"))); +#endif + } + + result.info.lt = loc; +#else /* not HAVE_LOCALE_T */ + /* platform that doesn't support locale_t */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("collation provider LIBC is not supported on this platform"))); +#endif /* not HAVE_LOCALE_T */ + } + else if (collform->collprovider == COLLPROVIDER_ICU) + { + const char *iculocstr; + const char *icurules; + + datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colliculocale); + iculocstr = TextDatumGetCString(datum); + + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull); + if (!isnull) + icurules = TextDatumGetCString(datum); + else + icurules = NULL; + + make_icu_collator(iculocstr, icurules, &result); + } + + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion, + &isnull); + if (!isnull) + { + char *actual_versionstr; + char *collversionstr; + + collversionstr = TextDatumGetCString(datum); + + datum = SysCacheGetAttrNotNull(COLLOID, tp, collform->collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_colliculocale : Anum_pg_collation_collcollate); + + actual_versionstr = get_collation_actual_version(collform->collprovider, + TextDatumGetCString(datum)); + if (!actual_versionstr) + { + /* + * This could happen when specifying a version in CREATE + * COLLATION but the provider does not support versioning, or + * manually creating a mess in the catalogs. + */ + ereport(ERROR, + (errmsg("collation \"%s\" has no actual version, but a version was recorded", + NameStr(collform->collname)))); + } + + if (strcmp(actual_versionstr, collversionstr) != 0) + ereport(WARNING, + (errmsg("collation \"%s\" has version mismatch", + NameStr(collform->collname)), + errdetail("The collation in the database was created using version %s, " + "but the operating system provides version %s.", + collversionstr, actual_versionstr), + errhint("Rebuild all objects affected by this collation and run " + "ALTER COLLATION %s REFRESH VERSION, " + "or build PostgreSQL with the right library version.", + quote_qualified_identifier(get_namespace_name(collform->collnamespace), + NameStr(collform->collname))))); + } + + ReleaseSysCache(tp); + + /* We'll keep the pg_locale_t structures in TopMemoryContext */ + resultp = MemoryContextAlloc(TopMemoryContext, sizeof(*resultp)); + *resultp = result; + + cache_entry->locale = resultp; + } + + return cache_entry->locale; +} + +/* + * Get provider-specific collation version string for the given collation from + * the operating system/library. + */ +char * +get_collation_actual_version(char collprovider, const char *collcollate) +{ + char *collversion = NULL; + +#ifdef USE_ICU + if (collprovider == COLLPROVIDER_ICU) + { + UCollator *collator; + UVersionInfo versioninfo; + char buf[U_MAX_VERSION_STRING_LENGTH]; + + collator = pg_ucol_open(collcollate); + + ucol_getVersion(collator, versioninfo); + ucol_close(collator); + + u_versionToString(versioninfo, buf); + collversion = pstrdup(buf); + } + else +#endif + if (collprovider == COLLPROVIDER_LIBC && + pg_strcasecmp("C", collcollate) != 0 && + pg_strncasecmp("C.", collcollate, 2) != 0 && + pg_strcasecmp("POSIX", collcollate) != 0) + { +#if defined(__GLIBC__) + /* Use the glibc version because we don't have anything better. 
*/ + collversion = pstrdup(gnu_get_libc_version()); +#elif defined(LC_VERSION_MASK) + locale_t loc; + + /* Look up FreeBSD collation version. */ + loc = newlocale(LC_COLLATE, collcollate, NULL); + if (loc) + { + collversion = + pstrdup(querylocale(LC_COLLATE_MASK | LC_VERSION_MASK, loc)); + freelocale(loc); + } + else + ereport(ERROR, + (errmsg("could not load locale \"%s\"", collcollate))); +#elif defined(WIN32) + /* + * If we are targeting Windows Vista and above, we can ask for a name + * given a collation name (earlier versions required a location code + * that we don't have). + */ + NLSVERSIONINFOEX version = {sizeof(NLSVERSIONINFOEX)}; + WCHAR wide_collcollate[LOCALE_NAME_MAX_LENGTH]; + + MultiByteToWideChar(CP_ACP, 0, collcollate, -1, wide_collcollate, + LOCALE_NAME_MAX_LENGTH); + if (!GetNLSVersionEx(COMPARE_STRING, wide_collcollate, &version)) + { + /* + * GetNLSVersionEx() wants a language tag such as "en-US", not a + * locale name like "English_United States.1252". Until those + * values can be prevented from entering the system, or 100% + * reliably converted to the more useful tag format, tolerate the + * resulting error and report that we have no version data. + */ + if (GetLastError() == ERROR_INVALID_PARAMETER) + return NULL; + + ereport(ERROR, + (errmsg("could not get collation version for locale \"%s\": error code %lu", + collcollate, + GetLastError()))); + } + collversion = psprintf("%lu.%lu,%lu.%lu", + (version.dwNLSVersion >> 8) & 0xFFFF, + version.dwNLSVersion & 0xFF, + (version.dwDefinedVersion >> 8) & 0xFFFF, + version.dwDefinedVersion & 0xFF); +#endif + } + + return collversion; +} + +/* + * pg_strncoll_libc_win32_utf8 + * + * Win32 does not have UTF-8. Convert UTF8 arguments to wide characters and + * invoke wcscoll() or wcscoll_l(). + */ +#ifdef WIN32 +static int +pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2, + size_t len2, pg_locale_t locale) +{ + char sbuf[TEXTBUFLEN]; + char *buf = sbuf; + char *a1p, + *a2p; + int a1len = len1 * 2 + 2; + int a2len = len2 * 2 + 2; + int r; + int result; + + Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(GetDatabaseEncoding() == PG_UTF8); +#ifndef WIN32 + Assert(false); +#endif + + if (a1len + a2len > TEXTBUFLEN) + buf = palloc(a1len + a2len); + + a1p = buf; + a2p = buf + a1len; + + /* API does not work for zero-length input */ + if (len1 == 0) + r = 0; + else + { + r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1, + (LPWSTR) a1p, a1len / 2); + if (!r) + ereport(ERROR, + (errmsg("could not convert string to UTF-16: error code %lu", + GetLastError()))); + } + ((LPWSTR) a1p)[r] = 0; + + if (len2 == 0) + r = 0; + else + { + r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2, + (LPWSTR) a2p, a2len / 2); + if (!r) + ereport(ERROR, + (errmsg("could not convert string to UTF-16: error code %lu", + GetLastError()))); + } + ((LPWSTR) a2p)[r] = 0; + + errno = 0; +#ifdef HAVE_LOCALE_T + if (locale) + result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt); + else +#endif + result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p); + if (result == 2147483647) /* _NLSCMPERROR; missing from mingw headers */ + ereport(ERROR, + (errmsg("could not compare Unicode strings: %m"))); + + if (buf != sbuf) + pfree(buf); + + return result; +} +#endif /* WIN32 */ + +/* + * pg_strcoll_libc + * + * Call strcoll(), strcoll_l(), wcscoll(), or wcscoll_l() as appropriate for + * the given locale, platform, and database encoding. If the locale is NULL, + * use the database collation. 
+ * + * Arguments must be encoded in the database encoding and nul-terminated. + */ +static int +pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale) +{ + int result; + + Assert(!locale || locale->provider == COLLPROVIDER_LIBC); +#ifdef WIN32 + if (GetDatabaseEncoding() == PG_UTF8) + { + size_t len1 = strlen(arg1); + size_t len2 = strlen(arg2); + + result = pg_strncoll_libc_win32_utf8(arg1, len1, arg2, len2, locale); + } + else +#endif /* WIN32 */ + if (locale) + { +#ifdef HAVE_LOCALE_T + result = strcoll_l(arg1, arg2, locale->info.lt); +#else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", locale->provider); +#endif + } + else + result = strcoll(arg1, arg2); + + return result; +} + +/* + * pg_strncoll_libc + * + * Nul-terminate the arguments and call pg_strcoll_libc(). + */ +static int +pg_strncoll_libc(const char *arg1, size_t len1, const char *arg2, size_t len2, + pg_locale_t locale) +{ + char sbuf[TEXTBUFLEN]; + char *buf = sbuf; + size_t bufsize1 = len1 + 1; + size_t bufsize2 = len2 + 1; + char *arg1n; + char *arg2n; + int result; + + Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + +#ifdef WIN32 + /* check for this case before doing the work for nul-termination */ + if (GetDatabaseEncoding() == PG_UTF8) + return pg_strncoll_libc_win32_utf8(arg1, len1, arg2, len2, locale); +#endif /* WIN32 */ + + if (bufsize1 + bufsize2 > TEXTBUFLEN) + buf = palloc(bufsize1 + bufsize2); + + arg1n = buf; + arg2n = buf + bufsize1; + + /* nul-terminate arguments */ + memcpy(arg1n, arg1, len1); + arg1n[len1] = '\0'; + memcpy(arg2n, arg2, len2); + arg2n[len2] = '\0'; + + result = pg_strcoll_libc(arg1n, arg2n, locale); + + if (buf != sbuf) + pfree(buf); + + return result; +} + +#ifdef USE_ICU + +/* + * pg_strncoll_icu_no_utf8 + * + * Convert the arguments from the database encoding to UChar strings, then + * call ucol_strcoll(). An argument length of -1 means that the string is + * NUL-terminated. + * + * When the database encoding is UTF-8, and ICU supports ucol_strcollUTF8(), + * caller should call that instead. + */ +static int +pg_strncoll_icu_no_utf8(const char *arg1, int32_t len1, + const char *arg2, int32_t len2, pg_locale_t locale) +{ + char sbuf[TEXTBUFLEN]; + char *buf = sbuf; + int32_t ulen1; + int32_t ulen2; + size_t bufsize1; + size_t bufsize2; + UChar *uchar1, + *uchar2; + int result; + + Assert(locale->provider == COLLPROVIDER_ICU); +#ifdef HAVE_UCOL_STRCOLLUTF8 + Assert(GetDatabaseEncoding() != PG_UTF8); +#endif + + init_icu_converter(); + + ulen1 = uchar_length(icu_converter, arg1, len1); + ulen2 = uchar_length(icu_converter, arg2, len2); + + bufsize1 = (ulen1 + 1) * sizeof(UChar); + bufsize2 = (ulen2 + 1) * sizeof(UChar); + + if (bufsize1 + bufsize2 > TEXTBUFLEN) + buf = palloc(bufsize1 + bufsize2); + + uchar1 = (UChar *) buf; + uchar2 = (UChar *) (buf + bufsize1); + + ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1); + ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2); + + result = ucol_strcoll(locale->info.icu.ucol, + uchar1, ulen1, + uchar2, ulen2); + + if (buf != sbuf) + pfree(buf); + + return result; +} + +/* + * pg_strncoll_icu + * + * Call ucol_strcollUTF8() or ucol_strcoll() as appropriate for the given + * database encoding. An argument length of -1 means the string is + * NUL-terminated. + * + * Arguments must be encoded in the database encoding. 
+ */ +static int +pg_strncoll_icu(const char *arg1, int32_t len1, const char *arg2, int32_t len2, + pg_locale_t locale) +{ + int result; + + Assert(locale->provider == COLLPROVIDER_ICU); + +#ifdef HAVE_UCOL_STRCOLLUTF8 + if (GetDatabaseEncoding() == PG_UTF8) + { + UErrorCode status; + + status = U_ZERO_ERROR; + result = ucol_strcollUTF8(locale->info.icu.ucol, + arg1, len1, + arg2, len2, + &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("collation failed: %s", u_errorName(status)))); + } + else +#endif + { + result = pg_strncoll_icu_no_utf8(arg1, len1, arg2, len2, locale); + } + + return result; +} + +#endif /* USE_ICU */ + +/* + * pg_strcoll + * + * Call ucol_strcollUTF8(), ucol_strcoll(), strcoll(), strcoll_l(), wcscoll(), + * or wcscoll_l() as appropriate for the given locale, platform, and database + * encoding. If the locale is not specified, use the database collation. + * + * Arguments must be encoded in the database encoding and nul-terminated. + * + * The caller is responsible for breaking ties if the collation is + * deterministic; this maintains consistency with pg_strxfrm(), which cannot + * easily account for deterministic collations. + */ +int +pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale) +{ + int result; + + if (!locale || locale->provider == COLLPROVIDER_LIBC) + result = pg_strcoll_libc(arg1, arg2, locale); +#ifdef USE_ICU + else if (locale->provider == COLLPROVIDER_ICU) + result = pg_strncoll_icu(arg1, -1, arg2, -1, locale); +#endif + else + /* shouldn't happen */ + PGLOCALE_SUPPORT_ERROR(locale->provider); + + return result; +} + +/* + * pg_strncoll + * + * Call ucol_strcollUTF8(), ucol_strcoll(), strcoll(), strcoll_l(), wcscoll(), + * or wcscoll_l() as appropriate for the given locale, platform, and database + * encoding. If the locale is not specified, use the database collation. + * + * Arguments must be encoded in the database encoding. + * + * This function may need to nul-terminate the arguments for libc functions; + * so if the caller already has nul-terminated strings, it should call + * pg_strcoll() instead. + * + * The caller is responsible for breaking ties if the collation is + * deterministic; this maintains consistency with pg_strnxfrm(), which cannot + * easily account for deterministic collations. 
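Because pg_strncoll() leaves tie-breaking to the caller, deterministic collations need a binary fallback when the collator reports equality. A sketch of that pattern (hypothetical helper, not part of this file; same backend-header assumptions as above):

/*
 * Hypothetical helper showing the caller-side tie-break required for
 * deterministic collations: if the collator says "equal" but the bytes
 * differ, fall back to a binary comparison.
 */
static int
strncoll_with_tiebreak(const char *s1, size_t len1,
					   const char *s2, size_t len2,
					   pg_locale_t locale)
{
	int			result = pg_strncoll(s1, len1, s2, len2, locale);

	if (result == 0 && pg_locale_deterministic(locale))
	{
		result = memcmp(s1, s2, Min(len1, len2));
		if (result == 0 && len1 != len2)
			result = (len1 < len2) ? -1 : 1;
	}

	return result;
}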
+ */ +int +pg_strncoll(const char *arg1, size_t len1, const char *arg2, size_t len2, + pg_locale_t locale) +{ + int result; + + if (!locale || locale->provider == COLLPROVIDER_LIBC) + result = pg_strncoll_libc(arg1, len1, arg2, len2, locale); +#ifdef USE_ICU + else if (locale->provider == COLLPROVIDER_ICU) + result = pg_strncoll_icu(arg1, len1, arg2, len2, locale); +#endif + else + /* shouldn't happen */ + PGLOCALE_SUPPORT_ERROR(locale->provider); + + return result; +} + + +static size_t +pg_strxfrm_libc(char *dest, const char *src, size_t destsize, + pg_locale_t locale) +{ + Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + +#ifdef TRUST_STRXFRM +#ifdef HAVE_LOCALE_T + if (locale) + return strxfrm_l(dest, src, destsize, locale->info.lt); + else +#endif + return strxfrm(dest, src, destsize); +#else + /* shouldn't happen */ + PGLOCALE_SUPPORT_ERROR(locale->provider); + return 0; /* keep compiler quiet */ +#endif +} + +static size_t +pg_strnxfrm_libc(char *dest, const char *src, size_t srclen, size_t destsize, + pg_locale_t locale) +{ + char sbuf[TEXTBUFLEN]; + char *buf = sbuf; + size_t bufsize = srclen + 1; + size_t result; + + Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + + if (bufsize > TEXTBUFLEN) + buf = palloc(bufsize); + + /* nul-terminate arguments */ + memcpy(buf, src, srclen); + buf[srclen] = '\0'; + + result = pg_strxfrm_libc(dest, buf, destsize, locale); + + if (buf != sbuf) + pfree(buf); + + /* if dest is defined, it should be nul-terminated */ + Assert(result >= destsize || dest[result] == '\0'); + + return result; +} + +#ifdef USE_ICU + +/* 'srclen' of -1 means the strings are NUL-terminated */ +static size_t +pg_strnxfrm_icu(char *dest, const char *src, int32_t srclen, int32_t destsize, + pg_locale_t locale) +{ + char sbuf[TEXTBUFLEN]; + char *buf = sbuf; + UChar *uchar; + int32_t ulen; + size_t uchar_bsize; + Size result_bsize; + + Assert(locale->provider == COLLPROVIDER_ICU); + + init_icu_converter(); + + ulen = uchar_length(icu_converter, src, srclen); + + uchar_bsize = (ulen + 1) * sizeof(UChar); + + if (uchar_bsize > TEXTBUFLEN) + buf = palloc(uchar_bsize); + + uchar = (UChar *) buf; + + ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen); + + result_bsize = ucol_getSortKey(locale->info.icu.ucol, + uchar, ulen, + (uint8_t *) dest, destsize); + + /* + * ucol_getSortKey() counts the nul-terminator in the result length, but + * this function should not. 
+ */ + Assert(result_bsize > 0); + result_bsize--; + + if (buf != sbuf) + pfree(buf); + + /* if dest is defined, it should be nul-terminated */ + Assert(result_bsize >= destsize || dest[result_bsize] == '\0'); + + return result_bsize; +} + +/* 'srclen' of -1 means the strings are NUL-terminated */ +static size_t +pg_strnxfrm_prefix_icu_no_utf8(char *dest, const char *src, int32_t srclen, + int32_t destsize, pg_locale_t locale) +{ + char sbuf[TEXTBUFLEN]; + char *buf = sbuf; + UCharIterator iter; + uint32_t state[2]; + UErrorCode status; + int32_t ulen = -1; + UChar *uchar = NULL; + size_t uchar_bsize; + Size result_bsize; + + Assert(locale->provider == COLLPROVIDER_ICU); + Assert(GetDatabaseEncoding() != PG_UTF8); + + init_icu_converter(); + + ulen = uchar_length(icu_converter, src, srclen); + + uchar_bsize = (ulen + 1) * sizeof(UChar); + + if (uchar_bsize > TEXTBUFLEN) + buf = palloc(uchar_bsize); + + uchar = (UChar *) buf; + + ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen); + + uiter_setString(&iter, uchar, ulen); + state[0] = state[1] = 0; /* won't need that again */ + status = U_ZERO_ERROR; + result_bsize = ucol_nextSortKeyPart(locale->info.icu.ucol, + &iter, + state, + (uint8_t *) dest, + destsize, + &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("sort key generation failed: %s", + u_errorName(status)))); + + return result_bsize; +} + +/* 'srclen' of -1 means the strings are NUL-terminated */ +static size_t +pg_strnxfrm_prefix_icu(char *dest, const char *src, int32_t srclen, + int32_t destsize, pg_locale_t locale) +{ + size_t result; + + Assert(locale->provider == COLLPROVIDER_ICU); + + if (GetDatabaseEncoding() == PG_UTF8) + { + UCharIterator iter; + uint32_t state[2]; + UErrorCode status; + + uiter_setUTF8(&iter, src, srclen); + state[0] = state[1] = 0; /* won't need that again */ + status = U_ZERO_ERROR; + result = ucol_nextSortKeyPart(locale->info.icu.ucol, + &iter, + state, + (uint8_t *) dest, + destsize, + &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("sort key generation failed: %s", + u_errorName(status)))); + } + else + result = pg_strnxfrm_prefix_icu_no_utf8(dest, src, srclen, destsize, + locale); + + return result; +} + +#endif + +/* + * Return true if the collation provider supports pg_strxfrm() and + * pg_strnxfrm(); otherwise false. + * + * Unfortunately, it seems that strxfrm() for non-C collations is broken on + * many common platforms; testing of multiple versions of glibc reveals that, + * for many locales, strcoll() and strxfrm() do not return consistent + * results. While no other libc other than Cygwin has so far been shown to + * have a problem, we take the conservative course of action for right now and + * disable this categorically. (Users who are certain this isn't a problem on + * their system can define TRUST_STRXFRM.) + * + * No similar problem is known for the ICU provider. + */ +bool +pg_strxfrm_enabled(pg_locale_t locale) +{ + if (!locale || locale->provider == COLLPROVIDER_LIBC) +#ifdef TRUST_STRXFRM + return true; +#else + return false; +#endif + else if (locale->provider == COLLPROVIDER_ICU) + return true; + else + /* shouldn't happen */ + PGLOCALE_SUPPORT_ERROR(locale->provider); + + return false; /* keep compiler quiet */ +} + +/* + * pg_strxfrm + * + * Transforms 'src' to a nul-terminated string stored in 'dest' such that + * ordinary strcmp() on transformed strings is equivalent to pg_strcoll() on + * untransformed strings. + * + * The provided 'src' must be nul-terminated. 
If 'destsize' is zero, 'dest' + * may be NULL. + * + * Returns the number of bytes needed to store the transformed string, + * excluding the terminating nul byte. If the value returned is 'destsize' or + * greater, the resulting contents of 'dest' are undefined. + */ +size_t +pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale) +{ + size_t result = 0; /* keep compiler quiet */ + + if (!locale || locale->provider == COLLPROVIDER_LIBC) + result = pg_strxfrm_libc(dest, src, destsize, locale); +#ifdef USE_ICU + else if (locale->provider == COLLPROVIDER_ICU) + result = pg_strnxfrm_icu(dest, src, -1, destsize, locale); +#endif + else + /* shouldn't happen */ + PGLOCALE_SUPPORT_ERROR(locale->provider); + + return result; +} + +/* + * pg_strnxfrm + * + * Transforms 'src' to a nul-terminated string stored in 'dest' such that + * ordinary strcmp() on transformed strings is equivalent to pg_strcoll() on + * untransformed strings. + * + * 'src' does not need to be nul-terminated. If 'destsize' is zero, 'dest' may + * be NULL. + * + * Returns the number of bytes needed to store the transformed string, + * excluding the terminating nul byte. If the value returned is 'destsize' or + * greater, the resulting contents of 'dest' are undefined. + * + * This function may need to nul-terminate the argument for libc functions; + * so if the caller already has a nul-terminated string, it should call + * pg_strxfrm() instead. + */ +size_t +pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen, + pg_locale_t locale) +{ + size_t result = 0; /* keep compiler quiet */ + + if (!locale || locale->provider == COLLPROVIDER_LIBC) + result = pg_strnxfrm_libc(dest, src, srclen, destsize, locale); +#ifdef USE_ICU + else if (locale->provider == COLLPROVIDER_ICU) + result = pg_strnxfrm_icu(dest, src, srclen, destsize, locale); +#endif + else + /* shouldn't happen */ + PGLOCALE_SUPPORT_ERROR(locale->provider); + + return result; +} + +/* + * Return true if the collation provider supports pg_strxfrm_prefix() and + * pg_strnxfrm_prefix(); otherwise false. + */ +bool +pg_strxfrm_prefix_enabled(pg_locale_t locale) +{ + if (!locale || locale->provider == COLLPROVIDER_LIBC) + return false; + else if (locale->provider == COLLPROVIDER_ICU) + return true; + else + /* shouldn't happen */ + PGLOCALE_SUPPORT_ERROR(locale->provider); + + return false; /* keep compiler quiet */ +} + +/* + * pg_strxfrm_prefix + * + * Transforms 'src' to a byte sequence stored in 'dest' such that ordinary + * memcmp() on the byte sequence is equivalent to pg_strcoll() on + * untransformed strings. The result is not nul-terminated. + * + * The provided 'src' must be nul-terminated. + * + * If destsize is not large enough to hold the resulting byte sequence, stores + * only the first destsize bytes in 'dest'. Returns the number of bytes + * actually copied to 'dest'. + */ +size_t +pg_strxfrm_prefix(char *dest, const char *src, size_t destsize, + pg_locale_t locale) +{ + size_t result = 0; /* keep compiler quiet */ + + if (!locale) + PGLOCALE_SUPPORT_ERROR(COLLPROVIDER_LIBC); +#ifdef USE_ICU + else if (locale->provider == COLLPROVIDER_ICU) + result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale); +#endif + else + PGLOCALE_SUPPORT_ERROR(locale->provider); + + return result; +} + +/* + * pg_strnxfrm_prefix + * + * Transforms 'src' to a byte sequence stored in 'dest' such that ordinary + * memcmp() on the byte sequence is equivalent to pg_strcoll() on + * untransformed strings. The result is not nul-terminated. 
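Since pg_strnxfrm() reports the space it needed rather than the space it used, callers typically probe once and retry with a larger buffer whenever the return value is destsize or more. An illustrative sketch (hypothetical helper, not from this file; assumes pg_strxfrm_enabled(locale) was already checked and a backend memory context is current):

/*
 * Hypothetical helper: transform 'src' into a palloc'd, nul-terminated
 * sort key using the probe-and-retry sizing convention of pg_strnxfrm().
 */
static char *
transform_to_sortkey(const char *src, size_t srclen, pg_locale_t locale)
{
	Size		bufsize = 64;	/* arbitrary initial guess */
	char	   *buf = palloc(bufsize);
	size_t		needed;

	needed = pg_strnxfrm(buf, bufsize, src, srclen, locale);
	if (needed >= bufsize)
	{
		/* Too small: contents are undefined, so enlarge and redo. */
		bufsize = needed + 1;	/* +1 for the terminating nul */
		buf = repalloc(buf, bufsize);
		needed = pg_strnxfrm(buf, bufsize, src, srclen, locale);
		Assert(needed < bufsize);
	}

	return buf;
}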
+ * + * The provided 'src' must be nul-terminated. + * + * If destsize is not large enough to hold the resulting byte sequence, stores + * only the first destsize bytes in 'dest'. Returns the number of bytes + * actually copied to 'dest'. + * + * This function may need to nul-terminate the argument for libc functions; + * so if the caller already has a nul-terminated string, it should call + * pg_strxfrm_prefix() instead. + */ +size_t +pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, + size_t srclen, pg_locale_t locale) +{ + size_t result = 0; /* keep compiler quiet */ + + if (!locale) + PGLOCALE_SUPPORT_ERROR(COLLPROVIDER_LIBC); +#ifdef USE_ICU + else if (locale->provider == COLLPROVIDER_ICU) + result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale); +#endif + else + PGLOCALE_SUPPORT_ERROR(locale->provider); + + return result; +} + +#ifdef USE_ICU + +/* + * Wrapper around ucol_open() to handle API differences for older ICU + * versions. + */ +static UCollator * +pg_ucol_open(const char *loc_str) +{ + UCollator *collator; + UErrorCode status; + const char *orig_str = loc_str; + char *fixed_str = NULL; + + /* + * Must never open default collator, because it depends on the environment + * and may change at any time. Should not happen, but check here to catch + * bugs that might be hard to catch otherwise. + * + * NB: the default collator is not the same as the collator for the root + * locale. The root locale may be specified as the empty string, "und", or + * "root". The default collator is opened by passing NULL to ucol_open(). + */ + if (loc_str == NULL) + elog(ERROR, "opening default collator is not supported"); + + /* + * In ICU versions 54 and earlier, "und" is not a recognized spelling of + * the root locale. If the first component of the locale is "und", replace + * with "root" before opening. + */ + if (U_ICU_VERSION_MAJOR_NUM < 55) + { + char lang[ULOC_LANG_CAPACITY]; + + status = U_ZERO_ERROR; + uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status); + if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) + { + ereport(ERROR, + (errmsg("could not get language from locale \"%s\": %s", + loc_str, u_errorName(status)))); + } + + if (strcmp(lang, "und") == 0) + { + const char *remainder = loc_str + strlen("und"); + + fixed_str = palloc(strlen("root") + strlen(remainder) + 1); + strcpy(fixed_str, "root"); + strcat(fixed_str, remainder); + + loc_str = fixed_str; + } + } + + status = U_ZERO_ERROR; + collator = ucol_open(loc_str, &status); + if (U_FAILURE(status)) + ereport(ERROR, + /* use original string for error report */ + (errmsg("could not open collator for locale \"%s\": %s", + orig_str, u_errorName(status)))); + + if (U_ICU_VERSION_MAJOR_NUM < 54) + { + status = U_ZERO_ERROR; + icu_set_collation_attributes(collator, loc_str, &status); + + /* + * Pretend the error came from ucol_open(), for consistent error + * message across ICU versions. 
+ */ + if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) + { + ucol_close(collator); + ereport(ERROR, + (errmsg("could not open collator for locale \"%s\": %s", + orig_str, u_errorName(status)))); + } + } + + if (fixed_str != NULL) + pfree(fixed_str); + + return collator; +} + +static void +init_icu_converter(void) +{ + const char *icu_encoding_name; + UErrorCode status; + UConverter *conv; + + if (icu_converter) + return; /* already done */ + + icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding()); + if (!icu_encoding_name) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("encoding \"%s\" not supported by ICU", + pg_encoding_to_char(GetDatabaseEncoding())))); + + status = U_ZERO_ERROR; + conv = ucnv_open(icu_encoding_name, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not open ICU converter for encoding \"%s\": %s", + icu_encoding_name, u_errorName(status)))); + + icu_converter = conv; +} + +/* + * Find length, in UChars, of given string if converted to UChar string. + */ +static size_t +uchar_length(UConverter *converter, const char *str, int32_t len) +{ + UErrorCode status = U_ZERO_ERROR; + int32_t ulen; + + ulen = ucnv_toUChars(converter, NULL, 0, str, len, &status); + if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) + ereport(ERROR, + (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status)))); + return ulen; +} + +/* + * Convert the given source string into a UChar string, stored in dest, and + * return the length (in UChars). + */ +static int32_t +uchar_convert(UConverter *converter, UChar *dest, int32_t destlen, + const char *src, int32_t srclen) +{ + UErrorCode status = U_ZERO_ERROR; + int32_t ulen; + + status = U_ZERO_ERROR; + ulen = ucnv_toUChars(converter, dest, destlen, src, srclen, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status)))); + return ulen; +} + +/* + * Convert a string in the database encoding into a string of UChars. + * + * The source string at buff is of length nbytes + * (it needn't be nul-terminated) + * + * *buff_uchar receives a pointer to the palloc'd result string, and + * the function's result is the number of UChars generated. + * + * The result string is nul-terminated, though most callers rely on the + * result length instead. + */ +int32_t +icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes) +{ + int32_t len_uchar; + + init_icu_converter(); + + len_uchar = uchar_length(icu_converter, buff, nbytes); + + *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar)); + len_uchar = uchar_convert(icu_converter, + *buff_uchar, len_uchar + 1, buff, nbytes); + + return len_uchar; +} + +/* + * Convert a string of UChars into the database encoding. + * + * The source string at buff_uchar is of length len_uchar + * (it needn't be nul-terminated) + * + * *result receives a pointer to the palloc'd result string, and the + * function's result is the number of bytes generated (not counting nul). + * + * The result string is nul-terminated. 
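For completeness, a small sketch (not part of the original file, only meaningful in a USE_ICU build with this file's ICU headers) showing the length and termination conventions of the icu_to_uchar()/icu_from_uchar() pair: both return a length that excludes the terminating nul, and both allocate a nul-terminated palloc'd result.

#ifdef USE_ICU
/*
 * Hypothetical helper: round-trip a database-encoded, nul-terminated
 * string through UChars and back.
 */
static char *
roundtrip_through_uchars(const char *str)
{
	UChar	   *ubuf;
	char	   *result;
	int32_t		ulen;

	ulen = icu_to_uchar(&ubuf, str, strlen(str));
	(void) icu_from_uchar(&result, ubuf, ulen);
	pfree(ubuf);

	return result;				/* same contents as 'str' */
}
#endif							/* USE_ICU */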
+ */ +int32_t +icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar) +{ + UErrorCode status; + int32_t len_result; + + init_icu_converter(); + + status = U_ZERO_ERROR; + len_result = ucnv_fromUChars(icu_converter, NULL, 0, + buff_uchar, len_uchar, &status); + if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) + ereport(ERROR, + (errmsg("%s failed: %s", "ucnv_fromUChars", + u_errorName(status)))); + + *result = palloc(len_result + 1); + + status = U_ZERO_ERROR; + len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1, + buff_uchar, len_uchar, &status); + if (U_FAILURE(status) || + status == U_STRING_NOT_TERMINATED_WARNING) + ereport(ERROR, + (errmsg("%s failed: %s", "ucnv_fromUChars", + u_errorName(status)))); + + return len_result; +} + +/* + * Parse collation attributes from the given locale string and apply them to + * the open collator. + * + * First, the locale string is canonicalized to an ICU format locale ID such + * as "und@colStrength=primary;colCaseLevel=yes". Then, it parses and applies + * the key-value arguments. + * + * Starting with ICU version 54, the attributes are processed automatically by + * ucol_open(), so this is only necessary for emulating this behavior on older + * versions. + */ +pg_attribute_unused() +static void +icu_set_collation_attributes(UCollator *collator, const char *loc, + UErrorCode *status) +{ + int32_t len; + char *icu_locale_id; + char *lower_str; + char *str; + + /* + * The input locale may be a BCP 47 language tag, e.g. + * "und-u-kc-ks-level1", which expresses the same attributes in a + * different form. It will be converted to the equivalent ICU format + * locale ID, e.g. "und@colcaselevel=yes;colstrength=primary", by + * uloc_canonicalize(). + */ + *status = U_ZERO_ERROR; + len = uloc_canonicalize(loc, NULL, 0, status); + icu_locale_id = palloc(len + 1); + *status = U_ZERO_ERROR; + len = uloc_canonicalize(loc, icu_locale_id, len + 1, status); + if (U_FAILURE(*status) || *status == U_STRING_NOT_TERMINATED_WARNING) + return; + + lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id)); + + pfree(icu_locale_id); + + str = strchr(lower_str, '@'); + if (!str) + return; + str++; + + for (char *token = strtok(str, ";"); token; token = strtok(NULL, ";")) + { + char *e = strchr(token, '='); + + if (e) + { + char *name; + char *value; + UColAttribute uattr; + UColAttributeValue uvalue; + + *status = U_ZERO_ERROR; + + *e = '\0'; + name = token; + value = e + 1; + + /* + * See attribute name and value lists in ICU i18n/coll.cpp + */ + if (strcmp(name, "colstrength") == 0) + uattr = UCOL_STRENGTH; + else if (strcmp(name, "colbackwards") == 0) + uattr = UCOL_FRENCH_COLLATION; + else if (strcmp(name, "colcaselevel") == 0) + uattr = UCOL_CASE_LEVEL; + else if (strcmp(name, "colcasefirst") == 0) + uattr = UCOL_CASE_FIRST; + else if (strcmp(name, "colalternate") == 0) + uattr = UCOL_ALTERNATE_HANDLING; + else if (strcmp(name, "colnormalization") == 0) + uattr = UCOL_NORMALIZATION_MODE; + else if (strcmp(name, "colnumeric") == 0) + uattr = UCOL_NUMERIC_COLLATION; + else + /* ignore if unknown */ + continue; + + if (strcmp(value, "primary") == 0) + uvalue = UCOL_PRIMARY; + else if (strcmp(value, "secondary") == 0) + uvalue = UCOL_SECONDARY; + else if (strcmp(value, "tertiary") == 0) + uvalue = UCOL_TERTIARY; + else if (strcmp(value, "quaternary") == 0) + uvalue = UCOL_QUATERNARY; + else if (strcmp(value, "identical") == 0) + uvalue = UCOL_IDENTICAL; + else if (strcmp(value, "no") == 0) + uvalue = UCOL_OFF; + else if 
(strcmp(value, "yes") == 0) + uvalue = UCOL_ON; + else if (strcmp(value, "shifted") == 0) + uvalue = UCOL_SHIFTED; + else if (strcmp(value, "non-ignorable") == 0) + uvalue = UCOL_NON_IGNORABLE; + else if (strcmp(value, "lower") == 0) + uvalue = UCOL_LOWER_FIRST; + else if (strcmp(value, "upper") == 0) + uvalue = UCOL_UPPER_FIRST; + else + { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } + + ucol_setAttribute(collator, uattr, uvalue, status); + } + } + + pfree(lower_str); +} +#endif + +/* + * Return the BCP47 language tag representation of the requested locale. + * + * This function should be called before passing the string to ucol_open(), + * because conversion to a language tag also performs "level 2 + * canonicalization". In addition to producing a consistent format, level 2 + * canonicalization is able to more accurately interpret different input + * locale string formats, such as POSIX and .NET IDs. + */ +char * +icu_language_tag(const char *loc_str, int elevel) +{ +#ifdef USE_ICU + UErrorCode status; + char *langtag; + size_t buflen = 32; /* arbitrary starting buffer size */ + const bool strict = true; + + /* + * A BCP47 language tag doesn't have a clearly-defined upper limit (cf. + * RFC5646 section 4.4). Additionally, in older ICU versions, + * uloc_toLanguageTag() doesn't always return the ultimate length on the + * first call, necessitating a loop. + */ + langtag = palloc(buflen); + while (true) + { + status = U_ZERO_ERROR; + uloc_toLanguageTag(loc_str, langtag, buflen, strict, &status); + + /* try again if the buffer is not large enough */ + if ((status == U_BUFFER_OVERFLOW_ERROR || + status == U_STRING_NOT_TERMINATED_WARNING) && + buflen < MaxAllocSize) + { + buflen = Min(buflen * 2, MaxAllocSize); + langtag = repalloc(langtag, buflen); + continue; + } + + break; + } + + if (U_FAILURE(status)) + { + pfree(langtag); + + if (elevel > 0) + ereport(elevel, + (errmsg("could not convert locale name \"%s\" to language tag: %s", + loc_str, u_errorName(status)))); + return NULL; + } + + return langtag; +#else /* not USE_ICU */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ICU is not supported in this build"))); + return NULL; /* keep compiler quiet */ +#endif /* not USE_ICU */ +} + +/* + * Perform best-effort check that the locale is a valid one. 
+ */ +void +icu_validate_locale(const char *loc_str) +{ +#ifdef USE_ICU + UCollator *collator; + UErrorCode status; + char lang[ULOC_LANG_CAPACITY]; + bool found = false; + int elevel = icu_validation_level; + + /* no validation */ + if (elevel < 0) + return; + + /* downgrade to WARNING during pg_upgrade */ + if (IsBinaryUpgrade && elevel > WARNING) + elevel = WARNING; + + /* validate that we can extract the language */ + status = U_ZERO_ERROR; + uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status); + if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) + { + ereport(elevel, + (errmsg("could not get language from ICU locale \"%s\": %s", + loc_str, u_errorName(status)), + errhint("To disable ICU locale validation, set the parameter \"%s\" to \"%s\".", + "icu_validation_level", "disabled"))); + return; + } + + /* check for special language name */ + if (strcmp(lang, "") == 0 || + strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0) + found = true; + + /* search for matching language within ICU */ + for (int32_t i = 0; !found && i < uloc_countAvailable(); i++) + { + const char *otherloc = uloc_getAvailable(i); + char otherlang[ULOC_LANG_CAPACITY]; + + status = U_ZERO_ERROR; + uloc_getLanguage(otherloc, otherlang, ULOC_LANG_CAPACITY, &status); + if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) + continue; + + if (strcmp(lang, otherlang) == 0) + found = true; + } + + if (!found) + ereport(elevel, + (errmsg("ICU locale \"%s\" has unknown language \"%s\"", + loc_str, lang), + errhint("To disable ICU locale validation, set the parameter \"%s\" to \"%s\".", + "icu_validation_level", "disabled"))); + + /* check that it can be opened */ + collator = pg_ucol_open(loc_str); + ucol_close(collator); +#else /* not USE_ICU */ + /* could get here if a collation was created by a build with ICU */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ICU is not supported in this build"))); +#endif /* not USE_ICU */ +} + +/* + * These functions convert from/to libc's wchar_t, *not* pg_wchar_t. + * Therefore we keep them here rather than with the mbutils code. + */ + +/* + * wchar2char --- convert wide characters to multibyte format + * + * This has the same API as the standard wcstombs_l() function; in particular, + * tolen is the maximum number of bytes to store at *to, and *from must be + * zero-terminated. The output will be zero-terminated iff there is room. + */ +size_t +wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale) +{ + size_t result; + + Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + + if (tolen == 0) + return 0; + +#ifdef WIN32 + + /* + * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and + * for some reason mbstowcs and wcstombs won't do this for us, so we use + * MultiByteToWideChar(). 
+ */ + if (GetDatabaseEncoding() == PG_UTF8) + { + result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen, + NULL, NULL); + /* A zero return is failure */ + if (result <= 0) + result = -1; + else + { + Assert(result <= tolen); + /* Microsoft counts the zero terminator in the result */ + result--; + } + } + else +#endif /* WIN32 */ + if (locale == (pg_locale_t) 0) + { + /* Use wcstombs directly for the default locale */ + result = wcstombs(to, from, tolen); + } + else + { +#ifdef HAVE_LOCALE_T +#ifdef HAVE_WCSTOMBS_L + /* Use wcstombs_l for nondefault locales */ + result = wcstombs_l(to, from, tolen, locale->info.lt); +#else /* !HAVE_WCSTOMBS_L */ + /* We have to temporarily set the locale as current ... ugh */ + locale_t save_locale = uselocale(locale->info.lt); + + result = wcstombs(to, from, tolen); + + uselocale(save_locale); +#endif /* HAVE_WCSTOMBS_L */ +#else /* !HAVE_LOCALE_T */ + /* Can't have locale != 0 without HAVE_LOCALE_T */ + elog(ERROR, "wcstombs_l is not available"); + result = 0; /* keep compiler quiet */ +#endif /* HAVE_LOCALE_T */ + } + + return result; +} + +/* + * char2wchar --- convert multibyte characters to wide characters + * + * This has almost the API of mbstowcs_l(), except that *from need not be + * null-terminated; instead, the number of input bytes is specified as + * fromlen. Also, we ereport() rather than returning -1 for invalid + * input encoding. tolen is the maximum number of wchar_t's to store at *to. + * The output will be zero-terminated iff there is room. + */ +size_t +char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, + pg_locale_t locale) +{ + size_t result; + + Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + + if (tolen == 0) + return 0; + +#ifdef WIN32 + /* See WIN32 "Unicode" comment above */ + if (GetDatabaseEncoding() == PG_UTF8) + { + /* Win32 API does not work for zero-length input */ + if (fromlen == 0) + result = 0; + else + { + result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1); + /* A zero return is failure */ + if (result == 0) + result = -1; + } + + if (result != -1) + { + Assert(result < tolen); + /* Append trailing null wchar (MultiByteToWideChar() does not) */ + to[result] = 0; + } + } + else +#endif /* WIN32 */ + { + /* mbstowcs requires ending '\0' */ + char *str = pnstrdup(from, fromlen); + + if (locale == (pg_locale_t) 0) + { + /* Use mbstowcs directly for the default locale */ + result = mbstowcs(to, str, tolen); + } + else + { +#ifdef HAVE_LOCALE_T +#ifdef HAVE_MBSTOWCS_L + /* Use mbstowcs_l for nondefault locales */ + result = mbstowcs_l(to, str, tolen, locale->info.lt); +#else /* !HAVE_MBSTOWCS_L */ + /* We have to temporarily set the locale as current ... ugh */ + locale_t save_locale = uselocale(locale->info.lt); + + result = mbstowcs(to, str, tolen); + + uselocale(save_locale); +#endif /* HAVE_MBSTOWCS_L */ +#else /* !HAVE_LOCALE_T */ + /* Can't have locale != 0 without HAVE_LOCALE_T */ + elog(ERROR, "mbstowcs_l is not available"); + result = 0; /* keep compiler quiet */ +#endif /* HAVE_LOCALE_T */ + } + + pfree(str); + } + + if (result == -1) + { + /* + * Invalid multibyte character encountered. We try to give a useful + * error message by letting pg_verifymbstr check the string. But it's + * possible that the string is OK to us, and not OK to mbstowcs --- + * this suggests that the LC_CTYPE locale is different from the + * database encoding. Give a generic error message if pg_verifymbstr + * can't find anything wrong. 
+ */ + pg_verifymbstr(from, fromlen, false); /* might not return */ + /* but if it does ... */ + ereport(ERROR, + (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), + errmsg("invalid multibyte character for locale"), + errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding."))); + } + + return result; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/pg_lsn.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/pg_lsn.c new file mode 100644 index 00000000000..613c3722b94 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/pg_lsn.c @@ -0,0 +1,313 @@ +/*------------------------------------------------------------------------- + * + * pg_lsn.c + * Operations for the pg_lsn datatype. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/adt/pg_lsn.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "funcapi.h" +#include "libpq/pqformat.h" +#include "utils/builtins.h" +#include "utils/numeric.h" +#include "utils/pg_lsn.h" + +#define MAXPG_LSNLEN 17 +#define MAXPG_LSNCOMPONENT 8 + +/*---------------------------------------------------------- + * Formatting and conversion routines. + *---------------------------------------------------------*/ + +XLogRecPtr +pg_lsn_in_internal(const char *str, bool *have_error) +{ + int len1, + len2; + uint32 id, + off; + XLogRecPtr result; + + Assert(have_error != NULL); + *have_error = false; + + /* Sanity check input format. */ + len1 = strspn(str, "0123456789abcdefABCDEF"); + if (len1 < 1 || len1 > MAXPG_LSNCOMPONENT || str[len1] != '/') + { + *have_error = true; + return InvalidXLogRecPtr; + } + len2 = strspn(str + len1 + 1, "0123456789abcdefABCDEF"); + if (len2 < 1 || len2 > MAXPG_LSNCOMPONENT || str[len1 + 1 + len2] != '\0') + { + *have_error = true; + return InvalidXLogRecPtr; + } + + /* Decode result. 
*/ + id = (uint32) strtoul(str, NULL, 16); + off = (uint32) strtoul(str + len1 + 1, NULL, 16); + result = ((uint64) id << 32) | off; + + return result; +} + +Datum +pg_lsn_in(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); + XLogRecPtr result; + bool have_error = false; + + result = pg_lsn_in_internal(str, &have_error); + if (have_error) + ereturn(fcinfo->context, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "pg_lsn", str))); + + PG_RETURN_LSN(result); +} + +Datum +pg_lsn_out(PG_FUNCTION_ARGS) +{ + XLogRecPtr lsn = PG_GETARG_LSN(0); + char buf[MAXPG_LSNLEN + 1]; + char *result; + + snprintf(buf, sizeof buf, "%X/%X", LSN_FORMAT_ARGS(lsn)); + result = pstrdup(buf); + PG_RETURN_CSTRING(result); +} + +Datum +pg_lsn_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + XLogRecPtr result; + + result = pq_getmsgint64(buf); + PG_RETURN_LSN(result); +} + +Datum +pg_lsn_send(PG_FUNCTION_ARGS) +{ + XLogRecPtr lsn = PG_GETARG_LSN(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint64(&buf, lsn); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + + +/*---------------------------------------------------------- + * Operators for PostgreSQL LSNs + *---------------------------------------------------------*/ + +Datum +pg_lsn_eq(PG_FUNCTION_ARGS) +{ + XLogRecPtr lsn1 = PG_GETARG_LSN(0); + XLogRecPtr lsn2 = PG_GETARG_LSN(1); + + PG_RETURN_BOOL(lsn1 == lsn2); +} + +Datum +pg_lsn_ne(PG_FUNCTION_ARGS) +{ + XLogRecPtr lsn1 = PG_GETARG_LSN(0); + XLogRecPtr lsn2 = PG_GETARG_LSN(1); + + PG_RETURN_BOOL(lsn1 != lsn2); +} + +Datum +pg_lsn_lt(PG_FUNCTION_ARGS) +{ + XLogRecPtr lsn1 = PG_GETARG_LSN(0); + XLogRecPtr lsn2 = PG_GETARG_LSN(1); + + PG_RETURN_BOOL(lsn1 < lsn2); +} + +Datum +pg_lsn_gt(PG_FUNCTION_ARGS) +{ + XLogRecPtr lsn1 = PG_GETARG_LSN(0); + XLogRecPtr lsn2 = PG_GETARG_LSN(1); + + PG_RETURN_BOOL(lsn1 > lsn2); +} + +Datum +pg_lsn_le(PG_FUNCTION_ARGS) +{ + XLogRecPtr lsn1 = PG_GETARG_LSN(0); + XLogRecPtr lsn2 = PG_GETARG_LSN(1); + + PG_RETURN_BOOL(lsn1 <= lsn2); +} + +Datum +pg_lsn_ge(PG_FUNCTION_ARGS) +{ + XLogRecPtr lsn1 = PG_GETARG_LSN(0); + XLogRecPtr lsn2 = PG_GETARG_LSN(1); + + PG_RETURN_BOOL(lsn1 >= lsn2); +} + +Datum +pg_lsn_larger(PG_FUNCTION_ARGS) +{ + XLogRecPtr lsn1 = PG_GETARG_LSN(0); + XLogRecPtr lsn2 = PG_GETARG_LSN(1); + + PG_RETURN_LSN((lsn1 > lsn2) ? lsn1 : lsn2); +} + +Datum +pg_lsn_smaller(PG_FUNCTION_ARGS) +{ + XLogRecPtr lsn1 = PG_GETARG_LSN(0); + XLogRecPtr lsn2 = PG_GETARG_LSN(1); + + PG_RETURN_LSN((lsn1 < lsn2) ? lsn1 : lsn2); +} + +/* btree index opclass support */ +Datum +pg_lsn_cmp(PG_FUNCTION_ARGS) +{ + XLogRecPtr a = PG_GETARG_LSN(0); + XLogRecPtr b = PG_GETARG_LSN(1); + + if (a > b) + PG_RETURN_INT32(1); + else if (a == b) + PG_RETURN_INT32(0); + else + PG_RETURN_INT32(-1); +} + +/* hash index opclass support */ +Datum +pg_lsn_hash(PG_FUNCTION_ARGS) +{ + /* We can use hashint8 directly */ + return hashint8(fcinfo); +} + +Datum +pg_lsn_hash_extended(PG_FUNCTION_ARGS) +{ + return hashint8extended(fcinfo); +} + + +/*---------------------------------------------------------- + * Arithmetic operators on PostgreSQL LSNs. + *---------------------------------------------------------*/ + +Datum +pg_lsn_mi(PG_FUNCTION_ARGS) +{ + XLogRecPtr lsn1 = PG_GETARG_LSN(0); + XLogRecPtr lsn2 = PG_GETARG_LSN(1); + char buf[256]; + Datum result; + + /* Output could be as large as plus or minus 2^63 - 1. 
*/ + if (lsn1 < lsn2) + snprintf(buf, sizeof buf, "-" UINT64_FORMAT, lsn2 - lsn1); + else + snprintf(buf, sizeof buf, UINT64_FORMAT, lsn1 - lsn2); + + /* Convert to numeric. */ + result = DirectFunctionCall3(numeric_in, + CStringGetDatum(buf), + ObjectIdGetDatum(0), + Int32GetDatum(-1)); + + return result; +} + +/* + * Add the number of bytes to pg_lsn, giving a new pg_lsn. + * Must handle both positive and negative numbers of bytes. + */ +Datum +pg_lsn_pli(PG_FUNCTION_ARGS) +{ + XLogRecPtr lsn = PG_GETARG_LSN(0); + Numeric nbytes = PG_GETARG_NUMERIC(1); + Datum num; + Datum res; + char buf[32]; + + if (numeric_is_nan(nbytes)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot add NaN to pg_lsn"))); + + /* Convert to numeric */ + snprintf(buf, sizeof(buf), UINT64_FORMAT, lsn); + num = DirectFunctionCall3(numeric_in, + CStringGetDatum(buf), + ObjectIdGetDatum(0), + Int32GetDatum(-1)); + + /* Add two numerics */ + res = DirectFunctionCall2(numeric_add, + num, + NumericGetDatum(nbytes)); + + /* Convert to pg_lsn */ + return DirectFunctionCall1(numeric_pg_lsn, res); +} + +/* + * Subtract the number of bytes from pg_lsn, giving a new pg_lsn. + * Must handle both positive and negative numbers of bytes. + */ +Datum +pg_lsn_mii(PG_FUNCTION_ARGS) +{ + XLogRecPtr lsn = PG_GETARG_LSN(0); + Numeric nbytes = PG_GETARG_NUMERIC(1); + Datum num; + Datum res; + char buf[32]; + + if (numeric_is_nan(nbytes)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot subtract NaN from pg_lsn"))); + + /* Convert to numeric */ + snprintf(buf, sizeof(buf), UINT64_FORMAT, lsn); + num = DirectFunctionCall3(numeric_in, + CStringGetDatum(buf), + ObjectIdGetDatum(0), + Int32GetDatum(-1)); + + /* Subtract two numerics */ + res = DirectFunctionCall2(numeric_sub, + num, + NumericGetDatum(nbytes)); + + /* Convert to pg_lsn */ + return DirectFunctionCall1(numeric_pg_lsn, res); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/pg_upgrade_support.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/pg_upgrade_support.c new file mode 100644 index 00000000000..0186636d9f8 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/pg_upgrade_support.c @@ -0,0 +1,263 @@ +/* + * pg_upgrade_support.c + * + * server-side functions to set backend global variables + * to control oid and relfilenumber assignment, and do other special + * hacks needed for pg_upgrade. 
+ * + * Copyright (c) 2010-2023, PostgreSQL Global Development Group + * src/backend/utils/adt/pg_upgrade_support.c + */ + +#include "postgres.h" + +#include "catalog/binary_upgrade.h" +#include "catalog/heap.h" +#include "catalog/namespace.h" +#include "catalog/pg_type.h" +#include "commands/extension.h" +#include "miscadmin.h" +#include "utils/array.h" +#include "utils/builtins.h" + + +#define CHECK_IS_BINARY_UPGRADE \ +do { \ + if (!IsBinaryUpgrade) \ + ereport(ERROR, \ + (errcode(ERRCODE_CANT_CHANGE_RUNTIME_PARAM), \ + errmsg("function can only be called when server is in binary upgrade mode"))); \ +} while (0) + +Datum +binary_upgrade_set_next_pg_tablespace_oid(PG_FUNCTION_ARGS) +{ + Oid tbspoid = PG_GETARG_OID(0); + + CHECK_IS_BINARY_UPGRADE; + binary_upgrade_next_pg_tablespace_oid = tbspoid; + + PG_RETURN_VOID(); +} + +Datum +binary_upgrade_set_next_pg_type_oid(PG_FUNCTION_ARGS) +{ + Oid typoid = PG_GETARG_OID(0); + + CHECK_IS_BINARY_UPGRADE; + binary_upgrade_next_pg_type_oid = typoid; + + PG_RETURN_VOID(); +} + +Datum +binary_upgrade_set_next_array_pg_type_oid(PG_FUNCTION_ARGS) +{ + Oid typoid = PG_GETARG_OID(0); + + CHECK_IS_BINARY_UPGRADE; + binary_upgrade_next_array_pg_type_oid = typoid; + + PG_RETURN_VOID(); +} + +Datum +binary_upgrade_set_next_multirange_pg_type_oid(PG_FUNCTION_ARGS) +{ + Oid typoid = PG_GETARG_OID(0); + + CHECK_IS_BINARY_UPGRADE; + binary_upgrade_next_mrng_pg_type_oid = typoid; + + PG_RETURN_VOID(); +} + +Datum +binary_upgrade_set_next_multirange_array_pg_type_oid(PG_FUNCTION_ARGS) +{ + Oid typoid = PG_GETARG_OID(0); + + CHECK_IS_BINARY_UPGRADE; + binary_upgrade_next_mrng_array_pg_type_oid = typoid; + + PG_RETURN_VOID(); +} + +Datum +binary_upgrade_set_next_heap_pg_class_oid(PG_FUNCTION_ARGS) +{ + Oid reloid = PG_GETARG_OID(0); + + CHECK_IS_BINARY_UPGRADE; + binary_upgrade_next_heap_pg_class_oid = reloid; + + PG_RETURN_VOID(); +} + +Datum +binary_upgrade_set_next_heap_relfilenode(PG_FUNCTION_ARGS) +{ + RelFileNumber relfilenumber = PG_GETARG_OID(0); + + CHECK_IS_BINARY_UPGRADE; + binary_upgrade_next_heap_pg_class_relfilenumber = relfilenumber; + + PG_RETURN_VOID(); +} + +Datum +binary_upgrade_set_next_index_pg_class_oid(PG_FUNCTION_ARGS) +{ + Oid reloid = PG_GETARG_OID(0); + + CHECK_IS_BINARY_UPGRADE; + binary_upgrade_next_index_pg_class_oid = reloid; + + PG_RETURN_VOID(); +} + +Datum +binary_upgrade_set_next_index_relfilenode(PG_FUNCTION_ARGS) +{ + RelFileNumber relfilenumber = PG_GETARG_OID(0); + + CHECK_IS_BINARY_UPGRADE; + binary_upgrade_next_index_pg_class_relfilenumber = relfilenumber; + + PG_RETURN_VOID(); +} + +Datum +binary_upgrade_set_next_toast_pg_class_oid(PG_FUNCTION_ARGS) +{ + Oid reloid = PG_GETARG_OID(0); + + CHECK_IS_BINARY_UPGRADE; + binary_upgrade_next_toast_pg_class_oid = reloid; + + PG_RETURN_VOID(); +} + +Datum +binary_upgrade_set_next_toast_relfilenode(PG_FUNCTION_ARGS) +{ + RelFileNumber relfilenumber = PG_GETARG_OID(0); + + CHECK_IS_BINARY_UPGRADE; + binary_upgrade_next_toast_pg_class_relfilenumber = relfilenumber; + + PG_RETURN_VOID(); +} + +Datum +binary_upgrade_set_next_pg_enum_oid(PG_FUNCTION_ARGS) +{ + Oid enumoid = PG_GETARG_OID(0); + + CHECK_IS_BINARY_UPGRADE; + binary_upgrade_next_pg_enum_oid = enumoid; + + PG_RETURN_VOID(); +} + +Datum +binary_upgrade_set_next_pg_authid_oid(PG_FUNCTION_ARGS) +{ + Oid authoid = PG_GETARG_OID(0); + + CHECK_IS_BINARY_UPGRADE; + binary_upgrade_next_pg_authid_oid = authoid; + PG_RETURN_VOID(); +} + +Datum +binary_upgrade_create_empty_extension(PG_FUNCTION_ARGS) +{ + text *extName; + text 
*schemaName; + bool relocatable; + text *extVersion; + Datum extConfig; + Datum extCondition; + List *requiredExtensions; + + CHECK_IS_BINARY_UPGRADE; + + /* We must check these things before dereferencing the arguments */ + if (PG_ARGISNULL(0) || + PG_ARGISNULL(1) || + PG_ARGISNULL(2) || + PG_ARGISNULL(3)) + elog(ERROR, "null argument to binary_upgrade_create_empty_extension is not allowed"); + + extName = PG_GETARG_TEXT_PP(0); + schemaName = PG_GETARG_TEXT_PP(1); + relocatable = PG_GETARG_BOOL(2); + extVersion = PG_GETARG_TEXT_PP(3); + + if (PG_ARGISNULL(4)) + extConfig = PointerGetDatum(NULL); + else + extConfig = PG_GETARG_DATUM(4); + + if (PG_ARGISNULL(5)) + extCondition = PointerGetDatum(NULL); + else + extCondition = PG_GETARG_DATUM(5); + + requiredExtensions = NIL; + if (!PG_ARGISNULL(6)) + { + ArrayType *textArray = PG_GETARG_ARRAYTYPE_P(6); + Datum *textDatums; + int ndatums; + int i; + + deconstruct_array_builtin(textArray, TEXTOID, &textDatums, NULL, &ndatums); + for (i = 0; i < ndatums; i++) + { + char *extName = TextDatumGetCString(textDatums[i]); + Oid extOid = get_extension_oid(extName, false); + + requiredExtensions = lappend_oid(requiredExtensions, extOid); + } + } + + InsertExtensionTuple(text_to_cstring(extName), + GetUserId(), + get_namespace_oid(text_to_cstring(schemaName), false), + relocatable, + text_to_cstring(extVersion), + extConfig, + extCondition, + requiredExtensions); + + PG_RETURN_VOID(); +} + +Datum +binary_upgrade_set_record_init_privs(PG_FUNCTION_ARGS) +{ + bool record_init_privs = PG_GETARG_BOOL(0); + + CHECK_IS_BINARY_UPGRADE; + binary_upgrade_record_init_privs = record_init_privs; + + PG_RETURN_VOID(); +} + +Datum +binary_upgrade_set_missing_value(PG_FUNCTION_ARGS) +{ + Oid table_id = PG_GETARG_OID(0); + text *attname = PG_GETARG_TEXT_P(1); + text *value = PG_GETARG_TEXT_P(2); + char *cattname = text_to_cstring(attname); + char *cvalue = text_to_cstring(value); + + CHECK_IS_BINARY_UPGRADE; + SetAttrMissing(table_id, cattname, cvalue); + + PG_RETURN_VOID(); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/pgstatfuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/pgstatfuncs.c new file mode 100644 index 00000000000..68ecd3bc66b --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/pgstatfuncs.c @@ -0,0 +1,2069 @@ +/*------------------------------------------------------------------------- + * + * pgstatfuncs.c + * Functions for accessing various forms of statistics data + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/pgstatfuncs.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/xlog.h" +#include "access/xlogprefetcher.h" +#include "catalog/catalog.h" +#include "catalog/pg_authid.h" +#include "catalog/pg_type.h" +#include "common/ip.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "pgstat.h" +#include "postmaster/bgworker_internals.h" +#include "postmaster/postmaster.h" +#include "replication/logicallauncher.h" +#include "storage/proc.h" +#include "storage/procarray.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/inet.h" +#include "utils/timestamp.h" + +#define UINT32_ACCESS_ONCE(var) ((uint32)(*((volatile uint32 *)&(var)))) + +#define HAS_PGSTAT_PERMISSIONS(role) 
(has_privs_of_role(GetUserId(), ROLE_PG_READ_ALL_STATS) || has_privs_of_role(GetUserId(), role)) + +#define PG_STAT_GET_RELENTRY_INT64(stat) \ +Datum \ +CppConcat(pg_stat_get_,stat)(PG_FUNCTION_ARGS) \ +{ \ + Oid relid = PG_GETARG_OID(0); \ + int64 result; \ + PgStat_StatTabEntry *tabentry; \ + \ + if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL) \ + result = 0; \ + else \ + result = (int64) (tabentry->stat); \ + \ + PG_RETURN_INT64(result); \ +} + +/* pg_stat_get_analyze_count */ +PG_STAT_GET_RELENTRY_INT64(analyze_count) + +/* pg_stat_get_autoanalyze_count */ +PG_STAT_GET_RELENTRY_INT64(autoanalyze_count) + +/* pg_stat_get_autovacuum_count */ +PG_STAT_GET_RELENTRY_INT64(autovacuum_count) + +/* pg_stat_get_blocks_fetched */ +PG_STAT_GET_RELENTRY_INT64(blocks_fetched) + +/* pg_stat_get_blocks_hit */ +PG_STAT_GET_RELENTRY_INT64(blocks_hit) + +/* pg_stat_get_dead_tuples */ +PG_STAT_GET_RELENTRY_INT64(dead_tuples) + +/* pg_stat_get_ins_since_vacuum */ +PG_STAT_GET_RELENTRY_INT64(ins_since_vacuum) + +/* pg_stat_get_live_tuples */ +PG_STAT_GET_RELENTRY_INT64(live_tuples) + +/* pg_stat_get_mod_since_analyze */ +PG_STAT_GET_RELENTRY_INT64(mod_since_analyze) + +/* pg_stat_get_numscans */ +PG_STAT_GET_RELENTRY_INT64(numscans) + +/* pg_stat_get_tuples_deleted */ +PG_STAT_GET_RELENTRY_INT64(tuples_deleted) + +/* pg_stat_get_tuples_fetched */ +PG_STAT_GET_RELENTRY_INT64(tuples_fetched) + +/* pg_stat_get_tuples_hot_updated */ +PG_STAT_GET_RELENTRY_INT64(tuples_hot_updated) + +/* pg_stat_get_tuples_newpage_updated */ +PG_STAT_GET_RELENTRY_INT64(tuples_newpage_updated) + +/* pg_stat_get_tuples_inserted */ +PG_STAT_GET_RELENTRY_INT64(tuples_inserted) + +/* pg_stat_get_tuples_returned */ +PG_STAT_GET_RELENTRY_INT64(tuples_returned) + +/* pg_stat_get_tuples_updated */ +PG_STAT_GET_RELENTRY_INT64(tuples_updated) + +/* pg_stat_get_vacuum_count */ +PG_STAT_GET_RELENTRY_INT64(vacuum_count) + +#define PG_STAT_GET_RELENTRY_TIMESTAMPTZ(stat) \ +Datum \ +CppConcat(pg_stat_get_,stat)(PG_FUNCTION_ARGS) \ +{ \ + Oid relid = PG_GETARG_OID(0); \ + TimestampTz result; \ + PgStat_StatTabEntry *tabentry; \ + \ + if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL) \ + result = 0; \ + else \ + result = tabentry->stat; \ + \ + if (result == 0) \ + PG_RETURN_NULL(); \ + else \ + PG_RETURN_TIMESTAMPTZ(result); \ +} + +/* pg_stat_get_last_analyze_time */ +PG_STAT_GET_RELENTRY_TIMESTAMPTZ(last_analyze_time) + +/* pg_stat_get_last_autoanalyze_time */ +PG_STAT_GET_RELENTRY_TIMESTAMPTZ(last_autoanalyze_time) + +/* pg_stat_get_last_autovacuum_time */ +PG_STAT_GET_RELENTRY_TIMESTAMPTZ(last_autovacuum_time) + +/* pg_stat_get_last_vacuum_time */ +PG_STAT_GET_RELENTRY_TIMESTAMPTZ(last_vacuum_time) + +/* pg_stat_get_lastscan */ +PG_STAT_GET_RELENTRY_TIMESTAMPTZ(lastscan) + +Datum +pg_stat_get_function_calls(PG_FUNCTION_ARGS) +{ + Oid funcid = PG_GETARG_OID(0); + PgStat_StatFuncEntry *funcentry; + + if ((funcentry = pgstat_fetch_stat_funcentry(funcid)) == NULL) + PG_RETURN_NULL(); + PG_RETURN_INT64(funcentry->numcalls); +} + +/* convert counter from microsec to millisec for display */ +#define PG_STAT_GET_FUNCENTRY_FLOAT8_MS(stat) \ +Datum \ +CppConcat(pg_stat_get_function_,stat)(PG_FUNCTION_ARGS) \ +{ \ + Oid funcid = PG_GETARG_OID(0); \ + double result; \ + PgStat_StatFuncEntry *funcentry; \ + \ + if ((funcentry = pgstat_fetch_stat_funcentry(funcid)) == NULL) \ + PG_RETURN_NULL(); \ + result = ((double) funcentry->stat) / 1000.0; \ + PG_RETURN_FLOAT8(result); \ +} + +/* pg_stat_get_function_total_time */ 
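Each PG_STAT_GET_RELENTRY_INT64(name) line above is a macro invocation that generates a complete fmgr-callable function. Roughly, PG_STAT_GET_RELENTRY_INT64(numscans) expands to the following (whitespace added for readability; this is a sketch of the expansion, not extra code in the patch):

Datum
pg_stat_get_numscans(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);
	int64		result;
	PgStat_StatTabEntry *tabentry;

	if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
		result = 0;
	else
		result = (int64) (tabentry->numscans);

	PG_RETURN_INT64(result);
}

The TIMESTAMPTZ and FLOAT8_MS variants follow the same scheme, differing only in the result type and, for the latter, the microsecond-to-millisecond division.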
+PG_STAT_GET_FUNCENTRY_FLOAT8_MS(total_time) + +/* pg_stat_get_function_self_time */ +PG_STAT_GET_FUNCENTRY_FLOAT8_MS(self_time) + +Datum +pg_stat_get_backend_idset(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + int *fctx; + + /* stuff done only on the first call of the function */ + if (SRF_IS_FIRSTCALL()) + { + /* create a function context for cross-call persistence */ + funcctx = SRF_FIRSTCALL_INIT(); + + fctx = MemoryContextAlloc(funcctx->multi_call_memory_ctx, + sizeof(int)); + funcctx->user_fctx = fctx; + + fctx[0] = 0; + } + + /* stuff done on every call of the function */ + funcctx = SRF_PERCALL_SETUP(); + fctx = funcctx->user_fctx; + + fctx[0] += 1; + + /* + * We recheck pgstat_fetch_stat_numbackends() each time through, just in + * case the local status data has been refreshed since we started. It's + * plenty cheap enough if not. If a refresh does happen, we'll likely + * miss or duplicate some backend IDs, but we're content not to crash. + * (Refreshing midway through such a query would be problematic usage + * anyway, since the backend IDs we've already returned might no longer + * refer to extant sessions.) + */ + if (fctx[0] <= pgstat_fetch_stat_numbackends()) + { + /* do when there is more left to send */ + LocalPgBackendStatus *local_beentry = pgstat_get_local_beentry_by_index(fctx[0]); + + SRF_RETURN_NEXT(funcctx, Int32GetDatum(local_beentry->backend_id)); + } + else + { + /* do when there is no more left */ + SRF_RETURN_DONE(funcctx); + } +} + +/* + * Returns command progress information for the named command. + */ +Datum +pg_stat_get_progress_info(PG_FUNCTION_ARGS) +{ +#define PG_STAT_GET_PROGRESS_COLS PGSTAT_NUM_PROGRESS_PARAM + 3 + int num_backends = pgstat_fetch_stat_numbackends(); + int curr_backend; + char *cmd = text_to_cstring(PG_GETARG_TEXT_PP(0)); + ProgressCommandType cmdtype; + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + + /* Translate command name into command type code. */ + if (pg_strcasecmp(cmd, "VACUUM") == 0) + cmdtype = PROGRESS_COMMAND_VACUUM; + else if (pg_strcasecmp(cmd, "ANALYZE") == 0) + cmdtype = PROGRESS_COMMAND_ANALYZE; + else if (pg_strcasecmp(cmd, "CLUSTER") == 0) + cmdtype = PROGRESS_COMMAND_CLUSTER; + else if (pg_strcasecmp(cmd, "CREATE INDEX") == 0) + cmdtype = PROGRESS_COMMAND_CREATE_INDEX; + else if (pg_strcasecmp(cmd, "BASEBACKUP") == 0) + cmdtype = PROGRESS_COMMAND_BASEBACKUP; + else if (pg_strcasecmp(cmd, "COPY") == 0) + cmdtype = PROGRESS_COMMAND_COPY; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid command name: \"%s\"", cmd))); + + InitMaterializedSRF(fcinfo, 0); + + /* 1-based index */ + for (curr_backend = 1; curr_backend <= num_backends; curr_backend++) + { + LocalPgBackendStatus *local_beentry; + PgBackendStatus *beentry; + Datum values[PG_STAT_GET_PROGRESS_COLS] = {0}; + bool nulls[PG_STAT_GET_PROGRESS_COLS] = {0}; + int i; + + local_beentry = pgstat_get_local_beentry_by_index(curr_backend); + beentry = &local_beentry->backendStatus; + + /* + * Report values for only those backends which are running the given + * command. 
+ */ + if (beentry->st_progress_command != cmdtype) + continue; + + /* Value available to all callers */ + values[0] = Int32GetDatum(beentry->st_procpid); + values[1] = ObjectIdGetDatum(beentry->st_databaseid); + + /* show rest of the values including relid only to role members */ + if (HAS_PGSTAT_PERMISSIONS(beentry->st_userid)) + { + values[2] = ObjectIdGetDatum(beentry->st_progress_command_target); + for (i = 0; i < PGSTAT_NUM_PROGRESS_PARAM; i++) + values[i + 3] = Int64GetDatum(beentry->st_progress_param[i]); + } + else + { + nulls[2] = true; + for (i = 0; i < PGSTAT_NUM_PROGRESS_PARAM; i++) + nulls[i + 3] = true; + } + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls); + } + + return (Datum) 0; +} + +/* + * Returns activity of PG backends. + */ +Datum +pg_stat_get_activity(PG_FUNCTION_ARGS) +{ +#define PG_STAT_GET_ACTIVITY_COLS 31 + int num_backends = pgstat_fetch_stat_numbackends(); + int curr_backend; + int pid = PG_ARGISNULL(0) ? -1 : PG_GETARG_INT32(0); + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + + InitMaterializedSRF(fcinfo, 0); + + /* 1-based index */ + for (curr_backend = 1; curr_backend <= num_backends; curr_backend++) + { + /* for each row */ + Datum values[PG_STAT_GET_ACTIVITY_COLS] = {0}; + bool nulls[PG_STAT_GET_ACTIVITY_COLS] = {0}; + LocalPgBackendStatus *local_beentry; + PgBackendStatus *beentry; + PGPROC *proc; + const char *wait_event_type = NULL; + const char *wait_event = NULL; + + /* Get the next one in the list */ + local_beentry = pgstat_get_local_beentry_by_index(curr_backend); + beentry = &local_beentry->backendStatus; + + /* If looking for specific PID, ignore all the others */ + if (pid != -1 && beentry->st_procpid != pid) + continue; + + /* Values available to all callers */ + if (beentry->st_databaseid != InvalidOid) + values[0] = ObjectIdGetDatum(beentry->st_databaseid); + else + nulls[0] = true; + + values[1] = Int32GetDatum(beentry->st_procpid); + + if (beentry->st_userid != InvalidOid) + values[2] = ObjectIdGetDatum(beentry->st_userid); + else + nulls[2] = true; + + if (beentry->st_appname) + values[3] = CStringGetTextDatum(beentry->st_appname); + else + nulls[3] = true; + + if (TransactionIdIsValid(local_beentry->backend_xid)) + values[15] = TransactionIdGetDatum(local_beentry->backend_xid); + else + nulls[15] = true; + + if (TransactionIdIsValid(local_beentry->backend_xmin)) + values[16] = TransactionIdGetDatum(local_beentry->backend_xmin); + else + nulls[16] = true; + + /* Values only available to role member or pg_read_all_stats */ + if (HAS_PGSTAT_PERMISSIONS(beentry->st_userid)) + { + SockAddr zero_clientaddr; + char *clipped_activity; + + switch (beentry->st_state) + { + case STATE_IDLE: + values[4] = CStringGetTextDatum("idle"); + break; + case STATE_RUNNING: + values[4] = CStringGetTextDatum("active"); + break; + case STATE_IDLEINTRANSACTION: + values[4] = CStringGetTextDatum("idle in transaction"); + break; + case STATE_FASTPATH: + values[4] = CStringGetTextDatum("fastpath function call"); + break; + case STATE_IDLEINTRANSACTION_ABORTED: + values[4] = CStringGetTextDatum("idle in transaction (aborted)"); + break; + case STATE_DISABLED: + values[4] = CStringGetTextDatum("disabled"); + break; + case STATE_UNDEFINED: + nulls[4] = true; + break; + } + + clipped_activity = pgstat_clip_activity(beentry->st_activity_raw); + values[5] = CStringGetTextDatum(clipped_activity); + pfree(clipped_activity); + + /* leader_pid */ + nulls[29] = true; + + proc = BackendPidGetProc(beentry->st_procpid); + + if (proc 
== NULL && (beentry->st_backendType != B_BACKEND)) + { + /* + * For an auxiliary process, retrieve process info from + * AuxiliaryProcs stored in shared-memory. + */ + proc = AuxiliaryPidGetProc(beentry->st_procpid); + } + + /* + * If a PGPROC entry was retrieved, display wait events and lock + * group leader or apply leader information if any. To avoid + * extra overhead, no extra lock is being held, so there is no + * guarantee of consistency across multiple rows. + */ + if (proc != NULL) + { + uint32 raw_wait_event; + PGPROC *leader; + + raw_wait_event = UINT32_ACCESS_ONCE(proc->wait_event_info); + wait_event_type = pgstat_get_wait_event_type(raw_wait_event); + wait_event = pgstat_get_wait_event(raw_wait_event); + + leader = proc->lockGroupLeader; + + /* + * Show the leader only for active parallel workers. This + * leaves the field as NULL for the leader of a parallel group + * or the leader of parallel apply workers. + */ + if (leader && leader->pid != beentry->st_procpid) + { + values[29] = Int32GetDatum(leader->pid); + nulls[29] = false; + } + else if (beentry->st_backendType == B_BG_WORKER) + { + int leader_pid = GetLeaderApplyWorkerPid(beentry->st_procpid); + + if (leader_pid != InvalidPid) + { + values[29] = Int32GetDatum(leader_pid); + nulls[29] = false; + } + } + } + + if (wait_event_type) + values[6] = CStringGetTextDatum(wait_event_type); + else + nulls[6] = true; + + if (wait_event) + values[7] = CStringGetTextDatum(wait_event); + else + nulls[7] = true; + + /* + * Don't expose transaction time for walsenders; it confuses + * monitoring, particularly because we don't keep the time up-to- + * date. + */ + if (beentry->st_xact_start_timestamp != 0 && + beentry->st_backendType != B_WAL_SENDER) + values[8] = TimestampTzGetDatum(beentry->st_xact_start_timestamp); + else + nulls[8] = true; + + if (beentry->st_activity_start_timestamp != 0) + values[9] = TimestampTzGetDatum(beentry->st_activity_start_timestamp); + else + nulls[9] = true; + + if (beentry->st_proc_start_timestamp != 0) + values[10] = TimestampTzGetDatum(beentry->st_proc_start_timestamp); + else + nulls[10] = true; + + if (beentry->st_state_start_timestamp != 0) + values[11] = TimestampTzGetDatum(beentry->st_state_start_timestamp); + else + nulls[11] = true; + + /* A zeroed client addr means we don't know */ + memset(&zero_clientaddr, 0, sizeof(zero_clientaddr)); + if (memcmp(&(beentry->st_clientaddr), &zero_clientaddr, + sizeof(zero_clientaddr)) == 0) + { + nulls[12] = true; + nulls[13] = true; + nulls[14] = true; + } + else + { + if (beentry->st_clientaddr.addr.ss_family == AF_INET || + beentry->st_clientaddr.addr.ss_family == AF_INET6) + { + char remote_host[NI_MAXHOST]; + char remote_port[NI_MAXSERV]; + int ret; + + remote_host[0] = '\0'; + remote_port[0] = '\0'; + ret = pg_getnameinfo_all(&beentry->st_clientaddr.addr, + beentry->st_clientaddr.salen, + remote_host, sizeof(remote_host), + remote_port, sizeof(remote_port), + NI_NUMERICHOST | NI_NUMERICSERV); + if (ret == 0) + { + clean_ipv6_addr(beentry->st_clientaddr.addr.ss_family, remote_host); + values[12] = DirectFunctionCall1(inet_in, + CStringGetDatum(remote_host)); + if (beentry->st_clienthostname && + beentry->st_clienthostname[0]) + values[13] = CStringGetTextDatum(beentry->st_clienthostname); + else + nulls[13] = true; + values[14] = Int32GetDatum(atoi(remote_port)); + } + else + { + nulls[12] = true; + nulls[13] = true; + nulls[14] = true; + } + } + else if (beentry->st_clientaddr.addr.ss_family == AF_UNIX) + { + /* + * Unix sockets always reports NULL 
for host and -1 for + * port, so it's possible to tell the difference to + * connections we have no permissions to view, or with + * errors. + */ + nulls[12] = true; + nulls[13] = true; + values[14] = Int32GetDatum(-1); + } + else + { + /* Unknown address type, should never happen */ + nulls[12] = true; + nulls[13] = true; + nulls[14] = true; + } + } + /* Add backend type */ + if (beentry->st_backendType == B_BG_WORKER) + { + const char *bgw_type; + + bgw_type = GetBackgroundWorkerTypeByPid(beentry->st_procpid); + if (bgw_type) + values[17] = CStringGetTextDatum(bgw_type); + else + nulls[17] = true; + } + else + values[17] = + CStringGetTextDatum(GetBackendTypeDesc(beentry->st_backendType)); + + /* SSL information */ + if (beentry->st_ssl) + { + values[18] = BoolGetDatum(true); /* ssl */ + values[19] = CStringGetTextDatum(beentry->st_sslstatus->ssl_version); + values[20] = CStringGetTextDatum(beentry->st_sslstatus->ssl_cipher); + values[21] = Int32GetDatum(beentry->st_sslstatus->ssl_bits); + + if (beentry->st_sslstatus->ssl_client_dn[0]) + values[22] = CStringGetTextDatum(beentry->st_sslstatus->ssl_client_dn); + else + nulls[22] = true; + + if (beentry->st_sslstatus->ssl_client_serial[0]) + values[23] = DirectFunctionCall3(numeric_in, + CStringGetDatum(beentry->st_sslstatus->ssl_client_serial), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(-1)); + else + nulls[23] = true; + + if (beentry->st_sslstatus->ssl_issuer_dn[0]) + values[24] = CStringGetTextDatum(beentry->st_sslstatus->ssl_issuer_dn); + else + nulls[24] = true; + } + else + { + values[18] = BoolGetDatum(false); /* ssl */ + nulls[19] = nulls[20] = nulls[21] = nulls[22] = nulls[23] = nulls[24] = true; + } + + /* GSSAPI information */ + if (beentry->st_gss) + { + values[25] = BoolGetDatum(beentry->st_gssstatus->gss_auth); /* gss_auth */ + values[26] = CStringGetTextDatum(beentry->st_gssstatus->gss_princ); + values[27] = BoolGetDatum(beentry->st_gssstatus->gss_enc); /* GSS Encryption in use */ + values[28] = BoolGetDatum(beentry->st_gssstatus->gss_delegation); /* GSS credentials + * delegated */ + } + else + { + values[25] = BoolGetDatum(false); /* gss_auth */ + nulls[26] = true; /* No GSS principal */ + values[27] = BoolGetDatum(false); /* GSS Encryption not in + * use */ + values[28] = BoolGetDatum(false); /* GSS credentials not + * delegated */ + } + if (beentry->st_query_id == 0) + nulls[30] = true; + else + values[30] = UInt64GetDatum(beentry->st_query_id); + } + else + { + /* No permissions to view data about this session */ + values[5] = CStringGetTextDatum("<insufficient privilege>"); + nulls[4] = true; + nulls[6] = true; + nulls[7] = true; + nulls[8] = true; + nulls[9] = true; + nulls[10] = true; + nulls[11] = true; + nulls[12] = true; + nulls[13] = true; + nulls[14] = true; + nulls[17] = true; + nulls[18] = true; + nulls[19] = true; + nulls[20] = true; + nulls[21] = true; + nulls[22] = true; + nulls[23] = true; + nulls[24] = true; + nulls[25] = true; + nulls[26] = true; + nulls[27] = true; + nulls[28] = true; + nulls[29] = true; + nulls[30] = true; + } + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls); + + /* If only a single backend was requested, and we found it, break. 
*/ + if (pid != -1) + break; + } + + return (Datum) 0; +} + + +Datum +pg_backend_pid(PG_FUNCTION_ARGS) +{ + PG_RETURN_INT32(MyProcPid); +} + + +Datum +pg_stat_get_backend_pid(PG_FUNCTION_ARGS) +{ + int32 beid = PG_GETARG_INT32(0); + PgBackendStatus *beentry; + + if ((beentry = pgstat_get_beentry_by_backend_id(beid)) == NULL) + PG_RETURN_NULL(); + + PG_RETURN_INT32(beentry->st_procpid); +} + + +Datum +pg_stat_get_backend_dbid(PG_FUNCTION_ARGS) +{ + int32 beid = PG_GETARG_INT32(0); + PgBackendStatus *beentry; + + if ((beentry = pgstat_get_beentry_by_backend_id(beid)) == NULL) + PG_RETURN_NULL(); + + PG_RETURN_OID(beentry->st_databaseid); +} + + +Datum +pg_stat_get_backend_userid(PG_FUNCTION_ARGS) +{ + int32 beid = PG_GETARG_INT32(0); + PgBackendStatus *beentry; + + if ((beentry = pgstat_get_beentry_by_backend_id(beid)) == NULL) + PG_RETURN_NULL(); + + PG_RETURN_OID(beentry->st_userid); +} + +Datum +pg_stat_get_backend_subxact(PG_FUNCTION_ARGS) +{ +#define PG_STAT_GET_SUBXACT_COLS 2 + TupleDesc tupdesc; + Datum values[PG_STAT_GET_SUBXACT_COLS]; + bool nulls[PG_STAT_GET_SUBXACT_COLS]; + int32 beid = PG_GETARG_INT32(0); + LocalPgBackendStatus *local_beentry; + + /* Initialise values and NULL flags arrays */ + MemSet(values, 0, sizeof(values)); + MemSet(nulls, 0, sizeof(nulls)); + + /* Initialise attributes information in the tuple descriptor */ + tupdesc = CreateTemplateTupleDesc(PG_STAT_GET_SUBXACT_COLS); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "subxact_count", + INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "subxact_overflow", + BOOLOID, -1, 0); + + BlessTupleDesc(tupdesc); + + if ((local_beentry = pgstat_get_local_beentry_by_backend_id(beid)) != NULL) + { + /* Fill values and NULLs */ + values[0] = Int32GetDatum(local_beentry->backend_subxact_count); + values[1] = BoolGetDatum(local_beentry->backend_subxact_overflowed); + } + else + { + nulls[0] = true; + nulls[1] = true; + } + + /* Returns the record as Datum */ + PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); +} + +Datum +pg_stat_get_backend_activity(PG_FUNCTION_ARGS) +{ + int32 beid = PG_GETARG_INT32(0); + PgBackendStatus *beentry; + const char *activity; + char *clipped_activity; + text *ret; + + if ((beentry = pgstat_get_beentry_by_backend_id(beid)) == NULL) + activity = "<backend information not available>"; + else if (!HAS_PGSTAT_PERMISSIONS(beentry->st_userid)) + activity = "<insufficient privilege>"; + else if (*(beentry->st_activity_raw) == '\0') + activity = "<command string not enabled>"; + else + activity = beentry->st_activity_raw; + + clipped_activity = pgstat_clip_activity(activity); + ret = cstring_to_text(activity); + pfree(clipped_activity); + + PG_RETURN_TEXT_P(ret); +} + +Datum +pg_stat_get_backend_wait_event_type(PG_FUNCTION_ARGS) +{ + int32 beid = PG_GETARG_INT32(0); + PgBackendStatus *beentry; + PGPROC *proc; + const char *wait_event_type = NULL; + + if ((beentry = pgstat_get_beentry_by_backend_id(beid)) == NULL) + wait_event_type = "<backend information not available>"; + else if (!HAS_PGSTAT_PERMISSIONS(beentry->st_userid)) + wait_event_type = "<insufficient privilege>"; + else if ((proc = BackendPidGetProc(beentry->st_procpid)) != NULL) + wait_event_type = pgstat_get_wait_event_type(proc->wait_event_info); + + if (!wait_event_type) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(cstring_to_text(wait_event_type)); +} + +Datum +pg_stat_get_backend_wait_event(PG_FUNCTION_ARGS) +{ + int32 beid = PG_GETARG_INT32(0); + PgBackendStatus *beentry; + PGPROC *proc; + const char 
*/ + if (pid != -1) + break; + } + + return (Datum) 0; +} + + +Datum +pg_backend_pid(PG_FUNCTION_ARGS) +{ + PG_RETURN_INT32(MyProcPid); +} + + +Datum +pg_stat_get_backend_pid(PG_FUNCTION_ARGS) +{ + int32 beid = PG_GETARG_INT32(0); + PgBackendStatus *beentry; + + if ((beentry = pgstat_get_beentry_by_backend_id(beid)) == NULL) + PG_RETURN_NULL(); + + PG_RETURN_INT32(beentry->st_procpid); +} + + +Datum +pg_stat_get_backend_dbid(PG_FUNCTION_ARGS) +{ + int32 beid = PG_GETARG_INT32(0); + PgBackendStatus *beentry; + + if ((beentry = pgstat_get_beentry_by_backend_id(beid)) == NULL) + PG_RETURN_NULL(); + + PG_RETURN_OID(beentry->st_databaseid); +} + + +Datum +pg_stat_get_backend_userid(PG_FUNCTION_ARGS) +{ + int32 beid = PG_GETARG_INT32(0); + PgBackendStatus *beentry; + + if ((beentry = pgstat_get_beentry_by_backend_id(beid)) == NULL) + PG_RETURN_NULL(); + + PG_RETURN_OID(beentry->st_userid); +} + +Datum +pg_stat_get_backend_subxact(PG_FUNCTION_ARGS) +{ +#define PG_STAT_GET_SUBXACT_COLS 2 + TupleDesc tupdesc; + Datum values[PG_STAT_GET_SUBXACT_COLS]; + bool nulls[PG_STAT_GET_SUBXACT_COLS]; + int32 beid = PG_GETARG_INT32(0); + LocalPgBackendStatus *local_beentry; + + /* Initialise values and NULL flags arrays */ + MemSet(values, 0, sizeof(values)); + MemSet(nulls, 0, sizeof(nulls)); + + /* Initialise attributes information in the tuple descriptor */ + tupdesc = CreateTemplateTupleDesc(PG_STAT_GET_SUBXACT_COLS); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "subxact_count", + INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "subxact_overflow", + BOOLOID, -1, 0); + + BlessTupleDesc(tupdesc); + + if ((local_beentry = pgstat_get_local_beentry_by_backend_id(beid)) != NULL) + { + /* Fill values and NULLs */ + values[0] = Int32GetDatum(local_beentry->backend_subxact_count); + values[1] = BoolGetDatum(local_beentry->backend_subxact_overflowed); + } + else + { + nulls[0] = true; + nulls[1] = true; + } + + /* Returns the record as Datum */ + PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); +} + +Datum +pg_stat_get_backend_activity(PG_FUNCTION_ARGS) +{ + int32 beid = PG_GETARG_INT32(0); + PgBackendStatus *beentry; + const char *activity; + char *clipped_activity; + text *ret; + + if ((beentry = pgstat_get_beentry_by_backend_id(beid)) == NULL) + activity = "<backend information not available>"; + else if (!HAS_PGSTAT_PERMISSIONS(beentry->st_userid)) + activity = "<insufficient privilege>"; + else if (*(beentry->st_activity_raw) == '\0') + activity = "<command string not enabled>"; + else + activity = beentry->st_activity_raw; + + clipped_activity = pgstat_clip_activity(activity); + ret = cstring_to_text(clipped_activity); + pfree(clipped_activity); + + PG_RETURN_TEXT_P(ret); +} + +Datum +pg_stat_get_backend_wait_event_type(PG_FUNCTION_ARGS) +{ + int32 beid = PG_GETARG_INT32(0); + PgBackendStatus *beentry; + PGPROC *proc; + const char *wait_event_type = NULL; + + if ((beentry = pgstat_get_beentry_by_backend_id(beid)) == NULL) + wait_event_type = "<backend information not available>"; + else if (!HAS_PGSTAT_PERMISSIONS(beentry->st_userid)) + wait_event_type = "<insufficient privilege>"; + else if ((proc = BackendPidGetProc(beentry->st_procpid)) != NULL) + wait_event_type = pgstat_get_wait_event_type(proc->wait_event_info); + + if (!wait_event_type) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(cstring_to_text(wait_event_type)); +} + +Datum +pg_stat_get_backend_wait_event(PG_FUNCTION_ARGS) +{ + int32 beid = PG_GETARG_INT32(0); + PgBackendStatus *beentry; + PGPROC *proc; + const char
*wait_event = NULL; + + if ((beentry = pgstat_get_beentry_by_backend_id(beid)) == NULL) + wait_event = "<backend information not available>"; + else if (!HAS_PGSTAT_PERMISSIONS(beentry->st_userid)) + wait_event = "<insufficient privilege>"; + else if ((proc = BackendPidGetProc(beentry->st_procpid)) != NULL) + wait_event = pgstat_get_wait_event(proc->wait_event_info); + + if (!wait_event) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(cstring_to_text(wait_event)); +} + + +Datum +pg_stat_get_backend_activity_start(PG_FUNCTION_ARGS) +{ + int32 beid = PG_GETARG_INT32(0); + TimestampTz result; + PgBackendStatus *beentry; + + if ((beentry = pgstat_get_beentry_by_backend_id(beid)) == NULL) + PG_RETURN_NULL(); + + else if (!HAS_PGSTAT_PERMISSIONS(beentry->st_userid)) + PG_RETURN_NULL(); + + result = beentry->st_activity_start_timestamp; + + /* + * No time recorded for start of current query -- this is the case if the + * user hasn't enabled query-level stats collection. + */ + if (result == 0) + PG_RETURN_NULL(); + + PG_RETURN_TIMESTAMPTZ(result); +} + + +Datum +pg_stat_get_backend_xact_start(PG_FUNCTION_ARGS) +{ + int32 beid = PG_GETARG_INT32(0); + TimestampTz result; + PgBackendStatus *beentry; + + if ((beentry = pgstat_get_beentry_by_backend_id(beid)) == NULL) + PG_RETURN_NULL(); + + else if (!HAS_PGSTAT_PERMISSIONS(beentry->st_userid)) + PG_RETURN_NULL(); + + result = beentry->st_xact_start_timestamp; + + if (result == 0) /* not in a transaction */ + PG_RETURN_NULL(); + + PG_RETURN_TIMESTAMPTZ(result); +} + + +Datum +pg_stat_get_backend_start(PG_FUNCTION_ARGS) +{ + int32 beid = PG_GETARG_INT32(0); + TimestampTz result; + PgBackendStatus *beentry; + + if ((beentry = pgstat_get_beentry_by_backend_id(beid)) == NULL) + PG_RETURN_NULL(); + + else if (!HAS_PGSTAT_PERMISSIONS(beentry->st_userid)) + PG_RETURN_NULL(); + + result = beentry->st_proc_start_timestamp; + + if (result == 0) /* probably can't happen? 
*/ + PG_RETURN_NULL(); + + PG_RETURN_TIMESTAMPTZ(result); +} + + +Datum +pg_stat_get_backend_client_addr(PG_FUNCTION_ARGS) +{ + int32 beid = PG_GETARG_INT32(0); + PgBackendStatus *beentry; + SockAddr zero_clientaddr; + char remote_host[NI_MAXHOST]; + int ret; + + if ((beentry = pgstat_get_beentry_by_backend_id(beid)) == NULL) + PG_RETURN_NULL(); + + else if (!HAS_PGSTAT_PERMISSIONS(beentry->st_userid)) + PG_RETURN_NULL(); + + /* A zeroed client addr means we don't know */ + memset(&zero_clientaddr, 0, sizeof(zero_clientaddr)); + if (memcmp(&(beentry->st_clientaddr), &zero_clientaddr, + sizeof(zero_clientaddr)) == 0) + PG_RETURN_NULL(); + + switch (beentry->st_clientaddr.addr.ss_family) + { + case AF_INET: + case AF_INET6: + break; + default: + PG_RETURN_NULL(); + } + + remote_host[0] = '\0'; + ret = pg_getnameinfo_all(&beentry->st_clientaddr.addr, + beentry->st_clientaddr.salen, + remote_host, sizeof(remote_host), + NULL, 0, + NI_NUMERICHOST | NI_NUMERICSERV); + if (ret != 0) + PG_RETURN_NULL(); + + clean_ipv6_addr(beentry->st_clientaddr.addr.ss_family, remote_host); + + PG_RETURN_DATUM(DirectFunctionCall1(inet_in, + CStringGetDatum(remote_host))); +} + +Datum +pg_stat_get_backend_client_port(PG_FUNCTION_ARGS) +{ + int32 beid = PG_GETARG_INT32(0); + PgBackendStatus *beentry; + SockAddr zero_clientaddr; + char remote_port[NI_MAXSERV]; + int ret; + + if ((beentry = pgstat_get_beentry_by_backend_id(beid)) == NULL) + PG_RETURN_NULL(); + + else if (!HAS_PGSTAT_PERMISSIONS(beentry->st_userid)) + PG_RETURN_NULL(); + + /* A zeroed client addr means we don't know */ + memset(&zero_clientaddr, 0, sizeof(zero_clientaddr)); + if (memcmp(&(beentry->st_clientaddr), &zero_clientaddr, + sizeof(zero_clientaddr)) == 0) + PG_RETURN_NULL(); + + switch (beentry->st_clientaddr.addr.ss_family) + { + case AF_INET: + case AF_INET6: + break; + case AF_UNIX: + PG_RETURN_INT32(-1); + default: + PG_RETURN_NULL(); + } + + remote_port[0] = '\0'; + ret = pg_getnameinfo_all(&beentry->st_clientaddr.addr, + beentry->st_clientaddr.salen, + NULL, 0, + remote_port, sizeof(remote_port), + NI_NUMERICHOST | NI_NUMERICSERV); + if (ret != 0) + PG_RETURN_NULL(); + + PG_RETURN_DATUM(DirectFunctionCall1(int4in, + CStringGetDatum(remote_port))); +} + + +Datum +pg_stat_get_db_numbackends(PG_FUNCTION_ARGS) +{ + Oid dbid = PG_GETARG_OID(0); + int32 result; + int tot_backends = pgstat_fetch_stat_numbackends(); + int idx; + + result = 0; + for (idx = 1; idx <= tot_backends; idx++) + { + LocalPgBackendStatus *local_beentry = pgstat_get_local_beentry_by_index(idx); + + if (local_beentry->backendStatus.st_databaseid == dbid) + result++; + } + + PG_RETURN_INT32(result); +} + + +#define PG_STAT_GET_DBENTRY_INT64(stat) \ +Datum \ +CppConcat(pg_stat_get_db_,stat)(PG_FUNCTION_ARGS) \ +{ \ + Oid dbid = PG_GETARG_OID(0); \ + int64 result; \ + PgStat_StatDBEntry *dbentry; \ + \ + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL) \ + result = 0; \ + else \ + result = (int64) (dbentry->stat); \ + \ + PG_RETURN_INT64(result); \ +} + +/* pg_stat_get_db_blocks_fetched */ +PG_STAT_GET_DBENTRY_INT64(blocks_fetched) + +/* pg_stat_get_db_blocks_hit */ +PG_STAT_GET_DBENTRY_INT64(blocks_hit) + +/* pg_stat_get_db_conflict_bufferpin */ +PG_STAT_GET_DBENTRY_INT64(conflict_bufferpin) + +/* pg_stat_get_db_conflict_lock */ +PG_STAT_GET_DBENTRY_INT64(conflict_lock) + +/* pg_stat_get_db_conflict_snapshot */ +PG_STAT_GET_DBENTRY_INT64(conflict_snapshot) + +/* pg_stat_get_db_conflict_startup_deadlock */ +PG_STAT_GET_DBENTRY_INT64(conflict_startup_deadlock) + +/* 
pg_stat_get_db_conflict_tablespace */ +PG_STAT_GET_DBENTRY_INT64(conflict_tablespace) + +/* pg_stat_get_db_deadlocks */ +PG_STAT_GET_DBENTRY_INT64(deadlocks) + +/* pg_stat_get_db_sessions */ +PG_STAT_GET_DBENTRY_INT64(sessions) + +/* pg_stat_get_db_sessions_abandoned */ +PG_STAT_GET_DBENTRY_INT64(sessions_abandoned) + +/* pg_stat_get_db_sessions_fatal */ +PG_STAT_GET_DBENTRY_INT64(sessions_fatal) + +/* pg_stat_get_db_sessions_killed */ +PG_STAT_GET_DBENTRY_INT64(sessions_killed) + +/* pg_stat_get_db_temp_bytes */ +PG_STAT_GET_DBENTRY_INT64(temp_bytes) + +/* pg_stat_get_db_temp_files */ +PG_STAT_GET_DBENTRY_INT64(temp_files) + +/* pg_stat_get_db_tuples_deleted */ +PG_STAT_GET_DBENTRY_INT64(tuples_deleted) + +/* pg_stat_get_db_tuples_fetched */ +PG_STAT_GET_DBENTRY_INT64(tuples_fetched) + +/* pg_stat_get_db_tuples_inserted */ +PG_STAT_GET_DBENTRY_INT64(tuples_inserted) + +/* pg_stat_get_db_tuples_returned */ +PG_STAT_GET_DBENTRY_INT64(tuples_returned) + +/* pg_stat_get_db_tuples_updated */ +PG_STAT_GET_DBENTRY_INT64(tuples_updated) + +/* pg_stat_get_db_xact_commit */ +PG_STAT_GET_DBENTRY_INT64(xact_commit) + +/* pg_stat_get_db_xact_rollback */ +PG_STAT_GET_DBENTRY_INT64(xact_rollback) + +/* pg_stat_get_db_conflict_logicalslot */ +PG_STAT_GET_DBENTRY_INT64(conflict_logicalslot) + +Datum +pg_stat_get_db_stat_reset_time(PG_FUNCTION_ARGS) +{ + Oid dbid = PG_GETARG_OID(0); + TimestampTz result; + PgStat_StatDBEntry *dbentry; + + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL) + result = 0; + else + result = dbentry->stat_reset_timestamp; + + if (result == 0) + PG_RETURN_NULL(); + else + PG_RETURN_TIMESTAMPTZ(result); +} + + +Datum +pg_stat_get_db_conflict_all(PG_FUNCTION_ARGS) +{ + Oid dbid = PG_GETARG_OID(0); + int64 result; + PgStat_StatDBEntry *dbentry; + + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL) + result = 0; + else + result = (int64) (dbentry->conflict_tablespace + + dbentry->conflict_lock + + dbentry->conflict_snapshot + + dbentry->conflict_logicalslot + + dbentry->conflict_bufferpin + + dbentry->conflict_startup_deadlock); + + PG_RETURN_INT64(result); +} + +Datum +pg_stat_get_db_checksum_failures(PG_FUNCTION_ARGS) +{ + Oid dbid = PG_GETARG_OID(0); + int64 result; + PgStat_StatDBEntry *dbentry; + + if (!DataChecksumsEnabled()) + PG_RETURN_NULL(); + + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL) + result = 0; + else + result = (int64) (dbentry->checksum_failures); + + PG_RETURN_INT64(result); +} + +Datum +pg_stat_get_db_checksum_last_failure(PG_FUNCTION_ARGS) +{ + Oid dbid = PG_GETARG_OID(0); + TimestampTz result; + PgStat_StatDBEntry *dbentry; + + if (!DataChecksumsEnabled()) + PG_RETURN_NULL(); + + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL) + result = 0; + else + result = dbentry->last_checksum_failure; + + if (result == 0) + PG_RETURN_NULL(); + else + PG_RETURN_TIMESTAMPTZ(result); +} + +/* convert counter from microsec to millisec for display */ +#define PG_STAT_GET_DBENTRY_FLOAT8_MS(stat) \ +Datum \ +CppConcat(pg_stat_get_db_,stat)(PG_FUNCTION_ARGS) \ +{ \ + Oid dbid = PG_GETARG_OID(0); \ + double result; \ + PgStat_StatDBEntry *dbentry; \ + \ + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL) \ + result = 0; \ + else \ + result = ((double) dbentry->stat) / 1000.0; \ + \ + PG_RETURN_FLOAT8(result); \ +} + +/* pg_stat_get_db_active_time */ +PG_STAT_GET_DBENTRY_FLOAT8_MS(active_time) + +/* pg_stat_get_db_blk_read_time */ +PG_STAT_GET_DBENTRY_FLOAT8_MS(blk_read_time) + +/* pg_stat_get_db_blk_write_time */ 
+PG_STAT_GET_DBENTRY_FLOAT8_MS(blk_write_time) + +/* pg_stat_get_db_idle_in_transaction_time */ +PG_STAT_GET_DBENTRY_FLOAT8_MS(idle_in_transaction_time) + +/* pg_stat_get_db_session_time */ +PG_STAT_GET_DBENTRY_FLOAT8_MS(session_time) + +Datum +pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS) +{ + PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->timed_checkpoints); +} + +Datum +pg_stat_get_bgwriter_requested_checkpoints(PG_FUNCTION_ARGS) +{ + PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->requested_checkpoints); +} + +Datum +pg_stat_get_bgwriter_buf_written_checkpoints(PG_FUNCTION_ARGS) +{ + PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->buf_written_checkpoints); +} + +Datum +pg_stat_get_bgwriter_buf_written_clean(PG_FUNCTION_ARGS) +{ + PG_RETURN_INT64(pgstat_fetch_stat_bgwriter()->buf_written_clean); +} + +Datum +pg_stat_get_bgwriter_maxwritten_clean(PG_FUNCTION_ARGS) +{ + PG_RETURN_INT64(pgstat_fetch_stat_bgwriter()->maxwritten_clean); +} + +Datum +pg_stat_get_checkpoint_write_time(PG_FUNCTION_ARGS) +{ + /* time is already in msec, just convert to double for presentation */ + PG_RETURN_FLOAT8((double) + pgstat_fetch_stat_checkpointer()->checkpoint_write_time); +} + +Datum +pg_stat_get_checkpoint_sync_time(PG_FUNCTION_ARGS) +{ + /* time is already in msec, just convert to double for presentation */ + PG_RETURN_FLOAT8((double) + pgstat_fetch_stat_checkpointer()->checkpoint_sync_time); +} + +Datum +pg_stat_get_bgwriter_stat_reset_time(PG_FUNCTION_ARGS) +{ + PG_RETURN_TIMESTAMPTZ(pgstat_fetch_stat_bgwriter()->stat_reset_timestamp); +} + +Datum +pg_stat_get_buf_written_backend(PG_FUNCTION_ARGS) +{ + PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->buf_written_backend); +} + +Datum +pg_stat_get_buf_fsync_backend(PG_FUNCTION_ARGS) +{ + PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->buf_fsync_backend); +} + +Datum +pg_stat_get_buf_alloc(PG_FUNCTION_ARGS) +{ + PG_RETURN_INT64(pgstat_fetch_stat_bgwriter()->buf_alloc); +} + +/* +* When adding a new column to the pg_stat_io view, add a new enum value +* here above IO_NUM_COLUMNS. +*/ +typedef enum io_stat_col +{ + IO_COL_INVALID = -1, + IO_COL_BACKEND_TYPE, + IO_COL_OBJECT, + IO_COL_CONTEXT, + IO_COL_READS, + IO_COL_READ_TIME, + IO_COL_WRITES, + IO_COL_WRITE_TIME, + IO_COL_WRITEBACKS, + IO_COL_WRITEBACK_TIME, + IO_COL_EXTENDS, + IO_COL_EXTEND_TIME, + IO_COL_CONVERSION, + IO_COL_HITS, + IO_COL_EVICTIONS, + IO_COL_REUSES, + IO_COL_FSYNCS, + IO_COL_FSYNC_TIME, + IO_COL_RESET_TIME, + IO_NUM_COLUMNS, +} io_stat_col; + +/* + * When adding a new IOOp, add a new io_stat_col and add a case to this + * function returning the corresponding io_stat_col. + */ +static io_stat_col +pgstat_get_io_op_index(IOOp io_op) +{ + switch (io_op) + { + case IOOP_EVICT: + return IO_COL_EVICTIONS; + case IOOP_EXTEND: + return IO_COL_EXTENDS; + case IOOP_FSYNC: + return IO_COL_FSYNCS; + case IOOP_HIT: + return IO_COL_HITS; + case IOOP_READ: + return IO_COL_READS; + case IOOP_REUSE: + return IO_COL_REUSES; + case IOOP_WRITE: + return IO_COL_WRITES; + case IOOP_WRITEBACK: + return IO_COL_WRITEBACKS; + } + + elog(ERROR, "unrecognized IOOp value: %d", io_op); + pg_unreachable(); +} + +/* + * Get the number of the column containing IO times for the specified IOOp. + * This function encodes our assumption that IO time for an IOOp is displayed + * in the view in the column directly after the IOOp counts. If an op has no + * associated time, IO_COL_INVALID is returned. 
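The convention described in the comment above (the timing column sits directly after its count column) is purely positional, so it depends on the ordering of io_stat_col. If one wanted to make that assumption explicit, compile-time checks along these lines would do it (a hedged sketch using C11 _Static_assert; the patch itself does not add these):

/* Each timed IOOp's time column must immediately follow its count column. */
_Static_assert(IO_COL_READ_TIME == IO_COL_READS + 1, "read time column");
_Static_assert(IO_COL_WRITE_TIME == IO_COL_WRITES + 1, "write time column");
_Static_assert(IO_COL_WRITEBACK_TIME == IO_COL_WRITEBACKS + 1, "writeback time column");
_Static_assert(IO_COL_EXTEND_TIME == IO_COL_EXTENDS + 1, "extend time column");
_Static_assert(IO_COL_FSYNC_TIME == IO_COL_FSYNCS + 1, "fsync time column");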
+ */ +static io_stat_col +pgstat_get_io_time_index(IOOp io_op) +{ + switch (io_op) + { + case IOOP_READ: + case IOOP_WRITE: + case IOOP_WRITEBACK: + case IOOP_EXTEND: + case IOOP_FSYNC: + return pgstat_get_io_op_index(io_op) + 1; + case IOOP_EVICT: + case IOOP_HIT: + case IOOP_REUSE: + return IO_COL_INVALID; + } + + elog(ERROR, "unrecognized IOOp value: %d", io_op); + pg_unreachable(); +} + +static inline double +pg_stat_us_to_ms(PgStat_Counter val_ms) +{ + return val_ms * (double) 0.001; +} + +Datum +pg_stat_get_io(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo; + PgStat_IO *backends_io_stats; + Datum reset_time; + + InitMaterializedSRF(fcinfo, 0); + rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + + backends_io_stats = pgstat_fetch_stat_io(); + + reset_time = TimestampTzGetDatum(backends_io_stats->stat_reset_timestamp); + + for (int bktype = 0; bktype < BACKEND_NUM_TYPES; bktype++) + { + Datum bktype_desc = CStringGetTextDatum(GetBackendTypeDesc(bktype)); + PgStat_BktypeIO *bktype_stats = &backends_io_stats->stats[bktype]; + + /* + * In Assert builds, we can afford an extra loop through all of the + * counters checking that only expected stats are non-zero, since it + * keeps the non-Assert code cleaner. + */ + Assert(pgstat_bktype_io_stats_valid(bktype_stats, bktype)); + + /* + * For those BackendTypes without IO Operation stats, skip + * representing them in the view altogether. + */ + if (!pgstat_tracks_io_bktype(bktype)) + continue; + + for (int io_obj = 0; io_obj < IOOBJECT_NUM_TYPES; io_obj++) + { + const char *obj_name = pgstat_get_io_object_name(io_obj); + + for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++) + { + const char *context_name = pgstat_get_io_context_name(io_context); + + Datum values[IO_NUM_COLUMNS] = {0}; + bool nulls[IO_NUM_COLUMNS] = {0}; + + /* + * Some combinations of BackendType, IOObject, and IOContext + * are not valid for any type of IOOp. In such cases, omit the + * entire row from the view. + */ + if (!pgstat_tracks_io_object(bktype, io_obj, io_context)) + continue; + + values[IO_COL_BACKEND_TYPE] = bktype_desc; + values[IO_COL_CONTEXT] = CStringGetTextDatum(context_name); + values[IO_COL_OBJECT] = CStringGetTextDatum(obj_name); + values[IO_COL_RESET_TIME] = TimestampTzGetDatum(reset_time); + + /* + * Hard-code this to the value of BLCKSZ for now. Future + * values could include XLOG_BLCKSZ, once WAL IO is tracked, + * and constant multipliers, once non-block-oriented IO (e.g. + * temporary file IO) is tracked. + */ + values[IO_COL_CONVERSION] = Int64GetDatum(BLCKSZ); + + for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++) + { + int op_idx = pgstat_get_io_op_index(io_op); + int time_idx = pgstat_get_io_time_index(io_op); + + /* + * Some combinations of BackendType and IOOp, of IOContext + * and IOOp, and of IOObject and IOOp are not tracked. Set + * these cells in the view NULL. 
+ */ + if (pgstat_tracks_io_op(bktype, io_obj, io_context, io_op)) + { + PgStat_Counter count = + bktype_stats->counts[io_obj][io_context][io_op]; + + values[op_idx] = Int64GetDatum(count); + } + else + nulls[op_idx] = true; + + /* not every operation is timed */ + if (time_idx == IO_COL_INVALID) + continue; + + if (!nulls[op_idx]) + { + PgStat_Counter time = + bktype_stats->times[io_obj][io_context][io_op]; + + values[time_idx] = Float8GetDatum(pg_stat_us_to_ms(time)); + } + else + nulls[time_idx] = true; + } + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, + values, nulls); + } + } + } + + return (Datum) 0; +} + +/* + * Returns statistics of WAL activity + */ +Datum +pg_stat_get_wal(PG_FUNCTION_ARGS) +{ +#define PG_STAT_GET_WAL_COLS 9 + TupleDesc tupdesc; + Datum values[PG_STAT_GET_WAL_COLS] = {0}; + bool nulls[PG_STAT_GET_WAL_COLS] = {0}; + char buf[256]; + PgStat_WalStats *wal_stats; + + /* Initialise attributes information in the tuple descriptor */ + tupdesc = CreateTemplateTupleDesc(PG_STAT_GET_WAL_COLS); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "wal_records", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "wal_fpi", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "wal_bytes", + NUMERICOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 4, "wal_buffers_full", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 5, "wal_write", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 6, "wal_sync", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 7, "wal_write_time", + FLOAT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 8, "wal_sync_time", + FLOAT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 9, "stats_reset", + TIMESTAMPTZOID, -1, 0); + + BlessTupleDesc(tupdesc); + + /* Get statistics about WAL activity */ + wal_stats = pgstat_fetch_stat_wal(); + + /* Fill values and NULLs */ + values[0] = Int64GetDatum(wal_stats->wal_records); + values[1] = Int64GetDatum(wal_stats->wal_fpi); + + /* Convert to numeric. */ + snprintf(buf, sizeof buf, UINT64_FORMAT, wal_stats->wal_bytes); + values[2] = DirectFunctionCall3(numeric_in, + CStringGetDatum(buf), + ObjectIdGetDatum(0), + Int32GetDatum(-1)); + + values[3] = Int64GetDatum(wal_stats->wal_buffers_full); + values[4] = Int64GetDatum(wal_stats->wal_write); + values[5] = Int64GetDatum(wal_stats->wal_sync); + + /* Convert counters from microsec to millisec for display */ + values[6] = Float8GetDatum(((double) wal_stats->wal_write_time) / 1000.0); + values[7] = Float8GetDatum(((double) wal_stats->wal_sync_time) / 1000.0); + + values[8] = TimestampTzGetDatum(wal_stats->stat_reset_timestamp); + + /* Returns the record as Datum */ + PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); +} + +/* + * Returns statistics of SLRU caches. 
+ */ +Datum +pg_stat_get_slru(PG_FUNCTION_ARGS) +{ +#define PG_STAT_GET_SLRU_COLS 9 + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + int i; + PgStat_SLRUStats *stats; + + InitMaterializedSRF(fcinfo, 0); + + /* request SLRU stats from the cumulative stats system */ + stats = pgstat_fetch_slru(); + + for (i = 0;; i++) + { + /* for each row */ + Datum values[PG_STAT_GET_SLRU_COLS] = {0}; + bool nulls[PG_STAT_GET_SLRU_COLS] = {0}; + PgStat_SLRUStats stat; + const char *name; + + name = pgstat_get_slru_name(i); + + if (!name) + break; + + stat = stats[i]; + + values[0] = PointerGetDatum(cstring_to_text(name)); + values[1] = Int64GetDatum(stat.blocks_zeroed); + values[2] = Int64GetDatum(stat.blocks_hit); + values[3] = Int64GetDatum(stat.blocks_read); + values[4] = Int64GetDatum(stat.blocks_written); + values[5] = Int64GetDatum(stat.blocks_exists); + values[6] = Int64GetDatum(stat.flush); + values[7] = Int64GetDatum(stat.truncate); + values[8] = TimestampTzGetDatum(stat.stat_reset_timestamp); + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls); + } + + return (Datum) 0; +} + +#define PG_STAT_GET_XACT_RELENTRY_INT64(stat) \ +Datum \ +CppConcat(pg_stat_get_xact_,stat)(PG_FUNCTION_ARGS) \ +{ \ + Oid relid = PG_GETARG_OID(0); \ + int64 result; \ + PgStat_TableStatus *tabentry; \ + \ + if ((tabentry = find_tabstat_entry(relid)) == NULL) \ + result = 0; \ + else \ + result = (int64) (tabentry->counts.stat); \ + \ + PG_RETURN_INT64(result); \ +} + +/* pg_stat_get_xact_numscans */ +PG_STAT_GET_XACT_RELENTRY_INT64(numscans) + +/* pg_stat_get_xact_tuples_returned */ +PG_STAT_GET_XACT_RELENTRY_INT64(tuples_returned) + +/* pg_stat_get_xact_tuples_fetched */ +PG_STAT_GET_XACT_RELENTRY_INT64(tuples_fetched) + +/* pg_stat_get_xact_tuples_hot_updated */ +PG_STAT_GET_XACT_RELENTRY_INT64(tuples_hot_updated) + +/* pg_stat_get_xact_tuples_newpage_updated */ +PG_STAT_GET_XACT_RELENTRY_INT64(tuples_newpage_updated) + +/* pg_stat_get_xact_blocks_fetched */ +PG_STAT_GET_XACT_RELENTRY_INT64(blocks_fetched) + +/* pg_stat_get_xact_blocks_hit */ +PG_STAT_GET_XACT_RELENTRY_INT64(blocks_hit) + +Datum +pg_stat_get_xact_tuples_inserted(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + int64 result; + PgStat_TableStatus *tabentry; + PgStat_TableXactStatus *trans; + + if ((tabentry = find_tabstat_entry(relid)) == NULL) + result = 0; + else + { + result = tabentry->counts.tuples_inserted; + /* live subtransactions' counts aren't in tuples_inserted yet */ + for (trans = tabentry->trans; trans != NULL; trans = trans->upper) + result += trans->tuples_inserted; + } + + PG_RETURN_INT64(result); +} + +Datum +pg_stat_get_xact_tuples_updated(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + int64 result; + PgStat_TableStatus *tabentry; + PgStat_TableXactStatus *trans; + + if ((tabentry = find_tabstat_entry(relid)) == NULL) + result = 0; + else + { + result = tabentry->counts.tuples_updated; + /* live subtransactions' counts aren't in tuples_updated yet */ + for (trans = tabentry->trans; trans != NULL; trans = trans->upper) + result += trans->tuples_updated; + } + + PG_RETURN_INT64(result); +} + +Datum +pg_stat_get_xact_tuples_deleted(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + int64 result; + PgStat_TableStatus *tabentry; + PgStat_TableXactStatus *trans; + + if ((tabentry = find_tabstat_entry(relid)) == NULL) + result = 0; + else + { + result = tabentry->counts.tuples_deleted; + /* live subtransactions' counts aren't in tuples_deleted yet */ + for (trans = tabentry->trans; 
trans != NULL; trans = trans->upper) + result += trans->tuples_deleted; + } + + PG_RETURN_INT64(result); +} + +Datum +pg_stat_get_xact_function_calls(PG_FUNCTION_ARGS) +{ + Oid funcid = PG_GETARG_OID(0); + PgStat_FunctionCounts *funcentry; + + if ((funcentry = find_funcstat_entry(funcid)) == NULL) + PG_RETURN_NULL(); + PG_RETURN_INT64(funcentry->numcalls); +} + +#define PG_STAT_GET_XACT_FUNCENTRY_FLOAT8_MS(stat) \ +Datum \ +CppConcat(pg_stat_get_xact_function_,stat)(PG_FUNCTION_ARGS) \ +{ \ + Oid funcid = PG_GETARG_OID(0); \ + PgStat_FunctionCounts *funcentry; \ + \ + if ((funcentry = find_funcstat_entry(funcid)) == NULL) \ + PG_RETURN_NULL(); \ + PG_RETURN_FLOAT8(INSTR_TIME_GET_MILLISEC(funcentry->stat)); \ +} + +/* pg_stat_get_xact_function_total_time */ +PG_STAT_GET_XACT_FUNCENTRY_FLOAT8_MS(total_time) + +/* pg_stat_get_xact_function_self_time */ +PG_STAT_GET_XACT_FUNCENTRY_FLOAT8_MS(self_time) + +/* Get the timestamp of the current statistics snapshot */ +Datum +pg_stat_get_snapshot_timestamp(PG_FUNCTION_ARGS) +{ + bool have_snapshot; + TimestampTz ts; + + ts = pgstat_get_stat_snapshot_timestamp(&have_snapshot); + + if (!have_snapshot) + PG_RETURN_NULL(); + + PG_RETURN_TIMESTAMPTZ(ts); +} + +/* Discard the active statistics snapshot */ +Datum +pg_stat_clear_snapshot(PG_FUNCTION_ARGS) +{ + pgstat_clear_snapshot(); + + PG_RETURN_VOID(); +} + + +/* Force statistics to be reported at the next occasion */ +Datum +pg_stat_force_next_flush(PG_FUNCTION_ARGS) +{ + pgstat_force_next_flush(); + + PG_RETURN_VOID(); +} + + +/* Reset all counters for the current database */ +Datum +pg_stat_reset(PG_FUNCTION_ARGS) +{ + pgstat_reset_counters(); + + PG_RETURN_VOID(); +} + +/* + * Reset some shared cluster-wide counters + * + * When adding a new reset target, ideally the name should match that in + * pgstat_kind_infos, if relevant. + */ +Datum +pg_stat_reset_shared(PG_FUNCTION_ARGS) +{ + char *target = text_to_cstring(PG_GETARG_TEXT_PP(0)); + + if (strcmp(target, "archiver") == 0) + pgstat_reset_of_kind(PGSTAT_KIND_ARCHIVER); + else if (strcmp(target, "bgwriter") == 0) + { + /* + * Historically checkpointer was part of bgwriter, continue to reset + * both for now. + */ + pgstat_reset_of_kind(PGSTAT_KIND_BGWRITER); + pgstat_reset_of_kind(PGSTAT_KIND_CHECKPOINTER); + } + else if (strcmp(target, "io") == 0) + pgstat_reset_of_kind(PGSTAT_KIND_IO); + else if (strcmp(target, "recovery_prefetch") == 0) + XLogPrefetchResetStats(); + else if (strcmp(target, "wal") == 0) + pgstat_reset_of_kind(PGSTAT_KIND_WAL); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognized reset target: \"%s\"", target), + errhint("Target must be \"archiver\", \"bgwriter\", \"io\", \"recovery_prefetch\", or \"wal\"."))); + + PG_RETURN_VOID(); +} + +/* + * Reset a statistics for a single object, which may be of current + * database or shared across all databases in the cluster. + */ +Datum +pg_stat_reset_single_table_counters(PG_FUNCTION_ARGS) +{ + Oid taboid = PG_GETARG_OID(0); + Oid dboid = (IsSharedRelation(taboid) ? InvalidOid : MyDatabaseId); + + pgstat_reset(PGSTAT_KIND_RELATION, dboid, taboid); + + PG_RETURN_VOID(); +} + +Datum +pg_stat_reset_single_function_counters(PG_FUNCTION_ARGS) +{ + Oid funcoid = PG_GETARG_OID(0); + + pgstat_reset(PGSTAT_KIND_FUNCTION, MyDatabaseId, funcoid); + + PG_RETURN_VOID(); +} + +/* Reset SLRU counters (a specific one or all of them). 
*/ +Datum +pg_stat_reset_slru(PG_FUNCTION_ARGS) +{ + char *target = NULL; + + if (PG_ARGISNULL(0)) + pgstat_reset_of_kind(PGSTAT_KIND_SLRU); + else + { + target = text_to_cstring(PG_GETARG_TEXT_PP(0)); + pgstat_reset_slru(target); + } + + PG_RETURN_VOID(); +} + +/* Reset replication slots stats (a specific one or all of them). */ +Datum +pg_stat_reset_replication_slot(PG_FUNCTION_ARGS) +{ + char *target = NULL; + + if (PG_ARGISNULL(0)) + pgstat_reset_of_kind(PGSTAT_KIND_REPLSLOT); + else + { + target = text_to_cstring(PG_GETARG_TEXT_PP(0)); + pgstat_reset_replslot(target); + } + + PG_RETURN_VOID(); +} + +/* Reset subscription stats (a specific one or all of them) */ +Datum +pg_stat_reset_subscription_stats(PG_FUNCTION_ARGS) +{ + Oid subid; + + if (PG_ARGISNULL(0)) + { + /* Clear all subscription stats */ + pgstat_reset_of_kind(PGSTAT_KIND_SUBSCRIPTION); + } + else + { + subid = PG_GETARG_OID(0); + + if (!OidIsValid(subid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid subscription OID %u", subid))); + pgstat_reset(PGSTAT_KIND_SUBSCRIPTION, InvalidOid, subid); + } + + PG_RETURN_VOID(); +} + +Datum +pg_stat_get_archiver(PG_FUNCTION_ARGS) +{ + TupleDesc tupdesc; + Datum values[7] = {0}; + bool nulls[7] = {0}; + PgStat_ArchiverStats *archiver_stats; + + /* Initialise attributes information in the tuple descriptor */ + tupdesc = CreateTemplateTupleDesc(7); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "archived_count", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "last_archived_wal", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "last_archived_time", + TIMESTAMPTZOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 4, "failed_count", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 5, "last_failed_wal", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 6, "last_failed_time", + TIMESTAMPTZOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 7, "stats_reset", + TIMESTAMPTZOID, -1, 0); + + BlessTupleDesc(tupdesc); + + /* Get statistics about the archiver process */ + archiver_stats = pgstat_fetch_stat_archiver(); + + /* Fill values and NULLs */ + values[0] = Int64GetDatum(archiver_stats->archived_count); + if (*(archiver_stats->last_archived_wal) == '\0') + nulls[1] = true; + else + values[1] = CStringGetTextDatum(archiver_stats->last_archived_wal); + + if (archiver_stats->last_archived_timestamp == 0) + nulls[2] = true; + else + values[2] = TimestampTzGetDatum(archiver_stats->last_archived_timestamp); + + values[3] = Int64GetDatum(archiver_stats->failed_count); + if (*(archiver_stats->last_failed_wal) == '\0') + nulls[4] = true; + else + values[4] = CStringGetTextDatum(archiver_stats->last_failed_wal); + + if (archiver_stats->last_failed_timestamp == 0) + nulls[5] = true; + else + values[5] = TimestampTzGetDatum(archiver_stats->last_failed_timestamp); + + if (archiver_stats->stat_reset_timestamp == 0) + nulls[6] = true; + else + values[6] = TimestampTzGetDatum(archiver_stats->stat_reset_timestamp); + + /* Returns the record as Datum */ + PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); +} + +/* + * Get the statistics for the replication slot. If the slot statistics is not + * available, return all-zeroes stats. 
+ */ +Datum +pg_stat_get_replication_slot(PG_FUNCTION_ARGS) +{ +#define PG_STAT_GET_REPLICATION_SLOT_COLS 10 + text *slotname_text = PG_GETARG_TEXT_P(0); + NameData slotname; + TupleDesc tupdesc; + Datum values[PG_STAT_GET_REPLICATION_SLOT_COLS] = {0}; + bool nulls[PG_STAT_GET_REPLICATION_SLOT_COLS] = {0}; + PgStat_StatReplSlotEntry *slotent; + PgStat_StatReplSlotEntry allzero; + + /* Initialise attributes information in the tuple descriptor */ + tupdesc = CreateTemplateTupleDesc(PG_STAT_GET_REPLICATION_SLOT_COLS); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "slot_name", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "spill_txns", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "spill_count", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 4, "spill_bytes", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 5, "stream_txns", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 6, "stream_count", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 7, "stream_bytes", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 8, "total_txns", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 9, "total_bytes", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 10, "stats_reset", + TIMESTAMPTZOID, -1, 0); + BlessTupleDesc(tupdesc); + + namestrcpy(&slotname, text_to_cstring(slotname_text)); + slotent = pgstat_fetch_replslot(slotname); + if (!slotent) + { + /* + * If the slot is not found, initialise its stats. This is possible if + * the create slot message is lost. + */ + memset(&allzero, 0, sizeof(PgStat_StatReplSlotEntry)); + slotent = &allzero; + } + + values[0] = CStringGetTextDatum(NameStr(slotname)); + values[1] = Int64GetDatum(slotent->spill_txns); + values[2] = Int64GetDatum(slotent->spill_count); + values[3] = Int64GetDatum(slotent->spill_bytes); + values[4] = Int64GetDatum(slotent->stream_txns); + values[5] = Int64GetDatum(slotent->stream_count); + values[6] = Int64GetDatum(slotent->stream_bytes); + values[7] = Int64GetDatum(slotent->total_txns); + values[8] = Int64GetDatum(slotent->total_bytes); + + if (slotent->stat_reset_timestamp == 0) + nulls[9] = true; + else + values[9] = TimestampTzGetDatum(slotent->stat_reset_timestamp); + + /* Returns the record as Datum */ + PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); +} + +/* + * Get the subscription statistics for the given subscription. If the + * subscription statistics is not available, return all-zeros stats. 
+ */ +Datum +pg_stat_get_subscription_stats(PG_FUNCTION_ARGS) +{ +#define PG_STAT_GET_SUBSCRIPTION_STATS_COLS 4 + Oid subid = PG_GETARG_OID(0); + TupleDesc tupdesc; + Datum values[PG_STAT_GET_SUBSCRIPTION_STATS_COLS] = {0}; + bool nulls[PG_STAT_GET_SUBSCRIPTION_STATS_COLS] = {0}; + PgStat_StatSubEntry *subentry; + PgStat_StatSubEntry allzero; + + /* Get subscription stats */ + subentry = pgstat_fetch_stat_subscription(subid); + + /* Initialise attributes information in the tuple descriptor */ + tupdesc = CreateTemplateTupleDesc(PG_STAT_GET_SUBSCRIPTION_STATS_COLS); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "subid", + OIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "apply_error_count", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "sync_error_count", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 4, "stats_reset", + TIMESTAMPTZOID, -1, 0); + BlessTupleDesc(tupdesc); + + if (!subentry) + { + /* If the subscription is not found, initialise its stats */ + memset(&allzero, 0, sizeof(PgStat_StatSubEntry)); + subentry = &allzero; + } + + /* subid */ + values[0] = ObjectIdGetDatum(subid); + + /* apply_error_count */ + values[1] = Int64GetDatum(subentry->apply_error_count); + + /* sync_error_count */ + values[2] = Int64GetDatum(subentry->sync_error_count); + + /* stats_reset */ + if (subentry->stat_reset_timestamp == 0) + nulls[3] = true; + else + values[3] = TimestampTzGetDatum(subentry->stat_reset_timestamp); + + /* Returns the record as Datum */ + PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); +} + +/* + * Checks for presence of stats for object with provided kind, database oid, + * object oid. + * + * This is useful for tests, but not really anything else. Therefore not + * documented. + */ +Datum +pg_stat_have_stats(PG_FUNCTION_ARGS) +{ + char *stats_type = text_to_cstring(PG_GETARG_TEXT_P(0)); + Oid dboid = PG_GETARG_OID(1); + Oid objoid = PG_GETARG_OID(2); + PgStat_Kind kind = pgstat_get_kind_from_str(stats_type); + + PG_RETURN_BOOL(pgstat_have_entry(kind, dboid, objoid)); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/pseudotypes.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/pseudotypes.c new file mode 100644 index 00000000000..3ba8cb192ca --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/pseudotypes.c @@ -0,0 +1,380 @@ +/*------------------------------------------------------------------------- + * + * pseudotypes.c + * Functions for the system pseudo-types. + * + * A pseudo-type isn't really a type and never has any operations, but + * we do need to supply input and output functions to satisfy the links + * in the pseudo-type's entry in pg_type. In most cases the functions + * just throw an error if invoked. (XXX the error messages here cover + * the most common case, but might be confusing in some contexts. Can + * we do better?) 
+ * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/pseudotypes.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "libpq/pqformat.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/rangetypes.h" +#include "utils/multirangetypes.h" + + +/* + * These macros generate input and output functions for a pseudo-type that + * will reject all input and output attempts. (But for some types, only + * the input function need be dummy.) + */ +#define PSEUDOTYPE_DUMMY_INPUT_FUNC(typname) \ +Datum \ +typname##_in(PG_FUNCTION_ARGS) \ +{ \ + ereport(ERROR, \ + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \ + errmsg("cannot accept a value of type %s", #typname))); \ +\ + PG_RETURN_VOID(); /* keep compiler quiet */ \ +} \ +\ +extern int no_such_variable + +#define PSEUDOTYPE_DUMMY_IO_FUNCS(typname) \ +PSEUDOTYPE_DUMMY_INPUT_FUNC(typname); \ +\ +Datum \ +typname##_out(PG_FUNCTION_ARGS) \ +{ \ + ereport(ERROR, \ + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \ + errmsg("cannot display a value of type %s", #typname))); \ +\ + PG_RETURN_VOID(); /* keep compiler quiet */ \ +} \ +\ +extern int no_such_variable + +/* + * Likewise for binary send/receive functions. We don't bother with these + * at all for many pseudotypes, but some have them. (By convention, if + * a type has a send function it should have a receive function, even if + * that's only dummy.) + */ +#define PSEUDOTYPE_DUMMY_RECEIVE_FUNC(typname) \ +Datum \ +typname##_recv(PG_FUNCTION_ARGS) \ +{ \ + ereport(ERROR, \ + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \ + errmsg("cannot accept a value of type %s", #typname))); \ +\ + PG_RETURN_VOID(); /* keep compiler quiet */ \ +} \ +\ +extern int no_such_variable + +#define PSEUDOTYPE_DUMMY_BINARY_IO_FUNCS(typname) \ +PSEUDOTYPE_DUMMY_RECEIVE_FUNC(typname); \ +\ +Datum \ +typname##_send(PG_FUNCTION_ARGS) \ +{ \ + ereport(ERROR, \ + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \ + errmsg("cannot display a value of type %s", #typname))); \ +\ + PG_RETURN_VOID(); /* keep compiler quiet */ \ +} \ +\ +extern int no_such_variable + + +/* + * cstring + * + * cstring is marked as a pseudo-type because we don't want people using it + * in tables. But it's really a perfectly functional type, so provide + * a full set of working I/O functions for it. Among other things, this + * allows manual invocation of datatype I/O functions, along the lines of + * "SELECT foo_in('blah')" or "SELECT foo_out(some-foo-value)". + */ +Datum +cstring_in(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); + + PG_RETURN_CSTRING(pstrdup(str)); +} + +Datum +cstring_out(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); + + PG_RETURN_CSTRING(pstrdup(str)); +} + +Datum +cstring_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + char *str; + int nbytes; + + str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); + PG_RETURN_CSTRING(str); +} + +Datum +cstring_send(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendtext(&buf, str, strlen(str)); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * anyarray + * + * We need to allow output of anyarray so that, e.g., pg_statistic columns + * can be printed. Input has to be disallowed, however. 
+ * + * XXX anyarray_recv could actually be made to work, since the incoming + * array data would contain the element type OID. It seems unlikely that + * it'd be sufficiently type-safe, though. + */ +PSEUDOTYPE_DUMMY_INPUT_FUNC(anyarray); +PSEUDOTYPE_DUMMY_RECEIVE_FUNC(anyarray); + +Datum +anyarray_out(PG_FUNCTION_ARGS) +{ + return array_out(fcinfo); +} + +Datum +anyarray_send(PG_FUNCTION_ARGS) +{ + return array_send(fcinfo); +} + +/* + * anycompatiblearray + * + * We may as well allow output, since we do for anyarray. + */ +PSEUDOTYPE_DUMMY_INPUT_FUNC(anycompatiblearray); +PSEUDOTYPE_DUMMY_RECEIVE_FUNC(anycompatiblearray); + +Datum +anycompatiblearray_out(PG_FUNCTION_ARGS) +{ + return array_out(fcinfo); +} + +Datum +anycompatiblearray_send(PG_FUNCTION_ARGS) +{ + return array_send(fcinfo); +} + +/* + * anyenum + * + * We may as well allow output, since enum_out will in fact work. + */ +PSEUDOTYPE_DUMMY_INPUT_FUNC(anyenum); + +Datum +anyenum_out(PG_FUNCTION_ARGS) +{ + return enum_out(fcinfo); +} + +/* + * anyrange + * + * We may as well allow output, since range_out will in fact work. + */ +PSEUDOTYPE_DUMMY_INPUT_FUNC(anyrange); + +Datum +anyrange_out(PG_FUNCTION_ARGS) +{ + return range_out(fcinfo); +} + +/* + * anycompatiblerange + * + * We may as well allow output, since range_out will in fact work. + */ +PSEUDOTYPE_DUMMY_INPUT_FUNC(anycompatiblerange); + +Datum +anycompatiblerange_out(PG_FUNCTION_ARGS) +{ + return range_out(fcinfo); +} + +/* + * anymultirange + * + * We may as well allow output, since multirange_out will in fact work. + */ +PSEUDOTYPE_DUMMY_INPUT_FUNC(anymultirange); + +Datum +anymultirange_out(PG_FUNCTION_ARGS) +{ + return multirange_out(fcinfo); +} + +/* + * anycompatiblemultirange + * + * We may as well allow output, since multirange_out will in fact work. + */ +PSEUDOTYPE_DUMMY_INPUT_FUNC(anycompatiblemultirange); + +Datum +anycompatiblemultirange_out(PG_FUNCTION_ARGS) +{ + return multirange_out(fcinfo); +} + +/* + * void + * + * We support void_in so that PL functions can return VOID without any + * special hack in the PL handler. Whatever value the PL thinks it's + * returning will just be ignored. Conversely, void_out and void_send + * are needed so that "SELECT function_returning_void(...)" works. + */ +Datum +void_in(PG_FUNCTION_ARGS) +{ + PG_RETURN_VOID(); /* you were expecting something different? */ +} + +Datum +void_out(PG_FUNCTION_ARGS) +{ + PG_RETURN_CSTRING(pstrdup("")); +} + +Datum +void_recv(PG_FUNCTION_ARGS) +{ + /* + * Note that since we consume no bytes, an attempt to send anything but an + * empty string will result in an "invalid message format" error. + */ + PG_RETURN_VOID(); +} + +Datum +void_send(PG_FUNCTION_ARGS) +{ + StringInfoData buf; + + /* send an empty string */ + pq_begintypsend(&buf); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * shell + * + * shell_in and shell_out are entered in pg_type for "shell" types + * (those not yet filled in). They should be unreachable, but we + * set them up just in case some code path tries to do I/O without + * having checked pg_type.typisdefined anywhere along the way. 
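+ *
+ * (A shell type is the placeholder left behind by a bare "CREATE TYPE
+ * name;" until a subsequent CREATE TYPE supplies the full definition, so
+ * ordinary code paths reject I/O on it well before these functions could
+ * be reached.)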
+ */ +Datum +shell_in(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of a shell type"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + +Datum +shell_out(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot display a value of a shell type"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + + +/* + * pg_node_tree + * + * pg_node_tree isn't really a pseudotype --- it's real enough to be a table + * column --- but it presently has no operations of its own, and disallows + * input too, so its I/O functions seem to fit here as much as anywhere. + * + * We must disallow input of pg_node_tree values because the SQL functions + * that operate on the type are not secure against malformed input. + * We do want to allow output, though. + */ +PSEUDOTYPE_DUMMY_INPUT_FUNC(pg_node_tree); +PSEUDOTYPE_DUMMY_RECEIVE_FUNC(pg_node_tree); + +Datum +pg_node_tree_out(PG_FUNCTION_ARGS) +{ + return textout(fcinfo); +} + +Datum +pg_node_tree_send(PG_FUNCTION_ARGS) +{ + return textsend(fcinfo); +} + +/* + * pg_ddl_command + * + * Like pg_node_tree, pg_ddl_command isn't really a pseudotype; it's here + * for the same reasons as that one. + * + * We don't have any good way to output this type directly, so punt + * for output as well as input. + */ +PSEUDOTYPE_DUMMY_IO_FUNCS(pg_ddl_command); +PSEUDOTYPE_DUMMY_BINARY_IO_FUNCS(pg_ddl_command); + + +/* + * Dummy I/O functions for various other pseudotypes. + */ +PSEUDOTYPE_DUMMY_IO_FUNCS(any); +PSEUDOTYPE_DUMMY_IO_FUNCS(trigger); +PSEUDOTYPE_DUMMY_IO_FUNCS(event_trigger); +PSEUDOTYPE_DUMMY_IO_FUNCS(language_handler); +PSEUDOTYPE_DUMMY_IO_FUNCS(fdw_handler); +PSEUDOTYPE_DUMMY_IO_FUNCS(table_am_handler); +PSEUDOTYPE_DUMMY_IO_FUNCS(index_am_handler); +PSEUDOTYPE_DUMMY_IO_FUNCS(tsm_handler); +PSEUDOTYPE_DUMMY_IO_FUNCS(internal); +PSEUDOTYPE_DUMMY_IO_FUNCS(anyelement); +PSEUDOTYPE_DUMMY_IO_FUNCS(anynonarray); +PSEUDOTYPE_DUMMY_IO_FUNCS(anycompatible); +PSEUDOTYPE_DUMMY_IO_FUNCS(anycompatiblenonarray); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/quote.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/quote.c new file mode 100644 index 00000000000..f2f633befac --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/quote.c @@ -0,0 +1,132 @@ +/*------------------------------------------------------------------------- + * + * quote.c + * Functions for quoting identifiers and literals + * + * Portions Copyright (c) 2000-2023, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/quote.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "utils/builtins.h" +#include "varatt.h" + + +/* + * quote_ident - + * returns a properly quoted identifier + */ +Datum +quote_ident(PG_FUNCTION_ARGS) +{ + text *t = PG_GETARG_TEXT_PP(0); + const char *qstr; + char *str; + + str = text_to_cstring(t); + qstr = quote_identifier(str); + PG_RETURN_TEXT_P(cstring_to_text(qstr)); +} + +/* + * quote_literal_internal - + * helper function for quote_literal and quote_literal_cstr + * + * NOTE: think not to make this function's behavior change with + * standard_conforming_strings. We don't know where the result + * literal will be used, and so we must generate a result that + * will work with either setting. Take a look at what dblink + * uses this for before thinking you know better. 
+ */ +static size_t +quote_literal_internal(char *dst, const char *src, size_t len) +{ + const char *s; + char *savedst = dst; + + for (s = src; s < src + len; s++) + { + if (*s == '\\') + { + *dst++ = ESCAPE_STRING_SYNTAX; + break; + } + } + + *dst++ = '\''; + while (len-- > 0) + { + if (SQL_STR_DOUBLE(*src, true)) + *dst++ = *src; + *dst++ = *src++; + } + *dst++ = '\''; + + return dst - savedst; +} + +/* + * quote_literal - + * returns a properly quoted literal + */ +Datum +quote_literal(PG_FUNCTION_ARGS) +{ + text *t = PG_GETARG_TEXT_PP(0); + text *result; + char *cp1; + char *cp2; + int len; + + len = VARSIZE_ANY_EXHDR(t); + /* We make a worst-case result area; wasting a little space is OK */ + result = (text *) palloc(len * 2 + 3 + VARHDRSZ); + + cp1 = VARDATA_ANY(t); + cp2 = VARDATA(result); + + SET_VARSIZE(result, VARHDRSZ + quote_literal_internal(cp2, cp1, len)); + + PG_RETURN_TEXT_P(result); +} + +/* + * quote_literal_cstr - + * returns a properly quoted literal + */ +char * +quote_literal_cstr(const char *rawstr) +{ + char *result; + int len; + int newlen; + + len = strlen(rawstr); + /* We make a worst-case result area; wasting a little space is OK */ + result = palloc(len * 2 + 3 + 1); + + newlen = quote_literal_internal(result, rawstr, len); + result[newlen] = '\0'; + + return result; +} + +/* + * quote_nullable - + * Returns a properly quoted literal, with null values returned + * as the text string 'NULL'. + */ +Datum +quote_nullable(PG_FUNCTION_ARGS) +{ + if (PG_ARGISNULL(0)) + PG_RETURN_TEXT_P(cstring_to_text("NULL")); + else + PG_RETURN_DATUM(DirectFunctionCall1(quote_literal, + PG_GETARG_DATUM(0))); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/rangetypes.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/rangetypes.c new file mode 100644 index 00000000000..24bad529239 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/rangetypes.c @@ -0,0 +1,2717 @@ +/*------------------------------------------------------------------------- + * + * rangetypes.c + * I/O functions, operators, and support functions for range types. + * + * The stored (serialized) format of a range value is: + * + * 4 bytes: varlena header + * 4 bytes: range type's OID + * Lower boundary value, if any, aligned according to subtype's typalign + * Upper boundary value, if any, aligned according to subtype's typalign + * 1 byte for flags + * + * This representation is chosen to avoid needing any padding before the + * lower boundary value, even when it requires double alignment. We can + * expect that the varlena header is presented to us on a suitably aligned + * boundary (possibly after detoasting), and then the lower boundary is too. + * Note that this means we can't work with a packed (short varlena header) + * value; we must detoast it first. 
+ * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/rangetypes.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/tupmacs.h" +#include "common/hashfn.h" +#include "lib/stringinfo.h" +#include "libpq/pqformat.h" +#include "miscadmin.h" +#include "nodes/miscnodes.h" +#include "port/pg_bitutils.h" +#include "utils/builtins.h" +#include "utils/date.h" +#include "utils/lsyscache.h" +#include "utils/rangetypes.h" +#include "utils/timestamp.h" +#include "varatt.h" + + +/* fn_extra cache entry for one of the range I/O functions */ +typedef struct RangeIOData +{ + TypeCacheEntry *typcache; /* range type's typcache entry */ + FmgrInfo typioproc; /* element type's I/O function */ + Oid typioparam; /* element type's I/O parameter */ +} RangeIOData; + + +static RangeIOData *get_range_io_data(FunctionCallInfo fcinfo, Oid rngtypid, + IOFuncSelector func); +static char range_parse_flags(const char *flags_str); +static bool range_parse(const char *string, char *flags, char **lbound_str, + char **ubound_str, Node *escontext); +static const char *range_parse_bound(const char *string, const char *ptr, + char **bound_str, bool *infinite, + Node *escontext); +static char *range_deparse(char flags, const char *lbound_str, + const char *ubound_str); +static char *range_bound_escape(const char *value); +static Size datum_compute_size(Size data_length, Datum val, bool typbyval, + char typalign, int16 typlen, char typstorage); +static Pointer datum_write(Pointer ptr, Datum datum, bool typbyval, + char typalign, int16 typlen, char typstorage); + + +/* + *---------------------------------------------------------- + * I/O FUNCTIONS + *---------------------------------------------------------- + */ + +Datum +range_in(PG_FUNCTION_ARGS) +{ + char *input_str = PG_GETARG_CSTRING(0); + Oid rngtypoid = PG_GETARG_OID(1); + Oid typmod = PG_GETARG_INT32(2); + Node *escontext = fcinfo->context; + RangeType *range; + RangeIOData *cache; + char flags; + char *lbound_str; + char *ubound_str; + RangeBound lower; + RangeBound upper; + + check_stack_depth(); /* recurses when subtype is a range type */ + + cache = get_range_io_data(fcinfo, rngtypoid, IOFunc_input); + + /* parse */ + if (!range_parse(input_str, &flags, &lbound_str, &ubound_str, escontext)) + PG_RETURN_NULL(); + + /* call element type's input function */ + if (RANGE_HAS_LBOUND(flags)) + if (!InputFunctionCallSafe(&cache->typioproc, lbound_str, + cache->typioparam, typmod, + escontext, &lower.val)) + PG_RETURN_NULL(); + if (RANGE_HAS_UBOUND(flags)) + if (!InputFunctionCallSafe(&cache->typioproc, ubound_str, + cache->typioparam, typmod, + escontext, &upper.val)) + PG_RETURN_NULL(); + + lower.infinite = (flags & RANGE_LB_INF) != 0; + lower.inclusive = (flags & RANGE_LB_INC) != 0; + lower.lower = true; + upper.infinite = (flags & RANGE_UB_INF) != 0; + upper.inclusive = (flags & RANGE_UB_INC) != 0; + upper.lower = false; + + /* serialize and canonicalize */ + range = make_range(cache->typcache, &lower, &upper, + flags & RANGE_EMPTY, escontext); + + PG_RETURN_RANGE_P(range); +} + +Datum +range_out(PG_FUNCTION_ARGS) +{ + RangeType *range = PG_GETARG_RANGE_P(0); + char *output_str; + RangeIOData *cache; + char flags; + char *lbound_str = NULL; + char *ubound_str = NULL; + RangeBound lower; + RangeBound upper; + bool empty; + + check_stack_depth(); 
/* recurses when subtype is a range type */ + + cache = get_range_io_data(fcinfo, RangeTypeGetOid(range), IOFunc_output); + + /* deserialize */ + range_deserialize(cache->typcache, range, &lower, &upper, &empty); + flags = range_get_flags(range); + + /* call element type's output function */ + if (RANGE_HAS_LBOUND(flags)) + lbound_str = OutputFunctionCall(&cache->typioproc, lower.val); + if (RANGE_HAS_UBOUND(flags)) + ubound_str = OutputFunctionCall(&cache->typioproc, upper.val); + + /* construct result string */ + output_str = range_deparse(flags, lbound_str, ubound_str); + + PG_RETURN_CSTRING(output_str); +} + +/* + * Binary representation: The first byte is the flags, then the lower bound + * (if present), then the upper bound (if present). Each bound is represented + * by a 4-byte length header and the binary representation of that bound (as + * returned by a call to the send function for the subtype). + */ + +Datum +range_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + Oid rngtypoid = PG_GETARG_OID(1); + int32 typmod = PG_GETARG_INT32(2); + RangeType *range; + RangeIOData *cache; + char flags; + RangeBound lower; + RangeBound upper; + + check_stack_depth(); /* recurses when subtype is a range type */ + + cache = get_range_io_data(fcinfo, rngtypoid, IOFunc_receive); + + /* receive the flags... */ + flags = (unsigned char) pq_getmsgbyte(buf); + + /* + * Mask out any unsupported flags, particularly RANGE_xB_NULL which would + * confuse following tests. Note that range_serialize will take care of + * cleaning up any inconsistencies in the remaining flags. + */ + flags &= (RANGE_EMPTY | + RANGE_LB_INC | + RANGE_LB_INF | + RANGE_UB_INC | + RANGE_UB_INF); + + /* receive the bounds ... */ + if (RANGE_HAS_LBOUND(flags)) + { + uint32 bound_len = pq_getmsgint(buf, 4); + const char *bound_data = pq_getmsgbytes(buf, bound_len); + StringInfoData bound_buf; + + initStringInfo(&bound_buf); + appendBinaryStringInfo(&bound_buf, bound_data, bound_len); + + lower.val = ReceiveFunctionCall(&cache->typioproc, + &bound_buf, + cache->typioparam, + typmod); + pfree(bound_buf.data); + } + else + lower.val = (Datum) 0; + + if (RANGE_HAS_UBOUND(flags)) + { + uint32 bound_len = pq_getmsgint(buf, 4); + const char *bound_data = pq_getmsgbytes(buf, bound_len); + StringInfoData bound_buf; + + initStringInfo(&bound_buf); + appendBinaryStringInfo(&bound_buf, bound_data, bound_len); + + upper.val = ReceiveFunctionCall(&cache->typioproc, + &bound_buf, + cache->typioparam, + typmod); + pfree(bound_buf.data); + } + else + upper.val = (Datum) 0; + + pq_getmsgend(buf); + + /* finish constructing RangeBound representation */ + lower.infinite = (flags & RANGE_LB_INF) != 0; + lower.inclusive = (flags & RANGE_LB_INC) != 0; + lower.lower = true; + upper.infinite = (flags & RANGE_UB_INF) != 0; + upper.inclusive = (flags & RANGE_UB_INC) != 0; + upper.lower = false; + + /* serialize and canonicalize */ + range = make_range(cache->typcache, &lower, &upper, + flags & RANGE_EMPTY, NULL); + + PG_RETURN_RANGE_P(range); +} + +Datum +range_send(PG_FUNCTION_ARGS) +{ + RangeType *range = PG_GETARG_RANGE_P(0); + StringInfo buf = makeStringInfo(); + RangeIOData *cache; + char flags; + RangeBound lower; + RangeBound upper; + bool empty; + + check_stack_depth(); /* recurses when subtype is a range type */ + + cache = get_range_io_data(fcinfo, RangeTypeGetOid(range), IOFunc_send); + + /* deserialize */ + range_deserialize(cache->typcache, range, &lower, &upper, &empty); + flags = range_get_flags(range); + + /* 
construct output */ + pq_begintypsend(buf); + + pq_sendbyte(buf, flags); + + if (RANGE_HAS_LBOUND(flags)) + { + Datum bound = PointerGetDatum(SendFunctionCall(&cache->typioproc, + lower.val)); + uint32 bound_len = VARSIZE(bound) - VARHDRSZ; + char *bound_data = VARDATA(bound); + + pq_sendint32(buf, bound_len); + pq_sendbytes(buf, bound_data, bound_len); + } + + if (RANGE_HAS_UBOUND(flags)) + { + Datum bound = PointerGetDatum(SendFunctionCall(&cache->typioproc, + upper.val)); + uint32 bound_len = VARSIZE(bound) - VARHDRSZ; + char *bound_data = VARDATA(bound); + + pq_sendint32(buf, bound_len); + pq_sendbytes(buf, bound_data, bound_len); + } + + PG_RETURN_BYTEA_P(pq_endtypsend(buf)); +} + +/* + * get_range_io_data: get cached information needed for range type I/O + * + * The range I/O functions need a bit more cached info than other range + * functions, so they store a RangeIOData struct in fn_extra, not just a + * pointer to a type cache entry. + */ +static RangeIOData * +get_range_io_data(FunctionCallInfo fcinfo, Oid rngtypid, IOFuncSelector func) +{ + RangeIOData *cache = (RangeIOData *) fcinfo->flinfo->fn_extra; + + if (cache == NULL || cache->typcache->type_id != rngtypid) + { + int16 typlen; + bool typbyval; + char typalign; + char typdelim; + Oid typiofunc; + + cache = (RangeIOData *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(RangeIOData)); + cache->typcache = lookup_type_cache(rngtypid, TYPECACHE_RANGE_INFO); + if (cache->typcache->rngelemtype == NULL) + elog(ERROR, "type %u is not a range type", rngtypid); + + /* get_type_io_data does more than we need, but is convenient */ + get_type_io_data(cache->typcache->rngelemtype->type_id, + func, + &typlen, + &typbyval, + &typalign, + &typdelim, + &cache->typioparam, + &typiofunc); + + if (!OidIsValid(typiofunc)) + { + /* this could only happen for receive or send */ + if (func == IOFunc_receive) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("no binary input function available for type %s", + format_type_be(cache->typcache->rngelemtype->type_id)))); + else + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("no binary output function available for type %s", + format_type_be(cache->typcache->rngelemtype->type_id)))); + } + fmgr_info_cxt(typiofunc, &cache->typioproc, + fcinfo->flinfo->fn_mcxt); + + fcinfo->flinfo->fn_extra = (void *) cache; + } + + return cache; +} + + +/* + *---------------------------------------------------------- + * GENERIC FUNCTIONS + *---------------------------------------------------------- + */ + +/* Construct standard-form range value from two arguments */ +Datum +range_constructor2(PG_FUNCTION_ARGS) +{ + Datum arg1 = PG_GETARG_DATUM(0); + Datum arg2 = PG_GETARG_DATUM(1); + Oid rngtypid = get_fn_expr_rettype(fcinfo->flinfo); + RangeType *range; + TypeCacheEntry *typcache; + RangeBound lower; + RangeBound upper; + + typcache = range_get_typcache(fcinfo, rngtypid); + + lower.val = PG_ARGISNULL(0) ? (Datum) 0 : arg1; + lower.infinite = PG_ARGISNULL(0); + lower.inclusive = true; + lower.lower = true; + + upper.val = PG_ARGISNULL(1) ? 
(Datum) 0 : arg2; + upper.infinite = PG_ARGISNULL(1); + upper.inclusive = false; + upper.lower = false; + + range = make_range(typcache, &lower, &upper, false, NULL); + + PG_RETURN_RANGE_P(range); +} + +/* Construct general range value from three arguments */ +Datum +range_constructor3(PG_FUNCTION_ARGS) +{ + Datum arg1 = PG_GETARG_DATUM(0); + Datum arg2 = PG_GETARG_DATUM(1); + Oid rngtypid = get_fn_expr_rettype(fcinfo->flinfo); + RangeType *range; + TypeCacheEntry *typcache; + RangeBound lower; + RangeBound upper; + char flags; + + typcache = range_get_typcache(fcinfo, rngtypid); + + if (PG_ARGISNULL(2)) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("range constructor flags argument must not be null"))); + + flags = range_parse_flags(text_to_cstring(PG_GETARG_TEXT_PP(2))); + + lower.val = PG_ARGISNULL(0) ? (Datum) 0 : arg1; + lower.infinite = PG_ARGISNULL(0); + lower.inclusive = (flags & RANGE_LB_INC) != 0; + lower.lower = true; + + upper.val = PG_ARGISNULL(1) ? (Datum) 0 : arg2; + upper.infinite = PG_ARGISNULL(1); + upper.inclusive = (flags & RANGE_UB_INC) != 0; + upper.lower = false; + + range = make_range(typcache, &lower, &upper, false, NULL); + + PG_RETURN_RANGE_P(range); +} + + +/* range -> subtype functions */ + +/* extract lower bound value */ +Datum +range_lower(PG_FUNCTION_ARGS) +{ + RangeType *r1 = PG_GETARG_RANGE_P(0); + TypeCacheEntry *typcache; + RangeBound lower; + RangeBound upper; + bool empty; + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1)); + + range_deserialize(typcache, r1, &lower, &upper, &empty); + + /* Return NULL if there's no finite lower bound */ + if (empty || lower.infinite) + PG_RETURN_NULL(); + + PG_RETURN_DATUM(lower.val); +} + +/* extract upper bound value */ +Datum +range_upper(PG_FUNCTION_ARGS) +{ + RangeType *r1 = PG_GETARG_RANGE_P(0); + TypeCacheEntry *typcache; + RangeBound lower; + RangeBound upper; + bool empty; + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1)); + + range_deserialize(typcache, r1, &lower, &upper, &empty); + + /* Return NULL if there's no finite upper bound */ + if (empty || upper.infinite) + PG_RETURN_NULL(); + + PG_RETURN_DATUM(upper.val); +} + + +/* range -> bool functions */ + +/* is range empty? */ +Datum +range_empty(PG_FUNCTION_ARGS) +{ + RangeType *r1 = PG_GETARG_RANGE_P(0); + char flags = range_get_flags(r1); + + PG_RETURN_BOOL(flags & RANGE_EMPTY); +} + +/* is lower bound inclusive? */ +Datum +range_lower_inc(PG_FUNCTION_ARGS) +{ + RangeType *r1 = PG_GETARG_RANGE_P(0); + char flags = range_get_flags(r1); + + PG_RETURN_BOOL(flags & RANGE_LB_INC); +} + +/* is upper bound inclusive? */ +Datum +range_upper_inc(PG_FUNCTION_ARGS) +{ + RangeType *r1 = PG_GETARG_RANGE_P(0); + char flags = range_get_flags(r1); + + PG_RETURN_BOOL(flags & RANGE_UB_INC); +} + +/* is lower bound infinite? */ +Datum +range_lower_inf(PG_FUNCTION_ARGS) +{ + RangeType *r1 = PG_GETARG_RANGE_P(0); + char flags = range_get_flags(r1); + + PG_RETURN_BOOL(flags & RANGE_LB_INF); +} + +/* is upper bound infinite? */ +Datum +range_upper_inf(PG_FUNCTION_ARGS) +{ + RangeType *r1 = PG_GETARG_RANGE_P(0); + char flags = range_get_flags(r1); + + PG_RETURN_BOOL(flags & RANGE_UB_INF); +} + + +/* range, element -> bool functions */ + +/* contains? 
*/ +Datum +range_contains_elem(PG_FUNCTION_ARGS) +{ + RangeType *r = PG_GETARG_RANGE_P(0); + Datum val = PG_GETARG_DATUM(1); + TypeCacheEntry *typcache; + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r)); + + PG_RETURN_BOOL(range_contains_elem_internal(typcache, r, val)); +} + +/* contained by? */ +Datum +elem_contained_by_range(PG_FUNCTION_ARGS) +{ + Datum val = PG_GETARG_DATUM(0); + RangeType *r = PG_GETARG_RANGE_P(1); + TypeCacheEntry *typcache; + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r)); + + PG_RETURN_BOOL(range_contains_elem_internal(typcache, r, val)); +} + + +/* range, range -> bool functions */ + +/* equality (internal version) */ +bool +range_eq_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2) +{ + RangeBound lower1, + lower2; + RangeBound upper1, + upper2; + bool empty1, + empty2; + + /* Different types should be prevented by ANYRANGE matching rules */ + if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2)) + elog(ERROR, "range types do not match"); + + range_deserialize(typcache, r1, &lower1, &upper1, &empty1); + range_deserialize(typcache, r2, &lower2, &upper2, &empty2); + + if (empty1 && empty2) + return true; + if (empty1 != empty2) + return false; + + if (range_cmp_bounds(typcache, &lower1, &lower2) != 0) + return false; + + if (range_cmp_bounds(typcache, &upper1, &upper2) != 0) + return false; + + return true; +} + +/* equality */ +Datum +range_eq(PG_FUNCTION_ARGS) +{ + RangeType *r1 = PG_GETARG_RANGE_P(0); + RangeType *r2 = PG_GETARG_RANGE_P(1); + TypeCacheEntry *typcache; + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1)); + + PG_RETURN_BOOL(range_eq_internal(typcache, r1, r2)); +} + +/* inequality (internal version) */ +bool +range_ne_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2) +{ + return (!range_eq_internal(typcache, r1, r2)); +} + +/* inequality */ +Datum +range_ne(PG_FUNCTION_ARGS) +{ + RangeType *r1 = PG_GETARG_RANGE_P(0); + RangeType *r2 = PG_GETARG_RANGE_P(1); + TypeCacheEntry *typcache; + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1)); + + PG_RETURN_BOOL(range_ne_internal(typcache, r1, r2)); +} + +/* contains? */ +Datum +range_contains(PG_FUNCTION_ARGS) +{ + RangeType *r1 = PG_GETARG_RANGE_P(0); + RangeType *r2 = PG_GETARG_RANGE_P(1); + TypeCacheEntry *typcache; + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1)); + + PG_RETURN_BOOL(range_contains_internal(typcache, r1, r2)); +} + +/* contained by? */ +Datum +range_contained_by(PG_FUNCTION_ARGS) +{ + RangeType *r1 = PG_GETARG_RANGE_P(0); + RangeType *r2 = PG_GETARG_RANGE_P(1); + TypeCacheEntry *typcache; + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1)); + + PG_RETURN_BOOL(range_contained_by_internal(typcache, r1, r2)); +} + +/* strictly left of? (internal version) */ +bool +range_before_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2) +{ + RangeBound lower1, + lower2; + RangeBound upper1, + upper2; + bool empty1, + empty2; + + /* Different types should be prevented by ANYRANGE matching rules */ + if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2)) + elog(ERROR, "range types do not match"); + + range_deserialize(typcache, r1, &lower1, &upper1, &empty1); + range_deserialize(typcache, r2, &lower2, &upper2, &empty2); + + /* An empty range is neither before nor after any other range */ + if (empty1 || empty2) + return false; + + return (range_cmp_bounds(typcache, &upper1, &lower2) < 0); +} + +/* strictly left of? 
*/ +Datum +range_before(PG_FUNCTION_ARGS) +{ + RangeType *r1 = PG_GETARG_RANGE_P(0); + RangeType *r2 = PG_GETARG_RANGE_P(1); + TypeCacheEntry *typcache; + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1)); + + PG_RETURN_BOOL(range_before_internal(typcache, r1, r2)); +} + +/* strictly right of? (internal version) */ +bool +range_after_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2) +{ + RangeBound lower1, + lower2; + RangeBound upper1, + upper2; + bool empty1, + empty2; + + /* Different types should be prevented by ANYRANGE matching rules */ + if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2)) + elog(ERROR, "range types do not match"); + + range_deserialize(typcache, r1, &lower1, &upper1, &empty1); + range_deserialize(typcache, r2, &lower2, &upper2, &empty2); + + /* An empty range is neither before nor after any other range */ + if (empty1 || empty2) + return false; + + return (range_cmp_bounds(typcache, &lower1, &upper2) > 0); +} + +/* strictly right of? */ +Datum +range_after(PG_FUNCTION_ARGS) +{ + RangeType *r1 = PG_GETARG_RANGE_P(0); + RangeType *r2 = PG_GETARG_RANGE_P(1); + TypeCacheEntry *typcache; + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1)); + + PG_RETURN_BOOL(range_after_internal(typcache, r1, r2)); +} + +/* + * Check if two bounds A and B are "adjacent", where A is an upper bound and B + * is a lower bound. For the bounds to be adjacent, each subtype value must + * satisfy strictly one of the bounds: there are no values which satisfy both + * bounds (i.e. less than A and greater than B); and there are no values which + * satisfy neither bound (i.e. greater than A and less than B). + * + * For discrete ranges, we rely on the canonicalization function to see if A..B + * normalizes to empty. (If there is no canonicalization function, it's + * impossible for such a range to normalize to empty, so we needn't bother to + * try.) + * + * If A == B, the ranges are adjacent only if the bounds have different + * inclusive flags (i.e., exactly one of the ranges includes the common + * boundary point). + * + * And if A > B then the ranges are not adjacent in this order. + */ +bool +bounds_adjacent(TypeCacheEntry *typcache, RangeBound boundA, RangeBound boundB) +{ + int cmp; + + Assert(!boundA.lower && boundB.lower); + + cmp = range_cmp_bound_values(typcache, &boundA, &boundB); + if (cmp < 0) + { + RangeType *r; + + /* + * Bounds do not overlap; see if there are points in between. + */ + + /* in a continuous subtype, there are assumed to be points between */ + if (!OidIsValid(typcache->rng_canonical_finfo.fn_oid)) + return false; + + /* + * The bounds are of a discrete range type; so make a range A..B and + * see if it's empty. + */ + + /* flip the inclusion flags */ + boundA.inclusive = !boundA.inclusive; + boundB.inclusive = !boundB.inclusive; + /* change upper/lower labels to avoid Assert failures */ + boundA.lower = true; + boundB.lower = false; + r = make_range(typcache, &boundA, &boundB, false, NULL); + return RangeIsEmpty(r); + } + else if (cmp == 0) + return boundA.inclusive != boundB.inclusive; + else + return false; /* bounds overlap */ +} + +/* adjacent to (but not overlapping)? 
(internal version) */ +bool +range_adjacent_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2) +{ + RangeBound lower1, + lower2; + RangeBound upper1, + upper2; + bool empty1, + empty2; + + /* Different types should be prevented by ANYRANGE matching rules */ + if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2)) + elog(ERROR, "range types do not match"); + + range_deserialize(typcache, r1, &lower1, &upper1, &empty1); + range_deserialize(typcache, r2, &lower2, &upper2, &empty2); + + /* An empty range is not adjacent to any other range */ + if (empty1 || empty2) + return false; + + /* + * Given two ranges A..B and C..D, the ranges are adjacent if and only if + * B is adjacent to C, or D is adjacent to A. + */ + return (bounds_adjacent(typcache, upper1, lower2) || + bounds_adjacent(typcache, upper2, lower1)); +} + +/* adjacent to (but not overlapping)? */ +Datum +range_adjacent(PG_FUNCTION_ARGS) +{ + RangeType *r1 = PG_GETARG_RANGE_P(0); + RangeType *r2 = PG_GETARG_RANGE_P(1); + TypeCacheEntry *typcache; + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1)); + + PG_RETURN_BOOL(range_adjacent_internal(typcache, r1, r2)); +} + +/* overlaps? (internal version) */ +bool +range_overlaps_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2) +{ + RangeBound lower1, + lower2; + RangeBound upper1, + upper2; + bool empty1, + empty2; + + /* Different types should be prevented by ANYRANGE matching rules */ + if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2)) + elog(ERROR, "range types do not match"); + + range_deserialize(typcache, r1, &lower1, &upper1, &empty1); + range_deserialize(typcache, r2, &lower2, &upper2, &empty2); + + /* An empty range does not overlap any other range */ + if (empty1 || empty2) + return false; + + if (range_cmp_bounds(typcache, &lower1, &lower2) >= 0 && + range_cmp_bounds(typcache, &lower1, &upper2) <= 0) + return true; + + if (range_cmp_bounds(typcache, &lower2, &lower1) >= 0 && + range_cmp_bounds(typcache, &lower2, &upper1) <= 0) + return true; + + return false; +} + +/* overlaps? */ +Datum +range_overlaps(PG_FUNCTION_ARGS) +{ + RangeType *r1 = PG_GETARG_RANGE_P(0); + RangeType *r2 = PG_GETARG_RANGE_P(1); + TypeCacheEntry *typcache; + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1)); + + PG_RETURN_BOOL(range_overlaps_internal(typcache, r1, r2)); +} + +/* does not extend to right of? (internal version) */ +bool +range_overleft_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2) +{ + RangeBound lower1, + lower2; + RangeBound upper1, + upper2; + bool empty1, + empty2; + + /* Different types should be prevented by ANYRANGE matching rules */ + if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2)) + elog(ERROR, "range types do not match"); + + range_deserialize(typcache, r1, &lower1, &upper1, &empty1); + range_deserialize(typcache, r2, &lower2, &upper2, &empty2); + + /* An empty range is neither before nor after any other range */ + if (empty1 || empty2) + return false; + + if (range_cmp_bounds(typcache, &upper1, &upper2) <= 0) + return true; + + return false; +} + +/* does not extend to right of? */ +Datum +range_overleft(PG_FUNCTION_ARGS) +{ + RangeType *r1 = PG_GETARG_RANGE_P(0); + RangeType *r2 = PG_GETARG_RANGE_P(1); + TypeCacheEntry *typcache; + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1)); + + PG_RETURN_BOOL(range_overleft_internal(typcache, r1, r2)); +} + +/* does not extend to left of? 
(internal version) */ +bool +range_overright_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2) +{ + RangeBound lower1, + lower2; + RangeBound upper1, + upper2; + bool empty1, + empty2; + + /* Different types should be prevented by ANYRANGE matching rules */ + if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2)) + elog(ERROR, "range types do not match"); + + range_deserialize(typcache, r1, &lower1, &upper1, &empty1); + range_deserialize(typcache, r2, &lower2, &upper2, &empty2); + + /* An empty range is neither before nor after any other range */ + if (empty1 || empty2) + return false; + + if (range_cmp_bounds(typcache, &lower1, &lower2) >= 0) + return true; + + return false; +} + +/* does not extend to left of? */ +Datum +range_overright(PG_FUNCTION_ARGS) +{ + RangeType *r1 = PG_GETARG_RANGE_P(0); + RangeType *r2 = PG_GETARG_RANGE_P(1); + TypeCacheEntry *typcache; + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1)); + + PG_RETURN_BOOL(range_overright_internal(typcache, r1, r2)); +} + + +/* range, range -> range functions */ + +/* set difference */ +Datum +range_minus(PG_FUNCTION_ARGS) +{ + RangeType *r1 = PG_GETARG_RANGE_P(0); + RangeType *r2 = PG_GETARG_RANGE_P(1); + RangeType *ret; + TypeCacheEntry *typcache; + + /* Different types should be prevented by ANYRANGE matching rules */ + if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2)) + elog(ERROR, "range types do not match"); + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1)); + + ret = range_minus_internal(typcache, r1, r2); + if (ret) + PG_RETURN_RANGE_P(ret); + else + PG_RETURN_NULL(); +} + +RangeType * +range_minus_internal(TypeCacheEntry *typcache, RangeType *r1, RangeType *r2) +{ + RangeBound lower1, + lower2; + RangeBound upper1, + upper2; + bool empty1, + empty2; + int cmp_l1l2, + cmp_l1u2, + cmp_u1l2, + cmp_u1u2; + + range_deserialize(typcache, r1, &lower1, &upper1, &empty1); + range_deserialize(typcache, r2, &lower2, &upper2, &empty2); + + /* if either is empty, r1 is the correct answer */ + if (empty1 || empty2) + return r1; + + cmp_l1l2 = range_cmp_bounds(typcache, &lower1, &lower2); + cmp_l1u2 = range_cmp_bounds(typcache, &lower1, &upper2); + cmp_u1l2 = range_cmp_bounds(typcache, &upper1, &lower2); + cmp_u1u2 = range_cmp_bounds(typcache, &upper1, &upper2); + + if (cmp_l1l2 < 0 && cmp_u1u2 > 0) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("result of range difference would not be contiguous"))); + + if (cmp_l1u2 > 0 || cmp_u1l2 < 0) + return r1; + + if (cmp_l1l2 >= 0 && cmp_u1u2 <= 0) + return make_empty_range(typcache); + + if (cmp_l1l2 <= 0 && cmp_u1l2 >= 0 && cmp_u1u2 <= 0) + { + lower2.inclusive = !lower2.inclusive; + lower2.lower = false; /* it will become the upper bound */ + return make_range(typcache, &lower1, &lower2, false, NULL); + } + + if (cmp_l1l2 >= 0 && cmp_u1u2 >= 0 && cmp_l1u2 <= 0) + { + upper2.inclusive = !upper2.inclusive; + upper2.lower = true; /* it will become the lower bound */ + return make_range(typcache, &upper2, &upper1, false, NULL); + } + + elog(ERROR, "unexpected case in range_minus"); + return NULL; +} + +/* + * Set union. If strict is true, it is an error that the two input ranges + * are not adjacent or overlapping. 
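+ *
+ * For example, [1,3) union [2,5) and [1,3) union [3,5) both yield [1,5);
+ * [1,2) union [3,5) raises "result of range union would not be contiguous"
+ * when strict is true, and yields [1,5) when it is not (the non-strict
+ * behavior is what range_merge exposes).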
+ */ +RangeType * +range_union_internal(TypeCacheEntry *typcache, RangeType *r1, RangeType *r2, + bool strict) +{ + RangeBound lower1, + lower2; + RangeBound upper1, + upper2; + bool empty1, + empty2; + RangeBound *result_lower; + RangeBound *result_upper; + + /* Different types should be prevented by ANYRANGE matching rules */ + if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2)) + elog(ERROR, "range types do not match"); + + range_deserialize(typcache, r1, &lower1, &upper1, &empty1); + range_deserialize(typcache, r2, &lower2, &upper2, &empty2); + + /* if either is empty, the other is the correct answer */ + if (empty1) + return r2; + if (empty2) + return r1; + + if (strict && + !DatumGetBool(range_overlaps_internal(typcache, r1, r2)) && + !DatumGetBool(range_adjacent_internal(typcache, r1, r2))) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("result of range union would not be contiguous"))); + + if (range_cmp_bounds(typcache, &lower1, &lower2) < 0) + result_lower = &lower1; + else + result_lower = &lower2; + + if (range_cmp_bounds(typcache, &upper1, &upper2) > 0) + result_upper = &upper1; + else + result_upper = &upper2; + + return make_range(typcache, result_lower, result_upper, false, NULL); +} + +Datum +range_union(PG_FUNCTION_ARGS) +{ + RangeType *r1 = PG_GETARG_RANGE_P(0); + RangeType *r2 = PG_GETARG_RANGE_P(1); + TypeCacheEntry *typcache; + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1)); + + PG_RETURN_RANGE_P(range_union_internal(typcache, r1, r2, true)); +} + +/* + * range merge: like set union, except also allow and account for non-adjacent + * input ranges. + */ +Datum +range_merge(PG_FUNCTION_ARGS) +{ + RangeType *r1 = PG_GETARG_RANGE_P(0); + RangeType *r2 = PG_GETARG_RANGE_P(1); + TypeCacheEntry *typcache; + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1)); + + PG_RETURN_RANGE_P(range_union_internal(typcache, r1, r2, false)); +} + +/* set intersection */ +Datum +range_intersect(PG_FUNCTION_ARGS) +{ + RangeType *r1 = PG_GETARG_RANGE_P(0); + RangeType *r2 = PG_GETARG_RANGE_P(1); + TypeCacheEntry *typcache; + + /* Different types should be prevented by ANYRANGE matching rules */ + if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2)) + elog(ERROR, "range types do not match"); + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1)); + + PG_RETURN_RANGE_P(range_intersect_internal(typcache, r1, r2)); +} + +RangeType * +range_intersect_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2) +{ + RangeBound lower1, + lower2; + RangeBound upper1, + upper2; + bool empty1, + empty2; + RangeBound *result_lower; + RangeBound *result_upper; + + range_deserialize(typcache, r1, &lower1, &upper1, &empty1); + range_deserialize(typcache, r2, &lower2, &upper2, &empty2); + + if (empty1 || empty2 || !range_overlaps_internal(typcache, r1, r2)) + return make_empty_range(typcache); + + if (range_cmp_bounds(typcache, &lower1, &lower2) >= 0) + result_lower = &lower1; + else + result_lower = &lower2; + + if (range_cmp_bounds(typcache, &upper1, &upper2) <= 0) + result_upper = &upper1; + else + result_upper = &upper2; + + return make_range(typcache, result_lower, result_upper, false, NULL); +} + +/* range, range -> range, range functions */ + +/* + * range_split_internal - if r2 intersects the middle of r1, leaving non-empty + * ranges on both sides, then return true and set output1 and output2 to the + * results of r1 - r2 (in order). Otherwise return false and don't set output1 + * or output2. Neither input range should be empty. 
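+ *
+ * For example, splitting r1 = [1,10) by r2 = [3,5) sets output1 = [1,3)
+ * and output2 = [5,10) and returns true, whereas r2 = [3,12) returns false
+ * because nothing would remain to the right of r2.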
+ */ +bool +range_split_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2, + RangeType **output1, RangeType **output2) +{ + RangeBound lower1, + lower2; + RangeBound upper1, + upper2; + bool empty1, + empty2; + + range_deserialize(typcache, r1, &lower1, &upper1, &empty1); + range_deserialize(typcache, r2, &lower2, &upper2, &empty2); + + if (range_cmp_bounds(typcache, &lower1, &lower2) < 0 && + range_cmp_bounds(typcache, &upper1, &upper2) > 0) + { + /* + * Need to invert inclusive/exclusive for the lower2 and upper2 + * points. They can't be infinite though. We're allowed to overwrite + * these RangeBounds since they only exist locally. + */ + lower2.inclusive = !lower2.inclusive; + lower2.lower = false; + upper2.inclusive = !upper2.inclusive; + upper2.lower = true; + + *output1 = make_range(typcache, &lower1, &lower2, false, NULL); + *output2 = make_range(typcache, &upper2, &upper1, false, NULL); + return true; + } + + return false; +} + +/* range -> range aggregate functions */ + +Datum +range_intersect_agg_transfn(PG_FUNCTION_ARGS) +{ + MemoryContext aggContext; + Oid rngtypoid; + TypeCacheEntry *typcache; + RangeType *result; + RangeType *current; + + if (!AggCheckCallContext(fcinfo, &aggContext)) + elog(ERROR, "range_intersect_agg_transfn called in non-aggregate context"); + + rngtypoid = get_fn_expr_argtype(fcinfo->flinfo, 1); + if (!type_is_range(rngtypoid)) + elog(ERROR, "range_intersect_agg must be called with a range"); + + typcache = range_get_typcache(fcinfo, rngtypoid); + + /* strictness ensures these are non-null */ + result = PG_GETARG_RANGE_P(0); + current = PG_GETARG_RANGE_P(1); + + result = range_intersect_internal(typcache, result, current); + PG_RETURN_RANGE_P(result); +} + + +/* Btree support */ + +/* btree comparator */ +Datum +range_cmp(PG_FUNCTION_ARGS) +{ + RangeType *r1 = PG_GETARG_RANGE_P(0); + RangeType *r2 = PG_GETARG_RANGE_P(1); + TypeCacheEntry *typcache; + RangeBound lower1, + lower2; + RangeBound upper1, + upper2; + bool empty1, + empty2; + int cmp; + + check_stack_depth(); /* recurses when subtype is a range type */ + + /* Different types should be prevented by ANYRANGE matching rules */ + if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2)) + elog(ERROR, "range types do not match"); + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1)); + + range_deserialize(typcache, r1, &lower1, &upper1, &empty1); + range_deserialize(typcache, r2, &lower2, &upper2, &empty2); + + /* For b-tree use, empty ranges sort before all else */ + if (empty1 && empty2) + cmp = 0; + else if (empty1) + cmp = -1; + else if (empty2) + cmp = 1; + else + { + cmp = range_cmp_bounds(typcache, &lower1, &lower2); + if (cmp == 0) + cmp = range_cmp_bounds(typcache, &upper1, &upper2); + } + + PG_FREE_IF_COPY(r1, 0); + PG_FREE_IF_COPY(r2, 1); + + PG_RETURN_INT32(cmp); +} + +/* inequality operators using the range_cmp function */ +Datum +range_lt(PG_FUNCTION_ARGS) +{ + int cmp = range_cmp(fcinfo); + + PG_RETURN_BOOL(cmp < 0); +} + +Datum +range_le(PG_FUNCTION_ARGS) +{ + int cmp = range_cmp(fcinfo); + + PG_RETURN_BOOL(cmp <= 0); +} + +Datum +range_ge(PG_FUNCTION_ARGS) +{ + int cmp = range_cmp(fcinfo); + + PG_RETURN_BOOL(cmp >= 0); +} + +Datum +range_gt(PG_FUNCTION_ARGS) +{ + int cmp = range_cmp(fcinfo); + + PG_RETURN_BOOL(cmp > 0); +} + +/* Hash support */ + +/* hash a range value */ +Datum +hash_range(PG_FUNCTION_ARGS) +{ + RangeType *r = PG_GETARG_RANGE_P(0); + uint32 result; + TypeCacheEntry *typcache; + TypeCacheEntry *scache; + RangeBound lower; + RangeBound 
upper; + bool empty; + char flags; + uint32 lower_hash; + uint32 upper_hash; + + check_stack_depth(); /* recurses when subtype is a range type */ + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r)); + + /* deserialize */ + range_deserialize(typcache, r, &lower, &upper, &empty); + flags = range_get_flags(r); + + /* + * Look up the element type's hash function, if not done already. + */ + scache = typcache->rngelemtype; + if (!OidIsValid(scache->hash_proc_finfo.fn_oid)) + { + scache = lookup_type_cache(scache->type_id, TYPECACHE_HASH_PROC_FINFO); + if (!OidIsValid(scache->hash_proc_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify a hash function for type %s", + format_type_be(scache->type_id)))); + } + + /* + * Apply the hash function to each bound. + */ + if (RANGE_HAS_LBOUND(flags)) + lower_hash = DatumGetUInt32(FunctionCall1Coll(&scache->hash_proc_finfo, + typcache->rng_collation, + lower.val)); + else + lower_hash = 0; + + if (RANGE_HAS_UBOUND(flags)) + upper_hash = DatumGetUInt32(FunctionCall1Coll(&scache->hash_proc_finfo, + typcache->rng_collation, + upper.val)); + else + upper_hash = 0; + + /* Merge hashes of flags and bounds */ + result = hash_uint32((uint32) flags); + result ^= lower_hash; + result = pg_rotate_left32(result, 1); + result ^= upper_hash; + + PG_RETURN_INT32(result); +} + +/* + * Returns 64-bit value by hashing a value to a 64-bit value, with a seed. + * Otherwise, similar to hash_range. + */ +Datum +hash_range_extended(PG_FUNCTION_ARGS) +{ + RangeType *r = PG_GETARG_RANGE_P(0); + Datum seed = PG_GETARG_DATUM(1); + uint64 result; + TypeCacheEntry *typcache; + TypeCacheEntry *scache; + RangeBound lower; + RangeBound upper; + bool empty; + char flags; + uint64 lower_hash; + uint64 upper_hash; + + check_stack_depth(); + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r)); + + range_deserialize(typcache, r, &lower, &upper, &empty); + flags = range_get_flags(r); + + scache = typcache->rngelemtype; + if (!OidIsValid(scache->hash_extended_proc_finfo.fn_oid)) + { + scache = lookup_type_cache(scache->type_id, + TYPECACHE_HASH_EXTENDED_PROC_FINFO); + if (!OidIsValid(scache->hash_extended_proc_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify a hash function for type %s", + format_type_be(scache->type_id)))); + } + + if (RANGE_HAS_LBOUND(flags)) + lower_hash = DatumGetUInt64(FunctionCall2Coll(&scache->hash_extended_proc_finfo, + typcache->rng_collation, + lower.val, + seed)); + else + lower_hash = 0; + + if (RANGE_HAS_UBOUND(flags)) + upper_hash = DatumGetUInt64(FunctionCall2Coll(&scache->hash_extended_proc_finfo, + typcache->rng_collation, + upper.val, + seed)); + else + upper_hash = 0; + + /* Merge hashes of flags and bounds */ + result = DatumGetUInt64(hash_uint32_extended((uint32) flags, + DatumGetInt64(seed))); + result ^= lower_hash; + result = ROTATE_HIGH_AND_LOW_32BITS(result); + result ^= upper_hash; + + PG_RETURN_UINT64(result); +} + +/* + *---------------------------------------------------------- + * CANONICAL FUNCTIONS + * + * Functions for specific built-in range types. 
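+ *
+ * Canonicalization rewrites a discrete range into the inclusive-lower,
+ * exclusive-upper form: for example int4range(1, 4, '(]') comes out as
+ * [2,5), and a daterange with an inclusive upper bound has that bound
+ * bumped to the following day and marked exclusive.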
+ *---------------------------------------------------------- + */ + +Datum +int4range_canonical(PG_FUNCTION_ARGS) +{ + RangeType *r = PG_GETARG_RANGE_P(0); + Node *escontext = fcinfo->context; + TypeCacheEntry *typcache; + RangeBound lower; + RangeBound upper; + bool empty; + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r)); + + range_deserialize(typcache, r, &lower, &upper, &empty); + + if (empty) + PG_RETURN_RANGE_P(r); + + if (!lower.infinite && !lower.inclusive) + { + int32 bnd = DatumGetInt32(lower.val); + + /* Handle possible overflow manually */ + if (unlikely(bnd == PG_INT32_MAX)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + lower.val = Int32GetDatum(bnd + 1); + lower.inclusive = true; + } + + if (!upper.infinite && upper.inclusive) + { + int32 bnd = DatumGetInt32(upper.val); + + /* Handle possible overflow manually */ + if (unlikely(bnd == PG_INT32_MAX)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + upper.val = Int32GetDatum(bnd + 1); + upper.inclusive = false; + } + + PG_RETURN_RANGE_P(range_serialize(typcache, &lower, &upper, + false, escontext)); +} + +Datum +int8range_canonical(PG_FUNCTION_ARGS) +{ + RangeType *r = PG_GETARG_RANGE_P(0); + Node *escontext = fcinfo->context; + TypeCacheEntry *typcache; + RangeBound lower; + RangeBound upper; + bool empty; + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r)); + + range_deserialize(typcache, r, &lower, &upper, &empty); + + if (empty) + PG_RETURN_RANGE_P(r); + + if (!lower.infinite && !lower.inclusive) + { + int64 bnd = DatumGetInt64(lower.val); + + /* Handle possible overflow manually */ + if (unlikely(bnd == PG_INT64_MAX)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + lower.val = Int64GetDatum(bnd + 1); + lower.inclusive = true; + } + + if (!upper.infinite && upper.inclusive) + { + int64 bnd = DatumGetInt64(upper.val); + + /* Handle possible overflow manually */ + if (unlikely(bnd == PG_INT64_MAX)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + upper.val = Int64GetDatum(bnd + 1); + upper.inclusive = false; + } + + PG_RETURN_RANGE_P(range_serialize(typcache, &lower, &upper, + false, escontext)); +} + +Datum +daterange_canonical(PG_FUNCTION_ARGS) +{ + RangeType *r = PG_GETARG_RANGE_P(0); + Node *escontext = fcinfo->context; + TypeCacheEntry *typcache; + RangeBound lower; + RangeBound upper; + bool empty; + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r)); + + range_deserialize(typcache, r, &lower, &upper, &empty); + + if (empty) + PG_RETURN_RANGE_P(r); + + if (!lower.infinite && !DATE_NOT_FINITE(DatumGetDateADT(lower.val)) && + !lower.inclusive) + { + DateADT bnd = DatumGetDateADT(lower.val); + + /* Check for overflow -- note we already eliminated PG_INT32_MAX */ + bnd++; + if (unlikely(!IS_VALID_DATE(bnd))) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("date out of range"))); + lower.val = DateADTGetDatum(bnd); + lower.inclusive = true; + } + + if (!upper.infinite && !DATE_NOT_FINITE(DatumGetDateADT(upper.val)) && + upper.inclusive) + { + DateADT bnd = DatumGetDateADT(upper.val); + + /* Check for overflow -- note we already eliminated PG_INT32_MAX */ + bnd++; + if (unlikely(!IS_VALID_DATE(bnd))) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 
+ errmsg("date out of range"))); + upper.val = DateADTGetDatum(bnd); + upper.inclusive = false; + } + + PG_RETURN_RANGE_P(range_serialize(typcache, &lower, &upper, + false, escontext)); +} + +/* + *---------------------------------------------------------- + * SUBTYPE_DIFF FUNCTIONS + * + * Functions for specific built-in range types. + * + * Note that subtype_diff does return the difference, not the absolute value + * of the difference, and it must take care to avoid overflow. + * (numrange_subdiff is at some risk there ...) + *---------------------------------------------------------- + */ + +Datum +int4range_subdiff(PG_FUNCTION_ARGS) +{ + int32 v1 = PG_GETARG_INT32(0); + int32 v2 = PG_GETARG_INT32(1); + + PG_RETURN_FLOAT8((float8) v1 - (float8) v2); +} + +Datum +int8range_subdiff(PG_FUNCTION_ARGS) +{ + int64 v1 = PG_GETARG_INT64(0); + int64 v2 = PG_GETARG_INT64(1); + + PG_RETURN_FLOAT8((float8) v1 - (float8) v2); +} + +Datum +numrange_subdiff(PG_FUNCTION_ARGS) +{ + Datum v1 = PG_GETARG_DATUM(0); + Datum v2 = PG_GETARG_DATUM(1); + Datum numresult; + float8 floatresult; + + numresult = DirectFunctionCall2(numeric_sub, v1, v2); + + floatresult = DatumGetFloat8(DirectFunctionCall1(numeric_float8, + numresult)); + + PG_RETURN_FLOAT8(floatresult); +} + +Datum +daterange_subdiff(PG_FUNCTION_ARGS) +{ + int32 v1 = PG_GETARG_INT32(0); + int32 v2 = PG_GETARG_INT32(1); + + PG_RETURN_FLOAT8((float8) v1 - (float8) v2); +} + +Datum +tsrange_subdiff(PG_FUNCTION_ARGS) +{ + Timestamp v1 = PG_GETARG_TIMESTAMP(0); + Timestamp v2 = PG_GETARG_TIMESTAMP(1); + float8 result; + + result = ((float8) v1 - (float8) v2) / USECS_PER_SEC; + PG_RETURN_FLOAT8(result); +} + +Datum +tstzrange_subdiff(PG_FUNCTION_ARGS) +{ + Timestamp v1 = PG_GETARG_TIMESTAMP(0); + Timestamp v2 = PG_GETARG_TIMESTAMP(1); + float8 result; + + result = ((float8) v1 - (float8) v2) / USECS_PER_SEC; + PG_RETURN_FLOAT8(result); +} + +/* + *---------------------------------------------------------- + * SUPPORT FUNCTIONS + * + * These functions aren't in pg_proc, but are useful for + * defining new generic range functions in C. + *---------------------------------------------------------- + */ + +/* + * range_get_typcache: get cached information about a range type + * + * This is for use by range-related functions that follow the convention + * of using the fn_extra field as a pointer to the type cache entry for + * the range type. Functions that need to cache more information than + * that must fend for themselves. + */ +TypeCacheEntry * +range_get_typcache(FunctionCallInfo fcinfo, Oid rngtypid) +{ + TypeCacheEntry *typcache = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; + + if (typcache == NULL || + typcache->type_id != rngtypid) + { + typcache = lookup_type_cache(rngtypid, TYPECACHE_RANGE_INFO); + if (typcache->rngelemtype == NULL) + elog(ERROR, "type %u is not a range type", rngtypid); + fcinfo->flinfo->fn_extra = (void *) typcache; + } + + return typcache; +} + +/* + * range_serialize: construct a range value from bounds and empty-flag + * + * This does not force canonicalization of the range value. In most cases, + * external callers should only be canonicalization functions. Note that + * we perform some datatype-independent canonicalization checks anyway. 
+ */ +RangeType * +range_serialize(TypeCacheEntry *typcache, RangeBound *lower, RangeBound *upper, + bool empty, struct Node *escontext) +{ + RangeType *range; + int cmp; + Size msize; + Pointer ptr; + int16 typlen; + bool typbyval; + char typalign; + char typstorage; + char flags = 0; + + /* + * Verify range is not invalid on its face, and construct flags value, + * preventing any non-canonical combinations such as infinite+inclusive. + */ + Assert(lower->lower); + Assert(!upper->lower); + + if (empty) + flags |= RANGE_EMPTY; + else + { + cmp = range_cmp_bound_values(typcache, lower, upper); + + /* error check: if lower bound value is above upper, it's wrong */ + if (cmp > 0) + ereturn(escontext, NULL, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("range lower bound must be less than or equal to range upper bound"))); + + /* if bounds are equal, and not both inclusive, range is empty */ + if (cmp == 0 && !(lower->inclusive && upper->inclusive)) + flags |= RANGE_EMPTY; + else + { + /* infinite boundaries are never inclusive */ + if (lower->infinite) + flags |= RANGE_LB_INF; + else if (lower->inclusive) + flags |= RANGE_LB_INC; + if (upper->infinite) + flags |= RANGE_UB_INF; + else if (upper->inclusive) + flags |= RANGE_UB_INC; + } + } + + /* Fetch information about range's element type */ + typlen = typcache->rngelemtype->typlen; + typbyval = typcache->rngelemtype->typbyval; + typalign = typcache->rngelemtype->typalign; + typstorage = typcache->rngelemtype->typstorage; + + /* Count space for varlena header and range type's OID */ + msize = sizeof(RangeType); + Assert(msize == MAXALIGN(msize)); + + /* Count space for bounds */ + if (RANGE_HAS_LBOUND(flags)) + { + /* + * Make sure item to be inserted is not toasted. It is essential that + * we not insert an out-of-line toast value pointer into a range + * object, for the same reasons that arrays and records can't contain + * them. It would work to store a compressed-in-line value, but we + * prefer to decompress and then let compression be applied to the + * whole range object if necessary. But, unlike arrays, we do allow + * short-header varlena objects to stay as-is. + */ + if (typlen == -1) + lower->val = PointerGetDatum(PG_DETOAST_DATUM_PACKED(lower->val)); + + msize = datum_compute_size(msize, lower->val, typbyval, typalign, + typlen, typstorage); + } + + if (RANGE_HAS_UBOUND(flags)) + { + /* Make sure item to be inserted is not toasted */ + if (typlen == -1) + upper->val = PointerGetDatum(PG_DETOAST_DATUM_PACKED(upper->val)); + + msize = datum_compute_size(msize, upper->val, typbyval, typalign, + typlen, typstorage); + } + + /* Add space for flag byte */ + msize += sizeof(char); + + /* Note: zero-fill is required here, just as in heap tuples */ + range = (RangeType *) palloc0(msize); + SET_VARSIZE(range, msize); + + /* Now fill in the datum */ + range->rangetypid = typcache->type_id; + + ptr = (char *) (range + 1); + + if (RANGE_HAS_LBOUND(flags)) + { + Assert(lower->lower); + ptr = datum_write(ptr, lower->val, typbyval, typalign, typlen, + typstorage); + } + + if (RANGE_HAS_UBOUND(flags)) + { + Assert(!upper->lower); + ptr = datum_write(ptr, upper->val, typbyval, typalign, typlen, + typstorage); + } + + *((char *) ptr) = flags; + + return range; +} + +/* + * range_deserialize: deconstruct a range value + * + * NB: the given range object must be fully detoasted; it cannot have a + * short varlena header. 
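+ *
+ * (Editorial note, not part of the original comment: callers normally obtain
+ * such a value via DatumGetRangeTypeP() or PG_GETARG_RANGE_P(), both of
+ * which detoast for them.)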
+ * + * Note that if the element type is pass-by-reference, the datums in the + * RangeBound structs will be pointers into the given range object. + */ +void +range_deserialize(TypeCacheEntry *typcache, const RangeType *range, + RangeBound *lower, RangeBound *upper, bool *empty) +{ + char flags; + int16 typlen; + bool typbyval; + char typalign; + Pointer ptr; + Datum lbound; + Datum ubound; + + /* assert caller passed the right typcache entry */ + Assert(RangeTypeGetOid(range) == typcache->type_id); + + /* fetch the flag byte from datum's last byte */ + flags = *((const char *) range + VARSIZE(range) - 1); + + /* fetch information about range's element type */ + typlen = typcache->rngelemtype->typlen; + typbyval = typcache->rngelemtype->typbyval; + typalign = typcache->rngelemtype->typalign; + + /* initialize data pointer just after the range OID */ + ptr = (Pointer) (range + 1); + + /* fetch lower bound, if any */ + if (RANGE_HAS_LBOUND(flags)) + { + /* att_align_pointer cannot be necessary here */ + lbound = fetch_att(ptr, typbyval, typlen); + ptr = (Pointer) att_addlength_pointer(ptr, typlen, ptr); + } + else + lbound = (Datum) 0; + + /* fetch upper bound, if any */ + if (RANGE_HAS_UBOUND(flags)) + { + ptr = (Pointer) att_align_pointer(ptr, typalign, typlen, ptr); + ubound = fetch_att(ptr, typbyval, typlen); + /* no need for att_addlength_pointer */ + } + else + ubound = (Datum) 0; + + /* emit results */ + + *empty = (flags & RANGE_EMPTY) != 0; + + lower->val = lbound; + lower->infinite = (flags & RANGE_LB_INF) != 0; + lower->inclusive = (flags & RANGE_LB_INC) != 0; + lower->lower = true; + + upper->val = ubound; + upper->infinite = (flags & RANGE_UB_INF) != 0; + upper->inclusive = (flags & RANGE_UB_INC) != 0; + upper->lower = false; +} + +/* + * range_get_flags: just get the flags from a RangeType value. + * + * This is frequently useful in places that only need the flags and not + * the full results of range_deserialize. + */ +char +range_get_flags(const RangeType *range) +{ + /* fetch the flag byte from datum's last byte */ + return *((char *) range + VARSIZE(range) - 1); +} + +/* + * range_set_contain_empty: set the RANGE_CONTAIN_EMPTY bit in the value. + * + * This is only needed in GiST operations, so we don't include a provision + * for setting it in range_serialize; rather, this function must be applied + * afterwards. + */ +void +range_set_contain_empty(RangeType *range) +{ + char *flagsp; + + /* flag byte is datum's last byte */ + flagsp = (char *) range + VARSIZE(range) - 1; + + *flagsp |= RANGE_CONTAIN_EMPTY; +} + +/* + * This both serializes and canonicalizes (if applicable) the range. + * This should be used by most callers. + */ +RangeType * +make_range(TypeCacheEntry *typcache, RangeBound *lower, RangeBound *upper, + bool empty, struct Node *escontext) +{ + RangeType *range; + + range = range_serialize(typcache, lower, upper, empty, escontext); + + if (SOFT_ERROR_OCCURRED(escontext)) + return NULL; + + /* no need to call canonical on empty ranges ... 
*/ + if (OidIsValid(typcache->rng_canonical_finfo.fn_oid) && + !RangeIsEmpty(range)) + { + /* Do this the hard way so that we can pass escontext */ + LOCAL_FCINFO(fcinfo, 1); + Datum result; + + InitFunctionCallInfoData(*fcinfo, &typcache->rng_canonical_finfo, 1, + InvalidOid, escontext, NULL); + + fcinfo->args[0].value = RangeTypePGetDatum(range); + fcinfo->args[0].isnull = false; + + result = FunctionCallInvoke(fcinfo); + + if (SOFT_ERROR_OCCURRED(escontext)) + return NULL; + + /* Should not get a null result if there was no error */ + if (fcinfo->isnull) + elog(ERROR, "function %u returned NULL", + typcache->rng_canonical_finfo.fn_oid); + + range = DatumGetRangeTypeP(result); + } + + return range; +} + +/* + * Compare two range boundary points, returning <0, 0, or >0 according to + * whether b1 is less than, equal to, or greater than b2. + * + * The boundaries can be any combination of upper and lower; so it's useful + * for a variety of operators. + * + * The simple case is when b1 and b2 are both finite and inclusive, in which + * case the result is just a comparison of the values held in b1 and b2. + * + * If a bound is exclusive, then we need to know whether it's a lower bound, + * in which case we treat the boundary point as "just greater than" the held + * value; or an upper bound, in which case we treat the boundary point as + * "just less than" the held value. + * + * If a bound is infinite, it represents minus infinity (less than every other + * point) if it's a lower bound; or plus infinity (greater than every other + * point) if it's an upper bound. + * + * There is only one case where two boundaries compare equal but are not + * identical: when both bounds are inclusive and hold the same finite value, + * but one is an upper bound and the other a lower bound. + */ +int +range_cmp_bounds(TypeCacheEntry *typcache, const RangeBound *b1, const RangeBound *b2) +{ + int32 result; + + /* + * First, handle cases involving infinity, which don't require invoking + * the comparison proc. + */ + if (b1->infinite && b2->infinite) + { + /* + * Both are infinity, so they are equal unless one is lower and the + * other not. + */ + if (b1->lower == b2->lower) + return 0; + else + return b1->lower ? -1 : 1; + } + else if (b1->infinite) + return b1->lower ? -1 : 1; + else if (b2->infinite) + return b2->lower ? 1 : -1; + + /* + * Both boundaries are finite, so compare the held values. + */ + result = DatumGetInt32(FunctionCall2Coll(&typcache->rng_cmp_proc_finfo, + typcache->rng_collation, + b1->val, b2->val)); + + /* + * If the comparison is anything other than equal, we're done. If they + * compare equal though, we still have to consider whether the boundaries + * are inclusive or exclusive. + */ + if (result == 0) + { + if (!b1->inclusive && !b2->inclusive) + { + /* both are exclusive */ + if (b1->lower == b2->lower) + return 0; + else + return b1->lower ? 1 : -1; + } + else if (!b1->inclusive) + return b1->lower ? 1 : -1; + else if (!b2->inclusive) + return b2->lower ? -1 : 1; + else + { + /* + * Both are inclusive and the values held are equal, so they are + * equal regardless of whether they are upper or lower boundaries, + * or a mix. + */ + return 0; + } + } + + return result; +} + +/* + * Compare two range boundary point values, returning <0, 0, or >0 according + * to whether b1 is less than, equal to, or greater than b2. + * + * This is similar to but simpler than range_cmp_bounds(). We just compare + * the values held in b1 and b2, ignoring inclusive/exclusive flags. 
The + * lower/upper flags only matter for infinities, where they tell us if the + * infinity is plus or minus. + */ +int +range_cmp_bound_values(TypeCacheEntry *typcache, const RangeBound *b1, + const RangeBound *b2) +{ + /* + * First, handle cases involving infinity, which don't require invoking + * the comparison proc. + */ + if (b1->infinite && b2->infinite) + { + /* + * Both are infinity, so they are equal unless one is lower and the + * other not. + */ + if (b1->lower == b2->lower) + return 0; + else + return b1->lower ? -1 : 1; + } + else if (b1->infinite) + return b1->lower ? -1 : 1; + else if (b2->infinite) + return b2->lower ? 1 : -1; + + /* + * Both boundaries are finite, so compare the held values. + */ + return DatumGetInt32(FunctionCall2Coll(&typcache->rng_cmp_proc_finfo, + typcache->rng_collation, + b1->val, b2->val)); +} + +/* + * qsort callback for sorting ranges. + * + * Two empty ranges compare equal; an empty range sorts to the left of any + * non-empty range. Two non-empty ranges are sorted by lower bound first + * and by upper bound next. + */ +int +range_compare(const void *key1, const void *key2, void *arg) +{ + RangeType *r1 = *(RangeType **) key1; + RangeType *r2 = *(RangeType **) key2; + TypeCacheEntry *typcache = (TypeCacheEntry *) arg; + RangeBound lower1; + RangeBound upper1; + RangeBound lower2; + RangeBound upper2; + bool empty1; + bool empty2; + int cmp; + + range_deserialize(typcache, r1, &lower1, &upper1, &empty1); + range_deserialize(typcache, r2, &lower2, &upper2, &empty2); + + if (empty1 && empty2) + cmp = 0; + else if (empty1) + cmp = -1; + else if (empty2) + cmp = 1; + else + { + cmp = range_cmp_bounds(typcache, &lower1, &lower2); + if (cmp == 0) + cmp = range_cmp_bounds(typcache, &upper1, &upper2); + } + + return cmp; +} + +/* + * Build an empty range value of the type indicated by the typcache entry. + */ +RangeType * +make_empty_range(TypeCacheEntry *typcache) +{ + RangeBound lower; + RangeBound upper; + + lower.val = (Datum) 0; + lower.infinite = false; + lower.inclusive = false; + lower.lower = true; + + upper.val = (Datum) 0; + upper.infinite = false; + upper.inclusive = false; + upper.lower = false; + + return make_range(typcache, &lower, &upper, true, NULL); +} + + +/* + *---------------------------------------------------------- + * STATIC FUNCTIONS + *---------------------------------------------------------- + */ + +/* + * Given a string representing the flags for the range type, return the flags + * represented as a char. + */ +static char +range_parse_flags(const char *flags_str) +{ + char flags = 0; + + if (flags_str[0] == '\0' || + flags_str[1] == '\0' || + flags_str[2] != '\0') + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid range bound flags"), + errhint("Valid values are \"[]\", \"[)\", \"(]\", and \"()\"."))); + + switch (flags_str[0]) + { + case '[': + flags |= RANGE_LB_INC; + break; + case '(': + break; + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid range bound flags"), + errhint("Valid values are \"[]\", \"[)\", \"(]\", and \"()\"."))); + } + + switch (flags_str[1]) + { + case ']': + flags |= RANGE_UB_INC; + break; + case ')': + break; + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid range bound flags"), + errhint("Valid values are \"[]\", \"[)\", \"(]\", and \"()\"."))); + } + + return flags; +} + +/* + * Parse range input. 
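+ *
+ * (Editorial illustration, not part of the original comment: "empty",
+ * "[1,10)" and "(,5]" are all accepted literals; the omitted bound in the
+ * last one is read as infinite, and a bound containing a comma, such as
+ * "a,b", must be double-quoted for the comma to be taken literally.)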
+ * + * Input parameters: + * string: input string to be parsed + * Output parameters: + * *flags: receives flags bitmask + * *lbound_str: receives palloc'd lower bound string, or NULL if none + * *ubound_str: receives palloc'd upper bound string, or NULL if none + * + * This is modeled somewhat after record_in in rowtypes.c. + * The input syntax is: + * <range> := EMPTY + * | <lb-inc> <string>, <string> <ub-inc> + * <lb-inc> := '[' | '(' + * <ub-inc> := ']' | ')' + * + * Whitespace before or after <range> is ignored. Whitespace within a <string> + * is taken literally and becomes part of the input string for that bound. + * + * A <string> of length zero is taken as "infinite" (i.e. no bound), unless it + * is surrounded by double-quotes, in which case it is the literal empty + * string. + * + * Within a <string>, special characters (such as comma, parenthesis, or + * brackets) can be enclosed in double-quotes or escaped with backslash. Within + * double-quotes, a double-quote can be escaped with double-quote or backslash. + * + * Returns true on success, false on failure (but failures will return only if + * escontext is an ErrorSaveContext). + */ +static bool +range_parse(const char *string, char *flags, char **lbound_str, + char **ubound_str, Node *escontext) +{ + const char *ptr = string; + bool infinite; + + *flags = 0; + + /* consume whitespace */ + while (*ptr != '\0' && isspace((unsigned char) *ptr)) + ptr++; + + /* check for empty range */ + if (pg_strncasecmp(ptr, RANGE_EMPTY_LITERAL, + strlen(RANGE_EMPTY_LITERAL)) == 0) + { + *flags = RANGE_EMPTY; + *lbound_str = NULL; + *ubound_str = NULL; + + ptr += strlen(RANGE_EMPTY_LITERAL); + + /* the rest should be whitespace */ + while (*ptr != '\0' && isspace((unsigned char) *ptr)) + ptr++; + + /* should have consumed everything */ + if (*ptr != '\0') + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed range literal: \"%s\"", + string), + errdetail("Junk after \"empty\" key word."))); + + return true; + } + + if (*ptr == '[') + { + *flags |= RANGE_LB_INC; + ptr++; + } + else if (*ptr == '(') + ptr++; + else + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed range literal: \"%s\"", + string), + errdetail("Missing left parenthesis or bracket."))); + + ptr = range_parse_bound(string, ptr, lbound_str, &infinite, escontext); + if (ptr == NULL) + return false; + if (infinite) + *flags |= RANGE_LB_INF; + + if (*ptr == ',') + ptr++; + else + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed range literal: \"%s\"", + string), + errdetail("Missing comma after lower bound."))); + + ptr = range_parse_bound(string, ptr, ubound_str, &infinite, escontext); + if (ptr == NULL) + return false; + if (infinite) + *flags |= RANGE_UB_INF; + + if (*ptr == ']') + { + *flags |= RANGE_UB_INC; + ptr++; + } + else if (*ptr == ')') + ptr++; + else /* must be a comma */ + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed range literal: \"%s\"", + string), + errdetail("Too many commas."))); + + /* consume whitespace */ + while (*ptr != '\0' && isspace((unsigned char) *ptr)) + ptr++; + + if (*ptr != '\0') + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed range literal: \"%s\"", + string), + errdetail("Junk after right parenthesis or bracket."))); + + return true; +} + +/* + * Helper for range_parse: parse and de-quote one bound string. 
+ * + * We scan until finding comma, right parenthesis, or right bracket. + * + * Input parameters: + * string: entire input string (used only for error reports) + * ptr: where to start parsing bound + * Output parameters: + * *bound_str: receives palloc'd bound string, or NULL if none + * *infinite: set true if no bound, else false + * + * The return value is the scan ptr, advanced past the bound string. + * However, if escontext is an ErrorSaveContext, we return NULL on failure. + */ +static const char * +range_parse_bound(const char *string, const char *ptr, + char **bound_str, bool *infinite, Node *escontext) +{ + StringInfoData buf; + + /* Check for null: completely empty input means null */ + if (*ptr == ',' || *ptr == ')' || *ptr == ']') + { + *bound_str = NULL; + *infinite = true; + } + else + { + /* Extract string for this bound */ + bool inquote = false; + + initStringInfo(&buf); + while (inquote || !(*ptr == ',' || *ptr == ')' || *ptr == ']')) + { + char ch = *ptr++; + + if (ch == '\0') + ereturn(escontext, NULL, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed range literal: \"%s\"", + string), + errdetail("Unexpected end of input."))); + if (ch == '\\') + { + if (*ptr == '\0') + ereturn(escontext, NULL, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed range literal: \"%s\"", + string), + errdetail("Unexpected end of input."))); + appendStringInfoChar(&buf, *ptr++); + } + else if (ch == '"') + { + if (!inquote) + inquote = true; + else if (*ptr == '"') + { + /* doubled quote within quote sequence */ + appendStringInfoChar(&buf, *ptr++); + } + else + inquote = false; + } + else + appendStringInfoChar(&buf, ch); + } + + *bound_str = buf.data; + *infinite = false; + } + + return ptr; +} + +/* + * Convert a deserialized range value to text form + * + * Inputs are the flags byte, and the two bound values already converted to + * text (but not yet quoted). If no bound value, pass NULL. + * + * Result is a palloc'd string + */ +static char * +range_deparse(char flags, const char *lbound_str, const char *ubound_str) +{ + StringInfoData buf; + + if (flags & RANGE_EMPTY) + return pstrdup(RANGE_EMPTY_LITERAL); + + initStringInfo(&buf); + + appendStringInfoChar(&buf, (flags & RANGE_LB_INC) ? '[' : '('); + + if (RANGE_HAS_LBOUND(flags)) + appendStringInfoString(&buf, range_bound_escape(lbound_str)); + + appendStringInfoChar(&buf, ','); + + if (RANGE_HAS_UBOUND(flags)) + appendStringInfoString(&buf, range_bound_escape(ubound_str)); + + appendStringInfoChar(&buf, (flags & RANGE_UB_INC) ? ']' : ')'); + + return buf.data; +} + +/* + * Helper for range_deparse: quote a bound value as needed + * + * Result is a palloc'd string + */ +static char * +range_bound_escape(const char *value) +{ + bool nq; + const char *ptr; + StringInfoData buf; + + initStringInfo(&buf); + + /* Detect whether we need double quotes for this value */ + nq = (value[0] == '\0'); /* force quotes for empty string */ + for (ptr = value; *ptr; ptr++) + { + char ch = *ptr; + + if (ch == '"' || ch == '\\' || + ch == '(' || ch == ')' || + ch == '[' || ch == ']' || + ch == ',' || + isspace((unsigned char) ch)) + { + nq = true; + break; + } + } + + /* And emit the string */ + if (nq) + appendStringInfoChar(&buf, '"'); + for (ptr = value; *ptr; ptr++) + { + char ch = *ptr; + + if (ch == '"' || ch == '\\') + appendStringInfoChar(&buf, ch); + appendStringInfoChar(&buf, ch); + } + if (nq) + appendStringInfoChar(&buf, '"'); + + return buf.data; +} + +/* + * Test whether range r1 contains range r2. 
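+ *
+ * (Editorial illustration, not part of the original comment: [1,10) contains
+ * [2,3] as well as the empty range, while an empty r1 contains only an
+ * empty r2.)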
+ *
+ * Caller has already checked that they are the same range type, and looked up
+ * the necessary typcache entry.
+ */
+bool
+range_contains_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
+{
+ RangeBound lower1;
+ RangeBound upper1;
+ bool empty1;
+ RangeBound lower2;
+ RangeBound upper2;
+ bool empty2;
+
+ /* Different types should be prevented by ANYRANGE matching rules */
+ if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2))
+ elog(ERROR, "range types do not match");
+
+ range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
+ range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
+
+ /* If either range is empty, the answer is easy */
+ if (empty2)
+ return true;
+ else if (empty1)
+ return false;
+
+ /* Else we must have lower1 <= lower2 and upper1 >= upper2 */
+ if (range_cmp_bounds(typcache, &lower1, &lower2) > 0)
+ return false;
+ if (range_cmp_bounds(typcache, &upper1, &upper2) < 0)
+ return false;
+
+ return true;
+}
+
+bool
+range_contained_by_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
+{
+ return range_contains_internal(typcache, r2, r1);
+}
+
+/*
+ * Test whether range r contains a specific element value.
+ */
+bool
+range_contains_elem_internal(TypeCacheEntry *typcache, const RangeType *r, Datum val)
+{
+ RangeBound lower;
+ RangeBound upper;
+ bool empty;
+ int32 cmp;
+
+ range_deserialize(typcache, r, &lower, &upper, &empty);
+
+ if (empty)
+ return false;
+
+ if (!lower.infinite)
+ {
+ cmp = DatumGetInt32(FunctionCall2Coll(&typcache->rng_cmp_proc_finfo,
+ typcache->rng_collation,
+ lower.val, val));
+ if (cmp > 0)
+ return false;
+ if (cmp == 0 && !lower.inclusive)
+ return false;
+ }
+
+ if (!upper.infinite)
+ {
+ cmp = DatumGetInt32(FunctionCall2Coll(&typcache->rng_cmp_proc_finfo,
+ typcache->rng_collation,
+ upper.val, val));
+ if (cmp < 0)
+ return false;
+ if (cmp == 0 && !upper.inclusive)
+ return false;
+ }
+
+ return true;
+}
+
+
+/*
+ * datum_compute_size() and datum_write() are used to insert the bound
+ * values into a range object. They are modeled after heaptuple.c's
+ * heap_compute_data_size() and heap_fill_tuple(), but we need not handle
+ * null values here. TYPE_IS_PACKABLE must test the same conditions as
+ * heaptuple.c's ATT_IS_PACKABLE macro. See the comments there for more
+ * details.
+ */
+
+/* Does datatype allow packing into the 1-byte-header varlena format? */
+#define TYPE_IS_PACKABLE(typlen, typstorage) \
+ ((typlen) == -1 && (typstorage) != TYPSTORAGE_PLAIN)
+
+/*
+ * Increment data_length by the space needed by the datum, including any
+ * preceding alignment padding.
+ */
+static Size
+datum_compute_size(Size data_length, Datum val, bool typbyval, char typalign,
+ int16 typlen, char typstorage)
+{
+ if (TYPE_IS_PACKABLE(typlen, typstorage) &&
+ VARATT_CAN_MAKE_SHORT(DatumGetPointer(val)))
+ {
+ /*
+ * we're anticipating converting to a short varlena header, so adjust
+ * length and don't count any alignment
+ */
+ data_length += VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(val));
+ }
+ else
+ {
+ data_length = att_align_datum(data_length, typalign, typlen, val);
+ data_length = att_addlength_datum(data_length, typlen, val);
+ }
+
+ return data_length;
+}
+
+/*
+ * Write the given datum beginning at ptr (after advancing to correct
+ * alignment, if needed). Return the pointer incremented by space used.
+ */ +static Pointer +datum_write(Pointer ptr, Datum datum, bool typbyval, char typalign, + int16 typlen, char typstorage) +{ + Size data_length; + + if (typbyval) + { + /* pass-by-value */ + ptr = (char *) att_align_nominal(ptr, typalign); + store_att_byval(ptr, datum, typlen); + data_length = typlen; + } + else if (typlen == -1) + { + /* varlena */ + Pointer val = DatumGetPointer(datum); + + if (VARATT_IS_EXTERNAL(val)) + { + /* + * Throw error, because we must never put a toast pointer inside a + * range object. Caller should have detoasted it. + */ + elog(ERROR, "cannot store a toast pointer inside a range"); + data_length = 0; /* keep compiler quiet */ + } + else if (VARATT_IS_SHORT(val)) + { + /* no alignment for short varlenas */ + data_length = VARSIZE_SHORT(val); + memcpy(ptr, val, data_length); + } + else if (TYPE_IS_PACKABLE(typlen, typstorage) && + VARATT_CAN_MAKE_SHORT(val)) + { + /* convert to short varlena -- no alignment */ + data_length = VARATT_CONVERTED_SHORT_SIZE(val); + SET_VARSIZE_SHORT(ptr, data_length); + memcpy(ptr + 1, VARDATA(val), data_length - 1); + } + else + { + /* full 4-byte header varlena */ + ptr = (char *) att_align_nominal(ptr, typalign); + data_length = VARSIZE(val); + memcpy(ptr, val, data_length); + } + } + else if (typlen == -2) + { + /* cstring ... never needs alignment */ + Assert(typalign == TYPALIGN_CHAR); + data_length = strlen(DatumGetCString(datum)) + 1; + memcpy(ptr, DatumGetPointer(datum), data_length); + } + else + { + /* fixed-length pass-by-reference */ + ptr = (char *) att_align_nominal(ptr, typalign); + Assert(typlen > 0); + data_length = typlen; + memcpy(ptr, DatumGetPointer(datum), data_length); + } + + ptr += data_length; + + return ptr; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/rangetypes_gist.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/rangetypes_gist.c new file mode 100644 index 00000000000..08846783818 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/rangetypes_gist.c @@ -0,0 +1,1799 @@ +/*------------------------------------------------------------------------- + * + * rangetypes_gist.c + * GiST support for range types. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/rangetypes_gist.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/gist.h" +#include "access/stratnum.h" +#include "utils/datum.h" +#include "utils/float.h" +#include "utils/fmgrprotos.h" +#include "utils/multirangetypes.h" +#include "utils/rangetypes.h" + +/* + * Range class properties used to segregate different classes of ranges in + * GiST. Each unique combination of properties is a class. CLS_EMPTY cannot + * be combined with anything else. + */ +#define CLS_NORMAL 0 /* Ordinary finite range (no bits set) */ +#define CLS_LOWER_INF 1 /* Lower bound is infinity */ +#define CLS_UPPER_INF 2 /* Upper bound is infinity */ +#define CLS_CONTAIN_EMPTY 4 /* Contains underlying empty ranges */ +#define CLS_EMPTY 8 /* Special class for empty ranges */ + +#define CLS_COUNT 9 /* # of classes; includes all combinations of + * properties. CLS_EMPTY doesn't combine with + * anything else, so it's only 2^3 + 1. */ + +/* + * Minimum accepted ratio of split for items of the same class. 
If the items + * are of different classes, we will separate along those lines regardless of + * the ratio. + */ +#define LIMIT_RATIO 0.3 + +/* Constants for fixed penalty values */ +#define INFINITE_BOUND_PENALTY 2.0 +#define CONTAIN_EMPTY_PENALTY 1.0 +#define DEFAULT_SUBTYPE_DIFF_PENALTY 1.0 + +/* + * Per-item data for range_gist_single_sorting_split. + */ +typedef struct +{ + int index; + RangeBound bound; +} SingleBoundSortItem; + +/* place on left or right side of split? */ +typedef enum +{ + SPLIT_LEFT = 0, /* makes initialization to SPLIT_LEFT easier */ + SPLIT_RIGHT +} SplitLR; + +/* + * Context for range_gist_consider_split. + */ +typedef struct +{ + TypeCacheEntry *typcache; /* typcache for range type */ + bool has_subtype_diff; /* does it have subtype_diff? */ + int entries_count; /* total number of entries being split */ + + /* Information about currently selected split follows */ + + bool first; /* true if no split was selected yet */ + + RangeBound *left_upper; /* upper bound of left interval */ + RangeBound *right_lower; /* lower bound of right interval */ + + float4 ratio; /* split ratio */ + float4 overlap; /* overlap between left and right predicate */ + int common_left; /* # common entries destined for each side */ + int common_right; +} ConsiderSplitContext; + +/* + * Bounds extracted from a non-empty range, for use in + * range_gist_double_sorting_split. + */ +typedef struct +{ + RangeBound lower; + RangeBound upper; +} NonEmptyRange; + +/* + * Represents information about an entry that can be placed in either group + * without affecting overlap over selected axis ("common entry"). + */ +typedef struct +{ + /* Index of entry in the initial array */ + int index; + /* Delta between closeness of range to each of the two groups */ + double delta; +} CommonEntry; + +/* Helper macros to place an entry in the left or right group during split */ +/* Note direct access to variables v, typcache, left_range, right_range */ +#define PLACE_LEFT(range, off) \ + do { \ + if (v->spl_nleft > 0) \ + left_range = range_super_union(typcache, left_range, range); \ + else \ + left_range = (range); \ + v->spl_left[v->spl_nleft++] = (off); \ + } while(0) + +#define PLACE_RIGHT(range, off) \ + do { \ + if (v->spl_nright > 0) \ + right_range = range_super_union(typcache, right_range, range); \ + else \ + right_range = (range); \ + v->spl_right[v->spl_nright++] = (off); \ + } while(0) + +/* Copy a RangeType datum (hardwires typbyval and typlen for ranges...) 
*/ +#define rangeCopy(r) \ + ((RangeType *) DatumGetPointer(datumCopy(PointerGetDatum(r), \ + false, -1))) + +static RangeType *range_super_union(TypeCacheEntry *typcache, RangeType *r1, + RangeType *r2); +static bool range_gist_consistent_int_range(TypeCacheEntry *typcache, + StrategyNumber strategy, + const RangeType *key, + const RangeType *query); +static bool range_gist_consistent_int_multirange(TypeCacheEntry *typcache, + StrategyNumber strategy, + const RangeType *key, + const MultirangeType *query); +static bool range_gist_consistent_int_element(TypeCacheEntry *typcache, + StrategyNumber strategy, + const RangeType *key, + Datum query); +static bool range_gist_consistent_leaf_range(TypeCacheEntry *typcache, + StrategyNumber strategy, + const RangeType *key, + const RangeType *query); +static bool range_gist_consistent_leaf_multirange(TypeCacheEntry *typcache, + StrategyNumber strategy, + const RangeType *key, + const MultirangeType *query); +static bool range_gist_consistent_leaf_element(TypeCacheEntry *typcache, + StrategyNumber strategy, + const RangeType *key, + Datum query); +static void range_gist_fallback_split(TypeCacheEntry *typcache, + GistEntryVector *entryvec, + GIST_SPLITVEC *v); +static void range_gist_class_split(TypeCacheEntry *typcache, + GistEntryVector *entryvec, + GIST_SPLITVEC *v, + SplitLR *classes_groups); +static void range_gist_single_sorting_split(TypeCacheEntry *typcache, + GistEntryVector *entryvec, + GIST_SPLITVEC *v, + bool use_upper_bound); +static void range_gist_double_sorting_split(TypeCacheEntry *typcache, + GistEntryVector *entryvec, + GIST_SPLITVEC *v); +static void range_gist_consider_split(ConsiderSplitContext *context, + RangeBound *right_lower, int min_left_count, + RangeBound *left_upper, int max_left_count); +static int get_gist_range_class(RangeType *range); +static int single_bound_cmp(const void *a, const void *b, void *arg); +static int interval_cmp_lower(const void *a, const void *b, void *arg); +static int interval_cmp_upper(const void *a, const void *b, void *arg); +static int common_entry_cmp(const void *i1, const void *i2); +static float8 call_subtype_diff(TypeCacheEntry *typcache, + Datum val1, Datum val2); + + +/* GiST query consistency check */ +Datum +range_gist_consistent(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + Datum query = PG_GETARG_DATUM(1); + StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); + bool result; + Oid subtype = PG_GETARG_OID(3); + bool *recheck = (bool *) PG_GETARG_POINTER(4); + RangeType *key = DatumGetRangeTypeP(entry->key); + TypeCacheEntry *typcache; + + /* All operators served by this function are exact */ + *recheck = false; + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(key)); + + /* + * Perform consistent checking using function corresponding to key type + * (leaf or internal) and query subtype (range, multirange, or element). + * Note that invalid subtype means that query type matches key type + * (range). 
+ */ + if (GIST_LEAF(entry)) + { + if (!OidIsValid(subtype) || subtype == ANYRANGEOID) + result = range_gist_consistent_leaf_range(typcache, strategy, key, + DatumGetRangeTypeP(query)); + else if (subtype == ANYMULTIRANGEOID) + result = range_gist_consistent_leaf_multirange(typcache, strategy, key, + DatumGetMultirangeTypeP(query)); + else + result = range_gist_consistent_leaf_element(typcache, strategy, + key, query); + } + else + { + if (!OidIsValid(subtype) || subtype == ANYRANGEOID) + result = range_gist_consistent_int_range(typcache, strategy, key, + DatumGetRangeTypeP(query)); + else if (subtype == ANYMULTIRANGEOID) + result = range_gist_consistent_int_multirange(typcache, strategy, key, + DatumGetMultirangeTypeP(query)); + else + result = range_gist_consistent_int_element(typcache, strategy, + key, query); + } + PG_RETURN_BOOL(result); +} + +/* + * GiST compress method for multiranges: multirange is approximated as union + * range with no gaps. + */ +Datum +multirange_gist_compress(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + + if (entry->leafkey) + { + MultirangeType *mr = DatumGetMultirangeTypeP(entry->key); + RangeType *r; + TypeCacheEntry *typcache; + GISTENTRY *retval = palloc(sizeof(GISTENTRY)); + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + r = multirange_get_union_range(typcache->rngtype, mr); + + gistentryinit(*retval, RangeTypePGetDatum(r), + entry->rel, entry->page, entry->offset, false); + + PG_RETURN_POINTER(retval); + } + + PG_RETURN_POINTER(entry); +} + +/* GiST query consistency check for multiranges */ +Datum +multirange_gist_consistent(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + Datum query = PG_GETARG_DATUM(1); + StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); + bool result; + Oid subtype = PG_GETARG_OID(3); + bool *recheck = (bool *) PG_GETARG_POINTER(4); + RangeType *key = DatumGetRangeTypeP(entry->key); + TypeCacheEntry *typcache; + + /* + * All operators served by this function are inexact because multirange is + * approximated by union range with no gaps. + */ + *recheck = true; + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(key)); + + /* + * Perform consistent checking using function corresponding to key type + * (leaf or internal) and query subtype (range, multirange, or element). + * Note that invalid subtype means that query type matches key type + * (multirange). 
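+ *
+ * (Editorial illustration, not part of the original comment: a leaf key
+ * built by multirange_gist_compress from {[1,2), [5,6)} is stored as the
+ * gap-free range [1,6), which is why *recheck is set to true above.)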
+ */ + if (GIST_LEAF(entry)) + { + if (!OidIsValid(subtype) || subtype == ANYMULTIRANGEOID) + result = range_gist_consistent_leaf_multirange(typcache, strategy, key, + DatumGetMultirangeTypeP(query)); + else if (subtype == ANYRANGEOID) + result = range_gist_consistent_leaf_range(typcache, strategy, key, + DatumGetRangeTypeP(query)); + else + result = range_gist_consistent_leaf_element(typcache, strategy, + key, query); + } + else + { + if (!OidIsValid(subtype) || subtype == ANYMULTIRANGEOID) + result = range_gist_consistent_int_multirange(typcache, strategy, key, + DatumGetMultirangeTypeP(query)); + else if (subtype == ANYRANGEOID) + result = range_gist_consistent_int_range(typcache, strategy, key, + DatumGetRangeTypeP(query)); + else + result = range_gist_consistent_int_element(typcache, strategy, + key, query); + } + PG_RETURN_BOOL(result); +} + +/* form union range */ +Datum +range_gist_union(PG_FUNCTION_ARGS) +{ + GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); + GISTENTRY *ent = entryvec->vector; + RangeType *result_range; + TypeCacheEntry *typcache; + int i; + + result_range = DatumGetRangeTypeP(ent[0].key); + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(result_range)); + + for (i = 1; i < entryvec->n; i++) + { + result_range = range_super_union(typcache, result_range, + DatumGetRangeTypeP(ent[i].key)); + } + + PG_RETURN_RANGE_P(result_range); +} + +/* + * We store ranges as ranges in GiST indexes, so we do not need + * compress, decompress, or fetch functions. Note this implies a limit + * on the size of range values that can be indexed. + */ + +/* + * GiST page split penalty function. + * + * The penalty function has the following goals (in order from most to least + * important): + * - Keep normal ranges separate + * - Avoid broadening the class of the original predicate + * - Avoid broadening (as determined by subtype_diff) the original predicate + * - Favor adding ranges to narrower original predicates + */ +Datum +range_gist_penalty(PG_FUNCTION_ARGS) +{ + GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); + GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1); + float *penalty = (float *) PG_GETARG_POINTER(2); + RangeType *orig = DatumGetRangeTypeP(origentry->key); + RangeType *new = DatumGetRangeTypeP(newentry->key); + TypeCacheEntry *typcache; + bool has_subtype_diff; + RangeBound orig_lower, + new_lower, + orig_upper, + new_upper; + bool orig_empty, + new_empty; + + if (RangeTypeGetOid(orig) != RangeTypeGetOid(new)) + elog(ERROR, "range types do not match"); + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(orig)); + + has_subtype_diff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid); + + range_deserialize(typcache, orig, &orig_lower, &orig_upper, &orig_empty); + range_deserialize(typcache, new, &new_lower, &new_upper, &new_empty); + + /* + * Distinct branches for handling distinct classes of ranges. Note that + * penalty values only need to be commensurate within the same class of + * new range. + */ + if (new_empty) + { + /* Handle insertion of empty range */ + if (orig_empty) + { + /* + * The best case is to insert it to empty original range. + * Insertion here means no broadening of original range. Also + * original range is the most narrow. + */ + *penalty = 0.0; + } + else if (RangeIsOrContainsEmpty(orig)) + { + /* + * The second case is to insert empty range into range which + * contains at least one underlying empty range. 
There is still + * no broadening of original range, but original range is not as + * narrow as possible. + */ + *penalty = CONTAIN_EMPTY_PENALTY; + } + else if (orig_lower.infinite && orig_upper.infinite) + { + /* + * Original range requires broadening. (-inf; +inf) is most far + * from normal range in this case. + */ + *penalty = 2 * CONTAIN_EMPTY_PENALTY; + } + else if (orig_lower.infinite || orig_upper.infinite) + { + /* + * (-inf, x) or (x, +inf) original ranges are closer to normal + * ranges, so it's worse to mix it with empty ranges. + */ + *penalty = 3 * CONTAIN_EMPTY_PENALTY; + } + else + { + /* + * The least preferred case is broadening of normal range. + */ + *penalty = 4 * CONTAIN_EMPTY_PENALTY; + } + } + else if (new_lower.infinite && new_upper.infinite) + { + /* Handle insertion of (-inf, +inf) range */ + if (orig_lower.infinite && orig_upper.infinite) + { + /* + * Best case is inserting to (-inf, +inf) original range. + */ + *penalty = 0.0; + } + else if (orig_lower.infinite || orig_upper.infinite) + { + /* + * When original range is (-inf, x) or (x, +inf) it requires + * broadening of original range (extension of one bound to + * infinity). + */ + *penalty = INFINITE_BOUND_PENALTY; + } + else + { + /* + * Insertion to normal original range is least preferred. + */ + *penalty = 2 * INFINITE_BOUND_PENALTY; + } + + if (RangeIsOrContainsEmpty(orig)) + { + /* + * Original range is narrower when it doesn't contain empty + * ranges. Add additional penalty otherwise. + */ + *penalty += CONTAIN_EMPTY_PENALTY; + } + } + else if (new_lower.infinite) + { + /* Handle insertion of (-inf, x) range */ + if (!orig_empty && orig_lower.infinite) + { + if (orig_upper.infinite) + { + /* + * (-inf, +inf) range won't be extended by insertion of (-inf, + * x) range. It's a less desirable case than insertion to + * (-inf, y) original range without extension, because in that + * case original range is narrower. But we can't express that + * in single float value. + */ + *penalty = 0.0; + } + else + { + if (range_cmp_bounds(typcache, &new_upper, &orig_upper) > 0) + { + /* + * Get extension of original range using subtype_diff. Use + * constant if subtype_diff unavailable. + */ + if (has_subtype_diff) + *penalty = call_subtype_diff(typcache, + new_upper.val, + orig_upper.val); + else + *penalty = DEFAULT_SUBTYPE_DIFF_PENALTY; + } + else + { + /* No extension of original range */ + *penalty = 0.0; + } + } + } + else + { + /* + * If lower bound of original range is not -inf, then extension of + * it is infinity. + */ + *penalty = get_float4_infinity(); + } + } + else if (new_upper.infinite) + { + /* Handle insertion of (x, +inf) range */ + if (!orig_empty && orig_upper.infinite) + { + if (orig_lower.infinite) + { + /* + * (-inf, +inf) range won't be extended by insertion of (x, + * +inf) range. It's a less desirable case than insertion to + * (y, +inf) original range without extension, because in that + * case original range is narrower. But we can't express that + * in single float value. + */ + *penalty = 0.0; + } + else + { + if (range_cmp_bounds(typcache, &new_lower, &orig_lower) < 0) + { + /* + * Get extension of original range using subtype_diff. Use + * constant if subtype_diff unavailable. 
+ */ + if (has_subtype_diff) + *penalty = call_subtype_diff(typcache, + orig_lower.val, + new_lower.val); + else + *penalty = DEFAULT_SUBTYPE_DIFF_PENALTY; + } + else + { + /* No extension of original range */ + *penalty = 0.0; + } + } + } + else + { + /* + * If upper bound of original range is not +inf, then extension of + * it is infinity. + */ + *penalty = get_float4_infinity(); + } + } + else + { + /* Handle insertion of normal (non-empty, non-infinite) range */ + if (orig_empty || orig_lower.infinite || orig_upper.infinite) + { + /* + * Avoid mixing normal ranges with infinite and empty ranges. + */ + *penalty = get_float4_infinity(); + } + else + { + /* + * Calculate extension of original range by calling subtype_diff. + * Use constant if subtype_diff unavailable. + */ + float8 diff = 0.0; + + if (range_cmp_bounds(typcache, &new_lower, &orig_lower) < 0) + { + if (has_subtype_diff) + diff += call_subtype_diff(typcache, + orig_lower.val, + new_lower.val); + else + diff += DEFAULT_SUBTYPE_DIFF_PENALTY; + } + if (range_cmp_bounds(typcache, &new_upper, &orig_upper) > 0) + { + if (has_subtype_diff) + diff += call_subtype_diff(typcache, + new_upper.val, + orig_upper.val); + else + diff += DEFAULT_SUBTYPE_DIFF_PENALTY; + } + *penalty = diff; + } + } + + PG_RETURN_POINTER(penalty); +} + +/* + * The GiST PickSplit method for ranges + * + * Primarily, we try to segregate ranges of different classes. If splitting + * ranges of the same class, use the appropriate split method for that class. + */ +Datum +range_gist_picksplit(PG_FUNCTION_ARGS) +{ + GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); + GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); + TypeCacheEntry *typcache; + OffsetNumber i; + RangeType *pred_left; + int nbytes; + OffsetNumber maxoff; + int count_in_classes[CLS_COUNT]; + int j; + int non_empty_classes_count = 0; + int biggest_class = -1; + int biggest_class_count = 0; + int total_count; + + /* use first item to look up range type's info */ + pred_left = DatumGetRangeTypeP(entryvec->vector[FirstOffsetNumber].key); + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(pred_left)); + + maxoff = entryvec->n - 1; + nbytes = (maxoff + 1) * sizeof(OffsetNumber); + v->spl_left = (OffsetNumber *) palloc(nbytes); + v->spl_right = (OffsetNumber *) palloc(nbytes); + + /* + * Get count distribution of range classes. + */ + memset(count_in_classes, 0, sizeof(count_in_classes)); + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key); + + count_in_classes[get_gist_range_class(range)]++; + } + + /* + * Count non-empty classes and find biggest class. 
+ */ + total_count = maxoff; + for (j = 0; j < CLS_COUNT; j++) + { + if (count_in_classes[j] > 0) + { + if (count_in_classes[j] > biggest_class_count) + { + biggest_class_count = count_in_classes[j]; + biggest_class = j; + } + non_empty_classes_count++; + } + } + + Assert(non_empty_classes_count > 0); + + if (non_empty_classes_count == 1) + { + /* One non-empty class, so split inside class */ + if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_NORMAL) + { + /* double sorting split for normal ranges */ + range_gist_double_sorting_split(typcache, entryvec, v); + } + else if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_LOWER_INF) + { + /* upper bound sorting split for (-inf, x) ranges */ + range_gist_single_sorting_split(typcache, entryvec, v, true); + } + else if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_UPPER_INF) + { + /* lower bound sorting split for (x, +inf) ranges */ + range_gist_single_sorting_split(typcache, entryvec, v, false); + } + else + { + /* trivial split for all (-inf, +inf) or all empty ranges */ + range_gist_fallback_split(typcache, entryvec, v); + } + } + else + { + /* + * Class based split. + * + * To which side of the split should each class go? Initialize them + * all to go to the left side. + */ + SplitLR classes_groups[CLS_COUNT]; + + memset(classes_groups, 0, sizeof(classes_groups)); + + if (count_in_classes[CLS_NORMAL] > 0) + { + /* separate normal ranges if any */ + classes_groups[CLS_NORMAL] = SPLIT_RIGHT; + } + else + { + /*---------- + * Try to split classes in one of two ways: + * 1) containing infinities - not containing infinities + * 2) containing empty - not containing empty + * + * Select the way which balances the ranges between left and right + * the best. If split in these ways is not possible, there are at + * most 3 classes, so just separate biggest class. + *---------- + */ + int infCount, + nonInfCount; + int emptyCount, + nonEmptyCount; + + nonInfCount = + count_in_classes[CLS_NORMAL] + + count_in_classes[CLS_CONTAIN_EMPTY] + + count_in_classes[CLS_EMPTY]; + infCount = total_count - nonInfCount; + + nonEmptyCount = + count_in_classes[CLS_NORMAL] + + count_in_classes[CLS_LOWER_INF] + + count_in_classes[CLS_UPPER_INF] + + count_in_classes[CLS_LOWER_INF | CLS_UPPER_INF]; + emptyCount = total_count - nonEmptyCount; + + if (infCount > 0 && nonInfCount > 0 && + (abs(infCount - nonInfCount) <= + abs(emptyCount - nonEmptyCount))) + { + classes_groups[CLS_NORMAL] = SPLIT_RIGHT; + classes_groups[CLS_CONTAIN_EMPTY] = SPLIT_RIGHT; + classes_groups[CLS_EMPTY] = SPLIT_RIGHT; + } + else if (emptyCount > 0 && nonEmptyCount > 0) + { + classes_groups[CLS_NORMAL] = SPLIT_RIGHT; + classes_groups[CLS_LOWER_INF] = SPLIT_RIGHT; + classes_groups[CLS_UPPER_INF] = SPLIT_RIGHT; + classes_groups[CLS_LOWER_INF | CLS_UPPER_INF] = SPLIT_RIGHT; + } + else + { + /* + * Either total_count == emptyCount or total_count == + * infCount. + */ + classes_groups[biggest_class] = SPLIT_RIGHT; + } + } + + range_gist_class_split(typcache, entryvec, v, classes_groups); + } + + PG_RETURN_POINTER(v); +} + +/* equality comparator for GiST */ +Datum +range_gist_same(PG_FUNCTION_ARGS) +{ + RangeType *r1 = PG_GETARG_RANGE_P(0); + RangeType *r2 = PG_GETARG_RANGE_P(1); + bool *result = (bool *) PG_GETARG_POINTER(2); + + /* + * range_eq will ignore the RANGE_CONTAIN_EMPTY flag, so we have to check + * that for ourselves. More generally, if the entries have been properly + * normalized, then unequal flags bytes must mean unequal ranges ... so + * let's just test all the flag bits at once. 
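+ *
+ * (Editorial illustration, not part of the original comment: an empty key
+ * versus a non-empty one, or the numrange keys [1,2] and [1,2), carry
+ * different flag bytes and are reported as unequal here without calling
+ * range_eq_internal.)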
+ */ + if (range_get_flags(r1) != range_get_flags(r2)) + *result = false; + else + { + TypeCacheEntry *typcache; + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1)); + + *result = range_eq_internal(typcache, r1, r2); + } + + PG_RETURN_POINTER(result); +} + +/* + *---------------------------------------------------------- + * STATIC FUNCTIONS + *---------------------------------------------------------- + */ + +/* + * Return the smallest range that contains r1 and r2 + * + * This differs from regular range_union in two critical ways: + * 1. It won't throw an error for non-adjacent r1 and r2, but just absorb + * the intervening values into the result range. + * 2. We track whether any empty range has been union'd into the result, + * so that contained_by searches can be indexed. Note that this means + * that *all* unions formed within the GiST index must go through here. + */ +static RangeType * +range_super_union(TypeCacheEntry *typcache, RangeType *r1, RangeType *r2) +{ + RangeType *result; + RangeBound lower1, + lower2; + RangeBound upper1, + upper2; + bool empty1, + empty2; + char flags1, + flags2; + RangeBound *result_lower; + RangeBound *result_upper; + + range_deserialize(typcache, r1, &lower1, &upper1, &empty1); + range_deserialize(typcache, r2, &lower2, &upper2, &empty2); + flags1 = range_get_flags(r1); + flags2 = range_get_flags(r2); + + if (empty1) + { + /* We can return r2 as-is if it already is or contains empty */ + if (flags2 & (RANGE_EMPTY | RANGE_CONTAIN_EMPTY)) + return r2; + /* Else we'd better copy it (modify-in-place isn't safe) */ + r2 = rangeCopy(r2); + range_set_contain_empty(r2); + return r2; + } + if (empty2) + { + /* We can return r1 as-is if it already is or contains empty */ + if (flags1 & (RANGE_EMPTY | RANGE_CONTAIN_EMPTY)) + return r1; + /* Else we'd better copy it (modify-in-place isn't safe) */ + r1 = rangeCopy(r1); + range_set_contain_empty(r1); + return r1; + } + + if (range_cmp_bounds(typcache, &lower1, &lower2) <= 0) + result_lower = &lower1; + else + result_lower = &lower2; + + if (range_cmp_bounds(typcache, &upper1, &upper2) >= 0) + result_upper = &upper1; + else + result_upper = &upper2; + + /* optimization to avoid constructing a new range */ + if (result_lower == &lower1 && result_upper == &upper1 && + ((flags1 & RANGE_CONTAIN_EMPTY) || !(flags2 & RANGE_CONTAIN_EMPTY))) + return r1; + if (result_lower == &lower2 && result_upper == &upper2 && + ((flags2 & RANGE_CONTAIN_EMPTY) || !(flags1 & RANGE_CONTAIN_EMPTY))) + return r2; + + result = make_range(typcache, result_lower, result_upper, false, NULL); + + if ((flags1 & RANGE_CONTAIN_EMPTY) || (flags2 & RANGE_CONTAIN_EMPTY)) + range_set_contain_empty(result); + + return result; +} + +static bool +multirange_union_range_equal(TypeCacheEntry *typcache, + const RangeType *r, + const MultirangeType *mr) +{ + RangeBound lower1, + upper1, + lower2, + upper2, + tmp; + bool empty; + + if (RangeIsEmpty(r) || MultirangeIsEmpty(mr)) + return (RangeIsEmpty(r) && MultirangeIsEmpty(mr)); + + range_deserialize(typcache, r, &lower1, &upper1, &empty); + Assert(!empty); + multirange_get_bounds(typcache, mr, 0, &lower2, &tmp); + multirange_get_bounds(typcache, mr, mr->rangeCount - 1, &tmp, &upper2); + + return (range_cmp_bounds(typcache, &lower1, &lower2) == 0 && + range_cmp_bounds(typcache, &upper1, &upper2) == 0); +} + +/* + * GiST consistent test on an index internal page with range query + */ +static bool +range_gist_consistent_int_range(TypeCacheEntry *typcache, + StrategyNumber strategy, + const RangeType 
*key, + const RangeType *query) +{ + switch (strategy) + { + case RANGESTRAT_BEFORE: + if (RangeIsEmpty(key) || RangeIsEmpty(query)) + return false; + return (!range_overright_internal(typcache, key, query)); + case RANGESTRAT_OVERLEFT: + if (RangeIsEmpty(key) || RangeIsEmpty(query)) + return false; + return (!range_after_internal(typcache, key, query)); + case RANGESTRAT_OVERLAPS: + return range_overlaps_internal(typcache, key, query); + case RANGESTRAT_OVERRIGHT: + if (RangeIsEmpty(key) || RangeIsEmpty(query)) + return false; + return (!range_before_internal(typcache, key, query)); + case RANGESTRAT_AFTER: + if (RangeIsEmpty(key) || RangeIsEmpty(query)) + return false; + return (!range_overleft_internal(typcache, key, query)); + case RANGESTRAT_ADJACENT: + if (RangeIsEmpty(key) || RangeIsEmpty(query)) + return false; + if (range_adjacent_internal(typcache, key, query)) + return true; + return range_overlaps_internal(typcache, key, query); + case RANGESTRAT_CONTAINS: + return range_contains_internal(typcache, key, query); + case RANGESTRAT_CONTAINED_BY: + + /* + * Empty ranges are contained by anything, so if key is or + * contains any empty ranges, we must descend into it. Otherwise, + * descend only if key overlaps the query. + */ + if (RangeIsOrContainsEmpty(key)) + return true; + return range_overlaps_internal(typcache, key, query); + case RANGESTRAT_EQ: + + /* + * If query is empty, descend only if the key is or contains any + * empty ranges. Otherwise, descend if key contains query. + */ + if (RangeIsEmpty(query)) + return RangeIsOrContainsEmpty(key); + return range_contains_internal(typcache, key, query); + default: + elog(ERROR, "unrecognized range strategy: %d", strategy); + return false; /* keep compiler quiet */ + } +} + +/* + * GiST consistent test on an index internal page with multirange query + */ +static bool +range_gist_consistent_int_multirange(TypeCacheEntry *typcache, + StrategyNumber strategy, + const RangeType *key, + const MultirangeType *query) +{ + switch (strategy) + { + case RANGESTRAT_BEFORE: + if (RangeIsEmpty(key) || MultirangeIsEmpty(query)) + return false; + return (!range_overright_multirange_internal(typcache, key, query)); + case RANGESTRAT_OVERLEFT: + if (RangeIsEmpty(key) || MultirangeIsEmpty(query)) + return false; + return (!range_after_multirange_internal(typcache, key, query)); + case RANGESTRAT_OVERLAPS: + return range_overlaps_multirange_internal(typcache, key, query); + case RANGESTRAT_OVERRIGHT: + if (RangeIsEmpty(key) || MultirangeIsEmpty(query)) + return false; + return (!range_before_multirange_internal(typcache, key, query)); + case RANGESTRAT_AFTER: + if (RangeIsEmpty(key) || MultirangeIsEmpty(query)) + return false; + return (!range_overleft_multirange_internal(typcache, key, query)); + case RANGESTRAT_ADJACENT: + if (RangeIsEmpty(key) || MultirangeIsEmpty(query)) + return false; + if (range_adjacent_multirange_internal(typcache, key, query)) + return true; + return range_overlaps_multirange_internal(typcache, key, query); + case RANGESTRAT_CONTAINS: + return range_contains_multirange_internal(typcache, key, query); + case RANGESTRAT_CONTAINED_BY: + + /* + * Empty ranges are contained by anything, so if key is or + * contains any empty ranges, we must descend into it. Otherwise, + * descend only if key overlaps the query. 
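+ *
+ * (Editorial illustration, not part of the original comment: an internal
+ * key flagged RANGE_CONTAIN_EMPTY must still be visited for a query such
+ * as {[100,200)}, because an empty range below it is contained by any
+ * multirange.)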
+ */ + if (RangeIsOrContainsEmpty(key)) + return true; + return range_overlaps_multirange_internal(typcache, key, query); + case RANGESTRAT_EQ: + + /* + * If query is empty, descend only if the key is or contains any + * empty ranges. Otherwise, descend if key contains query. + */ + if (MultirangeIsEmpty(query)) + return RangeIsOrContainsEmpty(key); + return range_contains_multirange_internal(typcache, key, query); + default: + elog(ERROR, "unrecognized range strategy: %d", strategy); + return false; /* keep compiler quiet */ + } +} + +/* + * GiST consistent test on an index internal page with element query + */ +static bool +range_gist_consistent_int_element(TypeCacheEntry *typcache, + StrategyNumber strategy, + const RangeType *key, + Datum query) +{ + switch (strategy) + { + case RANGESTRAT_CONTAINS_ELEM: + return range_contains_elem_internal(typcache, key, query); + default: + elog(ERROR, "unrecognized range strategy: %d", strategy); + return false; /* keep compiler quiet */ + } +} + +/* + * GiST consistent test on an index leaf page with range query + */ +static bool +range_gist_consistent_leaf_range(TypeCacheEntry *typcache, + StrategyNumber strategy, + const RangeType *key, + const RangeType *query) +{ + switch (strategy) + { + case RANGESTRAT_BEFORE: + return range_before_internal(typcache, key, query); + case RANGESTRAT_OVERLEFT: + return range_overleft_internal(typcache, key, query); + case RANGESTRAT_OVERLAPS: + return range_overlaps_internal(typcache, key, query); + case RANGESTRAT_OVERRIGHT: + return range_overright_internal(typcache, key, query); + case RANGESTRAT_AFTER: + return range_after_internal(typcache, key, query); + case RANGESTRAT_ADJACENT: + return range_adjacent_internal(typcache, key, query); + case RANGESTRAT_CONTAINS: + return range_contains_internal(typcache, key, query); + case RANGESTRAT_CONTAINED_BY: + return range_contained_by_internal(typcache, key, query); + case RANGESTRAT_EQ: + return range_eq_internal(typcache, key, query); + default: + elog(ERROR, "unrecognized range strategy: %d", strategy); + return false; /* keep compiler quiet */ + } +} + +/* + * GiST consistent test on an index leaf page with multirange query + */ +static bool +range_gist_consistent_leaf_multirange(TypeCacheEntry *typcache, + StrategyNumber strategy, + const RangeType *key, + const MultirangeType *query) +{ + switch (strategy) + { + case RANGESTRAT_BEFORE: + return range_before_multirange_internal(typcache, key, query); + case RANGESTRAT_OVERLEFT: + return range_overleft_multirange_internal(typcache, key, query); + case RANGESTRAT_OVERLAPS: + return range_overlaps_multirange_internal(typcache, key, query); + case RANGESTRAT_OVERRIGHT: + return range_overright_multirange_internal(typcache, key, query); + case RANGESTRAT_AFTER: + return range_after_multirange_internal(typcache, key, query); + case RANGESTRAT_ADJACENT: + return range_adjacent_multirange_internal(typcache, key, query); + case RANGESTRAT_CONTAINS: + return range_contains_multirange_internal(typcache, key, query); + case RANGESTRAT_CONTAINED_BY: + return multirange_contains_range_internal(typcache, query, key); + case RANGESTRAT_EQ: + return multirange_union_range_equal(typcache, key, query); + default: + elog(ERROR, "unrecognized range strategy: %d", strategy); + return false; /* keep compiler quiet */ + } +} + +/* + * GiST consistent test on an index leaf page with element query + */ +static bool +range_gist_consistent_leaf_element(TypeCacheEntry *typcache, + StrategyNumber strategy, + const RangeType *key, + Datum 
query) +{ + switch (strategy) + { + case RANGESTRAT_CONTAINS_ELEM: + return range_contains_elem_internal(typcache, key, query); + default: + elog(ERROR, "unrecognized range strategy: %d", strategy); + return false; /* keep compiler quiet */ + } +} + +/* + * Trivial split: half of entries will be placed on one page + * and the other half on the other page. + */ +static void +range_gist_fallback_split(TypeCacheEntry *typcache, + GistEntryVector *entryvec, + GIST_SPLITVEC *v) +{ + RangeType *left_range = NULL; + RangeType *right_range = NULL; + OffsetNumber i, + maxoff, + split_idx; + + maxoff = entryvec->n - 1; + /* Split entries before this to left page, after to right: */ + split_idx = (maxoff - FirstOffsetNumber) / 2 + FirstOffsetNumber; + + v->spl_nleft = 0; + v->spl_nright = 0; + for (i = FirstOffsetNumber; i <= maxoff; i++) + { + RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key); + + if (i < split_idx) + PLACE_LEFT(range, i); + else + PLACE_RIGHT(range, i); + } + + v->spl_ldatum = RangeTypePGetDatum(left_range); + v->spl_rdatum = RangeTypePGetDatum(right_range); +} + +/* + * Split based on classes of ranges. + * + * See get_gist_range_class for class definitions. + * classes_groups is an array of length CLS_COUNT indicating the side of the + * split to which each class should go. + */ +static void +range_gist_class_split(TypeCacheEntry *typcache, + GistEntryVector *entryvec, + GIST_SPLITVEC *v, + SplitLR *classes_groups) +{ + RangeType *left_range = NULL; + RangeType *right_range = NULL; + OffsetNumber i, + maxoff; + + maxoff = entryvec->n - 1; + + v->spl_nleft = 0; + v->spl_nright = 0; + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key); + int class; + + /* Get class of range */ + class = get_gist_range_class(range); + + /* Place range to appropriate page */ + if (classes_groups[class] == SPLIT_LEFT) + PLACE_LEFT(range, i); + else + { + Assert(classes_groups[class] == SPLIT_RIGHT); + PLACE_RIGHT(range, i); + } + } + + v->spl_ldatum = RangeTypePGetDatum(left_range); + v->spl_rdatum = RangeTypePGetDatum(right_range); +} + +/* + * Sorting based split. First half of entries according to the sort will be + * placed to one page, and second half of entries will be placed to other + * page. use_upper_bound parameter indicates whether to use upper or lower + * bound for sorting. + */ +static void +range_gist_single_sorting_split(TypeCacheEntry *typcache, + GistEntryVector *entryvec, + GIST_SPLITVEC *v, + bool use_upper_bound) +{ + SingleBoundSortItem *sortItems; + RangeType *left_range = NULL; + RangeType *right_range = NULL; + OffsetNumber i, + maxoff, + split_idx; + + maxoff = entryvec->n - 1; + + sortItems = (SingleBoundSortItem *) + palloc(maxoff * sizeof(SingleBoundSortItem)); + + /* + * Prepare auxiliary array and sort the values. 
+ */ + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key); + RangeBound bound2; + bool empty; + + sortItems[i - 1].index = i; + /* Put appropriate bound into array */ + if (use_upper_bound) + range_deserialize(typcache, range, &bound2, + &sortItems[i - 1].bound, &empty); + else + range_deserialize(typcache, range, &sortItems[i - 1].bound, + &bound2, &empty); + Assert(!empty); + } + + qsort_arg(sortItems, maxoff, sizeof(SingleBoundSortItem), + single_bound_cmp, typcache); + + split_idx = maxoff / 2; + + v->spl_nleft = 0; + v->spl_nright = 0; + + for (i = 0; i < maxoff; i++) + { + int idx = sortItems[i].index; + RangeType *range = DatumGetRangeTypeP(entryvec->vector[idx].key); + + if (i < split_idx) + PLACE_LEFT(range, idx); + else + PLACE_RIGHT(range, idx); + } + + v->spl_ldatum = RangeTypePGetDatum(left_range); + v->spl_rdatum = RangeTypePGetDatum(right_range); +} + +/* + * Double sorting split algorithm. + * + * The algorithm considers dividing ranges into two groups. The first (left) + * group contains general left bound. The second (right) group contains + * general right bound. The challenge is to find upper bound of left group + * and lower bound of right group so that overlap of groups is minimal and + * ratio of distribution is acceptable. Algorithm finds for each lower bound of + * right group minimal upper bound of left group, and for each upper bound of + * left group maximal lower bound of right group. For each found pair + * range_gist_consider_split considers replacement of currently selected + * split with the new one. + * + * After that, all the entries are divided into three groups: + * 1) Entries which should be placed to the left group + * 2) Entries which should be placed to the right group + * 3) "Common entries" which can be placed to either group without affecting + * amount of overlap. + * + * The common ranges are distributed by difference of distance from lower + * bound of common range to lower bound of right group and distance from upper + * bound of common range to upper bound of left group. + * + * For details see: + * "A new double sorting-based node splitting algorithm for R-tree", + * A. 
Korotkov + * http://syrcose.ispras.ru/2011/files/SYRCoSE2011_Proceedings.pdf#page=36 + */ +static void +range_gist_double_sorting_split(TypeCacheEntry *typcache, + GistEntryVector *entryvec, + GIST_SPLITVEC *v) +{ + ConsiderSplitContext context; + OffsetNumber i, + maxoff; + RangeType *left_range = NULL, + *right_range = NULL; + int common_entries_count; + NonEmptyRange *by_lower, + *by_upper; + CommonEntry *common_entries; + int nentries, + i1, + i2; + RangeBound *right_lower, + *left_upper; + + memset(&context, 0, sizeof(ConsiderSplitContext)); + context.typcache = typcache; + context.has_subtype_diff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid); + + maxoff = entryvec->n - 1; + nentries = context.entries_count = maxoff - FirstOffsetNumber + 1; + context.first = true; + + /* Allocate arrays for sorted range bounds */ + by_lower = (NonEmptyRange *) palloc(nentries * sizeof(NonEmptyRange)); + by_upper = (NonEmptyRange *) palloc(nentries * sizeof(NonEmptyRange)); + + /* Fill arrays of bounds */ + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key); + bool empty; + + range_deserialize(typcache, range, + &by_lower[i - FirstOffsetNumber].lower, + &by_lower[i - FirstOffsetNumber].upper, + &empty); + Assert(!empty); + } + + /* + * Make two arrays of range bounds: one sorted by lower bound and another + * sorted by upper bound. + */ + memcpy(by_upper, by_lower, nentries * sizeof(NonEmptyRange)); + qsort_arg(by_lower, nentries, sizeof(NonEmptyRange), + interval_cmp_lower, typcache); + qsort_arg(by_upper, nentries, sizeof(NonEmptyRange), + interval_cmp_upper, typcache); + + /*---------- + * The goal is to form a left and right range, so that every entry + * range is contained by either left or right interval (or both). + * + * For example, with the ranges (0,1), (1,3), (2,3), (2,4): + * + * 0 1 2 3 4 + * +-+ + * +---+ + * +-+ + * +---+ + * + * The left and right ranges are of the form (0,a) and (b,4). + * We first consider splits where b is the lower bound of an entry. + * We iterate through all entries, and for each b, calculate the + * smallest possible a. Then we consider splits where a is the + * upper bound of an entry, and for each a, calculate the greatest + * possible b. + * + * In the above example, the first loop would consider splits: + * b=0: (0,1)-(0,4) + * b=1: (0,1)-(1,4) + * b=2: (0,3)-(2,4) + * + * And the second loop: + * a=1: (0,1)-(1,4) + * a=3: (0,3)-(2,4) + * a=4: (0,4)-(2,4) + *---------- + */ + + /* + * Iterate over lower bound of right group, finding smallest possible + * upper bound of left group. + */ + i1 = 0; + i2 = 0; + right_lower = &by_lower[i1].lower; + left_upper = &by_upper[i2].lower; + while (true) + { + /* + * Find next lower bound of right group. + */ + while (i1 < nentries && + range_cmp_bounds(typcache, right_lower, + &by_lower[i1].lower) == 0) + { + if (range_cmp_bounds(typcache, &by_lower[i1].upper, + left_upper) > 0) + left_upper = &by_lower[i1].upper; + i1++; + } + if (i1 >= nentries) + break; + right_lower = &by_lower[i1].lower; + + /* + * Find count of ranges which anyway should be placed to the left + * group. + */ + while (i2 < nentries && + range_cmp_bounds(typcache, &by_upper[i2].upper, + left_upper) <= 0) + i2++; + + /* + * Consider found split to see if it's better than what we had. 
+ */ + range_gist_consider_split(&context, right_lower, i1, left_upper, i2); + } + + /* + * Iterate over upper bound of left group finding greatest possible lower + * bound of right group. + */ + i1 = nentries - 1; + i2 = nentries - 1; + right_lower = &by_lower[i1].upper; + left_upper = &by_upper[i2].upper; + while (true) + { + /* + * Find next upper bound of left group. + */ + while (i2 >= 0 && + range_cmp_bounds(typcache, left_upper, + &by_upper[i2].upper) == 0) + { + if (range_cmp_bounds(typcache, &by_upper[i2].lower, + right_lower) < 0) + right_lower = &by_upper[i2].lower; + i2--; + } + if (i2 < 0) + break; + left_upper = &by_upper[i2].upper; + + /* + * Find count of intervals which anyway should be placed to the right + * group. + */ + while (i1 >= 0 && + range_cmp_bounds(typcache, &by_lower[i1].lower, + right_lower) >= 0) + i1--; + + /* + * Consider found split to see if it's better than what we had. + */ + range_gist_consider_split(&context, right_lower, i1 + 1, + left_upper, i2 + 1); + } + + /* + * If we failed to find any acceptable splits, use trivial split. + */ + if (context.first) + { + range_gist_fallback_split(typcache, entryvec, v); + return; + } + + /* + * Ok, we have now selected bounds of the groups. Now we have to + * distribute entries themselves. At first we distribute entries which can + * be placed unambiguously and collect "common entries" to array. + */ + + /* Allocate vectors for results */ + v->spl_left = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber)); + v->spl_right = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber)); + v->spl_nleft = 0; + v->spl_nright = 0; + + /* + * Allocate an array for "common entries" - entries which can be placed to + * either group without affecting overlap along selected axis. + */ + common_entries_count = 0; + common_entries = (CommonEntry *) palloc(nentries * sizeof(CommonEntry)); + + /* + * Distribute entries which can be distributed unambiguously, and collect + * common entries. + */ + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + RangeType *range; + RangeBound lower, + upper; + bool empty; + + /* + * Get upper and lower bounds along selected axis. + */ + range = DatumGetRangeTypeP(entryvec->vector[i].key); + + range_deserialize(typcache, range, &lower, &upper, &empty); + + if (range_cmp_bounds(typcache, &upper, context.left_upper) <= 0) + { + /* Fits in the left group */ + if (range_cmp_bounds(typcache, &lower, context.right_lower) >= 0) + { + /* Fits also in the right group, so "common entry" */ + common_entries[common_entries_count].index = i; + if (context.has_subtype_diff) + { + /* + * delta = (lower - context.right_lower) - + * (context.left_upper - upper) + */ + common_entries[common_entries_count].delta = + call_subtype_diff(typcache, + lower.val, + context.right_lower->val) - + call_subtype_diff(typcache, + context.left_upper->val, + upper.val); + } + else + { + /* Without subtype_diff, take all deltas as zero */ + common_entries[common_entries_count].delta = 0; + } + common_entries_count++; + } + else + { + /* Doesn't fit to the right group, so join to the left group */ + PLACE_LEFT(range, i); + } + } + else + { + /* + * Each entry should fit on either left or right group. Since this + * entry didn't fit in the left group, it better fit in the right + * group. + */ + Assert(range_cmp_bounds(typcache, &lower, + context.right_lower) >= 0); + PLACE_RIGHT(range, i); + } + } + + /* + * Distribute "common entries", if any. 
+ */ + if (common_entries_count > 0) + { + /* + * Sort "common entries" by calculated deltas in order to distribute + * the most ambiguous entries first. + */ + qsort(common_entries, common_entries_count, sizeof(CommonEntry), + common_entry_cmp); + + /* + * Distribute "common entries" between groups according to sorting. + */ + for (i = 0; i < common_entries_count; i++) + { + RangeType *range; + int idx = common_entries[i].index; + + range = DatumGetRangeTypeP(entryvec->vector[idx].key); + + /* + * Check if we have to place this entry in either group to achieve + * LIMIT_RATIO. + */ + if (i < context.common_left) + PLACE_LEFT(range, idx); + else + PLACE_RIGHT(range, idx); + } + } + + v->spl_ldatum = PointerGetDatum(left_range); + v->spl_rdatum = PointerGetDatum(right_range); +} + +/* + * Consider replacement of currently selected split with a better one + * during range_gist_double_sorting_split. + */ +static void +range_gist_consider_split(ConsiderSplitContext *context, + RangeBound *right_lower, int min_left_count, + RangeBound *left_upper, int max_left_count) +{ + int left_count, + right_count; + float4 ratio, + overlap; + + /* + * Calculate entries distribution ratio assuming most uniform distribution + * of common entries. + */ + if (min_left_count >= (context->entries_count + 1) / 2) + left_count = min_left_count; + else if (max_left_count <= context->entries_count / 2) + left_count = max_left_count; + else + left_count = context->entries_count / 2; + right_count = context->entries_count - left_count; + + /* + * Ratio of split: quotient between size of smaller group and total + * entries count. This is necessarily 0.5 or less; if it's less than + * LIMIT_RATIO then we will never accept the new split. + */ + ratio = ((float4) Min(left_count, right_count)) / + ((float4) context->entries_count); + + if (ratio > LIMIT_RATIO) + { + bool selectthis = false; + + /* + * The ratio is acceptable, so compare current split with previously + * selected one. We search for minimal overlap (allowing negative + * values) and minimal ratio secondarily. If subtype_diff is + * available, it's used for overlap measure. Without subtype_diff we + * use number of "common entries" as an overlap measure. + */ + if (context->has_subtype_diff) + overlap = call_subtype_diff(context->typcache, + left_upper->val, + right_lower->val); + else + overlap = max_left_count - min_left_count; + + /* If there is no previous selection, select this split */ + if (context->first) + selectthis = true; + else + { + /* + * Choose the new split if it has a smaller overlap, or same + * overlap but better ratio. + */ + if (overlap < context->overlap || + (overlap == context->overlap && ratio > context->ratio)) + selectthis = true; + } + + if (selectthis) + { + /* save information about selected split */ + context->first = false; + context->ratio = ratio; + context->overlap = overlap; + context->right_lower = right_lower; + context->left_upper = left_upper; + context->common_left = max_left_count - left_count; + context->common_right = left_count - min_left_count; + } + } +} + +/* + * Find class number for range. + * + * The class number is a valid combination of the properties of the + * range. Note: the highest possible number is 8, because CLS_EMPTY + * can't be combined with anything else. 
+ */ +static int +get_gist_range_class(RangeType *range) +{ + int classNumber; + char flags; + + flags = range_get_flags(range); + if (flags & RANGE_EMPTY) + { + classNumber = CLS_EMPTY; + } + else + { + classNumber = 0; + if (flags & RANGE_LB_INF) + classNumber |= CLS_LOWER_INF; + if (flags & RANGE_UB_INF) + classNumber |= CLS_UPPER_INF; + if (flags & RANGE_CONTAIN_EMPTY) + classNumber |= CLS_CONTAIN_EMPTY; + } + return classNumber; +} + +/* + * Comparison function for range_gist_single_sorting_split. + */ +static int +single_bound_cmp(const void *a, const void *b, void *arg) +{ + SingleBoundSortItem *i1 = (SingleBoundSortItem *) a; + SingleBoundSortItem *i2 = (SingleBoundSortItem *) b; + TypeCacheEntry *typcache = (TypeCacheEntry *) arg; + + return range_cmp_bounds(typcache, &i1->bound, &i2->bound); +} + +/* + * Compare NonEmptyRanges by lower bound. + */ +static int +interval_cmp_lower(const void *a, const void *b, void *arg) +{ + NonEmptyRange *i1 = (NonEmptyRange *) a; + NonEmptyRange *i2 = (NonEmptyRange *) b; + TypeCacheEntry *typcache = (TypeCacheEntry *) arg; + + return range_cmp_bounds(typcache, &i1->lower, &i2->lower); +} + +/* + * Compare NonEmptyRanges by upper bound. + */ +static int +interval_cmp_upper(const void *a, const void *b, void *arg) +{ + NonEmptyRange *i1 = (NonEmptyRange *) a; + NonEmptyRange *i2 = (NonEmptyRange *) b; + TypeCacheEntry *typcache = (TypeCacheEntry *) arg; + + return range_cmp_bounds(typcache, &i1->upper, &i2->upper); +} + +/* + * Compare CommonEntrys by their deltas. + */ +static int +common_entry_cmp(const void *i1, const void *i2) +{ + double delta1 = ((CommonEntry *) i1)->delta; + double delta2 = ((CommonEntry *) i2)->delta; + + if (delta1 < delta2) + return -1; + else if (delta1 > delta2) + return 1; + else + return 0; +} + +/* + * Convenience function to invoke type-specific subtype_diff function. + * Caller must have already checked that there is one for the range type. + */ +static float8 +call_subtype_diff(TypeCacheEntry *typcache, Datum val1, Datum val2) +{ + float8 value; + + value = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo, + typcache->rng_collation, + val1, val2)); + /* Cope with buggy subtype_diff function by returning zero */ + if (value >= 0.0) + return value; + return 0.0; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/rangetypes_selfuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/rangetypes_selfuncs.c new file mode 100644 index 00000000000..fbabb3e18ce --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/rangetypes_selfuncs.c @@ -0,0 +1,1223 @@ +/*------------------------------------------------------------------------- + * + * rangetypes_selfuncs.c + * Functions for selectivity estimation of range operators + * + * Estimates are based on histograms of lower and upper bounds, and the + * fraction of empty ranges. 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/rangetypes_selfuncs.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <math.h> + +#include "access/htup_details.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_statistic.h" +#include "catalog/pg_type.h" +#include "utils/float.h" +#include "utils/fmgrprotos.h" +#include "utils/lsyscache.h" +#include "utils/rangetypes.h" +#include "utils/selfuncs.h" +#include "utils/typcache.h" + +static double calc_rangesel(TypeCacheEntry *typcache, VariableStatData *vardata, + const RangeType *constval, Oid operator); +static double default_range_selectivity(Oid operator); +static double calc_hist_selectivity(TypeCacheEntry *typcache, + VariableStatData *vardata, const RangeType *constval, + Oid operator); +static double calc_hist_selectivity_scalar(TypeCacheEntry *typcache, + const RangeBound *constbound, + const RangeBound *hist, int hist_nvalues, + bool equal); +static int rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value, + const RangeBound *hist, int hist_length, bool equal); +static float8 get_position(TypeCacheEntry *typcache, const RangeBound *value, + const RangeBound *hist1, const RangeBound *hist2); +static float8 get_len_position(double value, double hist1, double hist2); +static float8 get_distance(TypeCacheEntry *typcache, const RangeBound *bound1, + const RangeBound *bound2); +static int length_hist_bsearch(Datum *length_hist_values, + int length_hist_nvalues, double value, bool equal); +static double calc_length_hist_frac(Datum *length_hist_values, + int length_hist_nvalues, double length1, double length2, bool equal); +static double calc_hist_selectivity_contained(TypeCacheEntry *typcache, + const RangeBound *lower, RangeBound *upper, + const RangeBound *hist_lower, int hist_nvalues, + Datum *length_hist_values, int length_hist_nvalues); +static double calc_hist_selectivity_contains(TypeCacheEntry *typcache, + const RangeBound *lower, const RangeBound *upper, + const RangeBound *hist_lower, int hist_nvalues, + Datum *length_hist_values, int length_hist_nvalues); + +/* + * Returns a default selectivity estimate for given operator, when we don't + * have statistics or cannot use them for some reason. + */ +static double +default_range_selectivity(Oid operator) +{ + switch (operator) + { + case OID_RANGE_OVERLAP_OP: + return 0.01; + + case OID_RANGE_CONTAINS_OP: + case OID_RANGE_CONTAINED_OP: + return 0.005; + + case OID_RANGE_CONTAINS_ELEM_OP: + case OID_RANGE_ELEM_CONTAINED_OP: + + /* + * "range @> elem" is more or less identical to a scalar + * inequality "A >= b AND A <= c". 
+ */ + return DEFAULT_RANGE_INEQ_SEL; + + case OID_RANGE_LESS_OP: + case OID_RANGE_LESS_EQUAL_OP: + case OID_RANGE_GREATER_OP: + case OID_RANGE_GREATER_EQUAL_OP: + case OID_RANGE_LEFT_OP: + case OID_RANGE_RIGHT_OP: + case OID_RANGE_OVERLAPS_LEFT_OP: + case OID_RANGE_OVERLAPS_RIGHT_OP: + /* these are similar to regular scalar inequalities */ + return DEFAULT_INEQ_SEL; + + default: + /* all range operators should be handled above, but just in case */ + return 0.01; + } +} + +/* + * rangesel -- restriction selectivity for range operators + */ +Datum +rangesel(PG_FUNCTION_ARGS) +{ + PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); + Oid operator = PG_GETARG_OID(1); + List *args = (List *) PG_GETARG_POINTER(2); + int varRelid = PG_GETARG_INT32(3); + VariableStatData vardata; + Node *other; + bool varonleft; + Selectivity selec; + TypeCacheEntry *typcache = NULL; + RangeType *constrange = NULL; + + /* + * If expression is not (variable op something) or (something op + * variable), then punt and return a default estimate. + */ + if (!get_restriction_variable(root, args, varRelid, + &vardata, &other, &varonleft)) + PG_RETURN_FLOAT8(default_range_selectivity(operator)); + + /* + * Can't do anything useful if the something is not a constant, either. + */ + if (!IsA(other, Const)) + { + ReleaseVariableStats(vardata); + PG_RETURN_FLOAT8(default_range_selectivity(operator)); + } + + /* + * All the range operators are strict, so we can cope with a NULL constant + * right away. + */ + if (((Const *) other)->constisnull) + { + ReleaseVariableStats(vardata); + PG_RETURN_FLOAT8(0.0); + } + + /* + * If var is on the right, commute the operator, so that we can assume the + * var is on the left in what follows. + */ + if (!varonleft) + { + /* we have other Op var, commute to make var Op other */ + operator = get_commutator(operator); + if (!operator) + { + /* Use default selectivity (should we raise an error instead?) */ + ReleaseVariableStats(vardata); + PG_RETURN_FLOAT8(default_range_selectivity(operator)); + } + } + + /* + * OK, there's a Var and a Const we're dealing with here. We need the + * Const to be of same range type as the column, else we can't do anything + * useful. (Such cases will likely fail at runtime, but here we'd rather + * just return a default estimate.) + * + * If the operator is "range @> element", the constant should be of the + * element type of the range column. Convert it to a range that includes + * only that single point, so that we don't need special handling for that + * in what follows. + */ + if (operator == OID_RANGE_CONTAINS_ELEM_OP) + { + typcache = range_get_typcache(fcinfo, vardata.vartype); + + if (((Const *) other)->consttype == typcache->rngelemtype->type_id) + { + RangeBound lower, + upper; + + lower.inclusive = true; + lower.val = ((Const *) other)->constvalue; + lower.infinite = false; + lower.lower = true; + upper.inclusive = true; + upper.val = ((Const *) other)->constvalue; + upper.infinite = false; + upper.lower = false; + constrange = range_serialize(typcache, &lower, &upper, false, NULL); + } + } + else if (operator == OID_RANGE_ELEM_CONTAINED_OP) + { + /* + * Here, the Var is the elem, not the range. For now we just punt and + * return the default estimate. In future we could disassemble the + * range constant and apply scalarineqsel ... 
+ */ + } + else if (((Const *) other)->consttype == vardata.vartype) + { + /* Both sides are the same range type */ + typcache = range_get_typcache(fcinfo, vardata.vartype); + + constrange = DatumGetRangeTypeP(((Const *) other)->constvalue); + } + + /* + * If we got a valid constant on one side of the operator, proceed to + * estimate using statistics. Otherwise punt and return a default constant + * estimate. Note that calc_rangesel need not handle + * OID_RANGE_ELEM_CONTAINED_OP. + */ + if (constrange) + selec = calc_rangesel(typcache, &vardata, constrange, operator); + else + selec = default_range_selectivity(operator); + + ReleaseVariableStats(vardata); + + CLAMP_PROBABILITY(selec); + + PG_RETURN_FLOAT8((float8) selec); +} + +static double +calc_rangesel(TypeCacheEntry *typcache, VariableStatData *vardata, + const RangeType *constval, Oid operator) +{ + double hist_selec; + double selec; + float4 empty_frac, + null_frac; + + /* + * First look up the fraction of NULLs and empty ranges from pg_statistic. + */ + if (HeapTupleIsValid(vardata->statsTuple)) + { + Form_pg_statistic stats; + AttStatsSlot sslot; + + stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple); + null_frac = stats->stanullfrac; + + /* Try to get fraction of empty ranges */ + if (get_attstatsslot(&sslot, vardata->statsTuple, + STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM, + InvalidOid, + ATTSTATSSLOT_NUMBERS)) + { + if (sslot.nnumbers != 1) + elog(ERROR, "invalid empty fraction statistic"); /* shouldn't happen */ + empty_frac = sslot.numbers[0]; + free_attstatsslot(&sslot); + } + else + { + /* No empty fraction statistic. Assume no empty ranges. */ + empty_frac = 0.0; + } + } + else + { + /* + * No stats are available. Follow through the calculations below + * anyway, assuming no NULLs and no empty ranges. This still allows us + * to give a better-than-nothing estimate based on whether the + * constant is an empty range or not. + */ + null_frac = 0.0; + empty_frac = 0.0; + } + + if (RangeIsEmpty(constval)) + { + /* + * An empty range matches all ranges, all empty ranges, or nothing, + * depending on the operator + */ + switch (operator) + { + /* these return false if either argument is empty */ + case OID_RANGE_OVERLAP_OP: + case OID_RANGE_OVERLAPS_LEFT_OP: + case OID_RANGE_OVERLAPS_RIGHT_OP: + case OID_RANGE_LEFT_OP: + case OID_RANGE_RIGHT_OP: + /* nothing is less than an empty range */ + case OID_RANGE_LESS_OP: + selec = 0.0; + break; + + /* only empty ranges can be contained by an empty range */ + case OID_RANGE_CONTAINED_OP: + /* only empty ranges are <= an empty range */ + case OID_RANGE_LESS_EQUAL_OP: + selec = empty_frac; + break; + + /* everything contains an empty range */ + case OID_RANGE_CONTAINS_OP: + /* everything is >= an empty range */ + case OID_RANGE_GREATER_EQUAL_OP: + selec = 1.0; + break; + + /* all non-empty ranges are > an empty range */ + case OID_RANGE_GREATER_OP: + selec = 1.0 - empty_frac; + break; + + /* an element cannot be empty */ + case OID_RANGE_CONTAINS_ELEM_OP: + default: + elog(ERROR, "unexpected operator %u", operator); + selec = 0.0; /* keep compiler quiet */ + break; + } + } + else + { + /* + * Calculate selectivity using bound histograms. If that fails for + * some reason, e.g no histogram in pg_statistic, use the default + * constant estimate for the fraction of non-empty values. This is + * still somewhat better than just returning the default estimate, + * because this still takes into account the fraction of empty and + * NULL tuples, if we had statistics for them. 
+ */ + hist_selec = calc_hist_selectivity(typcache, vardata, constval, + operator); + if (hist_selec < 0.0) + hist_selec = default_range_selectivity(operator); + + /* + * Now merge the results for the empty ranges and histogram + * calculations, realizing that the histogram covers only the + * non-null, non-empty values. + */ + if (operator == OID_RANGE_CONTAINED_OP) + { + /* empty is contained by anything non-empty */ + selec = (1.0 - empty_frac) * hist_selec + empty_frac; + } + else + { + /* with any other operator, empty Op non-empty matches nothing */ + selec = (1.0 - empty_frac) * hist_selec; + } + } + + /* all range operators are strict */ + selec *= (1.0 - null_frac); + + /* result should be in range, but make sure... */ + CLAMP_PROBABILITY(selec); + + return selec; +} + +/* + * Calculate range operator selectivity using histograms of range bounds. + * + * This estimate is for the portion of values that are not empty and not + * NULL. + */ +static double +calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata, + const RangeType *constval, Oid operator) +{ + AttStatsSlot hslot; + AttStatsSlot lslot; + int nhist; + RangeBound *hist_lower; + RangeBound *hist_upper; + int i; + RangeBound const_lower; + RangeBound const_upper; + bool empty; + double hist_selec; + + /* Can't use the histogram with insecure range support functions */ + if (!statistic_proc_security_check(vardata, + typcache->rng_cmp_proc_finfo.fn_oid)) + return -1; + if (OidIsValid(typcache->rng_subdiff_finfo.fn_oid) && + !statistic_proc_security_check(vardata, + typcache->rng_subdiff_finfo.fn_oid)) + return -1; + + /* Try to get histogram of ranges */ + if (!(HeapTupleIsValid(vardata->statsTuple) && + get_attstatsslot(&hslot, vardata->statsTuple, + STATISTIC_KIND_BOUNDS_HISTOGRAM, InvalidOid, + ATTSTATSSLOT_VALUES))) + return -1.0; + + /* check that it's a histogram, not just a dummy entry */ + if (hslot.nvalues < 2) + { + free_attstatsslot(&hslot); + return -1.0; + } + + /* + * Convert histogram of ranges into histograms of its lower and upper + * bounds. + */ + nhist = hslot.nvalues; + hist_lower = (RangeBound *) palloc(sizeof(RangeBound) * nhist); + hist_upper = (RangeBound *) palloc(sizeof(RangeBound) * nhist); + for (i = 0; i < nhist; i++) + { + range_deserialize(typcache, DatumGetRangeTypeP(hslot.values[i]), + &hist_lower[i], &hist_upper[i], &empty); + /* The histogram should not contain any empty ranges */ + if (empty) + elog(ERROR, "bounds histogram contains an empty range"); + } + + /* @> and @< also need a histogram of range lengths */ + if (operator == OID_RANGE_CONTAINS_OP || + operator == OID_RANGE_CONTAINED_OP) + { + if (!(HeapTupleIsValid(vardata->statsTuple) && + get_attstatsslot(&lslot, vardata->statsTuple, + STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM, + InvalidOid, + ATTSTATSSLOT_VALUES))) + { + free_attstatsslot(&hslot); + return -1.0; + } + + /* check that it's a histogram, not just a dummy entry */ + if (lslot.nvalues < 2) + { + free_attstatsslot(&lslot); + free_attstatsslot(&hslot); + return -1.0; + } + } + else + memset(&lslot, 0, sizeof(lslot)); + + /* Extract the bounds of the constant value. */ + range_deserialize(typcache, constval, &const_lower, &const_upper, &empty); + Assert(!empty); + + /* + * Calculate selectivity comparing the lower or upper bound of the + * constant with the histogram of lower or upper bounds. 
+ */ + switch (operator) + { + case OID_RANGE_LESS_OP: + + /* + * The regular b-tree comparison operators (<, <=, >, >=) compare + * the lower bounds first, and the upper bounds for values with + * equal lower bounds. Estimate that by comparing the lower bounds + * only. This gives a fairly accurate estimate assuming there + * aren't many rows with a lower bound equal to the constant's + * lower bound. + */ + hist_selec = + calc_hist_selectivity_scalar(typcache, &const_lower, + hist_lower, nhist, false); + break; + + case OID_RANGE_LESS_EQUAL_OP: + hist_selec = + calc_hist_selectivity_scalar(typcache, &const_lower, + hist_lower, nhist, true); + break; + + case OID_RANGE_GREATER_OP: + hist_selec = + 1 - calc_hist_selectivity_scalar(typcache, &const_lower, + hist_lower, nhist, false); + break; + + case OID_RANGE_GREATER_EQUAL_OP: + hist_selec = + 1 - calc_hist_selectivity_scalar(typcache, &const_lower, + hist_lower, nhist, true); + break; + + case OID_RANGE_LEFT_OP: + /* var << const when upper(var) < lower(const) */ + hist_selec = + calc_hist_selectivity_scalar(typcache, &const_lower, + hist_upper, nhist, false); + break; + + case OID_RANGE_RIGHT_OP: + /* var >> const when lower(var) > upper(const) */ + hist_selec = + 1 - calc_hist_selectivity_scalar(typcache, &const_upper, + hist_lower, nhist, true); + break; + + case OID_RANGE_OVERLAPS_RIGHT_OP: + /* compare lower bounds */ + hist_selec = + 1 - calc_hist_selectivity_scalar(typcache, &const_lower, + hist_lower, nhist, false); + break; + + case OID_RANGE_OVERLAPS_LEFT_OP: + /* compare upper bounds */ + hist_selec = + calc_hist_selectivity_scalar(typcache, &const_upper, + hist_upper, nhist, true); + break; + + case OID_RANGE_OVERLAP_OP: + case OID_RANGE_CONTAINS_ELEM_OP: + + /* + * A && B <=> NOT (A << B OR A >> B). + * + * Since A << B and A >> B are mutually exclusive events we can + * sum their probabilities to find probability of (A << B OR A >> + * B). + * + * "range @> elem" is equivalent to "range && [elem,elem]". The + * caller already constructed the singular range from the element + * constant, so just treat it the same as &&. + */ + hist_selec = + calc_hist_selectivity_scalar(typcache, &const_lower, hist_upper, + nhist, false); + hist_selec += + (1.0 - calc_hist_selectivity_scalar(typcache, &const_upper, hist_lower, + nhist, true)); + hist_selec = 1.0 - hist_selec; + break; + + case OID_RANGE_CONTAINS_OP: + hist_selec = + calc_hist_selectivity_contains(typcache, &const_lower, + &const_upper, hist_lower, nhist, + lslot.values, lslot.nvalues); + break; + + case OID_RANGE_CONTAINED_OP: + if (const_lower.infinite) + { + /* + * Lower bound no longer matters. Just estimate the fraction + * with an upper bound <= const upper bound + */ + hist_selec = + calc_hist_selectivity_scalar(typcache, &const_upper, + hist_upper, nhist, true); + } + else if (const_upper.infinite) + { + hist_selec = + 1.0 - calc_hist_selectivity_scalar(typcache, &const_lower, + hist_lower, nhist, false); + } + else + { + hist_selec = + calc_hist_selectivity_contained(typcache, &const_lower, + &const_upper, hist_lower, nhist, + lslot.values, lslot.nvalues); + } + break; + + default: + elog(ERROR, "unknown range operator %u", operator); + hist_selec = -1.0; /* keep compiler quiet */ + break; + } + + free_attstatsslot(&lslot); + free_attstatsslot(&hslot); + + return hist_selec; +} + + +/* + * Look up the fraction of values less than (or equal, if 'equal' argument + * is true) a given const in a histogram of range bounds. 
+ */ +static double +calc_hist_selectivity_scalar(TypeCacheEntry *typcache, const RangeBound *constbound, + const RangeBound *hist, int hist_nvalues, bool equal) +{ + Selectivity selec; + int index; + + /* + * Find the histogram bin the given constant falls into. Estimate + * selectivity as the number of preceding whole bins. + */ + index = rbound_bsearch(typcache, constbound, hist, hist_nvalues, equal); + selec = (Selectivity) (Max(index, 0)) / (Selectivity) (hist_nvalues - 1); + + /* Adjust using linear interpolation within the bin */ + if (index >= 0 && index < hist_nvalues - 1) + selec += get_position(typcache, constbound, &hist[index], + &hist[index + 1]) / (Selectivity) (hist_nvalues - 1); + + return selec; +} + +/* + * Binary search on an array of range bounds. Returns greatest index of range + * bound in array which is less(less or equal) than given range bound. If all + * range bounds in array are greater or equal(greater) than given range bound, + * return -1. When "equal" flag is set conditions in brackets are used. + * + * This function is used in scalar operator selectivity estimation. Another + * goal of this function is to find a histogram bin where to stop + * interpolation of portion of bounds which are less than or equal to given bound. + */ +static int +rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist, + int hist_length, bool equal) +{ + int lower = -1, + upper = hist_length - 1, + cmp, + middle; + + while (lower < upper) + { + middle = (lower + upper + 1) / 2; + cmp = range_cmp_bounds(typcache, &hist[middle], value); + + if (cmp < 0 || (equal && cmp == 0)) + lower = middle; + else + upper = middle - 1; + } + return lower; +} + + +/* + * Binary search on length histogram. Returns greatest index of range length in + * histogram which is less than (less than or equal) the given length value. If + * all lengths in the histogram are greater than (greater than or equal) the + * given length, returns -1. + */ +static int +length_hist_bsearch(Datum *length_hist_values, int length_hist_nvalues, + double value, bool equal) +{ + int lower = -1, + upper = length_hist_nvalues - 1, + middle; + + while (lower < upper) + { + double middleval; + + middle = (lower + upper + 1) / 2; + + middleval = DatumGetFloat8(length_hist_values[middle]); + if (middleval < value || (equal && middleval <= value)) + lower = middle; + else + upper = middle - 1; + } + return lower; +} + +/* + * Get relative position of value in histogram bin in [0,1] range. + */ +static float8 +get_position(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist1, + const RangeBound *hist2) +{ + bool has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid); + float8 position; + + if (!hist1->infinite && !hist2->infinite) + { + float8 bin_width; + + /* + * Both bounds are finite. Assuming the subtype's comparison function + * works sanely, the value must be finite, too, because it lies + * somewhere between the bounds. If it doesn't, arbitrarily return + * 0.5. + */ + if (value->infinite) + return 0.5; + + /* Can't interpolate without subdiff function */ + if (!has_subdiff) + return 0.5; + + /* Calculate relative position using subdiff function. 
*/ + bin_width = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo, + typcache->rng_collation, + hist2->val, + hist1->val)); + if (isnan(bin_width) || bin_width <= 0.0) + return 0.5; /* punt for NaN or zero-width bin */ + + position = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo, + typcache->rng_collation, + value->val, + hist1->val)) + / bin_width; + + if (isnan(position)) + return 0.5; /* punt for NaN from subdiff, Inf/Inf, etc */ + + /* Relative position must be in [0,1] range */ + position = Max(position, 0.0); + position = Min(position, 1.0); + return position; + } + else if (hist1->infinite && !hist2->infinite) + { + /* + * Lower bin boundary is -infinite, upper is finite. If the value is + * -infinite, return 0.0 to indicate it's equal to the lower bound. + * Otherwise return 1.0 to indicate it's infinitely far from the lower + * bound. + */ + return ((value->infinite && value->lower) ? 0.0 : 1.0); + } + else if (!hist1->infinite && hist2->infinite) + { + /* same as above, but in reverse */ + return ((value->infinite && !value->lower) ? 1.0 : 0.0); + } + else + { + /* + * If both bin boundaries are infinite, they should be equal to each + * other, and the value should also be infinite and equal to both + * bounds. (But don't Assert that, to avoid crashing if a user creates + * a datatype with a broken comparison function). + * + * Assume the value to lie in the middle of the infinite bounds. + */ + return 0.5; + } +} + + +/* + * Get relative position of value in a length histogram bin in [0,1] range. + */ +static double +get_len_position(double value, double hist1, double hist2) +{ + if (!isinf(hist1) && !isinf(hist2)) + { + /* + * Both bounds are finite. The value should be finite too, because it + * lies somewhere between the bounds. If it doesn't, just return + * something. + */ + if (isinf(value)) + return 0.5; + + return 1.0 - (hist2 - value) / (hist2 - hist1); + } + else if (isinf(hist1) && !isinf(hist2)) + { + /* + * Lower bin boundary is -infinite, upper is finite. Return 1.0 to + * indicate the value is infinitely far from the lower bound. + */ + return 1.0; + } + else if (isinf(hist1) && isinf(hist2)) + { + /* same as above, but in reverse */ + return 0.0; + } + else + { + /* + * If both bin boundaries are infinite, they should be equal to each + * other, and the value should also be infinite and equal to both + * bounds. (But don't Assert that, to avoid crashing unnecessarily if + * the caller messes up) + * + * Assume the value to lie in the middle of the infinite bounds. + */ + return 0.5; + } +} + +/* + * Measure distance between two range bounds. + */ +static float8 +get_distance(TypeCacheEntry *typcache, const RangeBound *bound1, const RangeBound *bound2) +{ + bool has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid); + + if (!bound1->infinite && !bound2->infinite) + { + /* + * Neither bound is infinite, use subdiff function or return default + * value of 1.0 if no subdiff is available. 
+ */ + if (has_subdiff) + { + float8 res; + + res = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo, + typcache->rng_collation, + bound2->val, + bound1->val)); + /* Reject possible NaN result, also negative result */ + if (isnan(res) || res < 0.0) + return 1.0; + else + return res; + } + else + return 1.0; + } + else if (bound1->infinite && bound2->infinite) + { + /* Both bounds are infinite */ + if (bound1->lower == bound2->lower) + return 0.0; + else + return get_float8_infinity(); + } + else + { + /* One bound is infinite, the other is not */ + return get_float8_infinity(); + } +} + +/* + * Calculate the average of function P(x), in the interval [length1, length2], + * where P(x) is the fraction of tuples with length < x (or length <= x if + * 'equal' is true). + */ +static double +calc_length_hist_frac(Datum *length_hist_values, int length_hist_nvalues, + double length1, double length2, bool equal) +{ + double frac; + double A, + B, + PA, + PB; + double pos; + int i; + double area; + + Assert(length2 >= length1); + + if (length2 < 0.0) + return 0.0; /* shouldn't happen, but doesn't hurt to check */ + + /* All lengths in the table are <= infinite. */ + if (isinf(length2) && equal) + return 1.0; + + /*---------- + * The average of a function between A and B can be calculated by the + * formula: + * + * B + * 1 / + * ------- | P(x)dx + * B - A / + * A + * + * The geometrical interpretation of the integral is the area under the + * graph of P(x). P(x) is defined by the length histogram. We calculate + * the area in a piecewise fashion, iterating through the length histogram + * bins. Each bin is a trapezoid: + * + * P(x2) + * /| + * / | + * P(x1)/ | + * | | + * | | + * ---+---+-- + * x1 x2 + * + * where x1 and x2 are the boundaries of the current histogram, and P(x1) + * and P(x1) are the cumulative fraction of tuples at the boundaries. + * + * The area of each trapezoid is 1/2 * (P(x2) + P(x1)) * (x2 - x1) + * + * The first bin contains the lower bound passed by the caller, so we + * use linear interpolation between the previous and next histogram bin + * boundary to calculate P(x1). Likewise for the last bin: we use linear + * interpolation to calculate P(x2). For the bins in between, x1 and x2 + * lie on histogram bin boundaries, so P(x1) and P(x2) are simply: + * P(x1) = (bin index) / (number of bins) + * P(x2) = (bin index + 1 / (number of bins) + */ + + /* First bin, the one that contains lower bound */ + i = length_hist_bsearch(length_hist_values, length_hist_nvalues, length1, equal); + if (i >= length_hist_nvalues - 1) + return 1.0; + + if (i < 0) + { + i = 0; + pos = 0.0; + } + else + { + /* interpolate length1's position in the bin */ + pos = get_len_position(length1, + DatumGetFloat8(length_hist_values[i]), + DatumGetFloat8(length_hist_values[i + 1])); + } + PB = (((double) i) + pos) / (double) (length_hist_nvalues - 1); + B = length1; + + /* + * In the degenerate case that length1 == length2, simply return + * P(length1). This is not merely an optimization: if length1 == length2, + * we'd divide by zero later on. + */ + if (length2 == length1) + return PB; + + /* + * Loop through all the bins, until we hit the last bin, the one that + * contains the upper bound. 
(if lower and upper bounds are in the same + * bin, this falls out immediately) + */ + area = 0.0; + for (; i < length_hist_nvalues - 1; i++) + { + double bin_upper = DatumGetFloat8(length_hist_values[i + 1]); + + /* check if we've reached the last bin */ + if (!(bin_upper < length2 || (equal && bin_upper <= length2))) + break; + + /* the upper bound of previous bin is the lower bound of this bin */ + A = B; + PA = PB; + + B = bin_upper; + PB = (double) i / (double) (length_hist_nvalues - 1); + + /* + * Add the area of this trapezoid to the total. The point of the + * if-check is to avoid NaN, in the corner case that PA == PB == 0, + * and B - A == Inf. The area of a zero-height trapezoid (PA == PB == + * 0) is zero, regardless of the width (B - A). + */ + if (PA > 0 || PB > 0) + area += 0.5 * (PB + PA) * (B - A); + } + + /* Last bin */ + A = B; + PA = PB; + + B = length2; /* last bin ends at the query upper bound */ + if (i >= length_hist_nvalues - 1) + pos = 0.0; + else + { + if (DatumGetFloat8(length_hist_values[i]) == DatumGetFloat8(length_hist_values[i + 1])) + pos = 0.0; + else + pos = get_len_position(length2, DatumGetFloat8(length_hist_values[i]), DatumGetFloat8(length_hist_values[i + 1])); + } + PB = (((double) i) + pos) / (double) (length_hist_nvalues - 1); + + if (PA > 0 || PB > 0) + area += 0.5 * (PB + PA) * (B - A); + + /* + * Ok, we have calculated the area, ie. the integral. Divide by width to + * get the requested average. + * + * Avoid NaN arising from infinite / infinite. This happens at least if + * length2 is infinite. It's not clear what the correct value would be in + * that case, so 0.5 seems as good as any value. + */ + if (isinf(area) && isinf(length2)) + frac = 0.5; + else + frac = area / (length2 - length1); + + return frac; +} + +/* + * Calculate selectivity of "var <@ const" operator, ie. estimate the fraction + * of ranges that fall within the constant lower and upper bounds. This uses + * the histograms of range lower bounds and range lengths, on the assumption + * that the range lengths are independent of the lower bounds. + * + * The caller has already checked that constant lower and upper bounds are + * finite. + */ +static double +calc_hist_selectivity_contained(TypeCacheEntry *typcache, + const RangeBound *lower, RangeBound *upper, + const RangeBound *hist_lower, int hist_nvalues, + Datum *length_hist_values, int length_hist_nvalues) +{ + int i, + upper_index; + float8 prev_dist; + double bin_width; + double upper_bin_width; + double sum_frac; + + /* + * Begin by finding the bin containing the upper bound, in the lower bound + * histogram. Any range with a lower bound > constant upper bound can't + * match, ie. there are no matches in bins greater than upper_index. + */ + upper->inclusive = !upper->inclusive; + upper->lower = true; + upper_index = rbound_bsearch(typcache, upper, hist_lower, hist_nvalues, + false); + + /* + * If the upper bound value is below the histogram's lower limit, there + * are no matches. + */ + if (upper_index < 0) + return 0.0; + + /* + * If the upper bound value is at or beyond the histogram's upper limit, + * start our loop at the last actual bin, as though the upper bound were + * within that bin; get_position will clamp its result to 1.0 anyway. + * (This corresponds to assuming that the data population above the + * histogram's upper limit is empty, exactly like what we just assumed for + * the lower limit.) + */ + upper_index = Min(upper_index, hist_nvalues - 2); + + /* + * Calculate upper_bin_width, ie. 
the fraction of the (upper_index, + * upper_index + 1) bin which is greater than upper bound of query range + * using linear interpolation of subdiff function. + */ + upper_bin_width = get_position(typcache, upper, + &hist_lower[upper_index], + &hist_lower[upper_index + 1]); + + /* + * In the loop, dist and prev_dist are the distance of the "current" bin's + * lower and upper bounds from the constant upper bound. + * + * bin_width represents the width of the current bin. Normally it is 1.0, + * meaning a full width bin, but can be less in the corner cases: start + * and end of the loop. We start with bin_width = upper_bin_width, because + * we begin at the bin containing the upper bound. + */ + prev_dist = 0.0; + bin_width = upper_bin_width; + + sum_frac = 0.0; + for (i = upper_index; i >= 0; i--) + { + double dist; + double length_hist_frac; + bool final_bin = false; + + /* + * dist -- distance from upper bound of query range to lower bound of + * the current bin in the lower bound histogram. Or to the lower bound + * of the constant range, if this is the final bin, containing the + * constant lower bound. + */ + if (range_cmp_bounds(typcache, &hist_lower[i], lower) < 0) + { + dist = get_distance(typcache, lower, upper); + + /* + * Subtract from bin_width the portion of this bin that we want to + * ignore. + */ + bin_width -= get_position(typcache, lower, &hist_lower[i], + &hist_lower[i + 1]); + if (bin_width < 0.0) + bin_width = 0.0; + final_bin = true; + } + else + dist = get_distance(typcache, &hist_lower[i], upper); + + /* + * Estimate the fraction of tuples in this bin that are narrow enough + * to not exceed the distance to the upper bound of the query range. + */ + length_hist_frac = calc_length_hist_frac(length_hist_values, + length_hist_nvalues, + prev_dist, dist, true); + + /* + * Add the fraction of tuples in this bin, with a suitable length, to + * the total. + */ + sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1); + + if (final_bin) + break; + + bin_width = 1.0; + prev_dist = dist; + } + + return sum_frac; +} + +/* + * Calculate selectivity of "var @> const" operator, ie. estimate the fraction + * of ranges that contain the constant lower and upper bounds. This uses + * the histograms of range lower bounds and range lengths, on the assumption + * that the range lengths are independent of the lower bounds. + */ +static double +calc_hist_selectivity_contains(TypeCacheEntry *typcache, + const RangeBound *lower, const RangeBound *upper, + const RangeBound *hist_lower, int hist_nvalues, + Datum *length_hist_values, int length_hist_nvalues) +{ + int i, + lower_index; + double bin_width, + lower_bin_width; + double sum_frac; + float8 prev_dist; + + /* Find the bin containing the lower bound of query range. */ + lower_index = rbound_bsearch(typcache, lower, hist_lower, hist_nvalues, + true); + + /* + * If the lower bound value is below the histogram's lower limit, there + * are no matches. + */ + if (lower_index < 0) + return 0.0; + + /* + * If the lower bound value is at or beyond the histogram's upper limit, + * start our loop at the last actual bin, as though the upper bound were + * within that bin; get_position will clamp its result to 1.0 anyway. + * (This corresponds to assuming that the data population above the + * histogram's upper limit is empty, exactly like what we just assumed for + * the lower limit.) + */ + lower_index = Min(lower_index, hist_nvalues - 2); + + /* + * Calculate lower_bin_width, ie. 
the fraction of the of (lower_index, + * lower_index + 1) bin which is greater than lower bound of query range + * using linear interpolation of subdiff function. + */ + lower_bin_width = get_position(typcache, lower, &hist_lower[lower_index], + &hist_lower[lower_index + 1]); + + /* + * Loop through all the lower bound bins, smaller than the query lower + * bound. In the loop, dist and prev_dist are the distance of the + * "current" bin's lower and upper bounds from the constant upper bound. + * We begin from query lower bound, and walk backwards, so the first bin's + * upper bound is the query lower bound, and its distance to the query + * upper bound is the length of the query range. + * + * bin_width represents the width of the current bin. Normally it is 1.0, + * meaning a full width bin, except for the first bin, which is only + * counted up to the constant lower bound. + */ + prev_dist = get_distance(typcache, lower, upper); + sum_frac = 0.0; + bin_width = lower_bin_width; + for (i = lower_index; i >= 0; i--) + { + float8 dist; + double length_hist_frac; + + /* + * dist -- distance from upper bound of query range to current value + * of lower bound histogram or lower bound of query range (if we've + * reach it). + */ + dist = get_distance(typcache, &hist_lower[i], upper); + + /* + * Get average fraction of length histogram which covers intervals + * longer than (or equal to) distance to upper bound of query range. + */ + length_hist_frac = + 1.0 - calc_length_hist_frac(length_hist_values, + length_hist_nvalues, + prev_dist, dist, false); + + sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1); + + bin_width = 1.0; + prev_dist = dist; + } + + return sum_frac; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/rangetypes_spgist.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/rangetypes_spgist.c new file mode 100644 index 00000000000..834ee0bbd05 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/rangetypes_spgist.c @@ -0,0 +1,998 @@ +/*------------------------------------------------------------------------- + * + * rangetypes_spgist.c + * implementation of quad tree over ranges mapped to 2d-points for SP-GiST. + * + * Quad tree is a data structure similar to a binary tree, but is adapted to + * 2d data. Each inner node of a quad tree contains a point (centroid) which + * divides the 2d-space into 4 quadrants. Each quadrant is associated with a + * child node. + * + * Ranges are mapped to 2d-points so that the lower bound is one dimension, + * and the upper bound is another. By convention, we visualize the lower bound + * to be the horizontal axis, and upper bound the vertical axis. + * + * One quirk with this mapping is the handling of empty ranges. An empty range + * doesn't have lower and upper bounds, so it cannot be mapped to 2d space in + * a straightforward way. To cope with that, the root node can have a 5th + * quadrant, which is reserved for empty ranges. Furthermore, there can be + * inner nodes in the tree with no centroid. They contain only two child nodes, + * one for empty ranges and another for non-empty ones. Such a node can appear + * as the root node, or in the tree under the 5th child of the root node (in + * which case it will only contain empty nodes). + * + * The SP-GiST picksplit function uses medians along both axes as the centroid. 
+ * This implementation only uses the comparison function of the range element + * datatype, therefore it works for any range type. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/adt/rangetypes_spgist.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/spgist.h" +#include "access/stratnum.h" +#include "catalog/pg_type.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/rangetypes.h" + +static int16 getQuadrant(TypeCacheEntry *typcache, const RangeType *centroid, + const RangeType *tst); +static int bound_cmp(const void *a, const void *b, void *arg); + +static int adjacent_inner_consistent(TypeCacheEntry *typcache, + const RangeBound *arg, const RangeBound *centroid, + const RangeBound *prev); +static int adjacent_cmp_bounds(TypeCacheEntry *typcache, const RangeBound *arg, + const RangeBound *centroid); + +/* + * SP-GiST 'config' interface function. + */ +Datum +spg_range_quad_config(PG_FUNCTION_ARGS) +{ + /* spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0); */ + spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1); + + cfg->prefixType = ANYRANGEOID; + cfg->labelType = VOIDOID; /* we don't need node labels */ + cfg->canReturnData = true; + cfg->longValuesOK = false; + PG_RETURN_VOID(); +} + +/*---------- + * Determine which quadrant a 2d-mapped range falls into, relative to the + * centroid. + * + * Quadrants are numbered like this: + * + * 4 | 1 + * ----+---- + * 3 | 2 + * + * Where the lower bound of range is the horizontal axis and upper bound the + * vertical axis. + * + * Ranges on one of the axes are taken to lie in the quadrant with higher value + * along perpendicular axis. That is, a value on the horizontal axis is taken + * to belong to quadrant 1 or 4, and a value on the vertical axis is taken to + * belong to quadrant 1 or 2. A range equal to centroid is taken to lie in + * quadrant 1. + * + * Empty ranges are taken to lie in the special quadrant 5. + *---------- + */ +static int16 +getQuadrant(TypeCacheEntry *typcache, const RangeType *centroid, const RangeType *tst) +{ + RangeBound centroidLower, + centroidUpper; + bool centroidEmpty; + RangeBound lower, + upper; + bool empty; + + range_deserialize(typcache, centroid, ¢roidLower, ¢roidUpper, + ¢roidEmpty); + range_deserialize(typcache, tst, &lower, &upper, &empty); + + if (empty) + return 5; + + if (range_cmp_bounds(typcache, &lower, ¢roidLower) >= 0) + { + if (range_cmp_bounds(typcache, &upper, ¢roidUpper) >= 0) + return 1; + else + return 2; + } + else + { + if (range_cmp_bounds(typcache, &upper, ¢roidUpper) >= 0) + return 4; + else + return 3; + } +} + +/* + * Choose SP-GiST function: choose path for addition of new range. 
+ */ +Datum +spg_range_quad_choose(PG_FUNCTION_ARGS) +{ + spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0); + spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1); + RangeType *inRange = DatumGetRangeTypeP(in->datum), + *centroid; + int16 quadrant; + TypeCacheEntry *typcache; + + if (in->allTheSame) + { + out->resultType = spgMatchNode; + /* nodeN will be set by core */ + out->result.matchNode.levelAdd = 0; + out->result.matchNode.restDatum = RangeTypePGetDatum(inRange); + PG_RETURN_VOID(); + } + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(inRange)); + + /* + * A node with no centroid divides ranges purely on whether they're empty + * or not. All empty ranges go to child node 0, all non-empty ranges go to + * node 1. + */ + if (!in->hasPrefix) + { + out->resultType = spgMatchNode; + if (RangeIsEmpty(inRange)) + out->result.matchNode.nodeN = 0; + else + out->result.matchNode.nodeN = 1; + out->result.matchNode.levelAdd = 1; + out->result.matchNode.restDatum = RangeTypePGetDatum(inRange); + PG_RETURN_VOID(); + } + + centroid = DatumGetRangeTypeP(in->prefixDatum); + quadrant = getQuadrant(typcache, centroid, inRange); + + Assert(quadrant <= in->nNodes); + + /* Select node matching to quadrant number */ + out->resultType = spgMatchNode; + out->result.matchNode.nodeN = quadrant - 1; + out->result.matchNode.levelAdd = 1; + out->result.matchNode.restDatum = RangeTypePGetDatum(inRange); + + PG_RETURN_VOID(); +} + +/* + * Bound comparison for sorting. + */ +static int +bound_cmp(const void *a, const void *b, void *arg) +{ + RangeBound *ba = (RangeBound *) a; + RangeBound *bb = (RangeBound *) b; + TypeCacheEntry *typcache = (TypeCacheEntry *) arg; + + return range_cmp_bounds(typcache, ba, bb); +} + +/* + * Picksplit SP-GiST function: split ranges into nodes. Select "centroid" + * range and distribute ranges according to quadrants. + */ +Datum +spg_range_quad_picksplit(PG_FUNCTION_ARGS) +{ + spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0); + spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1); + int i; + int j; + int nonEmptyCount; + RangeType *centroid; + bool empty; + TypeCacheEntry *typcache; + + /* Use the median values of lower and upper bounds as the centroid range */ + RangeBound *lowerBounds, + *upperBounds; + + typcache = range_get_typcache(fcinfo, + RangeTypeGetOid(DatumGetRangeTypeP(in->datums[0]))); + + /* Allocate memory for bounds */ + lowerBounds = palloc(sizeof(RangeBound) * in->nTuples); + upperBounds = palloc(sizeof(RangeBound) * in->nTuples); + j = 0; + + /* Deserialize bounds of ranges, count non-empty ranges */ + for (i = 0; i < in->nTuples; i++) + { + range_deserialize(typcache, DatumGetRangeTypeP(in->datums[i]), + &lowerBounds[j], &upperBounds[j], &empty); + if (!empty) + j++; + } + nonEmptyCount = j; + + /* + * All the ranges are empty. The best we can do is to construct an inner + * node with no centroid, and put all ranges into node 0. If non-empty + * ranges are added later, they will be routed to node 1. 
+ */ + if (nonEmptyCount == 0) + { + out->nNodes = 2; + out->hasPrefix = false; + /* Prefix is empty */ + out->prefixDatum = PointerGetDatum(NULL); + out->nodeLabels = NULL; + + out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples); + out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples); + + /* Place all ranges into node 0 */ + for (i = 0; i < in->nTuples; i++) + { + RangeType *range = DatumGetRangeTypeP(in->datums[i]); + + out->leafTupleDatums[i] = RangeTypePGetDatum(range); + out->mapTuplesToNodes[i] = 0; + } + PG_RETURN_VOID(); + } + + /* Sort range bounds in order to find medians */ + qsort_arg(lowerBounds, nonEmptyCount, sizeof(RangeBound), + bound_cmp, typcache); + qsort_arg(upperBounds, nonEmptyCount, sizeof(RangeBound), + bound_cmp, typcache); + + /* Construct "centroid" range from medians of lower and upper bounds */ + centroid = range_serialize(typcache, &lowerBounds[nonEmptyCount / 2], + &upperBounds[nonEmptyCount / 2], false, NULL); + out->hasPrefix = true; + out->prefixDatum = RangeTypePGetDatum(centroid); + + /* Create node for empty ranges only if it is a root node */ + out->nNodes = (in->level == 0) ? 5 : 4; + out->nodeLabels = NULL; /* we don't need node labels */ + + out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples); + out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples); + + /* + * Assign ranges to corresponding nodes according to quadrants relative to + * "centroid" range. + */ + for (i = 0; i < in->nTuples; i++) + { + RangeType *range = DatumGetRangeTypeP(in->datums[i]); + int16 quadrant = getQuadrant(typcache, centroid, range); + + out->leafTupleDatums[i] = RangeTypePGetDatum(range); + out->mapTuplesToNodes[i] = quadrant - 1; + } + + PG_RETURN_VOID(); +} + +/* + * SP-GiST consistent function for inner nodes: check which nodes are + * consistent with given set of queries. + */ +Datum +spg_range_quad_inner_consistent(PG_FUNCTION_ARGS) +{ + spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0); + spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1); + int which; + int i; + MemoryContext oldCtx; + + /* + * For adjacent search we need also previous centroid (if any) to improve + * the precision of the consistent check. In this case needPrevious flag + * is set and centroid is passed into traversalValue. + */ + bool needPrevious = false; + + if (in->allTheSame) + { + /* Report that all nodes should be visited */ + out->nNodes = in->nNodes; + out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes); + for (i = 0; i < in->nNodes; i++) + out->nodeNumbers[i] = i; + PG_RETURN_VOID(); + } + + if (!in->hasPrefix) + { + /* + * No centroid on this inner node. Such a node has two child nodes, + * the first for empty ranges, and the second for non-empty ones. + */ + Assert(in->nNodes == 2); + + /* + * Nth bit of which variable means that (N - 1)th node should be + * visited. Initially all bits are set. Bits of nodes which should be + * skipped will be unset. + */ + which = (1 << 1) | (1 << 2); + for (i = 0; i < in->nkeys; i++) + { + StrategyNumber strategy = in->scankeys[i].sk_strategy; + bool empty; + + /* + * The only strategy when second argument of operator is not range + * is RANGESTRAT_CONTAINS_ELEM. 
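+ * Its argument is a single element of the range's subtype, so it can never
+ * be empty; for every other strategy the argument is itself a range and may
+ * be empty.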
+ */ + if (strategy != RANGESTRAT_CONTAINS_ELEM) + empty = RangeIsEmpty(DatumGetRangeTypeP(in->scankeys[i].sk_argument)); + else + empty = false; + + switch (strategy) + { + case RANGESTRAT_BEFORE: + case RANGESTRAT_OVERLEFT: + case RANGESTRAT_OVERLAPS: + case RANGESTRAT_OVERRIGHT: + case RANGESTRAT_AFTER: + case RANGESTRAT_ADJACENT: + /* These strategies return false if any argument is empty */ + if (empty) + which = 0; + else + which &= (1 << 2); + break; + + case RANGESTRAT_CONTAINS: + + /* + * All ranges contain an empty range. Only non-empty + * ranges can contain a non-empty range. + */ + if (!empty) + which &= (1 << 2); + break; + + case RANGESTRAT_CONTAINED_BY: + + /* + * Only an empty range is contained by an empty range. + * Both empty and non-empty ranges can be contained by a + * non-empty range. + */ + if (empty) + which &= (1 << 1); + break; + + case RANGESTRAT_CONTAINS_ELEM: + which &= (1 << 2); + break; + + case RANGESTRAT_EQ: + if (empty) + which &= (1 << 1); + else + which &= (1 << 2); + break; + + default: + elog(ERROR, "unrecognized range strategy: %d", strategy); + break; + } + if (which == 0) + break; /* no need to consider remaining conditions */ + } + } + else + { + RangeBound centroidLower, + centroidUpper; + bool centroidEmpty; + TypeCacheEntry *typcache; + RangeType *centroid; + + /* This node has a centroid. Fetch it. */ + centroid = DatumGetRangeTypeP(in->prefixDatum); + typcache = range_get_typcache(fcinfo, + RangeTypeGetOid(centroid)); + range_deserialize(typcache, centroid, ¢roidLower, ¢roidUpper, + ¢roidEmpty); + + Assert(in->nNodes == 4 || in->nNodes == 5); + + /* + * Nth bit of which variable means that (N - 1)th node (Nth quadrant) + * should be visited. Initially all bits are set. Bits of nodes which + * can be skipped will be unset. + */ + which = (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4) | (1 << 5); + + for (i = 0; i < in->nkeys; i++) + { + StrategyNumber strategy; + RangeBound lower, + upper; + bool empty; + RangeType *range = NULL; + + RangeType *prevCentroid = NULL; + RangeBound prevLower, + prevUpper; + bool prevEmpty; + + /* Restrictions on range bounds according to scan strategy */ + RangeBound *minLower = NULL, + *maxLower = NULL, + *minUpper = NULL, + *maxUpper = NULL; + + /* Are the restrictions on range bounds inclusive? */ + bool inclusive = true; + bool strictEmpty = true; + int cmp, + which1, + which2; + + strategy = in->scankeys[i].sk_strategy; + + /* + * RANGESTRAT_CONTAINS_ELEM is just like RANGESTRAT_CONTAINS, but + * the argument is a single element. Expand the single element to + * a range containing only the element, and treat it like + * RANGESTRAT_CONTAINS. + */ + if (strategy == RANGESTRAT_CONTAINS_ELEM) + { + lower.inclusive = true; + lower.infinite = false; + lower.lower = true; + lower.val = in->scankeys[i].sk_argument; + + upper.inclusive = true; + upper.infinite = false; + upper.lower = false; + upper.val = in->scankeys[i].sk_argument; + + empty = false; + + strategy = RANGESTRAT_CONTAINS; + } + else + { + range = DatumGetRangeTypeP(in->scankeys[i].sk_argument); + range_deserialize(typcache, range, &lower, &upper, &empty); + } + + /* + * Most strategies are handled by forming a bounding box from the + * search key, defined by a minLower, maxLower, minUpper, + * maxUpper. Some modify 'which' directly, to specify exactly + * which quadrants need to be visited. + * + * For most strategies, nothing matches an empty search key, and + * an empty range never matches a non-empty key. If a strategy + * does not behave like that wrt. 
empty ranges, set strictEmpty to + * false. + */ + switch (strategy) + { + case RANGESTRAT_BEFORE: + + /* + * Range A is before range B if upper bound of A is lower + * than lower bound of B. + */ + maxUpper = &lower; + inclusive = false; + break; + + case RANGESTRAT_OVERLEFT: + + /* + * Range A is overleft to range B if upper bound of A is + * less than or equal to upper bound of B. + */ + maxUpper = &upper; + break; + + case RANGESTRAT_OVERLAPS: + + /* + * Non-empty ranges overlap, if lower bound of each range + * is lower or equal to upper bound of the other range. + */ + maxLower = &upper; + minUpper = &lower; + break; + + case RANGESTRAT_OVERRIGHT: + + /* + * Range A is overright to range B if lower bound of A is + * greater than or equal to lower bound of B. + */ + minLower = &lower; + break; + + case RANGESTRAT_AFTER: + + /* + * Range A is after range B if lower bound of A is greater + * than upper bound of B. + */ + minLower = &upper; + inclusive = false; + break; + + case RANGESTRAT_ADJACENT: + if (empty) + break; /* Skip to strictEmpty check. */ + + /* + * Previously selected quadrant could exclude possibility + * for lower or upper bounds to be adjacent. Deserialize + * previous centroid range if present for checking this. + */ + if (in->traversalValue) + { + prevCentroid = in->traversalValue; + range_deserialize(typcache, prevCentroid, + &prevLower, &prevUpper, &prevEmpty); + } + + /* + * For a range's upper bound to be adjacent to the + * argument's lower bound, it will be found along the line + * adjacent to (and just below) Y=lower. Therefore, if the + * argument's lower bound is less than the centroid's + * upper bound, the line falls in quadrants 2 and 3; if + * greater, the line falls in quadrants 1 and 4. (see + * adjacent_cmp_bounds for description of edge cases). + */ + cmp = adjacent_inner_consistent(typcache, &lower, + ¢roidUpper, + prevCentroid ? &prevUpper : NULL); + if (cmp > 0) + which1 = (1 << 1) | (1 << 4); + else if (cmp < 0) + which1 = (1 << 2) | (1 << 3); + else + which1 = 0; + + /* + * Also search for ranges's adjacent to argument's upper + * bound. They will be found along the line adjacent to + * (and just right of) X=upper, which falls in quadrants 3 + * and 4, or 1 and 2. + */ + cmp = adjacent_inner_consistent(typcache, &upper, + ¢roidLower, + prevCentroid ? &prevLower : NULL); + if (cmp > 0) + which2 = (1 << 1) | (1 << 2); + else if (cmp < 0) + which2 = (1 << 3) | (1 << 4); + else + which2 = 0; + + /* We must chase down ranges adjacent to either bound. */ + which &= which1 | which2; + + needPrevious = true; + break; + + case RANGESTRAT_CONTAINS: + + /* + * Non-empty range A contains non-empty range B if lower + * bound of A is lower or equal to lower bound of range B + * and upper bound of range A is greater than or equal to + * upper bound of range A. + * + * All non-empty ranges contain an empty range. + */ + strictEmpty = false; + if (!empty) + { + which &= (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4); + maxLower = &lower; + minUpper = &upper; + } + break; + + case RANGESTRAT_CONTAINED_BY: + /* The opposite of contains. */ + strictEmpty = false; + if (empty) + { + /* An empty range is only contained by an empty range */ + which &= (1 << 5); + } + else + { + minLower = &lower; + maxUpper = &upper; + } + break; + + case RANGESTRAT_EQ: + + /* + * Equal range can be only in the same quadrant where + * argument would be placed to. 
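+ * For example, with a centroid of [10,20) a query range of [5,15) maps to
+ * quadrant 3, so only the node for quadrant 3 survives in 'which'.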
+ */ + strictEmpty = false; + which &= (1 << getQuadrant(typcache, centroid, range)); + break; + + default: + elog(ERROR, "unrecognized range strategy: %d", strategy); + break; + } + + if (strictEmpty) + { + if (empty) + { + /* Scan key is empty, no branches are satisfying */ + which = 0; + break; + } + else + { + /* Shouldn't visit tree branch with empty ranges */ + which &= (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4); + } + } + + /* + * Using the bounding box, see which quadrants we have to descend + * into. + */ + if (minLower) + { + /* + * If the centroid's lower bound is less than or equal to the + * minimum lower bound, anything in the 3rd and 4th quadrants + * will have an even smaller lower bound, and thus can't + * match. + */ + if (range_cmp_bounds(typcache, ¢roidLower, minLower) <= 0) + which &= (1 << 1) | (1 << 2) | (1 << 5); + } + if (maxLower) + { + /* + * If the centroid's lower bound is greater than the maximum + * lower bound, anything in the 1st and 2nd quadrants will + * also have a greater than or equal lower bound, and thus + * can't match. If the centroid's lower bound is equal to the + * maximum lower bound, we can still exclude the 1st and 2nd + * quadrants if we're looking for a value strictly greater + * than the maximum. + */ + + cmp = range_cmp_bounds(typcache, ¢roidLower, maxLower); + if (cmp > 0 || (!inclusive && cmp == 0)) + which &= (1 << 3) | (1 << 4) | (1 << 5); + } + if (minUpper) + { + /* + * If the centroid's upper bound is less than or equal to the + * minimum upper bound, anything in the 2nd and 3rd quadrants + * will have an even smaller upper bound, and thus can't + * match. + */ + if (range_cmp_bounds(typcache, ¢roidUpper, minUpper) <= 0) + which &= (1 << 1) | (1 << 4) | (1 << 5); + } + if (maxUpper) + { + /* + * If the centroid's upper bound is greater than the maximum + * upper bound, anything in the 1st and 4th quadrants will + * also have a greater than or equal upper bound, and thus + * can't match. If the centroid's upper bound is equal to the + * maximum upper bound, we can still exclude the 1st and 4th + * quadrants if we're looking for a value strictly greater + * than the maximum. + */ + + cmp = range_cmp_bounds(typcache, ¢roidUpper, maxUpper); + if (cmp > 0 || (!inclusive && cmp == 0)) + which &= (1 << 2) | (1 << 3) | (1 << 5); + } + + if (which == 0) + break; /* no need to consider remaining conditions */ + } + } + + /* We must descend into the quadrant(s) identified by 'which' */ + out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes); + if (needPrevious) + out->traversalValues = (void **) palloc(sizeof(void *) * in->nNodes); + out->nNodes = 0; + + /* + * Elements of traversalValues should be allocated in + * traversalMemoryContext + */ + oldCtx = MemoryContextSwitchTo(in->traversalMemoryContext); + + for (i = 1; i <= in->nNodes; i++) + { + if (which & (1 << i)) + { + /* Save previous prefix if needed */ + if (needPrevious) + { + Datum previousCentroid; + + /* + * We know, that in->prefixDatum in this place is varlena, + * because it's range + */ + previousCentroid = datumCopy(in->prefixDatum, false, -1); + out->traversalValues[out->nNodes] = (void *) previousCentroid; + } + out->nodeNumbers[out->nNodes] = i - 1; + out->nNodes++; + } + } + + MemoryContextSwitchTo(oldCtx); + + PG_RETURN_VOID(); +} + +/* + * adjacent_cmp_bounds + * + * Given an argument and centroid bound, this function determines if any + * bounds that are adjacent to the argument are smaller than, or greater than + * or equal to centroid. 
For brevity, we call the arg < centroid "left", and + * arg >= centroid case "right". This corresponds to how the quadrants are + * arranged, if you imagine that "left" is equivalent to "down" and "right" + * is equivalent to "up". + * + * For the "left" case, returns -1, and for the "right" case, returns 1. + */ +static int +adjacent_cmp_bounds(TypeCacheEntry *typcache, const RangeBound *arg, + const RangeBound *centroid) +{ + int cmp; + + Assert(arg->lower != centroid->lower); + + cmp = range_cmp_bounds(typcache, arg, centroid); + + if (centroid->lower) + { + /*------ + * The argument is an upper bound, we are searching for adjacent lower + * bounds. A matching adjacent lower bound must be *larger* than the + * argument, but only just. + * + * The following table illustrates the desired result with a fixed + * argument bound, and different centroids. The CMP column shows + * the value of 'cmp' variable, and ADJ shows whether the argument + * and centroid are adjacent, per bounds_adjacent(). (N) means we + * don't need to check for that case, because it's implied by CMP. + * With the argument range [..., 500), the adjacent range we're + * searching for is [500, ...): + * + * ARGUMENT CENTROID CMP ADJ + * [..., 500) [498, ...) > (N) [500, ...) is to the right + * [..., 500) [499, ...) = (N) [500, ...) is to the right + * [..., 500) [500, ...) < Y [500, ...) is to the right + * [..., 500) [501, ...) < N [500, ...) is to the left + * + * So, we must search left when the argument is smaller than, and not + * adjacent, to the centroid. Otherwise search right. + *------ + */ + if (cmp < 0 && !bounds_adjacent(typcache, *arg, *centroid)) + return -1; + else + return 1; + } + else + { + /*------ + * The argument is a lower bound, we are searching for adjacent upper + * bounds. A matching adjacent upper bound must be *smaller* than the + * argument, but only just. + * + * ARGUMENT CENTROID CMP ADJ + * [500, ...) [..., 499) > (N) [..., 500) is to the right + * [500, ...) [..., 500) > (Y) [..., 500) is to the right + * [500, ...) [..., 501) = (N) [..., 500) is to the left + * [500, ...) [..., 502) < (N) [..., 500) is to the left + * + * We must search left when the argument is smaller than or equal to + * the centroid. Otherwise search right. We don't need to check + * whether the argument is adjacent with the centroid, because it + * doesn't matter. + *------ + */ + if (cmp <= 0) + return -1; + else + return 1; + } +} + +/*---------- + * adjacent_inner_consistent + * + * Like adjacent_cmp_bounds, but also takes into account the previous + * level's centroid. We might've traversed left (or right) at the previous + * node, in search for ranges adjacent to the other bound, even though we + * already ruled out the possibility for any matches in that direction for + * this bound. By comparing the argument with the previous centroid, and + * the previous centroid with the current centroid, we can determine which + * direction we should've moved in at previous level, and which direction we + * actually moved. + * + * If there can be any matches to the left, returns -1. If to the right, + * returns 1. If there can be no matches below this centroid, because we + * already ruled them out at the previous level, returns 0. + * + * XXX: Comparing just the previous and current level isn't foolproof; we + * might still search some branches unnecessarily. 
For example, imagine that + * we are searching for value 15, and we traverse the following centroids + * (only considering one bound for the moment): + * + * Level 1: 20 + * Level 2: 50 + * Level 3: 25 + * + * At this point, previous centroid is 50, current centroid is 25, and the + * target value is to the left. But because we already moved right from + * centroid 20 to 50 in the first level, there cannot be any values < 20 in + * the current branch. But we don't know that just by looking at the previous + * and current centroid, so we traverse left, unnecessarily. The reason we are + * down this branch is that we're searching for matches with the *other* + * bound. If we kept track of which bound we are searching for explicitly, + * instead of deducing that from the previous and current centroid, we could + * avoid some unnecessary work. + *---------- + */ +static int +adjacent_inner_consistent(TypeCacheEntry *typcache, const RangeBound *arg, + const RangeBound *centroid, const RangeBound *prev) +{ + if (prev) + { + int prevcmp; + int cmp; + + /* + * Which direction were we supposed to traverse at previous level, + * left or right? + */ + prevcmp = adjacent_cmp_bounds(typcache, arg, prev); + + /* and which direction did we actually go? */ + cmp = range_cmp_bounds(typcache, centroid, prev); + + /* if the two don't agree, there's nothing to see here */ + if ((prevcmp < 0 && cmp >= 0) || (prevcmp > 0 && cmp < 0)) + return 0; + } + + return adjacent_cmp_bounds(typcache, arg, centroid); +} + +/* + * SP-GiST consistent function for leaf nodes: check leaf value against query + * using corresponding function. + */ +Datum +spg_range_quad_leaf_consistent(PG_FUNCTION_ARGS) +{ + spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0); + spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1); + RangeType *leafRange = DatumGetRangeTypeP(in->leafDatum); + TypeCacheEntry *typcache; + bool res; + int i; + + /* all tests are exact */ + out->recheck = false; + + /* leafDatum is what it is... 
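+ * the leaf tuple stores the complete range value, so it can be handed back
+ * unchanged as the reconstructed value (canReturnData was set in the config
+ * function).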
*/ + out->leafValue = in->leafDatum; + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(leafRange)); + + /* Perform the required comparison(s) */ + res = true; + for (i = 0; i < in->nkeys; i++) + { + Datum keyDatum = in->scankeys[i].sk_argument; + + /* Call the function corresponding to the scan strategy */ + switch (in->scankeys[i].sk_strategy) + { + case RANGESTRAT_BEFORE: + res = range_before_internal(typcache, leafRange, + DatumGetRangeTypeP(keyDatum)); + break; + case RANGESTRAT_OVERLEFT: + res = range_overleft_internal(typcache, leafRange, + DatumGetRangeTypeP(keyDatum)); + break; + case RANGESTRAT_OVERLAPS: + res = range_overlaps_internal(typcache, leafRange, + DatumGetRangeTypeP(keyDatum)); + break; + case RANGESTRAT_OVERRIGHT: + res = range_overright_internal(typcache, leafRange, + DatumGetRangeTypeP(keyDatum)); + break; + case RANGESTRAT_AFTER: + res = range_after_internal(typcache, leafRange, + DatumGetRangeTypeP(keyDatum)); + break; + case RANGESTRAT_ADJACENT: + res = range_adjacent_internal(typcache, leafRange, + DatumGetRangeTypeP(keyDatum)); + break; + case RANGESTRAT_CONTAINS: + res = range_contains_internal(typcache, leafRange, + DatumGetRangeTypeP(keyDatum)); + break; + case RANGESTRAT_CONTAINED_BY: + res = range_contained_by_internal(typcache, leafRange, + DatumGetRangeTypeP(keyDatum)); + break; + case RANGESTRAT_CONTAINS_ELEM: + res = range_contains_elem_internal(typcache, leafRange, + keyDatum); + break; + case RANGESTRAT_EQ: + res = range_eq_internal(typcache, leafRange, + DatumGetRangeTypeP(keyDatum)); + break; + default: + elog(ERROR, "unrecognized range strategy: %d", + in->scankeys[i].sk_strategy); + break; + } + + /* + * If leaf datum doesn't match to a query key, no need to check + * subsequent keys. + */ + if (!res) + break; + } + + PG_RETURN_BOOL(res); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/rangetypes_typanalyze.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/rangetypes_typanalyze.c new file mode 100644 index 00000000000..86810a1a6e6 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/rangetypes_typanalyze.c @@ -0,0 +1,429 @@ +/*------------------------------------------------------------------------- + * + * rangetypes_typanalyze.c + * Functions for gathering statistics from range columns + * + * For a range type column, histograms of lower and upper bounds, and + * the fraction of NULL and empty ranges are collected. + * + * Both histograms have the same length, and they are combined into a + * single array of ranges. This has the same shape as the histogram that + * std_typanalyze would collect, but the values are different. Each range + * in the array is a valid range, even though the lower and upper bounds + * come from different tuples. In theory, the standard scalar selectivity + * functions could be used with the combined histogram. 
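+ * For example, if the non-empty sample ranges are [1,5), [2,3) and [4,8),
+ * the sorted lower bounds are {1,2,4} and the sorted upper bounds are
+ * {3,5,8}, so the combined histogram stores [1,3), [2,5) and [4,8); the
+ * bounds of [1,3) and [2,5) come from different sample rows.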
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/rangetypes_typanalyze.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "catalog/pg_operator.h" +#include "commands/vacuum.h" +#include "utils/float.h" +#include "utils/fmgrprotos.h" +#include "utils/lsyscache.h" +#include "utils/rangetypes.h" +#include "utils/multirangetypes.h" +#include "varatt.h" + +static int float8_qsort_cmp(const void *a1, const void *a2, void *arg); +static int range_bound_qsort_cmp(const void *a1, const void *a2, void *arg); +static void compute_range_stats(VacAttrStats *stats, + AnalyzeAttrFetchFunc fetchfunc, int samplerows, + double totalrows); + +/* + * range_typanalyze -- typanalyze function for range columns + */ +Datum +range_typanalyze(PG_FUNCTION_ARGS) +{ + VacAttrStats *stats = (VacAttrStats *) PG_GETARG_POINTER(0); + TypeCacheEntry *typcache; + Form_pg_attribute attr = stats->attr; + + /* Get information about range type; note column might be a domain */ + typcache = range_get_typcache(fcinfo, getBaseType(stats->attrtypid)); + + if (attr->attstattarget < 0) + attr->attstattarget = default_statistics_target; + + stats->compute_stats = compute_range_stats; + stats->extra_data = typcache; + /* same as in std_typanalyze */ + stats->minrows = 300 * attr->attstattarget; + + PG_RETURN_BOOL(true); +} + +/* + * multirange_typanalyze -- typanalyze function for multirange columns + * + * We do the same analysis as for ranges, but on the smallest range that + * completely includes the multirange. + */ +Datum +multirange_typanalyze(PG_FUNCTION_ARGS) +{ + VacAttrStats *stats = (VacAttrStats *) PG_GETARG_POINTER(0); + TypeCacheEntry *typcache; + Form_pg_attribute attr = stats->attr; + + /* Get information about multirange type; note column might be a domain */ + typcache = multirange_get_typcache(fcinfo, getBaseType(stats->attrtypid)); + + if (attr->attstattarget < 0) + attr->attstattarget = default_statistics_target; + + stats->compute_stats = compute_range_stats; + stats->extra_data = typcache; + /* same as in std_typanalyze */ + stats->minrows = 300 * attr->attstattarget; + + PG_RETURN_BOOL(true); +} + +/* + * Comparison function for sorting float8s, used for range lengths. + */ +static int +float8_qsort_cmp(const void *a1, const void *a2, void *arg) +{ + const float8 *f1 = (const float8 *) a1; + const float8 *f2 = (const float8 *) a2; + + if (*f1 < *f2) + return -1; + else if (*f1 == *f2) + return 0; + else + return 1; +} + +/* + * Comparison function for sorting RangeBounds. 
+ */ +static int +range_bound_qsort_cmp(const void *a1, const void *a2, void *arg) +{ + RangeBound *b1 = (RangeBound *) a1; + RangeBound *b2 = (RangeBound *) a2; + TypeCacheEntry *typcache = (TypeCacheEntry *) arg; + + return range_cmp_bounds(typcache, b1, b2); +} + +/* + * compute_range_stats() -- compute statistics for a range column + */ +static void +compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, + int samplerows, double totalrows) +{ + TypeCacheEntry *typcache = (TypeCacheEntry *) stats->extra_data; + TypeCacheEntry *mltrng_typcache = NULL; + bool has_subdiff; + int null_cnt = 0; + int non_null_cnt = 0; + int non_empty_cnt = 0; + int empty_cnt = 0; + int range_no; + int slot_idx; + int num_bins = stats->attr->attstattarget; + int num_hist; + float8 *lengths; + RangeBound *lowers, + *uppers; + double total_width = 0; + + if (typcache->typtype == TYPTYPE_MULTIRANGE) + { + mltrng_typcache = typcache; + typcache = typcache->rngtype; + } + else + Assert(typcache->typtype == TYPTYPE_RANGE); + has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid); + + /* Allocate memory to hold range bounds and lengths of the sample ranges. */ + lowers = (RangeBound *) palloc(sizeof(RangeBound) * samplerows); + uppers = (RangeBound *) palloc(sizeof(RangeBound) * samplerows); + lengths = (float8 *) palloc(sizeof(float8) * samplerows); + + /* Loop over the sample ranges. */ + for (range_no = 0; range_no < samplerows; range_no++) + { + Datum value; + bool isnull, + empty; + MultirangeType *multirange; + RangeType *range; + RangeBound lower, + upper; + float8 length; + + vacuum_delay_point(); + + value = fetchfunc(stats, range_no, &isnull); + if (isnull) + { + /* range is null, just count that */ + null_cnt++; + continue; + } + + /* + * XXX: should we ignore wide values, like std_typanalyze does, to + * avoid bloating the statistics table? + */ + total_width += VARSIZE_ANY(DatumGetPointer(value)); + + /* Get range and deserialize it for further analysis. */ + if (mltrng_typcache != NULL) + { + /* Treat multiranges like a big range without gaps. */ + multirange = DatumGetMultirangeTypeP(value); + if (!MultirangeIsEmpty(multirange)) + { + RangeBound tmp; + + multirange_get_bounds(typcache, multirange, 0, + &lower, &tmp); + multirange_get_bounds(typcache, multirange, + multirange->rangeCount - 1, + &tmp, &upper); + empty = false; + } + else + { + empty = true; + } + } + else + { + range = DatumGetRangeTypeP(value); + range_deserialize(typcache, range, &lower, &upper, &empty); + } + + if (!empty) + { + /* Remember bounds and length for further usage in histograms */ + lowers[non_empty_cnt] = lower; + uppers[non_empty_cnt] = upper; + + if (lower.infinite || upper.infinite) + { + /* Length of any kind of an infinite range is infinite */ + length = get_float8_infinity(); + } + else if (has_subdiff) + { + /* + * For an ordinary range, use subdiff function between upper + * and lower bound values. + */ + length = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo, + typcache->rng_collation, + upper.val, lower.val)); + } + else + { + /* Use default value of 1.0 if no subdiff is available. */ + length = 1.0; + } + lengths[non_empty_cnt] = length; + + non_empty_cnt++; + } + else + empty_cnt++; + + non_null_cnt++; + } + + slot_idx = 0; + + /* We can only compute real stats if we found some non-null values. 
*/ + if (non_null_cnt > 0) + { + Datum *bound_hist_values; + Datum *length_hist_values; + int pos, + posfrac, + delta, + deltafrac, + i; + MemoryContext old_cxt; + float4 *emptyfrac; + + stats->stats_valid = true; + /* Do the simple null-frac and width stats */ + stats->stanullfrac = (double) null_cnt / (double) samplerows; + stats->stawidth = total_width / (double) non_null_cnt; + + /* Estimate that non-null values are unique */ + stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac); + + /* Must copy the target values into anl_context */ + old_cxt = MemoryContextSwitchTo(stats->anl_context); + + /* + * Generate a bounds histogram slot entry if there are at least two + * values. + */ + if (non_empty_cnt >= 2) + { + /* Sort bound values */ + qsort_interruptible(lowers, non_empty_cnt, sizeof(RangeBound), + range_bound_qsort_cmp, typcache); + qsort_interruptible(uppers, non_empty_cnt, sizeof(RangeBound), + range_bound_qsort_cmp, typcache); + + num_hist = non_empty_cnt; + if (num_hist > num_bins) + num_hist = num_bins + 1; + + bound_hist_values = (Datum *) palloc(num_hist * sizeof(Datum)); + + /* + * The object of this loop is to construct ranges from first and + * last entries in lowers[] and uppers[] along with evenly-spaced + * values in between. So the i'th value is a range of lowers[(i * + * (nvals - 1)) / (num_hist - 1)] and uppers[(i * (nvals - 1)) / + * (num_hist - 1)]. But computing that subscript directly risks + * integer overflow when the stats target is more than a couple + * thousand. Instead we add (nvals - 1) / (num_hist - 1) to pos + * at each step, tracking the integral and fractional parts of the + * sum separately. + */ + delta = (non_empty_cnt - 1) / (num_hist - 1); + deltafrac = (non_empty_cnt - 1) % (num_hist - 1); + pos = posfrac = 0; + + for (i = 0; i < num_hist; i++) + { + bound_hist_values[i] = PointerGetDatum(range_serialize(typcache, + &lowers[pos], + &uppers[pos], + false, + NULL)); + pos += delta; + posfrac += deltafrac; + if (posfrac >= (num_hist - 1)) + { + /* fractional part exceeds 1, carry to integer part */ + pos++; + posfrac -= (num_hist - 1); + } + } + + stats->stakind[slot_idx] = STATISTIC_KIND_BOUNDS_HISTOGRAM; + stats->stavalues[slot_idx] = bound_hist_values; + stats->numvalues[slot_idx] = num_hist; + + /* Store ranges even if we're analyzing a multirange column */ + stats->statypid[slot_idx] = typcache->type_id; + stats->statyplen[slot_idx] = typcache->typlen; + stats->statypbyval[slot_idx] = typcache->typbyval; + stats->statypalign[slot_idx] = typcache->typalign; + + slot_idx++; + } + + /* + * Generate a length histogram slot entry if there are at least two + * values. + */ + if (non_empty_cnt >= 2) + { + /* + * Ascending sort of range lengths for further filling of + * histogram + */ + qsort_interruptible(lengths, non_empty_cnt, sizeof(float8), + float8_qsort_cmp, NULL); + + num_hist = non_empty_cnt; + if (num_hist > num_bins) + num_hist = num_bins + 1; + + length_hist_values = (Datum *) palloc(num_hist * sizeof(Datum)); + + /* + * The object of this loop is to copy the first and last lengths[] + * entries along with evenly-spaced values in between. So the i'th + * value is lengths[(i * (nvals - 1)) / (num_hist - 1)]. But + * computing that subscript directly risks integer overflow when + * the stats target is more than a couple thousand. Instead we + * add (nvals - 1) / (num_hist - 1) to pos at each step, tracking + * the integral and fractional parts of the sum separately. 
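+ * For example, with nvals = 5 and num_hist = 3 we get delta = 2 and
+ * deltafrac = 0, so pos takes the values 0, 2 and 4 and the copied entries
+ * are lengths[0], lengths[2] and lengths[4].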
+ */ + delta = (non_empty_cnt - 1) / (num_hist - 1); + deltafrac = (non_empty_cnt - 1) % (num_hist - 1); + pos = posfrac = 0; + + for (i = 0; i < num_hist; i++) + { + length_hist_values[i] = Float8GetDatum(lengths[pos]); + pos += delta; + posfrac += deltafrac; + if (posfrac >= (num_hist - 1)) + { + /* fractional part exceeds 1, carry to integer part */ + pos++; + posfrac -= (num_hist - 1); + } + } + } + else + { + /* + * Even when we don't create the histogram, store an empty array + * to mean "no histogram". We can't just leave stavalues NULL, + * because get_attstatsslot() errors if you ask for stavalues, and + * it's NULL. We'll still store the empty fraction in stanumbers. + */ + length_hist_values = palloc(0); + num_hist = 0; + } + stats->staop[slot_idx] = Float8LessOperator; + stats->stacoll[slot_idx] = InvalidOid; + stats->stavalues[slot_idx] = length_hist_values; + stats->numvalues[slot_idx] = num_hist; + stats->statypid[slot_idx] = FLOAT8OID; + stats->statyplen[slot_idx] = sizeof(float8); + stats->statypbyval[slot_idx] = FLOAT8PASSBYVAL; + stats->statypalign[slot_idx] = 'd'; + + /* Store the fraction of empty ranges */ + emptyfrac = (float4 *) palloc(sizeof(float4)); + *emptyfrac = ((double) empty_cnt) / ((double) non_null_cnt); + stats->stanumbers[slot_idx] = emptyfrac; + stats->numnumbers[slot_idx] = 1; + + stats->stakind[slot_idx] = STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM; + slot_idx++; + + MemoryContextSwitchTo(old_cxt); + } + else if (null_cnt > 0) + { + /* We found only nulls; assume the column is entirely null */ + stats->stats_valid = true; + stats->stanullfrac = 1.0; + stats->stawidth = 0; /* "unknown" */ + stats->stadistinct = 0.0; /* "unknown" */ + } + + /* + * We don't need to bother cleaning up any of our temporary palloc's. The + * hashtable should also go away, as it used a child memory context. + */ +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/regexp.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/regexp.c new file mode 100644 index 00000000000..23c295fbc82 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/regexp.c @@ -0,0 +1,2018 @@ +/*------------------------------------------------------------------------- + * + * regexp.c + * Postgres' interface to the regular expression package. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/regexp.c + * + * Alistair Crooks added the code for the regex caching + * agc - cached the regular expressions used - there's a good chance + * that we'll get a hit, so this saves a compile step for every + * attempted match. I haven't actually measured the speed improvement, + * but it `looks' a lot quicker visually when watching regression + * test output. + * + * agc - incorporated Keith Bostic's Berkeley regex code into + * the tree for all ports. To distinguish this regex code from any that + * is existent on a platform, I've prepended the string "pg_" to + * the functions regcomp, regerror, regexec and regfree. + * Fixed a bug that was originally a typo by me, where `i' was used + * instead of `oldest' when compiling regular expressions - benign + * results mostly, although occasionally it bit you... 
+ * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "catalog/pg_type.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "regex/regex.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/memutils.h" +#include "utils/varlena.h" + +#define PG_GETARG_TEXT_PP_IF_EXISTS(_n) \ + (PG_NARGS() > (_n) ? PG_GETARG_TEXT_PP(_n) : NULL) + + +/* all the options of interest for regex functions */ +typedef struct pg_re_flags +{ + int cflags; /* compile flags for Spencer's regex code */ + bool glob; /* do it globally (for each occurrence) */ +} pg_re_flags; + +/* cross-call state for regexp_match and regexp_split functions */ +typedef struct regexp_matches_ctx +{ + text *orig_str; /* data string in original TEXT form */ + int nmatches; /* number of places where pattern matched */ + int npatterns; /* number of capturing subpatterns */ + /* We store start char index and end+1 char index for each match */ + /* so the number of entries in match_locs is nmatches * npatterns * 2 */ + int *match_locs; /* 0-based character indexes */ + int next_match; /* 0-based index of next match to process */ + /* workspace for build_regexp_match_result() */ + Datum *elems; /* has npatterns elements */ + bool *nulls; /* has npatterns elements */ + pg_wchar *wide_str; /* wide-char version of original string */ + char *conv_buf; /* conversion buffer, if needed */ + int conv_bufsiz; /* size thereof */ +} regexp_matches_ctx; + +/* + * We cache precompiled regular expressions using a "self organizing list" + * structure, in which recently-used items tend to be near the front. + * Whenever we use an entry, it's moved up to the front of the list. + * Over time, an item's average position corresponds to its frequency of use. + * + * When we first create an entry, it's inserted at the front of + * the array, dropping the entry at the end of the array if necessary to + * make room. (This might seem to be weighting the new entry too heavily, + * but if we insert new entries further back, we'll be unable to adjust to + * a sudden shift in the query mix where we are presented with MAX_CACHED_RES + * never-before-seen items used circularly. We ought to be able to handle + * that case, so we have to insert at the front.) + * + * Knuth mentions a variant strategy in which a used item is moved up just + * one place in the list. Although he says this uses fewer comparisons on + * average, it seems not to adapt very well to the situation where you have + * both some reusable patterns and a steady stream of non-reusable patterns. + * A reusable pattern that isn't used at least as often as non-reusable + * patterns are seen will "fail to keep up" and will drop off the end of the + * cache. With move-to-front, a reusable pattern is guaranteed to stay in + * the cache as long as it's used at least once in every MAX_CACHED_RES uses. + */ + +/* this is the maximum number of cached regular expressions */ +#ifndef MAX_CACHED_RES +#define MAX_CACHED_RES 32 +#endif + +/* A parent memory context for regular expressions. */ +static __thread MemoryContext RegexpCacheMemoryContext; + +/* this structure describes one cached regular expression */ +typedef struct cached_re_str +{ + MemoryContext cre_context; /* memory context for this regexp */ + char *cre_pat; /* original RE (not null terminated!) 
*/ + int cre_pat_len; /* length of original RE, in bytes */ + int cre_flags; /* compile flags: extended,icase etc */ + Oid cre_collation; /* collation to use */ + regex_t cre_re; /* the compiled regular expression */ +} cached_re_str; + +static __thread int num_res = 0; /* # of cached re's */ +static __thread cached_re_str re_array[MAX_CACHED_RES]; /* cached re's */ + +void RE_cleanup_cache(void) { + if (RegexpCacheMemoryContext) { + MemoryContextDelete(RegexpCacheMemoryContext); + RegexpCacheMemoryContext = NULL; + } + + num_res = 0; +} + +/* Local functions */ +static regexp_matches_ctx *setup_regexp_matches(text *orig_str, text *pattern, + pg_re_flags *re_flags, + int start_search, + Oid collation, + bool use_subpatterns, + bool ignore_degenerate, + bool fetching_unmatched); +static ArrayType *build_regexp_match_result(regexp_matches_ctx *matchctx); +static Datum build_regexp_split_result(regexp_matches_ctx *splitctx); + + +/* + * RE_compile_and_cache - compile a RE, caching if possible + * + * Returns regex_t * + * + * text_re --- the pattern, expressed as a TEXT object + * cflags --- compile options for the pattern + * collation --- collation to use for LC_CTYPE-dependent behavior + * + * Pattern is given in the database encoding. We internally convert to + * an array of pg_wchar, which is what Spencer's regex package wants. + */ +regex_t * +RE_compile_and_cache(text *text_re, int cflags, Oid collation) +{ + int text_re_len = VARSIZE_ANY_EXHDR(text_re); + char *text_re_val = VARDATA_ANY(text_re); + pg_wchar *pattern; + int pattern_len; + int i; + int regcomp_result; + cached_re_str re_temp; + char errMsg[100]; + MemoryContext oldcontext; + + /* + * Look for a match among previously compiled REs. Since the data + * structure is self-organizing with most-used entries at the front, our + * search strategy can just be to scan from the front. + */ + for (i = 0; i < num_res; i++) + { + if (re_array[i].cre_pat_len == text_re_len && + re_array[i].cre_flags == cflags && + re_array[i].cre_collation == collation && + memcmp(re_array[i].cre_pat, text_re_val, text_re_len) == 0) + { + /* + * Found a match; move it to front if not there already. + */ + if (i > 0) + { + re_temp = re_array[i]; + memmove(&re_array[1], &re_array[0], i * sizeof(cached_re_str)); + re_array[0] = re_temp; + } + + return &re_array[0].cre_re; + } + } + + /* Set up the cache memory on first go through. */ + if (unlikely(RegexpCacheMemoryContext == NULL)) + RegexpCacheMemoryContext = + AllocSetContextCreate(TopMemoryContext, + "RegexpCacheMemoryContext", + ALLOCSET_SMALL_SIZES); + + /* + * Couldn't find it, so try to compile the new RE. To avoid leaking + * resources on failure, we build into the re_temp local. + */ + + /* Convert pattern string to wide characters */ + pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar)); + pattern_len = pg_mb2wchar_with_len(text_re_val, + pattern, + text_re_len); + + /* + * Make a memory context for this compiled regexp. This is initially a + * child of the current memory context, so it will be cleaned up + * automatically if compilation is interrupted and throws an ERROR. We'll + * re-parent it under the longer lived cache context if we make it to the + * bottom of this function. 
+ */ + re_temp.cre_context = AllocSetContextCreate(CurrentMemoryContext, + "RegexpMemoryContext", + ALLOCSET_SMALL_SIZES); + oldcontext = MemoryContextSwitchTo(re_temp.cre_context); + + regcomp_result = pg_regcomp(&re_temp.cre_re, + pattern, + pattern_len, + cflags, + collation); + + pfree(pattern); + + if (regcomp_result != REG_OKAY) + { + /* re didn't compile (no need for pg_regfree, if so) */ + pg_regerror(regcomp_result, &re_temp.cre_re, errMsg, sizeof(errMsg)); + ereport(ERROR, + (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), + errmsg("invalid regular expression: %s", errMsg))); + } + + /* Copy the pattern into the per-regexp memory context. */ + re_temp.cre_pat = palloc(text_re_len + 1); + memcpy(re_temp.cre_pat, text_re_val, text_re_len); + + /* + * NUL-terminate it only for the benefit of the identifier used for the + * memory context, visible in the pg_backend_memory_contexts view. + */ + re_temp.cre_pat[text_re_len] = 0; + MemoryContextSetIdentifier(re_temp.cre_context, re_temp.cre_pat); + + re_temp.cre_pat_len = text_re_len; + re_temp.cre_flags = cflags; + re_temp.cre_collation = collation; + + /* + * Okay, we have a valid new item in re_temp; insert it into the storage + * array. Discard last entry if needed. + */ + if (num_res >= MAX_CACHED_RES) + { + --num_res; + Assert(num_res < MAX_CACHED_RES); + /* Delete the memory context holding the regexp and pattern. */ + MemoryContextDelete(re_array[num_res].cre_context); + } + + /* Re-parent the memory context to our long-lived cache context. */ + MemoryContextSetParent(re_temp.cre_context, RegexpCacheMemoryContext); + + if (num_res > 0) + memmove(&re_array[1], &re_array[0], num_res * sizeof(cached_re_str)); + + re_array[0] = re_temp; + num_res++; + + MemoryContextSwitchTo(oldcontext); + + return &re_array[0].cre_re; +} + +/* + * RE_wchar_execute - execute a RE on pg_wchar data + * + * Returns true on match, false on no match + * + * re --- the compiled pattern as returned by RE_compile_and_cache + * data --- the data to match against (need not be null-terminated) + * data_len --- the length of the data string + * start_search -- the offset in the data to start searching + * nmatch, pmatch --- optional return area for match details + * + * Data is given as array of pg_wchar which is what Spencer's regex package + * wants. + */ +static bool +RE_wchar_execute(regex_t *re, pg_wchar *data, int data_len, + int start_search, int nmatch, regmatch_t *pmatch) +{ + int regexec_result; + char errMsg[100]; + + /* Perform RE match and return result */ + regexec_result = pg_regexec(re, + data, + data_len, + start_search, + NULL, /* no details */ + nmatch, + pmatch, + 0); + + if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH) + { + /* re failed??? */ + pg_regerror(regexec_result, re, errMsg, sizeof(errMsg)); + ereport(ERROR, + (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), + errmsg("regular expression failed: %s", errMsg))); + } + + return (regexec_result == REG_OKAY); +} + +/* + * RE_execute - execute a RE + * + * Returns true on match, false on no match + * + * re --- the compiled pattern as returned by RE_compile_and_cache + * dat --- the data to match against (need not be null-terminated) + * dat_len --- the length of the data string + * nmatch, pmatch --- optional return area for match details + * + * Data is given in the database encoding. We internally + * convert to array of pg_wchar which is what Spencer's regex package wants. 
+ */ +static bool +RE_execute(regex_t *re, char *dat, int dat_len, + int nmatch, regmatch_t *pmatch) +{ + pg_wchar *data; + int data_len; + bool match; + + /* Convert data string to wide characters */ + data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar)); + data_len = pg_mb2wchar_with_len(dat, data, dat_len); + + /* Perform RE match and return result */ + match = RE_wchar_execute(re, data, data_len, 0, nmatch, pmatch); + + pfree(data); + return match; +} + +/* + * RE_compile_and_execute - compile and execute a RE + * + * Returns true on match, false on no match + * + * text_re --- the pattern, expressed as a TEXT object + * dat --- the data to match against (need not be null-terminated) + * dat_len --- the length of the data string + * cflags --- compile options for the pattern + * collation --- collation to use for LC_CTYPE-dependent behavior + * nmatch, pmatch --- optional return area for match details + * + * Both pattern and data are given in the database encoding. We internally + * convert to array of pg_wchar which is what Spencer's regex package wants. + */ +bool +RE_compile_and_execute(text *text_re, char *dat, int dat_len, + int cflags, Oid collation, + int nmatch, regmatch_t *pmatch) +{ + regex_t *re; + + /* Use REG_NOSUB if caller does not want sub-match details */ + if (nmatch < 2) + cflags |= REG_NOSUB; + + /* Compile RE */ + re = RE_compile_and_cache(text_re, cflags, collation); + + return RE_execute(re, dat, dat_len, nmatch, pmatch); +} + + +/* + * parse_re_flags - parse the options argument of regexp_match and friends + * + * flags --- output argument, filled with desired options + * opts --- TEXT object, or NULL for defaults + * + * This accepts all the options allowed by any of the callers; callers that + * don't want some have to reject them after the fact. + */ +static void +parse_re_flags(pg_re_flags *flags, text *opts) +{ + /* regex flavor is always folded into the compile flags */ + flags->cflags = REG_ADVANCED; + flags->glob = false; + + if (opts) + { + char *opt_p = VARDATA_ANY(opts); + int opt_len = VARSIZE_ANY_EXHDR(opts); + int i; + + for (i = 0; i < opt_len; i++) + { + switch (opt_p[i]) + { + case 'g': + flags->glob = true; + break; + case 'b': /* BREs (but why???) */ + flags->cflags &= ~(REG_ADVANCED | REG_EXTENDED | REG_QUOTE); + break; + case 'c': /* case sensitive */ + flags->cflags &= ~REG_ICASE; + break; + case 'e': /* plain EREs */ + flags->cflags |= REG_EXTENDED; + flags->cflags &= ~(REG_ADVANCED | REG_QUOTE); + break; + case 'i': /* case insensitive */ + flags->cflags |= REG_ICASE; + break; + case 'm': /* Perloid synonym for n */ + case 'n': /* \n affects ^ $ . [^ */ + flags->cflags |= REG_NEWLINE; + break; + case 'p': /* ~Perl, \n affects . 
[^ */ + flags->cflags |= REG_NLSTOP; + flags->cflags &= ~REG_NLANCH; + break; + case 'q': /* literal string */ + flags->cflags |= REG_QUOTE; + flags->cflags &= ~(REG_ADVANCED | REG_EXTENDED); + break; + case 's': /* single line, \n ordinary */ + flags->cflags &= ~REG_NEWLINE; + break; + case 't': /* tight syntax */ + flags->cflags &= ~REG_EXPANDED; + break; + case 'w': /* weird, \n affects ^ $ only */ + flags->cflags &= ~REG_NLSTOP; + flags->cflags |= REG_NLANCH; + break; + case 'x': /* expanded syntax */ + flags->cflags |= REG_EXPANDED; + break; + default: + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid regular expression option: \"%.*s\"", + pg_mblen(opt_p + i), opt_p + i))); + break; + } + } + } +} + + +/* + * interface routines called by the function manager + */ + +Datum +nameregexeq(PG_FUNCTION_ARGS) +{ + Name n = PG_GETARG_NAME(0); + text *p = PG_GETARG_TEXT_PP(1); + + PG_RETURN_BOOL(RE_compile_and_execute(p, + NameStr(*n), + strlen(NameStr(*n)), + REG_ADVANCED, + PG_GET_COLLATION(), + 0, NULL)); +} + +Datum +nameregexne(PG_FUNCTION_ARGS) +{ + Name n = PG_GETARG_NAME(0); + text *p = PG_GETARG_TEXT_PP(1); + + PG_RETURN_BOOL(!RE_compile_and_execute(p, + NameStr(*n), + strlen(NameStr(*n)), + REG_ADVANCED, + PG_GET_COLLATION(), + 0, NULL)); +} + +Datum +textregexeq(PG_FUNCTION_ARGS) +{ + text *s = PG_GETARG_TEXT_PP(0); + text *p = PG_GETARG_TEXT_PP(1); + + PG_RETURN_BOOL(RE_compile_and_execute(p, + VARDATA_ANY(s), + VARSIZE_ANY_EXHDR(s), + REG_ADVANCED, + PG_GET_COLLATION(), + 0, NULL)); +} + +Datum +textregexne(PG_FUNCTION_ARGS) +{ + text *s = PG_GETARG_TEXT_PP(0); + text *p = PG_GETARG_TEXT_PP(1); + + PG_RETURN_BOOL(!RE_compile_and_execute(p, + VARDATA_ANY(s), + VARSIZE_ANY_EXHDR(s), + REG_ADVANCED, + PG_GET_COLLATION(), + 0, NULL)); +} + + +/* + * routines that use the regexp stuff, but ignore the case. + * for this, we use the REG_ICASE flag to pg_regcomp + */ + + +Datum +nameicregexeq(PG_FUNCTION_ARGS) +{ + Name n = PG_GETARG_NAME(0); + text *p = PG_GETARG_TEXT_PP(1); + + PG_RETURN_BOOL(RE_compile_and_execute(p, + NameStr(*n), + strlen(NameStr(*n)), + REG_ADVANCED | REG_ICASE, + PG_GET_COLLATION(), + 0, NULL)); +} + +Datum +nameicregexne(PG_FUNCTION_ARGS) +{ + Name n = PG_GETARG_NAME(0); + text *p = PG_GETARG_TEXT_PP(1); + + PG_RETURN_BOOL(!RE_compile_and_execute(p, + NameStr(*n), + strlen(NameStr(*n)), + REG_ADVANCED | REG_ICASE, + PG_GET_COLLATION(), + 0, NULL)); +} + +Datum +texticregexeq(PG_FUNCTION_ARGS) +{ + text *s = PG_GETARG_TEXT_PP(0); + text *p = PG_GETARG_TEXT_PP(1); + + PG_RETURN_BOOL(RE_compile_and_execute(p, + VARDATA_ANY(s), + VARSIZE_ANY_EXHDR(s), + REG_ADVANCED | REG_ICASE, + PG_GET_COLLATION(), + 0, NULL)); +} + +Datum +texticregexne(PG_FUNCTION_ARGS) +{ + text *s = PG_GETARG_TEXT_PP(0); + text *p = PG_GETARG_TEXT_PP(1); + + PG_RETURN_BOOL(!RE_compile_and_execute(p, + VARDATA_ANY(s), + VARSIZE_ANY_EXHDR(s), + REG_ADVANCED | REG_ICASE, + PG_GET_COLLATION(), + 0, NULL)); +} + + +/* + * textregexsubstr() + * Return a substring matched by a regular expression. + */ +Datum +textregexsubstr(PG_FUNCTION_ARGS) +{ + text *s = PG_GETARG_TEXT_PP(0); + text *p = PG_GETARG_TEXT_PP(1); + regex_t *re; + regmatch_t pmatch[2]; + int so, + eo; + + /* Compile RE */ + re = RE_compile_and_cache(p, REG_ADVANCED, PG_GET_COLLATION()); + + /* + * We pass two regmatch_t structs to get info about the overall match and + * the match for the first parenthesized subexpression (if any). 
If there + * is a parenthesized subexpression, we return what it matched; else + * return what the whole regexp matched. + */ + if (!RE_execute(re, + VARDATA_ANY(s), VARSIZE_ANY_EXHDR(s), + 2, pmatch)) + PG_RETURN_NULL(); /* definitely no match */ + + if (re->re_nsub > 0) + { + /* has parenthesized subexpressions, use the first one */ + so = pmatch[1].rm_so; + eo = pmatch[1].rm_eo; + } + else + { + /* no parenthesized subexpression, use whole match */ + so = pmatch[0].rm_so; + eo = pmatch[0].rm_eo; + } + + /* + * It is possible to have a match to the whole pattern but no match for a + * subexpression; for example 'foo(bar)?' is considered to match 'foo' but + * there is no subexpression match. So this extra test for match failure + * is not redundant. + */ + if (so < 0 || eo < 0) + PG_RETURN_NULL(); + + return DirectFunctionCall3(text_substr, + PointerGetDatum(s), + Int32GetDatum(so + 1), + Int32GetDatum(eo - so)); +} + +/* + * textregexreplace_noopt() + * Return a string matched by a regular expression, with replacement. + * + * This version doesn't have an option argument: we default to case + * sensitive match, replace the first instance only. + */ +Datum +textregexreplace_noopt(PG_FUNCTION_ARGS) +{ + text *s = PG_GETARG_TEXT_PP(0); + text *p = PG_GETARG_TEXT_PP(1); + text *r = PG_GETARG_TEXT_PP(2); + + PG_RETURN_TEXT_P(replace_text_regexp(s, p, r, + REG_ADVANCED, PG_GET_COLLATION(), + 0, 1)); +} + +/* + * textregexreplace() + * Return a string matched by a regular expression, with replacement. + */ +Datum +textregexreplace(PG_FUNCTION_ARGS) +{ + text *s = PG_GETARG_TEXT_PP(0); + text *p = PG_GETARG_TEXT_PP(1); + text *r = PG_GETARG_TEXT_PP(2); + text *opt = PG_GETARG_TEXT_PP(3); + pg_re_flags flags; + + /* + * regexp_replace() with four arguments will be preferentially resolved as + * this form when the fourth argument is of type UNKNOWN. However, the + * user might have intended to call textregexreplace_extended_no_n. If we + * see flags that look like an integer, emit the same error that + * parse_re_flags would, but add a HINT about how to fix it. + */ + if (VARSIZE_ANY_EXHDR(opt) > 0) + { + char *opt_p = VARDATA_ANY(opt); + + if (*opt_p >= '0' && *opt_p <= '9') + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid regular expression option: \"%.*s\"", + pg_mblen(opt_p), opt_p), + errhint("If you meant to use regexp_replace() with a start parameter, cast the fourth argument to integer explicitly."))); + } + + parse_re_flags(&flags, opt); + + PG_RETURN_TEXT_P(replace_text_regexp(s, p, r, + flags.cflags, PG_GET_COLLATION(), + 0, flags.glob ? 0 : 1)); +} + +/* + * textregexreplace_extended() + * Return a string matched by a regular expression, with replacement. + * Extends textregexreplace by allowing a start position and the + * choice of the occurrence to replace (0 means all occurrences). 
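+ * For example, regexp_replace('yabadabadoo', 'ba', 'X', 1, 2) searches from
+ * character 1 and replaces only the second match, giving 'yabadaXdoo'.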
+ */ +Datum +textregexreplace_extended(PG_FUNCTION_ARGS) +{ + text *s = PG_GETARG_TEXT_PP(0); + text *p = PG_GETARG_TEXT_PP(1); + text *r = PG_GETARG_TEXT_PP(2); + int start = 1; + int n = 1; + text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(5); + pg_re_flags re_flags; + + /* Collect optional parameters */ + if (PG_NARGS() > 3) + { + start = PG_GETARG_INT32(3); + if (start <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid value for parameter \"%s\": %d", + "start", start))); + } + if (PG_NARGS() > 4) + { + n = PG_GETARG_INT32(4); + if (n < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid value for parameter \"%s\": %d", + "n", n))); + } + + /* Determine options */ + parse_re_flags(&re_flags, flags); + + /* If N was not specified, deduce it from the 'g' flag */ + if (PG_NARGS() <= 4) + n = re_flags.glob ? 0 : 1; + + /* Do the replacement(s) */ + PG_RETURN_TEXT_P(replace_text_regexp(s, p, r, + re_flags.cflags, PG_GET_COLLATION(), + start - 1, n)); +} + +/* This is separate to keep the opr_sanity regression test from complaining */ +Datum +textregexreplace_extended_no_n(PG_FUNCTION_ARGS) +{ + return textregexreplace_extended(fcinfo); +} + +/* This is separate to keep the opr_sanity regression test from complaining */ +Datum +textregexreplace_extended_no_flags(PG_FUNCTION_ARGS) +{ + return textregexreplace_extended(fcinfo); +} + +/* + * similar_to_escape(), similar_escape() + * + * Convert a SQL "SIMILAR TO" regexp pattern to POSIX style, so it can be + * used by our regexp engine. + * + * similar_escape_internal() is the common workhorse for three SQL-exposed + * functions. esc_text can be passed as NULL to select the default escape + * (which is '\'), or as an empty string to select no escape character. + */ +static text * +similar_escape_internal(text *pat_text, text *esc_text) +{ + text *result; + char *p, + *e, + *r; + int plen, + elen; + bool afterescape = false; + bool incharclass = false; + int nquotes = 0; + + p = VARDATA_ANY(pat_text); + plen = VARSIZE_ANY_EXHDR(pat_text); + if (esc_text == NULL) + { + /* No ESCAPE clause provided; default to backslash as escape */ + e = "\\"; + elen = 1; + } + else + { + e = VARDATA_ANY(esc_text); + elen = VARSIZE_ANY_EXHDR(esc_text); + if (elen == 0) + e = NULL; /* no escape character */ + else if (elen > 1) + { + int escape_mblen = pg_mbstrlen_with_len(e, elen); + + if (escape_mblen > 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE), + errmsg("invalid escape string"), + errhint("Escape string must be empty or one character."))); + } + } + + /*---------- + * We surround the transformed input string with + * ^(?: ... )$ + * which requires some explanation. We need "^" and "$" to force + * the pattern to match the entire input string as per the SQL spec. + * The "(?:" and ")" are a non-capturing set of parens; we have to have + * parens in case the string contains "|", else the "^" and "$" will + * be bound into the first and last alternatives which is not what we + * want, and the parens must be non capturing because we don't want them + * to count when selecting output for SUBSTRING. + * + * When the pattern is divided into three parts by escape-double-quotes, + * what we emit is + * ^(?:part1){1,1}?(part2){1,1}(?:part3)$ + * which requires even more explanation. The "{1,1}?" on part1 makes it + * non-greedy so that it will match the smallest possible amount of text + * not the largest, as required by SQL. 
The plain parens around part2 + * are capturing parens so that that part is what controls the result of + * SUBSTRING. The "{1,1}" forces part2 to be greedy, so that it matches + * the largest possible amount of text; hence part3 must match the + * smallest amount of text, as required by SQL. We don't need an explicit + * greediness marker on part3. Note that this also confines the effects + * of any "|" characters to the respective part, which is what we want. + * + * The SQL spec says that SUBSTRING's pattern must contain exactly two + * escape-double-quotes, but we only complain if there's more than two. + * With none, we act as though part1 and part3 are empty; with one, we + * act as though part3 is empty. Both behaviors fall out of omitting + * the relevant part separators in the above expansion. If the result + * of this function is used in a plain regexp match (SIMILAR TO), the + * escape-double-quotes have no effect on the match behavior. + *---------- + */ + + /* + * We need room for the prefix/postfix and part separators, plus as many + * as 3 output bytes per input byte; since the input is at most 1GB this + * can't overflow size_t. + */ + result = (text *) palloc(VARHDRSZ + 23 + 3 * (size_t) plen); + r = VARDATA(result); + + *r++ = '^'; + *r++ = '('; + *r++ = '?'; + *r++ = ':'; + + while (plen > 0) + { + char pchar = *p; + + /* + * If both the escape character and the current character from the + * pattern are multi-byte, we need to take the slow path. + * + * But if one of them is single-byte, we can process the pattern one + * byte at a time, ignoring multi-byte characters. (This works + * because all server-encodings have the property that a valid + * multi-byte character representation cannot contain the + * representation of a valid single-byte character.) + */ + + if (elen > 1) + { + int mblen = pg_mblen(p); + + if (mblen > 1) + { + /* slow, multi-byte path */ + if (afterescape) + { + *r++ = '\\'; + memcpy(r, p, mblen); + r += mblen; + afterescape = false; + } + else if (e && elen == mblen && memcmp(e, p, mblen) == 0) + { + /* SQL escape character; do not send to output */ + afterescape = true; + } + else + { + /* + * We know it's a multi-byte character, so we don't need + * to do all the comparisons to single-byte characters + * that we do below. + */ + memcpy(r, p, mblen); + r += mblen; + } + + p += mblen; + plen -= mblen; + + continue; + } + } + + /* fast path */ + if (afterescape) + { + if (pchar == '"' && !incharclass) /* escape-double-quote? */ + { + /* emit appropriate part separator, per notes above */ + if (nquotes == 0) + { + *r++ = ')'; + *r++ = '{'; + *r++ = '1'; + *r++ = ','; + *r++ = '1'; + *r++ = '}'; + *r++ = '?'; + *r++ = '('; + } + else if (nquotes == 1) + { + *r++ = ')'; + *r++ = '{'; + *r++ = '1'; + *r++ = ','; + *r++ = '1'; + *r++ = '}'; + *r++ = '('; + *r++ = '?'; + *r++ = ':'; + } + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER), + errmsg("SQL regular expression may not contain more than two escape-double-quote separators"))); + nquotes++; + } + else + { + /* + * We allow any character at all to be escaped; notably, this + * allows access to POSIX character-class escapes such as + * "\d". The SQL spec is considerably more restrictive. 
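+ * For instance (illustrative only): with the default '\' escape, the
+ * SIMILAR TO pattern '%\d+' should be rewritten to the ARE '^(?:.*\d+)$',
+ * so 'abc123' SIMILAR TO '%\d+' is expected to be true even though '\d'
+ * is not part of the SQL-standard pattern language.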
+ */ + *r++ = '\\'; + *r++ = pchar; + } + afterescape = false; + } + else if (e && pchar == *e) + { + /* SQL escape character; do not send to output */ + afterescape = true; + } + else if (incharclass) + { + if (pchar == '\\') + *r++ = '\\'; + *r++ = pchar; + if (pchar == ']') + incharclass = false; + } + else if (pchar == '[') + { + *r++ = pchar; + incharclass = true; + } + else if (pchar == '%') + { + *r++ = '.'; + *r++ = '*'; + } + else if (pchar == '_') + *r++ = '.'; + else if (pchar == '(') + { + /* convert to non-capturing parenthesis */ + *r++ = '('; + *r++ = '?'; + *r++ = ':'; + } + else if (pchar == '\\' || pchar == '.' || + pchar == '^' || pchar == '$') + { + *r++ = '\\'; + *r++ = pchar; + } + else + *r++ = pchar; + p++, plen--; + } + + *r++ = ')'; + *r++ = '$'; + + SET_VARSIZE(result, r - ((char *) result)); + + return result; +} + +/* + * similar_to_escape(pattern, escape) + */ +Datum +similar_to_escape_2(PG_FUNCTION_ARGS) +{ + text *pat_text = PG_GETARG_TEXT_PP(0); + text *esc_text = PG_GETARG_TEXT_PP(1); + text *result; + + result = similar_escape_internal(pat_text, esc_text); + + PG_RETURN_TEXT_P(result); +} + +/* + * similar_to_escape(pattern) + * Inserts a default escape character. + */ +Datum +similar_to_escape_1(PG_FUNCTION_ARGS) +{ + text *pat_text = PG_GETARG_TEXT_PP(0); + text *result; + + result = similar_escape_internal(pat_text, NULL); + + PG_RETURN_TEXT_P(result); +} + +/* + * similar_escape(pattern, escape) + * + * Legacy function for compatibility with views stored using the + * pre-v13 expansion of SIMILAR TO. Unlike the above functions, this + * is non-strict, which leads to not-per-spec handling of "ESCAPE NULL". + */ +Datum +similar_escape(PG_FUNCTION_ARGS) +{ + text *pat_text; + text *esc_text; + text *result; + + /* This function is not strict, so must test explicitly */ + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + pat_text = PG_GETARG_TEXT_PP(0); + + if (PG_ARGISNULL(1)) + esc_text = NULL; /* use default escape character */ + else + esc_text = PG_GETARG_TEXT_PP(1); + + result = similar_escape_internal(pat_text, esc_text); + + PG_RETURN_TEXT_P(result); +} + +/* + * regexp_count() + * Return the number of matches of a pattern within a string. 
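+ * A usage sketch (in the style of the documentation examples):
+ * regexp_count('ABCABCAXYaxy', 'A.') is expected to return 3, and
+ * regexp_count('ABCABCAXYaxy', 'A.', 1, 'i') to return 4, the
+ * case-insensitive flag letting the trailing 'ax' match as well.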
+ */ +Datum +regexp_count(PG_FUNCTION_ARGS) +{ + text *str = PG_GETARG_TEXT_PP(0); + text *pattern = PG_GETARG_TEXT_PP(1); + int start = 1; + text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(3); + pg_re_flags re_flags; + regexp_matches_ctx *matchctx; + + /* Collect optional parameters */ + if (PG_NARGS() > 2) + { + start = PG_GETARG_INT32(2); + if (start <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid value for parameter \"%s\": %d", + "start", start))); + } + + /* Determine options */ + parse_re_flags(&re_flags, flags); + /* User mustn't specify 'g' */ + if (re_flags.glob) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + /* translator: %s is a SQL function name */ + errmsg("%s does not support the \"global\" option", + "regexp_count()"))); + /* But we find all the matches anyway */ + re_flags.glob = true; + + /* Do the matching */ + matchctx = setup_regexp_matches(str, pattern, &re_flags, start - 1, + PG_GET_COLLATION(), + false, /* can ignore subexprs */ + false, false); + + PG_RETURN_INT32(matchctx->nmatches); +} + +/* This is separate to keep the opr_sanity regression test from complaining */ +Datum +regexp_count_no_start(PG_FUNCTION_ARGS) +{ + return regexp_count(fcinfo); +} + +/* This is separate to keep the opr_sanity regression test from complaining */ +Datum +regexp_count_no_flags(PG_FUNCTION_ARGS) +{ + return regexp_count(fcinfo); +} + +/* + * regexp_instr() + * Return the match's position within the string + */ +Datum +regexp_instr(PG_FUNCTION_ARGS) +{ + text *str = PG_GETARG_TEXT_PP(0); + text *pattern = PG_GETARG_TEXT_PP(1); + int start = 1; + int n = 1; + int endoption = 0; + text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(5); + int subexpr = 0; + int pos; + pg_re_flags re_flags; + regexp_matches_ctx *matchctx; + + /* Collect optional parameters */ + if (PG_NARGS() > 2) + { + start = PG_GETARG_INT32(2); + if (start <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid value for parameter \"%s\": %d", + "start", start))); + } + if (PG_NARGS() > 3) + { + n = PG_GETARG_INT32(3); + if (n <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid value for parameter \"%s\": %d", + "n", n))); + } + if (PG_NARGS() > 4) + { + endoption = PG_GETARG_INT32(4); + if (endoption != 0 && endoption != 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid value for parameter \"%s\": %d", + "endoption", endoption))); + } + if (PG_NARGS() > 6) + { + subexpr = PG_GETARG_INT32(6); + if (subexpr < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid value for parameter \"%s\": %d", + "subexpr", subexpr))); + } + + /* Determine options */ + parse_re_flags(&re_flags, flags); + /* User mustn't specify 'g' */ + if (re_flags.glob) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + /* translator: %s is a SQL function name */ + errmsg("%s does not support the \"global\" option", + "regexp_instr()"))); + /* But we find all the matches anyway */ + re_flags.glob = true; + + /* Do the matching */ + matchctx = setup_regexp_matches(str, pattern, &re_flags, start - 1, + PG_GET_COLLATION(), + (subexpr > 0), /* need submatches? 
*/ + false, false); + + /* When n exceeds matches return 0 (includes case of no matches) */ + if (n > matchctx->nmatches) + PG_RETURN_INT32(0); + + /* When subexpr exceeds number of subexpressions return 0 */ + if (subexpr > matchctx->npatterns) + PG_RETURN_INT32(0); + + /* Select the appropriate match position to return */ + pos = (n - 1) * matchctx->npatterns; + if (subexpr > 0) + pos += subexpr - 1; + pos *= 2; + if (endoption == 1) + pos += 1; + + if (matchctx->match_locs[pos] >= 0) + PG_RETURN_INT32(matchctx->match_locs[pos] + 1); + else + PG_RETURN_INT32(0); /* position not identifiable */ +} + +/* This is separate to keep the opr_sanity regression test from complaining */ +Datum +regexp_instr_no_start(PG_FUNCTION_ARGS) +{ + return regexp_instr(fcinfo); +} + +/* This is separate to keep the opr_sanity regression test from complaining */ +Datum +regexp_instr_no_n(PG_FUNCTION_ARGS) +{ + return regexp_instr(fcinfo); +} + +/* This is separate to keep the opr_sanity regression test from complaining */ +Datum +regexp_instr_no_endoption(PG_FUNCTION_ARGS) +{ + return regexp_instr(fcinfo); +} + +/* This is separate to keep the opr_sanity regression test from complaining */ +Datum +regexp_instr_no_flags(PG_FUNCTION_ARGS) +{ + return regexp_instr(fcinfo); +} + +/* This is separate to keep the opr_sanity regression test from complaining */ +Datum +regexp_instr_no_subexpr(PG_FUNCTION_ARGS) +{ + return regexp_instr(fcinfo); +} + +/* + * regexp_like() + * Test for a pattern match within a string. + */ +Datum +regexp_like(PG_FUNCTION_ARGS) +{ + text *str = PG_GETARG_TEXT_PP(0); + text *pattern = PG_GETARG_TEXT_PP(1); + text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2); + pg_re_flags re_flags; + + /* Determine options */ + parse_re_flags(&re_flags, flags); + /* User mustn't specify 'g' */ + if (re_flags.glob) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + /* translator: %s is a SQL function name */ + errmsg("%s does not support the \"global\" option", + "regexp_like()"))); + + /* Otherwise it's like textregexeq/texticregexeq */ + PG_RETURN_BOOL(RE_compile_and_execute(pattern, + VARDATA_ANY(str), + VARSIZE_ANY_EXHDR(str), + re_flags.cflags, + PG_GET_COLLATION(), + 0, NULL)); +} + +/* This is separate to keep the opr_sanity regression test from complaining */ +Datum +regexp_like_no_flags(PG_FUNCTION_ARGS) +{ + return regexp_like(fcinfo); +} + +/* + * regexp_match() + * Return the first substring(s) matching a pattern within a string. 
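+ * A usage sketch: regexp_match('foobarbequebaz', '(bar)(beque)') is
+ * expected to return the text array {bar,beque}, one element per
+ * parenthesized subexpression of the first match; with no parentheses
+ * in the pattern, the whole match comes back as a one-element array.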
+ */ +Datum +regexp_match(PG_FUNCTION_ARGS) +{ + text *orig_str = PG_GETARG_TEXT_PP(0); + text *pattern = PG_GETARG_TEXT_PP(1); + text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2); + pg_re_flags re_flags; + regexp_matches_ctx *matchctx; + + /* Determine options */ + parse_re_flags(&re_flags, flags); + /* User mustn't specify 'g' */ + if (re_flags.glob) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + /* translator: %s is a SQL function name */ + errmsg("%s does not support the \"global\" option", + "regexp_match()"), + errhint("Use the regexp_matches function instead."))); + + matchctx = setup_regexp_matches(orig_str, pattern, &re_flags, 0, + PG_GET_COLLATION(), true, false, false); + + if (matchctx->nmatches == 0) + PG_RETURN_NULL(); + + Assert(matchctx->nmatches == 1); + + /* Create workspace that build_regexp_match_result needs */ + matchctx->elems = (Datum *) palloc(sizeof(Datum) * matchctx->npatterns); + matchctx->nulls = (bool *) palloc(sizeof(bool) * matchctx->npatterns); + + PG_RETURN_DATUM(PointerGetDatum(build_regexp_match_result(matchctx))); +} + +/* This is separate to keep the opr_sanity regression test from complaining */ +Datum +regexp_match_no_flags(PG_FUNCTION_ARGS) +{ + return regexp_match(fcinfo); +} + +/* + * regexp_matches() + * Return a table of all matches of a pattern within a string. + */ +Datum +regexp_matches(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + regexp_matches_ctx *matchctx; + + if (SRF_IS_FIRSTCALL()) + { + text *pattern = PG_GETARG_TEXT_PP(1); + text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2); + pg_re_flags re_flags; + MemoryContext oldcontext; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* Determine options */ + parse_re_flags(&re_flags, flags); + + /* be sure to copy the input string into the multi-call ctx */ + matchctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern, + &re_flags, 0, + PG_GET_COLLATION(), + true, false, false); + + /* Pre-create workspace that build_regexp_match_result needs */ + matchctx->elems = (Datum *) palloc(sizeof(Datum) * matchctx->npatterns); + matchctx->nulls = (bool *) palloc(sizeof(bool) * matchctx->npatterns); + + MemoryContextSwitchTo(oldcontext); + funcctx->user_fctx = (void *) matchctx; + } + + funcctx = SRF_PERCALL_SETUP(); + matchctx = (regexp_matches_ctx *) funcctx->user_fctx; + + if (matchctx->next_match < matchctx->nmatches) + { + ArrayType *result_ary; + + result_ary = build_regexp_match_result(matchctx); + matchctx->next_match++; + SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary)); + } + + SRF_RETURN_DONE(funcctx); +} + +/* This is separate to keep the opr_sanity regression test from complaining */ +Datum +regexp_matches_no_flags(PG_FUNCTION_ARGS) +{ + return regexp_matches(fcinfo); +} + +/* + * setup_regexp_matches --- do the initial matching for regexp_match, + * regexp_split, and related functions + * + * To avoid having to re-find the compiled pattern on each call, we do + * all the matching in one swoop. The returned regexp_matches_ctx contains + * the locations of all the substrings matching the pattern. + * + * start_search: the character (not byte) offset in orig_str at which to + * begin the search. Returned positions are relative to orig_str anyway. + * use_subpatterns: collect data about matches to parenthesized subexpressions. + * ignore_degenerate: ignore zero-length matches. + * fetching_unmatched: caller wants to fetch unmatched substrings. 
+ * + * We don't currently assume that fetching_unmatched is exclusive of fetching + * the matched text too; if it's set, the conversion buffer is large enough to + * fetch any single matched or unmatched string, but not any larger + * substring. (In practice, when splitting the matches are usually small + * anyway, and it didn't seem worth complicating the code further.) + */ +static regexp_matches_ctx * +setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags, + int start_search, + Oid collation, + bool use_subpatterns, + bool ignore_degenerate, + bool fetching_unmatched) +{ + regexp_matches_ctx *matchctx = palloc0(sizeof(regexp_matches_ctx)); + int eml = pg_database_encoding_max_length(); + int orig_len; + pg_wchar *wide_str; + int wide_len; + int cflags; + regex_t *cpattern; + regmatch_t *pmatch; + int pmatch_len; + int array_len; + int array_idx; + int prev_match_end; + int prev_valid_match_end; + int maxlen = 0; /* largest fetch length in characters */ + + /* save original string --- we'll extract result substrings from it */ + matchctx->orig_str = orig_str; + + /* convert string to pg_wchar form for matching */ + orig_len = VARSIZE_ANY_EXHDR(orig_str); + wide_str = (pg_wchar *) palloc(sizeof(pg_wchar) * (orig_len + 1)); + wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len); + + /* set up the compiled pattern */ + cflags = re_flags->cflags; + if (!use_subpatterns) + cflags |= REG_NOSUB; + cpattern = RE_compile_and_cache(pattern, cflags, collation); + + /* do we want to remember subpatterns? */ + if (use_subpatterns && cpattern->re_nsub > 0) + { + matchctx->npatterns = cpattern->re_nsub; + pmatch_len = cpattern->re_nsub + 1; + } + else + { + use_subpatterns = false; + matchctx->npatterns = 1; + pmatch_len = 1; + } + + /* temporary output space for RE package */ + pmatch = palloc(sizeof(regmatch_t) * pmatch_len); + + /* + * the real output space (grown dynamically if needed) + * + * use values 2^n-1, not 2^n, so that we hit the limit at 2^28-1 rather + * than at 2^27 + */ + array_len = re_flags->glob ? 255 : 31; + matchctx->match_locs = (int *) palloc(sizeof(int) * array_len); + array_idx = 0; + + /* search for the pattern, perhaps repeatedly */ + prev_match_end = 0; + prev_valid_match_end = 0; + while (RE_wchar_execute(cpattern, wide_str, wide_len, start_search, + pmatch_len, pmatch)) + { + /* + * If requested, ignore degenerate matches, which are zero-length + * matches occurring at the start or end of a string or just after a + * previous match. 
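+ * (Illustration: this is why regexp_split_to_array('the quick brown fox',
+ * '\s*') is expected to yield one element per non-space character, with no
+ * empty strings at the ends or next to the real whitespace matches; the
+ * degenerate matches at those positions are dropped right here.)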
+ */ + if (!ignore_degenerate || + (pmatch[0].rm_so < wide_len && + pmatch[0].rm_eo > prev_match_end)) + { + /* enlarge output space if needed */ + while (array_idx + matchctx->npatterns * 2 + 1 > array_len) + { + array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */ + if (array_len > MaxAllocSize / sizeof(int)) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("too many regular expression matches"))); + matchctx->match_locs = (int *) repalloc(matchctx->match_locs, + sizeof(int) * array_len); + } + + /* save this match's locations */ + if (use_subpatterns) + { + int i; + + for (i = 1; i <= matchctx->npatterns; i++) + { + int so = pmatch[i].rm_so; + int eo = pmatch[i].rm_eo; + + matchctx->match_locs[array_idx++] = so; + matchctx->match_locs[array_idx++] = eo; + if (so >= 0 && eo >= 0 && (eo - so) > maxlen) + maxlen = (eo - so); + } + } + else + { + int so = pmatch[0].rm_so; + int eo = pmatch[0].rm_eo; + + matchctx->match_locs[array_idx++] = so; + matchctx->match_locs[array_idx++] = eo; + if (so >= 0 && eo >= 0 && (eo - so) > maxlen) + maxlen = (eo - so); + } + matchctx->nmatches++; + + /* + * check length of unmatched portion between end of previous valid + * (nondegenerate, or degenerate but not ignored) match and start + * of current one + */ + if (fetching_unmatched && + pmatch[0].rm_so >= 0 && + (pmatch[0].rm_so - prev_valid_match_end) > maxlen) + maxlen = (pmatch[0].rm_so - prev_valid_match_end); + prev_valid_match_end = pmatch[0].rm_eo; + } + prev_match_end = pmatch[0].rm_eo; + + /* if not glob, stop after one match */ + if (!re_flags->glob) + break; + + /* + * Advance search position. Normally we start the next search at the + * end of the previous match; but if the match was of zero length, we + * have to advance by one character, or we'd just find the same match + * again. + */ + start_search = prev_match_end; + if (pmatch[0].rm_so == pmatch[0].rm_eo) + start_search++; + if (start_search > wide_len) + break; + } + + /* + * check length of unmatched portion between end of last match and end of + * input string + */ + if (fetching_unmatched && + (wide_len - prev_valid_match_end) > maxlen) + maxlen = (wide_len - prev_valid_match_end); + + /* + * Keep a note of the end position of the string for the benefit of + * splitting code. + */ + matchctx->match_locs[array_idx] = wide_len; + + if (eml > 1) + { + int64 maxsiz = eml * (int64) maxlen; + int conv_bufsiz; + + /* + * Make the conversion buffer large enough for any substring of + * interest. + * + * Worst case: assume we need the maximum size (maxlen*eml), but take + * advantage of the fact that the original string length in bytes is + * an upper bound on the byte length of any fetched substring (and we + * know that len+1 is safe to allocate because the varlena header is + * longer than 1 byte). + */ + if (maxsiz > orig_len) + conv_bufsiz = orig_len + 1; + else + conv_bufsiz = maxsiz + 1; /* safe since maxsiz < 2^30 */ + + matchctx->conv_buf = palloc(conv_bufsiz); + matchctx->conv_bufsiz = conv_bufsiz; + matchctx->wide_str = wide_str; + } + else + { + /* No need to keep the wide string if we're in a single-byte charset. 
*/ + pfree(wide_str); + matchctx->wide_str = NULL; + matchctx->conv_buf = NULL; + matchctx->conv_bufsiz = 0; + } + + /* Clean up temp storage */ + pfree(pmatch); + + return matchctx; +} + +/* + * build_regexp_match_result - build output array for current match + */ +static ArrayType * +build_regexp_match_result(regexp_matches_ctx *matchctx) +{ + char *buf = matchctx->conv_buf; + Datum *elems = matchctx->elems; + bool *nulls = matchctx->nulls; + int dims[1]; + int lbs[1]; + int loc; + int i; + + /* Extract matching substrings from the original string */ + loc = matchctx->next_match * matchctx->npatterns * 2; + for (i = 0; i < matchctx->npatterns; i++) + { + int so = matchctx->match_locs[loc++]; + int eo = matchctx->match_locs[loc++]; + + if (so < 0 || eo < 0) + { + elems[i] = (Datum) 0; + nulls[i] = true; + } + else if (buf) + { + int len = pg_wchar2mb_with_len(matchctx->wide_str + so, + buf, + eo - so); + + Assert(len < matchctx->conv_bufsiz); + elems[i] = PointerGetDatum(cstring_to_text_with_len(buf, len)); + nulls[i] = false; + } + else + { + elems[i] = DirectFunctionCall3(text_substr, + PointerGetDatum(matchctx->orig_str), + Int32GetDatum(so + 1), + Int32GetDatum(eo - so)); + nulls[i] = false; + } + } + + /* And form an array */ + dims[0] = matchctx->npatterns; + lbs[0] = 1; + /* XXX: this hardcodes assumptions about the text type */ + return construct_md_array(elems, nulls, 1, dims, lbs, + TEXTOID, -1, false, TYPALIGN_INT); +} + +/* + * regexp_split_to_table() + * Split the string at matches of the pattern, returning the + * split-out substrings as a table. + */ +Datum +regexp_split_to_table(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + regexp_matches_ctx *splitctx; + + if (SRF_IS_FIRSTCALL()) + { + text *pattern = PG_GETARG_TEXT_PP(1); + text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2); + pg_re_flags re_flags; + MemoryContext oldcontext; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* Determine options */ + parse_re_flags(&re_flags, flags); + /* User mustn't specify 'g' */ + if (re_flags.glob) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + /* translator: %s is a SQL function name */ + errmsg("%s does not support the \"global\" option", + "regexp_split_to_table()"))); + /* But we find all the matches anyway */ + re_flags.glob = true; + + /* be sure to copy the input string into the multi-call ctx */ + splitctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern, + &re_flags, 0, + PG_GET_COLLATION(), + false, true, true); + + MemoryContextSwitchTo(oldcontext); + funcctx->user_fctx = (void *) splitctx; + } + + funcctx = SRF_PERCALL_SETUP(); + splitctx = (regexp_matches_ctx *) funcctx->user_fctx; + + if (splitctx->next_match <= splitctx->nmatches) + { + Datum result = build_regexp_split_result(splitctx); + + splitctx->next_match++; + SRF_RETURN_NEXT(funcctx, result); + } + + SRF_RETURN_DONE(funcctx); +} + +/* This is separate to keep the opr_sanity regression test from complaining */ +Datum +regexp_split_to_table_no_flags(PG_FUNCTION_ARGS) +{ + return regexp_split_to_table(fcinfo); +} + +/* + * regexp_split_to_array() + * Split the string at matches of the pattern, returning the + * split-out substrings as an array. 
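+ * A usage sketch: regexp_split_to_array('hello   world', '\s+') is
+ * expected to return {hello,world}; it accepts the same optional flags
+ * argument as regexp_split_to_table above, just accumulating the pieces
+ * into a single array instead of returning a row set.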
+ */ +Datum +regexp_split_to_array(PG_FUNCTION_ARGS) +{ + ArrayBuildState *astate = NULL; + pg_re_flags re_flags; + regexp_matches_ctx *splitctx; + + /* Determine options */ + parse_re_flags(&re_flags, PG_GETARG_TEXT_PP_IF_EXISTS(2)); + /* User mustn't specify 'g' */ + if (re_flags.glob) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + /* translator: %s is a SQL function name */ + errmsg("%s does not support the \"global\" option", + "regexp_split_to_array()"))); + /* But we find all the matches anyway */ + re_flags.glob = true; + + splitctx = setup_regexp_matches(PG_GETARG_TEXT_PP(0), + PG_GETARG_TEXT_PP(1), + &re_flags, 0, + PG_GET_COLLATION(), + false, true, true); + + while (splitctx->next_match <= splitctx->nmatches) + { + astate = accumArrayResult(astate, + build_regexp_split_result(splitctx), + false, + TEXTOID, + CurrentMemoryContext); + splitctx->next_match++; + } + + PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext)); +} + +/* This is separate to keep the opr_sanity regression test from complaining */ +Datum +regexp_split_to_array_no_flags(PG_FUNCTION_ARGS) +{ + return regexp_split_to_array(fcinfo); +} + +/* + * build_regexp_split_result - build output string for current match + * + * We return the string between the current match and the previous one, + * or the string after the last match when next_match == nmatches. + */ +static Datum +build_regexp_split_result(regexp_matches_ctx *splitctx) +{ + char *buf = splitctx->conv_buf; + int startpos; + int endpos; + + if (splitctx->next_match > 0) + startpos = splitctx->match_locs[splitctx->next_match * 2 - 1]; + else + startpos = 0; + if (startpos < 0) + elog(ERROR, "invalid match ending position"); + + endpos = splitctx->match_locs[splitctx->next_match * 2]; + if (endpos < startpos) + elog(ERROR, "invalid match starting position"); + + if (buf) + { + int len; + + len = pg_wchar2mb_with_len(splitctx->wide_str + startpos, + buf, + endpos - startpos); + Assert(len < splitctx->conv_bufsiz); + return PointerGetDatum(cstring_to_text_with_len(buf, len)); + } + else + { + return DirectFunctionCall3(text_substr, + PointerGetDatum(splitctx->orig_str), + Int32GetDatum(startpos + 1), + Int32GetDatum(endpos - startpos)); + } +} + +/* + * regexp_substr() + * Return the substring that matches a regular expression pattern + */ +Datum +regexp_substr(PG_FUNCTION_ARGS) +{ + text *str = PG_GETARG_TEXT_PP(0); + text *pattern = PG_GETARG_TEXT_PP(1); + int start = 1; + int n = 1; + text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(4); + int subexpr = 0; + int so, + eo, + pos; + pg_re_flags re_flags; + regexp_matches_ctx *matchctx; + + /* Collect optional parameters */ + if (PG_NARGS() > 2) + { + start = PG_GETARG_INT32(2); + if (start <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid value for parameter \"%s\": %d", + "start", start))); + } + if (PG_NARGS() > 3) + { + n = PG_GETARG_INT32(3); + if (n <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid value for parameter \"%s\": %d", + "n", n))); + } + if (PG_NARGS() > 5) + { + subexpr = PG_GETARG_INT32(5); + if (subexpr < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid value for parameter \"%s\": %d", + "subexpr", subexpr))); + } + + /* Determine options */ + parse_re_flags(&re_flags, flags); + /* User mustn't specify 'g' */ + if (re_flags.glob) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + /* translator: %s is a SQL function name */ + errmsg("%s does not support 
the \"global\" option", + "regexp_substr()"))); + /* But we find all the matches anyway */ + re_flags.glob = true; + + /* Do the matching */ + matchctx = setup_regexp_matches(str, pattern, &re_flags, start - 1, + PG_GET_COLLATION(), + (subexpr > 0), /* need submatches? */ + false, false); + + /* When n exceeds matches return NULL (includes case of no matches) */ + if (n > matchctx->nmatches) + PG_RETURN_NULL(); + + /* When subexpr exceeds number of subexpressions return NULL */ + if (subexpr > matchctx->npatterns) + PG_RETURN_NULL(); + + /* Select the appropriate match position to return */ + pos = (n - 1) * matchctx->npatterns; + if (subexpr > 0) + pos += subexpr - 1; + pos *= 2; + so = matchctx->match_locs[pos]; + eo = matchctx->match_locs[pos + 1]; + + if (so < 0 || eo < 0) + PG_RETURN_NULL(); /* unidentifiable location */ + + PG_RETURN_DATUM(DirectFunctionCall3(text_substr, + PointerGetDatum(matchctx->orig_str), + Int32GetDatum(so + 1), + Int32GetDatum(eo - so))); +} + +/* This is separate to keep the opr_sanity regression test from complaining */ +Datum +regexp_substr_no_start(PG_FUNCTION_ARGS) +{ + return regexp_substr(fcinfo); +} + +/* This is separate to keep the opr_sanity regression test from complaining */ +Datum +regexp_substr_no_n(PG_FUNCTION_ARGS) +{ + return regexp_substr(fcinfo); +} + +/* This is separate to keep the opr_sanity regression test from complaining */ +Datum +regexp_substr_no_flags(PG_FUNCTION_ARGS) +{ + return regexp_substr(fcinfo); +} + +/* This is separate to keep the opr_sanity regression test from complaining */ +Datum +regexp_substr_no_subexpr(PG_FUNCTION_ARGS) +{ + return regexp_substr(fcinfo); +} + +/* + * regexp_fixed_prefix - extract fixed prefix, if any, for a regexp + * + * The result is NULL if there is no fixed prefix, else a palloc'd string. + * If it is an exact match, not just a prefix, *exact is returned as true. + */ +char * +regexp_fixed_prefix(text *text_re, bool case_insensitive, Oid collation, + bool *exact) +{ + char *result; + regex_t *re; + int cflags; + int re_result; + pg_wchar *str; + size_t slen; + size_t maxlen; + char errMsg[100]; + + *exact = false; /* default result */ + + /* Compile RE */ + cflags = REG_ADVANCED; + if (case_insensitive) + cflags |= REG_ICASE; + + re = RE_compile_and_cache(text_re, cflags | REG_NOSUB, collation); + + /* Examine it to see if there's a fixed prefix */ + re_result = pg_regprefix(re, &str, &slen); + + switch (re_result) + { + case REG_NOMATCH: + return NULL; + + case REG_PREFIX: + /* continue with wchar conversion */ + break; + + case REG_EXACT: + *exact = true; + /* continue with wchar conversion */ + break; + + default: + /* re failed??? 
*/ + pg_regerror(re_result, re, errMsg, sizeof(errMsg)); + ereport(ERROR, + (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), + errmsg("regular expression failed: %s", errMsg))); + break; + } + + /* Convert pg_wchar result back to database encoding */ + maxlen = pg_database_encoding_max_length() * slen + 1; + result = (char *) palloc(maxlen); + slen = pg_wchar2mb_with_len(str, result, slen); + Assert(slen < maxlen); + + pfree(str); + + return result; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/regproc.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/regproc.c new file mode 100644 index 00000000000..296930eb3bc --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/regproc.c @@ -0,0 +1,2018 @@ +/*------------------------------------------------------------------------- + * + * regproc.c + * Functions for the built-in types regproc, regclass, regtype, etc. + * + * These types are all binary-compatible with type Oid, and rely on Oid + * for comparison and so forth. Their only interesting behavior is in + * special I/O conversion routines. + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/regproc.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <ctype.h> + +#include "access/htup_details.h" +#include "catalog/namespace.h" +#include "catalog/pg_class.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_ts_config.h" +#include "catalog/pg_ts_dict.h" +#include "catalog/pg_type.h" +#include "lib/stringinfo.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "nodes/miscnodes.h" +#include "parser/parse_type.h" +#include "parser/scansup.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/regproc.h" +#include "utils/syscache.h" +#include "utils/varlena.h" + +static bool parseNumericOid(char *string, Oid *result, Node *escontext); +static bool parseDashOrOid(char *string, Oid *result, Node *escontext); +static bool parseNameAndArgTypes(const char *string, bool allowNone, + List **names, int *nargs, Oid *argtypes, + Node *escontext); + + +/***************************************************************************** + * USER I/O ROUTINES * + *****************************************************************************/ + +/* + * regprocin - converts "proname" to proc OID + * + * We also accept a numeric OID, for symmetry with the output routine. + * + * '-' signifies unknown (OID 0). In all other cases, the input must + * match an existing pg_proc entry. + */ +Datum +regprocin(PG_FUNCTION_ARGS) +{ + char *pro_name_or_oid = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + RegProcedure result; + List *names; + FuncCandidateList clist; + + /* Handle "-" or numeric OID */ + if (parseDashOrOid(pro_name_or_oid, &result, escontext)) + PG_RETURN_OID(result); + + /* Else it's a name, possibly schema-qualified */ + + /* + * We should never get here in bootstrap mode, as all references should + * have been resolved by genbki.pl. + */ + if (IsBootstrapProcessingMode()) + elog(ERROR, "regproc values must be OIDs in bootstrap mode"); + + /* + * Normal case: parse the name into components and see if it matches any + * pg_proc entries in the current search path. 
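+ * For example (illustrative), 'now'::regproc is expected to resolve to
+ * the OID of pg_catalog.now(), whereas 'abs'::regproc fails with "more
+ * than one function" because several overloads exist; regprocedure,
+ * below, must be used to disambiguate by argument types.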
+ */ + names = stringToQualifiedNameList(pro_name_or_oid, escontext); + if (names == NIL) + PG_RETURN_NULL(); + + clist = FuncnameGetCandidates(names, -1, NIL, false, false, false, true); + + if (clist == NULL) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("function \"%s\" does not exist", pro_name_or_oid))); + else if (clist->next != NULL) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_AMBIGUOUS_FUNCTION), + errmsg("more than one function named \"%s\"", + pro_name_or_oid))); + + result = clist->oid; + + PG_RETURN_OID(result); +} + +/* + * to_regproc - converts "proname" to proc OID + * + * If the name is not found, we return NULL. + */ +Datum +to_regproc(PG_FUNCTION_ARGS) +{ + char *pro_name = text_to_cstring(PG_GETARG_TEXT_PP(0)); + Datum result; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + if (!DirectInputFunctionCallSafe(regprocin, pro_name, + InvalidOid, -1, + (Node *) &escontext, + &result)) + PG_RETURN_NULL(); + PG_RETURN_DATUM(result); +} + +/* + * regprocout - converts proc OID to "pro_name" + */ +Datum +regprocout(PG_FUNCTION_ARGS) +{ + RegProcedure proid = PG_GETARG_OID(0); + char *result; + HeapTuple proctup; + + if (proid == InvalidOid) + { + result = pstrdup("-"); + PG_RETURN_CSTRING(result); + } + + proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(proid)); + + if (HeapTupleIsValid(proctup)) + { + Form_pg_proc procform = (Form_pg_proc) GETSTRUCT(proctup); + char *proname = NameStr(procform->proname); + + /* + * In bootstrap mode, skip the fancy namespace stuff and just return + * the proc name. (This path is only needed for debugging output + * anyway.) + */ + if (IsBootstrapProcessingMode()) + result = pstrdup(proname); + else + { + char *nspname; + FuncCandidateList clist; + + /* + * Would this proc be found (uniquely!) by regprocin? If not, + * qualify it. + */ + clist = FuncnameGetCandidates(list_make1(makeString(proname)), + -1, NIL, false, false, false, false); + if (clist != NULL && clist->next == NULL && + clist->oid == proid) + nspname = NULL; + else + nspname = get_namespace_name(procform->pronamespace); + + result = quote_qualified_identifier(nspname, proname); + } + + ReleaseSysCache(proctup); + } + else + { + /* If OID doesn't match any pg_proc entry, return it numerically */ + result = (char *) palloc(NAMEDATALEN); + snprintf(result, NAMEDATALEN, "%u", proid); + } + + PG_RETURN_CSTRING(result); +} + +/* + * regprocrecv - converts external binary format to regproc + */ +Datum +regprocrecv(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidrecv, so share code */ + return oidrecv(fcinfo); +} + +/* + * regprocsend - converts regproc to binary format + */ +Datum +regprocsend(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidsend, so share code */ + return oidsend(fcinfo); +} + + +/* + * regprocedurein - converts "proname(args)" to proc OID + * + * We also accept a numeric OID, for symmetry with the output routine. + * + * '-' signifies unknown (OID 0). In all other cases, the input must + * match an existing pg_proc entry. 
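+ * A usage sketch: 'abs(numeric)'::regprocedure is expected to resolve
+ * uniquely even though plain 'abs' is ambiguous as a regproc, because
+ * the parsed argument-type list is compared against each candidate
+ * found below.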
+ */ +Datum +regprocedurein(PG_FUNCTION_ARGS) +{ + char *pro_name_or_oid = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + RegProcedure result; + List *names; + int nargs; + Oid argtypes[FUNC_MAX_ARGS]; + FuncCandidateList clist; + + /* Handle "-" or numeric OID */ + if (parseDashOrOid(pro_name_or_oid, &result, escontext)) + PG_RETURN_OID(result); + + /* The rest of this wouldn't work in bootstrap mode */ + if (IsBootstrapProcessingMode()) + elog(ERROR, "regprocedure values must be OIDs in bootstrap mode"); + + /* + * Else it's a name and arguments. Parse the name and arguments, look up + * potential matches in the current namespace search list, and scan to see + * which one exactly matches the given argument types. (There will not be + * more than one match.) + */ + if (!parseNameAndArgTypes(pro_name_or_oid, false, + &names, &nargs, argtypes, + escontext)) + PG_RETURN_NULL(); + + clist = FuncnameGetCandidates(names, nargs, NIL, false, false, + false, true); + + for (; clist; clist = clist->next) + { + if (memcmp(clist->args, argtypes, nargs * sizeof(Oid)) == 0) + break; + } + + if (clist == NULL) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("function \"%s\" does not exist", pro_name_or_oid))); + + result = clist->oid; + + PG_RETURN_OID(result); +} + +/* + * to_regprocedure - converts "proname(args)" to proc OID + * + * If the name is not found, we return NULL. + */ +Datum +to_regprocedure(PG_FUNCTION_ARGS) +{ + char *pro_name = text_to_cstring(PG_GETARG_TEXT_PP(0)); + Datum result; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + if (!DirectInputFunctionCallSafe(regprocedurein, pro_name, + InvalidOid, -1, + (Node *) &escontext, + &result)) + PG_RETURN_NULL(); + PG_RETURN_DATUM(result); +} + +/* + * format_procedure - converts proc OID to "pro_name(args)" + * + * This exports the useful functionality of regprocedureout for use + * in other backend modules. The result is a palloc'd string. + */ +char * +format_procedure(Oid procedure_oid) +{ + return format_procedure_extended(procedure_oid, 0); +} + +char * +format_procedure_qualified(Oid procedure_oid) +{ + return format_procedure_extended(procedure_oid, FORMAT_PROC_FORCE_QUALIFY); +} + +/* + * format_procedure_extended - converts procedure OID to "pro_name(args)" + * + * This exports the useful functionality of regprocedureout for use + * in other backend modules. The result is a palloc'd string, or NULL. + * + * Routine to produce regprocedure names; see format_procedure above. + * + * The following bits in 'flags' modify the behavior: + * - FORMAT_PROC_INVALID_AS_NULL + * if the procedure OID is invalid or unknown, return NULL instead + * of the numeric OID. + * - FORMAT_PROC_FORCE_QUALIFY + * always schema-qualify procedure names, regardless of search_path + */ +char * +format_procedure_extended(Oid procedure_oid, bits16 flags) +{ + char *result; + HeapTuple proctup; + + proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(procedure_oid)); + + if (HeapTupleIsValid(proctup)) + { + Form_pg_proc procform = (Form_pg_proc) GETSTRUCT(proctup); + char *proname = NameStr(procform->proname); + int nargs = procform->pronargs; + int i; + char *nspname; + StringInfoData buf; + + /* XXX no support here for bootstrap mode */ + Assert(!IsBootstrapProcessingMode()); + + initStringInfo(&buf); + + /* + * Would this proc be found (given the right args) by regprocedurein? + * If not, or if caller requests it, we need to qualify it. 
+ */ + if ((flags & FORMAT_PROC_FORCE_QUALIFY) == 0 && + FunctionIsVisible(procedure_oid)) + nspname = NULL; + else + nspname = get_namespace_name(procform->pronamespace); + + appendStringInfo(&buf, "%s(", + quote_qualified_identifier(nspname, proname)); + for (i = 0; i < nargs; i++) + { + Oid thisargtype = procform->proargtypes.values[i]; + + if (i > 0) + appendStringInfoChar(&buf, ','); + appendStringInfoString(&buf, + (flags & FORMAT_PROC_FORCE_QUALIFY) != 0 ? + format_type_be_qualified(thisargtype) : + format_type_be(thisargtype)); + } + appendStringInfoChar(&buf, ')'); + + result = buf.data; + + ReleaseSysCache(proctup); + } + else if ((flags & FORMAT_PROC_INVALID_AS_NULL) != 0) + { + /* If object is undefined, return NULL as wanted by caller */ + result = NULL; + } + else + { + /* If OID doesn't match any pg_proc entry, return it numerically */ + result = (char *) palloc(NAMEDATALEN); + snprintf(result, NAMEDATALEN, "%u", procedure_oid); + } + + return result; +} + +/* + * Output an objname/objargs representation for the procedure with the + * given OID. If it doesn't exist, an error is thrown. + * + * This can be used to feed get_object_address. + */ +void +format_procedure_parts(Oid procedure_oid, List **objnames, List **objargs, + bool missing_ok) +{ + HeapTuple proctup; + Form_pg_proc procform; + int nargs; + int i; + + proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(procedure_oid)); + + if (!HeapTupleIsValid(proctup)) + { + if (!missing_ok) + elog(ERROR, "cache lookup failed for procedure with OID %u", procedure_oid); + return; + } + + procform = (Form_pg_proc) GETSTRUCT(proctup); + nargs = procform->pronargs; + + *objnames = list_make2(get_namespace_name_or_temp(procform->pronamespace), + pstrdup(NameStr(procform->proname))); + *objargs = NIL; + for (i = 0; i < nargs; i++) + { + Oid thisargtype = procform->proargtypes.values[i]; + + *objargs = lappend(*objargs, format_type_be_qualified(thisargtype)); + } + + ReleaseSysCache(proctup); +} + +/* + * regprocedureout - converts proc OID to "pro_name(args)" + */ +Datum +regprocedureout(PG_FUNCTION_ARGS) +{ + RegProcedure proid = PG_GETARG_OID(0); + char *result; + + if (proid == InvalidOid) + result = pstrdup("-"); + else + result = format_procedure(proid); + + PG_RETURN_CSTRING(result); +} + +/* + * regprocedurerecv - converts external binary format to regprocedure + */ +Datum +regprocedurerecv(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidrecv, so share code */ + return oidrecv(fcinfo); +} + +/* + * regproceduresend - converts regprocedure to binary format + */ +Datum +regproceduresend(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidsend, so share code */ + return oidsend(fcinfo); +} + + +/* + * regoperin - converts "oprname" to operator OID + * + * We also accept a numeric OID, for symmetry with the output routine. + * + * '0' signifies unknown (OID 0). In all other cases, the input must + * match an existing pg_operator entry. 
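+ * A usage sketch: a bare name such as '*'::regoper fails with "more than
+ * one operator" because every numeric type has its own multiplication
+ * operator; regoperator, below, resolves that by taking argument types,
+ * as in '*(integer,integer)'::regoperator.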
+ */ +Datum +regoperin(PG_FUNCTION_ARGS) +{ + char *opr_name_or_oid = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + Oid result; + List *names; + FuncCandidateList clist; + + /* Handle "0" or numeric OID */ + if (parseNumericOid(opr_name_or_oid, &result, escontext)) + PG_RETURN_OID(result); + + /* Else it's a name, possibly schema-qualified */ + + /* The rest of this wouldn't work in bootstrap mode */ + if (IsBootstrapProcessingMode()) + elog(ERROR, "regoper values must be OIDs in bootstrap mode"); + + /* + * Normal case: parse the name into components and see if it matches any + * pg_operator entries in the current search path. + */ + names = stringToQualifiedNameList(opr_name_or_oid, escontext); + if (names == NIL) + PG_RETURN_NULL(); + + clist = OpernameGetCandidates(names, '\0', true); + + if (clist == NULL) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("operator does not exist: %s", opr_name_or_oid))); + else if (clist->next != NULL) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_AMBIGUOUS_FUNCTION), + errmsg("more than one operator named %s", + opr_name_or_oid))); + + result = clist->oid; + + PG_RETURN_OID(result); +} + +/* + * to_regoper - converts "oprname" to operator OID + * + * If the name is not found, we return NULL. + */ +Datum +to_regoper(PG_FUNCTION_ARGS) +{ + char *opr_name = text_to_cstring(PG_GETARG_TEXT_PP(0)); + Datum result; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + if (!DirectInputFunctionCallSafe(regoperin, opr_name, + InvalidOid, -1, + (Node *) &escontext, + &result)) + PG_RETURN_NULL(); + PG_RETURN_DATUM(result); +} + +/* + * regoperout - converts operator OID to "opr_name" + */ +Datum +regoperout(PG_FUNCTION_ARGS) +{ + Oid oprid = PG_GETARG_OID(0); + char *result; + HeapTuple opertup; + + if (oprid == InvalidOid) + { + result = pstrdup("0"); + PG_RETURN_CSTRING(result); + } + + opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(oprid)); + + if (HeapTupleIsValid(opertup)) + { + Form_pg_operator operform = (Form_pg_operator) GETSTRUCT(opertup); + char *oprname = NameStr(operform->oprname); + + /* + * In bootstrap mode, skip the fancy namespace stuff and just return + * the oper name. (This path is only needed for debugging output + * anyway.) + */ + if (IsBootstrapProcessingMode()) + result = pstrdup(oprname); + else + { + FuncCandidateList clist; + + /* + * Would this oper be found (uniquely!) by regoperin? If not, + * qualify it. 
+ */ + clist = OpernameGetCandidates(list_make1(makeString(oprname)), + '\0', false); + if (clist != NULL && clist->next == NULL && + clist->oid == oprid) + result = pstrdup(oprname); + else + { + const char *nspname; + + nspname = get_namespace_name(operform->oprnamespace); + nspname = quote_identifier(nspname); + result = (char *) palloc(strlen(nspname) + strlen(oprname) + 2); + sprintf(result, "%s.%s", nspname, oprname); + } + } + + ReleaseSysCache(opertup); + } + else + { + /* + * If OID doesn't match any pg_operator entry, return it numerically + */ + result = (char *) palloc(NAMEDATALEN); + snprintf(result, NAMEDATALEN, "%u", oprid); + } + + PG_RETURN_CSTRING(result); +} + +/* + * regoperrecv - converts external binary format to regoper + */ +Datum +regoperrecv(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidrecv, so share code */ + return oidrecv(fcinfo); +} + +/* + * regopersend - converts regoper to binary format + */ +Datum +regopersend(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidsend, so share code */ + return oidsend(fcinfo); +} + + +/* + * regoperatorin - converts "oprname(args)" to operator OID + * + * We also accept a numeric OID, for symmetry with the output routine. + * + * '0' signifies unknown (OID 0). In all other cases, the input must + * match an existing pg_operator entry. + */ +Datum +regoperatorin(PG_FUNCTION_ARGS) +{ + char *opr_name_or_oid = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + Oid result; + List *names; + int nargs; + Oid argtypes[FUNC_MAX_ARGS]; + + /* Handle "0" or numeric OID */ + if (parseNumericOid(opr_name_or_oid, &result, escontext)) + PG_RETURN_OID(result); + + /* The rest of this wouldn't work in bootstrap mode */ + if (IsBootstrapProcessingMode()) + elog(ERROR, "regoperator values must be OIDs in bootstrap mode"); + + /* + * Else it's a name and arguments. Parse the name and arguments, look up + * potential matches in the current namespace search list, and scan to see + * which one exactly matches the given argument types. (There will not be + * more than one match.) + */ + if (!parseNameAndArgTypes(opr_name_or_oid, true, + &names, &nargs, argtypes, + escontext)) + PG_RETURN_NULL(); + + if (nargs == 1) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_UNDEFINED_PARAMETER), + errmsg("missing argument"), + errhint("Use NONE to denote the missing argument of a unary operator."))); + if (nargs != 2) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_TOO_MANY_ARGUMENTS), + errmsg("too many arguments"), + errhint("Provide two argument types for operator."))); + + result = OpernameGetOprid(names, argtypes[0], argtypes[1]); + + if (!OidIsValid(result)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("operator does not exist: %s", opr_name_or_oid))); + + PG_RETURN_OID(result); +} + +/* + * to_regoperator - converts "oprname(args)" to operator OID + * + * If the name is not found, we return NULL. + */ +Datum +to_regoperator(PG_FUNCTION_ARGS) +{ + char *opr_name_or_oid = text_to_cstring(PG_GETARG_TEXT_PP(0)); + Datum result; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + if (!DirectInputFunctionCallSafe(regoperatorin, opr_name_or_oid, + InvalidOid, -1, + (Node *) &escontext, + &result)) + PG_RETURN_NULL(); + PG_RETURN_DATUM(result); +} + +/* + * format_operator_extended - converts operator OID to "opr_name(args)" + * + * This exports the useful functionality of regoperatorout for use + * in other backend modules. The result is a palloc'd string, or NULL. 
+ * + * The following bits in 'flags' modify the behavior: + * - FORMAT_OPERATOR_INVALID_AS_NULL + * if the operator OID is invalid or unknown, return NULL instead + * of the numeric OID. + * - FORMAT_OPERATOR_FORCE_QUALIFY + * always schema-qualify operator names, regardless of search_path + */ +char * +format_operator_extended(Oid operator_oid, bits16 flags) +{ + char *result; + HeapTuple opertup; + + opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(operator_oid)); + + if (HeapTupleIsValid(opertup)) + { + Form_pg_operator operform = (Form_pg_operator) GETSTRUCT(opertup); + char *oprname = NameStr(operform->oprname); + char *nspname; + StringInfoData buf; + + /* XXX no support here for bootstrap mode */ + Assert(!IsBootstrapProcessingMode()); + + initStringInfo(&buf); + + /* + * Would this oper be found (given the right args) by regoperatorin? + * If not, or if caller explicitly requests it, we need to qualify it. + */ + if ((flags & FORMAT_OPERATOR_FORCE_QUALIFY) != 0 || + !OperatorIsVisible(operator_oid)) + { + nspname = get_namespace_name(operform->oprnamespace); + appendStringInfo(&buf, "%s.", + quote_identifier(nspname)); + } + + appendStringInfo(&buf, "%s(", oprname); + + if (operform->oprleft) + appendStringInfo(&buf, "%s,", + (flags & FORMAT_OPERATOR_FORCE_QUALIFY) != 0 ? + format_type_be_qualified(operform->oprleft) : + format_type_be(operform->oprleft)); + else + appendStringInfoString(&buf, "NONE,"); + + if (operform->oprright) + appendStringInfo(&buf, "%s)", + (flags & FORMAT_OPERATOR_FORCE_QUALIFY) != 0 ? + format_type_be_qualified(operform->oprright) : + format_type_be(operform->oprright)); + else + appendStringInfoString(&buf, "NONE)"); + + result = buf.data; + + ReleaseSysCache(opertup); + } + else if ((flags & FORMAT_OPERATOR_INVALID_AS_NULL) != 0) + { + /* If object is undefined, return NULL as wanted by caller */ + result = NULL; + } + else + { + /* + * If OID doesn't match any pg_operator entry, return it numerically + */ + result = (char *) palloc(NAMEDATALEN); + snprintf(result, NAMEDATALEN, "%u", operator_oid); + } + + return result; +} + +char * +format_operator(Oid operator_oid) +{ + return format_operator_extended(operator_oid, 0); +} + +char * +format_operator_qualified(Oid operator_oid) +{ + return format_operator_extended(operator_oid, + FORMAT_OPERATOR_FORCE_QUALIFY); +} + +void +format_operator_parts(Oid operator_oid, List **objnames, List **objargs, + bool missing_ok) +{ + HeapTuple opertup; + Form_pg_operator oprForm; + + opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(operator_oid)); + if (!HeapTupleIsValid(opertup)) + { + if (!missing_ok) + elog(ERROR, "cache lookup failed for operator with OID %u", + operator_oid); + return; + } + + oprForm = (Form_pg_operator) GETSTRUCT(opertup); + *objnames = list_make2(get_namespace_name_or_temp(oprForm->oprnamespace), + pstrdup(NameStr(oprForm->oprname))); + *objargs = NIL; + if (oprForm->oprleft) + *objargs = lappend(*objargs, + format_type_be_qualified(oprForm->oprleft)); + if (oprForm->oprright) + *objargs = lappend(*objargs, + format_type_be_qualified(oprForm->oprright)); + + ReleaseSysCache(opertup); +} + +/* + * regoperatorout - converts operator OID to "opr_name(args)" + */ +Datum +regoperatorout(PG_FUNCTION_ARGS) +{ + Oid oprid = PG_GETARG_OID(0); + char *result; + + if (oprid == InvalidOid) + result = pstrdup("0"); + else + result = format_operator(oprid); + + PG_RETURN_CSTRING(result); +} + +/* + * regoperatorrecv - converts external binary format to regoperator + */ +Datum 
+regoperatorrecv(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidrecv, so share code */ + return oidrecv(fcinfo); +} + +/* + * regoperatorsend - converts regoperator to binary format + */ +Datum +regoperatorsend(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidsend, so share code */ + return oidsend(fcinfo); +} + + +/* + * regclassin - converts "classname" to class OID + * + * We also accept a numeric OID, for symmetry with the output routine. + * + * '-' signifies unknown (OID 0). In all other cases, the input must + * match an existing pg_class entry. + */ +Datum +regclassin(PG_FUNCTION_ARGS) +{ + char *class_name_or_oid = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + Oid result; + List *names; + + /* Handle "-" or numeric OID */ + if (parseDashOrOid(class_name_or_oid, &result, escontext)) + PG_RETURN_OID(result); + + /* Else it's a name, possibly schema-qualified */ + + /* The rest of this wouldn't work in bootstrap mode */ + if (IsBootstrapProcessingMode()) + elog(ERROR, "regclass values must be OIDs in bootstrap mode"); + + /* + * Normal case: parse the name into components and see if it matches any + * pg_class entries in the current search path. + */ + names = stringToQualifiedNameList(class_name_or_oid, escontext); + if (names == NIL) + PG_RETURN_NULL(); + + /* We might not even have permissions on this relation; don't lock it. */ + result = RangeVarGetRelid(makeRangeVarFromNameList(names), NoLock, true); + + if (!OidIsValid(result)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_UNDEFINED_TABLE), + errmsg("relation \"%s\" does not exist", + NameListToString(names)))); + + PG_RETURN_OID(result); +} + +/* + * to_regclass - converts "classname" to class OID + * + * If the name is not found, we return NULL. + */ +Datum +to_regclass(PG_FUNCTION_ARGS) +{ + char *class_name = text_to_cstring(PG_GETARG_TEXT_PP(0)); + Datum result; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + if (!DirectInputFunctionCallSafe(regclassin, class_name, + InvalidOid, -1, + (Node *) &escontext, + &result)) + PG_RETURN_NULL(); + PG_RETURN_DATUM(result); +} + +/* + * regclassout - converts class OID to "class_name" + */ +Datum +regclassout(PG_FUNCTION_ARGS) +{ + Oid classid = PG_GETARG_OID(0); + char *result; + HeapTuple classtup; + + if (classid == InvalidOid) + { + result = pstrdup("-"); + PG_RETURN_CSTRING(result); + } + + classtup = SearchSysCache1(RELOID, ObjectIdGetDatum(classid)); + + if (HeapTupleIsValid(classtup)) + { + Form_pg_class classform = (Form_pg_class) GETSTRUCT(classtup); + char *classname = NameStr(classform->relname); + + /* + * In bootstrap mode, skip the fancy namespace stuff and just return + * the class name. (This path is only needed for debugging output + * anyway.) + */ + if (IsBootstrapProcessingMode()) + result = pstrdup(classname); + else + { + char *nspname; + + /* + * Would this class be found by regclassin? If not, qualify it. 
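+ * (Illustration: a table t in a schema s that is not on the current
+ * search_path, or that is shadowed by another t earlier on the path, is
+ * expected to print as 's.t'; otherwise it prints as just 't', so the
+ * output always reads back to the same relation via regclassin.)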
+ */ + if (RelationIsVisible(classid)) + nspname = NULL; + else + nspname = get_namespace_name(classform->relnamespace); + + result = quote_qualified_identifier(nspname, classname); + } + + ReleaseSysCache(classtup); + } + else + { + /* If OID doesn't match any pg_class entry, return it numerically */ + result = (char *) palloc(NAMEDATALEN); + snprintf(result, NAMEDATALEN, "%u", classid); + } + + PG_RETURN_CSTRING(result); +} + +/* + * regclassrecv - converts external binary format to regclass + */ +Datum +regclassrecv(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidrecv, so share code */ + return oidrecv(fcinfo); +} + +/* + * regclasssend - converts regclass to binary format + */ +Datum +regclasssend(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidsend, so share code */ + return oidsend(fcinfo); +} + + +/* + * regcollationin - converts "collationname" to collation OID + * + * We also accept a numeric OID, for symmetry with the output routine. + * + * '-' signifies unknown (OID 0). In all other cases, the input must + * match an existing pg_collation entry. + */ +Datum +regcollationin(PG_FUNCTION_ARGS) +{ + char *collation_name_or_oid = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + Oid result; + List *names; + + /* Handle "-" or numeric OID */ + if (parseDashOrOid(collation_name_or_oid, &result, escontext)) + PG_RETURN_OID(result); + + /* Else it's a name, possibly schema-qualified */ + + /* The rest of this wouldn't work in bootstrap mode */ + if (IsBootstrapProcessingMode()) + elog(ERROR, "regcollation values must be OIDs in bootstrap mode"); + + /* + * Normal case: parse the name into components and see if it matches any + * pg_collation entries in the current search path. + */ + names = stringToQualifiedNameList(collation_name_or_oid, escontext); + if (names == NIL) + PG_RETURN_NULL(); + + result = get_collation_oid(names, true); + + if (!OidIsValid(result)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("collation \"%s\" for encoding \"%s\" does not exist", + NameListToString(names), GetDatabaseEncodingName()))); + + PG_RETURN_OID(result); +} + +/* + * to_regcollation - converts "collationname" to collation OID + * + * If the name is not found, we return NULL. + */ +Datum +to_regcollation(PG_FUNCTION_ARGS) +{ + char *collation_name = text_to_cstring(PG_GETARG_TEXT_PP(0)); + Datum result; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + if (!DirectInputFunctionCallSafe(regcollationin, collation_name, + InvalidOid, -1, + (Node *) &escontext, + &result)) + PG_RETURN_NULL(); + PG_RETURN_DATUM(result); +} + +/* + * regcollationout - converts collation OID to "collation_name" + */ +Datum +regcollationout(PG_FUNCTION_ARGS) +{ + Oid collationid = PG_GETARG_OID(0); + char *result; + HeapTuple collationtup; + + if (collationid == InvalidOid) + { + result = pstrdup("-"); + PG_RETURN_CSTRING(result); + } + + collationtup = SearchSysCache1(COLLOID, ObjectIdGetDatum(collationid)); + + if (HeapTupleIsValid(collationtup)) + { + Form_pg_collation collationform = (Form_pg_collation) GETSTRUCT(collationtup); + char *collationname = NameStr(collationform->collname); + + /* + * In bootstrap mode, skip the fancy namespace stuff and just return + * the collation name. (This path is only needed for debugging output + * anyway.) + */ + if (IsBootstrapProcessingMode()) + result = pstrdup(collationname); + else + { + char *nspname; + + /* + * Would this collation be found by regcollationin? If not, + * qualify it. 
+ */ + if (CollationIsVisible(collationid)) + nspname = NULL; + else + nspname = get_namespace_name(collationform->collnamespace); + + result = quote_qualified_identifier(nspname, collationname); + } + + ReleaseSysCache(collationtup); + } + else + { + /* If OID doesn't match any pg_collation entry, return it numerically */ + result = (char *) palloc(NAMEDATALEN); + snprintf(result, NAMEDATALEN, "%u", collationid); + } + + PG_RETURN_CSTRING(result); +} + +/* + * regcollationrecv - converts external binary format to regcollation + */ +Datum +regcollationrecv(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidrecv, so share code */ + return oidrecv(fcinfo); +} + +/* + * regcollationsend - converts regcollation to binary format + */ +Datum +regcollationsend(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidsend, so share code */ + return oidsend(fcinfo); +} + + +/* + * regtypein - converts "typename" to type OID + * + * The type name can be specified using the full type syntax recognized by + * the parser; for example, DOUBLE PRECISION and INTEGER[] will work and be + * translated to the correct type names. (We ignore any typmod info + * generated by the parser, however.) + * + * We also accept a numeric OID, for symmetry with the output routine, + * and for possible use in bootstrap mode. + * + * '-' signifies unknown (OID 0). In all other cases, the input must + * match an existing pg_type entry. + */ +Datum +regtypein(PG_FUNCTION_ARGS) +{ + char *typ_name_or_oid = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + Oid result; + int32 typmod; + + /* Handle "-" or numeric OID */ + if (parseDashOrOid(typ_name_or_oid, &result, escontext)) + PG_RETURN_OID(result); + + /* Else it's a type name, possibly schema-qualified or decorated */ + + /* The rest of this wouldn't work in bootstrap mode */ + if (IsBootstrapProcessingMode()) + elog(ERROR, "regtype values must be OIDs in bootstrap mode"); + + /* + * Normal case: invoke the full parser to deal with special cases such as + * array syntax. We don't need to check for parseTypeString failure, + * since we'll just return anyway. + */ + (void) parseTypeString(typ_name_or_oid, &result, &typmod, escontext); + + PG_RETURN_OID(result); +} + +/* + * to_regtype - converts "typename" to type OID + * + * If the name is not found, we return NULL. + */ +Datum +to_regtype(PG_FUNCTION_ARGS) +{ + char *typ_name = text_to_cstring(PG_GETARG_TEXT_PP(0)); + Datum result; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + if (!DirectInputFunctionCallSafe(regtypein, typ_name, + InvalidOid, -1, + (Node *) &escontext, + &result)) + PG_RETURN_NULL(); + PG_RETURN_DATUM(result); +} + +/* + * regtypeout - converts type OID to "typ_name" + */ +Datum +regtypeout(PG_FUNCTION_ARGS) +{ + Oid typid = PG_GETARG_OID(0); + char *result; + HeapTuple typetup; + + if (typid == InvalidOid) + { + result = pstrdup("-"); + PG_RETURN_CSTRING(result); + } + + typetup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + + if (HeapTupleIsValid(typetup)) + { + Form_pg_type typeform = (Form_pg_type) GETSTRUCT(typetup); + + /* + * In bootstrap mode, skip the fancy namespace stuff and just return + * the type name. (This path is only needed for debugging output + * anyway.) 
+ */ + if (IsBootstrapProcessingMode()) + { + char *typname = NameStr(typeform->typname); + + result = pstrdup(typname); + } + else + result = format_type_be(typid); + + ReleaseSysCache(typetup); + } + else + { + /* If OID doesn't match any pg_type entry, return it numerically */ + result = (char *) palloc(NAMEDATALEN); + snprintf(result, NAMEDATALEN, "%u", typid); + } + + PG_RETURN_CSTRING(result); +} + +/* + * regtyperecv - converts external binary format to regtype + */ +Datum +regtyperecv(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidrecv, so share code */ + return oidrecv(fcinfo); +} + +/* + * regtypesend - converts regtype to binary format + */ +Datum +regtypesend(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidsend, so share code */ + return oidsend(fcinfo); +} + + +/* + * regconfigin - converts "tsconfigname" to tsconfig OID + * + * We also accept a numeric OID, for symmetry with the output routine. + * + * '-' signifies unknown (OID 0). In all other cases, the input must + * match an existing pg_ts_config entry. + */ +Datum +regconfigin(PG_FUNCTION_ARGS) +{ + char *cfg_name_or_oid = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + Oid result; + List *names; + + /* Handle "-" or numeric OID */ + if (parseDashOrOid(cfg_name_or_oid, &result, escontext)) + PG_RETURN_OID(result); + + /* The rest of this wouldn't work in bootstrap mode */ + if (IsBootstrapProcessingMode()) + elog(ERROR, "regconfig values must be OIDs in bootstrap mode"); + + /* + * Normal case: parse the name into components and see if it matches any + * pg_ts_config entries in the current search path. + */ + names = stringToQualifiedNameList(cfg_name_or_oid, escontext); + if (names == NIL) + PG_RETURN_NULL(); + + result = get_ts_config_oid(names, true); + + if (!OidIsValid(result)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("text search configuration \"%s\" does not exist", + NameListToString(names)))); + + PG_RETURN_OID(result); +} + +/* + * regconfigout - converts tsconfig OID to "tsconfigname" + */ +Datum +regconfigout(PG_FUNCTION_ARGS) +{ + Oid cfgid = PG_GETARG_OID(0); + char *result; + HeapTuple cfgtup; + + if (cfgid == InvalidOid) + { + result = pstrdup("-"); + PG_RETURN_CSTRING(result); + } + + cfgtup = SearchSysCache1(TSCONFIGOID, ObjectIdGetDatum(cfgid)); + + if (HeapTupleIsValid(cfgtup)) + { + Form_pg_ts_config cfgform = (Form_pg_ts_config) GETSTRUCT(cfgtup); + char *cfgname = NameStr(cfgform->cfgname); + char *nspname; + + /* + * Would this config be found by regconfigin? If not, qualify it. + */ + if (TSConfigIsVisible(cfgid)) + nspname = NULL; + else + nspname = get_namespace_name(cfgform->cfgnamespace); + + result = quote_qualified_identifier(nspname, cfgname); + + ReleaseSysCache(cfgtup); + } + else + { + /* If OID doesn't match any pg_ts_config row, return it numerically */ + result = (char *) palloc(NAMEDATALEN); + snprintf(result, NAMEDATALEN, "%u", cfgid); + } + + PG_RETURN_CSTRING(result); +} + +/* + * regconfigrecv - converts external binary format to regconfig + */ +Datum +regconfigrecv(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidrecv, so share code */ + return oidrecv(fcinfo); +} + +/* + * regconfigsend - converts regconfig to binary format + */ +Datum +regconfigsend(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidsend, so share code */ + return oidsend(fcinfo); +} + + +/* + * regdictionaryin - converts "tsdictionaryname" to tsdictionary OID + * + * We also accept a numeric OID, for symmetry with the output routine. 
+ * + * '-' signifies unknown (OID 0). In all other cases, the input must + * match an existing pg_ts_dict entry. + */ +Datum +regdictionaryin(PG_FUNCTION_ARGS) +{ + char *dict_name_or_oid = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + Oid result; + List *names; + + /* Handle "-" or numeric OID */ + if (parseDashOrOid(dict_name_or_oid, &result, escontext)) + PG_RETURN_OID(result); + + /* The rest of this wouldn't work in bootstrap mode */ + if (IsBootstrapProcessingMode()) + elog(ERROR, "regdictionary values must be OIDs in bootstrap mode"); + + /* + * Normal case: parse the name into components and see if it matches any + * pg_ts_dict entries in the current search path. + */ + names = stringToQualifiedNameList(dict_name_or_oid, escontext); + if (names == NIL) + PG_RETURN_NULL(); + + result = get_ts_dict_oid(names, true); + + if (!OidIsValid(result)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("text search dictionary \"%s\" does not exist", + NameListToString(names)))); + + PG_RETURN_OID(result); +} + +/* + * regdictionaryout - converts tsdictionary OID to "tsdictionaryname" + */ +Datum +regdictionaryout(PG_FUNCTION_ARGS) +{ + Oid dictid = PG_GETARG_OID(0); + char *result; + HeapTuple dicttup; + + if (dictid == InvalidOid) + { + result = pstrdup("-"); + PG_RETURN_CSTRING(result); + } + + dicttup = SearchSysCache1(TSDICTOID, ObjectIdGetDatum(dictid)); + + if (HeapTupleIsValid(dicttup)) + { + Form_pg_ts_dict dictform = (Form_pg_ts_dict) GETSTRUCT(dicttup); + char *dictname = NameStr(dictform->dictname); + char *nspname; + + /* + * Would this dictionary be found by regdictionaryin? If not, qualify + * it. + */ + if (TSDictionaryIsVisible(dictid)) + nspname = NULL; + else + nspname = get_namespace_name(dictform->dictnamespace); + + result = quote_qualified_identifier(nspname, dictname); + + ReleaseSysCache(dicttup); + } + else + { + /* If OID doesn't match any pg_ts_dict row, return it numerically */ + result = (char *) palloc(NAMEDATALEN); + snprintf(result, NAMEDATALEN, "%u", dictid); + } + + PG_RETURN_CSTRING(result); +} + +/* + * regdictionaryrecv - converts external binary format to regdictionary + */ +Datum +regdictionaryrecv(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidrecv, so share code */ + return oidrecv(fcinfo); +} + +/* + * regdictionarysend - converts regdictionary to binary format + */ +Datum +regdictionarysend(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidsend, so share code */ + return oidsend(fcinfo); +} + +/* + * regrolein - converts "rolename" to role OID + * + * We also accept a numeric OID, for symmetry with the output routine. + * + * '-' signifies unknown (OID 0). In all other cases, the input must + * match an existing pg_authid entry. + */ +Datum +regrolein(PG_FUNCTION_ARGS) +{ + char *role_name_or_oid = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + Oid result; + List *names; + + /* Handle "-" or numeric OID */ + if (parseDashOrOid(role_name_or_oid, &result, escontext)) + PG_RETURN_OID(result); + + /* The rest of this wouldn't work in bootstrap mode */ + if (IsBootstrapProcessingMode()) + elog(ERROR, "regrole values must be OIDs in bootstrap mode"); + + /* Normal case: see if the name matches any pg_authid entry. 
*/ + names = stringToQualifiedNameList(role_name_or_oid, escontext); + if (names == NIL) + PG_RETURN_NULL(); + + if (list_length(names) != 1) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_NAME), + errmsg("invalid name syntax"))); + + result = get_role_oid(strVal(linitial(names)), true); + + if (!OidIsValid(result)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("role \"%s\" does not exist", + strVal(linitial(names))))); + + PG_RETURN_OID(result); +} + +/* + * to_regrole - converts "rolename" to role OID + * + * If the name is not found, we return NULL. + */ +Datum +to_regrole(PG_FUNCTION_ARGS) +{ + char *role_name = text_to_cstring(PG_GETARG_TEXT_PP(0)); + Datum result; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + if (!DirectInputFunctionCallSafe(regrolein, role_name, + InvalidOid, -1, + (Node *) &escontext, + &result)) + PG_RETURN_NULL(); + PG_RETURN_DATUM(result); +} + +/* + * regroleout - converts role OID to "role_name" + */ +Datum +regroleout(PG_FUNCTION_ARGS) +{ + Oid roleoid = PG_GETARG_OID(0); + char *result; + + if (roleoid == InvalidOid) + { + result = pstrdup("-"); + PG_RETURN_CSTRING(result); + } + + result = GetUserNameFromId(roleoid, true); + + if (result) + { + /* pstrdup is not really necessary, but it avoids a compiler warning */ + result = pstrdup(quote_identifier(result)); + } + else + { + /* If OID doesn't match any role, return it numerically */ + result = (char *) palloc(NAMEDATALEN); + snprintf(result, NAMEDATALEN, "%u", roleoid); + } + + PG_RETURN_CSTRING(result); +} + +/* + * regrolerecv - converts external binary format to regrole + */ +Datum +regrolerecv(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidrecv, so share code */ + return oidrecv(fcinfo); +} + +/* + * regrolesend - converts regrole to binary format + */ +Datum +regrolesend(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidsend, so share code */ + return oidsend(fcinfo); +} + +/* + * regnamespacein - converts "nspname" to namespace OID + * + * We also accept a numeric OID, for symmetry with the output routine. + * + * '-' signifies unknown (OID 0). In all other cases, the input must + * match an existing pg_namespace entry. + */ +Datum +regnamespacein(PG_FUNCTION_ARGS) +{ + char *nsp_name_or_oid = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + Oid result; + List *names; + + /* Handle "-" or numeric OID */ + if (parseDashOrOid(nsp_name_or_oid, &result, escontext)) + PG_RETURN_OID(result); + + /* The rest of this wouldn't work in bootstrap mode */ + if (IsBootstrapProcessingMode()) + elog(ERROR, "regnamespace values must be OIDs in bootstrap mode"); + + /* Normal case: see if the name matches any pg_namespace entry. */ + names = stringToQualifiedNameList(nsp_name_or_oid, escontext); + if (names == NIL) + PG_RETURN_NULL(); + + if (list_length(names) != 1) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_NAME), + errmsg("invalid name syntax"))); + + result = get_namespace_oid(strVal(linitial(names)), true); + + if (!OidIsValid(result)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_UNDEFINED_SCHEMA), + errmsg("schema \"%s\" does not exist", + strVal(linitial(names))))); + + PG_RETURN_OID(result); +} + +/* + * to_regnamespace - converts "nspname" to namespace OID + * + * If the name is not found, we return NULL. 
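+ *
+ * For example (illustrative usage): SELECT to_regnamespace('pg_catalog')
+ * returns that schema's OID, while SELECT to_regnamespace('no_such_schema')
+ * returns NULL rather than raising an error.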
+ */ +Datum +to_regnamespace(PG_FUNCTION_ARGS) +{ + char *nsp_name = text_to_cstring(PG_GETARG_TEXT_PP(0)); + Datum result; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + if (!DirectInputFunctionCallSafe(regnamespacein, nsp_name, + InvalidOid, -1, + (Node *) &escontext, + &result)) + PG_RETURN_NULL(); + PG_RETURN_DATUM(result); +} + +/* + * regnamespaceout - converts namespace OID to "nsp_name" + */ +Datum +regnamespaceout(PG_FUNCTION_ARGS) +{ + Oid nspid = PG_GETARG_OID(0); + char *result; + + if (nspid == InvalidOid) + { + result = pstrdup("-"); + PG_RETURN_CSTRING(result); + } + + result = get_namespace_name(nspid); + + if (result) + { + /* pstrdup is not really necessary, but it avoids a compiler warning */ + result = pstrdup(quote_identifier(result)); + } + else + { + /* If OID doesn't match any namespace, return it numerically */ + result = (char *) palloc(NAMEDATALEN); + snprintf(result, NAMEDATALEN, "%u", nspid); + } + + PG_RETURN_CSTRING(result); +} + +/* + * regnamespacerecv - converts external binary format to regnamespace + */ +Datum +regnamespacerecv(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidrecv, so share code */ + return oidrecv(fcinfo); +} + +/* + * regnamespacesend - converts regnamespace to binary format + */ +Datum +regnamespacesend(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidsend, so share code */ + return oidsend(fcinfo); +} + +/* + * text_regclass: convert text to regclass + * + * This could be replaced by CoerceViaIO, except that we need to treat + * text-to-regclass as an implicit cast to support legacy forms of nextval() + * and related functions. + */ +Datum +text_regclass(PG_FUNCTION_ARGS) +{ + text *relname = PG_GETARG_TEXT_PP(0); + Oid result; + RangeVar *rv; + + rv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); + + /* We might not even have permissions on this relation; don't lock it. */ + result = RangeVarGetRelid(rv, NoLock, false); + + PG_RETURN_OID(result); +} + + +/* + * Given a C string, parse it into a qualified-name list. + * + * If escontext is an ErrorSaveContext node, invalid input will be + * reported there instead of being thrown, and we return NIL. + * (NIL is not possible as a success return, since empty-input is an error.) + */ +List * +stringToQualifiedNameList(const char *string, Node *escontext) +{ + char *rawname; + List *result = NIL; + List *namelist; + ListCell *l; + + /* We need a modifiable copy of the input string. */ + rawname = pstrdup(string); + + if (!SplitIdentifierString(rawname, '.', &namelist)) + ereturn(escontext, NIL, + (errcode(ERRCODE_INVALID_NAME), + errmsg("invalid name syntax"))); + + if (namelist == NIL) + ereturn(escontext, NIL, + (errcode(ERRCODE_INVALID_NAME), + errmsg("invalid name syntax"))); + + foreach(l, namelist) + { + char *curname = (char *) lfirst(l); + + result = lappend(result, makeString(pstrdup(curname))); + } + + pfree(rawname); + list_free(namelist); + + return result; +} + +/***************************************************************************** + * SUPPORT ROUTINES * + *****************************************************************************/ + +/* + * Given a C string, see if it is all-digits (and not empty). + * If so, convert directly to OID and return true. + * If it is not all-digits, return false. + * + * If escontext is an ErrorSaveContext node, any error in oidin() will be + * reported there instead of being thrown (but we still return true). 
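+ *
+ * For example, the input "12345" is converted directly to OID 12345 and we
+ * return true, while "pg_class" is not all-digits, so we return false and
+ * the caller falls back to a name lookup.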
+ */ +static bool +parseNumericOid(char *string, Oid *result, Node *escontext) +{ + if (string[0] >= '0' && string[0] <= '9' && + strspn(string, "0123456789") == strlen(string)) + { + Datum oid_datum; + + /* We need not care here whether oidin() fails or not. */ + (void) DirectInputFunctionCallSafe(oidin, string, + InvalidOid, -1, + escontext, + &oid_datum); + *result = DatumGetObjectId(oid_datum); + return true; + } + + /* Prevent uninitialized-variable warnings from stupider compilers. */ + *result = InvalidOid; + return false; +} + +/* + * As above, but also accept "-" as meaning 0 (InvalidOid). + */ +static bool +parseDashOrOid(char *string, Oid *result, Node *escontext) +{ + /* '-' ? */ + if (strcmp(string, "-") == 0) + { + *result = InvalidOid; + return true; + } + + /* Numeric OID? */ + return parseNumericOid(string, result, escontext); +} + +/* + * Given a C string, parse it into a qualified function or operator name + * followed by a parenthesized list of type names. Reduce the + * type names to an array of OIDs (returned into *nargs and *argtypes; + * the argtypes array should be of size FUNC_MAX_ARGS). The function or + * operator name is returned to *names as a List of Strings. + * + * If allowNone is true, accept "NONE" and return it as InvalidOid (this is + * for unary operators). + * + * Returns true on success, false on failure (the latter only possible + * if escontext is an ErrorSaveContext node). + */ +static bool +parseNameAndArgTypes(const char *string, bool allowNone, List **names, + int *nargs, Oid *argtypes, + Node *escontext) +{ + char *rawname; + char *ptr; + char *ptr2; + char *typename; + bool in_quote; + bool had_comma; + int paren_count; + Oid typeid; + int32 typmod; + + /* We need a modifiable copy of the input string. */ + rawname = pstrdup(string); + + /* Scan to find the expected left paren; mustn't be quoted */ + in_quote = false; + for (ptr = rawname; *ptr; ptr++) + { + if (*ptr == '"') + in_quote = !in_quote; + else if (*ptr == '(' && !in_quote) + break; + } + if (*ptr == '\0') + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("expected a left parenthesis"))); + + /* Separate the name and parse it into a list */ + *ptr++ = '\0'; + *names = stringToQualifiedNameList(rawname, escontext); + if (*names == NIL) + return false; + + /* Check for the trailing right parenthesis and remove it */ + ptr2 = ptr + strlen(ptr); + while (--ptr2 > ptr) + { + if (!scanner_isspace(*ptr2)) + break; + } + if (*ptr2 != ')') + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("expected a right parenthesis"))); + + *ptr2 = '\0'; + + /* Separate the remaining string into comma-separated type names */ + *nargs = 0; + had_comma = false; + + for (;;) + { + /* allow leading whitespace */ + while (scanner_isspace(*ptr)) + ptr++; + if (*ptr == '\0') + { + /* End of string. Okay unless we had a comma before. */ + if (had_comma) + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("expected a type name"))); + break; + } + typename = ptr; + /* Find end of type name --- end of string or comma */ + /* ... 
but not a quoted or parenthesized comma */ + in_quote = false; + paren_count = 0; + for (; *ptr; ptr++) + { + if (*ptr == '"') + in_quote = !in_quote; + else if (*ptr == ',' && !in_quote && paren_count == 0) + break; + else if (!in_quote) + { + switch (*ptr) + { + case '(': + case '[': + paren_count++; + break; + case ')': + case ']': + paren_count--; + break; + } + } + } + if (in_quote || paren_count != 0) + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("improper type name"))); + + ptr2 = ptr; + if (*ptr == ',') + { + had_comma = true; + *ptr++ = '\0'; + } + else + { + had_comma = false; + Assert(*ptr == '\0'); + } + /* Lop off trailing whitespace */ + while (--ptr2 >= typename) + { + if (!scanner_isspace(*ptr2)) + break; + *ptr2 = '\0'; + } + + if (allowNone && pg_strcasecmp(typename, "none") == 0) + { + /* Special case for NONE */ + typeid = InvalidOid; + typmod = -1; + } + else + { + /* Use full parser to resolve the type name */ + if (!parseTypeString(typename, &typeid, &typmod, escontext)) + return false; + } + if (*nargs >= FUNC_MAX_ARGS) + ereturn(escontext, false, + (errcode(ERRCODE_TOO_MANY_ARGUMENTS), + errmsg("too many arguments"))); + + argtypes[*nargs] = typeid; + (*nargs)++; + } + + pfree(rawname); + + return true; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/ri_triggers.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/ri_triggers.c new file mode 100644 index 00000000000..d93b9c18611 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/ri_triggers.c @@ -0,0 +1,3032 @@ +/*------------------------------------------------------------------------- + * + * ri_triggers.c + * + * Generic trigger procedures for referential integrity constraint + * checks. + * + * Note about memory management: the private hashtables kept here live + * across query and transaction boundaries, in fact they live as long as + * the backend does. This works because the hashtable structures + * themselves are allocated by dynahash.c in its permanent DynaHashCxt, + * and the SPI plans they point to are saved using SPI_keepplan(). + * There is not currently any provision for throwing away a no-longer-needed + * plan --- consider improving this someday. 
+ * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * + * src/backend/utils/adt/ri_triggers.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/sysattr.h" +#include "access/table.h" +#include "access/tableam.h" +#include "access/xact.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_constraint.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_type.h" +#include "commands/trigger.h" +#include "executor/executor.h" +#include "executor/spi.h" +#include "lib/ilist.h" +#include "miscadmin.h" +#include "parser/parse_coerce.h" +#include "parser/parse_relation.h" +#include "storage/bufmgr.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/fmgroids.h" +#include "utils/guc.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/rls.h" +#include "utils/ruleutils.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" + +/* + * Local definitions + */ + +#define RI_MAX_NUMKEYS INDEX_MAX_KEYS + +#define RI_INIT_CONSTRAINTHASHSIZE 64 +#define RI_INIT_QUERYHASHSIZE (RI_INIT_CONSTRAINTHASHSIZE * 4) + +#define RI_KEYS_ALL_NULL 0 +#define RI_KEYS_SOME_NULL 1 +#define RI_KEYS_NONE_NULL 2 + +/* RI query type codes */ +/* these queries are executed against the PK (referenced) table: */ +#define RI_PLAN_CHECK_LOOKUPPK 1 +#define RI_PLAN_CHECK_LOOKUPPK_FROM_PK 2 +#define RI_PLAN_LAST_ON_PK RI_PLAN_CHECK_LOOKUPPK_FROM_PK +/* these queries are executed against the FK (referencing) table: */ +#define RI_PLAN_CASCADE_ONDELETE 3 +#define RI_PLAN_CASCADE_ONUPDATE 4 +/* For RESTRICT, the same plan can be used for both ON DELETE and ON UPDATE triggers. */ +#define RI_PLAN_RESTRICT 5 +#define RI_PLAN_SETNULL_ONDELETE 6 +#define RI_PLAN_SETNULL_ONUPDATE 7 +#define RI_PLAN_SETDEFAULT_ONDELETE 8 +#define RI_PLAN_SETDEFAULT_ONUPDATE 9 + +#define MAX_QUOTED_NAME_LEN (NAMEDATALEN*2+3) +#define MAX_QUOTED_REL_NAME_LEN (MAX_QUOTED_NAME_LEN*2) + +#define RIAttName(rel, attnum) NameStr(*attnumAttName(rel, attnum)) +#define RIAttType(rel, attnum) attnumTypeId(rel, attnum) +#define RIAttCollation(rel, attnum) attnumCollationId(rel, attnum) + +#define RI_TRIGTYPE_INSERT 1 +#define RI_TRIGTYPE_UPDATE 2 +#define RI_TRIGTYPE_DELETE 3 + + +/* + * RI_ConstraintInfo + * + * Information extracted from an FK pg_constraint entry. This is cached in + * ri_constraint_cache. + */ +typedef struct RI_ConstraintInfo +{ + Oid constraint_id; /* OID of pg_constraint entry (hash key) */ + bool valid; /* successfully initialized? 
*/ + Oid constraint_root_id; /* OID of topmost ancestor constraint; + * same as constraint_id if not inherited */ + uint32 oidHashValue; /* hash value of constraint_id */ + uint32 rootHashValue; /* hash value of constraint_root_id */ + NameData conname; /* name of the FK constraint */ + Oid pk_relid; /* referenced relation */ + Oid fk_relid; /* referencing relation */ + char confupdtype; /* foreign key's ON UPDATE action */ + char confdeltype; /* foreign key's ON DELETE action */ + int ndelsetcols; /* number of columns referenced in ON DELETE + * SET clause */ + int16 confdelsetcols[RI_MAX_NUMKEYS]; /* attnums of cols to set on + * delete */ + char confmatchtype; /* foreign key's match type */ + int nkeys; /* number of key columns */ + int16 pk_attnums[RI_MAX_NUMKEYS]; /* attnums of referenced cols */ + int16 fk_attnums[RI_MAX_NUMKEYS]; /* attnums of referencing cols */ + Oid pf_eq_oprs[RI_MAX_NUMKEYS]; /* equality operators (PK = FK) */ + Oid pp_eq_oprs[RI_MAX_NUMKEYS]; /* equality operators (PK = PK) */ + Oid ff_eq_oprs[RI_MAX_NUMKEYS]; /* equality operators (FK = FK) */ + dlist_node valid_link; /* Link in list of valid entries */ +} RI_ConstraintInfo; + +/* + * RI_QueryKey + * + * The key identifying a prepared SPI plan in our query hashtable + */ +typedef struct RI_QueryKey +{ + Oid constr_id; /* OID of pg_constraint entry */ + int32 constr_queryno; /* query type ID, see RI_PLAN_XXX above */ +} RI_QueryKey; + +/* + * RI_QueryHashEntry + */ +typedef struct RI_QueryHashEntry +{ + RI_QueryKey key; + SPIPlanPtr plan; +} RI_QueryHashEntry; + +/* + * RI_CompareKey + * + * The key identifying an entry showing how to compare two values + */ +typedef struct RI_CompareKey +{ + Oid eq_opr; /* the equality operator to apply */ + Oid typeid; /* the data type to apply it to */ +} RI_CompareKey; + +/* + * RI_CompareHashEntry + */ +typedef struct RI_CompareHashEntry +{ + RI_CompareKey key; + bool valid; /* successfully initialized? 
*/ + FmgrInfo eq_opr_finfo; /* call info for equality fn */ + FmgrInfo cast_func_finfo; /* in case we must coerce input */ +} RI_CompareHashEntry; + + +/* + * Local data + */ +static __thread HTAB *ri_constraint_cache = NULL; +static __thread HTAB *ri_query_cache = NULL; +static __thread HTAB *ri_compare_cache = NULL; +static __thread dclist_head ri_constraint_cache_valid_list; + + +/* + * Local function prototypes + */ +static bool ri_Check_Pk_Match(Relation pk_rel, Relation fk_rel, + TupleTableSlot *oldslot, + const RI_ConstraintInfo *riinfo); +static Datum ri_restrict(TriggerData *trigdata, bool is_no_action); +static Datum ri_set(TriggerData *trigdata, bool is_set_null, int tgkind); +static void quoteOneName(char *buffer, const char *name); +static void quoteRelationName(char *buffer, Relation rel); +static void ri_GenerateQual(StringInfo buf, + const char *sep, + const char *leftop, Oid leftoptype, + Oid opoid, + const char *rightop, Oid rightoptype); +static void ri_GenerateQualCollation(StringInfo buf, Oid collation); +static int ri_NullCheck(TupleDesc tupDesc, TupleTableSlot *slot, + const RI_ConstraintInfo *riinfo, bool rel_is_pk); +static void ri_BuildQueryKey(RI_QueryKey *key, + const RI_ConstraintInfo *riinfo, + int32 constr_queryno); +static bool ri_KeysEqual(Relation rel, TupleTableSlot *oldslot, TupleTableSlot *newslot, + const RI_ConstraintInfo *riinfo, bool rel_is_pk); +static bool ri_AttributesEqual(Oid eq_opr, Oid typeid, + Datum oldvalue, Datum newvalue); + +static void ri_InitHashTables(void); +static void InvalidateConstraintCacheCallBack(Datum arg, int cacheid, uint32 hashvalue); +static SPIPlanPtr ri_FetchPreparedPlan(RI_QueryKey *key); +static void ri_HashPreparedPlan(RI_QueryKey *key, SPIPlanPtr plan); +static RI_CompareHashEntry *ri_HashCompareOp(Oid eq_opr, Oid typeid); + +static void ri_CheckTrigger(FunctionCallInfo fcinfo, const char *funcname, + int tgkind); +static const RI_ConstraintInfo *ri_FetchConstraintInfo(Trigger *trigger, + Relation trig_rel, bool rel_is_pk); +static const RI_ConstraintInfo *ri_LoadConstraintInfo(Oid constraintOid); +static Oid get_ri_constraint_root(Oid constrOid); +static SPIPlanPtr ri_PlanCheck(const char *querystr, int nargs, Oid *argtypes, + RI_QueryKey *qkey, Relation fk_rel, Relation pk_rel); +static bool ri_PerformCheck(const RI_ConstraintInfo *riinfo, + RI_QueryKey *qkey, SPIPlanPtr qplan, + Relation fk_rel, Relation pk_rel, + TupleTableSlot *oldslot, TupleTableSlot *newslot, + bool detectNewRows, int expect_OK); +static void ri_ExtractValues(Relation rel, TupleTableSlot *slot, + const RI_ConstraintInfo *riinfo, bool rel_is_pk, + Datum *vals, char *nulls); +static void ri_ReportViolation(const RI_ConstraintInfo *riinfo, + Relation pk_rel, Relation fk_rel, + TupleTableSlot *violatorslot, TupleDesc tupdesc, + int queryno, bool partgone) pg_attribute_noreturn(); + + +/* + * RI_FKey_check - + * + * Check foreign key existence (combined for INSERT and UPDATE). 
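+ *
+ * For example (hypothetical constraint): given FOREIGN KEY (order_id)
+ * REFERENCES orders (id), this routine runs for each inserted or updated
+ * referencing row and verifies that a matching orders row still exists.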
+ */ +static Datum +RI_FKey_check(TriggerData *trigdata) +{ + const RI_ConstraintInfo *riinfo; + Relation fk_rel; + Relation pk_rel; + TupleTableSlot *newslot; + RI_QueryKey qkey; + SPIPlanPtr qplan; + + riinfo = ri_FetchConstraintInfo(trigdata->tg_trigger, + trigdata->tg_relation, false); + + if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)) + newslot = trigdata->tg_newslot; + else + newslot = trigdata->tg_trigslot; + + /* + * We should not even consider checking the row if it is no longer valid, + * since it was either deleted (so the deferred check should be skipped) + * or updated (in which case only the latest version of the row should be + * checked). Test its liveness according to SnapshotSelf. We need pin + * and lock on the buffer to call HeapTupleSatisfiesVisibility. Caller + * should be holding pin, but not lock. + */ + if (!table_tuple_satisfies_snapshot(trigdata->tg_relation, newslot, SnapshotSelf)) + return PointerGetDatum(NULL); + + /* + * Get the relation descriptors of the FK and PK tables. + * + * pk_rel is opened in RowShareLock mode since that's what our eventual + * SELECT FOR KEY SHARE will get on it. + */ + fk_rel = trigdata->tg_relation; + pk_rel = table_open(riinfo->pk_relid, RowShareLock); + + switch (ri_NullCheck(RelationGetDescr(fk_rel), newslot, riinfo, false)) + { + case RI_KEYS_ALL_NULL: + + /* + * No further check needed - an all-NULL key passes every type of + * foreign key constraint. + */ + table_close(pk_rel, RowShareLock); + return PointerGetDatum(NULL); + + case RI_KEYS_SOME_NULL: + + /* + * This is the only case that differs between the three kinds of + * MATCH. + */ + switch (riinfo->confmatchtype) + { + case FKCONSTR_MATCH_FULL: + + /* + * Not allowed - MATCH FULL says either all or none of the + * attributes can be NULLs + */ + ereport(ERROR, + (errcode(ERRCODE_FOREIGN_KEY_VIOLATION), + errmsg("insert or update on table \"%s\" violates foreign key constraint \"%s\"", + RelationGetRelationName(fk_rel), + NameStr(riinfo->conname)), + errdetail("MATCH FULL does not allow mixing of null and nonnull key values."), + errtableconstraint(fk_rel, + NameStr(riinfo->conname)))); + table_close(pk_rel, RowShareLock); + return PointerGetDatum(NULL); + + case FKCONSTR_MATCH_SIMPLE: + + /* + * MATCH SIMPLE - if ANY column is null, the key passes + * the constraint. + */ + table_close(pk_rel, RowShareLock); + return PointerGetDatum(NULL); + +#ifdef NOT_USED + case FKCONSTR_MATCH_PARTIAL: + + /* + * MATCH PARTIAL - all non-null columns must match. (not + * implemented, can be done by modifying the query below + * to only include non-null columns, or by writing a + * special version here) + */ + break; +#endif + } + + case RI_KEYS_NONE_NULL: + + /* + * Have a full qualified key - continue below for all three kinds + * of MATCH. + */ + break; + } + + if (SPI_connect() != SPI_OK_CONNECT) + elog(ERROR, "SPI_connect failed"); + + /* Fetch or prepare a saved plan for the real check */ + ri_BuildQueryKey(&qkey, riinfo, RI_PLAN_CHECK_LOOKUPPK); + + if ((qplan = ri_FetchPreparedPlan(&qkey)) == NULL) + { + StringInfoData querybuf; + char pkrelname[MAX_QUOTED_REL_NAME_LEN]; + char attname[MAX_QUOTED_NAME_LEN]; + char paramname[16]; + const char *querysep; + Oid queryoids[RI_MAX_NUMKEYS]; + const char *pk_only; + + /* ---------- + * The query string built is + * SELECT 1 FROM [ONLY] <pktable> x WHERE pkatt1 = $1 [AND ...] + * FOR KEY SHARE OF x + * The type id's for the $ parameters are those of the + * corresponding FK attributes. 
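+		 * For a hypothetical constraint FOREIGN KEY (order_id) REFERENCES
+		 * orders (id), the generated query would look roughly like
+		 *	SELECT 1 FROM ONLY "public"."orders" x WHERE "id" = $1 FOR KEY SHARE OF x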
+ * ---------- + */ + initStringInfo(&querybuf); + pk_only = pk_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ? + "" : "ONLY "; + quoteRelationName(pkrelname, pk_rel); + appendStringInfo(&querybuf, "SELECT 1 FROM %s%s x", + pk_only, pkrelname); + querysep = "WHERE"; + for (int i = 0; i < riinfo->nkeys; i++) + { + Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); + Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + + quoteOneName(attname, + RIAttName(pk_rel, riinfo->pk_attnums[i])); + sprintf(paramname, "$%d", i + 1); + ri_GenerateQual(&querybuf, querysep, + attname, pk_type, + riinfo->pf_eq_oprs[i], + paramname, fk_type); + querysep = "AND"; + queryoids[i] = fk_type; + } + appendStringInfoString(&querybuf, " FOR KEY SHARE OF x"); + + /* Prepare and save the plan */ + qplan = ri_PlanCheck(querybuf.data, riinfo->nkeys, queryoids, + &qkey, fk_rel, pk_rel); + } + + /* + * Now check that foreign key exists in PK table + * + * XXX detectNewRows must be true when a partitioned table is on the + * referenced side. The reason is that our snapshot must be fresh in + * order for the hack in find_inheritance_children() to work. + */ + ri_PerformCheck(riinfo, &qkey, qplan, + fk_rel, pk_rel, + NULL, newslot, + pk_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE, + SPI_OK_SELECT); + + if (SPI_finish() != SPI_OK_FINISH) + elog(ERROR, "SPI_finish failed"); + + table_close(pk_rel, RowShareLock); + + return PointerGetDatum(NULL); +} + + +/* + * RI_FKey_check_ins - + * + * Check foreign key existence at insert event on FK table. + */ +Datum +RI_FKey_check_ins(PG_FUNCTION_ARGS) +{ + /* Check that this is a valid trigger call on the right time and event. */ + ri_CheckTrigger(fcinfo, "RI_FKey_check_ins", RI_TRIGTYPE_INSERT); + + /* Share code with UPDATE case. */ + return RI_FKey_check((TriggerData *) fcinfo->context); +} + + +/* + * RI_FKey_check_upd - + * + * Check foreign key existence at update event on FK table. + */ +Datum +RI_FKey_check_upd(PG_FUNCTION_ARGS) +{ + /* Check that this is a valid trigger call on the right time and event. */ + ri_CheckTrigger(fcinfo, "RI_FKey_check_upd", RI_TRIGTYPE_UPDATE); + + /* Share code with INSERT case. */ + return RI_FKey_check((TriggerData *) fcinfo->context); +} + + +/* + * ri_Check_Pk_Match + * + * Check to see if another PK row has been created that provides the same + * key values as the "oldslot" that's been modified or deleted in our trigger + * event. Returns true if a match is found in the PK table. + * + * We assume the caller checked that the oldslot contains no NULL key values, + * since otherwise a match is impossible. 
+ */ +static bool +ri_Check_Pk_Match(Relation pk_rel, Relation fk_rel, + TupleTableSlot *oldslot, + const RI_ConstraintInfo *riinfo) +{ + SPIPlanPtr qplan; + RI_QueryKey qkey; + bool result; + + /* Only called for non-null rows */ + Assert(ri_NullCheck(RelationGetDescr(pk_rel), oldslot, riinfo, true) == RI_KEYS_NONE_NULL); + + if (SPI_connect() != SPI_OK_CONNECT) + elog(ERROR, "SPI_connect failed"); + + /* + * Fetch or prepare a saved plan for checking PK table with values coming + * from a PK row + */ + ri_BuildQueryKey(&qkey, riinfo, RI_PLAN_CHECK_LOOKUPPK_FROM_PK); + + if ((qplan = ri_FetchPreparedPlan(&qkey)) == NULL) + { + StringInfoData querybuf; + char pkrelname[MAX_QUOTED_REL_NAME_LEN]; + char attname[MAX_QUOTED_NAME_LEN]; + char paramname[16]; + const char *querysep; + const char *pk_only; + Oid queryoids[RI_MAX_NUMKEYS]; + + /* ---------- + * The query string built is + * SELECT 1 FROM [ONLY] <pktable> x WHERE pkatt1 = $1 [AND ...] + * FOR KEY SHARE OF x + * The type id's for the $ parameters are those of the + * PK attributes themselves. + * ---------- + */ + initStringInfo(&querybuf); + pk_only = pk_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ? + "" : "ONLY "; + quoteRelationName(pkrelname, pk_rel); + appendStringInfo(&querybuf, "SELECT 1 FROM %s%s x", + pk_only, pkrelname); + querysep = "WHERE"; + for (int i = 0; i < riinfo->nkeys; i++) + { + Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); + + quoteOneName(attname, + RIAttName(pk_rel, riinfo->pk_attnums[i])); + sprintf(paramname, "$%d", i + 1); + ri_GenerateQual(&querybuf, querysep, + attname, pk_type, + riinfo->pp_eq_oprs[i], + paramname, pk_type); + querysep = "AND"; + queryoids[i] = pk_type; + } + appendStringInfoString(&querybuf, " FOR KEY SHARE OF x"); + + /* Prepare and save the plan */ + qplan = ri_PlanCheck(querybuf.data, riinfo->nkeys, queryoids, + &qkey, fk_rel, pk_rel); + } + + /* + * We have a plan now. Run it. + */ + result = ri_PerformCheck(riinfo, &qkey, qplan, + fk_rel, pk_rel, + oldslot, NULL, + true, /* treat like update */ + SPI_OK_SELECT); + + if (SPI_finish() != SPI_OK_FINISH) + elog(ERROR, "SPI_finish failed"); + + return result; +} + + +/* + * RI_FKey_noaction_del - + * + * Give an error and roll back the current transaction if the + * delete has resulted in a violation of the given referential + * integrity constraint. + */ +Datum +RI_FKey_noaction_del(PG_FUNCTION_ARGS) +{ + /* Check that this is a valid trigger call on the right time and event. */ + ri_CheckTrigger(fcinfo, "RI_FKey_noaction_del", RI_TRIGTYPE_DELETE); + + /* Share code with RESTRICT/UPDATE cases. */ + return ri_restrict((TriggerData *) fcinfo->context, true); +} + +/* + * RI_FKey_restrict_del - + * + * Restrict delete from PK table to rows unreferenced by foreign key. + * + * The SQL standard intends that this referential action occur exactly when + * the delete is performed, rather than after. This appears to be + * the only difference between "NO ACTION" and "RESTRICT". In Postgres + * we still implement this as an AFTER trigger, but it's non-deferrable. + */ +Datum +RI_FKey_restrict_del(PG_FUNCTION_ARGS) +{ + /* Check that this is a valid trigger call on the right time and event. */ + ri_CheckTrigger(fcinfo, "RI_FKey_restrict_del", RI_TRIGTYPE_DELETE); + + /* Share code with NO ACTION/UPDATE cases. 
*/ + return ri_restrict((TriggerData *) fcinfo->context, false); +} + +/* + * RI_FKey_noaction_upd - + * + * Give an error and roll back the current transaction if the + * update has resulted in a violation of the given referential + * integrity constraint. + */ +Datum +RI_FKey_noaction_upd(PG_FUNCTION_ARGS) +{ + /* Check that this is a valid trigger call on the right time and event. */ + ri_CheckTrigger(fcinfo, "RI_FKey_noaction_upd", RI_TRIGTYPE_UPDATE); + + /* Share code with RESTRICT/DELETE cases. */ + return ri_restrict((TriggerData *) fcinfo->context, true); +} + +/* + * RI_FKey_restrict_upd - + * + * Restrict update of PK to rows unreferenced by foreign key. + * + * The SQL standard intends that this referential action occur exactly when + * the update is performed, rather than after. This appears to be + * the only difference between "NO ACTION" and "RESTRICT". In Postgres + * we still implement this as an AFTER trigger, but it's non-deferrable. + */ +Datum +RI_FKey_restrict_upd(PG_FUNCTION_ARGS) +{ + /* Check that this is a valid trigger call on the right time and event. */ + ri_CheckTrigger(fcinfo, "RI_FKey_restrict_upd", RI_TRIGTYPE_UPDATE); + + /* Share code with NO ACTION/DELETE cases. */ + return ri_restrict((TriggerData *) fcinfo->context, false); +} + +/* + * ri_restrict - + * + * Common code for ON DELETE RESTRICT, ON DELETE NO ACTION, + * ON UPDATE RESTRICT, and ON UPDATE NO ACTION. + */ +static Datum +ri_restrict(TriggerData *trigdata, bool is_no_action) +{ + const RI_ConstraintInfo *riinfo; + Relation fk_rel; + Relation pk_rel; + TupleTableSlot *oldslot; + RI_QueryKey qkey; + SPIPlanPtr qplan; + + riinfo = ri_FetchConstraintInfo(trigdata->tg_trigger, + trigdata->tg_relation, true); + + /* + * Get the relation descriptors of the FK and PK tables and the old tuple. + * + * fk_rel is opened in RowShareLock mode since that's what our eventual + * SELECT FOR KEY SHARE will get on it. + */ + fk_rel = table_open(riinfo->fk_relid, RowShareLock); + pk_rel = trigdata->tg_relation; + oldslot = trigdata->tg_trigslot; + + /* + * If another PK row now exists providing the old key values, we should + * not do anything. However, this check should only be made in the NO + * ACTION case; in RESTRICT cases we don't wish to allow another row to be + * substituted. + */ + if (is_no_action && + ri_Check_Pk_Match(pk_rel, fk_rel, oldslot, riinfo)) + { + table_close(fk_rel, RowShareLock); + return PointerGetDatum(NULL); + } + + if (SPI_connect() != SPI_OK_CONNECT) + elog(ERROR, "SPI_connect failed"); + + /* + * Fetch or prepare a saved plan for the restrict lookup (it's the same + * query for delete and update cases) + */ + ri_BuildQueryKey(&qkey, riinfo, RI_PLAN_RESTRICT); + + if ((qplan = ri_FetchPreparedPlan(&qkey)) == NULL) + { + StringInfoData querybuf; + char fkrelname[MAX_QUOTED_REL_NAME_LEN]; + char attname[MAX_QUOTED_NAME_LEN]; + char paramname[16]; + const char *querysep; + Oid queryoids[RI_MAX_NUMKEYS]; + const char *fk_only; + + /* ---------- + * The query string built is + * SELECT 1 FROM [ONLY] <fktable> x WHERE $1 = fkatt1 [AND ...] + * FOR KEY SHARE OF x + * The type id's for the $ parameters are those of the + * corresponding PK attributes. + * ---------- + */ + initStringInfo(&querybuf); + fk_only = fk_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ? 
+ "" : "ONLY "; + quoteRelationName(fkrelname, fk_rel); + appendStringInfo(&querybuf, "SELECT 1 FROM %s%s x", + fk_only, fkrelname); + querysep = "WHERE"; + for (int i = 0; i < riinfo->nkeys; i++) + { + Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); + Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); + Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); + + quoteOneName(attname, + RIAttName(fk_rel, riinfo->fk_attnums[i])); + sprintf(paramname, "$%d", i + 1); + ri_GenerateQual(&querybuf, querysep, + paramname, pk_type, + riinfo->pf_eq_oprs[i], + attname, fk_type); + if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) + ri_GenerateQualCollation(&querybuf, pk_coll); + querysep = "AND"; + queryoids[i] = pk_type; + } + appendStringInfoString(&querybuf, " FOR KEY SHARE OF x"); + + /* Prepare and save the plan */ + qplan = ri_PlanCheck(querybuf.data, riinfo->nkeys, queryoids, + &qkey, fk_rel, pk_rel); + } + + /* + * We have a plan now. Run it to check for existing references. + */ + ri_PerformCheck(riinfo, &qkey, qplan, + fk_rel, pk_rel, + oldslot, NULL, + true, /* must detect new rows */ + SPI_OK_SELECT); + + if (SPI_finish() != SPI_OK_FINISH) + elog(ERROR, "SPI_finish failed"); + + table_close(fk_rel, RowShareLock); + + return PointerGetDatum(NULL); +} + + +/* + * RI_FKey_cascade_del - + * + * Cascaded delete foreign key references at delete event on PK table. + */ +Datum +RI_FKey_cascade_del(PG_FUNCTION_ARGS) +{ + TriggerData *trigdata = (TriggerData *) fcinfo->context; + const RI_ConstraintInfo *riinfo; + Relation fk_rel; + Relation pk_rel; + TupleTableSlot *oldslot; + RI_QueryKey qkey; + SPIPlanPtr qplan; + + /* Check that this is a valid trigger call on the right time and event. */ + ri_CheckTrigger(fcinfo, "RI_FKey_cascade_del", RI_TRIGTYPE_DELETE); + + riinfo = ri_FetchConstraintInfo(trigdata->tg_trigger, + trigdata->tg_relation, true); + + /* + * Get the relation descriptors of the FK and PK tables and the old tuple. + * + * fk_rel is opened in RowExclusiveLock mode since that's what our + * eventual DELETE will get on it. + */ + fk_rel = table_open(riinfo->fk_relid, RowExclusiveLock); + pk_rel = trigdata->tg_relation; + oldslot = trigdata->tg_trigslot; + + if (SPI_connect() != SPI_OK_CONNECT) + elog(ERROR, "SPI_connect failed"); + + /* Fetch or prepare a saved plan for the cascaded delete */ + ri_BuildQueryKey(&qkey, riinfo, RI_PLAN_CASCADE_ONDELETE); + + if ((qplan = ri_FetchPreparedPlan(&qkey)) == NULL) + { + StringInfoData querybuf; + char fkrelname[MAX_QUOTED_REL_NAME_LEN]; + char attname[MAX_QUOTED_NAME_LEN]; + char paramname[16]; + const char *querysep; + Oid queryoids[RI_MAX_NUMKEYS]; + const char *fk_only; + + /* ---------- + * The query string built is + * DELETE FROM [ONLY] <fktable> WHERE $1 = fkatt1 [AND ...] + * The type id's for the $ parameters are those of the + * corresponding PK attributes. + * ---------- + */ + initStringInfo(&querybuf); + fk_only = fk_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ? 
+ "" : "ONLY "; + quoteRelationName(fkrelname, fk_rel); + appendStringInfo(&querybuf, "DELETE FROM %s%s", + fk_only, fkrelname); + querysep = "WHERE"; + for (int i = 0; i < riinfo->nkeys; i++) + { + Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); + Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); + Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); + + quoteOneName(attname, + RIAttName(fk_rel, riinfo->fk_attnums[i])); + sprintf(paramname, "$%d", i + 1); + ri_GenerateQual(&querybuf, querysep, + paramname, pk_type, + riinfo->pf_eq_oprs[i], + attname, fk_type); + if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) + ri_GenerateQualCollation(&querybuf, pk_coll); + querysep = "AND"; + queryoids[i] = pk_type; + } + + /* Prepare and save the plan */ + qplan = ri_PlanCheck(querybuf.data, riinfo->nkeys, queryoids, + &qkey, fk_rel, pk_rel); + } + + /* + * We have a plan now. Build up the arguments from the key values in the + * deleted PK tuple and delete the referencing rows + */ + ri_PerformCheck(riinfo, &qkey, qplan, + fk_rel, pk_rel, + oldslot, NULL, + true, /* must detect new rows */ + SPI_OK_DELETE); + + if (SPI_finish() != SPI_OK_FINISH) + elog(ERROR, "SPI_finish failed"); + + table_close(fk_rel, RowExclusiveLock); + + return PointerGetDatum(NULL); +} + + +/* + * RI_FKey_cascade_upd - + * + * Cascaded update foreign key references at update event on PK table. + */ +Datum +RI_FKey_cascade_upd(PG_FUNCTION_ARGS) +{ + TriggerData *trigdata = (TriggerData *) fcinfo->context; + const RI_ConstraintInfo *riinfo; + Relation fk_rel; + Relation pk_rel; + TupleTableSlot *newslot; + TupleTableSlot *oldslot; + RI_QueryKey qkey; + SPIPlanPtr qplan; + + /* Check that this is a valid trigger call on the right time and event. */ + ri_CheckTrigger(fcinfo, "RI_FKey_cascade_upd", RI_TRIGTYPE_UPDATE); + + riinfo = ri_FetchConstraintInfo(trigdata->tg_trigger, + trigdata->tg_relation, true); + + /* + * Get the relation descriptors of the FK and PK tables and the new and + * old tuple. + * + * fk_rel is opened in RowExclusiveLock mode since that's what our + * eventual UPDATE will get on it. + */ + fk_rel = table_open(riinfo->fk_relid, RowExclusiveLock); + pk_rel = trigdata->tg_relation; + newslot = trigdata->tg_newslot; + oldslot = trigdata->tg_trigslot; + + if (SPI_connect() != SPI_OK_CONNECT) + elog(ERROR, "SPI_connect failed"); + + /* Fetch or prepare a saved plan for the cascaded update */ + ri_BuildQueryKey(&qkey, riinfo, RI_PLAN_CASCADE_ONUPDATE); + + if ((qplan = ri_FetchPreparedPlan(&qkey)) == NULL) + { + StringInfoData querybuf; + StringInfoData qualbuf; + char fkrelname[MAX_QUOTED_REL_NAME_LEN]; + char attname[MAX_QUOTED_NAME_LEN]; + char paramname[16]; + const char *querysep; + const char *qualsep; + Oid queryoids[RI_MAX_NUMKEYS * 2]; + const char *fk_only; + + /* ---------- + * The query string built is + * UPDATE [ONLY] <fktable> SET fkatt1 = $1 [, ...] + * WHERE $n = fkatt1 [AND ...] + * The type id's for the $ parameters are those of the + * corresponding PK attributes. Note that we are assuming + * there is an assignment cast from the PK to the FK type; + * else the parser will fail. + * ---------- + */ + initStringInfo(&querybuf); + initStringInfo(&qualbuf); + fk_only = fk_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ? 
+ "" : "ONLY "; + quoteRelationName(fkrelname, fk_rel); + appendStringInfo(&querybuf, "UPDATE %s%s SET", + fk_only, fkrelname); + querysep = ""; + qualsep = "WHERE"; + for (int i = 0, j = riinfo->nkeys; i < riinfo->nkeys; i++, j++) + { + Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); + Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); + Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); + + quoteOneName(attname, + RIAttName(fk_rel, riinfo->fk_attnums[i])); + appendStringInfo(&querybuf, + "%s %s = $%d", + querysep, attname, i + 1); + sprintf(paramname, "$%d", j + 1); + ri_GenerateQual(&qualbuf, qualsep, + paramname, pk_type, + riinfo->pf_eq_oprs[i], + attname, fk_type); + if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) + ri_GenerateQualCollation(&querybuf, pk_coll); + querysep = ","; + qualsep = "AND"; + queryoids[i] = pk_type; + queryoids[j] = pk_type; + } + appendBinaryStringInfo(&querybuf, qualbuf.data, qualbuf.len); + + /* Prepare and save the plan */ + qplan = ri_PlanCheck(querybuf.data, riinfo->nkeys * 2, queryoids, + &qkey, fk_rel, pk_rel); + } + + /* + * We have a plan now. Run it to update the existing references. + */ + ri_PerformCheck(riinfo, &qkey, qplan, + fk_rel, pk_rel, + oldslot, newslot, + true, /* must detect new rows */ + SPI_OK_UPDATE); + + if (SPI_finish() != SPI_OK_FINISH) + elog(ERROR, "SPI_finish failed"); + + table_close(fk_rel, RowExclusiveLock); + + return PointerGetDatum(NULL); +} + + +/* + * RI_FKey_setnull_del - + * + * Set foreign key references to NULL values at delete event on PK table. + */ +Datum +RI_FKey_setnull_del(PG_FUNCTION_ARGS) +{ + /* Check that this is a valid trigger call on the right time and event. */ + ri_CheckTrigger(fcinfo, "RI_FKey_setnull_del", RI_TRIGTYPE_DELETE); + + /* Share code with UPDATE case */ + return ri_set((TriggerData *) fcinfo->context, true, RI_TRIGTYPE_DELETE); +} + +/* + * RI_FKey_setnull_upd - + * + * Set foreign key references to NULL at update event on PK table. + */ +Datum +RI_FKey_setnull_upd(PG_FUNCTION_ARGS) +{ + /* Check that this is a valid trigger call on the right time and event. */ + ri_CheckTrigger(fcinfo, "RI_FKey_setnull_upd", RI_TRIGTYPE_UPDATE); + + /* Share code with DELETE case */ + return ri_set((TriggerData *) fcinfo->context, true, RI_TRIGTYPE_UPDATE); +} + +/* + * RI_FKey_setdefault_del - + * + * Set foreign key references to defaults at delete event on PK table. + */ +Datum +RI_FKey_setdefault_del(PG_FUNCTION_ARGS) +{ + /* Check that this is a valid trigger call on the right time and event. */ + ri_CheckTrigger(fcinfo, "RI_FKey_setdefault_del", RI_TRIGTYPE_DELETE); + + /* Share code with UPDATE case */ + return ri_set((TriggerData *) fcinfo->context, false, RI_TRIGTYPE_DELETE); +} + +/* + * RI_FKey_setdefault_upd - + * + * Set foreign key references to defaults at update event on PK table. + */ +Datum +RI_FKey_setdefault_upd(PG_FUNCTION_ARGS) +{ + /* Check that this is a valid trigger call on the right time and event. */ + ri_CheckTrigger(fcinfo, "RI_FKey_setdefault_upd", RI_TRIGTYPE_UPDATE); + + /* Share code with DELETE case */ + return ri_set((TriggerData *) fcinfo->context, false, RI_TRIGTYPE_UPDATE); +} + +/* + * ri_set - + * + * Common code for ON DELETE SET NULL, ON DELETE SET DEFAULT, ON UPDATE SET + * NULL, and ON UPDATE SET DEFAULT. 
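+ *
+ * For example (hypothetical constraint): ON DELETE SET NULL on
+ * FOREIGN KEY (customer_id) REFERENCES customers (id) results in an UPDATE
+ * of the referencing table that sets customer_id to NULL in every row that
+ * matched the deleted customers row; SET DEFAULT substitutes the columns'
+ * defaults instead.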
+ */ +static Datum +ri_set(TriggerData *trigdata, bool is_set_null, int tgkind) +{ + const RI_ConstraintInfo *riinfo; + Relation fk_rel; + Relation pk_rel; + TupleTableSlot *oldslot; + RI_QueryKey qkey; + SPIPlanPtr qplan; + int32 queryno; + + riinfo = ri_FetchConstraintInfo(trigdata->tg_trigger, + trigdata->tg_relation, true); + + /* + * Get the relation descriptors of the FK and PK tables and the old tuple. + * + * fk_rel is opened in RowExclusiveLock mode since that's what our + * eventual UPDATE will get on it. + */ + fk_rel = table_open(riinfo->fk_relid, RowExclusiveLock); + pk_rel = trigdata->tg_relation; + oldslot = trigdata->tg_trigslot; + + if (SPI_connect() != SPI_OK_CONNECT) + elog(ERROR, "SPI_connect failed"); + + /* + * Fetch or prepare a saved plan for the trigger. + */ + switch (tgkind) + { + case RI_TRIGTYPE_UPDATE: + queryno = is_set_null + ? RI_PLAN_SETNULL_ONUPDATE + : RI_PLAN_SETDEFAULT_ONUPDATE; + break; + case RI_TRIGTYPE_DELETE: + queryno = is_set_null + ? RI_PLAN_SETNULL_ONDELETE + : RI_PLAN_SETDEFAULT_ONDELETE; + break; + default: + elog(ERROR, "invalid tgkind passed to ri_set"); + } + + ri_BuildQueryKey(&qkey, riinfo, queryno); + + if ((qplan = ri_FetchPreparedPlan(&qkey)) == NULL) + { + StringInfoData querybuf; + char fkrelname[MAX_QUOTED_REL_NAME_LEN]; + char attname[MAX_QUOTED_NAME_LEN]; + char paramname[16]; + const char *querysep; + const char *qualsep; + Oid queryoids[RI_MAX_NUMKEYS]; + const char *fk_only; + int num_cols_to_set; + const int16 *set_cols; + + switch (tgkind) + { + case RI_TRIGTYPE_UPDATE: + num_cols_to_set = riinfo->nkeys; + set_cols = riinfo->fk_attnums; + break; + case RI_TRIGTYPE_DELETE: + + /* + * If confdelsetcols are present, then we only update the + * columns specified in that array, otherwise we update all + * the referencing columns. + */ + if (riinfo->ndelsetcols != 0) + { + num_cols_to_set = riinfo->ndelsetcols; + set_cols = riinfo->confdelsetcols; + } + else + { + num_cols_to_set = riinfo->nkeys; + set_cols = riinfo->fk_attnums; + } + break; + default: + elog(ERROR, "invalid tgkind passed to ri_set"); + } + + /* ---------- + * The query string built is + * UPDATE [ONLY] <fktable> SET fkatt1 = {NULL|DEFAULT} [, ...] + * WHERE $1 = fkatt1 [AND ...] + * The type id's for the $ parameters are those of the + * corresponding PK attributes. + * ---------- + */ + initStringInfo(&querybuf); + fk_only = fk_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ? + "" : "ONLY "; + quoteRelationName(fkrelname, fk_rel); + appendStringInfo(&querybuf, "UPDATE %s%s SET", + fk_only, fkrelname); + + /* + * Add assignment clauses + */ + querysep = ""; + for (int i = 0; i < num_cols_to_set; i++) + { + quoteOneName(attname, RIAttName(fk_rel, set_cols[i])); + appendStringInfo(&querybuf, + "%s %s = %s", + querysep, attname, + is_set_null ? 
"NULL" : "DEFAULT"); + querysep = ","; + } + + /* + * Add WHERE clause + */ + qualsep = "WHERE"; + for (int i = 0; i < riinfo->nkeys; i++) + { + Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); + Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); + Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); + + quoteOneName(attname, + RIAttName(fk_rel, riinfo->fk_attnums[i])); + + sprintf(paramname, "$%d", i + 1); + ri_GenerateQual(&querybuf, qualsep, + paramname, pk_type, + riinfo->pf_eq_oprs[i], + attname, fk_type); + if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) + ri_GenerateQualCollation(&querybuf, pk_coll); + qualsep = "AND"; + queryoids[i] = pk_type; + } + + /* Prepare and save the plan */ + qplan = ri_PlanCheck(querybuf.data, riinfo->nkeys, queryoids, + &qkey, fk_rel, pk_rel); + } + + /* + * We have a plan now. Run it to update the existing references. + */ + ri_PerformCheck(riinfo, &qkey, qplan, + fk_rel, pk_rel, + oldslot, NULL, + true, /* must detect new rows */ + SPI_OK_UPDATE); + + if (SPI_finish() != SPI_OK_FINISH) + elog(ERROR, "SPI_finish failed"); + + table_close(fk_rel, RowExclusiveLock); + + if (is_set_null) + return PointerGetDatum(NULL); + else + { + /* + * If we just deleted or updated the PK row whose key was equal to the + * FK columns' default values, and a referencing row exists in the FK + * table, we would have updated that row to the same values it already + * had --- and RI_FKey_fk_upd_check_required would hence believe no + * check is necessary. So we need to do another lookup now and in + * case a reference still exists, abort the operation. That is + * already implemented in the NO ACTION trigger, so just run it. (This + * recheck is only needed in the SET DEFAULT case, since CASCADE would + * remove such rows in case of a DELETE operation or would change the + * FK key values in case of an UPDATE, while SET NULL is certain to + * result in rows that satisfy the FK constraint.) + */ + return ri_restrict(trigdata, true); + } +} + + +/* + * RI_FKey_pk_upd_check_required - + * + * Check if we really need to fire the RI trigger for an update or delete to a PK + * relation. This is called by the AFTER trigger queue manager to see if + * it can skip queuing an instance of an RI trigger. Returns true if the + * trigger must be fired, false if we can prove the constraint will still + * be satisfied. + * + * newslot will be NULL if this is called for a delete. + */ +bool +RI_FKey_pk_upd_check_required(Trigger *trigger, Relation pk_rel, + TupleTableSlot *oldslot, TupleTableSlot *newslot) +{ + const RI_ConstraintInfo *riinfo; + + riinfo = ri_FetchConstraintInfo(trigger, pk_rel, true); + + /* + * If any old key value is NULL, the row could not have been referenced by + * an FK row, so no check is needed. + */ + if (ri_NullCheck(RelationGetDescr(pk_rel), oldslot, riinfo, true) != RI_KEYS_NONE_NULL) + return false; + + /* If all old and new key values are equal, no check is needed */ + if (newslot && ri_KeysEqual(pk_rel, oldslot, newslot, riinfo, true)) + return false; + + /* Else we need to fire the trigger. */ + return true; +} + +/* + * RI_FKey_fk_upd_check_required - + * + * Check if we really need to fire the RI trigger for an update to an FK + * relation. This is called by the AFTER trigger queue manager to see if + * it can skip queuing an instance of an RI trigger. 
Returns true if the + * trigger must be fired, false if we can prove the constraint will still + * be satisfied. + */ +bool +RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel, + TupleTableSlot *oldslot, TupleTableSlot *newslot) +{ + const RI_ConstraintInfo *riinfo; + int ri_nullcheck; + Datum xminDatum; + TransactionId xmin; + bool isnull; + + /* + * AfterTriggerSaveEvent() handles things such that this function is never + * called for partitioned tables. + */ + Assert(fk_rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE); + + riinfo = ri_FetchConstraintInfo(trigger, fk_rel, false); + + ri_nullcheck = ri_NullCheck(RelationGetDescr(fk_rel), newslot, riinfo, false); + + /* + * If all new key values are NULL, the row satisfies the constraint, so no + * check is needed. + */ + if (ri_nullcheck == RI_KEYS_ALL_NULL) + return false; + + /* + * If some new key values are NULL, the behavior depends on the match + * type. + */ + else if (ri_nullcheck == RI_KEYS_SOME_NULL) + { + switch (riinfo->confmatchtype) + { + case FKCONSTR_MATCH_SIMPLE: + + /* + * If any new key value is NULL, the row must satisfy the + * constraint, so no check is needed. + */ + return false; + + case FKCONSTR_MATCH_PARTIAL: + + /* + * Don't know, must run full check. + */ + break; + + case FKCONSTR_MATCH_FULL: + + /* + * If some new key values are NULL, the row fails the + * constraint. We must not throw error here, because the row + * might get invalidated before the constraint is to be + * checked, but we should queue the event to apply the check + * later. + */ + return true; + } + } + + /* + * Continues here for no new key values are NULL, or we couldn't decide + * yet. + */ + + /* + * If the original row was inserted by our own transaction, we must fire + * the trigger whether or not the keys are equal. This is because our + * UPDATE will invalidate the INSERT so that the INSERT RI trigger will + * not do anything; so we had better do the UPDATE check. (We could skip + * this if we knew the INSERT trigger already fired, but there is no easy + * way to know that.) + */ + xminDatum = slot_getsysattr(oldslot, MinTransactionIdAttributeNumber, &isnull); + Assert(!isnull); + xmin = DatumGetTransactionId(xminDatum); + if (TransactionIdIsCurrentTransactionId(xmin)) + return true; + + /* If all old and new key values are equal, no check is needed */ + if (ri_KeysEqual(fk_rel, oldslot, newslot, riinfo, false)) + return false; + + /* Else we need to fire the trigger. */ + return true; +} + +/* + * RI_Initial_Check - + * + * Check an entire table for non-matching values using a single query. + * This is not a trigger procedure, but is called during ALTER TABLE + * ADD FOREIGN KEY to validate the initial table contents. + * + * We expect that the caller has made provision to prevent any problems + * caused by concurrent actions. This could be either by locking rel and + * pkrel at ShareRowExclusiveLock or higher, or by otherwise ensuring + * that triggers implementing the checks are already active. + * Hence, we do not need to lock individual rows for the check. + * + * If the check fails because the current user doesn't have permissions + * to read both tables, return false to let our caller know that they will + * need to do something else to check the constraint. 
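+ *
+ * If a violating row is found we raise the same foreign-key-violation
+ * error the per-row triggers would; if the query finds nothing we return
+ * true, meaning the constraint has been fully validated.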
+ */ +bool +RI_Initial_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel) +{ + const RI_ConstraintInfo *riinfo; + StringInfoData querybuf; + char pkrelname[MAX_QUOTED_REL_NAME_LEN]; + char fkrelname[MAX_QUOTED_REL_NAME_LEN]; + char pkattname[MAX_QUOTED_NAME_LEN + 3]; + char fkattname[MAX_QUOTED_NAME_LEN + 3]; + RangeTblEntry *rte; + RTEPermissionInfo *pk_perminfo; + RTEPermissionInfo *fk_perminfo; + List *rtes = NIL; + List *perminfos = NIL; + const char *sep; + const char *fk_only; + const char *pk_only; + int save_nestlevel; + char workmembuf[32]; + int spi_result; + SPIPlanPtr qplan; + + riinfo = ri_FetchConstraintInfo(trigger, fk_rel, false); + + /* + * Check to make sure current user has enough permissions to do the test + * query. (If not, caller can fall back to the trigger method, which + * works because it changes user IDs on the fly.) + * + * XXX are there any other show-stopper conditions to check? + */ + pk_perminfo = makeNode(RTEPermissionInfo); + pk_perminfo->relid = RelationGetRelid(pk_rel); + pk_perminfo->requiredPerms = ACL_SELECT; + perminfos = lappend(perminfos, pk_perminfo); + rte = makeNode(RangeTblEntry); + rte->rtekind = RTE_RELATION; + rte->relid = RelationGetRelid(pk_rel); + rte->relkind = pk_rel->rd_rel->relkind; + rte->rellockmode = AccessShareLock; + rte->perminfoindex = list_length(perminfos); + rtes = lappend(rtes, rte); + + fk_perminfo = makeNode(RTEPermissionInfo); + fk_perminfo->relid = RelationGetRelid(fk_rel); + fk_perminfo->requiredPerms = ACL_SELECT; + perminfos = lappend(perminfos, fk_perminfo); + rte = makeNode(RangeTblEntry); + rte->rtekind = RTE_RELATION; + rte->relid = RelationGetRelid(fk_rel); + rte->relkind = fk_rel->rd_rel->relkind; + rte->rellockmode = AccessShareLock; + rte->perminfoindex = list_length(perminfos); + rtes = lappend(rtes, rte); + + for (int i = 0; i < riinfo->nkeys; i++) + { + int attno; + + attno = riinfo->pk_attnums[i] - FirstLowInvalidHeapAttributeNumber; + pk_perminfo->selectedCols = bms_add_member(pk_perminfo->selectedCols, attno); + + attno = riinfo->fk_attnums[i] - FirstLowInvalidHeapAttributeNumber; + fk_perminfo->selectedCols = bms_add_member(fk_perminfo->selectedCols, attno); + } + + if (!ExecCheckPermissions(rtes, perminfos, false)) + return false; + + /* + * Also punt if RLS is enabled on either table unless this role has the + * bypassrls right or is the table owner of the table(s) involved which + * have RLS enabled. + */ + if (!has_bypassrls_privilege(GetUserId()) && + ((pk_rel->rd_rel->relrowsecurity && + !object_ownercheck(RelationRelationId, RelationGetRelid(pk_rel), + GetUserId())) || + (fk_rel->rd_rel->relrowsecurity && + !object_ownercheck(RelationRelationId, RelationGetRelid(fk_rel), + GetUserId())))) + return false; + + /*---------- + * The query string built is: + * SELECT fk.keycols FROM [ONLY] relname fk + * LEFT OUTER JOIN [ONLY] pkrelname pk + * ON (pk.pkkeycol1=fk.keycol1 [AND ...]) + * WHERE pk.pkkeycol1 IS NULL AND + * For MATCH SIMPLE: + * (fk.keycol1 IS NOT NULL [AND ...]) + * For MATCH FULL: + * (fk.keycol1 IS NOT NULL [OR ...]) + * + * We attach COLLATE clauses to the operators when comparing columns + * that have different collations. 
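+ *
+ * For illustration only: with a hypothetical single-column constraint
+ * orders(customer_id) REFERENCES customers(id), the generated text comes
+ * out roughly as
+ * SELECT fk."customer_id" FROM ONLY "public"."orders" fk
+ * LEFT OUTER JOIN ONLY "public"."customers" pk
+ * ON (pk."id" OPERATOR(pg_catalog.=) fk."customer_id")
+ * WHERE pk."id" IS NULL AND (fk."customer_id" IS NOT NULL)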
+ *---------- + */ + initStringInfo(&querybuf); + appendStringInfoString(&querybuf, "SELECT "); + sep = ""; + for (int i = 0; i < riinfo->nkeys; i++) + { + quoteOneName(fkattname, + RIAttName(fk_rel, riinfo->fk_attnums[i])); + appendStringInfo(&querybuf, "%sfk.%s", sep, fkattname); + sep = ", "; + } + + quoteRelationName(pkrelname, pk_rel); + quoteRelationName(fkrelname, fk_rel); + fk_only = fk_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ? + "" : "ONLY "; + pk_only = pk_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ? + "" : "ONLY "; + appendStringInfo(&querybuf, + " FROM %s%s fk LEFT OUTER JOIN %s%s pk ON", + fk_only, fkrelname, pk_only, pkrelname); + + strcpy(pkattname, "pk."); + strcpy(fkattname, "fk."); + sep = "("; + for (int i = 0; i < riinfo->nkeys; i++) + { + Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); + Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); + Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); + + quoteOneName(pkattname + 3, + RIAttName(pk_rel, riinfo->pk_attnums[i])); + quoteOneName(fkattname + 3, + RIAttName(fk_rel, riinfo->fk_attnums[i])); + ri_GenerateQual(&querybuf, sep, + pkattname, pk_type, + riinfo->pf_eq_oprs[i], + fkattname, fk_type); + if (pk_coll != fk_coll) + ri_GenerateQualCollation(&querybuf, pk_coll); + sep = "AND"; + } + + /* + * It's sufficient to test any one pk attribute for null to detect a join + * failure. + */ + quoteOneName(pkattname, RIAttName(pk_rel, riinfo->pk_attnums[0])); + appendStringInfo(&querybuf, ") WHERE pk.%s IS NULL AND (", pkattname); + + sep = ""; + for (int i = 0; i < riinfo->nkeys; i++) + { + quoteOneName(fkattname, RIAttName(fk_rel, riinfo->fk_attnums[i])); + appendStringInfo(&querybuf, + "%sfk.%s IS NOT NULL", + sep, fkattname); + switch (riinfo->confmatchtype) + { + case FKCONSTR_MATCH_SIMPLE: + sep = " AND "; + break; + case FKCONSTR_MATCH_FULL: + sep = " OR "; + break; + } + } + appendStringInfoChar(&querybuf, ')'); + + /* + * Temporarily increase work_mem so that the check query can be executed + * more efficiently. It seems okay to do this because the query is simple + * enough to not use a multiple of work_mem, and one typically would not + * have many large foreign-key validations happening concurrently. So + * this seems to meet the criteria for being considered a "maintenance" + * operation, and accordingly we use maintenance_work_mem. However, we + * must also set hash_mem_multiplier to 1, since it is surely not okay to + * let that get applied to the maintenance_work_mem value. + * + * We use the equivalent of a function SET option to allow the setting to + * persist for exactly the duration of the check query. guc.c also takes + * care of undoing the setting on error. + */ + save_nestlevel = NewGUCNestLevel(); + + snprintf(workmembuf, sizeof(workmembuf), "%d", maintenance_work_mem); + (void) set_config_option("work_mem", workmembuf, + PGC_USERSET, PGC_S_SESSION, + GUC_ACTION_SAVE, true, 0, false); + (void) set_config_option("hash_mem_multiplier", "1", + PGC_USERSET, PGC_S_SESSION, + GUC_ACTION_SAVE, true, 0, false); + + if (SPI_connect() != SPI_OK_CONNECT) + elog(ERROR, "SPI_connect failed"); + + /* + * Generate the plan. We don't need to cache it, and there are no + * arguments to the plan. + */ + qplan = SPI_prepare(querybuf.data, 0, NULL); + + if (qplan == NULL) + elog(ERROR, "SPI_prepare returned %s for %s", + SPI_result_code_string(SPI_result), querybuf.data); + + /* + * Run the plan. 
For safety we force a current snapshot to be used. (In + * transaction-snapshot mode, this arguably violates transaction isolation + * rules, but we really haven't got much choice.) We don't need to + * register the snapshot, because SPI_execute_snapshot will see to it. We + * need at most one tuple returned, so pass limit = 1. + */ + spi_result = SPI_execute_snapshot(qplan, + NULL, NULL, + GetLatestSnapshot(), + InvalidSnapshot, + true, false, 1); + + /* Check result */ + if (spi_result != SPI_OK_SELECT) + elog(ERROR, "SPI_execute_snapshot returned %s", SPI_result_code_string(spi_result)); + + /* Did we find a tuple violating the constraint? */ + if (SPI_processed > 0) + { + TupleTableSlot *slot; + HeapTuple tuple = SPI_tuptable->vals[0]; + TupleDesc tupdesc = SPI_tuptable->tupdesc; + RI_ConstraintInfo fake_riinfo; + + slot = MakeSingleTupleTableSlot(tupdesc, &TTSOpsVirtual); + + heap_deform_tuple(tuple, tupdesc, + slot->tts_values, slot->tts_isnull); + ExecStoreVirtualTuple(slot); + + /* + * The columns to look at in the result tuple are 1..N, not whatever + * they are in the fk_rel. Hack up riinfo so that the subroutines + * called here will behave properly. + * + * In addition to this, we have to pass the correct tupdesc to + * ri_ReportViolation, overriding its normal habit of using the pk_rel + * or fk_rel's tupdesc. + */ + memcpy(&fake_riinfo, riinfo, sizeof(RI_ConstraintInfo)); + for (int i = 0; i < fake_riinfo.nkeys; i++) + fake_riinfo.fk_attnums[i] = i + 1; + + /* + * If it's MATCH FULL, and there are any nulls in the FK keys, + * complain about that rather than the lack of a match. MATCH FULL + * disallows partially-null FK rows. + */ + if (fake_riinfo.confmatchtype == FKCONSTR_MATCH_FULL && + ri_NullCheck(tupdesc, slot, &fake_riinfo, false) != RI_KEYS_NONE_NULL) + ereport(ERROR, + (errcode(ERRCODE_FOREIGN_KEY_VIOLATION), + errmsg("insert or update on table \"%s\" violates foreign key constraint \"%s\"", + RelationGetRelationName(fk_rel), + NameStr(fake_riinfo.conname)), + errdetail("MATCH FULL does not allow mixing of null and nonnull key values."), + errtableconstraint(fk_rel, + NameStr(fake_riinfo.conname)))); + + /* + * We tell ri_ReportViolation we were doing the RI_PLAN_CHECK_LOOKUPPK + * query, which isn't true, but will cause it to use + * fake_riinfo.fk_attnums as we need. + */ + ri_ReportViolation(&fake_riinfo, + pk_rel, fk_rel, + slot, tupdesc, + RI_PLAN_CHECK_LOOKUPPK, false); + + ExecDropSingleTupleTableSlot(slot); + } + + if (SPI_finish() != SPI_OK_FINISH) + elog(ERROR, "SPI_finish failed"); + + /* + * Restore work_mem and hash_mem_multiplier. + */ + AtEOXact_GUC(true, save_nestlevel); + + return true; +} + +/* + * RI_PartitionRemove_Check - + * + * Verify no referencing values exist, when a partition is detached on + * the referenced side of a foreign key constraint. 
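+ *
+ * This runs when a partition of the referenced table is detached (or
+ * dropped): if any FK row still matches a key that lives in the departing
+ * partition, we raise a foreign-key violation, much as the ordinary RI
+ * triggers would.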
+ */ +void +RI_PartitionRemove_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel) +{ + const RI_ConstraintInfo *riinfo; + StringInfoData querybuf; + char *constraintDef; + char pkrelname[MAX_QUOTED_REL_NAME_LEN]; + char fkrelname[MAX_QUOTED_REL_NAME_LEN]; + char pkattname[MAX_QUOTED_NAME_LEN + 3]; + char fkattname[MAX_QUOTED_NAME_LEN + 3]; + const char *sep; + const char *fk_only; + int save_nestlevel; + char workmembuf[32]; + int spi_result; + SPIPlanPtr qplan; + int i; + + riinfo = ri_FetchConstraintInfo(trigger, fk_rel, false); + + /* + * We don't check permissions before displaying the error message, on the + * assumption that the user detaching the partition must have enough + * privileges to examine the table contents anyhow. + */ + + /*---------- + * The query string built is: + * SELECT fk.keycols FROM [ONLY] relname fk + * JOIN pkrelname pk + * ON (pk.pkkeycol1=fk.keycol1 [AND ...]) + * WHERE (<partition constraint>) AND + * For MATCH SIMPLE: + * (fk.keycol1 IS NOT NULL [AND ...]) + * For MATCH FULL: + * (fk.keycol1 IS NOT NULL [OR ...]) + * + * We attach COLLATE clauses to the operators when comparing columns + * that have different collations. + *---------- + */ + initStringInfo(&querybuf); + appendStringInfoString(&querybuf, "SELECT "); + sep = ""; + for (i = 0; i < riinfo->nkeys; i++) + { + quoteOneName(fkattname, + RIAttName(fk_rel, riinfo->fk_attnums[i])); + appendStringInfo(&querybuf, "%sfk.%s", sep, fkattname); + sep = ", "; + } + + quoteRelationName(pkrelname, pk_rel); + quoteRelationName(fkrelname, fk_rel); + fk_only = fk_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ? + "" : "ONLY "; + appendStringInfo(&querybuf, + " FROM %s%s fk JOIN %s pk ON", + fk_only, fkrelname, pkrelname); + strcpy(pkattname, "pk."); + strcpy(fkattname, "fk."); + sep = "("; + for (i = 0; i < riinfo->nkeys; i++) + { + Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); + Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); + Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); + + quoteOneName(pkattname + 3, + RIAttName(pk_rel, riinfo->pk_attnums[i])); + quoteOneName(fkattname + 3, + RIAttName(fk_rel, riinfo->fk_attnums[i])); + ri_GenerateQual(&querybuf, sep, + pkattname, pk_type, + riinfo->pf_eq_oprs[i], + fkattname, fk_type); + if (pk_coll != fk_coll) + ri_GenerateQualCollation(&querybuf, pk_coll); + sep = "AND"; + } + + /* + * Start the WHERE clause with the partition constraint (except if this is + * the default partition and there's no other partition, because the + * partition constraint is the empty string in that case.) + */ + constraintDef = pg_get_partconstrdef_string(RelationGetRelid(pk_rel), "pk"); + if (constraintDef && constraintDef[0] != '\0') + appendStringInfo(&querybuf, ") WHERE %s AND (", + constraintDef); + else + appendStringInfoString(&querybuf, ") WHERE ("); + + sep = ""; + for (i = 0; i < riinfo->nkeys; i++) + { + quoteOneName(fkattname, RIAttName(fk_rel, riinfo->fk_attnums[i])); + appendStringInfo(&querybuf, + "%sfk.%s IS NOT NULL", + sep, fkattname); + switch (riinfo->confmatchtype) + { + case FKCONSTR_MATCH_SIMPLE: + sep = " AND "; + break; + case FKCONSTR_MATCH_FULL: + sep = " OR "; + break; + } + } + appendStringInfoChar(&querybuf, ')'); + + /* + * Temporarily increase work_mem so that the check query can be executed + * more efficiently. 
It seems okay to do this because the query is simple + * enough to not use a multiple of work_mem, and one typically would not + * have many large foreign-key validations happening concurrently. So + * this seems to meet the criteria for being considered a "maintenance" + * operation, and accordingly we use maintenance_work_mem. However, we + * must also set hash_mem_multiplier to 1, since it is surely not okay to + * let that get applied to the maintenance_work_mem value. + * + * We use the equivalent of a function SET option to allow the setting to + * persist for exactly the duration of the check query. guc.c also takes + * care of undoing the setting on error. + */ + save_nestlevel = NewGUCNestLevel(); + + snprintf(workmembuf, sizeof(workmembuf), "%d", maintenance_work_mem); + (void) set_config_option("work_mem", workmembuf, + PGC_USERSET, PGC_S_SESSION, + GUC_ACTION_SAVE, true, 0, false); + (void) set_config_option("hash_mem_multiplier", "1", + PGC_USERSET, PGC_S_SESSION, + GUC_ACTION_SAVE, true, 0, false); + + if (SPI_connect() != SPI_OK_CONNECT) + elog(ERROR, "SPI_connect failed"); + + /* + * Generate the plan. We don't need to cache it, and there are no + * arguments to the plan. + */ + qplan = SPI_prepare(querybuf.data, 0, NULL); + + if (qplan == NULL) + elog(ERROR, "SPI_prepare returned %s for %s", + SPI_result_code_string(SPI_result), querybuf.data); + + /* + * Run the plan. For safety we force a current snapshot to be used. (In + * transaction-snapshot mode, this arguably violates transaction isolation + * rules, but we really haven't got much choice.) We don't need to + * register the snapshot, because SPI_execute_snapshot will see to it. We + * need at most one tuple returned, so pass limit = 1. + */ + spi_result = SPI_execute_snapshot(qplan, + NULL, NULL, + GetLatestSnapshot(), + InvalidSnapshot, + true, false, 1); + + /* Check result */ + if (spi_result != SPI_OK_SELECT) + elog(ERROR, "SPI_execute_snapshot returned %s", SPI_result_code_string(spi_result)); + + /* Did we find a tuple that would violate the constraint? */ + if (SPI_processed > 0) + { + TupleTableSlot *slot; + HeapTuple tuple = SPI_tuptable->vals[0]; + TupleDesc tupdesc = SPI_tuptable->tupdesc; + RI_ConstraintInfo fake_riinfo; + + slot = MakeSingleTupleTableSlot(tupdesc, &TTSOpsVirtual); + + heap_deform_tuple(tuple, tupdesc, + slot->tts_values, slot->tts_isnull); + ExecStoreVirtualTuple(slot); + + /* + * The columns to look at in the result tuple are 1..N, not whatever + * they are in the fk_rel. Hack up riinfo so that ri_ReportViolation + * will behave properly. + * + * In addition to this, we have to pass the correct tupdesc to + * ri_ReportViolation, overriding its normal habit of using the pk_rel + * or fk_rel's tupdesc. + */ + memcpy(&fake_riinfo, riinfo, sizeof(RI_ConstraintInfo)); + for (i = 0; i < fake_riinfo.nkeys; i++) + fake_riinfo.pk_attnums[i] = i + 1; + + ri_ReportViolation(&fake_riinfo, pk_rel, fk_rel, + slot, tupdesc, 0, true); + } + + if (SPI_finish() != SPI_OK_FINISH) + elog(ERROR, "SPI_finish failed"); + + /* + * Restore work_mem and hash_mem_multiplier. + */ + AtEOXact_GUC(true, save_nestlevel); +} + + +/* ---------- + * Local functions below + * ---------- + */ + + +/* + * quoteOneName --- safely quote a single SQL name + * + * buffer must be MAX_QUOTED_NAME_LEN long (includes room for \0) + */ +static void +quoteOneName(char *buffer, const char *name) +{ + /* Rather than trying to be smart, just always quote it. 
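+ * For example, a hypothetical name  ab"c  comes out as  "ab""c" : the
+ * whole value is wrapped in double quotes and any embedded quote marks
+ * are doubled.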
*/ + *buffer++ = '"'; + while (*name) + { + if (*name == '"') + *buffer++ = '"'; + *buffer++ = *name++; + } + *buffer++ = '"'; + *buffer = '\0'; +} + +/* + * quoteRelationName --- safely quote a fully qualified relation name + * + * buffer must be MAX_QUOTED_REL_NAME_LEN long (includes room for \0) + */ +static void +quoteRelationName(char *buffer, Relation rel) +{ + quoteOneName(buffer, get_namespace_name(RelationGetNamespace(rel))); + buffer += strlen(buffer); + *buffer++ = '.'; + quoteOneName(buffer, RelationGetRelationName(rel)); +} + +/* + * ri_GenerateQual --- generate a WHERE clause equating two variables + * + * This basically appends " sep leftop op rightop" to buf, adding casts + * and schema qualification as needed to ensure that the parser will select + * the operator we specify. leftop and rightop should be parenthesized + * if they aren't variables or parameters. + */ +static void +ri_GenerateQual(StringInfo buf, + const char *sep, + const char *leftop, Oid leftoptype, + Oid opoid, + const char *rightop, Oid rightoptype) +{ + appendStringInfo(buf, " %s ", sep); + generate_operator_clause(buf, leftop, leftoptype, opoid, + rightop, rightoptype); +} + +/* + * ri_GenerateQualCollation --- add a COLLATE spec to a WHERE clause + * + * At present, we intentionally do not use this function for RI queries that + * compare a variable to a $n parameter. Since parameter symbols always have + * default collation, the effect will be to use the variable's collation. + * Now that is only strictly correct when testing the referenced column, since + * the SQL standard specifies that RI comparisons should use the referenced + * column's collation. However, so long as all collations have the same + * notion of equality (which they do, because texteq reduces to bitwise + * equality), there's no visible semantic impact from using the referencing + * column's collation when testing it, and this is a good thing to do because + * it lets us use a normal index on the referencing column. However, we do + * have to use this function when directly comparing the referencing and + * referenced columns, if they are of different collations; else the parser + * will fail to resolve the collation to use. + */ +static void +ri_GenerateQualCollation(StringInfo buf, Oid collation) +{ + HeapTuple tp; + Form_pg_collation colltup; + char *collname; + char onename[MAX_QUOTED_NAME_LEN]; + + /* Nothing to do if it's a noncollatable data type */ + if (!OidIsValid(collation)) + return; + + tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for collation %u", collation); + colltup = (Form_pg_collation) GETSTRUCT(tp); + collname = NameStr(colltup->collname); + + /* + * We qualify the name always, for simplicity and to ensure the query is + * not search-path-dependent. + */ + quoteOneName(onename, get_namespace_name(colltup->collnamespace)); + appendStringInfo(buf, " COLLATE %s", onename); + quoteOneName(onename, collname); + appendStringInfo(buf, ".%s", onename); + + ReleaseSysCache(tp); +} + +/* ---------- + * ri_BuildQueryKey - + * + * Construct a hashtable key for a prepared SPI plan of an FK constraint. 
+ * + * key: output argument, *key is filled in based on the other arguments + * riinfo: info derived from pg_constraint entry + * constr_queryno: an internal number identifying the query type + * (see RI_PLAN_XXX constants at head of file) + * ---------- + */ +static void +ri_BuildQueryKey(RI_QueryKey *key, const RI_ConstraintInfo *riinfo, + int32 constr_queryno) +{ + /* + * Inherited constraints with a common ancestor can share ri_query_cache + * entries for all query types except RI_PLAN_CHECK_LOOKUPPK_FROM_PK. + * Except in that case, the query processes the other table involved in + * the FK constraint (i.e., not the table on which the trigger has been + * fired), and so it will be the same for all members of the inheritance + * tree. So we may use the root constraint's OID in the hash key, rather + * than the constraint's own OID. This avoids creating duplicate SPI + * plans, saving lots of work and memory when there are many partitions + * with similar FK constraints. + * + * (Note that we must still have a separate RI_ConstraintInfo for each + * constraint, because partitions can have different column orders, + * resulting in different pk_attnums[] or fk_attnums[] array contents.) + * + * We assume struct RI_QueryKey contains no padding bytes, else we'd need + * to use memset to clear them. + */ + if (constr_queryno != RI_PLAN_CHECK_LOOKUPPK_FROM_PK) + key->constr_id = riinfo->constraint_root_id; + else + key->constr_id = riinfo->constraint_id; + key->constr_queryno = constr_queryno; +} + +/* + * Check that RI trigger function was called in expected context + */ +static void +ri_CheckTrigger(FunctionCallInfo fcinfo, const char *funcname, int tgkind) +{ + TriggerData *trigdata = (TriggerData *) fcinfo->context; + + if (!CALLED_AS_TRIGGER(fcinfo)) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("function \"%s\" was not called by trigger manager", funcname))); + + /* + * Check proper event + */ + if (!TRIGGER_FIRED_AFTER(trigdata->tg_event) || + !TRIGGER_FIRED_FOR_ROW(trigdata->tg_event)) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("function \"%s\" must be fired AFTER ROW", funcname))); + + switch (tgkind) + { + case RI_TRIGTYPE_INSERT: + if (!TRIGGER_FIRED_BY_INSERT(trigdata->tg_event)) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("function \"%s\" must be fired for INSERT", funcname))); + break; + case RI_TRIGTYPE_UPDATE: + if (!TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("function \"%s\" must be fired for UPDATE", funcname))); + break; + case RI_TRIGTYPE_DELETE: + if (!TRIGGER_FIRED_BY_DELETE(trigdata->tg_event)) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("function \"%s\" must be fired for DELETE", funcname))); + break; + } +} + + +/* + * Fetch the RI_ConstraintInfo struct for the trigger's FK constraint. + */ +static const RI_ConstraintInfo * +ri_FetchConstraintInfo(Trigger *trigger, Relation trig_rel, bool rel_is_pk) +{ + Oid constraintOid = trigger->tgconstraint; + const RI_ConstraintInfo *riinfo; + + /* + * Check that the FK constraint's OID is available; it might not be if + * we've been invoked via an ordinary trigger or an old-style "constraint + * trigger". 
+ */ + if (!OidIsValid(constraintOid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("no pg_constraint entry for trigger \"%s\" on table \"%s\"", + trigger->tgname, RelationGetRelationName(trig_rel)), + errhint("Remove this referential integrity trigger and its mates, then do ALTER TABLE ADD CONSTRAINT."))); + + /* Find or create a hashtable entry for the constraint */ + riinfo = ri_LoadConstraintInfo(constraintOid); + + /* Do some easy cross-checks against the trigger call data */ + if (rel_is_pk) + { + if (riinfo->fk_relid != trigger->tgconstrrelid || + riinfo->pk_relid != RelationGetRelid(trig_rel)) + elog(ERROR, "wrong pg_constraint entry for trigger \"%s\" on table \"%s\"", + trigger->tgname, RelationGetRelationName(trig_rel)); + } + else + { + if (riinfo->fk_relid != RelationGetRelid(trig_rel) || + riinfo->pk_relid != trigger->tgconstrrelid) + elog(ERROR, "wrong pg_constraint entry for trigger \"%s\" on table \"%s\"", + trigger->tgname, RelationGetRelationName(trig_rel)); + } + + if (riinfo->confmatchtype != FKCONSTR_MATCH_FULL && + riinfo->confmatchtype != FKCONSTR_MATCH_PARTIAL && + riinfo->confmatchtype != FKCONSTR_MATCH_SIMPLE) + elog(ERROR, "unrecognized confmatchtype: %d", + riinfo->confmatchtype); + + if (riinfo->confmatchtype == FKCONSTR_MATCH_PARTIAL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("MATCH PARTIAL not yet implemented"))); + + return riinfo; +} + +/* + * Fetch or create the RI_ConstraintInfo struct for an FK constraint. + */ +static const RI_ConstraintInfo * +ri_LoadConstraintInfo(Oid constraintOid) +{ + RI_ConstraintInfo *riinfo; + bool found; + HeapTuple tup; + Form_pg_constraint conForm; + + /* + * On the first call initialize the hashtable + */ + if (!ri_constraint_cache) + ri_InitHashTables(); + + /* + * Find or create a hash entry. If we find a valid one, just return it. + */ + riinfo = (RI_ConstraintInfo *) hash_search(ri_constraint_cache, + &constraintOid, + HASH_ENTER, &found); + if (!found) + riinfo->valid = false; + else if (riinfo->valid) + return riinfo; + + /* + * Fetch the pg_constraint row so we can fill in the entry. 
+ */ + tup = SearchSysCache1(CONSTROID, ObjectIdGetDatum(constraintOid)); + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for constraint %u", constraintOid); + conForm = (Form_pg_constraint) GETSTRUCT(tup); + + if (conForm->contype != CONSTRAINT_FOREIGN) /* should not happen */ + elog(ERROR, "constraint %u is not a foreign key constraint", + constraintOid); + + /* And extract data */ + Assert(riinfo->constraint_id == constraintOid); + if (OidIsValid(conForm->conparentid)) + riinfo->constraint_root_id = + get_ri_constraint_root(conForm->conparentid); + else + riinfo->constraint_root_id = constraintOid; + riinfo->oidHashValue = GetSysCacheHashValue1(CONSTROID, + ObjectIdGetDatum(constraintOid)); + riinfo->rootHashValue = GetSysCacheHashValue1(CONSTROID, + ObjectIdGetDatum(riinfo->constraint_root_id)); + memcpy(&riinfo->conname, &conForm->conname, sizeof(NameData)); + riinfo->pk_relid = conForm->confrelid; + riinfo->fk_relid = conForm->conrelid; + riinfo->confupdtype = conForm->confupdtype; + riinfo->confdeltype = conForm->confdeltype; + riinfo->confmatchtype = conForm->confmatchtype; + + DeconstructFkConstraintRow(tup, + &riinfo->nkeys, + riinfo->fk_attnums, + riinfo->pk_attnums, + riinfo->pf_eq_oprs, + riinfo->pp_eq_oprs, + riinfo->ff_eq_oprs, + &riinfo->ndelsetcols, + riinfo->confdelsetcols); + + ReleaseSysCache(tup); + + /* + * For efficient processing of invalidation messages below, we keep a + * doubly-linked count list of all currently valid entries. + */ + dclist_push_tail(&ri_constraint_cache_valid_list, &riinfo->valid_link); + + riinfo->valid = true; + + return riinfo; +} + +/* + * get_ri_constraint_root + * Returns the OID of the constraint's root parent + */ +static Oid +get_ri_constraint_root(Oid constrOid) +{ + for (;;) + { + HeapTuple tuple; + Oid constrParentOid; + + tuple = SearchSysCache1(CONSTROID, ObjectIdGetDatum(constrOid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for constraint %u", constrOid); + constrParentOid = ((Form_pg_constraint) GETSTRUCT(tuple))->conparentid; + ReleaseSysCache(tuple); + if (!OidIsValid(constrParentOid)) + break; /* we reached the root constraint */ + constrOid = constrParentOid; + } + return constrOid; +} + +/* + * Callback for pg_constraint inval events + * + * While most syscache callbacks just flush all their entries, pg_constraint + * gets enough update traffic that it's probably worth being smarter. + * Invalidate any ri_constraint_cache entry associated with the syscache + * entry with the specified hash value, or all entries if hashvalue == 0. + * + * Note: at the time a cache invalidation message is processed there may be + * active references to the cache. Because of this we never remove entries + * from the cache, but only mark them invalid, which is harmless to active + * uses. (Any query using an entry should hold a lock sufficient to keep that + * data from changing under it --- but we may get cache flushes anyway.) + */ +static void +InvalidateConstraintCacheCallBack(Datum arg, int cacheid, uint32 hashvalue) +{ + dlist_mutable_iter iter; + + Assert(ri_constraint_cache != NULL); + + /* + * If the list of currently valid entries gets excessively large, we mark + * them all invalid so we can empty the list. This arrangement avoids + * O(N^2) behavior in situations where a session touches many foreign keys + * and also does many ALTER TABLEs, such as a restore from pg_dump. 
+ */ + if (dclist_count(&ri_constraint_cache_valid_list) > 1000) + hashvalue = 0; /* pretend it's a cache reset */ + + dclist_foreach_modify(iter, &ri_constraint_cache_valid_list) + { + RI_ConstraintInfo *riinfo = dclist_container(RI_ConstraintInfo, + valid_link, iter.cur); + + /* + * We must invalidate not only entries directly matching the given + * hash value, but also child entries, in case the invalidation + * affects a root constraint. + */ + if (hashvalue == 0 || + riinfo->oidHashValue == hashvalue || + riinfo->rootHashValue == hashvalue) + { + riinfo->valid = false; + /* Remove invalidated entries from the list, too */ + dclist_delete_from(&ri_constraint_cache_valid_list, iter.cur); + } + } +} + + +/* + * Prepare execution plan for a query to enforce an RI restriction + */ +static SPIPlanPtr +ri_PlanCheck(const char *querystr, int nargs, Oid *argtypes, + RI_QueryKey *qkey, Relation fk_rel, Relation pk_rel) +{ + SPIPlanPtr qplan; + Relation query_rel; + Oid save_userid; + int save_sec_context; + + /* + * Use the query type code to determine whether the query is run against + * the PK or FK table; we'll do the check as that table's owner + */ + if (qkey->constr_queryno <= RI_PLAN_LAST_ON_PK) + query_rel = pk_rel; + else + query_rel = fk_rel; + + /* Switch to proper UID to perform check as */ + GetUserIdAndSecContext(&save_userid, &save_sec_context); + SetUserIdAndSecContext(RelationGetForm(query_rel)->relowner, + save_sec_context | SECURITY_LOCAL_USERID_CHANGE | + SECURITY_NOFORCE_RLS); + + /* Create the plan */ + qplan = SPI_prepare(querystr, nargs, argtypes); + + if (qplan == NULL) + elog(ERROR, "SPI_prepare returned %s for %s", SPI_result_code_string(SPI_result), querystr); + + /* Restore UID and security context */ + SetUserIdAndSecContext(save_userid, save_sec_context); + + /* Save the plan */ + SPI_keepplan(qplan); + ri_HashPreparedPlan(qkey, qplan); + + return qplan; +} + +/* + * Perform a query to enforce an RI restriction + */ +static bool +ri_PerformCheck(const RI_ConstraintInfo *riinfo, + RI_QueryKey *qkey, SPIPlanPtr qplan, + Relation fk_rel, Relation pk_rel, + TupleTableSlot *oldslot, TupleTableSlot *newslot, + bool detectNewRows, int expect_OK) +{ + Relation query_rel, + source_rel; + bool source_is_pk; + Snapshot test_snapshot; + Snapshot crosscheck_snapshot; + int limit; + int spi_result; + Oid save_userid; + int save_sec_context; + Datum vals[RI_MAX_NUMKEYS * 2]; + char nulls[RI_MAX_NUMKEYS * 2]; + + /* + * Use the query type code to determine whether the query is run against + * the PK or FK table; we'll do the check as that table's owner + */ + if (qkey->constr_queryno <= RI_PLAN_LAST_ON_PK) + query_rel = pk_rel; + else + query_rel = fk_rel; + + /* + * The values for the query are taken from the table on which the trigger + * is called - it is normally the other one with respect to query_rel. An + * exception is ri_Check_Pk_Match(), which uses the PK table for both (and + * sets queryno to RI_PLAN_CHECK_LOOKUPPK_FROM_PK). We might eventually + * need some less klugy way to determine this. 
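+ *
+ * Concretely, only the FK-side check (RI_PLAN_CHECK_LOOKUPPK) takes its
+ * parameter values from the FK row; all other plans are driven by the old
+ * (and, for cascaded updates, new) PK row.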
+ */ + if (qkey->constr_queryno == RI_PLAN_CHECK_LOOKUPPK) + { + source_rel = fk_rel; + source_is_pk = false; + } + else + { + source_rel = pk_rel; + source_is_pk = true; + } + + /* Extract the parameters to be passed into the query */ + if (newslot) + { + ri_ExtractValues(source_rel, newslot, riinfo, source_is_pk, + vals, nulls); + if (oldslot) + ri_ExtractValues(source_rel, oldslot, riinfo, source_is_pk, + vals + riinfo->nkeys, nulls + riinfo->nkeys); + } + else + { + ri_ExtractValues(source_rel, oldslot, riinfo, source_is_pk, + vals, nulls); + } + + /* + * In READ COMMITTED mode, we just need to use an up-to-date regular + * snapshot, and we will see all rows that could be interesting. But in + * transaction-snapshot mode, we can't change the transaction snapshot. If + * the caller passes detectNewRows == false then it's okay to do the query + * with the transaction snapshot; otherwise we use a current snapshot, and + * tell the executor to error out if it finds any rows under the current + * snapshot that wouldn't be visible per the transaction snapshot. Note + * that SPI_execute_snapshot will register the snapshots, so we don't need + * to bother here. + */ + if (IsolationUsesXactSnapshot() && detectNewRows) + { + CommandCounterIncrement(); /* be sure all my own work is visible */ + test_snapshot = GetLatestSnapshot(); + crosscheck_snapshot = GetTransactionSnapshot(); + } + else + { + /* the default SPI behavior is okay */ + test_snapshot = InvalidSnapshot; + crosscheck_snapshot = InvalidSnapshot; + } + + /* + * If this is a select query (e.g., for a 'no action' or 'restrict' + * trigger), we only need to see if there is a single row in the table, + * matching the key. Otherwise, limit = 0 - because we want the query to + * affect ALL the matching rows. + */ + limit = (expect_OK == SPI_OK_SELECT) ? 1 : 0; + + /* Switch to proper UID to perform check as */ + GetUserIdAndSecContext(&save_userid, &save_sec_context); + SetUserIdAndSecContext(RelationGetForm(query_rel)->relowner, + save_sec_context | SECURITY_LOCAL_USERID_CHANGE | + SECURITY_NOFORCE_RLS); + + /* Finally we can run the query. */ + spi_result = SPI_execute_snapshot(qplan, + vals, nulls, + test_snapshot, crosscheck_snapshot, + false, false, limit); + + /* Restore UID and security context */ + SetUserIdAndSecContext(save_userid, save_sec_context); + + /* Check result */ + if (spi_result < 0) + elog(ERROR, "SPI_execute_snapshot returned %s", SPI_result_code_string(spi_result)); + + if (expect_OK >= 0 && spi_result != expect_OK) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("referential integrity query on \"%s\" from constraint \"%s\" on \"%s\" gave unexpected result", + RelationGetRelationName(pk_rel), + NameStr(riinfo->conname), + RelationGetRelationName(fk_rel)), + errhint("This is most likely due to a rule having rewritten the query."))); + + /* XXX wouldn't it be clearer to do this part at the caller? */ + if (qkey->constr_queryno != RI_PLAN_CHECK_LOOKUPPK_FROM_PK && + expect_OK == SPI_OK_SELECT && + (SPI_processed == 0) == (qkey->constr_queryno == RI_PLAN_CHECK_LOOKUPPK)) + ri_ReportViolation(riinfo, + pk_rel, fk_rel, + newslot ? 
newslot : oldslot, + NULL, + qkey->constr_queryno, false); + + return SPI_processed != 0; +} + +/* + * Extract fields from a tuple into Datum/nulls arrays + */ +static void +ri_ExtractValues(Relation rel, TupleTableSlot *slot, + const RI_ConstraintInfo *riinfo, bool rel_is_pk, + Datum *vals, char *nulls) +{ + const int16 *attnums; + bool isnull; + + if (rel_is_pk) + attnums = riinfo->pk_attnums; + else + attnums = riinfo->fk_attnums; + + for (int i = 0; i < riinfo->nkeys; i++) + { + vals[i] = slot_getattr(slot, attnums[i], &isnull); + nulls[i] = isnull ? 'n' : ' '; + } +} + +/* + * Produce an error report + * + * If the failed constraint was on insert/update to the FK table, + * we want the key names and values extracted from there, and the error + * message to look like 'key blah is not present in PK'. + * Otherwise, the attr names and values come from the PK table and the + * message looks like 'key blah is still referenced from FK'. + */ +static void +ri_ReportViolation(const RI_ConstraintInfo *riinfo, + Relation pk_rel, Relation fk_rel, + TupleTableSlot *violatorslot, TupleDesc tupdesc, + int queryno, bool partgone) +{ + StringInfoData key_names; + StringInfoData key_values; + bool onfk; + const int16 *attnums; + Oid rel_oid; + AclResult aclresult; + bool has_perm = true; + + /* + * Determine which relation to complain about. If tupdesc wasn't passed + * by caller, assume the violator tuple came from there. + */ + onfk = (queryno == RI_PLAN_CHECK_LOOKUPPK); + if (onfk) + { + attnums = riinfo->fk_attnums; + rel_oid = fk_rel->rd_id; + if (tupdesc == NULL) + tupdesc = fk_rel->rd_att; + } + else + { + attnums = riinfo->pk_attnums; + rel_oid = pk_rel->rd_id; + if (tupdesc == NULL) + tupdesc = pk_rel->rd_att; + } + + /* + * Check permissions- if the user does not have access to view the data in + * any of the key columns then we don't include the errdetail() below. + * + * Check if RLS is enabled on the relation first. If so, we don't return + * any specifics to avoid leaking data. + * + * Check table-level permissions next and, failing that, column-level + * privileges. + * + * When a partition at the referenced side is being detached/dropped, we + * needn't check, since the user must be the table owner anyway. 
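+ * (In that case the error still comes out, just with a generic errdetail
+ * that does not echo the key values.)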
+ */ + if (partgone) + has_perm = true; + else if (check_enable_rls(rel_oid, InvalidOid, true) != RLS_ENABLED) + { + aclresult = pg_class_aclcheck(rel_oid, GetUserId(), ACL_SELECT); + if (aclresult != ACLCHECK_OK) + { + /* Try for column-level permissions */ + for (int idx = 0; idx < riinfo->nkeys; idx++) + { + aclresult = pg_attribute_aclcheck(rel_oid, attnums[idx], + GetUserId(), + ACL_SELECT); + + /* No access to the key */ + if (aclresult != ACLCHECK_OK) + { + has_perm = false; + break; + } + } + } + } + else + has_perm = false; + + if (has_perm) + { + /* Get printable versions of the keys involved */ + initStringInfo(&key_names); + initStringInfo(&key_values); + for (int idx = 0; idx < riinfo->nkeys; idx++) + { + int fnum = attnums[idx]; + Form_pg_attribute att = TupleDescAttr(tupdesc, fnum - 1); + char *name, + *val; + Datum datum; + bool isnull; + + name = NameStr(att->attname); + + datum = slot_getattr(violatorslot, fnum, &isnull); + if (!isnull) + { + Oid foutoid; + bool typisvarlena; + + getTypeOutputInfo(att->atttypid, &foutoid, &typisvarlena); + val = OidOutputFunctionCall(foutoid, datum); + } + else + val = "null"; + + if (idx > 0) + { + appendStringInfoString(&key_names, ", "); + appendStringInfoString(&key_values, ", "); + } + appendStringInfoString(&key_names, name); + appendStringInfoString(&key_values, val); + } + } + + if (partgone) + ereport(ERROR, + (errcode(ERRCODE_FOREIGN_KEY_VIOLATION), + errmsg("removing partition \"%s\" violates foreign key constraint \"%s\"", + RelationGetRelationName(pk_rel), + NameStr(riinfo->conname)), + errdetail("Key (%s)=(%s) is still referenced from table \"%s\".", + key_names.data, key_values.data, + RelationGetRelationName(fk_rel)), + errtableconstraint(fk_rel, NameStr(riinfo->conname)))); + else if (onfk) + ereport(ERROR, + (errcode(ERRCODE_FOREIGN_KEY_VIOLATION), + errmsg("insert or update on table \"%s\" violates foreign key constraint \"%s\"", + RelationGetRelationName(fk_rel), + NameStr(riinfo->conname)), + has_perm ? + errdetail("Key (%s)=(%s) is not present in table \"%s\".", + key_names.data, key_values.data, + RelationGetRelationName(pk_rel)) : + errdetail("Key is not present in table \"%s\".", + RelationGetRelationName(pk_rel)), + errtableconstraint(fk_rel, NameStr(riinfo->conname)))); + else + ereport(ERROR, + (errcode(ERRCODE_FOREIGN_KEY_VIOLATION), + errmsg("update or delete on table \"%s\" violates foreign key constraint \"%s\" on table \"%s\"", + RelationGetRelationName(pk_rel), + NameStr(riinfo->conname), + RelationGetRelationName(fk_rel)), + has_perm ? + errdetail("Key (%s)=(%s) is still referenced from table \"%s\".", + key_names.data, key_values.data, + RelationGetRelationName(fk_rel)) : + errdetail("Key is still referenced from table \"%s\".", + RelationGetRelationName(fk_rel)), + errtableconstraint(fk_rel, NameStr(riinfo->conname)))); +} + + +/* + * ri_NullCheck - + * + * Determine the NULL state of all key values in a tuple + * + * Returns one of RI_KEYS_ALL_NULL, RI_KEYS_NONE_NULL or RI_KEYS_SOME_NULL. 
+ */ +static int +ri_NullCheck(TupleDesc tupDesc, + TupleTableSlot *slot, + const RI_ConstraintInfo *riinfo, bool rel_is_pk) +{ + const int16 *attnums; + bool allnull = true; + bool nonenull = true; + + if (rel_is_pk) + attnums = riinfo->pk_attnums; + else + attnums = riinfo->fk_attnums; + + for (int i = 0; i < riinfo->nkeys; i++) + { + if (slot_attisnull(slot, attnums[i])) + nonenull = false; + else + allnull = false; + } + + if (allnull) + return RI_KEYS_ALL_NULL; + + if (nonenull) + return RI_KEYS_NONE_NULL; + + return RI_KEYS_SOME_NULL; +} + + +/* + * ri_InitHashTables - + * + * Initialize our internal hash tables. + */ +static void +ri_InitHashTables(void) +{ + HASHCTL ctl; + + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(RI_ConstraintInfo); + ri_constraint_cache = hash_create("RI constraint cache", + RI_INIT_CONSTRAINTHASHSIZE, + &ctl, HASH_ELEM | HASH_BLOBS); + + /* Arrange to flush cache on pg_constraint changes */ + CacheRegisterSyscacheCallback(CONSTROID, + InvalidateConstraintCacheCallBack, + (Datum) 0); + + ctl.keysize = sizeof(RI_QueryKey); + ctl.entrysize = sizeof(RI_QueryHashEntry); + ri_query_cache = hash_create("RI query cache", + RI_INIT_QUERYHASHSIZE, + &ctl, HASH_ELEM | HASH_BLOBS); + + ctl.keysize = sizeof(RI_CompareKey); + ctl.entrysize = sizeof(RI_CompareHashEntry); + ri_compare_cache = hash_create("RI compare cache", + RI_INIT_QUERYHASHSIZE, + &ctl, HASH_ELEM | HASH_BLOBS); +} + + +/* + * ri_FetchPreparedPlan - + * + * Lookup for a query key in our private hash table of prepared + * and saved SPI execution plans. Return the plan if found or NULL. + */ +static SPIPlanPtr +ri_FetchPreparedPlan(RI_QueryKey *key) +{ + RI_QueryHashEntry *entry; + SPIPlanPtr plan; + + /* + * On the first call initialize the hashtable + */ + if (!ri_query_cache) + ri_InitHashTables(); + + /* + * Lookup for the key + */ + entry = (RI_QueryHashEntry *) hash_search(ri_query_cache, + key, + HASH_FIND, NULL); + if (entry == NULL) + return NULL; + + /* + * Check whether the plan is still valid. If it isn't, we don't want to + * simply rely on plancache.c to regenerate it; rather we should start + * from scratch and rebuild the query text too. This is to cover cases + * such as table/column renames. We depend on the plancache machinery to + * detect possible invalidations, though. + * + * CAUTION: this check is only trustworthy if the caller has already + * locked both FK and PK rels. + */ + plan = entry->plan; + if (plan && SPI_plan_is_valid(plan)) + return plan; + + /* + * Otherwise we might as well flush the cached plan now, to free a little + * memory space before we make a new one. + */ + entry->plan = NULL; + if (plan) + SPI_freeplan(plan); + + return NULL; +} + + +/* + * ri_HashPreparedPlan - + * + * Add another plan to our private SPI query plan hashtable. + */ +static void +ri_HashPreparedPlan(RI_QueryKey *key, SPIPlanPtr plan) +{ + RI_QueryHashEntry *entry; + bool found; + + /* + * On the first call initialize the hashtable + */ + if (!ri_query_cache) + ri_InitHashTables(); + + /* + * Add the new plan. We might be overwriting an entry previously found + * invalid by ri_FetchPreparedPlan. + */ + entry = (RI_QueryHashEntry *) hash_search(ri_query_cache, + key, + HASH_ENTER, &found); + Assert(!found || entry->plan == NULL); + entry->plan = plan; +} + + +/* + * ri_KeysEqual - + * + * Check if all key values in OLD and NEW are equal. 
+ * + * Note: at some point we might wish to redefine this as checking for + * "IS NOT DISTINCT" rather than "=", that is, allow two nulls to be + * considered equal. Currently there is no need since all callers have + * previously found at least one of the rows to contain no nulls. + */ +static bool +ri_KeysEqual(Relation rel, TupleTableSlot *oldslot, TupleTableSlot *newslot, + const RI_ConstraintInfo *riinfo, bool rel_is_pk) +{ + const int16 *attnums; + + if (rel_is_pk) + attnums = riinfo->pk_attnums; + else + attnums = riinfo->fk_attnums; + + /* XXX: could be worthwhile to fetch all necessary attrs at once */ + for (int i = 0; i < riinfo->nkeys; i++) + { + Datum oldvalue; + Datum newvalue; + bool isnull; + + /* + * Get one attribute's oldvalue. If it is NULL - they're not equal. + */ + oldvalue = slot_getattr(oldslot, attnums[i], &isnull); + if (isnull) + return false; + + /* + * Get one attribute's newvalue. If it is NULL - they're not equal. + */ + newvalue = slot_getattr(newslot, attnums[i], &isnull); + if (isnull) + return false; + + if (rel_is_pk) + { + /* + * If we are looking at the PK table, then do a bytewise + * comparison. We must propagate PK changes if the value is + * changed to one that "looks" different but would compare as + * equal using the equality operator. This only makes a + * difference for ON UPDATE CASCADE, but for consistency we treat + * all changes to the PK the same. + */ + Form_pg_attribute att = TupleDescAttr(oldslot->tts_tupleDescriptor, attnums[i] - 1); + + if (!datum_image_eq(oldvalue, newvalue, att->attbyval, att->attlen)) + return false; + } + else + { + /* + * For the FK table, compare with the appropriate equality + * operator. Changes that compare equal will still satisfy the + * constraint after the update. + */ + if (!ri_AttributesEqual(riinfo->ff_eq_oprs[i], RIAttType(rel, attnums[i]), + oldvalue, newvalue)) + return false; + } + } + + return true; +} + + +/* + * ri_AttributesEqual - + * + * Call the appropriate equality comparison operator for two values. + * + * NB: we have already checked that neither value is null. + */ +static bool +ri_AttributesEqual(Oid eq_opr, Oid typeid, + Datum oldvalue, Datum newvalue) +{ + RI_CompareHashEntry *entry = ri_HashCompareOp(eq_opr, typeid); + + /* Do we need to cast the values? */ + if (OidIsValid(entry->cast_func_finfo.fn_oid)) + { + oldvalue = FunctionCall3(&entry->cast_func_finfo, + oldvalue, + Int32GetDatum(-1), /* typmod */ + BoolGetDatum(false)); /* implicit coercion */ + newvalue = FunctionCall3(&entry->cast_func_finfo, + newvalue, + Int32GetDatum(-1), /* typmod */ + BoolGetDatum(false)); /* implicit coercion */ + } + + /* + * Apply the comparison operator. + * + * Note: This function is part of a call stack that determines whether an + * update to a row is significant enough that it needs checking or action + * on the other side of a foreign-key constraint. Therefore, the + * comparison here would need to be done with the collation of the *other* + * table. For simplicity (e.g., we might not even have the other table + * open), we'll just use the default collation here, which could lead to + * some false negatives. All this would break if we ever allow + * database-wide collations to be nondeterministic. + */ + return DatumGetBool(FunctionCall2Coll(&entry->eq_opr_finfo, + DEFAULT_COLLATION_OID, + oldvalue, newvalue)); +} + +/* + * ri_HashCompareOp - + * + * See if we know how to compare two values, and create a new hash entry + * if not. 
+ */ +static RI_CompareHashEntry * +ri_HashCompareOp(Oid eq_opr, Oid typeid) +{ + RI_CompareKey key; + RI_CompareHashEntry *entry; + bool found; + + /* + * On the first call initialize the hashtable + */ + if (!ri_compare_cache) + ri_InitHashTables(); + + /* + * Find or create a hash entry. Note we're assuming RI_CompareKey + * contains no struct padding. + */ + key.eq_opr = eq_opr; + key.typeid = typeid; + entry = (RI_CompareHashEntry *) hash_search(ri_compare_cache, + &key, + HASH_ENTER, &found); + if (!found) + entry->valid = false; + + /* + * If not already initialized, do so. Since we'll keep this hash entry + * for the life of the backend, put any subsidiary info for the function + * cache structs into TopMemoryContext. + */ + if (!entry->valid) + { + Oid lefttype, + righttype, + castfunc; + CoercionPathType pathtype; + + /* We always need to know how to call the equality operator */ + fmgr_info_cxt(get_opcode(eq_opr), &entry->eq_opr_finfo, + TopMemoryContext); + + /* + * If we chose to use a cast from FK to PK type, we may have to apply + * the cast function to get to the operator's input type. + * + * XXX eventually it would be good to support array-coercion cases + * here and in ri_AttributesEqual(). At the moment there is no point + * because cases involving nonidentical array types will be rejected + * at constraint creation time. + * + * XXX perhaps also consider supporting CoerceViaIO? No need at the + * moment since that will never be generated for implicit coercions. + */ + op_input_types(eq_opr, &lefttype, &righttype); + Assert(lefttype == righttype); + if (typeid == lefttype) + castfunc = InvalidOid; /* simplest case */ + else + { + pathtype = find_coercion_pathway(lefttype, typeid, + COERCION_IMPLICIT, + &castfunc); + if (pathtype != COERCION_PATH_FUNC && + pathtype != COERCION_PATH_RELABELTYPE) + { + /* + * The declared input type of the eq_opr might be a + * polymorphic type such as ANYARRAY or ANYENUM, or other + * special cases such as RECORD; find_coercion_pathway + * currently doesn't subsume these special cases. + */ + if (!IsBinaryCoercible(typeid, lefttype)) + elog(ERROR, "no conversion function from %s to %s", + format_type_be(typeid), + format_type_be(lefttype)); + } + } + if (OidIsValid(castfunc)) + fmgr_info_cxt(castfunc, &entry->cast_func_finfo, + TopMemoryContext); + else + entry->cast_func_finfo.fn_oid = InvalidOid; + entry->valid = true; + } + + return entry; +} + + +/* + * Given a trigger function OID, determine whether it is an RI trigger, + * and if so whether it is attached to PK or FK relation. 
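+ *
+ * Returns RI_TRIGGER_PK for the action triggers attached to the referenced
+ * table, RI_TRIGGER_FK for the check triggers on the referencing table,
+ * and RI_TRIGGER_NONE if the function is not an RI trigger at all.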
+ */ +int +RI_FKey_trigger_type(Oid tgfoid) +{ + switch (tgfoid) + { + case F_RI_FKEY_CASCADE_DEL: + case F_RI_FKEY_CASCADE_UPD: + case F_RI_FKEY_RESTRICT_DEL: + case F_RI_FKEY_RESTRICT_UPD: + case F_RI_FKEY_SETNULL_DEL: + case F_RI_FKEY_SETNULL_UPD: + case F_RI_FKEY_SETDEFAULT_DEL: + case F_RI_FKEY_SETDEFAULT_UPD: + case F_RI_FKEY_NOACTION_DEL: + case F_RI_FKEY_NOACTION_UPD: + return RI_TRIGGER_PK; + + case F_RI_FKEY_CHECK_INS: + case F_RI_FKEY_CHECK_UPD: + return RI_TRIGGER_FK; + } + + return RI_TRIGGER_NONE; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/rowtypes.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/rowtypes.c new file mode 100644 index 00000000000..ad176651d85 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/rowtypes.c @@ -0,0 +1,2044 @@ +/*------------------------------------------------------------------------- + * + * rowtypes.c + * I/O and comparison functions for generic composite types. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/rowtypes.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <ctype.h> + +#include "access/detoast.h" +#include "access/htup_details.h" +#include "catalog/pg_type.h" +#include "common/hashfn.h" +#include "funcapi.h" +#include "libpq/pqformat.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/lsyscache.h" +#include "utils/typcache.h" + + +/* + * structure to cache metadata needed for record I/O + */ +typedef struct ColumnIOData +{ + Oid column_type; + Oid typiofunc; + Oid typioparam; + bool typisvarlena; + FmgrInfo proc; +} ColumnIOData; + +typedef struct RecordIOData +{ + Oid record_type; + int32 record_typmod; + int ncolumns; + ColumnIOData columns[FLEXIBLE_ARRAY_MEMBER]; +} RecordIOData; + +/* + * structure to cache metadata needed for record comparison + */ +typedef struct ColumnCompareData +{ + TypeCacheEntry *typentry; /* has everything we need, actually */ +} ColumnCompareData; + +typedef struct RecordCompareData +{ + int ncolumns; /* allocated length of columns[] */ + Oid record1_type; + int32 record1_typmod; + Oid record2_type; + int32 record2_typmod; + ColumnCompareData columns[FLEXIBLE_ARRAY_MEMBER]; +} RecordCompareData; + + +/* + * record_in - input routine for any composite type. + */ +Datum +record_in(PG_FUNCTION_ARGS) +{ + char *string = PG_GETARG_CSTRING(0); + Oid tupType = PG_GETARG_OID(1); + int32 tupTypmod = PG_GETARG_INT32(2); + Node *escontext = fcinfo->context; + HeapTupleHeader result; + TupleDesc tupdesc; + HeapTuple tuple; + RecordIOData *my_extra; + bool needComma = false; + int ncolumns; + int i; + char *ptr; + Datum *values; + bool *nulls; + StringInfoData buf; + + check_stack_depth(); /* recurses for record-type columns */ + + /* + * Give a friendly error message if we did not get enough info to identify + * the target record type. (lookup_rowtype_tupdesc would fail anyway, but + * with a non-user-friendly message.) In ordinary SQL usage, we'll get -1 + * for typmod, since composite types and RECORD have no type modifiers at + * the SQL level, and thus must fail for RECORD. However some callers can + * supply a valid typmod, and then we can do something useful for RECORD. 
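+ * (For reference, the literal format parsed below looks like, e.g.,
+ * '(42,"some, text",)' for a hypothetical three-column row type, with the
+ * empty final field read back as NULL.)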
+ */ + if (tupType == RECORDOID && tupTypmod < 0) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("input of anonymous composite types is not implemented"))); + + /* + * This comes from the composite type's pg_type.oid and stores system oids + * in user tables, specifically DatumTupleFields. This oid must be + * preserved by binary upgrades. + */ + tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); + ncolumns = tupdesc->natts; + + /* + * We arrange to look up the needed I/O info just once per series of + * calls, assuming the record type doesn't change underneath us. + */ + my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL || + my_extra->ncolumns != ncolumns) + { + fcinfo->flinfo->fn_extra = + MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + offsetof(RecordIOData, columns) + + ncolumns * sizeof(ColumnIOData)); + my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra; + my_extra->record_type = InvalidOid; + my_extra->record_typmod = 0; + } + + if (my_extra->record_type != tupType || + my_extra->record_typmod != tupTypmod) + { + MemSet(my_extra, 0, + offsetof(RecordIOData, columns) + + ncolumns * sizeof(ColumnIOData)); + my_extra->record_type = tupType; + my_extra->record_typmod = tupTypmod; + my_extra->ncolumns = ncolumns; + } + + values = (Datum *) palloc(ncolumns * sizeof(Datum)); + nulls = (bool *) palloc(ncolumns * sizeof(bool)); + + /* + * Scan the string. We use "buf" to accumulate the de-quoted data for + * each column, which is then fed to the appropriate input converter. + */ + ptr = string; + /* Allow leading whitespace */ + while (*ptr && isspace((unsigned char) *ptr)) + ptr++; + if (*ptr++ != '(') + { + errsave(escontext, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed record literal: \"%s\"", string), + errdetail("Missing left parenthesis."))); + goto fail; + } + + initStringInfo(&buf); + + for (i = 0; i < ncolumns; i++) + { + Form_pg_attribute att = TupleDescAttr(tupdesc, i); + ColumnIOData *column_info = &my_extra->columns[i]; + Oid column_type = att->atttypid; + char *column_data; + + /* Ignore dropped columns in datatype, but fill with nulls */ + if (att->attisdropped) + { + values[i] = (Datum) 0; + nulls[i] = true; + continue; + } + + if (needComma) + { + /* Skip comma that separates prior field from this one */ + if (*ptr == ',') + ptr++; + else + /* *ptr must be ')' */ + { + errsave(escontext, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed record literal: \"%s\"", string), + errdetail("Too few columns."))); + goto fail; + } + } + + /* Check for null: completely empty input means null */ + if (*ptr == ',' || *ptr == ')') + { + column_data = NULL; + nulls[i] = true; + } + else + { + /* Extract string for this column */ + bool inquote = false; + + resetStringInfo(&buf); + while (inquote || !(*ptr == ',' || *ptr == ')')) + { + char ch = *ptr++; + + if (ch == '\0') + { + errsave(escontext, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed record literal: \"%s\"", + string), + errdetail("Unexpected end of input."))); + goto fail; + } + if (ch == '\\') + { + if (*ptr == '\0') + { + errsave(escontext, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed record literal: \"%s\"", + string), + errdetail("Unexpected end of input."))); + goto fail; + } + appendStringInfoChar(&buf, *ptr++); + } + else if (ch == '"') + { + if (!inquote) + inquote = true; + else if (*ptr == '"') + { + /* doubled quote within quote sequence */ + 
appendStringInfoChar(&buf, *ptr++); + } + else + inquote = false; + } + else + appendStringInfoChar(&buf, ch); + } + + column_data = buf.data; + nulls[i] = false; + } + + /* + * Convert the column value + */ + if (column_info->column_type != column_type) + { + getTypeInputInfo(column_type, + &column_info->typiofunc, + &column_info->typioparam); + fmgr_info_cxt(column_info->typiofunc, &column_info->proc, + fcinfo->flinfo->fn_mcxt); + column_info->column_type = column_type; + } + + if (!InputFunctionCallSafe(&column_info->proc, + column_data, + column_info->typioparam, + att->atttypmod, + escontext, + &values[i])) + goto fail; + + /* + * Prep for next column + */ + needComma = true; + } + + if (*ptr++ != ')') + { + errsave(escontext, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed record literal: \"%s\"", string), + errdetail("Too many columns."))); + goto fail; + } + /* Allow trailing whitespace */ + while (*ptr && isspace((unsigned char) *ptr)) + ptr++; + if (*ptr) + { + errsave(escontext, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed record literal: \"%s\"", string), + errdetail("Junk after right parenthesis."))); + goto fail; + } + + tuple = heap_form_tuple(tupdesc, values, nulls); + + /* + * We cannot return tuple->t_data because heap_form_tuple allocates it as + * part of a larger chunk, and our caller may expect to be able to pfree + * our result. So must copy the info into a new palloc chunk. + */ + result = (HeapTupleHeader) palloc(tuple->t_len); + memcpy(result, tuple->t_data, tuple->t_len); + + heap_freetuple(tuple); + pfree(buf.data); + pfree(values); + pfree(nulls); + ReleaseTupleDesc(tupdesc); + + PG_RETURN_HEAPTUPLEHEADER(result); + + /* exit here once we've done lookup_rowtype_tupdesc */ +fail: + ReleaseTupleDesc(tupdesc); + PG_RETURN_NULL(); +} + +/* + * record_out - output routine for any composite type. + */ +Datum +record_out(PG_FUNCTION_ARGS) +{ + HeapTupleHeader rec = PG_GETARG_HEAPTUPLEHEADER(0); + Oid tupType; + int32 tupTypmod; + TupleDesc tupdesc; + HeapTupleData tuple; + RecordIOData *my_extra; + bool needComma = false; + int ncolumns; + int i; + Datum *values; + bool *nulls; + StringInfoData buf; + + check_stack_depth(); /* recurses for record-type columns */ + + /* Extract type info from the tuple itself */ + tupType = HeapTupleHeaderGetTypeId(rec); + tupTypmod = HeapTupleHeaderGetTypMod(rec); + tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); + ncolumns = tupdesc->natts; + + /* Build a temporary HeapTuple control structure */ + tuple.t_len = HeapTupleHeaderGetDatumLength(rec); + ItemPointerSetInvalid(&(tuple.t_self)); + tuple.t_tableOid = InvalidOid; + tuple.t_data = rec; + + /* + * We arrange to look up the needed I/O info just once per series of + * calls, assuming the record type doesn't change underneath us. 
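Editorial aside: the field-scanning loop in record_in above implements the text-format quoting rules for composite literals: a backslash escapes the next character, a doubled quote inside a quoted section stands for one literal quote, and an unquoted ',' or ')' ends the field. The following is a minimal standalone sketch of that de-quoting step, written in plain C without the backend's StringInfo/ereport machinery; the name dequote_field and the buffer handling are illustrative only, not part of the source above.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Extract one field from a composite literal, starting at *src.
 * Returns a malloc'd de-quoted string and leaves *src at the
 * terminating ',' or ')'.  Returns NULL on malformed input.
 * (Illustrative sketch of record_in's inner loop, not backend code.)
 */
static char *
dequote_field(const char **src)
{
    const char *p = *src;
    size_t cap = 16, len = 0;
    char *out = malloc(cap);
    int inquote = 0;

    if (!out)
        return NULL;
    while (inquote || (*p != ',' && *p != ')'))
    {
        char ch = *p++;

        if (ch == '\0')
        {
            free(out);              /* unexpected end of input */
            return NULL;
        }
        if (ch == '\\')
        {
            if (*p == '\0') { free(out); return NULL; }
            ch = *p++;              /* backslash escapes the next character */
        }
        else if (ch == '"')
        {
            if (!inquote) { inquote = 1; continue; }
            if (*p == '"') { ch = *p++; }   /* doubled quote -> one literal quote */
            else { inquote = 0; continue; }
        }
        if (len + 2 > cap) { cap *= 2; out = realloc(out, cap); if (!out) return NULL; }
        out[len++] = ch;
    }
    out[len] = '\0';
    *src = p;
    return out;
}

int main(void)
{
    const char *rec = "(\"he said \"\"hi\"\"\",42)";
    const char *p = rec + 1;                /* skip '(' */
    char *f1 = dequote_field(&p);
    p++;                                    /* skip ',' */
    char *f2 = dequote_field(&p);

    printf("field 1: %s\n", f1 ? f1 : "(error)");   /* he said "hi" */
    printf("field 2: %s\n", f2 ? f2 : "(error)");   /* 42 */
    free(f1);
    free(f2);
    return 0;
}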
+ */ + my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL || + my_extra->ncolumns != ncolumns) + { + fcinfo->flinfo->fn_extra = + MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + offsetof(RecordIOData, columns) + + ncolumns * sizeof(ColumnIOData)); + my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra; + my_extra->record_type = InvalidOid; + my_extra->record_typmod = 0; + } + + if (my_extra->record_type != tupType || + my_extra->record_typmod != tupTypmod) + { + MemSet(my_extra, 0, + offsetof(RecordIOData, columns) + + ncolumns * sizeof(ColumnIOData)); + my_extra->record_type = tupType; + my_extra->record_typmod = tupTypmod; + my_extra->ncolumns = ncolumns; + } + + values = (Datum *) palloc(ncolumns * sizeof(Datum)); + nulls = (bool *) palloc(ncolumns * sizeof(bool)); + + /* Break down the tuple into fields */ + heap_deform_tuple(&tuple, tupdesc, values, nulls); + + /* And build the result string */ + initStringInfo(&buf); + + appendStringInfoChar(&buf, '('); + + for (i = 0; i < ncolumns; i++) + { + Form_pg_attribute att = TupleDescAttr(tupdesc, i); + ColumnIOData *column_info = &my_extra->columns[i]; + Oid column_type = att->atttypid; + Datum attr; + char *value; + char *tmp; + bool nq; + + /* Ignore dropped columns in datatype */ + if (att->attisdropped) + continue; + + if (needComma) + appendStringInfoChar(&buf, ','); + needComma = true; + + if (nulls[i]) + { + /* emit nothing... */ + continue; + } + + /* + * Convert the column value to text + */ + if (column_info->column_type != column_type) + { + getTypeOutputInfo(column_type, + &column_info->typiofunc, + &column_info->typisvarlena); + fmgr_info_cxt(column_info->typiofunc, &column_info->proc, + fcinfo->flinfo->fn_mcxt); + column_info->column_type = column_type; + } + + attr = values[i]; + value = OutputFunctionCall(&column_info->proc, attr); + + /* Detect whether we need double quotes for this value */ + nq = (value[0] == '\0'); /* force quotes for empty string */ + for (tmp = value; *tmp; tmp++) + { + char ch = *tmp; + + if (ch == '"' || ch == '\\' || + ch == '(' || ch == ')' || ch == ',' || + isspace((unsigned char) ch)) + { + nq = true; + break; + } + } + + /* And emit the string */ + if (nq) + appendStringInfoCharMacro(&buf, '"'); + for (tmp = value; *tmp; tmp++) + { + char ch = *tmp; + + if (ch == '"' || ch == '\\') + appendStringInfoCharMacro(&buf, ch); + appendStringInfoCharMacro(&buf, ch); + } + if (nq) + appendStringInfoCharMacro(&buf, '"'); + } + + appendStringInfoChar(&buf, ')'); + + pfree(values); + pfree(nulls); + ReleaseTupleDesc(tupdesc); + + PG_RETURN_CSTRING(buf.data); +} + +/* + * record_recv - binary input routine for any composite type. + */ +Datum +record_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + Oid tupType = PG_GETARG_OID(1); + int32 tupTypmod = PG_GETARG_INT32(2); + HeapTupleHeader result; + TupleDesc tupdesc; + HeapTuple tuple; + RecordIOData *my_extra; + int ncolumns; + int usercols; + int validcols; + int i; + Datum *values; + bool *nulls; + + check_stack_depth(); /* recurses for record-type columns */ + + /* + * Give a friendly error message if we did not get enough info to identify + * the target record type. (lookup_rowtype_tupdesc would fail anyway, but + * with a non-user-friendly message.) In ordinary SQL usage, we'll get -1 + * for typmod, since composite types and RECORD have no type modifiers at + * the SQL level, and thus must fail for RECORD. 
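Editorial aside: record_out's emit loop just above applies the inverse rules. A field value is wrapped in double quotes if it is empty or contains a quote, backslash, parenthesis, comma, or whitespace, and any embedded '"' or '\' is doubled inside the quotes. A minimal standalone sketch of that quoting decision follows; the function name quote_field is illustrative and not part of the source.

#include <ctype.h>
#include <stdio.h>

/*
 * Print one composite-literal field with the quoting rules used by
 * record_out: quote when empty or when the value contains '"', '\\',
 * '(', ')', ',' or whitespace; double '"' and '\\' inside the quotes.
 * (Illustrative sketch only.)
 */
static void
quote_field(FILE *out, const char *value)
{
    int need_quotes = (value[0] == '\0');   /* force quotes for empty string */
    const char *p;

    for (p = value; *p && !need_quotes; p++)
    {
        char ch = *p;

        if (ch == '"' || ch == '\\' ||
            ch == '(' || ch == ')' || ch == ',' ||
            isspace((unsigned char) ch))
            need_quotes = 1;
    }

    if (!need_quotes)
    {
        fputs(value, out);
        return;
    }
    fputc('"', out);
    for (p = value; *p; p++)
    {
        if (*p == '"' || *p == '\\')
            fputc(*p, out);                 /* double the special character */
        fputc(*p, out);
    }
    fputc('"', out);
}

int main(void)
{
    fputc('(', stdout);
    quote_field(stdout, "plain");           /* no quoting needed */
    fputc(',', stdout);
    quote_field(stdout, "he said \"hi\"");
    fputc(',', stdout);
    quote_field(stdout, "");                /* empty string becomes "" */
    fputs(")\n", stdout);                   /* prints (plain,"he said ""hi""","") */
    return 0;
}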
However some callers can + * supply a valid typmod, and then we can do something useful for RECORD. + */ + if (tupType == RECORDOID && tupTypmod < 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("input of anonymous composite types is not implemented"))); + + tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); + ncolumns = tupdesc->natts; + + /* + * We arrange to look up the needed I/O info just once per series of + * calls, assuming the record type doesn't change underneath us. + */ + my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL || + my_extra->ncolumns != ncolumns) + { + fcinfo->flinfo->fn_extra = + MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + offsetof(RecordIOData, columns) + + ncolumns * sizeof(ColumnIOData)); + my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra; + my_extra->record_type = InvalidOid; + my_extra->record_typmod = 0; + } + + if (my_extra->record_type != tupType || + my_extra->record_typmod != tupTypmod) + { + MemSet(my_extra, 0, + offsetof(RecordIOData, columns) + + ncolumns * sizeof(ColumnIOData)); + my_extra->record_type = tupType; + my_extra->record_typmod = tupTypmod; + my_extra->ncolumns = ncolumns; + } + + values = (Datum *) palloc(ncolumns * sizeof(Datum)); + nulls = (bool *) palloc(ncolumns * sizeof(bool)); + + /* Fetch number of columns user thinks it has */ + usercols = pq_getmsgint(buf, 4); + + /* Need to scan to count nondeleted columns */ + validcols = 0; + for (i = 0; i < ncolumns; i++) + { + if (!TupleDescAttr(tupdesc, i)->attisdropped) + validcols++; + } + if (usercols != validcols) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("wrong number of columns: %d, expected %d", + usercols, validcols))); + + /* Process each column */ + for (i = 0; i < ncolumns; i++) + { + Form_pg_attribute att = TupleDescAttr(tupdesc, i); + ColumnIOData *column_info = &my_extra->columns[i]; + Oid column_type = att->atttypid; + Oid coltypoid; + int itemlen; + StringInfoData item_buf; + StringInfo bufptr; + char csave; + + /* Ignore dropped columns in datatype, but fill with nulls */ + if (att->attisdropped) + { + values[i] = (Datum) 0; + nulls[i] = true; + continue; + } + + /* Check column type recorded in the data */ + coltypoid = pq_getmsgint(buf, sizeof(Oid)); + + /* + * From a security standpoint, it doesn't matter whether the input's + * column type matches what we expect: the column type's receive + * function has to be robust enough to cope with invalid data. + * However, from a user-friendliness standpoint, it's nicer to + * complain about type mismatches than to throw "improper binary + * format" errors. But there's a problem: only built-in types have + * OIDs that are stable enough to believe that a mismatch is a real + * issue. So complain only if both OIDs are in the built-in range. + * Otherwise, carry on with the column type we "should" be getting. 
+ */ + if (coltypoid != column_type && + coltypoid < FirstGenbkiObjectId && + column_type < FirstGenbkiObjectId) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("binary data has type %u (%s) instead of expected %u (%s) in record column %d", + coltypoid, + format_type_extended(coltypoid, -1, + FORMAT_TYPE_ALLOW_INVALID), + column_type, + format_type_extended(column_type, -1, + FORMAT_TYPE_ALLOW_INVALID), + i + 1))); + + /* Get and check the item length */ + itemlen = pq_getmsgint(buf, 4); + if (itemlen < -1 || itemlen > (buf->len - buf->cursor)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("insufficient data left in message"))); + + if (itemlen == -1) + { + /* -1 length means NULL */ + bufptr = NULL; + nulls[i] = true; + csave = 0; /* keep compiler quiet */ + } + else + { + /* + * Rather than copying data around, we just set up a phony + * StringInfo pointing to the correct portion of the input buffer. + * We assume we can scribble on the input buffer so as to maintain + * the convention that StringInfos have a trailing null. + */ + item_buf.data = &buf->data[buf->cursor]; + item_buf.maxlen = itemlen + 1; + item_buf.len = itemlen; + item_buf.cursor = 0; + + buf->cursor += itemlen; + + csave = buf->data[buf->cursor]; + buf->data[buf->cursor] = '\0'; + + bufptr = &item_buf; + nulls[i] = false; + } + + /* Now call the column's receiveproc */ + if (column_info->column_type != column_type) + { + getTypeBinaryInputInfo(column_type, + &column_info->typiofunc, + &column_info->typioparam); + fmgr_info_cxt(column_info->typiofunc, &column_info->proc, + fcinfo->flinfo->fn_mcxt); + column_info->column_type = column_type; + } + + values[i] = ReceiveFunctionCall(&column_info->proc, + bufptr, + column_info->typioparam, + att->atttypmod); + + if (bufptr) + { + /* Trouble if it didn't eat the whole buffer */ + if (item_buf.cursor != itemlen) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("improper binary format in record column %d", + i + 1))); + + buf->data[buf->cursor] = csave; + } + } + + tuple = heap_form_tuple(tupdesc, values, nulls); + + /* + * We cannot return tuple->t_data because heap_form_tuple allocates it as + * part of a larger chunk, and our caller may expect to be able to pfree + * our result. So must copy the info into a new palloc chunk. + */ + result = (HeapTupleHeader) palloc(tuple->t_len); + memcpy(result, tuple->t_data, tuple->t_len); + + heap_freetuple(tuple); + pfree(values); + pfree(nulls); + ReleaseTupleDesc(tupdesc); + + PG_RETURN_HEAPTUPLEHEADER(result); +} + +/* + * record_send - binary output routine for any composite type. 
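Editorial aside: record_recv above and record_send below agree on a simple wire layout: a 4-byte count of non-dropped columns, then for each such column a 4-byte type OID, a 4-byte data length (-1 signals NULL), and that many bytes produced by the column type's own send function. The sketch below serializes a one-column record in that layout using big-endian integers; put_int32 is an illustrative stand-in for pq_sendint32, and the example assumes the built-in OID 23 for int4.

#include <stdint.h>
#include <stdio.h>

/* Append a 32-bit value in network byte order (what pq_sendint32 does). */
static size_t
put_int32(unsigned char *dst, int32_t v)
{
    uint32_t u = (uint32_t) v;

    dst[0] = (u >> 24) & 0xFF;
    dst[1] = (u >> 16) & 0xFF;
    dst[2] = (u >> 8) & 0xFF;
    dst[3] = u & 0xFF;
    return 4;
}

/*
 * Serialize a one-column record whose single field is an int4:
 * column count, type OID, length, data.  (Illustrative of the format
 * record_send emits and record_recv parses; not backend code.)
 */
int main(void)
{
    unsigned char msg[64];
    size_t off = 0;
    const int32_t INT4OID = 23;                /* built-in OID of int4 */
    int32_t field_value = 42;

    off += put_int32(msg + off, 1);            /* number of non-dropped columns */
    off += put_int32(msg + off, INT4OID);      /* column type OID */
    off += put_int32(msg + off, 4);            /* data length; -1 would mean NULL */
    off += put_int32(msg + off, field_value);  /* int4's binary (send) form */

    printf("message is %zu bytes:", off);
    for (size_t i = 0; i < off; i++)
        printf(" %02x", msg[i]);
    printf("\n");
    return 0;
}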
+ */ +Datum +record_send(PG_FUNCTION_ARGS) +{ + HeapTupleHeader rec = PG_GETARG_HEAPTUPLEHEADER(0); + Oid tupType; + int32 tupTypmod; + TupleDesc tupdesc; + HeapTupleData tuple; + RecordIOData *my_extra; + int ncolumns; + int validcols; + int i; + Datum *values; + bool *nulls; + StringInfoData buf; + + check_stack_depth(); /* recurses for record-type columns */ + + /* Extract type info from the tuple itself */ + tupType = HeapTupleHeaderGetTypeId(rec); + tupTypmod = HeapTupleHeaderGetTypMod(rec); + tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); + ncolumns = tupdesc->natts; + + /* Build a temporary HeapTuple control structure */ + tuple.t_len = HeapTupleHeaderGetDatumLength(rec); + ItemPointerSetInvalid(&(tuple.t_self)); + tuple.t_tableOid = InvalidOid; + tuple.t_data = rec; + + /* + * We arrange to look up the needed I/O info just once per series of + * calls, assuming the record type doesn't change underneath us. + */ + my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL || + my_extra->ncolumns != ncolumns) + { + fcinfo->flinfo->fn_extra = + MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + offsetof(RecordIOData, columns) + + ncolumns * sizeof(ColumnIOData)); + my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra; + my_extra->record_type = InvalidOid; + my_extra->record_typmod = 0; + } + + if (my_extra->record_type != tupType || + my_extra->record_typmod != tupTypmod) + { + MemSet(my_extra, 0, + offsetof(RecordIOData, columns) + + ncolumns * sizeof(ColumnIOData)); + my_extra->record_type = tupType; + my_extra->record_typmod = tupTypmod; + my_extra->ncolumns = ncolumns; + } + + values = (Datum *) palloc(ncolumns * sizeof(Datum)); + nulls = (bool *) palloc(ncolumns * sizeof(bool)); + + /* Break down the tuple into fields */ + heap_deform_tuple(&tuple, tupdesc, values, nulls); + + /* And build the result string */ + pq_begintypsend(&buf); + + /* Need to scan to count nondeleted columns */ + validcols = 0; + for (i = 0; i < ncolumns; i++) + { + if (!TupleDescAttr(tupdesc, i)->attisdropped) + validcols++; + } + pq_sendint32(&buf, validcols); + + for (i = 0; i < ncolumns; i++) + { + Form_pg_attribute att = TupleDescAttr(tupdesc, i); + ColumnIOData *column_info = &my_extra->columns[i]; + Oid column_type = att->atttypid; + Datum attr; + bytea *outputbytes; + + /* Ignore dropped columns in datatype */ + if (att->attisdropped) + continue; + + pq_sendint32(&buf, column_type); + + if (nulls[i]) + { + /* emit -1 data length to signify a NULL */ + pq_sendint32(&buf, -1); + continue; + } + + /* + * Convert the column value to binary + */ + if (column_info->column_type != column_type) + { + getTypeBinaryOutputInfo(column_type, + &column_info->typiofunc, + &column_info->typisvarlena); + fmgr_info_cxt(column_info->typiofunc, &column_info->proc, + fcinfo->flinfo->fn_mcxt); + column_info->column_type = column_type; + } + + attr = values[i]; + outputbytes = SendFunctionCall(&column_info->proc, attr); + pq_sendint32(&buf, VARSIZE(outputbytes) - VARHDRSZ); + pq_sendbytes(&buf, VARDATA(outputbytes), + VARSIZE(outputbytes) - VARHDRSZ); + } + + pfree(values); + pfree(nulls); + ReleaseTupleDesc(tupdesc); + + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + + +/* + * record_cmp() + * Internal comparison function for records. + * + * Returns -1, 0 or 1 + * + * Do not assume that the two inputs are exactly the same record type; + * for instance we might be comparing an anonymous ROW() construct against a + * named composite type. 
We will compare as long as they have the same number + * of non-dropped columns of the same types. + */ +static int +record_cmp(FunctionCallInfo fcinfo) +{ + HeapTupleHeader record1 = PG_GETARG_HEAPTUPLEHEADER(0); + HeapTupleHeader record2 = PG_GETARG_HEAPTUPLEHEADER(1); + int result = 0; + Oid tupType1; + Oid tupType2; + int32 tupTypmod1; + int32 tupTypmod2; + TupleDesc tupdesc1; + TupleDesc tupdesc2; + HeapTupleData tuple1; + HeapTupleData tuple2; + int ncolumns1; + int ncolumns2; + RecordCompareData *my_extra; + int ncols; + Datum *values1; + Datum *values2; + bool *nulls1; + bool *nulls2; + int i1; + int i2; + int j; + + check_stack_depth(); /* recurses for record-type columns */ + + /* Extract type info from the tuples */ + tupType1 = HeapTupleHeaderGetTypeId(record1); + tupTypmod1 = HeapTupleHeaderGetTypMod(record1); + tupdesc1 = lookup_rowtype_tupdesc(tupType1, tupTypmod1); + ncolumns1 = tupdesc1->natts; + tupType2 = HeapTupleHeaderGetTypeId(record2); + tupTypmod2 = HeapTupleHeaderGetTypMod(record2); + tupdesc2 = lookup_rowtype_tupdesc(tupType2, tupTypmod2); + ncolumns2 = tupdesc2->natts; + + /* Build temporary HeapTuple control structures */ + tuple1.t_len = HeapTupleHeaderGetDatumLength(record1); + ItemPointerSetInvalid(&(tuple1.t_self)); + tuple1.t_tableOid = InvalidOid; + tuple1.t_data = record1; + tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); + ItemPointerSetInvalid(&(tuple2.t_self)); + tuple2.t_tableOid = InvalidOid; + tuple2.t_data = record2; + + /* + * We arrange to look up the needed comparison info just once per series + * of calls, assuming the record types don't change underneath us. + */ + ncols = Max(ncolumns1, ncolumns2); + my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL || + my_extra->ncolumns < ncols) + { + fcinfo->flinfo->fn_extra = + MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + offsetof(RecordCompareData, columns) + + ncols * sizeof(ColumnCompareData)); + my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra; + my_extra->ncolumns = ncols; + my_extra->record1_type = InvalidOid; + my_extra->record1_typmod = 0; + my_extra->record2_type = InvalidOid; + my_extra->record2_typmod = 0; + } + + if (my_extra->record1_type != tupType1 || + my_extra->record1_typmod != tupTypmod1 || + my_extra->record2_type != tupType2 || + my_extra->record2_typmod != tupTypmod2) + { + MemSet(my_extra->columns, 0, ncols * sizeof(ColumnCompareData)); + my_extra->record1_type = tupType1; + my_extra->record1_typmod = tupTypmod1; + my_extra->record2_type = tupType2; + my_extra->record2_typmod = tupTypmod2; + } + + /* Break down the tuples into fields */ + values1 = (Datum *) palloc(ncolumns1 * sizeof(Datum)); + nulls1 = (bool *) palloc(ncolumns1 * sizeof(bool)); + heap_deform_tuple(&tuple1, tupdesc1, values1, nulls1); + values2 = (Datum *) palloc(ncolumns2 * sizeof(Datum)); + nulls2 = (bool *) palloc(ncolumns2 * sizeof(bool)); + heap_deform_tuple(&tuple2, tupdesc2, values2, nulls2); + + /* + * Scan corresponding columns, allowing for dropped columns in different + * places in the two rows. i1 and i2 are physical column indexes, j is + * the logical column index. 
+ */ + i1 = i2 = j = 0; + while (i1 < ncolumns1 || i2 < ncolumns2) + { + Form_pg_attribute att1; + Form_pg_attribute att2; + TypeCacheEntry *typentry; + Oid collation; + + /* + * Skip dropped columns + */ + if (i1 < ncolumns1 && TupleDescAttr(tupdesc1, i1)->attisdropped) + { + i1++; + continue; + } + if (i2 < ncolumns2 && TupleDescAttr(tupdesc2, i2)->attisdropped) + { + i2++; + continue; + } + if (i1 >= ncolumns1 || i2 >= ncolumns2) + break; /* we'll deal with mismatch below loop */ + + att1 = TupleDescAttr(tupdesc1, i1); + att2 = TupleDescAttr(tupdesc2, i2); + + /* + * Have two matching columns, they must be same type + */ + if (att1->atttypid != att2->atttypid) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("cannot compare dissimilar column types %s and %s at record column %d", + format_type_be(att1->atttypid), + format_type_be(att2->atttypid), + j + 1))); + + /* + * If they're not same collation, we don't complain here, but the + * comparison function might. + */ + collation = att1->attcollation; + if (collation != att2->attcollation) + collation = InvalidOid; + + /* + * Lookup the comparison function if not done already + */ + typentry = my_extra->columns[j].typentry; + if (typentry == NULL || + typentry->type_id != att1->atttypid) + { + typentry = lookup_type_cache(att1->atttypid, + TYPECACHE_CMP_PROC_FINFO); + if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify a comparison function for type %s", + format_type_be(typentry->type_id)))); + my_extra->columns[j].typentry = typentry; + } + + /* + * We consider two NULLs equal; NULL > not-NULL. + */ + if (!nulls1[i1] || !nulls2[i2]) + { + LOCAL_FCINFO(locfcinfo, 2); + int32 cmpresult; + + if (nulls1[i1]) + { + /* arg1 is greater than arg2 */ + result = 1; + break; + } + if (nulls2[i2]) + { + /* arg1 is less than arg2 */ + result = -1; + break; + } + + /* Compare the pair of elements */ + InitFunctionCallInfoData(*locfcinfo, &typentry->cmp_proc_finfo, 2, + collation, NULL, NULL); + locfcinfo->args[0].value = values1[i1]; + locfcinfo->args[0].isnull = false; + locfcinfo->args[1].value = values2[i2]; + locfcinfo->args[1].isnull = false; + cmpresult = DatumGetInt32(FunctionCallInvoke(locfcinfo)); + + /* We don't expect comparison support functions to return null */ + Assert(!locfcinfo->isnull); + + if (cmpresult < 0) + { + /* arg1 is less than arg2 */ + result = -1; + break; + } + else if (cmpresult > 0) + { + /* arg1 is greater than arg2 */ + result = 1; + break; + } + } + + /* equal, so continue to next column */ + i1++, i2++, j++; + } + + /* + * If we didn't break out of the loop early, check for column count + * mismatch. (We do not report such mismatch if we found unequal column + * values; is that a feature or a bug?) + */ + if (result == 0) + { + if (i1 != ncolumns1 || i2 != ncolumns2) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("cannot compare record types with different numbers of columns"))); + } + + pfree(values1); + pfree(nulls1); + pfree(values2); + pfree(nulls2); + ReleaseTupleDesc(tupdesc1); + ReleaseTupleDesc(tupdesc2); + + /* Avoid leaking memory when handed toasted input. */ + PG_FREE_IF_COPY(record1, 0); + PG_FREE_IF_COPY(record2, 1); + + return result; +} + +/* + * record_eq : + * compares two records for equality + * result : + * returns true if the records are equal, false otherwise. 
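Editorial aside: the column loop in record_cmp above gives record comparison its SQL-level semantics: dropped columns are skipped on either side, two NULLs compare equal, a NULL sorts after any non-NULL, the first unequal column decides the result, and a column-count mismatch is only reported when everything compared so far was equal. A minimal standalone sketch of those NULL and early-exit rules over arrays of nullable ints; the Cell type and row_cmp are illustrative names, not backend code.

#include <stdio.h>

typedef struct
{
    int  value;
    int  isnull;        /* 1 if this "column" is NULL */
} Cell;

/*
 * Three-way comparison with record_cmp's NULL rules: NULLs are equal to
 * each other and greater than any non-NULL; the first difference wins.
 * Assumes both rows have the same number of cells (the backend raises
 * an error otherwise).  Illustrative sketch only.
 */
static int
row_cmp(const Cell *a, const Cell *b, int ncols)
{
    for (int i = 0; i < ncols; i++)
    {
        if (a[i].isnull && b[i].isnull)
            continue;                       /* two NULLs are considered equal */
        if (a[i].isnull)
            return 1;                       /* NULL > not-NULL */
        if (b[i].isnull)
            return -1;
        if (a[i].value != b[i].value)
            return (a[i].value < b[i].value) ? -1 : 1;
    }
    return 0;
}

int main(void)
{
    Cell r1[] = {{1, 0}, {0, 1}};           /* (1, NULL) */
    Cell r2[] = {{1, 0}, {7, 0}};           /* (1, 7)    */

    printf("cmp((1,NULL),(1,7)) = %d\n", row_cmp(r1, r2, 2));   /* 1: NULL sorts last */
    printf("cmp((1,7),(1,7))    = %d\n", row_cmp(r2, r2, 2));   /* 0 */
    return 0;
}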
+ * + * Note: we do not use record_cmp here, since equality may be meaningful in + * datatypes that don't have a total ordering (and hence no btree support). + */ +Datum +record_eq(PG_FUNCTION_ARGS) +{ + HeapTupleHeader record1 = PG_GETARG_HEAPTUPLEHEADER(0); + HeapTupleHeader record2 = PG_GETARG_HEAPTUPLEHEADER(1); + bool result = true; + Oid tupType1; + Oid tupType2; + int32 tupTypmod1; + int32 tupTypmod2; + TupleDesc tupdesc1; + TupleDesc tupdesc2; + HeapTupleData tuple1; + HeapTupleData tuple2; + int ncolumns1; + int ncolumns2; + RecordCompareData *my_extra; + int ncols; + Datum *values1; + Datum *values2; + bool *nulls1; + bool *nulls2; + int i1; + int i2; + int j; + + check_stack_depth(); /* recurses for record-type columns */ + + /* Extract type info from the tuples */ + tupType1 = HeapTupleHeaderGetTypeId(record1); + tupTypmod1 = HeapTupleHeaderGetTypMod(record1); + tupdesc1 = lookup_rowtype_tupdesc(tupType1, tupTypmod1); + ncolumns1 = tupdesc1->natts; + tupType2 = HeapTupleHeaderGetTypeId(record2); + tupTypmod2 = HeapTupleHeaderGetTypMod(record2); + tupdesc2 = lookup_rowtype_tupdesc(tupType2, tupTypmod2); + ncolumns2 = tupdesc2->natts; + + /* Build temporary HeapTuple control structures */ + tuple1.t_len = HeapTupleHeaderGetDatumLength(record1); + ItemPointerSetInvalid(&(tuple1.t_self)); + tuple1.t_tableOid = InvalidOid; + tuple1.t_data = record1; + tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); + ItemPointerSetInvalid(&(tuple2.t_self)); + tuple2.t_tableOid = InvalidOid; + tuple2.t_data = record2; + + /* + * We arrange to look up the needed comparison info just once per series + * of calls, assuming the record types don't change underneath us. + */ + ncols = Max(ncolumns1, ncolumns2); + my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL || + my_extra->ncolumns < ncols) + { + fcinfo->flinfo->fn_extra = + MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + offsetof(RecordCompareData, columns) + + ncols * sizeof(ColumnCompareData)); + my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra; + my_extra->ncolumns = ncols; + my_extra->record1_type = InvalidOid; + my_extra->record1_typmod = 0; + my_extra->record2_type = InvalidOid; + my_extra->record2_typmod = 0; + } + + if (my_extra->record1_type != tupType1 || + my_extra->record1_typmod != tupTypmod1 || + my_extra->record2_type != tupType2 || + my_extra->record2_typmod != tupTypmod2) + { + MemSet(my_extra->columns, 0, ncols * sizeof(ColumnCompareData)); + my_extra->record1_type = tupType1; + my_extra->record1_typmod = tupTypmod1; + my_extra->record2_type = tupType2; + my_extra->record2_typmod = tupTypmod2; + } + + /* Break down the tuples into fields */ + values1 = (Datum *) palloc(ncolumns1 * sizeof(Datum)); + nulls1 = (bool *) palloc(ncolumns1 * sizeof(bool)); + heap_deform_tuple(&tuple1, tupdesc1, values1, nulls1); + values2 = (Datum *) palloc(ncolumns2 * sizeof(Datum)); + nulls2 = (bool *) palloc(ncolumns2 * sizeof(bool)); + heap_deform_tuple(&tuple2, tupdesc2, values2, nulls2); + + /* + * Scan corresponding columns, allowing for dropped columns in different + * places in the two rows. i1 and i2 are physical column indexes, j is + * the logical column index. 
+ */ + i1 = i2 = j = 0; + while (i1 < ncolumns1 || i2 < ncolumns2) + { + LOCAL_FCINFO(locfcinfo, 2); + Form_pg_attribute att1; + Form_pg_attribute att2; + TypeCacheEntry *typentry; + Oid collation; + bool oprresult; + + /* + * Skip dropped columns + */ + if (i1 < ncolumns1 && TupleDescAttr(tupdesc1, i1)->attisdropped) + { + i1++; + continue; + } + if (i2 < ncolumns2 && TupleDescAttr(tupdesc2, i2)->attisdropped) + { + i2++; + continue; + } + if (i1 >= ncolumns1 || i2 >= ncolumns2) + break; /* we'll deal with mismatch below loop */ + + att1 = TupleDescAttr(tupdesc1, i1); + att2 = TupleDescAttr(tupdesc2, i2); + + /* + * Have two matching columns, they must be same type + */ + if (att1->atttypid != att2->atttypid) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("cannot compare dissimilar column types %s and %s at record column %d", + format_type_be(att1->atttypid), + format_type_be(att2->atttypid), + j + 1))); + + /* + * If they're not same collation, we don't complain here, but the + * equality function might. + */ + collation = att1->attcollation; + if (collation != att2->attcollation) + collation = InvalidOid; + + /* + * Lookup the equality function if not done already + */ + typentry = my_extra->columns[j].typentry; + if (typentry == NULL || + typentry->type_id != att1->atttypid) + { + typentry = lookup_type_cache(att1->atttypid, + TYPECACHE_EQ_OPR_FINFO); + if (!OidIsValid(typentry->eq_opr_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify an equality operator for type %s", + format_type_be(typentry->type_id)))); + my_extra->columns[j].typentry = typentry; + } + + /* + * We consider two NULLs equal; NULL > not-NULL. + */ + if (!nulls1[i1] || !nulls2[i2]) + { + if (nulls1[i1] || nulls2[i2]) + { + result = false; + break; + } + + /* Compare the pair of elements */ + InitFunctionCallInfoData(*locfcinfo, &typentry->eq_opr_finfo, 2, + collation, NULL, NULL); + locfcinfo->args[0].value = values1[i1]; + locfcinfo->args[0].isnull = false; + locfcinfo->args[1].value = values2[i2]; + locfcinfo->args[1].isnull = false; + oprresult = DatumGetBool(FunctionCallInvoke(locfcinfo)); + if (locfcinfo->isnull || !oprresult) + { + result = false; + break; + } + } + + /* equal, so continue to next column */ + i1++, i2++, j++; + } + + /* + * If we didn't break out of the loop early, check for column count + * mismatch. (We do not report such mismatch if we found unequal column + * values; is that a feature or a bug?) + */ + if (result) + { + if (i1 != ncolumns1 || i2 != ncolumns2) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("cannot compare record types with different numbers of columns"))); + } + + pfree(values1); + pfree(nulls1); + pfree(values2); + pfree(nulls2); + ReleaseTupleDesc(tupdesc1); + ReleaseTupleDesc(tupdesc2); + + /* Avoid leaking memory when handed toasted input. 
*/ + PG_FREE_IF_COPY(record1, 0); + PG_FREE_IF_COPY(record2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +record_ne(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(!DatumGetBool(record_eq(fcinfo))); +} + +Datum +record_lt(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(record_cmp(fcinfo) < 0); +} + +Datum +record_gt(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(record_cmp(fcinfo) > 0); +} + +Datum +record_le(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(record_cmp(fcinfo) <= 0); +} + +Datum +record_ge(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(record_cmp(fcinfo) >= 0); +} + +Datum +btrecordcmp(PG_FUNCTION_ARGS) +{ + PG_RETURN_INT32(record_cmp(fcinfo)); +} + + +/* + * record_image_cmp : + * Internal byte-oriented comparison function for records. + * + * Returns -1, 0 or 1 + * + * Note: The normal concepts of "equality" do not apply here; different + * representation of values considered to be equal are not considered to be + * identical. As an example, for the citext type 'A' and 'a' are equal, but + * they are not identical. + */ +static int +record_image_cmp(FunctionCallInfo fcinfo) +{ + HeapTupleHeader record1 = PG_GETARG_HEAPTUPLEHEADER(0); + HeapTupleHeader record2 = PG_GETARG_HEAPTUPLEHEADER(1); + int result = 0; + Oid tupType1; + Oid tupType2; + int32 tupTypmod1; + int32 tupTypmod2; + TupleDesc tupdesc1; + TupleDesc tupdesc2; + HeapTupleData tuple1; + HeapTupleData tuple2; + int ncolumns1; + int ncolumns2; + RecordCompareData *my_extra; + int ncols; + Datum *values1; + Datum *values2; + bool *nulls1; + bool *nulls2; + int i1; + int i2; + int j; + + /* Extract type info from the tuples */ + tupType1 = HeapTupleHeaderGetTypeId(record1); + tupTypmod1 = HeapTupleHeaderGetTypMod(record1); + tupdesc1 = lookup_rowtype_tupdesc(tupType1, tupTypmod1); + ncolumns1 = tupdesc1->natts; + tupType2 = HeapTupleHeaderGetTypeId(record2); + tupTypmod2 = HeapTupleHeaderGetTypMod(record2); + tupdesc2 = lookup_rowtype_tupdesc(tupType2, tupTypmod2); + ncolumns2 = tupdesc2->natts; + + /* Build temporary HeapTuple control structures */ + tuple1.t_len = HeapTupleHeaderGetDatumLength(record1); + ItemPointerSetInvalid(&(tuple1.t_self)); + tuple1.t_tableOid = InvalidOid; + tuple1.t_data = record1; + tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); + ItemPointerSetInvalid(&(tuple2.t_self)); + tuple2.t_tableOid = InvalidOid; + tuple2.t_data = record2; + + /* + * We arrange to look up the needed comparison info just once per series + * of calls, assuming the record types don't change underneath us. 
+ */ + ncols = Max(ncolumns1, ncolumns2); + my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL || + my_extra->ncolumns < ncols) + { + fcinfo->flinfo->fn_extra = + MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + offsetof(RecordCompareData, columns) + + ncols * sizeof(ColumnCompareData)); + my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra; + my_extra->ncolumns = ncols; + my_extra->record1_type = InvalidOid; + my_extra->record1_typmod = 0; + my_extra->record2_type = InvalidOid; + my_extra->record2_typmod = 0; + } + + if (my_extra->record1_type != tupType1 || + my_extra->record1_typmod != tupTypmod1 || + my_extra->record2_type != tupType2 || + my_extra->record2_typmod != tupTypmod2) + { + MemSet(my_extra->columns, 0, ncols * sizeof(ColumnCompareData)); + my_extra->record1_type = tupType1; + my_extra->record1_typmod = tupTypmod1; + my_extra->record2_type = tupType2; + my_extra->record2_typmod = tupTypmod2; + } + + /* Break down the tuples into fields */ + values1 = (Datum *) palloc(ncolumns1 * sizeof(Datum)); + nulls1 = (bool *) palloc(ncolumns1 * sizeof(bool)); + heap_deform_tuple(&tuple1, tupdesc1, values1, nulls1); + values2 = (Datum *) palloc(ncolumns2 * sizeof(Datum)); + nulls2 = (bool *) palloc(ncolumns2 * sizeof(bool)); + heap_deform_tuple(&tuple2, tupdesc2, values2, nulls2); + + /* + * Scan corresponding columns, allowing for dropped columns in different + * places in the two rows. i1 and i2 are physical column indexes, j is + * the logical column index. + */ + i1 = i2 = j = 0; + while (i1 < ncolumns1 || i2 < ncolumns2) + { + Form_pg_attribute att1; + Form_pg_attribute att2; + + /* + * Skip dropped columns + */ + if (i1 < ncolumns1 && TupleDescAttr(tupdesc1, i1)->attisdropped) + { + i1++; + continue; + } + if (i2 < ncolumns2 && TupleDescAttr(tupdesc2, i2)->attisdropped) + { + i2++; + continue; + } + if (i1 >= ncolumns1 || i2 >= ncolumns2) + break; /* we'll deal with mismatch below loop */ + + att1 = TupleDescAttr(tupdesc1, i1); + att2 = TupleDescAttr(tupdesc2, i2); + + /* + * Have two matching columns, they must be same type + */ + if (att1->atttypid != att2->atttypid) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("cannot compare dissimilar column types %s and %s at record column %d", + format_type_be(att1->atttypid), + format_type_be(att2->atttypid), + j + 1))); + + /* + * The same type should have the same length (or both should be + * variable). + */ + Assert(att1->attlen == att2->attlen); + + /* + * We consider two NULLs equal; NULL > not-NULL. + */ + if (!nulls1[i1] || !nulls2[i2]) + { + int cmpresult = 0; + + if (nulls1[i1]) + { + /* arg1 is greater than arg2 */ + result = 1; + break; + } + if (nulls2[i2]) + { + /* arg1 is less than arg2 */ + result = -1; + break; + } + + /* Compare the pair of elements */ + if (att1->attbyval) + { + if (values1[i1] != values2[i2]) + cmpresult = (values1[i1] < values2[i2]) ? -1 : 1; + } + else if (att1->attlen > 0) + { + cmpresult = memcmp(DatumGetPointer(values1[i1]), + DatumGetPointer(values2[i2]), + att1->attlen); + } + else if (att1->attlen == -1) + { + Size len1, + len2; + struct varlena *arg1val; + struct varlena *arg2val; + + len1 = toast_raw_datum_size(values1[i1]); + len2 = toast_raw_datum_size(values2[i2]); + arg1val = PG_DETOAST_DATUM_PACKED(values1[i1]); + arg2val = PG_DETOAST_DATUM_PACKED(values2[i2]); + + cmpresult = memcmp(VARDATA_ANY(arg1val), + VARDATA_ANY(arg2val), + Min(len1, len2) - VARHDRSZ); + if ((cmpresult == 0) && (len1 != len2)) + cmpresult = (len1 < len2) ? 
-1 : 1; + + if ((Pointer) arg1val != (Pointer) values1[i1]) + pfree(arg1val); + if ((Pointer) arg2val != (Pointer) values2[i2]) + pfree(arg2val); + } + else + elog(ERROR, "unexpected attlen: %d", att1->attlen); + + if (cmpresult < 0) + { + /* arg1 is less than arg2 */ + result = -1; + break; + } + else if (cmpresult > 0) + { + /* arg1 is greater than arg2 */ + result = 1; + break; + } + } + + /* equal, so continue to next column */ + i1++, i2++, j++; + } + + /* + * If we didn't break out of the loop early, check for column count + * mismatch. (We do not report such mismatch if we found unequal column + * values; is that a feature or a bug?) + */ + if (result == 0) + { + if (i1 != ncolumns1 || i2 != ncolumns2) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("cannot compare record types with different numbers of columns"))); + } + + pfree(values1); + pfree(nulls1); + pfree(values2); + pfree(nulls2); + ReleaseTupleDesc(tupdesc1); + ReleaseTupleDesc(tupdesc2); + + /* Avoid leaking memory when handed toasted input. */ + PG_FREE_IF_COPY(record1, 0); + PG_FREE_IF_COPY(record2, 1); + + return result; +} + +/* + * record_image_eq : + * compares two records for identical contents, based on byte images + * result : + * returns true if the records are identical, false otherwise. + * + * Note: we do not use record_image_cmp here, since we can avoid + * de-toasting for unequal lengths this way. + */ +Datum +record_image_eq(PG_FUNCTION_ARGS) +{ + HeapTupleHeader record1 = PG_GETARG_HEAPTUPLEHEADER(0); + HeapTupleHeader record2 = PG_GETARG_HEAPTUPLEHEADER(1); + bool result = true; + Oid tupType1; + Oid tupType2; + int32 tupTypmod1; + int32 tupTypmod2; + TupleDesc tupdesc1; + TupleDesc tupdesc2; + HeapTupleData tuple1; + HeapTupleData tuple2; + int ncolumns1; + int ncolumns2; + RecordCompareData *my_extra; + int ncols; + Datum *values1; + Datum *values2; + bool *nulls1; + bool *nulls2; + int i1; + int i2; + int j; + + /* Extract type info from the tuples */ + tupType1 = HeapTupleHeaderGetTypeId(record1); + tupTypmod1 = HeapTupleHeaderGetTypMod(record1); + tupdesc1 = lookup_rowtype_tupdesc(tupType1, tupTypmod1); + ncolumns1 = tupdesc1->natts; + tupType2 = HeapTupleHeaderGetTypeId(record2); + tupTypmod2 = HeapTupleHeaderGetTypMod(record2); + tupdesc2 = lookup_rowtype_tupdesc(tupType2, tupTypmod2); + ncolumns2 = tupdesc2->natts; + + /* Build temporary HeapTuple control structures */ + tuple1.t_len = HeapTupleHeaderGetDatumLength(record1); + ItemPointerSetInvalid(&(tuple1.t_self)); + tuple1.t_tableOid = InvalidOid; + tuple1.t_data = record1; + tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); + ItemPointerSetInvalid(&(tuple2.t_self)); + tuple2.t_tableOid = InvalidOid; + tuple2.t_data = record2; + + /* + * We arrange to look up the needed comparison info just once per series + * of calls, assuming the record types don't change underneath us. 
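Editorial aside: for variable-length columns, record_image_cmp above detoasts both values and then orders them purely by bytes: memcmp over the shorter length, with the shorter value winning ties. Values that an ordinary equality operator would treat as equal (the comment's citext 'A' versus 'a' example) can therefore still compare as different. A standalone sketch of that length-then-bytes rule, with plain byte buffers standing in for detoasted varlena data and image_cmp as an illustrative name.

#include <stdio.h>
#include <string.h>

/*
 * Byte-image ordering as used by record_image_cmp for varlena columns:
 * compare the common prefix with memcmp, then break ties by length.
 * (Illustrative sketch; the backend also handles by-value and
 * fixed-length attributes separately.)
 */
static int
image_cmp(const unsigned char *a, size_t lena,
          const unsigned char *b, size_t lenb)
{
    size_t common = (lena < lenb) ? lena : lenb;
    int cmp = memcmp(a, b, common);

    if (cmp == 0 && lena != lenb)
        cmp = (lena < lenb) ? -1 : 1;
    return (cmp < 0) ? -1 : (cmp > 0) ? 1 : 0;
}

int main(void)
{
    const unsigned char x[] = "abc";
    const unsigned char y[] = "abcd";
    const unsigned char z[] = "abd";

    printf("%d\n", image_cmp(x, 3, y, 4));  /* -1: equal prefix, x is shorter */
    printf("%d\n", image_cmp(z, 3, y, 4));  /*  1: 'd' > 'c' at position 2 */
    printf("%d\n", image_cmp(x, 3, x, 3));  /*  0 */
    return 0;
}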
+ */ + ncols = Max(ncolumns1, ncolumns2); + my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL || + my_extra->ncolumns < ncols) + { + fcinfo->flinfo->fn_extra = + MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + offsetof(RecordCompareData, columns) + + ncols * sizeof(ColumnCompareData)); + my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra; + my_extra->ncolumns = ncols; + my_extra->record1_type = InvalidOid; + my_extra->record1_typmod = 0; + my_extra->record2_type = InvalidOid; + my_extra->record2_typmod = 0; + } + + if (my_extra->record1_type != tupType1 || + my_extra->record1_typmod != tupTypmod1 || + my_extra->record2_type != tupType2 || + my_extra->record2_typmod != tupTypmod2) + { + MemSet(my_extra->columns, 0, ncols * sizeof(ColumnCompareData)); + my_extra->record1_type = tupType1; + my_extra->record1_typmod = tupTypmod1; + my_extra->record2_type = tupType2; + my_extra->record2_typmod = tupTypmod2; + } + + /* Break down the tuples into fields */ + values1 = (Datum *) palloc(ncolumns1 * sizeof(Datum)); + nulls1 = (bool *) palloc(ncolumns1 * sizeof(bool)); + heap_deform_tuple(&tuple1, tupdesc1, values1, nulls1); + values2 = (Datum *) palloc(ncolumns2 * sizeof(Datum)); + nulls2 = (bool *) palloc(ncolumns2 * sizeof(bool)); + heap_deform_tuple(&tuple2, tupdesc2, values2, nulls2); + + /* + * Scan corresponding columns, allowing for dropped columns in different + * places in the two rows. i1 and i2 are physical column indexes, j is + * the logical column index. + */ + i1 = i2 = j = 0; + while (i1 < ncolumns1 || i2 < ncolumns2) + { + Form_pg_attribute att1; + Form_pg_attribute att2; + + /* + * Skip dropped columns + */ + if (i1 < ncolumns1 && TupleDescAttr(tupdesc1, i1)->attisdropped) + { + i1++; + continue; + } + if (i2 < ncolumns2 && TupleDescAttr(tupdesc2, i2)->attisdropped) + { + i2++; + continue; + } + if (i1 >= ncolumns1 || i2 >= ncolumns2) + break; /* we'll deal with mismatch below loop */ + + att1 = TupleDescAttr(tupdesc1, i1); + att2 = TupleDescAttr(tupdesc2, i2); + + /* + * Have two matching columns, they must be same type + */ + if (att1->atttypid != att2->atttypid) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("cannot compare dissimilar column types %s and %s at record column %d", + format_type_be(att1->atttypid), + format_type_be(att2->atttypid), + j + 1))); + + /* + * We consider two NULLs equal; NULL > not-NULL. + */ + if (!nulls1[i1] || !nulls2[i2]) + { + if (nulls1[i1] || nulls2[i2]) + { + result = false; + break; + } + + /* Compare the pair of elements */ + result = datum_image_eq(values1[i1], values2[i2], att1->attbyval, att2->attlen); + if (!result) + break; + } + + /* equal, so continue to next column */ + i1++, i2++, j++; + } + + /* + * If we didn't break out of the loop early, check for column count + * mismatch. (We do not report such mismatch if we found unequal column + * values; is that a feature or a bug?) + */ + if (result) + { + if (i1 != ncolumns1 || i2 != ncolumns2) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("cannot compare record types with different numbers of columns"))); + } + + pfree(values1); + pfree(nulls1); + pfree(values2); + pfree(nulls2); + ReleaseTupleDesc(tupdesc1); + ReleaseTupleDesc(tupdesc2); + + /* Avoid leaking memory when handed toasted input. 
*/ + PG_FREE_IF_COPY(record1, 0); + PG_FREE_IF_COPY(record2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +record_image_ne(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(!DatumGetBool(record_image_eq(fcinfo))); +} + +Datum +record_image_lt(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(record_image_cmp(fcinfo) < 0); +} + +Datum +record_image_gt(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(record_image_cmp(fcinfo) > 0); +} + +Datum +record_image_le(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(record_image_cmp(fcinfo) <= 0); +} + +Datum +record_image_ge(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(record_image_cmp(fcinfo) >= 0); +} + +Datum +btrecordimagecmp(PG_FUNCTION_ARGS) +{ + PG_RETURN_INT32(record_image_cmp(fcinfo)); +} + + +/* + * Row type hash functions + */ + +Datum +hash_record(PG_FUNCTION_ARGS) +{ + HeapTupleHeader record = PG_GETARG_HEAPTUPLEHEADER(0); + uint32 result = 0; + Oid tupType; + int32 tupTypmod; + TupleDesc tupdesc; + HeapTupleData tuple; + int ncolumns; + RecordCompareData *my_extra; + Datum *values; + bool *nulls; + + check_stack_depth(); /* recurses for record-type columns */ + + /* Extract type info from tuple */ + tupType = HeapTupleHeaderGetTypeId(record); + tupTypmod = HeapTupleHeaderGetTypMod(record); + tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); + ncolumns = tupdesc->natts; + + /* Build temporary HeapTuple control structure */ + tuple.t_len = HeapTupleHeaderGetDatumLength(record); + ItemPointerSetInvalid(&(tuple.t_self)); + tuple.t_tableOid = InvalidOid; + tuple.t_data = record; + + /* + * We arrange to look up the needed hashing info just once per series of + * calls, assuming the record type doesn't change underneath us. + */ + my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL || + my_extra->ncolumns < ncolumns) + { + fcinfo->flinfo->fn_extra = + MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + offsetof(RecordCompareData, columns) + + ncolumns * sizeof(ColumnCompareData)); + my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra; + my_extra->ncolumns = ncolumns; + my_extra->record1_type = InvalidOid; + my_extra->record1_typmod = 0; + } + + if (my_extra->record1_type != tupType || + my_extra->record1_typmod != tupTypmod) + { + MemSet(my_extra->columns, 0, ncolumns * sizeof(ColumnCompareData)); + my_extra->record1_type = tupType; + my_extra->record1_typmod = tupTypmod; + } + + /* Break down the tuple into fields */ + values = (Datum *) palloc(ncolumns * sizeof(Datum)); + nulls = (bool *) palloc(ncolumns * sizeof(bool)); + heap_deform_tuple(&tuple, tupdesc, values, nulls); + + for (int i = 0; i < ncolumns; i++) + { + Form_pg_attribute att; + TypeCacheEntry *typentry; + uint32 element_hash; + + att = TupleDescAttr(tupdesc, i); + + if (att->attisdropped) + continue; + + /* + * Lookup the hash function if not done already + */ + typentry = my_extra->columns[i].typentry; + if (typentry == NULL || + typentry->type_id != att->atttypid) + { + typentry = lookup_type_cache(att->atttypid, + TYPECACHE_HASH_PROC_FINFO); + if (!OidIsValid(typentry->hash_proc_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify a hash function for type %s", + format_type_be(typentry->type_id)))); + my_extra->columns[i].typentry = typentry; + } + + /* Compute hash of element */ + if (nulls[i]) + { + element_hash = 0; + } + else + { + LOCAL_FCINFO(locfcinfo, 1); + + InitFunctionCallInfoData(*locfcinfo, &typentry->hash_proc_finfo, 1, + att->attcollation, NULL, NULL); + locfcinfo->args[0].value = values[i]; + locfcinfo->args[0].isnull = false; + 
element_hash = DatumGetUInt32(FunctionCallInvoke(locfcinfo)); + + /* We don't expect hash support functions to return null */ + Assert(!locfcinfo->isnull); + } + + /* see hash_array() */ + result = (result << 5) - result + element_hash; + } + + pfree(values); + pfree(nulls); + ReleaseTupleDesc(tupdesc); + + /* Avoid leaking memory when handed toasted input. */ + PG_FREE_IF_COPY(record, 0); + + PG_RETURN_UINT32(result); +} + +Datum +hash_record_extended(PG_FUNCTION_ARGS) +{ + HeapTupleHeader record = PG_GETARG_HEAPTUPLEHEADER(0); + uint64 seed = PG_GETARG_INT64(1); + uint64 result = 0; + Oid tupType; + int32 tupTypmod; + TupleDesc tupdesc; + HeapTupleData tuple; + int ncolumns; + RecordCompareData *my_extra; + Datum *values; + bool *nulls; + + check_stack_depth(); /* recurses for record-type columns */ + + /* Extract type info from tuple */ + tupType = HeapTupleHeaderGetTypeId(record); + tupTypmod = HeapTupleHeaderGetTypMod(record); + tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); + ncolumns = tupdesc->natts; + + /* Build temporary HeapTuple control structure */ + tuple.t_len = HeapTupleHeaderGetDatumLength(record); + ItemPointerSetInvalid(&(tuple.t_self)); + tuple.t_tableOid = InvalidOid; + tuple.t_data = record; + + /* + * We arrange to look up the needed hashing info just once per series of + * calls, assuming the record type doesn't change underneath us. + */ + my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL || + my_extra->ncolumns < ncolumns) + { + fcinfo->flinfo->fn_extra = + MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + offsetof(RecordCompareData, columns) + + ncolumns * sizeof(ColumnCompareData)); + my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra; + my_extra->ncolumns = ncolumns; + my_extra->record1_type = InvalidOid; + my_extra->record1_typmod = 0; + } + + if (my_extra->record1_type != tupType || + my_extra->record1_typmod != tupTypmod) + { + MemSet(my_extra->columns, 0, ncolumns * sizeof(ColumnCompareData)); + my_extra->record1_type = tupType; + my_extra->record1_typmod = tupTypmod; + } + + /* Break down the tuple into fields */ + values = (Datum *) palloc(ncolumns * sizeof(Datum)); + nulls = (bool *) palloc(ncolumns * sizeof(bool)); + heap_deform_tuple(&tuple, tupdesc, values, nulls); + + for (int i = 0; i < ncolumns; i++) + { + Form_pg_attribute att; + TypeCacheEntry *typentry; + uint64 element_hash; + + att = TupleDescAttr(tupdesc, i); + + if (att->attisdropped) + continue; + + /* + * Lookup the hash function if not done already + */ + typentry = my_extra->columns[i].typentry; + if (typentry == NULL || + typentry->type_id != att->atttypid) + { + typentry = lookup_type_cache(att->atttypid, + TYPECACHE_HASH_EXTENDED_PROC_FINFO); + if (!OidIsValid(typentry->hash_extended_proc_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify an extended hash function for type %s", + format_type_be(typentry->type_id)))); + my_extra->columns[i].typentry = typentry; + } + + /* Compute hash of element */ + if (nulls[i]) + { + element_hash = 0; + } + else + { + LOCAL_FCINFO(locfcinfo, 2); + + InitFunctionCallInfoData(*locfcinfo, &typentry->hash_extended_proc_finfo, 2, + att->attcollation, NULL, NULL); + locfcinfo->args[0].value = values[i]; + locfcinfo->args[0].isnull = false; + locfcinfo->args[1].value = Int64GetDatum(seed); + locfcinfo->args[1].isnull = false; + element_hash = DatumGetUInt64(FunctionCallInvoke(locfcinfo)); + + /* We don't expect hash support functions to return null */ + 
Assert(!locfcinfo->isnull); + } + + /* see hash_array_extended() */ + result = (result << 5) - result + element_hash; + } + + pfree(values); + pfree(nulls); + ReleaseTupleDesc(tupdesc); + + /* Avoid leaking memory when handed toasted input. */ + PG_FREE_IF_COPY(record, 0); + + PG_RETURN_UINT64(result); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/ruleutils.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/ruleutils.c new file mode 100644 index 00000000000..400b3795827 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/ruleutils.c @@ -0,0 +1,12616 @@ +/*------------------------------------------------------------------------- + * + * ruleutils.c + * Functions to convert stored expressions/querytrees back to + * source text + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/ruleutils.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <ctype.h> +#include <unistd.h> +#include <fcntl.h> + +#include "access/amapi.h" +#include "access/htup_details.h" +#include "access/relation.h" +#include "access/sysattr.h" +#include "access/table.h" +#include "catalog/pg_aggregate.h" +#include "catalog/pg_am.h" +#include "catalog/pg_authid.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_constraint.h" +#include "catalog/pg_depend.h" +#include "catalog/pg_language.h" +#include "catalog/pg_opclass.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_partitioned_table.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_statistic_ext.h" +#include "catalog/pg_trigger.h" +#include "catalog/pg_type.h" +#include "commands/defrem.h" +#include "commands/tablespace.h" +#include "common/keywords.h" +#include "executor/spi.h" +#include "funcapi.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "nodes/pathnodes.h" +#include "optimizer/optimizer.h" +#include "parser/parse_agg.h" +#include "parser/parse_func.h" +#include "parser/parse_node.h" +#include "parser/parse_oper.h" +#include "parser/parse_relation.h" +#include "parser/parser.h" +#include "parser/parsetree.h" +#include "rewrite/rewriteHandler.h" +#include "rewrite/rewriteManip.h" +#include "rewrite/rewriteSupport.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/guc.h" +#include "utils/hsearch.h" +#include "utils/lsyscache.h" +#include "utils/partcache.h" +#include "utils/rel.h" +#include "utils/ruleutils.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" +#include "utils/typcache.h" +#include "utils/varlena.h" +#include "utils/xml.h" + +/* ---------- + * Pretty formatting constants + * ---------- + */ + +/* Indent counts */ +#define PRETTYINDENT_STD 8 +#define PRETTYINDENT_JOIN 4 +#define PRETTYINDENT_VAR 4 + +#define PRETTYINDENT_LIMIT 40 /* wrap limit */ + +/* Pretty flags */ +#define PRETTYFLAG_PAREN 0x0001 +#define PRETTYFLAG_INDENT 0x0002 +#define PRETTYFLAG_SCHEMA 0x0004 + +/* Standard conversion of a "bool pretty" option to detailed flags */ +#define GET_PRETTY_FLAGS(pretty) \ + ((pretty) ? 
(PRETTYFLAG_PAREN | PRETTYFLAG_INDENT | PRETTYFLAG_SCHEMA) \ + : PRETTYFLAG_INDENT) + +/* Default line length for pretty-print wrapping: 0 means wrap always */ +#define WRAP_COLUMN_DEFAULT 0 + +/* macros to test if pretty action needed */ +#define PRETTY_PAREN(context) ((context)->prettyFlags & PRETTYFLAG_PAREN) +#define PRETTY_INDENT(context) ((context)->prettyFlags & PRETTYFLAG_INDENT) +#define PRETTY_SCHEMA(context) ((context)->prettyFlags & PRETTYFLAG_SCHEMA) + + +/* ---------- + * Local data types + * ---------- + */ + +/* Context info needed for invoking a recursive querytree display routine */ +typedef struct +{ + StringInfo buf; /* output buffer to append to */ + List *namespaces; /* List of deparse_namespace nodes */ + List *windowClause; /* Current query level's WINDOW clause */ + List *windowTList; /* targetlist for resolving WINDOW clause */ + int prettyFlags; /* enabling of pretty-print functions */ + int wrapColumn; /* max line length, or -1 for no limit */ + int indentLevel; /* current indent level for pretty-print */ + bool varprefix; /* true to print prefixes on Vars */ + ParseExprKind special_exprkind; /* set only for exprkinds needing special + * handling */ + Bitmapset *appendparents; /* if not null, map child Vars of these relids + * back to the parent rel */ +} deparse_context; + +/* + * Each level of query context around a subtree needs a level of Var namespace. + * A Var having varlevelsup=N refers to the N'th item (counting from 0) in + * the current context's namespaces list. + * + * rtable is the list of actual RTEs from the Query or PlannedStmt. + * rtable_names holds the alias name to be used for each RTE (either a C + * string, or NULL for nameless RTEs such as unnamed joins). + * rtable_columns holds the column alias names to be used for each RTE. + * + * subplans is a list of Plan trees for SubPlans and CTEs (it's only used + * in the PlannedStmt case). + * ctes is a list of CommonTableExpr nodes (only used in the Query case). + * appendrels, if not null (it's only used in the PlannedStmt case), is an + * array of AppendRelInfo nodes, indexed by child relid. We use that to map + * child-table Vars to their inheritance parents. + * + * In some cases we need to make names of merged JOIN USING columns unique + * across the whole query, not only per-RTE. If so, unique_using is true + * and using_names is a list of C strings representing names already assigned + * to USING columns. + * + * When deparsing plan trees, there is always just a single item in the + * deparse_namespace list (since a plan tree never contains Vars with + * varlevelsup > 0). We store the Plan node that is the immediate + * parent of the expression to be deparsed, as well as a list of that + * Plan's ancestors. In addition, we store its outer and inner subplan nodes, + * as well as their targetlists, and the index tlist if the current plan node + * might contain INDEX_VAR Vars. (These fields could be derived on-the-fly + * from the current Plan node, but it seems notationally clearer to set them + * up as separate fields.) 
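Editorial aside on the pretty-print constants defined near the top of this file: GET_PRETTY_FLAGS turns the boolean "pretty" argument of the deparse entry points into the detailed bits tested by PRETTY_PAREN, PRETTY_INDENT and PRETTY_SCHEMA, and non-pretty output still keeps PRETTYFLAG_INDENT. A small self-contained check of that mapping; only the printout is illustrative, the flag values and macro body are copied from the definitions above.

#include <stdio.h>

/* Same flag values and macro as defined above in ruleutils.c. */
#define PRETTYFLAG_PAREN  0x0001
#define PRETTYFLAG_INDENT 0x0002
#define PRETTYFLAG_SCHEMA 0x0004

#define GET_PRETTY_FLAGS(pretty) \
    ((pretty) ? (PRETTYFLAG_PAREN | PRETTYFLAG_INDENT | PRETTYFLAG_SCHEMA) \
              : PRETTYFLAG_INDENT)

int main(void)
{
    int on = GET_PRETTY_FLAGS(1);
    int off = GET_PRETTY_FLAGS(0);

    printf("pretty on:  0x%04x (paren=%d indent=%d schema=%d)\n",
           on, !!(on & PRETTYFLAG_PAREN), !!(on & PRETTYFLAG_INDENT),
           !!(on & PRETTYFLAG_SCHEMA));
    printf("pretty off: 0x%04x (paren=%d indent=%d schema=%d)\n",
           off, !!(off & PRETTYFLAG_PAREN), !!(off & PRETTYFLAG_INDENT),
           !!(off & PRETTYFLAG_SCHEMA));
    return 0;
}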
+ */ +typedef struct +{ + List *rtable; /* List of RangeTblEntry nodes */ + List *rtable_names; /* Parallel list of names for RTEs */ + List *rtable_columns; /* Parallel list of deparse_columns structs */ + List *subplans; /* List of Plan trees for SubPlans */ + List *ctes; /* List of CommonTableExpr nodes */ + AppendRelInfo **appendrels; /* Array of AppendRelInfo nodes, or NULL */ + /* Workspace for column alias assignment: */ + bool unique_using; /* Are we making USING names globally unique */ + List *using_names; /* List of assigned names for USING columns */ + /* Remaining fields are used only when deparsing a Plan tree: */ + Plan *plan; /* immediate parent of current expression */ + List *ancestors; /* ancestors of plan */ + Plan *outer_plan; /* outer subnode, or NULL if none */ + Plan *inner_plan; /* inner subnode, or NULL if none */ + List *outer_tlist; /* referent for OUTER_VAR Vars */ + List *inner_tlist; /* referent for INNER_VAR Vars */ + List *index_tlist; /* referent for INDEX_VAR Vars */ + /* Special namespace representing a function signature: */ + char *funcname; + int numargs; + char **argnames; +} deparse_namespace; + +/* + * Per-relation data about column alias names. + * + * Selecting aliases is unreasonably complicated because of the need to dump + * rules/views whose underlying tables may have had columns added, deleted, or + * renamed since the query was parsed. We must nonetheless print the rule/view + * in a form that can be reloaded and will produce the same results as before. + * + * For each RTE used in the query, we must assign column aliases that are + * unique within that RTE. SQL does not require this of the original query, + * but due to factors such as *-expansion we need to be able to uniquely + * reference every column in a decompiled query. As long as we qualify all + * column references, per-RTE uniqueness is sufficient for that. + * + * However, we can't ensure per-column name uniqueness for unnamed join RTEs, + * since they just inherit column names from their input RTEs, and we can't + * rename the columns at the join level. Most of the time this isn't an issue + * because we don't need to reference the join's output columns as such; we + * can reference the input columns instead. That approach can fail for merged + * JOIN USING columns, however, so when we have one of those in an unnamed + * join, we have to make that column's alias globally unique across the whole + * query to ensure it can be referenced unambiguously. + * + * Another problem is that a JOIN USING clause requires the columns to be + * merged to have the same aliases in both input RTEs, and that no other + * columns in those RTEs or their children conflict with the USING names. + * To handle that, we do USING-column alias assignment in a recursive + * traversal of the query's jointree. When descending through a JOIN with + * USING, we preassign the USING column names to the child columns, overriding + * other rules for column alias assignment. We also mark each RTE with a list + * of all USING column names selected for joins containing that RTE, so that + * when we assign other columns' aliases later, we can avoid conflicts. + * + * Another problem is that if a JOIN's input tables have had columns added or + * deleted since the query was parsed, we must generate a column alias list + * for the join that matches the current set of input columns --- otherwise, a + * change in the number of columns in the left input would throw off matching + * of aliases to columns of the right input. 
Thus, positions in the printable + * column alias list are not necessarily one-for-one with varattnos of the + * JOIN, so we need a separate new_colnames[] array for printing purposes. + */ +typedef struct +{ + /* + * colnames is an array containing column aliases to use for columns that + * existed when the query was parsed. Dropped columns have NULL entries. + * This array can be directly indexed by varattno to get a Var's name. + * + * Non-NULL entries are guaranteed unique within the RTE, *except* when + * this is for an unnamed JOIN RTE. In that case we merely copy up names + * from the two input RTEs. + * + * During the recursive descent in set_using_names(), forcible assignment + * of a child RTE's column name is represented by pre-setting that element + * of the child's colnames array. So at that stage, NULL entries in this + * array just mean that no name has been preassigned, not necessarily that + * the column is dropped. + */ + int num_cols; /* length of colnames[] array */ + char **colnames; /* array of C strings and NULLs */ + + /* + * new_colnames is an array containing column aliases to use for columns + * that would exist if the query was re-parsed against the current + * definitions of its base tables. This is what to print as the column + * alias list for the RTE. This array does not include dropped columns, + * but it will include columns added since original parsing. Indexes in + * it therefore have little to do with current varattno values. As above, + * entries are unique unless this is for an unnamed JOIN RTE. (In such an + * RTE, we never actually print this array, but we must compute it anyway + * for possible use in computing column names of upper joins.) The + * parallel array is_new_col marks which of these columns are new since + * original parsing. Entries with is_new_col false must match the + * non-NULL colnames entries one-for-one. + */ + int num_new_cols; /* length of new_colnames[] array */ + char **new_colnames; /* array of C strings */ + bool *is_new_col; /* array of bool flags */ + + /* This flag tells whether we should actually print a column alias list */ + bool printaliases; + + /* This list has all names used as USING names in joins above this RTE */ + List *parentUsing; /* names assigned to parent merged columns */ + + /* + * If this struct is for a JOIN RTE, we fill these fields during the + * set_using_names() pass to describe its relationship to its child RTEs. + * + * leftattnos and rightattnos are arrays with one entry per existing + * output column of the join (hence, indexable by join varattno). For a + * simple reference to a column of the left child, leftattnos[i] is the + * child RTE's attno and rightattnos[i] is zero; and conversely for a + * column of the right child. But for merged columns produced by JOIN + * USING/NATURAL JOIN, both leftattnos[i] and rightattnos[i] are nonzero. + * Note that a simple reference might be to a child RTE column that's been + * dropped; but that's OK since the column could not be used in the query. + * + * If it's a JOIN USING, usingNames holds the alias names selected for the + * merged columns (these might be different from the original USING list, + * if we had to modify names to achieve uniqueness). 
+ */ + int leftrti; /* rangetable index of left child */ + int rightrti; /* rangetable index of right child */ + int *leftattnos; /* left-child varattnos of join cols, or 0 */ + int *rightattnos; /* right-child varattnos of join cols, or 0 */ + List *usingNames; /* names assigned to merged columns */ +} deparse_columns; + +/* This macro is analogous to rt_fetch(), but for deparse_columns structs */ +#define deparse_columns_fetch(rangetable_index, dpns) \ + ((deparse_columns *) list_nth((dpns)->rtable_columns, (rangetable_index)-1)) + +/* + * Entry in set_rtable_names' hash table + */ +typedef struct +{ + char name[NAMEDATALEN]; /* Hash key --- must be first */ + int counter; /* Largest addition used so far for name */ +} NameHashEntry; + +/* Callback signature for resolve_special_varno() */ +typedef void (*rsv_callback) (Node *node, deparse_context *context, + void *callback_arg); + + +/* ---------- + * Global data + * ---------- + */ +static __thread SPIPlanPtr plan_getrulebyoid = NULL; +static __thread const char *query_getrulebyoid = "SELECT * FROM pg_catalog.pg_rewrite WHERE oid = $1"; +static __thread SPIPlanPtr plan_getviewrule = NULL; +static __thread const char *query_getviewrule = "SELECT * FROM pg_catalog.pg_rewrite WHERE ev_class = $1 AND rulename = $2"; + +/* GUC parameters */ +__thread bool quote_all_identifiers = false; + + +/* ---------- + * Local functions + * + * Most of these functions used to use fixed-size buffers to build their + * results. Now, they take an (already initialized) StringInfo object + * as a parameter, and append their text output to its contents. + * ---------- + */ +static char *deparse_expression_pretty(Node *expr, List *dpcontext, + bool forceprefix, bool showimplicit, + int prettyFlags, int startIndent); +static char *pg_get_viewdef_worker(Oid viewoid, + int prettyFlags, int wrapColumn); +static char *pg_get_triggerdef_worker(Oid trigid, bool pretty); +static int decompile_column_index_array(Datum column_index_array, Oid relId, + StringInfo buf); +static char *pg_get_ruledef_worker(Oid ruleoid, int prettyFlags); +static char *pg_get_indexdef_worker(Oid indexrelid, int colno, + const Oid *excludeOps, + bool attrsOnly, bool keysOnly, + bool showTblSpc, bool inherits, + int prettyFlags, bool missing_ok); +static char *pg_get_statisticsobj_worker(Oid statextid, bool columns_only, + bool missing_ok); +static char *pg_get_partkeydef_worker(Oid relid, int prettyFlags, + bool attrsOnly, bool missing_ok); +static char *pg_get_constraintdef_worker(Oid constraintId, bool fullCommand, + int prettyFlags, bool missing_ok); +static text *pg_get_expr_worker(text *expr, Oid relid, int prettyFlags); +static int print_function_arguments(StringInfo buf, HeapTuple proctup, + bool print_table_args, bool print_defaults); +static void print_function_rettype(StringInfo buf, HeapTuple proctup); +static void print_function_trftypes(StringInfo buf, HeapTuple proctup); +static void print_function_sqlbody(StringInfo buf, HeapTuple proctup); +static void set_rtable_names(deparse_namespace *dpns, List *parent_namespaces, + Bitmapset *rels_used); +static void set_deparse_for_query(deparse_namespace *dpns, Query *query, + List *parent_namespaces); +static void set_simple_column_names(deparse_namespace *dpns); +static bool has_dangerous_join_using(deparse_namespace *dpns, Node *jtnode); +static void set_using_names(deparse_namespace *dpns, Node *jtnode, + List *parentUsing); +static void set_relation_column_names(deparse_namespace *dpns, + RangeTblEntry *rte, + deparse_columns 
*colinfo); +static void set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte, + deparse_columns *colinfo); +static bool colname_is_unique(const char *colname, deparse_namespace *dpns, + deparse_columns *colinfo); +static char *make_colname_unique(char *colname, deparse_namespace *dpns, + deparse_columns *colinfo); +static void expand_colnames_array_to(deparse_columns *colinfo, int n); +static void identify_join_columns(JoinExpr *j, RangeTblEntry *jrte, + deparse_columns *colinfo); +static char *get_rtable_name(int rtindex, deparse_context *context); +static void set_deparse_plan(deparse_namespace *dpns, Plan *plan); +static Plan *find_recursive_union(deparse_namespace *dpns, + WorkTableScan *wtscan); +static void push_child_plan(deparse_namespace *dpns, Plan *plan, + deparse_namespace *save_dpns); +static void pop_child_plan(deparse_namespace *dpns, + deparse_namespace *save_dpns); +static void push_ancestor_plan(deparse_namespace *dpns, ListCell *ancestor_cell, + deparse_namespace *save_dpns); +static void pop_ancestor_plan(deparse_namespace *dpns, + deparse_namespace *save_dpns); +static void make_ruledef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc, + int prettyFlags); +static void make_viewdef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc, + int prettyFlags, int wrapColumn); +static void get_query_def(Query *query, StringInfo buf, List *parentnamespace, + TupleDesc resultDesc, bool colNamesVisible, + int prettyFlags, int wrapColumn, int startIndent); +static void get_values_def(List *values_lists, deparse_context *context); +static void get_with_clause(Query *query, deparse_context *context); +static void get_select_query_def(Query *query, deparse_context *context, + TupleDesc resultDesc, bool colNamesVisible); +static void get_insert_query_def(Query *query, deparse_context *context, + bool colNamesVisible); +static void get_update_query_def(Query *query, deparse_context *context, + bool colNamesVisible); +static void get_update_query_targetlist_def(Query *query, List *targetList, + deparse_context *context, + RangeTblEntry *rte); +static void get_delete_query_def(Query *query, deparse_context *context, + bool colNamesVisible); +static void get_merge_query_def(Query *query, deparse_context *context, + bool colNamesVisible); +static void get_utility_query_def(Query *query, deparse_context *context); +static void get_basic_select_query(Query *query, deparse_context *context, + TupleDesc resultDesc, bool colNamesVisible); +static void get_target_list(List *targetList, deparse_context *context, + TupleDesc resultDesc, bool colNamesVisible); +static void get_setop_query(Node *setOp, Query *query, + deparse_context *context, + TupleDesc resultDesc, bool colNamesVisible); +static Node *get_rule_sortgroupclause(Index ref, List *tlist, + bool force_colno, + deparse_context *context); +static void get_rule_groupingset(GroupingSet *gset, List *targetlist, + bool omit_parens, deparse_context *context); +static void get_rule_orderby(List *orderList, List *targetList, + bool force_colno, deparse_context *context); +static void get_rule_windowclause(Query *query, deparse_context *context); +static void get_rule_windowspec(WindowClause *wc, List *targetList, + deparse_context *context); +static char *get_variable(Var *var, int levelsup, bool istoplevel, + deparse_context *context); +static void get_special_variable(Node *node, deparse_context *context, + void *callback_arg); +static void resolve_special_varno(Node *node, deparse_context *context, + rsv_callback callback, 
void *callback_arg); +static Node *find_param_referent(Param *param, deparse_context *context, + deparse_namespace **dpns_p, ListCell **ancestor_cell_p); +static void get_parameter(Param *param, deparse_context *context); +static const char *get_simple_binary_op_name(OpExpr *expr); +static bool isSimpleNode(Node *node, Node *parentNode, int prettyFlags); +static void appendContextKeyword(deparse_context *context, const char *str, + int indentBefore, int indentAfter, int indentPlus); +static void removeStringInfoSpaces(StringInfo str); +static void get_rule_expr(Node *node, deparse_context *context, + bool showimplicit); +static void get_rule_expr_toplevel(Node *node, deparse_context *context, + bool showimplicit); +static void get_rule_list_toplevel(List *lst, deparse_context *context, + bool showimplicit); +static void get_rule_expr_funccall(Node *node, deparse_context *context, + bool showimplicit); +static bool looks_like_function(Node *node); +static void get_oper_expr(OpExpr *expr, deparse_context *context); +static void get_func_expr(FuncExpr *expr, deparse_context *context, + bool showimplicit); +static void get_agg_expr(Aggref *aggref, deparse_context *context, + Aggref *original_aggref); +static void get_agg_expr_helper(Aggref *aggref, deparse_context *context, + Aggref *original_aggref, const char *funcname, + const char *options, bool is_json_objectagg); +static void get_agg_combine_expr(Node *node, deparse_context *context, + void *callback_arg); +static void get_windowfunc_expr(WindowFunc *wfunc, deparse_context *context); +static void get_windowfunc_expr_helper(WindowFunc *wfunc, deparse_context *context, + const char *funcname, const char *options, + bool is_json_objectagg); +static bool get_func_sql_syntax(FuncExpr *expr, deparse_context *context); +static void get_coercion_expr(Node *arg, deparse_context *context, + Oid resulttype, int32 resulttypmod, + Node *parentNode); +static void get_const_expr(Const *constval, deparse_context *context, + int showtype); +static void get_const_collation(Const *constval, deparse_context *context); +static void get_json_format(JsonFormat *format, StringInfo buf); +static void get_json_constructor(JsonConstructorExpr *ctor, + deparse_context *context, bool showimplicit); +static void get_json_constructor_options(JsonConstructorExpr *ctor, + StringInfo buf); +static void get_json_agg_constructor(JsonConstructorExpr *ctor, + deparse_context *context, + const char *funcname, + bool is_json_objectagg); +static void simple_quote_literal(StringInfo buf, const char *val); +static void get_sublink_expr(SubLink *sublink, deparse_context *context); +static void get_tablefunc(TableFunc *tf, deparse_context *context, + bool showimplicit); +static void get_from_clause(Query *query, const char *prefix, + deparse_context *context); +static void get_from_clause_item(Node *jtnode, Query *query, + deparse_context *context); +static void get_rte_alias(RangeTblEntry *rte, int varno, bool use_as, + deparse_context *context); +static void get_column_alias_list(deparse_columns *colinfo, + deparse_context *context); +static void get_from_clause_coldeflist(RangeTblFunction *rtfunc, + deparse_columns *colinfo, + deparse_context *context); +static void get_tablesample_def(TableSampleClause *tablesample, + deparse_context *context); +static void get_opclass_name(Oid opclass, Oid actual_datatype, + StringInfo buf); +static Node *processIndirection(Node *node, deparse_context *context); +static void printSubscripts(SubscriptingRef *sbsref, deparse_context *context); 
+static char *get_relation_name(Oid relid); +static char *generate_relation_name(Oid relid, List *namespaces); +static char *generate_qualified_relation_name(Oid relid); +static char *generate_function_name(Oid funcid, int nargs, + List *argnames, Oid *argtypes, + bool has_variadic, bool *use_variadic_p, + ParseExprKind special_exprkind); +static char *generate_operator_name(Oid operid, Oid arg1, Oid arg2); +static void add_cast_to(StringInfo buf, Oid typid); +static char *generate_qualified_type_name(Oid typid); +static text *string_to_text(char *str); +static char *flatten_reloptions(Oid relid); +static void get_reloptions(StringInfo buf, Datum reloptions); + +#define only_marker(rte) ((rte)->inh ? "" : "ONLY ") + + +/* ---------- + * pg_get_ruledef - Do it all and return a text + * that could be used as a statement + * to recreate the rule + * ---------- + */ +Datum +pg_get_ruledef(PG_FUNCTION_ARGS) +{ + Oid ruleoid = PG_GETARG_OID(0); + int prettyFlags; + char *res; + + prettyFlags = PRETTYFLAG_INDENT; + + res = pg_get_ruledef_worker(ruleoid, prettyFlags); + + if (res == NULL) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(string_to_text(res)); +} + + +Datum +pg_get_ruledef_ext(PG_FUNCTION_ARGS) +{ + Oid ruleoid = PG_GETARG_OID(0); + bool pretty = PG_GETARG_BOOL(1); + int prettyFlags; + char *res; + + prettyFlags = GET_PRETTY_FLAGS(pretty); + + res = pg_get_ruledef_worker(ruleoid, prettyFlags); + + if (res == NULL) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(string_to_text(res)); +} + + +static char * +pg_get_ruledef_worker(Oid ruleoid, int prettyFlags) +{ + Datum args[1]; + char nulls[1]; + int spirc; + HeapTuple ruletup; + TupleDesc rulettc; + StringInfoData buf; + + /* + * Do this first so that string is alloc'd in outer context not SPI's. + */ + initStringInfo(&buf); + + /* + * Connect to SPI manager + */ + if (SPI_connect() != SPI_OK_CONNECT) + elog(ERROR, "SPI_connect failed"); + + /* + * On the first call prepare the plan to lookup pg_rewrite. We read + * pg_rewrite over the SPI manager instead of using the syscache to be + * checked for read access on pg_rewrite. + */ + if (plan_getrulebyoid == NULL) + { + Oid argtypes[1]; + SPIPlanPtr plan; + + argtypes[0] = OIDOID; + plan = SPI_prepare(query_getrulebyoid, 1, argtypes); + if (plan == NULL) + elog(ERROR, "SPI_prepare failed for \"%s\"", query_getrulebyoid); + SPI_keepplan(plan); + plan_getrulebyoid = plan; + } + + /* + * Get the pg_rewrite tuple for this rule + */ + args[0] = ObjectIdGetDatum(ruleoid); + nulls[0] = ' '; + spirc = SPI_execute_plan(plan_getrulebyoid, args, nulls, true, 0); + if (spirc != SPI_OK_SELECT) + elog(ERROR, "failed to get pg_rewrite tuple for rule %u", ruleoid); + if (SPI_processed != 1) + { + /* + * There is no tuple data available here, just keep the output buffer + * empty. 
+ */ + } + else + { + /* + * Get the rule's definition and put it into executor's memory + */ + ruletup = SPI_tuptable->vals[0]; + rulettc = SPI_tuptable->tupdesc; + make_ruledef(&buf, ruletup, rulettc, prettyFlags); + } + + /* + * Disconnect from SPI manager + */ + if (SPI_finish() != SPI_OK_FINISH) + elog(ERROR, "SPI_finish failed"); + + if (buf.len == 0) + return NULL; + + return buf.data; +} + + +/* ---------- + * pg_get_viewdef - Mainly the same thing, but we + * only return the SELECT part of a view + * ---------- + */ +Datum +pg_get_viewdef(PG_FUNCTION_ARGS) +{ + /* By OID */ + Oid viewoid = PG_GETARG_OID(0); + int prettyFlags; + char *res; + + prettyFlags = PRETTYFLAG_INDENT; + + res = pg_get_viewdef_worker(viewoid, prettyFlags, WRAP_COLUMN_DEFAULT); + + if (res == NULL) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(string_to_text(res)); +} + + +Datum +pg_get_viewdef_ext(PG_FUNCTION_ARGS) +{ + /* By OID */ + Oid viewoid = PG_GETARG_OID(0); + bool pretty = PG_GETARG_BOOL(1); + int prettyFlags; + char *res; + + prettyFlags = GET_PRETTY_FLAGS(pretty); + + res = pg_get_viewdef_worker(viewoid, prettyFlags, WRAP_COLUMN_DEFAULT); + + if (res == NULL) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(string_to_text(res)); +} + +Datum +pg_get_viewdef_wrap(PG_FUNCTION_ARGS) +{ + /* By OID */ + Oid viewoid = PG_GETARG_OID(0); + int wrap = PG_GETARG_INT32(1); + int prettyFlags; + char *res; + + /* calling this implies we want pretty printing */ + prettyFlags = GET_PRETTY_FLAGS(true); + + res = pg_get_viewdef_worker(viewoid, prettyFlags, wrap); + + if (res == NULL) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(string_to_text(res)); +} + +Datum +pg_get_viewdef_name(PG_FUNCTION_ARGS) +{ + /* By qualified name */ + text *viewname = PG_GETARG_TEXT_PP(0); + int prettyFlags; + RangeVar *viewrel; + Oid viewoid; + char *res; + + prettyFlags = PRETTYFLAG_INDENT; + + /* Look up view name. Can't lock it - we might not have privileges. */ + viewrel = makeRangeVarFromNameList(textToQualifiedNameList(viewname)); + viewoid = RangeVarGetRelid(viewrel, NoLock, false); + + res = pg_get_viewdef_worker(viewoid, prettyFlags, WRAP_COLUMN_DEFAULT); + + if (res == NULL) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(string_to_text(res)); +} + + +Datum +pg_get_viewdef_name_ext(PG_FUNCTION_ARGS) +{ + /* By qualified name */ + text *viewname = PG_GETARG_TEXT_PP(0); + bool pretty = PG_GETARG_BOOL(1); + int prettyFlags; + RangeVar *viewrel; + Oid viewoid; + char *res; + + prettyFlags = GET_PRETTY_FLAGS(pretty); + + /* Look up view name. Can't lock it - we might not have privileges. */ + viewrel = makeRangeVarFromNameList(textToQualifiedNameList(viewname)); + viewoid = RangeVarGetRelid(viewrel, NoLock, false); + + res = pg_get_viewdef_worker(viewoid, prettyFlags, WRAP_COLUMN_DEFAULT); + + if (res == NULL) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(string_to_text(res)); +} + +/* + * Common code for by-OID and by-name variants of pg_get_viewdef + */ +static char * +pg_get_viewdef_worker(Oid viewoid, int prettyFlags, int wrapColumn) +{ + Datum args[2]; + char nulls[2]; + int spirc; + HeapTuple ruletup; + TupleDesc rulettc; + StringInfoData buf; + + /* + * Do this first so that string is alloc'd in outer context not SPI's. + */ + initStringInfo(&buf); + + /* + * Connect to SPI manager + */ + if (SPI_connect() != SPI_OK_CONNECT) + elog(ERROR, "SPI_connect failed"); + + /* + * On the first call prepare the plan to lookup pg_rewrite. We read + * pg_rewrite over the SPI manager instead of using the syscache to be + * checked for read access on pg_rewrite. 
+ */ + if (plan_getviewrule == NULL) + { + Oid argtypes[2]; + SPIPlanPtr plan; + + argtypes[0] = OIDOID; + argtypes[1] = NAMEOID; + plan = SPI_prepare(query_getviewrule, 2, argtypes); + if (plan == NULL) + elog(ERROR, "SPI_prepare failed for \"%s\"", query_getviewrule); + SPI_keepplan(plan); + plan_getviewrule = plan; + } + + /* + * Get the pg_rewrite tuple for the view's SELECT rule + */ + args[0] = ObjectIdGetDatum(viewoid); + args[1] = DirectFunctionCall1(namein, CStringGetDatum(ViewSelectRuleName)); + nulls[0] = ' '; + nulls[1] = ' '; + spirc = SPI_execute_plan(plan_getviewrule, args, nulls, true, 0); + if (spirc != SPI_OK_SELECT) + elog(ERROR, "failed to get pg_rewrite tuple for view %u", viewoid); + if (SPI_processed != 1) + { + /* + * There is no tuple data available here, just keep the output buffer + * empty. + */ + } + else + { + /* + * Get the rule's definition and put it into executor's memory + */ + ruletup = SPI_tuptable->vals[0]; + rulettc = SPI_tuptable->tupdesc; + make_viewdef(&buf, ruletup, rulettc, prettyFlags, wrapColumn); + } + + /* + * Disconnect from SPI manager + */ + if (SPI_finish() != SPI_OK_FINISH) + elog(ERROR, "SPI_finish failed"); + + if (buf.len == 0) + return NULL; + + return buf.data; +} + +/* ---------- + * pg_get_triggerdef - Get the definition of a trigger + * ---------- + */ +Datum +pg_get_triggerdef(PG_FUNCTION_ARGS) +{ + Oid trigid = PG_GETARG_OID(0); + char *res; + + res = pg_get_triggerdef_worker(trigid, false); + + if (res == NULL) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(string_to_text(res)); +} + +Datum +pg_get_triggerdef_ext(PG_FUNCTION_ARGS) +{ + Oid trigid = PG_GETARG_OID(0); + bool pretty = PG_GETARG_BOOL(1); + char *res; + + res = pg_get_triggerdef_worker(trigid, pretty); + + if (res == NULL) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(string_to_text(res)); +} + +static char * +pg_get_triggerdef_worker(Oid trigid, bool pretty) +{ + HeapTuple ht_trig; + Form_pg_trigger trigrec; + StringInfoData buf; + Relation tgrel; + ScanKeyData skey[1]; + SysScanDesc tgscan; + int findx = 0; + char *tgname; + char *tgoldtable; + char *tgnewtable; + Datum value; + bool isnull; + + /* + * Fetch the pg_trigger tuple by the Oid of the trigger + */ + tgrel = table_open(TriggerRelationId, AccessShareLock); + + ScanKeyInit(&skey[0], + Anum_pg_trigger_oid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(trigid)); + + tgscan = systable_beginscan(tgrel, TriggerOidIndexId, true, + NULL, 1, skey); + + ht_trig = systable_getnext(tgscan); + + if (!HeapTupleIsValid(ht_trig)) + { + systable_endscan(tgscan); + table_close(tgrel, AccessShareLock); + return NULL; + } + + trigrec = (Form_pg_trigger) GETSTRUCT(ht_trig); + + /* + * Start the trigger definition. Note that the trigger's name should never + * be schema-qualified, but the trigger rel's name may be. + */ + initStringInfo(&buf); + + tgname = NameStr(trigrec->tgname); + appendStringInfo(&buf, "CREATE %sTRIGGER %s ", + OidIsValid(trigrec->tgconstraint) ? 
"CONSTRAINT " : "", + quote_identifier(tgname)); + + if (TRIGGER_FOR_BEFORE(trigrec->tgtype)) + appendStringInfoString(&buf, "BEFORE"); + else if (TRIGGER_FOR_AFTER(trigrec->tgtype)) + appendStringInfoString(&buf, "AFTER"); + else if (TRIGGER_FOR_INSTEAD(trigrec->tgtype)) + appendStringInfoString(&buf, "INSTEAD OF"); + else + elog(ERROR, "unexpected tgtype value: %d", trigrec->tgtype); + + if (TRIGGER_FOR_INSERT(trigrec->tgtype)) + { + appendStringInfoString(&buf, " INSERT"); + findx++; + } + if (TRIGGER_FOR_DELETE(trigrec->tgtype)) + { + if (findx > 0) + appendStringInfoString(&buf, " OR DELETE"); + else + appendStringInfoString(&buf, " DELETE"); + findx++; + } + if (TRIGGER_FOR_UPDATE(trigrec->tgtype)) + { + if (findx > 0) + appendStringInfoString(&buf, " OR UPDATE"); + else + appendStringInfoString(&buf, " UPDATE"); + findx++; + /* tgattr is first var-width field, so OK to access directly */ + if (trigrec->tgattr.dim1 > 0) + { + int i; + + appendStringInfoString(&buf, " OF "); + for (i = 0; i < trigrec->tgattr.dim1; i++) + { + char *attname; + + if (i > 0) + appendStringInfoString(&buf, ", "); + attname = get_attname(trigrec->tgrelid, + trigrec->tgattr.values[i], false); + appendStringInfoString(&buf, quote_identifier(attname)); + } + } + } + if (TRIGGER_FOR_TRUNCATE(trigrec->tgtype)) + { + if (findx > 0) + appendStringInfoString(&buf, " OR TRUNCATE"); + else + appendStringInfoString(&buf, " TRUNCATE"); + findx++; + } + + /* + * In non-pretty mode, always schema-qualify the target table name for + * safety. In pretty mode, schema-qualify only if not visible. + */ + appendStringInfo(&buf, " ON %s ", + pretty ? + generate_relation_name(trigrec->tgrelid, NIL) : + generate_qualified_relation_name(trigrec->tgrelid)); + + if (OidIsValid(trigrec->tgconstraint)) + { + if (OidIsValid(trigrec->tgconstrrelid)) + appendStringInfo(&buf, "FROM %s ", + generate_relation_name(trigrec->tgconstrrelid, NIL)); + if (!trigrec->tgdeferrable) + appendStringInfoString(&buf, "NOT "); + appendStringInfoString(&buf, "DEFERRABLE INITIALLY "); + if (trigrec->tginitdeferred) + appendStringInfoString(&buf, "DEFERRED "); + else + appendStringInfoString(&buf, "IMMEDIATE "); + } + + value = fastgetattr(ht_trig, Anum_pg_trigger_tgoldtable, + tgrel->rd_att, &isnull); + if (!isnull) + tgoldtable = NameStr(*DatumGetName(value)); + else + tgoldtable = NULL; + value = fastgetattr(ht_trig, Anum_pg_trigger_tgnewtable, + tgrel->rd_att, &isnull); + if (!isnull) + tgnewtable = NameStr(*DatumGetName(value)); + else + tgnewtable = NULL; + if (tgoldtable != NULL || tgnewtable != NULL) + { + appendStringInfoString(&buf, "REFERENCING "); + if (tgoldtable != NULL) + appendStringInfo(&buf, "OLD TABLE AS %s ", + quote_identifier(tgoldtable)); + if (tgnewtable != NULL) + appendStringInfo(&buf, "NEW TABLE AS %s ", + quote_identifier(tgnewtable)); + } + + if (TRIGGER_FOR_ROW(trigrec->tgtype)) + appendStringInfoString(&buf, "FOR EACH ROW "); + else + appendStringInfoString(&buf, "FOR EACH STATEMENT "); + + /* If the trigger has a WHEN qualification, add that */ + value = fastgetattr(ht_trig, Anum_pg_trigger_tgqual, + tgrel->rd_att, &isnull); + if (!isnull) + { + Node *qual; + char relkind; + deparse_context context; + deparse_namespace dpns; + RangeTblEntry *oldrte; + RangeTblEntry *newrte; + + appendStringInfoString(&buf, "WHEN ("); + + qual = stringToNode(TextDatumGetCString(value)); + + relkind = get_rel_relkind(trigrec->tgrelid); + + /* Build minimal OLD and NEW RTEs for the rel */ + oldrte = makeNode(RangeTblEntry); + oldrte->rtekind = 
RTE_RELATION; + oldrte->relid = trigrec->tgrelid; + oldrte->relkind = relkind; + oldrte->rellockmode = AccessShareLock; + oldrte->alias = makeAlias("old", NIL); + oldrte->eref = oldrte->alias; + oldrte->lateral = false; + oldrte->inh = false; + oldrte->inFromCl = true; + + newrte = makeNode(RangeTblEntry); + newrte->rtekind = RTE_RELATION; + newrte->relid = trigrec->tgrelid; + newrte->relkind = relkind; + newrte->rellockmode = AccessShareLock; + newrte->alias = makeAlias("new", NIL); + newrte->eref = newrte->alias; + newrte->lateral = false; + newrte->inh = false; + newrte->inFromCl = true; + + /* Build two-element rtable */ + memset(&dpns, 0, sizeof(dpns)); + dpns.rtable = list_make2(oldrte, newrte); + dpns.subplans = NIL; + dpns.ctes = NIL; + dpns.appendrels = NULL; + set_rtable_names(&dpns, NIL, NULL); + set_simple_column_names(&dpns); + + /* Set up context with one-deep namespace stack */ + context.buf = &buf; + context.namespaces = list_make1(&dpns); + context.windowClause = NIL; + context.windowTList = NIL; + context.varprefix = true; + context.prettyFlags = GET_PRETTY_FLAGS(pretty); + context.wrapColumn = WRAP_COLUMN_DEFAULT; + context.indentLevel = PRETTYINDENT_STD; + context.special_exprkind = EXPR_KIND_NONE; + context.appendparents = NULL; + + get_rule_expr(qual, &context, false); + + appendStringInfoString(&buf, ") "); + } + + appendStringInfo(&buf, "EXECUTE FUNCTION %s(", + generate_function_name(trigrec->tgfoid, 0, + NIL, NULL, + false, NULL, EXPR_KIND_NONE)); + + if (trigrec->tgnargs > 0) + { + char *p; + int i; + + value = fastgetattr(ht_trig, Anum_pg_trigger_tgargs, + tgrel->rd_att, &isnull); + if (isnull) + elog(ERROR, "tgargs is null for trigger %u", trigid); + p = (char *) VARDATA_ANY(DatumGetByteaPP(value)); + for (i = 0; i < trigrec->tgnargs; i++) + { + if (i > 0) + appendStringInfoString(&buf, ", "); + simple_quote_literal(&buf, p); + /* advance p to next string embedded in tgargs */ + while (*p) + p++; + p++; + } + } + + /* We deliberately do not put semi-colon at end */ + appendStringInfoChar(&buf, ')'); + + /* Clean up */ + systable_endscan(tgscan); + + table_close(tgrel, AccessShareLock); + + return buf.data; +} + +/* ---------- + * pg_get_indexdef - Get the definition of an index + * + * In the extended version, there is a colno argument as well as pretty bool. + * if colno == 0, we want a complete index definition. + * if colno > 0, we only want the Nth index key's variable or expression. + * + * Note that the SQL-function versions of this omit any info about the + * index tablespace; this is intentional because pg_dump wants it that way. + * However pg_get_indexdef_string() includes the index tablespace. + * ---------- + */ +Datum +pg_get_indexdef(PG_FUNCTION_ARGS) +{ + Oid indexrelid = PG_GETARG_OID(0); + int prettyFlags; + char *res; + + prettyFlags = PRETTYFLAG_INDENT; + + res = pg_get_indexdef_worker(indexrelid, 0, NULL, + false, false, + false, false, + prettyFlags, true); + + if (res == NULL) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(string_to_text(res)); +} + +Datum +pg_get_indexdef_ext(PG_FUNCTION_ARGS) +{ + Oid indexrelid = PG_GETARG_OID(0); + int32 colno = PG_GETARG_INT32(1); + bool pretty = PG_GETARG_BOOL(2); + int prettyFlags; + char *res; + + prettyFlags = GET_PRETTY_FLAGS(pretty); + + res = pg_get_indexdef_worker(indexrelid, colno, NULL, + colno != 0, false, + false, false, + prettyFlags, true); + + if (res == NULL) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(string_to_text(res)); +} + +/* + * Internal version for use by ALTER TABLE. 
+ * Includes a tablespace clause in the result. + * Returns a palloc'd C string; no pretty-printing. + */ +char * +pg_get_indexdef_string(Oid indexrelid) +{ + return pg_get_indexdef_worker(indexrelid, 0, NULL, + false, false, + true, true, + 0, false); +} + +/* Internal version that just reports the key-column definitions */ +char * +pg_get_indexdef_columns(Oid indexrelid, bool pretty) +{ + int prettyFlags; + + prettyFlags = GET_PRETTY_FLAGS(pretty); + + return pg_get_indexdef_worker(indexrelid, 0, NULL, + true, true, + false, false, + prettyFlags, false); +} + +/* Internal version, extensible with flags to control its behavior */ +char * +pg_get_indexdef_columns_extended(Oid indexrelid, bits16 flags) +{ + bool pretty = ((flags & RULE_INDEXDEF_PRETTY) != 0); + bool keys_only = ((flags & RULE_INDEXDEF_KEYS_ONLY) != 0); + int prettyFlags; + + prettyFlags = GET_PRETTY_FLAGS(pretty); + + return pg_get_indexdef_worker(indexrelid, 0, NULL, + true, keys_only, + false, false, + prettyFlags, false); +} + +/* + * Internal workhorse to decompile an index definition. + * + * This is now used for exclusion constraints as well: if excludeOps is not + * NULL then it points to an array of exclusion operator OIDs. + */ +static char * +pg_get_indexdef_worker(Oid indexrelid, int colno, + const Oid *excludeOps, + bool attrsOnly, bool keysOnly, + bool showTblSpc, bool inherits, + int prettyFlags, bool missing_ok) +{ + /* might want a separate isConstraint parameter later */ + bool isConstraint = (excludeOps != NULL); + HeapTuple ht_idx; + HeapTuple ht_idxrel; + HeapTuple ht_am; + Form_pg_index idxrec; + Form_pg_class idxrelrec; + Form_pg_am amrec; + IndexAmRoutine *amroutine; + List *indexprs; + ListCell *indexpr_item; + List *context; + Oid indrelid; + int keyno; + Datum indcollDatum; + Datum indclassDatum; + Datum indoptionDatum; + oidvector *indcollation; + oidvector *indclass; + int2vector *indoption; + StringInfoData buf; + char *str; + char *sep; + + /* + * Fetch the pg_index tuple by the Oid of the index + */ + ht_idx = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexrelid)); + if (!HeapTupleIsValid(ht_idx)) + { + if (missing_ok) + return NULL; + elog(ERROR, "cache lookup failed for index %u", indexrelid); + } + idxrec = (Form_pg_index) GETSTRUCT(ht_idx); + + indrelid = idxrec->indrelid; + Assert(indexrelid == idxrec->indexrelid); + + /* Must get indcollation, indclass, and indoption the hard way */ + indcollDatum = SysCacheGetAttrNotNull(INDEXRELID, ht_idx, + Anum_pg_index_indcollation); + indcollation = (oidvector *) DatumGetPointer(indcollDatum); + + indclassDatum = SysCacheGetAttrNotNull(INDEXRELID, ht_idx, + Anum_pg_index_indclass); + indclass = (oidvector *) DatumGetPointer(indclassDatum); + + indoptionDatum = SysCacheGetAttrNotNull(INDEXRELID, ht_idx, + Anum_pg_index_indoption); + indoption = (int2vector *) DatumGetPointer(indoptionDatum); + + /* + * Fetch the pg_class tuple of the index relation + */ + ht_idxrel = SearchSysCache1(RELOID, ObjectIdGetDatum(indexrelid)); + if (!HeapTupleIsValid(ht_idxrel)) + elog(ERROR, "cache lookup failed for relation %u", indexrelid); + idxrelrec = (Form_pg_class) GETSTRUCT(ht_idxrel); + + /* + * Fetch the pg_am tuple of the index' access method + */ + ht_am = SearchSysCache1(AMOID, ObjectIdGetDatum(idxrelrec->relam)); + if (!HeapTupleIsValid(ht_am)) + elog(ERROR, "cache lookup failed for access method %u", + idxrelrec->relam); + amrec = (Form_pg_am) GETSTRUCT(ht_am); + + /* Fetch the index AM's API struct */ + amroutine = GetIndexAmRoutine(amrec->amhandler); 
+ + /* + * Get the index expressions, if any. (NOTE: we do not use the relcache + * versions of the expressions and predicate, because we want to display + * non-const-folded expressions.) + */ + if (!heap_attisnull(ht_idx, Anum_pg_index_indexprs, NULL)) + { + Datum exprsDatum; + char *exprsString; + + exprsDatum = SysCacheGetAttrNotNull(INDEXRELID, ht_idx, + Anum_pg_index_indexprs); + exprsString = TextDatumGetCString(exprsDatum); + indexprs = (List *) stringToNode(exprsString); + pfree(exprsString); + } + else + indexprs = NIL; + + indexpr_item = list_head(indexprs); + + context = deparse_context_for(get_relation_name(indrelid), indrelid); + + /* + * Start the index definition. Note that the index's name should never be + * schema-qualified, but the indexed rel's name may be. + */ + initStringInfo(&buf); + + if (!attrsOnly) + { + if (!isConstraint) + appendStringInfo(&buf, "CREATE %sINDEX %s ON %s%s USING %s (", + idxrec->indisunique ? "UNIQUE " : "", + quote_identifier(NameStr(idxrelrec->relname)), + idxrelrec->relkind == RELKIND_PARTITIONED_INDEX + && !inherits ? "ONLY " : "", + (prettyFlags & PRETTYFLAG_SCHEMA) ? + generate_relation_name(indrelid, NIL) : + generate_qualified_relation_name(indrelid), + quote_identifier(NameStr(amrec->amname))); + else /* currently, must be EXCLUDE constraint */ + appendStringInfo(&buf, "EXCLUDE USING %s (", + quote_identifier(NameStr(amrec->amname))); + } + + /* + * Report the indexed attributes + */ + sep = ""; + for (keyno = 0; keyno < idxrec->indnatts; keyno++) + { + AttrNumber attnum = idxrec->indkey.values[keyno]; + Oid keycoltype; + Oid keycolcollation; + + /* + * Ignore non-key attributes if told to. + */ + if (keysOnly && keyno >= idxrec->indnkeyatts) + break; + + /* Otherwise, print INCLUDE to divide key and non-key attrs. 
*/ + if (!colno && keyno == idxrec->indnkeyatts) + { + appendStringInfoString(&buf, ") INCLUDE ("); + sep = ""; + } + + if (!colno) + appendStringInfoString(&buf, sep); + sep = ", "; + + if (attnum != 0) + { + /* Simple index column */ + char *attname; + int32 keycoltypmod; + + attname = get_attname(indrelid, attnum, false); + if (!colno || colno == keyno + 1) + appendStringInfoString(&buf, quote_identifier(attname)); + get_atttypetypmodcoll(indrelid, attnum, + &keycoltype, &keycoltypmod, + &keycolcollation); + } + else + { + /* expressional index */ + Node *indexkey; + + if (indexpr_item == NULL) + elog(ERROR, "too few entries in indexprs list"); + indexkey = (Node *) lfirst(indexpr_item); + indexpr_item = lnext(indexprs, indexpr_item); + /* Deparse */ + str = deparse_expression_pretty(indexkey, context, false, false, + prettyFlags, 0); + if (!colno || colno == keyno + 1) + { + /* Need parens if it's not a bare function call */ + if (looks_like_function(indexkey)) + appendStringInfoString(&buf, str); + else + appendStringInfo(&buf, "(%s)", str); + } + keycoltype = exprType(indexkey); + keycolcollation = exprCollation(indexkey); + } + + /* Print additional decoration for (selected) key columns */ + if (!attrsOnly && keyno < idxrec->indnkeyatts && + (!colno || colno == keyno + 1)) + { + int16 opt = indoption->values[keyno]; + Oid indcoll = indcollation->values[keyno]; + Datum attoptions = get_attoptions(indexrelid, keyno + 1); + bool has_options = attoptions != (Datum) 0; + + /* Add collation, if not default for column */ + if (OidIsValid(indcoll) && indcoll != keycolcollation) + appendStringInfo(&buf, " COLLATE %s", + generate_collation_name((indcoll))); + + /* Add the operator class name, if not default */ + get_opclass_name(indclass->values[keyno], + has_options ? 
InvalidOid : keycoltype, &buf); + + if (has_options) + { + appendStringInfoString(&buf, " ("); + get_reloptions(&buf, attoptions); + appendStringInfoChar(&buf, ')'); + } + + /* Add options if relevant */ + if (amroutine->amcanorder) + { + /* if it supports sort ordering, report DESC and NULLS opts */ + if (opt & INDOPTION_DESC) + { + appendStringInfoString(&buf, " DESC"); + /* NULLS FIRST is the default in this case */ + if (!(opt & INDOPTION_NULLS_FIRST)) + appendStringInfoString(&buf, " NULLS LAST"); + } + else + { + if (opt & INDOPTION_NULLS_FIRST) + appendStringInfoString(&buf, " NULLS FIRST"); + } + } + + /* Add the exclusion operator if relevant */ + if (excludeOps != NULL) + appendStringInfo(&buf, " WITH %s", + generate_operator_name(excludeOps[keyno], + keycoltype, + keycoltype)); + } + } + + if (!attrsOnly) + { + appendStringInfoChar(&buf, ')'); + + if (idxrec->indnullsnotdistinct) + appendStringInfoString(&buf, " NULLS NOT DISTINCT"); + + /* + * If it has options, append "WITH (options)" + */ + str = flatten_reloptions(indexrelid); + if (str) + { + appendStringInfo(&buf, " WITH (%s)", str); + pfree(str); + } + + /* + * Print tablespace, but only if requested + */ + if (showTblSpc) + { + Oid tblspc; + + tblspc = get_rel_tablespace(indexrelid); + if (OidIsValid(tblspc)) + { + if (isConstraint) + appendStringInfoString(&buf, " USING INDEX"); + appendStringInfo(&buf, " TABLESPACE %s", + quote_identifier(get_tablespace_name(tblspc))); + } + } + + /* + * If it's a partial index, decompile and append the predicate + */ + if (!heap_attisnull(ht_idx, Anum_pg_index_indpred, NULL)) + { + Node *node; + Datum predDatum; + char *predString; + + /* Convert text string to node tree */ + predDatum = SysCacheGetAttrNotNull(INDEXRELID, ht_idx, + Anum_pg_index_indpred); + predString = TextDatumGetCString(predDatum); + node = (Node *) stringToNode(predString); + pfree(predString); + + /* Deparse */ + str = deparse_expression_pretty(node, context, false, false, + prettyFlags, 0); + if (isConstraint) + appendStringInfo(&buf, " WHERE (%s)", str); + else + appendStringInfo(&buf, " WHERE %s", str); + } + } + + /* Clean up */ + ReleaseSysCache(ht_idx); + ReleaseSysCache(ht_idxrel); + ReleaseSysCache(ht_am); + + return buf.data; +} + +/* ---------- + * pg_get_querydef + * + * Public entry point to deparse one query parsetree. + * The pretty flags are determined by GET_PRETTY_FLAGS(pretty). + * + * The result is a palloc'd C string. + * ---------- + */ +char * +pg_get_querydef(Query *query, bool pretty) +{ + StringInfoData buf; + int prettyFlags; + + prettyFlags = GET_PRETTY_FLAGS(pretty); + + initStringInfo(&buf); + + get_query_def(query, &buf, NIL, NULL, true, + prettyFlags, WRAP_COLUMN_DEFAULT, 0); + + return buf.data; +} + +/* + * pg_get_statisticsobjdef + * Get the definition of an extended statistics object + */ +Datum +pg_get_statisticsobjdef(PG_FUNCTION_ARGS) +{ + Oid statextid = PG_GETARG_OID(0); + char *res; + + res = pg_get_statisticsobj_worker(statextid, false, true); + + if (res == NULL) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(string_to_text(res)); +} + +/* + * Internal version for use by ALTER TABLE. + * Includes a tablespace clause in the result. + * Returns a palloc'd C string; no pretty-printing. 
+ */ +char * +pg_get_statisticsobjdef_string(Oid statextid) +{ + return pg_get_statisticsobj_worker(statextid, false, false); +} + +/* + * pg_get_statisticsobjdef_columns + * Get columns and expressions for an extended statistics object + */ +Datum +pg_get_statisticsobjdef_columns(PG_FUNCTION_ARGS) +{ + Oid statextid = PG_GETARG_OID(0); + char *res; + + res = pg_get_statisticsobj_worker(statextid, true, true); + + if (res == NULL) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(string_to_text(res)); +} + +/* + * Internal workhorse to decompile an extended statistics object. + */ +static char * +pg_get_statisticsobj_worker(Oid statextid, bool columns_only, bool missing_ok) +{ + Form_pg_statistic_ext statextrec; + HeapTuple statexttup; + StringInfoData buf; + int colno; + char *nsp; + ArrayType *arr; + char *enabled; + Datum datum; + bool ndistinct_enabled; + bool dependencies_enabled; + bool mcv_enabled; + int i; + List *context; + ListCell *lc; + List *exprs = NIL; + bool has_exprs; + int ncolumns; + + statexttup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statextid)); + + if (!HeapTupleIsValid(statexttup)) + { + if (missing_ok) + return NULL; + elog(ERROR, "cache lookup failed for statistics object %u", statextid); + } + + /* has the statistics expressions? */ + has_exprs = !heap_attisnull(statexttup, Anum_pg_statistic_ext_stxexprs, NULL); + + statextrec = (Form_pg_statistic_ext) GETSTRUCT(statexttup); + + /* + * Get the statistics expressions, if any. (NOTE: we do not use the + * relcache versions of the expressions, because we want to display + * non-const-folded expressions.) + */ + if (has_exprs) + { + Datum exprsDatum; + char *exprsString; + + exprsDatum = SysCacheGetAttrNotNull(STATEXTOID, statexttup, + Anum_pg_statistic_ext_stxexprs); + exprsString = TextDatumGetCString(exprsDatum); + exprs = (List *) stringToNode(exprsString); + pfree(exprsString); + } + else + exprs = NIL; + + /* count the number of columns (attributes and expressions) */ + ncolumns = statextrec->stxkeys.dim1 + list_length(exprs); + + initStringInfo(&buf); + + if (!columns_only) + { + nsp = get_namespace_name_or_temp(statextrec->stxnamespace); + appendStringInfo(&buf, "CREATE STATISTICS %s", + quote_qualified_identifier(nsp, + NameStr(statextrec->stxname))); + + /* + * Decode the stxkind column so that we know which stats types to + * print. + */ + datum = SysCacheGetAttrNotNull(STATEXTOID, statexttup, + Anum_pg_statistic_ext_stxkind); + arr = DatumGetArrayTypeP(datum); + if (ARR_NDIM(arr) != 1 || + ARR_HASNULL(arr) || + ARR_ELEMTYPE(arr) != CHAROID) + elog(ERROR, "stxkind is not a 1-D char array"); + enabled = (char *) ARR_DATA_PTR(arr); + + ndistinct_enabled = false; + dependencies_enabled = false; + mcv_enabled = false; + + for (i = 0; i < ARR_DIMS(arr)[0]; i++) + { + if (enabled[i] == STATS_EXT_NDISTINCT) + ndistinct_enabled = true; + else if (enabled[i] == STATS_EXT_DEPENDENCIES) + dependencies_enabled = true; + else if (enabled[i] == STATS_EXT_MCV) + mcv_enabled = true; + + /* ignore STATS_EXT_EXPRESSIONS (it's built automatically) */ + } + + /* + * If any option is disabled, then we'll need to append the types + * clause to show which options are enabled. We omit the types clause + * on purpose when all options are enabled, so a pg_dump/pg_restore + * will create all statistics types on a newer postgres version, if + * the statistics had all options enabled on the original version. + * + * But if the statistics is defined on just a single column, it has to + * be an expression statistics. 
In that case we don't need to specify + * kinds. + */ + if ((!ndistinct_enabled || !dependencies_enabled || !mcv_enabled) && + (ncolumns > 1)) + { + bool gotone = false; + + appendStringInfoString(&buf, " ("); + + if (ndistinct_enabled) + { + appendStringInfoString(&buf, "ndistinct"); + gotone = true; + } + + if (dependencies_enabled) + { + appendStringInfo(&buf, "%sdependencies", gotone ? ", " : ""); + gotone = true; + } + + if (mcv_enabled) + appendStringInfo(&buf, "%smcv", gotone ? ", " : ""); + + appendStringInfoChar(&buf, ')'); + } + + appendStringInfoString(&buf, " ON "); + } + + /* decode simple column references */ + for (colno = 0; colno < statextrec->stxkeys.dim1; colno++) + { + AttrNumber attnum = statextrec->stxkeys.values[colno]; + char *attname; + + if (colno > 0) + appendStringInfoString(&buf, ", "); + + attname = get_attname(statextrec->stxrelid, attnum, false); + + appendStringInfoString(&buf, quote_identifier(attname)); + } + + context = deparse_context_for(get_relation_name(statextrec->stxrelid), + statextrec->stxrelid); + + foreach(lc, exprs) + { + Node *expr = (Node *) lfirst(lc); + char *str; + int prettyFlags = PRETTYFLAG_PAREN; + + str = deparse_expression_pretty(expr, context, false, false, + prettyFlags, 0); + + if (colno > 0) + appendStringInfoString(&buf, ", "); + + /* Need parens if it's not a bare function call */ + if (looks_like_function(expr)) + appendStringInfoString(&buf, str); + else + appendStringInfo(&buf, "(%s)", str); + + colno++; + } + + if (!columns_only) + appendStringInfo(&buf, " FROM %s", + generate_relation_name(statextrec->stxrelid, NIL)); + + ReleaseSysCache(statexttup); + + return buf.data; +} + +/* + * Generate text array of expressions for statistics object. + */ +Datum +pg_get_statisticsobjdef_expressions(PG_FUNCTION_ARGS) +{ + Oid statextid = PG_GETARG_OID(0); + Form_pg_statistic_ext statextrec; + HeapTuple statexttup; + Datum datum; + List *context; + ListCell *lc; + List *exprs = NIL; + bool has_exprs; + char *tmp; + ArrayBuildState *astate = NULL; + + statexttup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statextid)); + + if (!HeapTupleIsValid(statexttup)) + PG_RETURN_NULL(); + + /* Does the stats object have expressions? */ + has_exprs = !heap_attisnull(statexttup, Anum_pg_statistic_ext_stxexprs, NULL); + + /* no expressions? we're done */ + if (!has_exprs) + { + ReleaseSysCache(statexttup); + PG_RETURN_NULL(); + } + + statextrec = (Form_pg_statistic_ext) GETSTRUCT(statexttup); + + /* + * Get the statistics expressions, and deparse them into text values. 
+ */ + datum = SysCacheGetAttrNotNull(STATEXTOID, statexttup, + Anum_pg_statistic_ext_stxexprs); + tmp = TextDatumGetCString(datum); + exprs = (List *) stringToNode(tmp); + pfree(tmp); + + context = deparse_context_for(get_relation_name(statextrec->stxrelid), + statextrec->stxrelid); + + foreach(lc, exprs) + { + Node *expr = (Node *) lfirst(lc); + char *str; + int prettyFlags = PRETTYFLAG_INDENT; + + str = deparse_expression_pretty(expr, context, false, false, + prettyFlags, 0); + + astate = accumArrayResult(astate, + PointerGetDatum(cstring_to_text(str)), + false, + TEXTOID, + CurrentMemoryContext); + } + + ReleaseSysCache(statexttup); + + PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext)); +} + +/* + * pg_get_partkeydef + * + * Returns the partition key specification, ie, the following: + * + * { RANGE | LIST | HASH } (column opt_collation opt_opclass [, ...]) + */ +Datum +pg_get_partkeydef(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + char *res; + + res = pg_get_partkeydef_worker(relid, PRETTYFLAG_INDENT, false, true); + + if (res == NULL) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(string_to_text(res)); +} + +/* Internal version that just reports the column definitions */ +char * +pg_get_partkeydef_columns(Oid relid, bool pretty) +{ + int prettyFlags; + + prettyFlags = GET_PRETTY_FLAGS(pretty); + + return pg_get_partkeydef_worker(relid, prettyFlags, true, false); +} + +/* + * Internal workhorse to decompile a partition key definition. + */ +static char * +pg_get_partkeydef_worker(Oid relid, int prettyFlags, + bool attrsOnly, bool missing_ok) +{ + Form_pg_partitioned_table form; + HeapTuple tuple; + oidvector *partclass; + oidvector *partcollation; + List *partexprs; + ListCell *partexpr_item; + List *context; + Datum datum; + StringInfoData buf; + int keyno; + char *str; + char *sep; + + tuple = SearchSysCache1(PARTRELID, ObjectIdGetDatum(relid)); + if (!HeapTupleIsValid(tuple)) + { + if (missing_ok) + return NULL; + elog(ERROR, "cache lookup failed for partition key of %u", relid); + } + + form = (Form_pg_partitioned_table) GETSTRUCT(tuple); + + Assert(form->partrelid == relid); + + /* Must get partclass and partcollation the hard way */ + datum = SysCacheGetAttrNotNull(PARTRELID, tuple, + Anum_pg_partitioned_table_partclass); + partclass = (oidvector *) DatumGetPointer(datum); + + datum = SysCacheGetAttrNotNull(PARTRELID, tuple, + Anum_pg_partitioned_table_partcollation); + partcollation = (oidvector *) DatumGetPointer(datum); + + + /* + * Get the expressions, if any. (NOTE: we do not use the relcache + * versions of the expressions, because we want to display + * non-const-folded expressions.) 
+ */ + if (!heap_attisnull(tuple, Anum_pg_partitioned_table_partexprs, NULL)) + { + Datum exprsDatum; + char *exprsString; + + exprsDatum = SysCacheGetAttrNotNull(PARTRELID, tuple, + Anum_pg_partitioned_table_partexprs); + exprsString = TextDatumGetCString(exprsDatum); + partexprs = (List *) stringToNode(exprsString); + + if (!IsA(partexprs, List)) + elog(ERROR, "unexpected node type found in partexprs: %d", + (int) nodeTag(partexprs)); + + pfree(exprsString); + } + else + partexprs = NIL; + + partexpr_item = list_head(partexprs); + context = deparse_context_for(get_relation_name(relid), relid); + + initStringInfo(&buf); + + switch (form->partstrat) + { + case PARTITION_STRATEGY_HASH: + if (!attrsOnly) + appendStringInfoString(&buf, "HASH"); + break; + case PARTITION_STRATEGY_LIST: + if (!attrsOnly) + appendStringInfoString(&buf, "LIST"); + break; + case PARTITION_STRATEGY_RANGE: + if (!attrsOnly) + appendStringInfoString(&buf, "RANGE"); + break; + default: + elog(ERROR, "unexpected partition strategy: %d", + (int) form->partstrat); + } + + if (!attrsOnly) + appendStringInfoString(&buf, " ("); + sep = ""; + for (keyno = 0; keyno < form->partnatts; keyno++) + { + AttrNumber attnum = form->partattrs.values[keyno]; + Oid keycoltype; + Oid keycolcollation; + Oid partcoll; + + appendStringInfoString(&buf, sep); + sep = ", "; + if (attnum != 0) + { + /* Simple attribute reference */ + char *attname; + int32 keycoltypmod; + + attname = get_attname(relid, attnum, false); + appendStringInfoString(&buf, quote_identifier(attname)); + get_atttypetypmodcoll(relid, attnum, + &keycoltype, &keycoltypmod, + &keycolcollation); + } + else + { + /* Expression */ + Node *partkey; + + if (partexpr_item == NULL) + elog(ERROR, "too few entries in partexprs list"); + partkey = (Node *) lfirst(partexpr_item); + partexpr_item = lnext(partexprs, partexpr_item); + + /* Deparse */ + str = deparse_expression_pretty(partkey, context, false, false, + prettyFlags, 0); + /* Need parens if it's not a bare function call */ + if (looks_like_function(partkey)) + appendStringInfoString(&buf, str); + else + appendStringInfo(&buf, "(%s)", str); + + keycoltype = exprType(partkey); + keycolcollation = exprCollation(partkey); + } + + /* Add collation, if not default for column */ + partcoll = partcollation->values[keyno]; + if (!attrsOnly && OidIsValid(partcoll) && partcoll != keycolcollation) + appendStringInfo(&buf, " COLLATE %s", + generate_collation_name((partcoll))); + + /* Add the operator class name, if not default */ + if (!attrsOnly) + get_opclass_name(partclass->values[keyno], keycoltype, &buf); + } + + if (!attrsOnly) + appendStringInfoChar(&buf, ')'); + + /* Clean up */ + ReleaseSysCache(tuple); + + return buf.data; +} + +/* + * pg_get_partition_constraintdef + * + * Returns partition constraint expression as a string for the input relation + */ +Datum +pg_get_partition_constraintdef(PG_FUNCTION_ARGS) +{ + Oid relationId = PG_GETARG_OID(0); + Expr *constr_expr; + int prettyFlags; + List *context; + char *consrc; + + constr_expr = get_partition_qual_relid(relationId); + + /* Quick exit if no partition constraint */ + if (constr_expr == NULL) + PG_RETURN_NULL(); + + /* + * Deparse and return the constraint expression. 
+ */ + prettyFlags = PRETTYFLAG_INDENT; + context = deparse_context_for(get_relation_name(relationId), relationId); + consrc = deparse_expression_pretty((Node *) constr_expr, context, false, + false, prettyFlags, 0); + + PG_RETURN_TEXT_P(string_to_text(consrc)); +} + +/* + * pg_get_partconstrdef_string + * + * Returns the partition constraint as a C-string for the input relation, with + * the given alias. No pretty-printing. + */ +char * +pg_get_partconstrdef_string(Oid partitionId, char *aliasname) +{ + Expr *constr_expr; + List *context; + + constr_expr = get_partition_qual_relid(partitionId); + context = deparse_context_for(aliasname, partitionId); + + return deparse_expression((Node *) constr_expr, context, true, false); +} + +/* + * pg_get_constraintdef + * + * Returns the definition for the constraint, ie, everything that needs to + * appear after "ALTER TABLE ... ADD CONSTRAINT <constraintname>". + */ +Datum +pg_get_constraintdef(PG_FUNCTION_ARGS) +{ + Oid constraintId = PG_GETARG_OID(0); + int prettyFlags; + char *res; + + prettyFlags = PRETTYFLAG_INDENT; + + res = pg_get_constraintdef_worker(constraintId, false, prettyFlags, true); + + if (res == NULL) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(string_to_text(res)); +} + +Datum +pg_get_constraintdef_ext(PG_FUNCTION_ARGS) +{ + Oid constraintId = PG_GETARG_OID(0); + bool pretty = PG_GETARG_BOOL(1); + int prettyFlags; + char *res; + + prettyFlags = GET_PRETTY_FLAGS(pretty); + + res = pg_get_constraintdef_worker(constraintId, false, prettyFlags, true); + + if (res == NULL) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(string_to_text(res)); +} + +/* + * Internal version that returns a full ALTER TABLE ... ADD CONSTRAINT command + */ +char * +pg_get_constraintdef_command(Oid constraintId) +{ + return pg_get_constraintdef_worker(constraintId, true, 0, false); +} + +/* + * As of 9.4, we now use an MVCC snapshot for this. + */ +static char * +pg_get_constraintdef_worker(Oid constraintId, bool fullCommand, + int prettyFlags, bool missing_ok) +{ + HeapTuple tup; + Form_pg_constraint conForm; + StringInfoData buf; + SysScanDesc scandesc; + ScanKeyData scankey[1]; + Snapshot snapshot = RegisterSnapshot(GetTransactionSnapshot()); + Relation relation = table_open(ConstraintRelationId, AccessShareLock); + + ScanKeyInit(&scankey[0], + Anum_pg_constraint_oid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(constraintId)); + + scandesc = systable_beginscan(relation, + ConstraintOidIndexId, + true, + snapshot, + 1, + scankey); + + /* + * We later use the tuple with SysCacheGetAttr() as if we had obtained it + * via SearchSysCache, which works fine. + */ + tup = systable_getnext(scandesc); + + UnregisterSnapshot(snapshot); + + if (!HeapTupleIsValid(tup)) + { + if (missing_ok) + { + systable_endscan(scandesc); + table_close(relation, AccessShareLock); + return NULL; + } + elog(ERROR, "could not find tuple for constraint %u", constraintId); + } + + conForm = (Form_pg_constraint) GETSTRUCT(tup); + + initStringInfo(&buf); + + if (fullCommand) + { + if (OidIsValid(conForm->conrelid)) + { + /* + * Currently, callers want ALTER TABLE (without ONLY) for CHECK + * constraints, and other types of constraints don't inherit + * anyway so it doesn't matter whether we say ONLY or not. Someday + * we might need to let callers specify whether to put ONLY in the + * command. 
+ */ + appendStringInfo(&buf, "ALTER TABLE %s ADD CONSTRAINT %s ", + generate_qualified_relation_name(conForm->conrelid), + quote_identifier(NameStr(conForm->conname))); + } + else + { + /* Must be a domain constraint */ + Assert(OidIsValid(conForm->contypid)); + appendStringInfo(&buf, "ALTER DOMAIN %s ADD CONSTRAINT %s ", + generate_qualified_type_name(conForm->contypid), + quote_identifier(NameStr(conForm->conname))); + } + } + + switch (conForm->contype) + { + case CONSTRAINT_FOREIGN: + { + Datum val; + bool isnull; + const char *string; + + /* Start off the constraint definition */ + appendStringInfoString(&buf, "FOREIGN KEY ("); + + /* Fetch and build referencing-column list */ + val = SysCacheGetAttrNotNull(CONSTROID, tup, + Anum_pg_constraint_conkey); + + decompile_column_index_array(val, conForm->conrelid, &buf); + + /* add foreign relation name */ + appendStringInfo(&buf, ") REFERENCES %s(", + generate_relation_name(conForm->confrelid, + NIL)); + + /* Fetch and build referenced-column list */ + val = SysCacheGetAttrNotNull(CONSTROID, tup, + Anum_pg_constraint_confkey); + + decompile_column_index_array(val, conForm->confrelid, &buf); + + appendStringInfoChar(&buf, ')'); + + /* Add match type */ + switch (conForm->confmatchtype) + { + case FKCONSTR_MATCH_FULL: + string = " MATCH FULL"; + break; + case FKCONSTR_MATCH_PARTIAL: + string = " MATCH PARTIAL"; + break; + case FKCONSTR_MATCH_SIMPLE: + string = ""; + break; + default: + elog(ERROR, "unrecognized confmatchtype: %d", + conForm->confmatchtype); + string = ""; /* keep compiler quiet */ + break; + } + appendStringInfoString(&buf, string); + + /* Add ON UPDATE and ON DELETE clauses, if needed */ + switch (conForm->confupdtype) + { + case FKCONSTR_ACTION_NOACTION: + string = NULL; /* suppress default */ + break; + case FKCONSTR_ACTION_RESTRICT: + string = "RESTRICT"; + break; + case FKCONSTR_ACTION_CASCADE: + string = "CASCADE"; + break; + case FKCONSTR_ACTION_SETNULL: + string = "SET NULL"; + break; + case FKCONSTR_ACTION_SETDEFAULT: + string = "SET DEFAULT"; + break; + default: + elog(ERROR, "unrecognized confupdtype: %d", + conForm->confupdtype); + string = NULL; /* keep compiler quiet */ + break; + } + if (string) + appendStringInfo(&buf, " ON UPDATE %s", string); + + switch (conForm->confdeltype) + { + case FKCONSTR_ACTION_NOACTION: + string = NULL; /* suppress default */ + break; + case FKCONSTR_ACTION_RESTRICT: + string = "RESTRICT"; + break; + case FKCONSTR_ACTION_CASCADE: + string = "CASCADE"; + break; + case FKCONSTR_ACTION_SETNULL: + string = "SET NULL"; + break; + case FKCONSTR_ACTION_SETDEFAULT: + string = "SET DEFAULT"; + break; + default: + elog(ERROR, "unrecognized confdeltype: %d", + conForm->confdeltype); + string = NULL; /* keep compiler quiet */ + break; + } + if (string) + appendStringInfo(&buf, " ON DELETE %s", string); + + /* + * Add columns specified to SET NULL or SET DEFAULT if + * provided. 
+ */ + val = SysCacheGetAttr(CONSTROID, tup, + Anum_pg_constraint_confdelsetcols, &isnull); + if (!isnull) + { + appendStringInfoString(&buf, " ("); + decompile_column_index_array(val, conForm->conrelid, &buf); + appendStringInfoChar(&buf, ')'); + } + + break; + } + case CONSTRAINT_PRIMARY: + case CONSTRAINT_UNIQUE: + { + Datum val; + Oid indexId; + int keyatts; + HeapTuple indtup; + + /* Start off the constraint definition */ + if (conForm->contype == CONSTRAINT_PRIMARY) + appendStringInfoString(&buf, "PRIMARY KEY "); + else + appendStringInfoString(&buf, "UNIQUE "); + + indexId = conForm->conindid; + + indtup = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId)); + if (!HeapTupleIsValid(indtup)) + elog(ERROR, "cache lookup failed for index %u", indexId); + if (conForm->contype == CONSTRAINT_UNIQUE && + ((Form_pg_index) GETSTRUCT(indtup))->indnullsnotdistinct) + appendStringInfoString(&buf, "NULLS NOT DISTINCT "); + + appendStringInfoChar(&buf, '('); + + /* Fetch and build target column list */ + val = SysCacheGetAttrNotNull(CONSTROID, tup, + Anum_pg_constraint_conkey); + + keyatts = decompile_column_index_array(val, conForm->conrelid, &buf); + + appendStringInfoChar(&buf, ')'); + + /* Build including column list (from pg_index.indkeys) */ + val = SysCacheGetAttrNotNull(INDEXRELID, indtup, + Anum_pg_index_indnatts); + if (DatumGetInt32(val) > keyatts) + { + Datum cols; + Datum *keys; + int nKeys; + int j; + + appendStringInfoString(&buf, " INCLUDE ("); + + cols = SysCacheGetAttrNotNull(INDEXRELID, indtup, + Anum_pg_index_indkey); + + deconstruct_array_builtin(DatumGetArrayTypeP(cols), INT2OID, + &keys, NULL, &nKeys); + + for (j = keyatts; j < nKeys; j++) + { + char *colName; + + colName = get_attname(conForm->conrelid, + DatumGetInt16(keys[j]), false); + if (j > keyatts) + appendStringInfoString(&buf, ", "); + appendStringInfoString(&buf, quote_identifier(colName)); + } + + appendStringInfoChar(&buf, ')'); + } + ReleaseSysCache(indtup); + + /* XXX why do we only print these bits if fullCommand? */ + if (fullCommand && OidIsValid(indexId)) + { + char *options = flatten_reloptions(indexId); + Oid tblspc; + + if (options) + { + appendStringInfo(&buf, " WITH (%s)", options); + pfree(options); + } + + /* + * Print the tablespace, unless it's the database default. + * This is to help ALTER TABLE usage of this facility, + * which needs this behavior to recreate exact catalog + * state. + */ + tblspc = get_rel_tablespace(indexId); + if (OidIsValid(tblspc)) + appendStringInfo(&buf, " USING INDEX TABLESPACE %s", + quote_identifier(get_tablespace_name(tblspc))); + } + + break; + } + case CONSTRAINT_CHECK: + { + Datum val; + char *conbin; + char *consrc; + Node *expr; + List *context; + + /* Fetch constraint expression in parsetree form */ + val = SysCacheGetAttrNotNull(CONSTROID, tup, + Anum_pg_constraint_conbin); + + conbin = TextDatumGetCString(val); + expr = stringToNode(conbin); + + /* Set up deparsing context for Var nodes in constraint */ + if (conForm->conrelid != InvalidOid) + { + /* relation constraint */ + context = deparse_context_for(get_relation_name(conForm->conrelid), + conForm->conrelid); + } + else + { + /* domain constraint --- can't have Vars */ + context = NIL; + } + + consrc = deparse_expression_pretty(expr, context, false, false, + prettyFlags, 0); + + /* + * Now emit the constraint definition, adding NO INHERIT if + * necessary. + * + * There are cases where the constraint expression will be + * fully parenthesized and we don't need the outer parens ... 
+ * but there are other cases where we do need 'em. Be + * conservative for now. + * + * Note that simply checking for leading '(' and trailing ')' + * would NOT be good enough, consider "(x > 0) AND (y > 0)". + */ + appendStringInfo(&buf, "CHECK (%s)%s", + consrc, + conForm->connoinherit ? " NO INHERIT" : ""); + break; + } + case CONSTRAINT_TRIGGER: + + /* + * There isn't an ALTER TABLE syntax for creating a user-defined + * constraint trigger, but it seems better to print something than + * throw an error; if we throw error then this function couldn't + * safely be applied to all rows of pg_constraint. + */ + appendStringInfoString(&buf, "TRIGGER"); + break; + case CONSTRAINT_EXCLUSION: + { + Oid indexOid = conForm->conindid; + Datum val; + Datum *elems; + int nElems; + int i; + Oid *operators; + + /* Extract operator OIDs from the pg_constraint tuple */ + val = SysCacheGetAttrNotNull(CONSTROID, tup, + Anum_pg_constraint_conexclop); + + deconstruct_array_builtin(DatumGetArrayTypeP(val), OIDOID, + &elems, NULL, &nElems); + + operators = (Oid *) palloc(nElems * sizeof(Oid)); + for (i = 0; i < nElems; i++) + operators[i] = DatumGetObjectId(elems[i]); + + /* pg_get_indexdef_worker does the rest */ + /* suppress tablespace because pg_dump wants it that way */ + appendStringInfoString(&buf, + pg_get_indexdef_worker(indexOid, + 0, + operators, + false, + false, + false, + false, + prettyFlags, + false)); + break; + } + default: + elog(ERROR, "invalid constraint type \"%c\"", conForm->contype); + break; + } + + if (conForm->condeferrable) + appendStringInfoString(&buf, " DEFERRABLE"); + if (conForm->condeferred) + appendStringInfoString(&buf, " INITIALLY DEFERRED"); + if (!conForm->convalidated) + appendStringInfoString(&buf, " NOT VALID"); + + /* Cleanup */ + systable_endscan(scandesc); + table_close(relation, AccessShareLock); + + return buf.data; +} + + +/* + * Convert an int16[] Datum into a comma-separated list of column names + * for the indicated relation; append the list to buf. Returns the number + * of keys. + */ +static int +decompile_column_index_array(Datum column_index_array, Oid relId, + StringInfo buf) +{ + Datum *keys; + int nKeys; + int j; + + /* Extract data from array of int16 */ + deconstruct_array_builtin(DatumGetArrayTypeP(column_index_array), INT2OID, + &keys, NULL, &nKeys); + + for (j = 0; j < nKeys; j++) + { + char *colName; + + colName = get_attname(relId, DatumGetInt16(keys[j]), false); + + if (j == 0) + appendStringInfoString(buf, quote_identifier(colName)); + else + appendStringInfo(buf, ", %s", quote_identifier(colName)); + } + + return nKeys; +} + + +/* ---------- + * pg_get_expr - Decompile an expression tree + * + * Input: an expression tree in nodeToString form, and a relation OID + * + * Output: reverse-listed expression + * + * Currently, the expression can only refer to a single relation, namely + * the one specified by the second parameter. This is sufficient for + * partial indexes, column default expressions, etc. We also support + * Var-free expressions, for which the OID can be InvalidOid. + * + * If the OID is nonzero but not actually valid, don't throw an error, + * just return NULL. This is a bit questionable, but it's what we've + * done historically, and it can help avoid unwanted failures when + * examining catalog entries for just-deleted relations. + * + * We expect this function to work, or throw a reasonably clean error, + * for any node tree that can appear in a catalog pg_node_tree column. 
+ * Query trees, such as those appearing in pg_rewrite.ev_action, are + * not supported. Nor are expressions in more than one relation, which + * can appear in places like pg_rewrite.ev_qual. + * ---------- + */ +Datum +pg_get_expr(PG_FUNCTION_ARGS) +{ + text *expr = PG_GETARG_TEXT_PP(0); + Oid relid = PG_GETARG_OID(1); + text *result; + int prettyFlags; + + prettyFlags = PRETTYFLAG_INDENT; + + result = pg_get_expr_worker(expr, relid, prettyFlags); + if (result) + PG_RETURN_TEXT_P(result); + else + PG_RETURN_NULL(); +} + +Datum +pg_get_expr_ext(PG_FUNCTION_ARGS) +{ + text *expr = PG_GETARG_TEXT_PP(0); + Oid relid = PG_GETARG_OID(1); + bool pretty = PG_GETARG_BOOL(2); + text *result; + int prettyFlags; + + prettyFlags = GET_PRETTY_FLAGS(pretty); + + result = pg_get_expr_worker(expr, relid, prettyFlags); + if (result) + PG_RETURN_TEXT_P(result); + else + PG_RETURN_NULL(); +} + +static text * +pg_get_expr_worker(text *expr, Oid relid, int prettyFlags) +{ + Node *node; + Node *tst; + Relids relids; + List *context; + char *exprstr; + Relation rel = NULL; + char *str; + + /* Convert input pg_node_tree (really TEXT) object to C string */ + exprstr = text_to_cstring(expr); + + /* Convert expression to node tree */ + node = (Node *) stringToNode(exprstr); + + pfree(exprstr); + + /* + * Throw error if the input is a querytree rather than an expression tree. + * While we could support queries here, there seems no very good reason + * to. In most such catalog columns, we'll see a List of Query nodes, or + * even nested Lists, so drill down to a non-List node before checking. + */ + tst = node; + while (tst && IsA(tst, List)) + tst = linitial((List *) tst); + if (tst && IsA(tst, Query)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("input is a query, not an expression"))); + + /* + * Throw error if the expression contains Vars we won't be able to + * deparse. + */ + relids = pull_varnos(NULL, node); + if (OidIsValid(relid)) + { + if (!bms_is_subset(relids, bms_make_singleton(1))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("expression contains variables of more than one relation"))); + } + else + { + if (!bms_is_empty(relids)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("expression contains variables"))); + } + + /* + * Prepare deparse context if needed. If we are deparsing with a relid, + * we need to transiently open and lock the rel, to make sure it won't go + * away underneath us. (set_relation_column_names would lock it anyway, + * so this isn't really introducing any new behavior.) 
+ */ + if (OidIsValid(relid)) + { + rel = try_relation_open(relid, AccessShareLock); + if (rel == NULL) + return NULL; + context = deparse_context_for(RelationGetRelationName(rel), relid); + } + else + context = NIL; + + /* Deparse */ + str = deparse_expression_pretty(node, context, false, false, + prettyFlags, 0); + + if (rel != NULL) + relation_close(rel, AccessShareLock); + + return string_to_text(str); +} + + +/* ---------- + * pg_get_userbyid - Get a user name by roleid and + * fallback to 'unknown (OID=n)' + * ---------- + */ +Datum +pg_get_userbyid(PG_FUNCTION_ARGS) +{ + Oid roleid = PG_GETARG_OID(0); + Name result; + HeapTuple roletup; + Form_pg_authid role_rec; + + /* + * Allocate space for the result + */ + result = (Name) palloc(NAMEDATALEN); + memset(NameStr(*result), 0, NAMEDATALEN); + + /* + * Get the pg_authid entry and print the result + */ + roletup = SearchSysCache1(AUTHOID, ObjectIdGetDatum(roleid)); + if (HeapTupleIsValid(roletup)) + { + role_rec = (Form_pg_authid) GETSTRUCT(roletup); + *result = role_rec->rolname; + ReleaseSysCache(roletup); + } + else + sprintf(NameStr(*result), "unknown (OID=%u)", roleid); + + PG_RETURN_NAME(result); +} + + +/* + * pg_get_serial_sequence + * Get the name of the sequence used by an identity or serial column, + * formatted suitably for passing to setval, nextval or currval. + * First parameter is not treated as double-quoted, second parameter + * is --- see documentation for reason. + */ +Datum +pg_get_serial_sequence(PG_FUNCTION_ARGS) +{ + text *tablename = PG_GETARG_TEXT_PP(0); + text *columnname = PG_GETARG_TEXT_PP(1); + RangeVar *tablerv; + Oid tableOid; + char *column; + AttrNumber attnum; + Oid sequenceId = InvalidOid; + Relation depRel; + ScanKeyData key[3]; + SysScanDesc scan; + HeapTuple tup; + + /* Look up table name. Can't lock it - we might not have privileges. */ + tablerv = makeRangeVarFromNameList(textToQualifiedNameList(tablename)); + tableOid = RangeVarGetRelid(tablerv, NoLock, false); + + /* Get the number of the column */ + column = text_to_cstring(columnname); + + attnum = get_attnum(tableOid, column); + if (attnum == InvalidAttrNumber) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" of relation \"%s\" does not exist", + column, tablerv->relname))); + + /* Search the dependency table for the dependent sequence */ + depRel = table_open(DependRelationId, AccessShareLock); + + ScanKeyInit(&key[0], + Anum_pg_depend_refclassid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationRelationId)); + ScanKeyInit(&key[1], + Anum_pg_depend_refobjid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(tableOid)); + ScanKeyInit(&key[2], + Anum_pg_depend_refobjsubid, + BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(attnum)); + + scan = systable_beginscan(depRel, DependReferenceIndexId, true, + NULL, 3, key); + + while (HeapTupleIsValid(tup = systable_getnext(scan))) + { + Form_pg_depend deprec = (Form_pg_depend) GETSTRUCT(tup); + + /* + * Look for an auto dependency (serial column) or internal dependency + * (identity column) of a sequence on a column. (We need the relkind + * test because indexes can also have auto dependencies on columns.) 
+ */ + if (deprec->classid == RelationRelationId && + deprec->objsubid == 0 && + (deprec->deptype == DEPENDENCY_AUTO || + deprec->deptype == DEPENDENCY_INTERNAL) && + get_rel_relkind(deprec->objid) == RELKIND_SEQUENCE) + { + sequenceId = deprec->objid; + break; + } + } + + systable_endscan(scan); + table_close(depRel, AccessShareLock); + + if (OidIsValid(sequenceId)) + { + char *result; + + result = generate_qualified_relation_name(sequenceId); + + PG_RETURN_TEXT_P(string_to_text(result)); + } + + PG_RETURN_NULL(); +} + + +/* + * pg_get_functiondef + * Returns the complete "CREATE OR REPLACE FUNCTION ..." statement for + * the specified function. + * + * Note: if you change the output format of this function, be careful not + * to break psql's rules (in \ef and \sf) for identifying the start of the + * function body. To wit: the function body starts on a line that begins with + * "AS ", "BEGIN ", or "RETURN ", and no preceding line will look like that. + */ +Datum +pg_get_functiondef(PG_FUNCTION_ARGS) +{ + Oid funcid = PG_GETARG_OID(0); + StringInfoData buf; + StringInfoData dq; + HeapTuple proctup; + Form_pg_proc proc; + bool isfunction; + Datum tmp; + bool isnull; + const char *prosrc; + const char *name; + const char *nsp; + float4 procost; + int oldlen; + + initStringInfo(&buf); + + /* Look up the function */ + proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(proctup)) + PG_RETURN_NULL(); + + proc = (Form_pg_proc) GETSTRUCT(proctup); + name = NameStr(proc->proname); + + if (proc->prokind == PROKIND_AGGREGATE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is an aggregate function", name))); + + isfunction = (proc->prokind != PROKIND_PROCEDURE); + + /* + * We always qualify the function name, to ensure the right function gets + * replaced. + */ + nsp = get_namespace_name_or_temp(proc->pronamespace); + appendStringInfo(&buf, "CREATE OR REPLACE %s %s(", + isfunction ? 
"FUNCTION" : "PROCEDURE", + quote_qualified_identifier(nsp, name)); + (void) print_function_arguments(&buf, proctup, false, true); + appendStringInfoString(&buf, ")\n"); + if (isfunction) + { + appendStringInfoString(&buf, " RETURNS "); + print_function_rettype(&buf, proctup); + appendStringInfoChar(&buf, '\n'); + } + + print_function_trftypes(&buf, proctup); + + appendStringInfo(&buf, " LANGUAGE %s\n", + quote_identifier(get_language_name(proc->prolang, false))); + + /* Emit some miscellaneous options on one line */ + oldlen = buf.len; + + if (proc->prokind == PROKIND_WINDOW) + appendStringInfoString(&buf, " WINDOW"); + switch (proc->provolatile) + { + case PROVOLATILE_IMMUTABLE: + appendStringInfoString(&buf, " IMMUTABLE"); + break; + case PROVOLATILE_STABLE: + appendStringInfoString(&buf, " STABLE"); + break; + case PROVOLATILE_VOLATILE: + break; + } + + switch (proc->proparallel) + { + case PROPARALLEL_SAFE: + appendStringInfoString(&buf, " PARALLEL SAFE"); + break; + case PROPARALLEL_RESTRICTED: + appendStringInfoString(&buf, " PARALLEL RESTRICTED"); + break; + case PROPARALLEL_UNSAFE: + break; + } + + if (proc->proisstrict) + appendStringInfoString(&buf, " STRICT"); + if (proc->prosecdef) + appendStringInfoString(&buf, " SECURITY DEFINER"); + if (proc->proleakproof) + appendStringInfoString(&buf, " LEAKPROOF"); + + /* This code for the default cost and rows should match functioncmds.c */ + if (proc->prolang == INTERNALlanguageId || + proc->prolang == ClanguageId) + procost = 1; + else + procost = 100; + if (proc->procost != procost) + appendStringInfo(&buf, " COST %g", proc->procost); + + if (proc->prorows > 0 && proc->prorows != 1000) + appendStringInfo(&buf, " ROWS %g", proc->prorows); + + if (proc->prosupport) + { + Oid argtypes[1]; + + /* + * We should qualify the support function's name if it wouldn't be + * resolved by lookup in the current search path. + */ + argtypes[0] = INTERNALOID; + appendStringInfo(&buf, " SUPPORT %s", + generate_function_name(proc->prosupport, 1, + NIL, argtypes, + false, NULL, EXPR_KIND_NONE)); + } + + if (oldlen != buf.len) + appendStringInfoChar(&buf, '\n'); + + /* Emit any proconfig options, one per line */ + tmp = SysCacheGetAttr(PROCOID, proctup, Anum_pg_proc_proconfig, &isnull); + if (!isnull) + { + ArrayType *a = DatumGetArrayTypeP(tmp); + int i; + + Assert(ARR_ELEMTYPE(a) == TEXTOID); + Assert(ARR_NDIM(a) == 1); + Assert(ARR_LBOUND(a)[0] == 1); + + for (i = 1; i <= ARR_DIMS(a)[0]; i++) + { + Datum d; + + d = array_ref(a, 1, &i, + -1 /* varlenarray */ , + -1 /* TEXT's typlen */ , + false /* TEXT's typbyval */ , + TYPALIGN_INT /* TEXT's typalign */ , + &isnull); + if (!isnull) + { + char *configitem = TextDatumGetCString(d); + char *pos; + + pos = strchr(configitem, '='); + if (pos == NULL) + continue; + *pos++ = '\0'; + + appendStringInfo(&buf, " SET %s TO ", + quote_identifier(configitem)); + + /* + * Variables that are marked GUC_LIST_QUOTE were already fully + * quoted by flatten_set_variable_args() before they were put + * into the proconfig array. However, because the quoting + * rules used there aren't exactly like SQL's, we have to + * break the list value apart and then quote the elements as + * string literals. (The elements may be double-quoted as-is, + * but we can't just feed them to the SQL parser; it would do + * the wrong thing with elements that are zero-length or + * longer than NAMEDATALEN.) + * + * Variables that are not so marked should just be emitted as + * simple string literals. 
If the variable is not known to + * guc.c, we'll do that; this makes it unsafe to use + * GUC_LIST_QUOTE for extension variables. + */ + if (GetConfigOptionFlags(configitem, true) & GUC_LIST_QUOTE) + { + List *namelist; + ListCell *lc; + + /* Parse string into list of identifiers */ + if (!SplitGUCList(pos, ',', &namelist)) + { + /* this shouldn't fail really */ + elog(ERROR, "invalid list syntax in proconfig item"); + } + foreach(lc, namelist) + { + char *curname = (char *) lfirst(lc); + + simple_quote_literal(&buf, curname); + if (lnext(namelist, lc)) + appendStringInfoString(&buf, ", "); + } + } + else + simple_quote_literal(&buf, pos); + appendStringInfoChar(&buf, '\n'); + } + } + } + + /* And finally the function definition ... */ + (void) SysCacheGetAttr(PROCOID, proctup, Anum_pg_proc_prosqlbody, &isnull); + if (proc->prolang == SQLlanguageId && !isnull) + { + print_function_sqlbody(&buf, proctup); + } + else + { + appendStringInfoString(&buf, "AS "); + + tmp = SysCacheGetAttr(PROCOID, proctup, Anum_pg_proc_probin, &isnull); + if (!isnull) + { + simple_quote_literal(&buf, TextDatumGetCString(tmp)); + appendStringInfoString(&buf, ", "); /* assume prosrc isn't null */ + } + + tmp = SysCacheGetAttrNotNull(PROCOID, proctup, Anum_pg_proc_prosrc); + prosrc = TextDatumGetCString(tmp); + + /* + * We always use dollar quoting. Figure out a suitable delimiter. + * + * Since the user is likely to be editing the function body string, we + * shouldn't use a short delimiter that he might easily create a + * conflict with. Hence prefer "$function$"/"$procedure$", but extend + * if needed. + */ + initStringInfo(&dq); + appendStringInfoChar(&dq, '$'); + appendStringInfoString(&dq, (isfunction ? "function" : "procedure")); + while (strstr(prosrc, dq.data) != NULL) + appendStringInfoChar(&dq, 'x'); + appendStringInfoChar(&dq, '$'); + + appendBinaryStringInfo(&buf, dq.data, dq.len); + appendStringInfoString(&buf, prosrc); + appendBinaryStringInfo(&buf, dq.data, dq.len); + } + + appendStringInfoChar(&buf, '\n'); + + ReleaseSysCache(proctup); + + PG_RETURN_TEXT_P(string_to_text(buf.data)); +} + +/* + * pg_get_function_arguments + * Get a nicely-formatted list of arguments for a function. + * This is everything that would go between the parentheses in + * CREATE FUNCTION. + */ +Datum +pg_get_function_arguments(PG_FUNCTION_ARGS) +{ + Oid funcid = PG_GETARG_OID(0); + StringInfoData buf; + HeapTuple proctup; + + proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(proctup)) + PG_RETURN_NULL(); + + initStringInfo(&buf); + + (void) print_function_arguments(&buf, proctup, false, true); + + ReleaseSysCache(proctup); + + PG_RETURN_TEXT_P(string_to_text(buf.data)); +} + +/* + * pg_get_function_identity_arguments + * Get a formatted list of arguments for a function. + * This is everything that would go between the parentheses in + * ALTER FUNCTION, etc. In particular, don't print defaults. + */ +Datum +pg_get_function_identity_arguments(PG_FUNCTION_ARGS) +{ + Oid funcid = PG_GETARG_OID(0); + StringInfoData buf; + HeapTuple proctup; + + proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(proctup)) + PG_RETURN_NULL(); + + initStringInfo(&buf); + + (void) print_function_arguments(&buf, proctup, false, false); + + ReleaseSysCache(proctup); + + PG_RETURN_TEXT_P(string_to_text(buf.data)); +} + +/* + * pg_get_function_result + * Get a nicely-formatted version of the result type of a function. + * This is what would appear after RETURNS in CREATE FUNCTION. 
+ */ +Datum +pg_get_function_result(PG_FUNCTION_ARGS) +{ + Oid funcid = PG_GETARG_OID(0); + StringInfoData buf; + HeapTuple proctup; + + proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(proctup)) + PG_RETURN_NULL(); + + if (((Form_pg_proc) GETSTRUCT(proctup))->prokind == PROKIND_PROCEDURE) + { + ReleaseSysCache(proctup); + PG_RETURN_NULL(); + } + + initStringInfo(&buf); + + print_function_rettype(&buf, proctup); + + ReleaseSysCache(proctup); + + PG_RETURN_TEXT_P(string_to_text(buf.data)); +} + +/* + * Guts of pg_get_function_result: append the function's return type + * to the specified buffer. + */ +static void +print_function_rettype(StringInfo buf, HeapTuple proctup) +{ + Form_pg_proc proc = (Form_pg_proc) GETSTRUCT(proctup); + int ntabargs = 0; + StringInfoData rbuf; + + initStringInfo(&rbuf); + + if (proc->proretset) + { + /* It might be a table function; try to print the arguments */ + appendStringInfoString(&rbuf, "TABLE("); + ntabargs = print_function_arguments(&rbuf, proctup, true, false); + if (ntabargs > 0) + appendStringInfoChar(&rbuf, ')'); + else + resetStringInfo(&rbuf); + } + + if (ntabargs == 0) + { + /* Not a table function, so do the normal thing */ + if (proc->proretset) + appendStringInfoString(&rbuf, "SETOF "); + appendStringInfoString(&rbuf, format_type_be(proc->prorettype)); + } + + appendBinaryStringInfo(buf, rbuf.data, rbuf.len); +} + +/* + * Common code for pg_get_function_arguments and pg_get_function_result: + * append the desired subset of arguments to buf. We print only TABLE + * arguments when print_table_args is true, and all the others when it's false. + * We print argument defaults only if print_defaults is true. + * Function return value is the number of arguments printed. + */ +static int +print_function_arguments(StringInfo buf, HeapTuple proctup, + bool print_table_args, bool print_defaults) +{ + Form_pg_proc proc = (Form_pg_proc) GETSTRUCT(proctup); + int numargs; + Oid *argtypes; + char **argnames; + char *argmodes; + int insertorderbyat = -1; + int argsprinted; + int inputargno; + int nlackdefaults; + List *argdefaults = NIL; + ListCell *nextargdefault = NULL; + int i; + + numargs = get_func_arg_info(proctup, + &argtypes, &argnames, &argmodes); + + nlackdefaults = numargs; + if (print_defaults && proc->pronargdefaults > 0) + { + Datum proargdefaults; + bool isnull; + + proargdefaults = SysCacheGetAttr(PROCOID, proctup, + Anum_pg_proc_proargdefaults, + &isnull); + if (!isnull) + { + char *str; + + str = TextDatumGetCString(proargdefaults); + argdefaults = castNode(List, stringToNode(str)); + pfree(str); + nextargdefault = list_head(argdefaults); + /* nlackdefaults counts only *input* arguments lacking defaults */ + nlackdefaults = proc->pronargs - list_length(argdefaults); + } + } + + /* Check for special treatment of ordered-set aggregates */ + if (proc->prokind == PROKIND_AGGREGATE) + { + HeapTuple aggtup; + Form_pg_aggregate agg; + + aggtup = SearchSysCache1(AGGFNOID, proc->oid); + if (!HeapTupleIsValid(aggtup)) + elog(ERROR, "cache lookup failed for aggregate %u", + proc->oid); + agg = (Form_pg_aggregate) GETSTRUCT(aggtup); + if (AGGKIND_IS_ORDERED_SET(agg->aggkind)) + insertorderbyat = agg->aggnumdirectargs; + ReleaseSysCache(aggtup); + } + + argsprinted = 0; + inputargno = 0; + for (i = 0; i < numargs; i++) + { + Oid argtype = argtypes[i]; + char *argname = argnames ? argnames[i] : NULL; + char argmode = argmodes ? 
argmodes[i] : PROARGMODE_IN; + const char *modename; + bool isinput; + + switch (argmode) + { + case PROARGMODE_IN: + + /* + * For procedures, explicitly mark all argument modes, so as + * to avoid ambiguity with the SQL syntax for DROP PROCEDURE. + */ + if (proc->prokind == PROKIND_PROCEDURE) + modename = "IN "; + else + modename = ""; + isinput = true; + break; + case PROARGMODE_INOUT: + modename = "INOUT "; + isinput = true; + break; + case PROARGMODE_OUT: + modename = "OUT "; + isinput = false; + break; + case PROARGMODE_VARIADIC: + modename = "VARIADIC "; + isinput = true; + break; + case PROARGMODE_TABLE: + modename = ""; + isinput = false; + break; + default: + elog(ERROR, "invalid parameter mode '%c'", argmode); + modename = NULL; /* keep compiler quiet */ + isinput = false; + break; + } + if (isinput) + inputargno++; /* this is a 1-based counter */ + + if (print_table_args != (argmode == PROARGMODE_TABLE)) + continue; + + if (argsprinted == insertorderbyat) + { + if (argsprinted) + appendStringInfoChar(buf, ' '); + appendStringInfoString(buf, "ORDER BY "); + } + else if (argsprinted) + appendStringInfoString(buf, ", "); + + appendStringInfoString(buf, modename); + if (argname && argname[0]) + appendStringInfo(buf, "%s ", quote_identifier(argname)); + appendStringInfoString(buf, format_type_be(argtype)); + if (print_defaults && isinput && inputargno > nlackdefaults) + { + Node *expr; + + Assert(nextargdefault != NULL); + expr = (Node *) lfirst(nextargdefault); + nextargdefault = lnext(argdefaults, nextargdefault); + + appendStringInfo(buf, " DEFAULT %s", + deparse_expression(expr, NIL, false, false)); + } + argsprinted++; + + /* nasty hack: print the last arg twice for variadic ordered-set agg */ + if (argsprinted == insertorderbyat && i == numargs - 1) + { + i--; + /* aggs shouldn't have defaults anyway, but just to be sure ... */ + print_defaults = false; + } + } + + return argsprinted; +} + +static bool +is_input_argument(int nth, const char *argmodes) +{ + return (!argmodes + || argmodes[nth] == PROARGMODE_IN + || argmodes[nth] == PROARGMODE_INOUT + || argmodes[nth] == PROARGMODE_VARIADIC); +} + +/* + * Append used transformed types to specified buffer + */ +static void +print_function_trftypes(StringInfo buf, HeapTuple proctup) +{ + Oid *trftypes; + int ntypes; + + ntypes = get_func_trftypes(proctup, &trftypes); + if (ntypes > 0) + { + int i; + + appendStringInfoString(buf, " TRANSFORM "); + for (i = 0; i < ntypes; i++) + { + if (i != 0) + appendStringInfoString(buf, ", "); + appendStringInfo(buf, "FOR TYPE %s", format_type_be(trftypes[i])); + } + appendStringInfoChar(buf, '\n'); + } +} + +/* + * Get textual representation of a function argument's default value. The + * second argument of this function is the argument number among all arguments + * (i.e. proallargtypes, *not* proargtypes), starting with 1, because that's + * how information_schema.sql uses it. 
+ */ +Datum +pg_get_function_arg_default(PG_FUNCTION_ARGS) +{ + Oid funcid = PG_GETARG_OID(0); + int32 nth_arg = PG_GETARG_INT32(1); + HeapTuple proctup; + Form_pg_proc proc; + int numargs; + Oid *argtypes; + char **argnames; + char *argmodes; + int i; + List *argdefaults; + Node *node; + char *str; + int nth_inputarg; + Datum proargdefaults; + bool isnull; + int nth_default; + + proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(proctup)) + PG_RETURN_NULL(); + + numargs = get_func_arg_info(proctup, &argtypes, &argnames, &argmodes); + if (nth_arg < 1 || nth_arg > numargs || !is_input_argument(nth_arg - 1, argmodes)) + { + ReleaseSysCache(proctup); + PG_RETURN_NULL(); + } + + nth_inputarg = 0; + for (i = 0; i < nth_arg; i++) + if (is_input_argument(i, argmodes)) + nth_inputarg++; + + proargdefaults = SysCacheGetAttr(PROCOID, proctup, + Anum_pg_proc_proargdefaults, + &isnull); + if (isnull) + { + ReleaseSysCache(proctup); + PG_RETURN_NULL(); + } + + str = TextDatumGetCString(proargdefaults); + argdefaults = castNode(List, stringToNode(str)); + pfree(str); + + proc = (Form_pg_proc) GETSTRUCT(proctup); + + /* + * Calculate index into proargdefaults: proargdefaults corresponds to the + * last N input arguments, where N = pronargdefaults. + */ + nth_default = nth_inputarg - 1 - (proc->pronargs - proc->pronargdefaults); + + if (nth_default < 0 || nth_default >= list_length(argdefaults)) + { + ReleaseSysCache(proctup); + PG_RETURN_NULL(); + } + node = list_nth(argdefaults, nth_default); + str = deparse_expression(node, NIL, false, false); + + ReleaseSysCache(proctup); + + PG_RETURN_TEXT_P(string_to_text(str)); +} + +static void +print_function_sqlbody(StringInfo buf, HeapTuple proctup) +{ + int numargs; + Oid *argtypes; + char **argnames; + char *argmodes; + deparse_namespace dpns = {0}; + Datum tmp; + Node *n; + + dpns.funcname = pstrdup(NameStr(((Form_pg_proc) GETSTRUCT(proctup))->proname)); + numargs = get_func_arg_info(proctup, + &argtypes, &argnames, &argmodes); + dpns.numargs = numargs; + dpns.argnames = argnames; + + tmp = SysCacheGetAttrNotNull(PROCOID, proctup, Anum_pg_proc_prosqlbody); + n = stringToNode(TextDatumGetCString(tmp)); + + if (IsA(n, List)) + { + List *stmts; + ListCell *lc; + + stmts = linitial(castNode(List, n)); + + appendStringInfoString(buf, "BEGIN ATOMIC\n"); + + foreach(lc, stmts) + { + Query *query = lfirst_node(Query, lc); + + /* It seems advisable to get at least AccessShareLock on rels */ + AcquireRewriteLocks(query, false, false); + get_query_def(query, buf, list_make1(&dpns), NULL, false, + PRETTYFLAG_INDENT, WRAP_COLUMN_DEFAULT, 1); + appendStringInfoChar(buf, ';'); + appendStringInfoChar(buf, '\n'); + } + + appendStringInfoString(buf, "END"); + } + else + { + Query *query = castNode(Query, n); + + /* It seems advisable to get at least AccessShareLock on rels */ + AcquireRewriteLocks(query, false, false); + get_query_def(query, buf, list_make1(&dpns), NULL, false, + 0, WRAP_COLUMN_DEFAULT, 0); + } +} + +Datum +pg_get_function_sqlbody(PG_FUNCTION_ARGS) +{ + Oid funcid = PG_GETARG_OID(0); + StringInfoData buf; + HeapTuple proctup; + bool isnull; + + initStringInfo(&buf); + + /* Look up the function */ + proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(proctup)) + PG_RETURN_NULL(); + + (void) SysCacheGetAttr(PROCOID, proctup, Anum_pg_proc_prosqlbody, &isnull); + if (isnull) + { + ReleaseSysCache(proctup); + PG_RETURN_NULL(); + } + + print_function_sqlbody(&buf, proctup); + + 
ReleaseSysCache(proctup); + + PG_RETURN_TEXT_P(cstring_to_text_with_len(buf.data, buf.len)); +} + + +/* + * deparse_expression - General utility for deparsing expressions + * + * calls deparse_expression_pretty with all prettyPrinting disabled + */ +char * +deparse_expression(Node *expr, List *dpcontext, + bool forceprefix, bool showimplicit) +{ + return deparse_expression_pretty(expr, dpcontext, forceprefix, + showimplicit, 0, 0); +} + +/* ---------- + * deparse_expression_pretty - General utility for deparsing expressions + * + * expr is the node tree to be deparsed. It must be a transformed expression + * tree (ie, not the raw output of gram.y). + * + * dpcontext is a list of deparse_namespace nodes representing the context + * for interpreting Vars in the node tree. It can be NIL if no Vars are + * expected. + * + * forceprefix is true to force all Vars to be prefixed with their table names. + * + * showimplicit is true to force all implicit casts to be shown explicitly. + * + * Tries to pretty up the output according to prettyFlags and startIndent. + * + * The result is a palloc'd string. + * ---------- + */ +static char * +deparse_expression_pretty(Node *expr, List *dpcontext, + bool forceprefix, bool showimplicit, + int prettyFlags, int startIndent) +{ + StringInfoData buf; + deparse_context context; + + initStringInfo(&buf); + context.buf = &buf; + context.namespaces = dpcontext; + context.windowClause = NIL; + context.windowTList = NIL; + context.varprefix = forceprefix; + context.prettyFlags = prettyFlags; + context.wrapColumn = WRAP_COLUMN_DEFAULT; + context.indentLevel = startIndent; + context.special_exprkind = EXPR_KIND_NONE; + context.appendparents = NULL; + + get_rule_expr(expr, &context, showimplicit); + + return buf.data; +} + +/* ---------- + * deparse_context_for - Build deparse context for a single relation + * + * Given the reference name (alias) and OID of a relation, build deparsing + * context for an expression referencing only that relation (as varno 1, + * varlevelsup 0). This is sufficient for many uses of deparse_expression. + * ---------- + */ +List * +deparse_context_for(const char *aliasname, Oid relid) +{ + deparse_namespace *dpns; + RangeTblEntry *rte; + + dpns = (deparse_namespace *) palloc0(sizeof(deparse_namespace)); + + /* Build a minimal RTE for the rel */ + rte = makeNode(RangeTblEntry); + rte->rtekind = RTE_RELATION; + rte->relid = relid; + rte->relkind = RELKIND_RELATION; /* no need for exactness here */ + rte->rellockmode = AccessShareLock; + rte->alias = makeAlias(aliasname, NIL); + rte->eref = rte->alias; + rte->lateral = false; + rte->inh = false; + rte->inFromCl = true; + + /* Build one-element rtable */ + dpns->rtable = list_make1(rte); + dpns->subplans = NIL; + dpns->ctes = NIL; + dpns->appendrels = NULL; + set_rtable_names(dpns, NIL, NULL); + set_simple_column_names(dpns); + + /* Return a one-deep namespace stack */ + return list_make1(dpns); +} + +/* + * deparse_context_for_plan_tree - Build deparse context for a Plan tree + * + * When deparsing an expression in a Plan tree, we use the plan's rangetable + * to resolve names of simple Vars. The initialization of column names for + * this is rather expensive if the rangetable is large, and it'll be the same + * for every expression in the Plan tree; so we do it just once and re-use + * the result of this function for each expression. (Note that the result + * is not usable until set_deparse_context_plan() is applied to it.) 
+ * + * In addition to the PlannedStmt, pass the per-RTE alias names + * assigned by a previous call to select_rtable_names_for_explain. + */ +List * +deparse_context_for_plan_tree(PlannedStmt *pstmt, List *rtable_names) +{ + deparse_namespace *dpns; + + dpns = (deparse_namespace *) palloc0(sizeof(deparse_namespace)); + + /* Initialize fields that stay the same across the whole plan tree */ + dpns->rtable = pstmt->rtable; + dpns->rtable_names = rtable_names; + dpns->subplans = pstmt->subplans; + dpns->ctes = NIL; + if (pstmt->appendRelations) + { + /* Set up the array, indexed by child relid */ + int ntables = list_length(dpns->rtable); + ListCell *lc; + + dpns->appendrels = (AppendRelInfo **) + palloc0((ntables + 1) * sizeof(AppendRelInfo *)); + foreach(lc, pstmt->appendRelations) + { + AppendRelInfo *appinfo = lfirst_node(AppendRelInfo, lc); + Index crelid = appinfo->child_relid; + + Assert(crelid > 0 && crelid <= ntables); + Assert(dpns->appendrels[crelid] == NULL); + dpns->appendrels[crelid] = appinfo; + } + } + else + dpns->appendrels = NULL; /* don't need it */ + + /* + * Set up column name aliases. We will get rather bogus results for join + * RTEs, but that doesn't matter because plan trees don't contain any join + * alias Vars. + */ + set_simple_column_names(dpns); + + /* Return a one-deep namespace stack */ + return list_make1(dpns); +} + +/* + * set_deparse_context_plan - Specify Plan node containing expression + * + * When deparsing an expression in a Plan tree, we might have to resolve + * OUTER_VAR, INNER_VAR, or INDEX_VAR references. To do this, the caller must + * provide the parent Plan node. Then OUTER_VAR and INNER_VAR references + * can be resolved by drilling down into the left and right child plans. + * Similarly, INDEX_VAR references can be resolved by reference to the + * indextlist given in a parent IndexOnlyScan node, or to the scan tlist in + * ForeignScan and CustomScan nodes. (Note that we don't currently support + * deparsing of indexquals in regular IndexScan or BitmapIndexScan nodes; + * for those, we can only deparse the indexqualorig fields, which won't + * contain INDEX_VAR Vars.) + * + * The ancestors list is a list of the Plan's parent Plan and SubPlan nodes, + * the most-closely-nested first. This is needed to resolve PARAM_EXEC + * Params. Note we assume that all the Plan nodes share the same rtable. + * + * Once this function has been called, deparse_expression() can be called on + * subsidiary expression(s) of the specified Plan node. To deparse + * expressions of a different Plan node in the same Plan tree, re-call this + * function to identify the new parent Plan node. + * + * The result is the same List passed in; this is a notational convenience. + */ +List * +set_deparse_context_plan(List *dpcontext, Plan *plan, List *ancestors) +{ + deparse_namespace *dpns; + + /* Should always have one-entry namespace list for Plan deparsing */ + Assert(list_length(dpcontext) == 1); + dpns = (deparse_namespace *) linitial(dpcontext); + + /* Set our attention on the specific plan node passed in */ + dpns->ancestors = ancestors; + set_deparse_plan(dpns, plan); + + return dpcontext; +} + +/* + * select_rtable_names_for_explain - Select RTE aliases for EXPLAIN + * + * Determine the relation aliases we'll use during an EXPLAIN operation. + * This is just a frontend to set_rtable_names. We have to expose the aliases + * to EXPLAIN because EXPLAIN needs to know the right alias names to print. 
+ */ +List * +select_rtable_names_for_explain(List *rtable, Bitmapset *rels_used) +{ + deparse_namespace dpns; + + memset(&dpns, 0, sizeof(dpns)); + dpns.rtable = rtable; + dpns.subplans = NIL; + dpns.ctes = NIL; + dpns.appendrels = NULL; + set_rtable_names(&dpns, NIL, rels_used); + /* We needn't bother computing column aliases yet */ + + return dpns.rtable_names; +} + +/* + * set_rtable_names: select RTE aliases to be used in printing a query + * + * We fill in dpns->rtable_names with a list of names that is one-for-one with + * the already-filled dpns->rtable list. Each RTE name is unique among those + * in the new namespace plus any ancestor namespaces listed in + * parent_namespaces. + * + * If rels_used isn't NULL, only RTE indexes listed in it are given aliases. + * + * Note that this function is only concerned with relation names, not column + * names. + */ +static void +set_rtable_names(deparse_namespace *dpns, List *parent_namespaces, + Bitmapset *rels_used) +{ + HASHCTL hash_ctl; + HTAB *names_hash; + NameHashEntry *hentry; + bool found; + int rtindex; + ListCell *lc; + + dpns->rtable_names = NIL; + /* nothing more to do if empty rtable */ + if (dpns->rtable == NIL) + return; + + /* + * We use a hash table to hold known names, so that this process is O(N) + * not O(N^2) for N names. + */ + hash_ctl.keysize = NAMEDATALEN; + hash_ctl.entrysize = sizeof(NameHashEntry); + hash_ctl.hcxt = CurrentMemoryContext; + names_hash = hash_create("set_rtable_names names", + list_length(dpns->rtable), + &hash_ctl, + HASH_ELEM | HASH_STRINGS | HASH_CONTEXT); + + /* Preload the hash table with names appearing in parent_namespaces */ + foreach(lc, parent_namespaces) + { + deparse_namespace *olddpns = (deparse_namespace *) lfirst(lc); + ListCell *lc2; + + foreach(lc2, olddpns->rtable_names) + { + char *oldname = (char *) lfirst(lc2); + + if (oldname == NULL) + continue; + hentry = (NameHashEntry *) hash_search(names_hash, + oldname, + HASH_ENTER, + &found); + /* we do not complain about duplicate names in parent namespaces */ + hentry->counter = 0; + } + } + + /* Now we can scan the rtable */ + rtindex = 1; + foreach(lc, dpns->rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); + char *refname; + + /* Just in case this takes an unreasonable amount of time ... */ + CHECK_FOR_INTERRUPTS(); + + if (rels_used && !bms_is_member(rtindex, rels_used)) + { + /* Ignore unreferenced RTE */ + refname = NULL; + } + else if (rte->alias) + { + /* If RTE has a user-defined alias, prefer that */ + refname = rte->alias->aliasname; + } + else if (rte->rtekind == RTE_RELATION) + { + /* Use the current actual name of the relation */ + refname = get_rel_name(rte->relid); + } + else if (rte->rtekind == RTE_JOIN) + { + /* Unnamed join has no refname */ + refname = NULL; + } + else + { + /* Otherwise use whatever the parser assigned */ + refname = rte->eref->aliasname; + } + + /* + * If the selected name isn't unique, append digits to make it so, and + * make a new hash entry for it once we've got a unique name. For a + * very long input name, we might have to truncate to stay within + * NAMEDATALEN. 
+ */ + if (refname) + { + hentry = (NameHashEntry *) hash_search(names_hash, + refname, + HASH_ENTER, + &found); + if (found) + { + /* Name already in use, must choose a new one */ + int refnamelen = strlen(refname); + char *modname = (char *) palloc(refnamelen + 16); + NameHashEntry *hentry2; + + do + { + hentry->counter++; + for (;;) + { + memcpy(modname, refname, refnamelen); + sprintf(modname + refnamelen, "_%d", hentry->counter); + if (strlen(modname) < NAMEDATALEN) + break; + /* drop chars from refname to keep all the digits */ + refnamelen = pg_mbcliplen(refname, refnamelen, + refnamelen - 1); + } + hentry2 = (NameHashEntry *) hash_search(names_hash, + modname, + HASH_ENTER, + &found); + } while (found); + hentry2->counter = 0; /* init new hash entry */ + refname = modname; + } + else + { + /* Name not previously used, need only initialize hentry */ + hentry->counter = 0; + } + } + + dpns->rtable_names = lappend(dpns->rtable_names, refname); + rtindex++; + } + + hash_destroy(names_hash); +} + +/* + * set_deparse_for_query: set up deparse_namespace for deparsing a Query tree + * + * For convenience, this is defined to initialize the deparse_namespace struct + * from scratch. + */ +static void +set_deparse_for_query(deparse_namespace *dpns, Query *query, + List *parent_namespaces) +{ + ListCell *lc; + ListCell *lc2; + + /* Initialize *dpns and fill rtable/ctes links */ + memset(dpns, 0, sizeof(deparse_namespace)); + dpns->rtable = query->rtable; + dpns->subplans = NIL; + dpns->ctes = query->cteList; + dpns->appendrels = NULL; + + /* Assign a unique relation alias to each RTE */ + set_rtable_names(dpns, parent_namespaces, NULL); + + /* Initialize dpns->rtable_columns to contain zeroed structs */ + dpns->rtable_columns = NIL; + while (list_length(dpns->rtable_columns) < list_length(dpns->rtable)) + dpns->rtable_columns = lappend(dpns->rtable_columns, + palloc0(sizeof(deparse_columns))); + + /* If it's a utility query, it won't have a jointree */ + if (query->jointree) + { + /* Detect whether global uniqueness of USING names is needed */ + dpns->unique_using = + has_dangerous_join_using(dpns, (Node *) query->jointree); + + /* + * Select names for columns merged by USING, via a recursive pass over + * the query jointree. + */ + set_using_names(dpns, (Node *) query->jointree, NIL); + } + + /* + * Now assign remaining column aliases for each RTE. We do this in a + * linear scan of the rtable, so as to process RTEs whether or not they + * are in the jointree (we mustn't miss NEW.*, INSERT target relations, + * etc). JOIN RTEs must be processed after their children, but this is + * okay because they appear later in the rtable list than their children + * (cf Asserts in identify_join_columns()). + */ + forboth(lc, dpns->rtable, lc2, dpns->rtable_columns) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); + deparse_columns *colinfo = (deparse_columns *) lfirst(lc2); + + if (rte->rtekind == RTE_JOIN) + set_join_column_names(dpns, rte, colinfo); + else + set_relation_column_names(dpns, rte, colinfo); + } +} + +/* + * set_simple_column_names: fill in column aliases for non-query situations + * + * This handles EXPLAIN and cases where we only have relation RTEs. Without + * a join tree, we can't do anything smart about join RTEs, but we don't + * need to (note that EXPLAIN should never see join alias Vars anyway). + * If we do hit a join RTE we'll just process it like a non-table base RTE. 
+ */ +static void +set_simple_column_names(deparse_namespace *dpns) +{ + ListCell *lc; + ListCell *lc2; + + /* Initialize dpns->rtable_columns to contain zeroed structs */ + dpns->rtable_columns = NIL; + while (list_length(dpns->rtable_columns) < list_length(dpns->rtable)) + dpns->rtable_columns = lappend(dpns->rtable_columns, + palloc0(sizeof(deparse_columns))); + + /* Assign unique column aliases within each RTE */ + forboth(lc, dpns->rtable, lc2, dpns->rtable_columns) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); + deparse_columns *colinfo = (deparse_columns *) lfirst(lc2); + + set_relation_column_names(dpns, rte, colinfo); + } +} + +/* + * has_dangerous_join_using: search jointree for unnamed JOIN USING + * + * Merged columns of a JOIN USING may act differently from either of the input + * columns, either because they are merged with COALESCE (in a FULL JOIN) or + * because an implicit coercion of the underlying input column is required. + * In such a case the column must be referenced as a column of the JOIN not as + * a column of either input. And this is problematic if the join is unnamed + * (alias-less): we cannot qualify the column's name with an RTE name, since + * there is none. (Forcibly assigning an alias to the join is not a solution, + * since that will prevent legal references to tables below the join.) + * To ensure that every column in the query is unambiguously referenceable, + * we must assign such merged columns names that are globally unique across + * the whole query, aliasing other columns out of the way as necessary. + * + * Because the ensuing re-aliasing is fairly damaging to the readability of + * the query, we don't do this unless we have to. So, we must pre-scan + * the join tree to see if we have to, before starting set_using_names(). + */ +static bool +has_dangerous_join_using(deparse_namespace *dpns, Node *jtnode) +{ + if (IsA(jtnode, RangeTblRef)) + { + /* nothing to do here */ + } + else if (IsA(jtnode, FromExpr)) + { + FromExpr *f = (FromExpr *) jtnode; + ListCell *lc; + + foreach(lc, f->fromlist) + { + if (has_dangerous_join_using(dpns, (Node *) lfirst(lc))) + return true; + } + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + + /* Is it an unnamed JOIN with USING? */ + if (j->alias == NULL && j->usingClause) + { + /* + * Yes, so check each join alias var to see if any of them are not + * simple references to underlying columns. If so, we have a + * dangerous situation and must pick unique aliases. + */ + RangeTblEntry *jrte = rt_fetch(j->rtindex, dpns->rtable); + + /* We need only examine the merged columns */ + for (int i = 0; i < jrte->joinmergedcols; i++) + { + Node *aliasvar = list_nth(jrte->joinaliasvars, i); + + if (!IsA(aliasvar, Var)) + return true; + } + } + + /* Nope, but inspect children */ + if (has_dangerous_join_using(dpns, j->larg)) + return true; + if (has_dangerous_join_using(dpns, j->rarg)) + return true; + } + else + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(jtnode)); + return false; +} + +/* + * set_using_names: select column aliases to be used for merged USING columns + * + * We do this during a recursive descent of the query jointree. + * dpns->unique_using must already be set to determine the global strategy. + * + * Column alias info is saved in the dpns->rtable_columns list, which is + * assumed to be filled with pre-zeroed deparse_columns structs. + * + * parentUsing is a list of all USING aliases assigned in parent joins of + * the current jointree node. 
(The passed-in list must not be modified.) + */ +static void +set_using_names(deparse_namespace *dpns, Node *jtnode, List *parentUsing) +{ + if (IsA(jtnode, RangeTblRef)) + { + /* nothing to do now */ + } + else if (IsA(jtnode, FromExpr)) + { + FromExpr *f = (FromExpr *) jtnode; + ListCell *lc; + + foreach(lc, f->fromlist) + set_using_names(dpns, (Node *) lfirst(lc), parentUsing); + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + RangeTblEntry *rte = rt_fetch(j->rtindex, dpns->rtable); + deparse_columns *colinfo = deparse_columns_fetch(j->rtindex, dpns); + int *leftattnos; + int *rightattnos; + deparse_columns *leftcolinfo; + deparse_columns *rightcolinfo; + int i; + ListCell *lc; + + /* Get info about the shape of the join */ + identify_join_columns(j, rte, colinfo); + leftattnos = colinfo->leftattnos; + rightattnos = colinfo->rightattnos; + + /* Look up the not-yet-filled-in child deparse_columns structs */ + leftcolinfo = deparse_columns_fetch(colinfo->leftrti, dpns); + rightcolinfo = deparse_columns_fetch(colinfo->rightrti, dpns); + + /* + * If this join is unnamed, then we cannot substitute new aliases at + * this level, so any name requirements pushed down to here must be + * pushed down again to the children. + */ + if (rte->alias == NULL) + { + for (i = 0; i < colinfo->num_cols; i++) + { + char *colname = colinfo->colnames[i]; + + if (colname == NULL) + continue; + + /* Push down to left column, unless it's a system column */ + if (leftattnos[i] > 0) + { + expand_colnames_array_to(leftcolinfo, leftattnos[i]); + leftcolinfo->colnames[leftattnos[i] - 1] = colname; + } + + /* Same on the righthand side */ + if (rightattnos[i] > 0) + { + expand_colnames_array_to(rightcolinfo, rightattnos[i]); + rightcolinfo->colnames[rightattnos[i] - 1] = colname; + } + } + } + + /* + * If there's a USING clause, select the USING column names and push + * those names down to the children. We have two strategies: + * + * If dpns->unique_using is true, we force all USING names to be + * unique across the whole query level. In principle we'd only need + * the names of dangerous USING columns to be globally unique, but to + * safely assign all USING names in a single pass, we have to enforce + * the same uniqueness rule for all of them. However, if a USING + * column's name has been pushed down from the parent, we should use + * it as-is rather than making a uniqueness adjustment. This is + * necessary when we're at an unnamed join, and it creates no risk of + * ambiguity. Also, if there's a user-written output alias for a + * merged column, we prefer to use that rather than the input name; + * this simplifies the logic and seems likely to lead to less aliasing + * overall. + * + * If dpns->unique_using is false, we only need USING names to be + * unique within their own join RTE. We still need to honor + * pushed-down names, though. + * + * Though significantly different in results, these two strategies are + * implemented by the same code, with only the difference of whether + * to put assigned names into dpns->using_names. 
+ */ + if (j->usingClause) + { + /* Copy the input parentUsing list so we don't modify it */ + parentUsing = list_copy(parentUsing); + + /* USING names must correspond to the first join output columns */ + expand_colnames_array_to(colinfo, list_length(j->usingClause)); + i = 0; + foreach(lc, j->usingClause) + { + char *colname = strVal(lfirst(lc)); + + /* Assert it's a merged column */ + Assert(leftattnos[i] != 0 && rightattnos[i] != 0); + + /* Adopt passed-down name if any, else select unique name */ + if (colinfo->colnames[i] != NULL) + colname = colinfo->colnames[i]; + else + { + /* Prefer user-written output alias if any */ + if (rte->alias && i < list_length(rte->alias->colnames)) + colname = strVal(list_nth(rte->alias->colnames, i)); + /* Make it appropriately unique */ + colname = make_colname_unique(colname, dpns, colinfo); + if (dpns->unique_using) + dpns->using_names = lappend(dpns->using_names, + colname); + /* Save it as output column name, too */ + colinfo->colnames[i] = colname; + } + + /* Remember selected names for use later */ + colinfo->usingNames = lappend(colinfo->usingNames, colname); + parentUsing = lappend(parentUsing, colname); + + /* Push down to left column, unless it's a system column */ + if (leftattnos[i] > 0) + { + expand_colnames_array_to(leftcolinfo, leftattnos[i]); + leftcolinfo->colnames[leftattnos[i] - 1] = colname; + } + + /* Same on the righthand side */ + if (rightattnos[i] > 0) + { + expand_colnames_array_to(rightcolinfo, rightattnos[i]); + rightcolinfo->colnames[rightattnos[i] - 1] = colname; + } + + i++; + } + } + + /* Mark child deparse_columns structs with correct parentUsing info */ + leftcolinfo->parentUsing = parentUsing; + rightcolinfo->parentUsing = parentUsing; + + /* Now recursively assign USING column names in children */ + set_using_names(dpns, j->larg, parentUsing); + set_using_names(dpns, j->rarg, parentUsing); + } + else + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(jtnode)); +} + +/* + * set_relation_column_names: select column aliases for a non-join RTE + * + * Column alias info is saved in *colinfo, which is assumed to be pre-zeroed. + * If any colnames entries are already filled in, those override local + * choices. + */ +static void +set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte, + deparse_columns *colinfo) +{ + int ncolumns; + char **real_colnames; + bool changed_any; + int noldcolumns; + int i; + int j; + + /* + * Construct an array of the current "real" column names of the RTE. + * real_colnames[] will be indexed by physical column number, with NULL + * entries for dropped columns. + */ + if (rte->rtekind == RTE_RELATION) + { + /* Relation --- look to the system catalogs for up-to-date info */ + Relation rel; + TupleDesc tupdesc; + + rel = relation_open(rte->relid, AccessShareLock); + tupdesc = RelationGetDescr(rel); + + ncolumns = tupdesc->natts; + real_colnames = (char **) palloc(ncolumns * sizeof(char *)); + + for (i = 0; i < ncolumns; i++) + { + Form_pg_attribute attr = TupleDescAttr(tupdesc, i); + + if (attr->attisdropped) + real_colnames[i] = NULL; + else + real_colnames[i] = pstrdup(NameStr(attr->attname)); + } + relation_close(rel, AccessShareLock); + } + else + { + /* Otherwise get the column names from eref or expandRTE() */ + List *colnames; + ListCell *lc; + + /* + * Functions returning composites have the annoying property that some + * of the composite type's columns might have been dropped since the + * query was parsed. 
If possible, use expandRTE() to handle that + * case, since it has the tedious logic needed to find out about + * dropped columns. However, if we're explaining a plan, then we + * don't have rte->functions because the planner thinks that won't be + * needed later, and that breaks expandRTE(). So in that case we have + * to rely on rte->eref, which may lead us to report a dropped + * column's old name; that seems close enough for EXPLAIN's purposes. + * + * For non-RELATION, non-FUNCTION RTEs, we can just look at rte->eref, + * which should be sufficiently up-to-date: no other RTE types can + * have columns get dropped from under them after parsing. + */ + if (rte->rtekind == RTE_FUNCTION && rte->functions != NIL) + { + /* Since we're not creating Vars, rtindex etc. don't matter */ + expandRTE(rte, 1, 0, -1, true /* include dropped */ , + &colnames, NULL); + } + else + colnames = rte->eref->colnames; + + ncolumns = list_length(colnames); + real_colnames = (char **) palloc(ncolumns * sizeof(char *)); + + i = 0; + foreach(lc, colnames) + { + /* + * If the column name we find here is an empty string, then it's a + * dropped column, so change to NULL. + */ + char *cname = strVal(lfirst(lc)); + + if (cname[0] == '\0') + cname = NULL; + real_colnames[i] = cname; + i++; + } + } + + /* + * Ensure colinfo->colnames has a slot for each column. (It could be long + * enough already, if we pushed down a name for the last column.) Note: + * it's possible that there are now more columns than there were when the + * query was parsed, ie colnames could be longer than rte->eref->colnames. + * We must assign unique aliases to the new columns too, else there could + * be unresolved conflicts when the view/rule is reloaded. + */ + expand_colnames_array_to(colinfo, ncolumns); + Assert(colinfo->num_cols == ncolumns); + + /* + * Make sufficiently large new_colnames and is_new_col arrays, too. + * + * Note: because we leave colinfo->num_new_cols zero until after the loop, + * colname_is_unique will not consult that array, which is fine because it + * would only be duplicate effort. + */ + colinfo->new_colnames = (char **) palloc(ncolumns * sizeof(char *)); + colinfo->is_new_col = (bool *) palloc(ncolumns * sizeof(bool)); + + /* + * Scan the columns, select a unique alias for each one, and store it in + * colinfo->colnames and colinfo->new_colnames. The former array has NULL + * entries for dropped columns, the latter omits them. Also mark + * new_colnames entries as to whether they are new since parse time; this + * is the case for entries beyond the length of rte->eref->colnames. 
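+	 * (changed_any records whether any alias we pick differs from the
+	 * column's current real name; it drives the printaliases decision at
+	 * the bottom of this function.)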
+ */ + noldcolumns = list_length(rte->eref->colnames); + changed_any = false; + j = 0; + for (i = 0; i < ncolumns; i++) + { + char *real_colname = real_colnames[i]; + char *colname = colinfo->colnames[i]; + + /* Skip dropped columns */ + if (real_colname == NULL) + { + Assert(colname == NULL); /* colnames[i] is already NULL */ + continue; + } + + /* If alias already assigned, that's what to use */ + if (colname == NULL) + { + /* If user wrote an alias, prefer that over real column name */ + if (rte->alias && i < list_length(rte->alias->colnames)) + colname = strVal(list_nth(rte->alias->colnames, i)); + else + colname = real_colname; + + /* Unique-ify and insert into colinfo */ + colname = make_colname_unique(colname, dpns, colinfo); + + colinfo->colnames[i] = colname; + } + + /* Put names of non-dropped columns in new_colnames[] too */ + colinfo->new_colnames[j] = colname; + /* And mark them as new or not */ + colinfo->is_new_col[j] = (i >= noldcolumns); + j++; + + /* Remember if any assigned aliases differ from "real" name */ + if (!changed_any && strcmp(colname, real_colname) != 0) + changed_any = true; + } + + /* + * Set correct length for new_colnames[] array. (Note: if columns have + * been added, colinfo->num_cols includes them, which is not really quite + * right but is harmless, since any new columns must be at the end where + * they won't affect varattnos of pre-existing columns.) + */ + colinfo->num_new_cols = j; + + /* + * For a relation RTE, we need only print the alias column names if any + * are different from the underlying "real" names. For a function RTE, + * always emit a complete column alias list; this is to protect against + * possible instability of the default column names (eg, from altering + * parameter names). For tablefunc RTEs, we never print aliases, because + * the column names are part of the clause itself. For other RTE types, + * print if we changed anything OR if there were user-written column + * aliases (since the latter would be part of the underlying "reality"). + */ + if (rte->rtekind == RTE_RELATION) + colinfo->printaliases = changed_any; + else if (rte->rtekind == RTE_FUNCTION) + colinfo->printaliases = true; + else if (rte->rtekind == RTE_TABLEFUNC) + colinfo->printaliases = false; + else if (rte->alias && rte->alias->colnames != NIL) + colinfo->printaliases = true; + else + colinfo->printaliases = changed_any; +} + +/* + * set_join_column_names: select column aliases for a join RTE + * + * Column alias info is saved in *colinfo, which is assumed to be pre-zeroed. + * If any colnames entries are already filled in, those override local + * choices. Also, names for USING columns were already chosen by + * set_using_names(). We further expect that column alias selection has been + * completed for both input RTEs. + */ +static void +set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte, + deparse_columns *colinfo) +{ + deparse_columns *leftcolinfo; + deparse_columns *rightcolinfo; + bool changed_any; + int noldcolumns; + int nnewcolumns; + Bitmapset *leftmerged = NULL; + Bitmapset *rightmerged = NULL; + int i; + int j; + int ic; + int jc; + + /* Look up the previously-filled-in child deparse_columns structs */ + leftcolinfo = deparse_columns_fetch(colinfo->leftrti, dpns); + rightcolinfo = deparse_columns_fetch(colinfo->rightrti, dpns); + + /* + * Ensure colinfo->colnames has a slot for each column. (It could be long + * enough already, if we pushed down a name for the last column.) 
Note: + * it's possible that one or both inputs now have more columns than there + * were when the query was parsed, but we'll deal with that below. We + * only need entries in colnames for pre-existing columns. + */ + noldcolumns = list_length(rte->eref->colnames); + expand_colnames_array_to(colinfo, noldcolumns); + Assert(colinfo->num_cols == noldcolumns); + + /* + * Scan the join output columns, select an alias for each one, and store + * it in colinfo->colnames. If there are USING columns, set_using_names() + * already selected their names, so we can start the loop at the first + * non-merged column. + */ + changed_any = false; + for (i = list_length(colinfo->usingNames); i < noldcolumns; i++) + { + char *colname = colinfo->colnames[i]; + char *real_colname; + + /* Join column must refer to at least one input column */ + Assert(colinfo->leftattnos[i] != 0 || colinfo->rightattnos[i] != 0); + + /* Get the child column name */ + if (colinfo->leftattnos[i] > 0) + real_colname = leftcolinfo->colnames[colinfo->leftattnos[i] - 1]; + else if (colinfo->rightattnos[i] > 0) + real_colname = rightcolinfo->colnames[colinfo->rightattnos[i] - 1]; + else + { + /* We're joining system columns --- use eref name */ + real_colname = strVal(list_nth(rte->eref->colnames, i)); + } + + /* If child col has been dropped, no need to assign a join colname */ + if (real_colname == NULL) + { + colinfo->colnames[i] = NULL; + continue; + } + + /* In an unnamed join, just report child column names as-is */ + if (rte->alias == NULL) + { + colinfo->colnames[i] = real_colname; + continue; + } + + /* If alias already assigned, that's what to use */ + if (colname == NULL) + { + /* If user wrote an alias, prefer that over real column name */ + if (rte->alias && i < list_length(rte->alias->colnames)) + colname = strVal(list_nth(rte->alias->colnames, i)); + else + colname = real_colname; + + /* Unique-ify and insert into colinfo */ + colname = make_colname_unique(colname, dpns, colinfo); + + colinfo->colnames[i] = colname; + } + + /* Remember if any assigned aliases differ from "real" name */ + if (!changed_any && strcmp(colname, real_colname) != 0) + changed_any = true; + } + + /* + * Calculate number of columns the join would have if it were re-parsed + * now, and create storage for the new_colnames and is_new_col arrays. + * + * Note: colname_is_unique will be consulting new_colnames[] during the + * loops below, so its not-yet-filled entries must be zeroes. + */ + nnewcolumns = leftcolinfo->num_new_cols + rightcolinfo->num_new_cols - + list_length(colinfo->usingNames); + colinfo->num_new_cols = nnewcolumns; + colinfo->new_colnames = (char **) palloc0(nnewcolumns * sizeof(char *)); + colinfo->is_new_col = (bool *) palloc0(nnewcolumns * sizeof(bool)); + + /* + * Generating the new_colnames array is a bit tricky since any new columns + * added since parse time must be inserted in the right places. This code + * must match the parser, which will order a join's columns as merged + * columns first (in USING-clause order), then non-merged columns from the + * left input (in attnum order), then non-merged columns from the right + * input (ditto). If one of the inputs is itself a join, its columns will + * be ordered according to the same rule, which means newly-added columns + * might not be at the end. We can figure out what's what by consulting + * the leftattnos and rightattnos arrays plus the input is_new_col arrays. 
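+	 * For example, in (A JOIN B USING (x)) the output columns are x, then
+	 * A's other columns, then B's; a column added to A after the query was
+	 * parsed therefore lands in the middle of the join's column list, ahead
+	 * of all of B's columns rather than at the end.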
+ * + * In these loops, i indexes leftattnos/rightattnos (so it's join varattno + * less one), j indexes new_colnames/is_new_col, and ic/jc have similar + * meanings for the current child RTE. + */ + + /* Handle merged columns; they are first and can't be new */ + i = j = 0; + while (i < noldcolumns && + colinfo->leftattnos[i] != 0 && + colinfo->rightattnos[i] != 0) + { + /* column name is already determined and known unique */ + colinfo->new_colnames[j] = colinfo->colnames[i]; + colinfo->is_new_col[j] = false; + + /* build bitmapsets of child attnums of merged columns */ + if (colinfo->leftattnos[i] > 0) + leftmerged = bms_add_member(leftmerged, colinfo->leftattnos[i]); + if (colinfo->rightattnos[i] > 0) + rightmerged = bms_add_member(rightmerged, colinfo->rightattnos[i]); + + i++, j++; + } + + /* Handle non-merged left-child columns */ + ic = 0; + for (jc = 0; jc < leftcolinfo->num_new_cols; jc++) + { + char *child_colname = leftcolinfo->new_colnames[jc]; + + if (!leftcolinfo->is_new_col[jc]) + { + /* Advance ic to next non-dropped old column of left child */ + while (ic < leftcolinfo->num_cols && + leftcolinfo->colnames[ic] == NULL) + ic++; + Assert(ic < leftcolinfo->num_cols); + ic++; + /* If it is a merged column, we already processed it */ + if (bms_is_member(ic, leftmerged)) + continue; + /* Else, advance i to the corresponding existing join column */ + while (i < colinfo->num_cols && + colinfo->colnames[i] == NULL) + i++; + Assert(i < colinfo->num_cols); + Assert(ic == colinfo->leftattnos[i]); + /* Use the already-assigned name of this column */ + colinfo->new_colnames[j] = colinfo->colnames[i]; + i++; + } + else + { + /* + * Unique-ify the new child column name and assign, unless we're + * in an unnamed join, in which case just copy + */ + if (rte->alias != NULL) + { + colinfo->new_colnames[j] = + make_colname_unique(child_colname, dpns, colinfo); + if (!changed_any && + strcmp(colinfo->new_colnames[j], child_colname) != 0) + changed_any = true; + } + else + colinfo->new_colnames[j] = child_colname; + } + + colinfo->is_new_col[j] = leftcolinfo->is_new_col[jc]; + j++; + } + + /* Handle non-merged right-child columns in exactly the same way */ + ic = 0; + for (jc = 0; jc < rightcolinfo->num_new_cols; jc++) + { + char *child_colname = rightcolinfo->new_colnames[jc]; + + if (!rightcolinfo->is_new_col[jc]) + { + /* Advance ic to next non-dropped old column of right child */ + while (ic < rightcolinfo->num_cols && + rightcolinfo->colnames[ic] == NULL) + ic++; + Assert(ic < rightcolinfo->num_cols); + ic++; + /* If it is a merged column, we already processed it */ + if (bms_is_member(ic, rightmerged)) + continue; + /* Else, advance i to the corresponding existing join column */ + while (i < colinfo->num_cols && + colinfo->colnames[i] == NULL) + i++; + Assert(i < colinfo->num_cols); + Assert(ic == colinfo->rightattnos[i]); + /* Use the already-assigned name of this column */ + colinfo->new_colnames[j] = colinfo->colnames[i]; + i++; + } + else + { + /* + * Unique-ify the new child column name and assign, unless we're + * in an unnamed join, in which case just copy + */ + if (rte->alias != NULL) + { + colinfo->new_colnames[j] = + make_colname_unique(child_colname, dpns, colinfo); + if (!changed_any && + strcmp(colinfo->new_colnames[j], child_colname) != 0) + changed_any = true; + } + else + colinfo->new_colnames[j] = child_colname; + } + + colinfo->is_new_col[j] = rightcolinfo->is_new_col[jc]; + j++; + } + + /* Assert we processed the right number of columns */ +#ifdef USE_ASSERT_CHECKING + 
while (i < colinfo->num_cols && colinfo->colnames[i] == NULL) + i++; + Assert(i == colinfo->num_cols); + Assert(j == nnewcolumns); +#endif + + /* + * For a named join, print column aliases if we changed any from the child + * names. Unnamed joins cannot print aliases. + */ + if (rte->alias != NULL) + colinfo->printaliases = changed_any; + else + colinfo->printaliases = false; +} + +/* + * colname_is_unique: is colname distinct from already-chosen column names? + * + * dpns is query-wide info, colinfo is for the column's RTE + */ +static bool +colname_is_unique(const char *colname, deparse_namespace *dpns, + deparse_columns *colinfo) +{ + int i; + ListCell *lc; + + /* Check against already-assigned column aliases within RTE */ + for (i = 0; i < colinfo->num_cols; i++) + { + char *oldname = colinfo->colnames[i]; + + if (oldname && strcmp(oldname, colname) == 0) + return false; + } + + /* + * If we're building a new_colnames array, check that too (this will be + * partially but not completely redundant with the previous checks) + */ + for (i = 0; i < colinfo->num_new_cols; i++) + { + char *oldname = colinfo->new_colnames[i]; + + if (oldname && strcmp(oldname, colname) == 0) + return false; + } + + /* Also check against USING-column names that must be globally unique */ + foreach(lc, dpns->using_names) + { + char *oldname = (char *) lfirst(lc); + + if (strcmp(oldname, colname) == 0) + return false; + } + + /* Also check against names already assigned for parent-join USING cols */ + foreach(lc, colinfo->parentUsing) + { + char *oldname = (char *) lfirst(lc); + + if (strcmp(oldname, colname) == 0) + return false; + } + + return true; +} + +/* + * make_colname_unique: modify colname if necessary to make it unique + * + * dpns is query-wide info, colinfo is for the column's RTE + */ +static char * +make_colname_unique(char *colname, deparse_namespace *dpns, + deparse_columns *colinfo) +{ + /* + * If the selected name isn't unique, append digits to make it so. For a + * very long input name, we might have to truncate to stay within + * NAMEDATALEN. + */ + if (!colname_is_unique(colname, dpns, colinfo)) + { + int colnamelen = strlen(colname); + char *modname = (char *) palloc(colnamelen + 16); + int i = 0; + + do + { + i++; + for (;;) + { + memcpy(modname, colname, colnamelen); + sprintf(modname + colnamelen, "_%d", i); + if (strlen(modname) < NAMEDATALEN) + break; + /* drop chars from colname to keep all the digits */ + colnamelen = pg_mbcliplen(colname, colnamelen, + colnamelen - 1); + } + } while (!colname_is_unique(modname, dpns, colinfo)); + colname = modname; + } + return colname; +} + +/* + * expand_colnames_array_to: make colinfo->colnames at least n items long + * + * Any added array entries are initialized to zero. + */ +static void +expand_colnames_array_to(deparse_columns *colinfo, int n) +{ + if (n > colinfo->num_cols) + { + if (colinfo->colnames == NULL) + colinfo->colnames = palloc0_array(char *, n); + else + colinfo->colnames = repalloc0_array(colinfo->colnames, char *, colinfo->num_cols, n); + colinfo->num_cols = n; + } +} + +/* + * identify_join_columns: figure out where columns of a join come from + * + * Fills the join-specific fields of the colinfo struct, except for + * usingNames which is filled later. 
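+ *
+ * In the leftattnos[]/rightattnos[] arrays built here, a zero entry means
+ * the corresponding join output column does not come from that side;
+ * merged (USING) columns get nonzero entries on both sides.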
+ */ +static void +identify_join_columns(JoinExpr *j, RangeTblEntry *jrte, + deparse_columns *colinfo) +{ + int numjoincols; + int jcolno; + int rcolno; + ListCell *lc; + + /* Extract left/right child RT indexes */ + if (IsA(j->larg, RangeTblRef)) + colinfo->leftrti = ((RangeTblRef *) j->larg)->rtindex; + else if (IsA(j->larg, JoinExpr)) + colinfo->leftrti = ((JoinExpr *) j->larg)->rtindex; + else + elog(ERROR, "unrecognized node type in jointree: %d", + (int) nodeTag(j->larg)); + if (IsA(j->rarg, RangeTblRef)) + colinfo->rightrti = ((RangeTblRef *) j->rarg)->rtindex; + else if (IsA(j->rarg, JoinExpr)) + colinfo->rightrti = ((JoinExpr *) j->rarg)->rtindex; + else + elog(ERROR, "unrecognized node type in jointree: %d", + (int) nodeTag(j->rarg)); + + /* Assert children will be processed earlier than join in second pass */ + Assert(colinfo->leftrti < j->rtindex); + Assert(colinfo->rightrti < j->rtindex); + + /* Initialize result arrays with zeroes */ + numjoincols = list_length(jrte->joinaliasvars); + Assert(numjoincols == list_length(jrte->eref->colnames)); + colinfo->leftattnos = (int *) palloc0(numjoincols * sizeof(int)); + colinfo->rightattnos = (int *) palloc0(numjoincols * sizeof(int)); + + /* + * Deconstruct RTE's joinleftcols/joinrightcols into desired format. + * Recall that the column(s) merged due to USING are the first column(s) + * of the join output. We need not do anything special while scanning + * joinleftcols, but while scanning joinrightcols we must distinguish + * merged from unmerged columns. + */ + jcolno = 0; + foreach(lc, jrte->joinleftcols) + { + int leftattno = lfirst_int(lc); + + colinfo->leftattnos[jcolno++] = leftattno; + } + rcolno = 0; + foreach(lc, jrte->joinrightcols) + { + int rightattno = lfirst_int(lc); + + if (rcolno < jrte->joinmergedcols) /* merged column? */ + colinfo->rightattnos[rcolno] = rightattno; + else + colinfo->rightattnos[jcolno++] = rightattno; + rcolno++; + } + Assert(jcolno == numjoincols); +} + +/* + * get_rtable_name: convenience function to get a previously assigned RTE alias + * + * The RTE must belong to the topmost namespace level in "context". + */ +static char * +get_rtable_name(int rtindex, deparse_context *context) +{ + deparse_namespace *dpns = (deparse_namespace *) linitial(context->namespaces); + + Assert(rtindex > 0 && rtindex <= list_length(dpns->rtable_names)); + return (char *) list_nth(dpns->rtable_names, rtindex - 1); +} + +/* + * set_deparse_plan: set up deparse_namespace to parse subexpressions + * of a given Plan node + * + * This sets the plan, outer_plan, inner_plan, outer_tlist, inner_tlist, + * and index_tlist fields. Caller must already have adjusted the ancestors + * list if necessary. Note that the rtable, subplans, and ctes fields do + * not need to change when shifting attention to different plan nodes in a + * single plan tree. + */ +static void +set_deparse_plan(deparse_namespace *dpns, Plan *plan) +{ + dpns->plan = plan; + + /* + * We special-case Append and MergeAppend to pretend that the first child + * plan is the OUTER referent; we have to interpret OUTER Vars in their + * tlists according to one of the children, and the first one is the most + * natural choice. 
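+ * (dpns->outer_tlist, filled just below, is then what OUTER_VAR references
+ * in the Append's or MergeAppend's own targetlist resolve against when its
+ * expressions are deparsed, e.g. for EXPLAIN VERBOSE.)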
+ */ + if (IsA(plan, Append)) + dpns->outer_plan = linitial(((Append *) plan)->appendplans); + else if (IsA(plan, MergeAppend)) + dpns->outer_plan = linitial(((MergeAppend *) plan)->mergeplans); + else + dpns->outer_plan = outerPlan(plan); + + if (dpns->outer_plan) + dpns->outer_tlist = dpns->outer_plan->targetlist; + else + dpns->outer_tlist = NIL; + + /* + * For a SubqueryScan, pretend the subplan is INNER referent. (We don't + * use OUTER because that could someday conflict with the normal meaning.) + * Likewise, for a CteScan, pretend the subquery's plan is INNER referent. + * For a WorkTableScan, locate the parent RecursiveUnion plan node and use + * that as INNER referent. + * + * For MERGE, pretend the ModifyTable's source plan (its outer plan) is + * INNER referent. This is the join from the target relation to the data + * source, and all INNER_VAR Vars in other parts of the query refer to its + * targetlist. + * + * For ON CONFLICT .. UPDATE we just need the inner tlist to point to the + * excluded expression's tlist. (Similar to the SubqueryScan we don't want + * to reuse OUTER, it's used for RETURNING in some modify table cases, + * although not INSERT .. CONFLICT). + */ + if (IsA(plan, SubqueryScan)) + dpns->inner_plan = ((SubqueryScan *) plan)->subplan; + else if (IsA(plan, CteScan)) + dpns->inner_plan = list_nth(dpns->subplans, + ((CteScan *) plan)->ctePlanId - 1); + else if (IsA(plan, WorkTableScan)) + dpns->inner_plan = find_recursive_union(dpns, + (WorkTableScan *) plan); + else if (IsA(plan, ModifyTable)) + { + if (((ModifyTable *) plan)->operation == CMD_MERGE) + dpns->inner_plan = outerPlan(plan); + else + dpns->inner_plan = plan; + } + else + dpns->inner_plan = innerPlan(plan); + + if (IsA(plan, ModifyTable) && ((ModifyTable *) plan)->operation == CMD_INSERT) + dpns->inner_tlist = ((ModifyTable *) plan)->exclRelTlist; + else if (dpns->inner_plan) + dpns->inner_tlist = dpns->inner_plan->targetlist; + else + dpns->inner_tlist = NIL; + + /* Set up referent for INDEX_VAR Vars, if needed */ + if (IsA(plan, IndexOnlyScan)) + dpns->index_tlist = ((IndexOnlyScan *) plan)->indextlist; + else if (IsA(plan, ForeignScan)) + dpns->index_tlist = ((ForeignScan *) plan)->fdw_scan_tlist; + else if (IsA(plan, CustomScan)) + dpns->index_tlist = ((CustomScan *) plan)->custom_scan_tlist; + else + dpns->index_tlist = NIL; +} + +/* + * Locate the ancestor plan node that is the RecursiveUnion generating + * the WorkTableScan's work table. We can match on wtParam, since that + * should be unique within the plan tree. + */ +static Plan * +find_recursive_union(deparse_namespace *dpns, WorkTableScan *wtscan) +{ + ListCell *lc; + + foreach(lc, dpns->ancestors) + { + Plan *ancestor = (Plan *) lfirst(lc); + + if (IsA(ancestor, RecursiveUnion) && + ((RecursiveUnion *) ancestor)->wtParam == wtscan->wtParam) + return ancestor; + } + elog(ERROR, "could not find RecursiveUnion for WorkTableScan with wtParam %d", + wtscan->wtParam); + return NULL; +} + +/* + * push_child_plan: temporarily transfer deparsing attention to a child plan + * + * When expanding an OUTER_VAR or INNER_VAR reference, we must adjust the + * deparse context in case the referenced expression itself uses + * OUTER_VAR/INNER_VAR. We modify the top stack entry in-place to avoid + * affecting levelsup issues (although in a Plan tree there really shouldn't + * be any). + * + * Caller must provide a local deparse_namespace variable to save the + * previous state for pop_child_plan. 
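+ *
+ * A typical caller does:
+ *
+ *     deparse_namespace save_dpns;
+ *
+ *     push_child_plan(dpns, dpns->outer_plan, &save_dpns);
+ *     ... deparse an expression from the child's targetlist ...
+ *     pop_child_plan(dpns, &save_dpns);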
+ */ +static void +push_child_plan(deparse_namespace *dpns, Plan *plan, + deparse_namespace *save_dpns) +{ + /* Save state for restoration later */ + *save_dpns = *dpns; + + /* Link current plan node into ancestors list */ + dpns->ancestors = lcons(dpns->plan, dpns->ancestors); + + /* Set attention on selected child */ + set_deparse_plan(dpns, plan); +} + +/* + * pop_child_plan: undo the effects of push_child_plan + */ +static void +pop_child_plan(deparse_namespace *dpns, deparse_namespace *save_dpns) +{ + List *ancestors; + + /* Get rid of ancestors list cell added by push_child_plan */ + ancestors = list_delete_first(dpns->ancestors); + + /* Restore fields changed by push_child_plan */ + *dpns = *save_dpns; + + /* Make sure dpns->ancestors is right (may be unnecessary) */ + dpns->ancestors = ancestors; +} + +/* + * push_ancestor_plan: temporarily transfer deparsing attention to an + * ancestor plan + * + * When expanding a Param reference, we must adjust the deparse context + * to match the plan node that contains the expression being printed; + * otherwise we'd fail if that expression itself contains a Param or + * OUTER_VAR/INNER_VAR/INDEX_VAR variable. + * + * The target ancestor is conveniently identified by the ListCell holding it + * in dpns->ancestors. + * + * Caller must provide a local deparse_namespace variable to save the + * previous state for pop_ancestor_plan. + */ +static void +push_ancestor_plan(deparse_namespace *dpns, ListCell *ancestor_cell, + deparse_namespace *save_dpns) +{ + Plan *plan = (Plan *) lfirst(ancestor_cell); + + /* Save state for restoration later */ + *save_dpns = *dpns; + + /* Build a new ancestor list with just this node's ancestors */ + dpns->ancestors = + list_copy_tail(dpns->ancestors, + list_cell_number(dpns->ancestors, ancestor_cell) + 1); + + /* Set attention on selected ancestor */ + set_deparse_plan(dpns, plan); +} + +/* + * pop_ancestor_plan: undo the effects of push_ancestor_plan + */ +static void +pop_ancestor_plan(deparse_namespace *dpns, deparse_namespace *save_dpns) +{ + /* Free the ancestor list made in push_ancestor_plan */ + list_free(dpns->ancestors); + + /* Restore fields changed by push_ancestor_plan */ + *dpns = *save_dpns; +} + + +/* ---------- + * make_ruledef - reconstruct the CREATE RULE command + * for a given pg_rewrite tuple + * ---------- + */ +static void +make_ruledef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc, + int prettyFlags) +{ + char *rulename; + char ev_type; + Oid ev_class; + bool is_instead; + char *ev_qual; + char *ev_action; + List *actions; + Relation ev_relation; + TupleDesc viewResultDesc = NULL; + int fno; + Datum dat; + bool isnull; + + /* + * Get the attribute values from the rules tuple + */ + fno = SPI_fnumber(rulettc, "rulename"); + dat = SPI_getbinval(ruletup, rulettc, fno, &isnull); + Assert(!isnull); + rulename = NameStr(*(DatumGetName(dat))); + + fno = SPI_fnumber(rulettc, "ev_type"); + dat = SPI_getbinval(ruletup, rulettc, fno, &isnull); + Assert(!isnull); + ev_type = DatumGetChar(dat); + + fno = SPI_fnumber(rulettc, "ev_class"); + dat = SPI_getbinval(ruletup, rulettc, fno, &isnull); + Assert(!isnull); + ev_class = DatumGetObjectId(dat); + + fno = SPI_fnumber(rulettc, "is_instead"); + dat = SPI_getbinval(ruletup, rulettc, fno, &isnull); + Assert(!isnull); + is_instead = DatumGetBool(dat); + + fno = SPI_fnumber(rulettc, "ev_qual"); + ev_qual = SPI_getvalue(ruletup, rulettc, fno); + Assert(ev_qual != NULL); + + fno = SPI_fnumber(rulettc, "ev_action"); + ev_action = SPI_getvalue(ruletup, 
rulettc, fno); + Assert(ev_action != NULL); + actions = (List *) stringToNode(ev_action); + if (actions == NIL) + elog(ERROR, "invalid empty ev_action list"); + + ev_relation = table_open(ev_class, AccessShareLock); + + /* + * Build the rules definition text + */ + appendStringInfo(buf, "CREATE RULE %s AS", + quote_identifier(rulename)); + + if (prettyFlags & PRETTYFLAG_INDENT) + appendStringInfoString(buf, "\n ON "); + else + appendStringInfoString(buf, " ON "); + + /* The event the rule is fired for */ + switch (ev_type) + { + case '1': + appendStringInfoString(buf, "SELECT"); + viewResultDesc = RelationGetDescr(ev_relation); + break; + + case '2': + appendStringInfoString(buf, "UPDATE"); + break; + + case '3': + appendStringInfoString(buf, "INSERT"); + break; + + case '4': + appendStringInfoString(buf, "DELETE"); + break; + + default: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("rule \"%s\" has unsupported event type %d", + rulename, ev_type))); + break; + } + + /* The relation the rule is fired on */ + appendStringInfo(buf, " TO %s", + (prettyFlags & PRETTYFLAG_SCHEMA) ? + generate_relation_name(ev_class, NIL) : + generate_qualified_relation_name(ev_class)); + + /* If the rule has an event qualification, add it */ + if (strcmp(ev_qual, "<>") != 0) + { + Node *qual; + Query *query; + deparse_context context; + deparse_namespace dpns; + + if (prettyFlags & PRETTYFLAG_INDENT) + appendStringInfoString(buf, "\n "); + appendStringInfoString(buf, " WHERE "); + + qual = stringToNode(ev_qual); + + /* + * We need to make a context for recognizing any Vars in the qual + * (which can only be references to OLD and NEW). Use the rtable of + * the first query in the action list for this purpose. + */ + query = (Query *) linitial(actions); + + /* + * If the action is INSERT...SELECT, OLD/NEW have been pushed down + * into the SELECT, and that's what we need to look at. (Ugly kluge + * ... try to fix this when we redesign querytrees.) 
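+ * (getInsertSelectQuery() hands back the contained SELECT in that case,
+ * and just returns the original query otherwise.)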
+ */ + query = getInsertSelectQuery(query, NULL); + + /* Must acquire locks right away; see notes in get_query_def() */ + AcquireRewriteLocks(query, false, false); + + context.buf = buf; + context.namespaces = list_make1(&dpns); + context.windowClause = NIL; + context.windowTList = NIL; + context.varprefix = (list_length(query->rtable) != 1); + context.prettyFlags = prettyFlags; + context.wrapColumn = WRAP_COLUMN_DEFAULT; + context.indentLevel = PRETTYINDENT_STD; + context.special_exprkind = EXPR_KIND_NONE; + context.appendparents = NULL; + + set_deparse_for_query(&dpns, query, NIL); + + get_rule_expr(qual, &context, false); + } + + appendStringInfoString(buf, " DO "); + + /* The INSTEAD keyword (if so) */ + if (is_instead) + appendStringInfoString(buf, "INSTEAD "); + + /* Finally the rules actions */ + if (list_length(actions) > 1) + { + ListCell *action; + Query *query; + + appendStringInfoChar(buf, '('); + foreach(action, actions) + { + query = (Query *) lfirst(action); + get_query_def(query, buf, NIL, viewResultDesc, true, + prettyFlags, WRAP_COLUMN_DEFAULT, 0); + if (prettyFlags) + appendStringInfoString(buf, ";\n"); + else + appendStringInfoString(buf, "; "); + } + appendStringInfoString(buf, ");"); + } + else + { + Query *query; + + query = (Query *) linitial(actions); + get_query_def(query, buf, NIL, viewResultDesc, true, + prettyFlags, WRAP_COLUMN_DEFAULT, 0); + appendStringInfoChar(buf, ';'); + } + + table_close(ev_relation, AccessShareLock); +} + + +/* ---------- + * make_viewdef - reconstruct the SELECT part of a + * view rewrite rule + * ---------- + */ +static void +make_viewdef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc, + int prettyFlags, int wrapColumn) +{ + Query *query; + char ev_type; + Oid ev_class; + bool is_instead; + char *ev_qual; + char *ev_action; + List *actions; + Relation ev_relation; + int fno; + Datum dat; + bool isnull; + + /* + * Get the attribute values from the rules tuple + */ + fno = SPI_fnumber(rulettc, "ev_type"); + dat = SPI_getbinval(ruletup, rulettc, fno, &isnull); + Assert(!isnull); + ev_type = DatumGetChar(dat); + + fno = SPI_fnumber(rulettc, "ev_class"); + dat = SPI_getbinval(ruletup, rulettc, fno, &isnull); + Assert(!isnull); + ev_class = DatumGetObjectId(dat); + + fno = SPI_fnumber(rulettc, "is_instead"); + dat = SPI_getbinval(ruletup, rulettc, fno, &isnull); + Assert(!isnull); + is_instead = DatumGetBool(dat); + + fno = SPI_fnumber(rulettc, "ev_qual"); + ev_qual = SPI_getvalue(ruletup, rulettc, fno); + Assert(ev_qual != NULL); + + fno = SPI_fnumber(rulettc, "ev_action"); + ev_action = SPI_getvalue(ruletup, rulettc, fno); + Assert(ev_action != NULL); + actions = (List *) stringToNode(ev_action); + + if (list_length(actions) != 1) + { + /* keep output buffer empty and leave */ + return; + } + + query = (Query *) linitial(actions); + + if (ev_type != '1' || !is_instead || + strcmp(ev_qual, "<>") != 0 || query->commandType != CMD_SELECT) + { + /* keep output buffer empty and leave */ + return; + } + + ev_relation = table_open(ev_class, AccessShareLock); + + get_query_def(query, buf, NIL, RelationGetDescr(ev_relation), true, + prettyFlags, wrapColumn, 0); + appendStringInfoChar(buf, ';'); + + table_close(ev_relation, AccessShareLock); +} + + +/* ---------- + * get_query_def - Parse back one query parsetree + * + * query: parsetree to be displayed + * buf: output text is appended to buf + * parentnamespace: list (initially empty) of outer-level deparse_namespace's + * resultDesc: if not NULL, the output tuple descriptor for the view + * 
represented by a SELECT query. We use the column names from it + * to label SELECT output columns, in preference to names in the query + * colNamesVisible: true if the surrounding context cares about the output + * column names at all (as, for example, an EXISTS() context does not); + * when false, we can suppress dummy column labels such as "?column?" + * prettyFlags: bitmask of PRETTYFLAG_XXX options + * wrapColumn: maximum line length, or -1 to disable wrapping + * startIndent: initial indentation amount + * ---------- + */ +static void +get_query_def(Query *query, StringInfo buf, List *parentnamespace, + TupleDesc resultDesc, bool colNamesVisible, + int prettyFlags, int wrapColumn, int startIndent) +{ + deparse_context context; + deparse_namespace dpns; + + /* Guard against excessively long or deeply-nested queries */ + CHECK_FOR_INTERRUPTS(); + check_stack_depth(); + + /* + * Before we begin to examine the query, acquire locks on referenced + * relations, and fix up deleted columns in JOIN RTEs. This ensures + * consistent results. Note we assume it's OK to scribble on the passed + * querytree! + * + * We are only deparsing the query (we are not about to execute it), so we + * only need AccessShareLock on the relations it mentions. + */ + AcquireRewriteLocks(query, false, false); + + context.buf = buf; + context.namespaces = lcons(&dpns, list_copy(parentnamespace)); + context.windowClause = NIL; + context.windowTList = NIL; + context.varprefix = (parentnamespace != NIL || + list_length(query->rtable) != 1); + context.prettyFlags = prettyFlags; + context.wrapColumn = wrapColumn; + context.indentLevel = startIndent; + context.special_exprkind = EXPR_KIND_NONE; + context.appendparents = NULL; + + set_deparse_for_query(&dpns, query, parentnamespace); + + switch (query->commandType) + { + case CMD_SELECT: + get_select_query_def(query, &context, resultDesc, colNamesVisible); + break; + + case CMD_UPDATE: + get_update_query_def(query, &context, colNamesVisible); + break; + + case CMD_INSERT: + get_insert_query_def(query, &context, colNamesVisible); + break; + + case CMD_DELETE: + get_delete_query_def(query, &context, colNamesVisible); + break; + + case CMD_MERGE: + get_merge_query_def(query, &context, colNamesVisible); + break; + + case CMD_NOTHING: + appendStringInfoString(buf, "NOTHING"); + break; + + case CMD_UTILITY: + get_utility_query_def(query, &context); + break; + + default: + elog(ERROR, "unrecognized query command type: %d", + query->commandType); + break; + } +} + +/* ---------- + * get_values_def - Parse back a VALUES list + * ---------- + */ +static void +get_values_def(List *values_lists, deparse_context *context) +{ + StringInfo buf = context->buf; + bool first_list = true; + ListCell *vtl; + + appendStringInfoString(buf, "VALUES "); + + foreach(vtl, values_lists) + { + List *sublist = (List *) lfirst(vtl); + bool first_col = true; + ListCell *lc; + + if (first_list) + first_list = false; + else + appendStringInfoString(buf, ", "); + + appendStringInfoChar(buf, '('); + foreach(lc, sublist) + { + Node *col = (Node *) lfirst(lc); + + if (first_col) + first_col = false; + else + appendStringInfoChar(buf, ','); + + /* + * Print the value. Whole-row Vars need special treatment. 
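+ * get_rule_expr_toplevel() adds enough decoration to a whole-row Var
+ * that it will not be expanded into separate columns when the VALUES
+ * list is parsed back in.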
+ */ + get_rule_expr_toplevel(col, context, false); + } + appendStringInfoChar(buf, ')'); + } +} + +/* ---------- + * get_with_clause - Parse back a WITH clause + * ---------- + */ +static void +get_with_clause(Query *query, deparse_context *context) +{ + StringInfo buf = context->buf; + const char *sep; + ListCell *l; + + if (query->cteList == NIL) + return; + + if (PRETTY_INDENT(context)) + { + context->indentLevel += PRETTYINDENT_STD; + appendStringInfoChar(buf, ' '); + } + + if (query->hasRecursive) + sep = "WITH RECURSIVE "; + else + sep = "WITH "; + foreach(l, query->cteList) + { + CommonTableExpr *cte = (CommonTableExpr *) lfirst(l); + + appendStringInfoString(buf, sep); + appendStringInfoString(buf, quote_identifier(cte->ctename)); + if (cte->aliascolnames) + { + bool first = true; + ListCell *col; + + appendStringInfoChar(buf, '('); + foreach(col, cte->aliascolnames) + { + if (first) + first = false; + else + appendStringInfoString(buf, ", "); + appendStringInfoString(buf, + quote_identifier(strVal(lfirst(col)))); + } + appendStringInfoChar(buf, ')'); + } + appendStringInfoString(buf, " AS "); + switch (cte->ctematerialized) + { + case CTEMaterializeDefault: + break; + case CTEMaterializeAlways: + appendStringInfoString(buf, "MATERIALIZED "); + break; + case CTEMaterializeNever: + appendStringInfoString(buf, "NOT MATERIALIZED "); + break; + } + appendStringInfoChar(buf, '('); + if (PRETTY_INDENT(context)) + appendContextKeyword(context, "", 0, 0, 0); + get_query_def((Query *) cte->ctequery, buf, context->namespaces, NULL, + true, + context->prettyFlags, context->wrapColumn, + context->indentLevel); + if (PRETTY_INDENT(context)) + appendContextKeyword(context, "", 0, 0, 0); + appendStringInfoChar(buf, ')'); + + if (cte->search_clause) + { + bool first = true; + ListCell *lc; + + appendStringInfo(buf, " SEARCH %s FIRST BY ", + cte->search_clause->search_breadth_first ? 
"BREADTH" : "DEPTH"); + + foreach(lc, cte->search_clause->search_col_list) + { + if (first) + first = false; + else + appendStringInfoString(buf, ", "); + appendStringInfoString(buf, + quote_identifier(strVal(lfirst(lc)))); + } + + appendStringInfo(buf, " SET %s", quote_identifier(cte->search_clause->search_seq_column)); + } + + if (cte->cycle_clause) + { + bool first = true; + ListCell *lc; + + appendStringInfoString(buf, " CYCLE "); + + foreach(lc, cte->cycle_clause->cycle_col_list) + { + if (first) + first = false; + else + appendStringInfoString(buf, ", "); + appendStringInfoString(buf, + quote_identifier(strVal(lfirst(lc)))); + } + + appendStringInfo(buf, " SET %s", quote_identifier(cte->cycle_clause->cycle_mark_column)); + + { + Const *cmv = castNode(Const, cte->cycle_clause->cycle_mark_value); + Const *cmd = castNode(Const, cte->cycle_clause->cycle_mark_default); + + if (!(cmv->consttype == BOOLOID && !cmv->constisnull && DatumGetBool(cmv->constvalue) == true && + cmd->consttype == BOOLOID && !cmd->constisnull && DatumGetBool(cmd->constvalue) == false)) + { + appendStringInfoString(buf, " TO "); + get_rule_expr(cte->cycle_clause->cycle_mark_value, context, false); + appendStringInfoString(buf, " DEFAULT "); + get_rule_expr(cte->cycle_clause->cycle_mark_default, context, false); + } + } + + appendStringInfo(buf, " USING %s", quote_identifier(cte->cycle_clause->cycle_path_column)); + } + + sep = ", "; + } + + if (PRETTY_INDENT(context)) + { + context->indentLevel -= PRETTYINDENT_STD; + appendContextKeyword(context, "", 0, 0, 0); + } + else + appendStringInfoChar(buf, ' '); +} + +/* ---------- + * get_select_query_def - Parse back a SELECT parsetree + * ---------- + */ +static void +get_select_query_def(Query *query, deparse_context *context, + TupleDesc resultDesc, bool colNamesVisible) +{ + StringInfo buf = context->buf; + List *save_windowclause; + List *save_windowtlist; + bool force_colno; + ListCell *l; + + /* Insert the WITH clause if given */ + get_with_clause(query, context); + + /* Set up context for possible window functions */ + save_windowclause = context->windowClause; + context->windowClause = query->windowClause; + save_windowtlist = context->windowTList; + context->windowTList = query->targetList; + + /* + * If the Query node has a setOperations tree, then it's the top level of + * a UNION/INTERSECT/EXCEPT query; only the WITH, ORDER BY and LIMIT + * fields are interesting in the top query itself. + */ + if (query->setOperations) + { + get_setop_query(query->setOperations, query, context, resultDesc, + colNamesVisible); + /* ORDER BY clauses must be simple in this case */ + force_colno = true; + } + else + { + get_basic_select_query(query, context, resultDesc, colNamesVisible); + force_colno = false; + } + + /* Add the ORDER BY clause if given */ + if (query->sortClause != NIL) + { + appendContextKeyword(context, " ORDER BY ", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 1); + get_rule_orderby(query->sortClause, query->targetList, + force_colno, context); + } + + /* + * Add the LIMIT/OFFSET clauses if given. If non-default options, use the + * standard spelling of LIMIT. 
+ */ + if (query->limitOffset != NULL) + { + appendContextKeyword(context, " OFFSET ", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 0); + get_rule_expr(query->limitOffset, context, false); + } + if (query->limitCount != NULL) + { + if (query->limitOption == LIMIT_OPTION_WITH_TIES) + { + appendContextKeyword(context, " FETCH FIRST ", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 0); + get_rule_expr(query->limitCount, context, false); + appendStringInfoString(buf, " ROWS WITH TIES"); + } + else + { + appendContextKeyword(context, " LIMIT ", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 0); + if (IsA(query->limitCount, Const) && + ((Const *) query->limitCount)->constisnull) + appendStringInfoString(buf, "ALL"); + else + get_rule_expr(query->limitCount, context, false); + } + } + + /* Add FOR [KEY] UPDATE/SHARE clauses if present */ + if (query->hasForUpdate) + { + foreach(l, query->rowMarks) + { + RowMarkClause *rc = (RowMarkClause *) lfirst(l); + + /* don't print implicit clauses */ + if (rc->pushedDown) + continue; + + switch (rc->strength) + { + case LCS_NONE: + /* we intentionally throw an error for LCS_NONE */ + elog(ERROR, "unrecognized LockClauseStrength %d", + (int) rc->strength); + break; + case LCS_FORKEYSHARE: + appendContextKeyword(context, " FOR KEY SHARE", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 0); + break; + case LCS_FORSHARE: + appendContextKeyword(context, " FOR SHARE", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 0); + break; + case LCS_FORNOKEYUPDATE: + appendContextKeyword(context, " FOR NO KEY UPDATE", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 0); + break; + case LCS_FORUPDATE: + appendContextKeyword(context, " FOR UPDATE", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 0); + break; + } + + appendStringInfo(buf, " OF %s", + quote_identifier(get_rtable_name(rc->rti, + context))); + if (rc->waitPolicy == LockWaitError) + appendStringInfoString(buf, " NOWAIT"); + else if (rc->waitPolicy == LockWaitSkip) + appendStringInfoString(buf, " SKIP LOCKED"); + } + } + + context->windowClause = save_windowclause; + context->windowTList = save_windowtlist; +} + +/* + * Detect whether query looks like SELECT ... FROM VALUES(), + * with no need to rename the output columns of the VALUES RTE. + * If so, return the VALUES RTE. Otherwise return NULL. + */ +static RangeTblEntry * +get_simple_values_rte(Query *query, TupleDesc resultDesc) +{ + RangeTblEntry *result = NULL; + ListCell *lc; + + /* + * We want to detect a match even if the Query also contains OLD or NEW + * rule RTEs. So the idea is to scan the rtable and see if there is only + * one inFromCl RTE that is a VALUES RTE. + */ + foreach(lc, query->rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); + + if (rte->rtekind == RTE_VALUES && rte->inFromCl) + { + if (result) + return NULL; /* multiple VALUES (probably not possible) */ + result = rte; + } + else if (rte->rtekind == RTE_RELATION && !rte->inFromCl) + continue; /* ignore rule entries */ + else + return NULL; /* something else -> not simple VALUES */ + } + + /* + * We don't need to check the targetlist in any great detail, because + * parser/analyze.c will never generate a "bare" VALUES RTE --- they only + * appear inside auto-generated sub-queries with very restricted + * structure. However, DefineView might have modified the tlist by + * injecting new column aliases, or we might have some other column + * aliases forced by a resultDesc. We can only simplify if the RTE's + * column names match the names that get_target_list() would select. 
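+	 * (If they no longer match --- for instance a view column has been
+	 * renamed --- we return NULL and the caller falls back to printing the
+	 * full SELECT ... FROM (VALUES ...) form.)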
+ */ + if (result) + { + ListCell *lcn; + int colno; + + if (list_length(query->targetList) != list_length(result->eref->colnames)) + return NULL; /* this probably cannot happen */ + colno = 0; + forboth(lc, query->targetList, lcn, result->eref->colnames) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + char *cname = strVal(lfirst(lcn)); + char *colname; + + if (tle->resjunk) + return NULL; /* this probably cannot happen */ + + /* compute name that get_target_list would use for column */ + colno++; + if (resultDesc && colno <= resultDesc->natts) + colname = NameStr(TupleDescAttr(resultDesc, colno - 1)->attname); + else + colname = tle->resname; + + /* does it match the VALUES RTE? */ + if (colname == NULL || strcmp(colname, cname) != 0) + return NULL; /* column name has been changed */ + } + } + + return result; +} + +static void +get_basic_select_query(Query *query, deparse_context *context, + TupleDesc resultDesc, bool colNamesVisible) +{ + StringInfo buf = context->buf; + RangeTblEntry *values_rte; + char *sep; + ListCell *l; + + if (PRETTY_INDENT(context)) + { + context->indentLevel += PRETTYINDENT_STD; + appendStringInfoChar(buf, ' '); + } + + /* + * If the query looks like SELECT * FROM (VALUES ...), then print just the + * VALUES part. This reverses what transformValuesClause() did at parse + * time. + */ + values_rte = get_simple_values_rte(query, resultDesc); + if (values_rte) + { + get_values_def(values_rte->values_lists, context); + return; + } + + /* + * Build up the query string - first we say SELECT + */ + if (query->isReturn) + appendStringInfoString(buf, "RETURN"); + else + appendStringInfoString(buf, "SELECT"); + + /* Add the DISTINCT clause if given */ + if (query->distinctClause != NIL) + { + if (query->hasDistinctOn) + { + appendStringInfoString(buf, " DISTINCT ON ("); + sep = ""; + foreach(l, query->distinctClause) + { + SortGroupClause *srt = (SortGroupClause *) lfirst(l); + + appendStringInfoString(buf, sep); + get_rule_sortgroupclause(srt->tleSortGroupRef, query->targetList, + false, context); + sep = ", "; + } + appendStringInfoChar(buf, ')'); + } + else + appendStringInfoString(buf, " DISTINCT"); + } + + /* Then we tell what to select (the targetlist) */ + get_target_list(query->targetList, context, resultDesc, colNamesVisible); + + /* Add the FROM clause if needed */ + get_from_clause(query, " FROM ", context); + + /* Add the WHERE clause if given */ + if (query->jointree->quals != NULL) + { + appendContextKeyword(context, " WHERE ", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 1); + get_rule_expr(query->jointree->quals, context, false); + } + + /* Add the GROUP BY clause if given */ + if (query->groupClause != NULL || query->groupingSets != NULL) + { + ParseExprKind save_exprkind; + + appendContextKeyword(context, " GROUP BY ", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 1); + if (query->groupDistinct) + appendStringInfoString(buf, "DISTINCT "); + + save_exprkind = context->special_exprkind; + context->special_exprkind = EXPR_KIND_GROUP_BY; + + if (query->groupingSets == NIL) + { + sep = ""; + foreach(l, query->groupClause) + { + SortGroupClause *grp = (SortGroupClause *) lfirst(l); + + appendStringInfoString(buf, sep); + get_rule_sortgroupclause(grp->tleSortGroupRef, query->targetList, + false, context); + sep = ", "; + } + } + else + { + sep = ""; + foreach(l, query->groupingSets) + { + GroupingSet *grp = lfirst(l); + + appendStringInfoString(buf, sep); + get_rule_groupingset(grp, query->targetList, true, context); + sep = ", "; + } + } + + 
context->special_exprkind = save_exprkind; + } + + /* Add the HAVING clause if given */ + if (query->havingQual != NULL) + { + appendContextKeyword(context, " HAVING ", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 0); + get_rule_expr(query->havingQual, context, false); + } + + /* Add the WINDOW clause if needed */ + if (query->windowClause != NIL) + get_rule_windowclause(query, context); +} + +/* ---------- + * get_target_list - Parse back a SELECT target list + * + * This is also used for RETURNING lists in INSERT/UPDATE/DELETE. + * + * resultDesc and colNamesVisible are as for get_query_def() + * ---------- + */ +static void +get_target_list(List *targetList, deparse_context *context, + TupleDesc resultDesc, bool colNamesVisible) +{ + StringInfo buf = context->buf; + StringInfoData targetbuf; + bool last_was_multiline = false; + char *sep; + int colno; + ListCell *l; + + /* we use targetbuf to hold each TLE's text temporarily */ + initStringInfo(&targetbuf); + + sep = " "; + colno = 0; + foreach(l, targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + char *colname; + char *attname; + + if (tle->resjunk) + continue; /* ignore junk entries */ + + appendStringInfoString(buf, sep); + sep = ", "; + colno++; + + /* + * Put the new field text into targetbuf so we can decide after we've + * got it whether or not it needs to go on a new line. + */ + resetStringInfo(&targetbuf); + context->buf = &targetbuf; + + /* + * We special-case Var nodes rather than using get_rule_expr. This is + * needed because get_rule_expr will display a whole-row Var as + * "foo.*", which is the preferred notation in most contexts, but at + * the top level of a SELECT list it's not right (the parser will + * expand that notation into multiple columns, yielding behavior + * different from a whole-row Var). We need to call get_variable + * directly so that we can tell it to do the right thing, and so that + * we can get the attribute name which is the default AS label. + */ + if (tle->expr && (IsA(tle->expr, Var))) + { + attname = get_variable((Var *) tle->expr, 0, true, context); + } + else + { + get_rule_expr((Node *) tle->expr, context, true); + + /* + * When colNamesVisible is true, we should always show the + * assigned column name explicitly. Otherwise, show it only if + * it's not FigureColname's fallback. + */ + attname = colNamesVisible ? NULL : "?column?"; + } + + /* + * Figure out what the result column should be called. In the context + * of a view, use the view's tuple descriptor (so as to pick up the + * effects of any column RENAME that's been done on the view). + * Otherwise, just use what we can find in the TLE. + */ + if (resultDesc && colno <= resultDesc->natts) + colname = NameStr(TupleDescAttr(resultDesc, colno - 1)->attname); + else + colname = tle->resname; + + /* Show AS unless the column's name is correct as-is */ + if (colname) /* resname could be NULL */ + { + if (attname == NULL || strcmp(attname, colname) != 0) + appendStringInfo(&targetbuf, " AS %s", quote_identifier(colname)); + } + + /* Restore context's output buffer */ + context->buf = buf; + + /* Consider line-wrapping if enabled */ + if (PRETTY_INDENT(context) && context->wrapColumn >= 0) + { + int leading_nl_pos; + + /* Does the new field start with a new line? 
*/ + if (targetbuf.len > 0 && targetbuf.data[0] == '\n') + leading_nl_pos = 0; + else + leading_nl_pos = -1; + + /* If so, we shouldn't add anything */ + if (leading_nl_pos >= 0) + { + /* instead, remove any trailing spaces currently in buf */ + removeStringInfoSpaces(buf); + } + else + { + char *trailing_nl; + + /* Locate the start of the current line in the output buffer */ + trailing_nl = strrchr(buf->data, '\n'); + if (trailing_nl == NULL) + trailing_nl = buf->data; + else + trailing_nl++; + + /* + * Add a newline, plus some indentation, if the new field is + * not the first and either the new field would cause an + * overflow or the last field used more than one line. + */ + if (colno > 1 && + ((strlen(trailing_nl) + targetbuf.len > context->wrapColumn) || + last_was_multiline)) + appendContextKeyword(context, "", -PRETTYINDENT_STD, + PRETTYINDENT_STD, PRETTYINDENT_VAR); + } + + /* Remember this field's multiline status for next iteration */ + last_was_multiline = + (strchr(targetbuf.data + leading_nl_pos + 1, '\n') != NULL); + } + + /* Add the new field */ + appendBinaryStringInfo(buf, targetbuf.data, targetbuf.len); + } + + /* clean up */ + pfree(targetbuf.data); +} + +static void +get_setop_query(Node *setOp, Query *query, deparse_context *context, + TupleDesc resultDesc, bool colNamesVisible) +{ + StringInfo buf = context->buf; + bool need_paren; + + /* Guard against excessively long or deeply-nested queries */ + CHECK_FOR_INTERRUPTS(); + check_stack_depth(); + + if (IsA(setOp, RangeTblRef)) + { + RangeTblRef *rtr = (RangeTblRef *) setOp; + RangeTblEntry *rte = rt_fetch(rtr->rtindex, query->rtable); + Query *subquery = rte->subquery; + + Assert(subquery != NULL); + Assert(subquery->setOperations == NULL); + /* Need parens if WITH, ORDER BY, FOR UPDATE, or LIMIT; see gram.y */ + need_paren = (subquery->cteList || + subquery->sortClause || + subquery->rowMarks || + subquery->limitOffset || + subquery->limitCount); + if (need_paren) + appendStringInfoChar(buf, '('); + get_query_def(subquery, buf, context->namespaces, resultDesc, + colNamesVisible, + context->prettyFlags, context->wrapColumn, + context->indentLevel); + if (need_paren) + appendStringInfoChar(buf, ')'); + } + else if (IsA(setOp, SetOperationStmt)) + { + SetOperationStmt *op = (SetOperationStmt *) setOp; + int subindent; + + /* + * We force parens when nesting two SetOperationStmts, except when the + * lefthand input is another setop of the same kind. Syntactically, + * we could omit parens in rather more cases, but it seems best to use + * parens to flag cases where the setop operator changes. If we use + * parens, we also increase the indentation level for the child query. + * + * There are some cases in which parens are needed around a leaf query + * too, but those are more easily handled at the next level down (see + * code above). 
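+	 * For example, "a UNION b UNION c" needs no parentheses, while both
+	 * "(a UNION ALL b) UNION c" and "a UNION (b INTERSECT c)" keep theirs.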
+ */ + if (IsA(op->larg, SetOperationStmt)) + { + SetOperationStmt *lop = (SetOperationStmt *) op->larg; + + if (op->op == lop->op && op->all == lop->all) + need_paren = false; + else + need_paren = true; + } + else + need_paren = false; + + if (need_paren) + { + appendStringInfoChar(buf, '('); + subindent = PRETTYINDENT_STD; + appendContextKeyword(context, "", subindent, 0, 0); + } + else + subindent = 0; + + get_setop_query(op->larg, query, context, resultDesc, colNamesVisible); + + if (need_paren) + appendContextKeyword(context, ") ", -subindent, 0, 0); + else if (PRETTY_INDENT(context)) + appendContextKeyword(context, "", -subindent, 0, 0); + else + appendStringInfoChar(buf, ' '); + + switch (op->op) + { + case SETOP_UNION: + appendStringInfoString(buf, "UNION "); + break; + case SETOP_INTERSECT: + appendStringInfoString(buf, "INTERSECT "); + break; + case SETOP_EXCEPT: + appendStringInfoString(buf, "EXCEPT "); + break; + default: + elog(ERROR, "unrecognized set op: %d", + (int) op->op); + } + if (op->all) + appendStringInfoString(buf, "ALL "); + + /* Always parenthesize if RHS is another setop */ + need_paren = IsA(op->rarg, SetOperationStmt); + + /* + * The indentation code here is deliberately a bit different from that + * for the lefthand input, because we want the line breaks in + * different places. + */ + if (need_paren) + { + appendStringInfoChar(buf, '('); + subindent = PRETTYINDENT_STD; + } + else + subindent = 0; + appendContextKeyword(context, "", subindent, 0, 0); + + get_setop_query(op->rarg, query, context, resultDesc, false); + + if (PRETTY_INDENT(context)) + context->indentLevel -= subindent; + if (need_paren) + appendContextKeyword(context, ")", 0, 0, 0); + } + else + { + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(setOp)); + } +} + +/* + * Display a sort/group clause. + * + * Also returns the expression tree, so caller need not find it again. + */ +static Node * +get_rule_sortgroupclause(Index ref, List *tlist, bool force_colno, + deparse_context *context) +{ + StringInfo buf = context->buf; + TargetEntry *tle; + Node *expr; + + tle = get_sortgroupref_tle(ref, tlist); + expr = (Node *) tle->expr; + + /* + * Use column-number form if requested by caller. Otherwise, if + * expression is a constant, force it to be dumped with an explicit cast + * as decoration --- this is because a simple integer constant is + * ambiguous (and will be misinterpreted by findTargetlistEntry()) if we + * dump it without any decoration. If it's anything more complex than a + * simple Var, then force extra parens around it, to ensure it can't be + * misinterpreted as a cube() or rollup() construct. + */ + if (force_colno) + { + Assert(!tle->resjunk); + appendStringInfo(buf, "%d", tle->resno); + } + else if (expr && IsA(expr, Const)) + get_const_expr((Const *) expr, context, 1); + else if (!expr || IsA(expr, Var)) + get_rule_expr(expr, context, true); + else + { + /* + * We must force parens for function-like expressions even if + * PRETTY_PAREN is off, since those are the ones in danger of + * misparsing. For other expressions we need to force them only if + * PRETTY_PAREN is on, since otherwise the expression will output them + * itself. (We can't skip the parens.) 
+ */ + bool need_paren = (PRETTY_PAREN(context) + || IsA(expr, FuncExpr) + || IsA(expr, Aggref) + || IsA(expr, WindowFunc) + || IsA(expr, JsonConstructorExpr)); + + if (need_paren) + appendStringInfoChar(context->buf, '('); + get_rule_expr(expr, context, true); + if (need_paren) + appendStringInfoChar(context->buf, ')'); + } + + return expr; +} + +/* + * Display a GroupingSet + */ +static void +get_rule_groupingset(GroupingSet *gset, List *targetlist, + bool omit_parens, deparse_context *context) +{ + ListCell *l; + StringInfo buf = context->buf; + bool omit_child_parens = true; + char *sep = ""; + + switch (gset->kind) + { + case GROUPING_SET_EMPTY: + appendStringInfoString(buf, "()"); + return; + + case GROUPING_SET_SIMPLE: + { + if (!omit_parens || list_length(gset->content) != 1) + appendStringInfoChar(buf, '('); + + foreach(l, gset->content) + { + Index ref = lfirst_int(l); + + appendStringInfoString(buf, sep); + get_rule_sortgroupclause(ref, targetlist, + false, context); + sep = ", "; + } + + if (!omit_parens || list_length(gset->content) != 1) + appendStringInfoChar(buf, ')'); + } + return; + + case GROUPING_SET_ROLLUP: + appendStringInfoString(buf, "ROLLUP("); + break; + case GROUPING_SET_CUBE: + appendStringInfoString(buf, "CUBE("); + break; + case GROUPING_SET_SETS: + appendStringInfoString(buf, "GROUPING SETS ("); + omit_child_parens = false; + break; + } + + foreach(l, gset->content) + { + appendStringInfoString(buf, sep); + get_rule_groupingset(lfirst(l), targetlist, omit_child_parens, context); + sep = ", "; + } + + appendStringInfoChar(buf, ')'); +} + +/* + * Display an ORDER BY list. + */ +static void +get_rule_orderby(List *orderList, List *targetList, + bool force_colno, deparse_context *context) +{ + StringInfo buf = context->buf; + const char *sep; + ListCell *l; + + sep = ""; + foreach(l, orderList) + { + SortGroupClause *srt = (SortGroupClause *) lfirst(l); + Node *sortexpr; + Oid sortcoltype; + TypeCacheEntry *typentry; + + appendStringInfoString(buf, sep); + sortexpr = get_rule_sortgroupclause(srt->tleSortGroupRef, targetList, + force_colno, context); + sortcoltype = exprType(sortexpr); + /* See whether operator is default < or > for datatype */ + typentry = lookup_type_cache(sortcoltype, + TYPECACHE_LT_OPR | TYPECACHE_GT_OPR); + if (srt->sortop == typentry->lt_opr) + { + /* ASC is default, so emit nothing for it */ + if (srt->nulls_first) + appendStringInfoString(buf, " NULLS FIRST"); + } + else if (srt->sortop == typentry->gt_opr) + { + appendStringInfoString(buf, " DESC"); + /* DESC defaults to NULLS FIRST */ + if (!srt->nulls_first) + appendStringInfoString(buf, " NULLS LAST"); + } + else + { + appendStringInfo(buf, " USING %s", + generate_operator_name(srt->sortop, + sortcoltype, + sortcoltype)); + /* be specific to eliminate ambiguity */ + if (srt->nulls_first) + appendStringInfoString(buf, " NULLS FIRST"); + else + appendStringInfoString(buf, " NULLS LAST"); + } + sep = ", "; + } +} + +/* + * Display a WINDOW clause. + * + * Note that the windowClause list might contain only anonymous window + * specifications, in which case we should print nothing here. 
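+ * (Anonymous windows come from inline OVER (...) specifications; they are
+ * recognized below by their NULL name and skipped.)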
+ */ +static void +get_rule_windowclause(Query *query, deparse_context *context) +{ + StringInfo buf = context->buf; + const char *sep; + ListCell *l; + + sep = NULL; + foreach(l, query->windowClause) + { + WindowClause *wc = (WindowClause *) lfirst(l); + + if (wc->name == NULL) + continue; /* ignore anonymous windows */ + + if (sep == NULL) + appendContextKeyword(context, " WINDOW ", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 1); + else + appendStringInfoString(buf, sep); + + appendStringInfo(buf, "%s AS ", quote_identifier(wc->name)); + + get_rule_windowspec(wc, query->targetList, context); + + sep = ", "; + } +} + +/* + * Display a window definition + */ +static void +get_rule_windowspec(WindowClause *wc, List *targetList, + deparse_context *context) +{ + StringInfo buf = context->buf; + bool needspace = false; + const char *sep; + ListCell *l; + + appendStringInfoChar(buf, '('); + if (wc->refname) + { + appendStringInfoString(buf, quote_identifier(wc->refname)); + needspace = true; + } + /* partition clauses are always inherited, so only print if no refname */ + if (wc->partitionClause && !wc->refname) + { + if (needspace) + appendStringInfoChar(buf, ' '); + appendStringInfoString(buf, "PARTITION BY "); + sep = ""; + foreach(l, wc->partitionClause) + { + SortGroupClause *grp = (SortGroupClause *) lfirst(l); + + appendStringInfoString(buf, sep); + get_rule_sortgroupclause(grp->tleSortGroupRef, targetList, + false, context); + sep = ", "; + } + needspace = true; + } + /* print ordering clause only if not inherited */ + if (wc->orderClause && !wc->copiedOrder) + { + if (needspace) + appendStringInfoChar(buf, ' '); + appendStringInfoString(buf, "ORDER BY "); + get_rule_orderby(wc->orderClause, targetList, false, context); + needspace = true; + } + /* framing clause is never inherited, so print unless it's default */ + if (wc->frameOptions & FRAMEOPTION_NONDEFAULT) + { + if (needspace) + appendStringInfoChar(buf, ' '); + if (wc->frameOptions & FRAMEOPTION_RANGE) + appendStringInfoString(buf, "RANGE "); + else if (wc->frameOptions & FRAMEOPTION_ROWS) + appendStringInfoString(buf, "ROWS "); + else if (wc->frameOptions & FRAMEOPTION_GROUPS) + appendStringInfoString(buf, "GROUPS "); + else + Assert(false); + if (wc->frameOptions & FRAMEOPTION_BETWEEN) + appendStringInfoString(buf, "BETWEEN "); + if (wc->frameOptions & FRAMEOPTION_START_UNBOUNDED_PRECEDING) + appendStringInfoString(buf, "UNBOUNDED PRECEDING "); + else if (wc->frameOptions & FRAMEOPTION_START_CURRENT_ROW) + appendStringInfoString(buf, "CURRENT ROW "); + else if (wc->frameOptions & FRAMEOPTION_START_OFFSET) + { + get_rule_expr(wc->startOffset, context, false); + if (wc->frameOptions & FRAMEOPTION_START_OFFSET_PRECEDING) + appendStringInfoString(buf, " PRECEDING "); + else if (wc->frameOptions & FRAMEOPTION_START_OFFSET_FOLLOWING) + appendStringInfoString(buf, " FOLLOWING "); + else + Assert(false); + } + else + Assert(false); + if (wc->frameOptions & FRAMEOPTION_BETWEEN) + { + appendStringInfoString(buf, "AND "); + if (wc->frameOptions & FRAMEOPTION_END_UNBOUNDED_FOLLOWING) + appendStringInfoString(buf, "UNBOUNDED FOLLOWING "); + else if (wc->frameOptions & FRAMEOPTION_END_CURRENT_ROW) + appendStringInfoString(buf, "CURRENT ROW "); + else if (wc->frameOptions & FRAMEOPTION_END_OFFSET) + { + get_rule_expr(wc->endOffset, context, false); + if (wc->frameOptions & FRAMEOPTION_END_OFFSET_PRECEDING) + appendStringInfoString(buf, " PRECEDING "); + else if (wc->frameOptions & FRAMEOPTION_END_OFFSET_FOLLOWING) + appendStringInfoString(buf, " 
FOLLOWING "); + else + Assert(false); + } + else + Assert(false); + } + if (wc->frameOptions & FRAMEOPTION_EXCLUDE_CURRENT_ROW) + appendStringInfoString(buf, "EXCLUDE CURRENT ROW "); + else if (wc->frameOptions & FRAMEOPTION_EXCLUDE_GROUP) + appendStringInfoString(buf, "EXCLUDE GROUP "); + else if (wc->frameOptions & FRAMEOPTION_EXCLUDE_TIES) + appendStringInfoString(buf, "EXCLUDE TIES "); + /* we will now have a trailing space; remove it */ + buf->len--; + } + appendStringInfoChar(buf, ')'); +} + +/* ---------- + * get_insert_query_def - Parse back an INSERT parsetree + * ---------- + */ +static void +get_insert_query_def(Query *query, deparse_context *context, + bool colNamesVisible) +{ + StringInfo buf = context->buf; + RangeTblEntry *select_rte = NULL; + RangeTblEntry *values_rte = NULL; + RangeTblEntry *rte; + char *sep; + ListCell *l; + List *strippedexprs; + + /* Insert the WITH clause if given */ + get_with_clause(query, context); + + /* + * If it's an INSERT ... SELECT or multi-row VALUES, there will be a + * single RTE for the SELECT or VALUES. Plain VALUES has neither. + */ + foreach(l, query->rtable) + { + rte = (RangeTblEntry *) lfirst(l); + + if (rte->rtekind == RTE_SUBQUERY) + { + if (select_rte) + elog(ERROR, "too many subquery RTEs in INSERT"); + select_rte = rte; + } + + if (rte->rtekind == RTE_VALUES) + { + if (values_rte) + elog(ERROR, "too many values RTEs in INSERT"); + values_rte = rte; + } + } + if (select_rte && values_rte) + elog(ERROR, "both subquery and values RTEs in INSERT"); + + /* + * Start the query with INSERT INTO relname + */ + rte = rt_fetch(query->resultRelation, query->rtable); + Assert(rte->rtekind == RTE_RELATION); + + if (PRETTY_INDENT(context)) + { + context->indentLevel += PRETTYINDENT_STD; + appendStringInfoChar(buf, ' '); + } + appendStringInfo(buf, "INSERT INTO %s", + generate_relation_name(rte->relid, NIL)); + + /* Print the relation alias, if needed; INSERT requires explicit AS */ + get_rte_alias(rte, query->resultRelation, true, context); + + /* always want a space here */ + appendStringInfoChar(buf, ' '); + + /* + * Add the insert-column-names list. Any indirection decoration needed on + * the column names can be inferred from the top targetlist. + */ + strippedexprs = NIL; + sep = ""; + if (query->targetList) + appendStringInfoChar(buf, '('); + foreach(l, query->targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + if (tle->resjunk) + continue; /* ignore junk entries */ + + appendStringInfoString(buf, sep); + sep = ", "; + + /* + * Put out name of target column; look in the catalogs, not at + * tle->resname, since resname will fail to track RENAME. + */ + appendStringInfoString(buf, + quote_identifier(get_attname(rte->relid, + tle->resno, + false))); + + /* + * Print any indirection needed (subfields or subscripts), and strip + * off the top-level nodes representing the indirection assignments. + * Add the stripped expressions to strippedexprs. (If it's a + * single-VALUES statement, the stripped expressions are the VALUES to + * print below. Otherwise they're just Vars and not really + * interesting.) 
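+ *
+ * Illustration (hypothetical SQL): for
+ *		INSERT INTO t (a[1], b.f) VALUES (10, 20)
+ * processIndirection() emits the "[1]" and ".f" decorations after the
+ * column names and returns 10 and 20, which become the single-VALUES
+ * list printed below.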
+ */ + strippedexprs = lappend(strippedexprs, + processIndirection((Node *) tle->expr, + context)); + } + if (query->targetList) + appendStringInfoString(buf, ") "); + + if (query->override) + { + if (query->override == OVERRIDING_SYSTEM_VALUE) + appendStringInfoString(buf, "OVERRIDING SYSTEM VALUE "); + else if (query->override == OVERRIDING_USER_VALUE) + appendStringInfoString(buf, "OVERRIDING USER VALUE "); + } + + if (select_rte) + { + /* Add the SELECT */ + get_query_def(select_rte->subquery, buf, context->namespaces, NULL, + false, + context->prettyFlags, context->wrapColumn, + context->indentLevel); + } + else if (values_rte) + { + /* Add the multi-VALUES expression lists */ + get_values_def(values_rte->values_lists, context); + } + else if (strippedexprs) + { + /* Add the single-VALUES expression list */ + appendContextKeyword(context, "VALUES (", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 2); + get_rule_list_toplevel(strippedexprs, context, false); + appendStringInfoChar(buf, ')'); + } + else + { + /* No expressions, so it must be DEFAULT VALUES */ + appendStringInfoString(buf, "DEFAULT VALUES"); + } + + /* Add ON CONFLICT if present */ + if (query->onConflict) + { + OnConflictExpr *confl = query->onConflict; + + appendStringInfoString(buf, " ON CONFLICT"); + + if (confl->arbiterElems) + { + /* Add the single-VALUES expression list */ + appendStringInfoChar(buf, '('); + get_rule_expr((Node *) confl->arbiterElems, context, false); + appendStringInfoChar(buf, ')'); + + /* Add a WHERE clause (for partial indexes) if given */ + if (confl->arbiterWhere != NULL) + { + bool save_varprefix; + + /* + * Force non-prefixing of Vars, since parser assumes that they + * belong to target relation. WHERE clause does not use + * InferenceElem, so this is separately required. 
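+ *
+ * Illustration (hypothetical SQL): the arbiter clause of
+ *		ON CONFLICT (c) WHERE c IS NOT NULL DO NOTHING
+ * is printed with the bare column name "c", just as the partial-index
+ * predicate would be written in the original command.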
+ */ + save_varprefix = context->varprefix; + context->varprefix = false; + + appendContextKeyword(context, " WHERE ", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 1); + get_rule_expr(confl->arbiterWhere, context, false); + + context->varprefix = save_varprefix; + } + } + else if (OidIsValid(confl->constraint)) + { + char *constraint = get_constraint_name(confl->constraint); + + if (!constraint) + elog(ERROR, "cache lookup failed for constraint %u", + confl->constraint); + appendStringInfo(buf, " ON CONSTRAINT %s", + quote_identifier(constraint)); + } + + if (confl->action == ONCONFLICT_NOTHING) + { + appendStringInfoString(buf, " DO NOTHING"); + } + else + { + appendStringInfoString(buf, " DO UPDATE SET "); + /* Deparse targetlist */ + get_update_query_targetlist_def(query, confl->onConflictSet, + context, rte); + + /* Add a WHERE clause if given */ + if (confl->onConflictWhere != NULL) + { + appendContextKeyword(context, " WHERE ", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 1); + get_rule_expr(confl->onConflictWhere, context, false); + } + } + } + + /* Add RETURNING if present */ + if (query->returningList) + { + appendContextKeyword(context, " RETURNING", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 1); + get_target_list(query->returningList, context, NULL, colNamesVisible); + } +} + + +/* ---------- + * get_update_query_def - Parse back an UPDATE parsetree + * ---------- + */ +static void +get_update_query_def(Query *query, deparse_context *context, + bool colNamesVisible) +{ + StringInfo buf = context->buf; + RangeTblEntry *rte; + + /* Insert the WITH clause if given */ + get_with_clause(query, context); + + /* + * Start the query with UPDATE relname SET + */ + rte = rt_fetch(query->resultRelation, query->rtable); + Assert(rte->rtekind == RTE_RELATION); + if (PRETTY_INDENT(context)) + { + appendStringInfoChar(buf, ' '); + context->indentLevel += PRETTYINDENT_STD; + } + appendStringInfo(buf, "UPDATE %s%s", + only_marker(rte), + generate_relation_name(rte->relid, NIL)); + + /* Print the relation alias, if needed */ + get_rte_alias(rte, query->resultRelation, false, context); + + appendStringInfoString(buf, " SET "); + + /* Deparse targetlist */ + get_update_query_targetlist_def(query, query->targetList, context, rte); + + /* Add the FROM clause if needed */ + get_from_clause(query, " FROM ", context); + + /* Add a WHERE clause if given */ + if (query->jointree->quals != NULL) + { + appendContextKeyword(context, " WHERE ", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 1); + get_rule_expr(query->jointree->quals, context, false); + } + + /* Add RETURNING if present */ + if (query->returningList) + { + appendContextKeyword(context, " RETURNING", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 1); + get_target_list(query->returningList, context, NULL, colNamesVisible); + } +} + + +/* ---------- + * get_update_query_targetlist_def - Parse back an UPDATE targetlist + * ---------- + */ +static void +get_update_query_targetlist_def(Query *query, List *targetList, + deparse_context *context, RangeTblEntry *rte) +{ + StringInfo buf = context->buf; + ListCell *l; + ListCell *next_ma_cell; + int remaining_ma_columns; + const char *sep; + SubLink *cur_ma_sublink; + List *ma_sublinks; + + /* + * Prepare to deal with MULTIEXPR assignments: collect the source SubLinks + * into a list. We expect them to appear, in ID order, in resjunk tlist + * entries. 
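+ *
+ * Illustration (hypothetical SQL): in
+ *		UPDATE t SET (a, b) = (SELECT x, y FROM s WHERE s.k = t.k)
+ * the sub-select is carried by one MULTIEXPR SubLink in a resjunk entry,
+ * while the entries for a and b hold PARAM_MULTIEXPR Params referencing
+ * its outputs; the "(a, b) = (...)" syntax is reassembled from those.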
+ */ + ma_sublinks = NIL; + if (query->hasSubLinks) /* else there can't be any */ + { + foreach(l, targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + if (tle->resjunk && IsA(tle->expr, SubLink)) + { + SubLink *sl = (SubLink *) tle->expr; + + if (sl->subLinkType == MULTIEXPR_SUBLINK) + { + ma_sublinks = lappend(ma_sublinks, sl); + Assert(sl->subLinkId == list_length(ma_sublinks)); + } + } + } + } + next_ma_cell = list_head(ma_sublinks); + cur_ma_sublink = NULL; + remaining_ma_columns = 0; + + /* Add the comma separated list of 'attname = value' */ + sep = ""; + foreach(l, targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + Node *expr; + + if (tle->resjunk) + continue; /* ignore junk entries */ + + /* Emit separator (OK whether we're in multiassignment or not) */ + appendStringInfoString(buf, sep); + sep = ", "; + + /* + * Check to see if we're starting a multiassignment group: if so, + * output a left paren. + */ + if (next_ma_cell != NULL && cur_ma_sublink == NULL) + { + /* + * We must dig down into the expr to see if it's a PARAM_MULTIEXPR + * Param. That could be buried under FieldStores and + * SubscriptingRefs and CoerceToDomains (cf processIndirection()), + * and underneath those there could be an implicit type coercion. + * Because we would ignore implicit type coercions anyway, we + * don't need to be as careful as processIndirection() is about + * descending past implicit CoerceToDomains. + */ + expr = (Node *) tle->expr; + while (expr) + { + if (IsA(expr, FieldStore)) + { + FieldStore *fstore = (FieldStore *) expr; + + expr = (Node *) linitial(fstore->newvals); + } + else if (IsA(expr, SubscriptingRef)) + { + SubscriptingRef *sbsref = (SubscriptingRef *) expr; + + if (sbsref->refassgnexpr == NULL) + break; + + expr = (Node *) sbsref->refassgnexpr; + } + else if (IsA(expr, CoerceToDomain)) + { + CoerceToDomain *cdomain = (CoerceToDomain *) expr; + + if (cdomain->coercionformat != COERCE_IMPLICIT_CAST) + break; + expr = (Node *) cdomain->arg; + } + else + break; + } + expr = strip_implicit_coercions(expr); + + if (expr && IsA(expr, Param) && + ((Param *) expr)->paramkind == PARAM_MULTIEXPR) + { + cur_ma_sublink = (SubLink *) lfirst(next_ma_cell); + next_ma_cell = lnext(ma_sublinks, next_ma_cell); + remaining_ma_columns = count_nonjunk_tlist_entries(((Query *) cur_ma_sublink->subselect)->targetList); + Assert(((Param *) expr)->paramid == + ((cur_ma_sublink->subLinkId << 16) | 1)); + appendStringInfoChar(buf, '('); + } + } + + /* + * Put out name of target column; look in the catalogs, not at + * tle->resname, since resname will fail to track RENAME. + */ + appendStringInfoString(buf, + quote_identifier(get_attname(rte->relid, + tle->resno, + false))); + + /* + * Print any indirection needed (subfields or subscripts), and strip + * off the top-level nodes representing the indirection assignments. + */ + expr = processIndirection((Node *) tle->expr, context); + + /* + * If we're in a multiassignment, skip printing anything more, unless + * this is the last column; in which case, what we print should be the + * sublink, not the Param. 
+ */ + if (cur_ma_sublink != NULL) + { + if (--remaining_ma_columns > 0) + continue; /* not the last column of multiassignment */ + appendStringInfoChar(buf, ')'); + expr = (Node *) cur_ma_sublink; + cur_ma_sublink = NULL; + } + + appendStringInfoString(buf, " = "); + + get_rule_expr(expr, context, false); + } +} + + +/* ---------- + * get_delete_query_def - Parse back a DELETE parsetree + * ---------- + */ +static void +get_delete_query_def(Query *query, deparse_context *context, + bool colNamesVisible) +{ + StringInfo buf = context->buf; + RangeTblEntry *rte; + + /* Insert the WITH clause if given */ + get_with_clause(query, context); + + /* + * Start the query with DELETE FROM relname + */ + rte = rt_fetch(query->resultRelation, query->rtable); + Assert(rte->rtekind == RTE_RELATION); + if (PRETTY_INDENT(context)) + { + appendStringInfoChar(buf, ' '); + context->indentLevel += PRETTYINDENT_STD; + } + appendStringInfo(buf, "DELETE FROM %s%s", + only_marker(rte), + generate_relation_name(rte->relid, NIL)); + + /* Print the relation alias, if needed */ + get_rte_alias(rte, query->resultRelation, false, context); + + /* Add the USING clause if given */ + get_from_clause(query, " USING ", context); + + /* Add a WHERE clause if given */ + if (query->jointree->quals != NULL) + { + appendContextKeyword(context, " WHERE ", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 1); + get_rule_expr(query->jointree->quals, context, false); + } + + /* Add RETURNING if present */ + if (query->returningList) + { + appendContextKeyword(context, " RETURNING", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 1); + get_target_list(query->returningList, context, NULL, colNamesVisible); + } +} + + +/* ---------- + * get_merge_query_def - Parse back a MERGE parsetree + * ---------- + */ +static void +get_merge_query_def(Query *query, deparse_context *context, + bool colNamesVisible) +{ + StringInfo buf = context->buf; + RangeTblEntry *rte; + ListCell *lc; + + /* Insert the WITH clause if given */ + get_with_clause(query, context); + + /* + * Start the query with MERGE INTO relname + */ + rte = rt_fetch(query->resultRelation, query->rtable); + Assert(rte->rtekind == RTE_RELATION); + if (PRETTY_INDENT(context)) + { + appendStringInfoChar(buf, ' '); + context->indentLevel += PRETTYINDENT_STD; + } + appendStringInfo(buf, "MERGE INTO %s%s", + only_marker(rte), + generate_relation_name(rte->relid, NIL)); + + /* Print the relation alias, if needed */ + get_rte_alias(rte, query->resultRelation, false, context); + + /* Print the source relation and join clause */ + get_from_clause(query, " USING ", context); + appendContextKeyword(context, " ON ", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 2); + get_rule_expr(query->jointree->quals, context, false); + + /* Print each merge action */ + foreach(lc, query->mergeActionList) + { + MergeAction *action = lfirst_node(MergeAction, lc); + + appendContextKeyword(context, " WHEN ", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 2); + appendStringInfo(buf, "%sMATCHED", action->matched ? 
"" : "NOT "); + + if (action->qual) + { + appendContextKeyword(context, " AND ", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 3); + get_rule_expr(action->qual, context, false); + } + appendContextKeyword(context, " THEN ", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 3); + + if (action->commandType == CMD_INSERT) + { + /* This generally matches get_insert_query_def() */ + List *strippedexprs = NIL; + const char *sep = ""; + ListCell *lc2; + + appendStringInfoString(buf, "INSERT"); + + if (action->targetList) + appendStringInfoString(buf, " ("); + foreach(lc2, action->targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc2); + + Assert(!tle->resjunk); + + appendStringInfoString(buf, sep); + sep = ", "; + + appendStringInfoString(buf, + quote_identifier(get_attname(rte->relid, + tle->resno, + false))); + strippedexprs = lappend(strippedexprs, + processIndirection((Node *) tle->expr, + context)); + } + if (action->targetList) + appendStringInfoChar(buf, ')'); + + if (action->override) + { + if (action->override == OVERRIDING_SYSTEM_VALUE) + appendStringInfoString(buf, " OVERRIDING SYSTEM VALUE"); + else if (action->override == OVERRIDING_USER_VALUE) + appendStringInfoString(buf, " OVERRIDING USER VALUE"); + } + + if (strippedexprs) + { + appendContextKeyword(context, " VALUES (", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 4); + get_rule_list_toplevel(strippedexprs, context, false); + appendStringInfoChar(buf, ')'); + } + else + appendStringInfoString(buf, " DEFAULT VALUES"); + } + else if (action->commandType == CMD_UPDATE) + { + appendStringInfoString(buf, "UPDATE SET "); + get_update_query_targetlist_def(query, action->targetList, + context, rte); + } + else if (action->commandType == CMD_DELETE) + appendStringInfoString(buf, "DELETE"); + else if (action->commandType == CMD_NOTHING) + appendStringInfoString(buf, "DO NOTHING"); + } + + /* No RETURNING support in MERGE yet */ + Assert(query->returningList == NIL); +} + + +/* ---------- + * get_utility_query_def - Parse back a UTILITY parsetree + * ---------- + */ +static void +get_utility_query_def(Query *query, deparse_context *context) +{ + StringInfo buf = context->buf; + + if (query->utilityStmt && IsA(query->utilityStmt, NotifyStmt)) + { + NotifyStmt *stmt = (NotifyStmt *) query->utilityStmt; + + appendContextKeyword(context, "", + 0, PRETTYINDENT_STD, 1); + appendStringInfo(buf, "NOTIFY %s", + quote_identifier(stmt->conditionname)); + if (stmt->payload) + { + appendStringInfoString(buf, ", "); + simple_quote_literal(buf, stmt->payload); + } + } + else + { + /* Currently only NOTIFY utility commands can appear in rules */ + elog(ERROR, "unexpected utility statement type"); + } +} + +/* + * Display a Var appropriately. + * + * In some cases (currently only when recursing into an unnamed join) + * the Var's varlevelsup has to be interpreted with respect to a context + * above the current one; levelsup indicates the offset. + * + * If istoplevel is true, the Var is at the top level of a SELECT's + * targetlist, which means we need special treatment of whole-row Vars. + * Instead of the normal "tab.*", we'll print "tab.*::typename", which is a + * dirty hack to prevent "tab.*" from being expanded into multiple columns. + * (The parser will strip the useless coercion, so no inefficiency is added in + * dump and reload.) We used to print just "tab" in such cases, but that is + * ambiguous and will yield the wrong result if "tab" is also a plain column + * name in the query. 
+ * + * Returns the attname of the Var, or NULL if the Var has no attname (because + * it is a whole-row Var or a subplan output reference). + */ +static char * +get_variable(Var *var, int levelsup, bool istoplevel, deparse_context *context) +{ + StringInfo buf = context->buf; + RangeTblEntry *rte; + AttrNumber attnum; + int netlevelsup; + deparse_namespace *dpns; + int varno; + AttrNumber varattno; + deparse_columns *colinfo; + char *refname; + char *attname; + + /* Find appropriate nesting depth */ + netlevelsup = var->varlevelsup + levelsup; + if (netlevelsup >= list_length(context->namespaces)) + elog(ERROR, "bogus varlevelsup: %d offset %d", + var->varlevelsup, levelsup); + dpns = (deparse_namespace *) list_nth(context->namespaces, + netlevelsup); + + /* + * If we have a syntactic referent for the Var, and we're working from a + * parse tree, prefer to use the syntactic referent. Otherwise, fall back + * on the semantic referent. (Forcing use of the semantic referent when + * printing plan trees is a design choice that's perhaps more motivated by + * backwards compatibility than anything else. But it does have the + * advantage of making plans more explicit.) + */ + if (var->varnosyn > 0 && dpns->plan == NULL) + { + varno = var->varnosyn; + varattno = var->varattnosyn; + } + else + { + varno = var->varno; + varattno = var->varattno; + } + + /* + * Try to find the relevant RTE in this rtable. In a plan tree, it's + * likely that varno is OUTER_VAR or INNER_VAR, in which case we must dig + * down into the subplans, or INDEX_VAR, which is resolved similarly. Also + * find the aliases previously assigned for this RTE. + */ + if (varno >= 1 && varno <= list_length(dpns->rtable)) + { + /* + * We might have been asked to map child Vars to some parent relation. + */ + if (context->appendparents && dpns->appendrels) + { + int pvarno = varno; + AttrNumber pvarattno = varattno; + AppendRelInfo *appinfo = dpns->appendrels[pvarno]; + bool found = false; + + /* Only map up to inheritance parents, not UNION ALL appendrels */ + while (appinfo && + rt_fetch(appinfo->parent_relid, + dpns->rtable)->rtekind == RTE_RELATION) + { + found = false; + if (pvarattno > 0) /* system columns stay as-is */ + { + if (pvarattno > appinfo->num_child_cols) + break; /* safety check */ + pvarattno = appinfo->parent_colnos[pvarattno - 1]; + if (pvarattno == 0) + break; /* Var is local to child */ + } + + pvarno = appinfo->parent_relid; + found = true; + + /* If the parent is itself a child, continue up. */ + Assert(pvarno > 0 && pvarno <= list_length(dpns->rtable)); + appinfo = dpns->appendrels[pvarno]; + } + + /* + * If we found an ancestral rel, and that rel is included in + * appendparents, print that column not the original one. + */ + if (found && bms_is_member(pvarno, context->appendparents)) + { + varno = pvarno; + varattno = pvarattno; + } + } + + rte = rt_fetch(varno, dpns->rtable); + refname = (char *) list_nth(dpns->rtable_names, varno - 1); + colinfo = deparse_columns_fetch(varno, dpns); + attnum = varattno; + } + else + { + resolve_special_varno((Node *) var, context, + get_special_variable, NULL); + return NULL; + } + + /* + * The planner will sometimes emit Vars referencing resjunk elements of a + * subquery's target list (this is currently only possible if it chooses + * to generate a "physical tlist" for a SubqueryScan or CteScan node). + * Although we prefer to print subquery-referencing Vars using the + * subquery's alias, that's not possible for resjunk items since they have + * no alias. 
So in that case, drill down to the subplan and print the + * contents of the referenced tlist item. This works because in a plan + * tree, such Vars can only occur in a SubqueryScan or CteScan node, and + * we'll have set dpns->inner_plan to reference the child plan node. + */ + if ((rte->rtekind == RTE_SUBQUERY || rte->rtekind == RTE_CTE) && + attnum > list_length(rte->eref->colnames) && + dpns->inner_plan) + { + TargetEntry *tle; + deparse_namespace save_dpns; + + tle = get_tle_by_resno(dpns->inner_tlist, attnum); + if (!tle) + elog(ERROR, "invalid attnum %d for relation \"%s\"", + attnum, rte->eref->aliasname); + + Assert(netlevelsup == 0); + push_child_plan(dpns, dpns->inner_plan, &save_dpns); + + /* + * Force parentheses because our caller probably assumed a Var is a + * simple expression. + */ + if (!IsA(tle->expr, Var)) + appendStringInfoChar(buf, '('); + get_rule_expr((Node *) tle->expr, context, true); + if (!IsA(tle->expr, Var)) + appendStringInfoChar(buf, ')'); + + pop_child_plan(dpns, &save_dpns); + return NULL; + } + + /* + * If it's an unnamed join, look at the expansion of the alias variable. + * If it's a simple reference to one of the input vars, then recursively + * print the name of that var instead. When it's not a simple reference, + * we have to just print the unqualified join column name. (This can only + * happen with "dangerous" merged columns in a JOIN USING; we took pains + * previously to make the unqualified column name unique in such cases.) + * + * This wouldn't work in decompiling plan trees, because we don't store + * joinaliasvars lists after planning; but a plan tree should never + * contain a join alias variable. + */ + if (rte->rtekind == RTE_JOIN && rte->alias == NULL) + { + if (rte->joinaliasvars == NIL) + elog(ERROR, "cannot decompile join alias var in plan tree"); + if (attnum > 0) + { + Var *aliasvar; + + aliasvar = (Var *) list_nth(rte->joinaliasvars, attnum - 1); + /* we intentionally don't strip implicit coercions here */ + if (aliasvar && IsA(aliasvar, Var)) + { + return get_variable(aliasvar, var->varlevelsup + levelsup, + istoplevel, context); + } + } + + /* + * Unnamed join has no refname. (Note: since it's unnamed, there is + * no way the user could have referenced it to create a whole-row Var + * for it. So we don't have to cover that case below.) + */ + Assert(refname == NULL); + } + + if (attnum == InvalidAttrNumber) + attname = NULL; + else if (attnum > 0) + { + /* Get column name to use from the colinfo struct */ + if (attnum > colinfo->num_cols) + elog(ERROR, "invalid attnum %d for relation \"%s\"", + attnum, rte->eref->aliasname); + attname = colinfo->colnames[attnum - 1]; + + /* + * If we find a Var referencing a dropped column, it seems better to + * print something (anything) than to fail. In general this should + * not happen, but it used to be possible for some cases involving + * functions returning named composite types, and perhaps there are + * still bugs out there. 
+ */ + if (attname == NULL) + attname = "?dropped?column?"; + } + else + { + /* System column - name is fixed, get it from the catalog */ + attname = get_rte_attribute_name(rte, attnum); + } + + if (refname && (context->varprefix || attname == NULL)) + { + appendStringInfoString(buf, quote_identifier(refname)); + appendStringInfoChar(buf, '.'); + } + if (attname) + appendStringInfoString(buf, quote_identifier(attname)); + else + { + appendStringInfoChar(buf, '*'); + if (istoplevel) + appendStringInfo(buf, "::%s", + format_type_with_typemod(var->vartype, + var->vartypmod)); + } + + return attname; +} + +/* + * Deparse a Var which references OUTER_VAR, INNER_VAR, or INDEX_VAR. This + * routine is actually a callback for resolve_special_varno, which handles + * finding the correct TargetEntry. We get the expression contained in that + * TargetEntry and just need to deparse it, a job we can throw back on + * get_rule_expr. + */ +static void +get_special_variable(Node *node, deparse_context *context, void *callback_arg) +{ + StringInfo buf = context->buf; + + /* + * For a non-Var referent, force parentheses because our caller probably + * assumed a Var is a simple expression. + */ + if (!IsA(node, Var)) + appendStringInfoChar(buf, '('); + get_rule_expr(node, context, true); + if (!IsA(node, Var)) + appendStringInfoChar(buf, ')'); +} + +/* + * Chase through plan references to special varnos (OUTER_VAR, INNER_VAR, + * INDEX_VAR) until we find a real Var or some kind of non-Var node; then, + * invoke the callback provided. + */ +static void +resolve_special_varno(Node *node, deparse_context *context, + rsv_callback callback, void *callback_arg) +{ + Var *var; + deparse_namespace *dpns; + + /* This function is recursive, so let's be paranoid. */ + check_stack_depth(); + + /* If it's not a Var, invoke the callback. */ + if (!IsA(node, Var)) + { + (*callback) (node, context, callback_arg); + return; + } + + /* Find appropriate nesting depth */ + var = (Var *) node; + dpns = (deparse_namespace *) list_nth(context->namespaces, + var->varlevelsup); + + /* + * If varno is special, recurse. (Don't worry about varnosyn; if we're + * here, we already decided not to use that.) + */ + if (var->varno == OUTER_VAR && dpns->outer_tlist) + { + TargetEntry *tle; + deparse_namespace save_dpns; + Bitmapset *save_appendparents; + + tle = get_tle_by_resno(dpns->outer_tlist, var->varattno); + if (!tle) + elog(ERROR, "bogus varattno for OUTER_VAR var: %d", var->varattno); + + /* + * If we're descending to the first child of an Append or MergeAppend, + * update appendparents. This will affect deparsing of all Vars + * appearing within the eventually-resolved subexpression. 
+ */ + save_appendparents = context->appendparents; + + if (IsA(dpns->plan, Append)) + context->appendparents = bms_union(context->appendparents, + ((Append *) dpns->plan)->apprelids); + else if (IsA(dpns->plan, MergeAppend)) + context->appendparents = bms_union(context->appendparents, + ((MergeAppend *) dpns->plan)->apprelids); + + push_child_plan(dpns, dpns->outer_plan, &save_dpns); + resolve_special_varno((Node *) tle->expr, context, + callback, callback_arg); + pop_child_plan(dpns, &save_dpns); + context->appendparents = save_appendparents; + return; + } + else if (var->varno == INNER_VAR && dpns->inner_tlist) + { + TargetEntry *tle; + deparse_namespace save_dpns; + + tle = get_tle_by_resno(dpns->inner_tlist, var->varattno); + if (!tle) + elog(ERROR, "bogus varattno for INNER_VAR var: %d", var->varattno); + + push_child_plan(dpns, dpns->inner_plan, &save_dpns); + resolve_special_varno((Node *) tle->expr, context, + callback, callback_arg); + pop_child_plan(dpns, &save_dpns); + return; + } + else if (var->varno == INDEX_VAR && dpns->index_tlist) + { + TargetEntry *tle; + + tle = get_tle_by_resno(dpns->index_tlist, var->varattno); + if (!tle) + elog(ERROR, "bogus varattno for INDEX_VAR var: %d", var->varattno); + + resolve_special_varno((Node *) tle->expr, context, + callback, callback_arg); + return; + } + else if (var->varno < 1 || var->varno > list_length(dpns->rtable)) + elog(ERROR, "bogus varno: %d", var->varno); + + /* Not special. Just invoke the callback. */ + (*callback) (node, context, callback_arg); +} + +/* + * Get the name of a field of an expression of composite type. The + * expression is usually a Var, but we handle other cases too. + * + * levelsup is an extra offset to interpret the Var's varlevelsup correctly. + * + * This is fairly straightforward when the expression has a named composite + * type; we need only look up the type in the catalogs. However, the type + * could also be RECORD. Since no actual table or view column is allowed to + * have type RECORD, a Var of type RECORD must refer to a JOIN or FUNCTION RTE + * or to a subquery output. We drill down to find the ultimate defining + * expression and attempt to infer the field name from it. We ereport if we + * can't determine the name. + * + * Similarly, a PARAM of type RECORD has to refer to some expression of + * a determinable composite type. + */ +static const char * +get_name_for_var_field(Var *var, int fieldno, + int levelsup, deparse_context *context) +{ + RangeTblEntry *rte; + AttrNumber attnum; + int netlevelsup; + deparse_namespace *dpns; + int varno; + AttrNumber varattno; + TupleDesc tupleDesc; + Node *expr; + + /* + * If it's a RowExpr that was expanded from a whole-row Var, use the + * column names attached to it. (We could let get_expr_result_tupdesc() + * handle this, but it's much cheaper to just pull out the name we need.) + */ + if (IsA(var, RowExpr)) + { + RowExpr *r = (RowExpr *) var; + + if (fieldno > 0 && fieldno <= list_length(r->colnames)) + return strVal(list_nth(r->colnames, fieldno - 1)); + } + + /* + * If it's a Param of type RECORD, try to find what the Param refers to. 
+ */ + if (IsA(var, Param)) + { + Param *param = (Param *) var; + ListCell *ancestor_cell; + + expr = find_param_referent(param, context, &dpns, &ancestor_cell); + if (expr) + { + /* Found a match, so recurse to decipher the field name */ + deparse_namespace save_dpns; + const char *result; + + push_ancestor_plan(dpns, ancestor_cell, &save_dpns); + result = get_name_for_var_field((Var *) expr, fieldno, + 0, context); + pop_ancestor_plan(dpns, &save_dpns); + return result; + } + } + + /* + * If it's a Var of type RECORD, we have to find what the Var refers to; + * if not, we can use get_expr_result_tupdesc(). + */ + if (!IsA(var, Var) || + var->vartype != RECORDOID) + { + tupleDesc = get_expr_result_tupdesc((Node *) var, false); + /* Got the tupdesc, so we can extract the field name */ + Assert(fieldno >= 1 && fieldno <= tupleDesc->natts); + return NameStr(TupleDescAttr(tupleDesc, fieldno - 1)->attname); + } + + /* Find appropriate nesting depth */ + netlevelsup = var->varlevelsup + levelsup; + if (netlevelsup >= list_length(context->namespaces)) + elog(ERROR, "bogus varlevelsup: %d offset %d", + var->varlevelsup, levelsup); + dpns = (deparse_namespace *) list_nth(context->namespaces, + netlevelsup); + + /* + * If we have a syntactic referent for the Var, and we're working from a + * parse tree, prefer to use the syntactic referent. Otherwise, fall back + * on the semantic referent. (See comments in get_variable().) + */ + if (var->varnosyn > 0 && dpns->plan == NULL) + { + varno = var->varnosyn; + varattno = var->varattnosyn; + } + else + { + varno = var->varno; + varattno = var->varattno; + } + + /* + * Try to find the relevant RTE in this rtable. In a plan tree, it's + * likely that varno is OUTER_VAR or INNER_VAR, in which case we must dig + * down into the subplans, or INDEX_VAR, which is resolved similarly. + * + * Note: unlike get_variable and resolve_special_varno, we need not worry + * about inheritance mapping: a child Var should have the same datatype as + * its parent, and here we're really only interested in the Var's type. 
+ */ + if (varno >= 1 && varno <= list_length(dpns->rtable)) + { + rte = rt_fetch(varno, dpns->rtable); + attnum = varattno; + } + else if (varno == OUTER_VAR && dpns->outer_tlist) + { + TargetEntry *tle; + deparse_namespace save_dpns; + const char *result; + + tle = get_tle_by_resno(dpns->outer_tlist, varattno); + if (!tle) + elog(ERROR, "bogus varattno for OUTER_VAR var: %d", varattno); + + Assert(netlevelsup == 0); + push_child_plan(dpns, dpns->outer_plan, &save_dpns); + + result = get_name_for_var_field((Var *) tle->expr, fieldno, + levelsup, context); + + pop_child_plan(dpns, &save_dpns); + return result; + } + else if (varno == INNER_VAR && dpns->inner_tlist) + { + TargetEntry *tle; + deparse_namespace save_dpns; + const char *result; + + tle = get_tle_by_resno(dpns->inner_tlist, varattno); + if (!tle) + elog(ERROR, "bogus varattno for INNER_VAR var: %d", varattno); + + Assert(netlevelsup == 0); + push_child_plan(dpns, dpns->inner_plan, &save_dpns); + + result = get_name_for_var_field((Var *) tle->expr, fieldno, + levelsup, context); + + pop_child_plan(dpns, &save_dpns); + return result; + } + else if (varno == INDEX_VAR && dpns->index_tlist) + { + TargetEntry *tle; + const char *result; + + tle = get_tle_by_resno(dpns->index_tlist, varattno); + if (!tle) + elog(ERROR, "bogus varattno for INDEX_VAR var: %d", varattno); + + Assert(netlevelsup == 0); + + result = get_name_for_var_field((Var *) tle->expr, fieldno, + levelsup, context); + + return result; + } + else + { + elog(ERROR, "bogus varno: %d", varno); + return NULL; /* keep compiler quiet */ + } + + if (attnum == InvalidAttrNumber) + { + /* Var is whole-row reference to RTE, so select the right field */ + return get_rte_attribute_name(rte, fieldno); + } + + /* + * This part has essentially the same logic as the parser's + * expandRecordVariable() function, but we are dealing with a different + * representation of the input context, and we only need one field name + * not a TupleDesc. Also, we need special cases for finding subquery and + * CTE subplans when deparsing Plan trees. + */ + expr = (Node *) var; /* default if we can't drill down */ + + switch (rte->rtekind) + { + case RTE_RELATION: + case RTE_VALUES: + case RTE_NAMEDTUPLESTORE: + case RTE_RESULT: + + /* + * This case should not occur: a column of a table, values list, + * or ENR shouldn't have type RECORD. Fall through and fail (most + * likely) at the bottom. + */ + break; + case RTE_SUBQUERY: + /* Subselect-in-FROM: examine sub-select's output expr */ + { + if (rte->subquery) + { + TargetEntry *ste = get_tle_by_resno(rte->subquery->targetList, + attnum); + + if (ste == NULL || ste->resjunk) + elog(ERROR, "subquery %s does not have attribute %d", + rte->eref->aliasname, attnum); + expr = (Node *) ste->expr; + if (IsA(expr, Var)) + { + /* + * Recurse into the sub-select to see what its Var + * refers to. We have to build an additional level of + * namespace to keep in step with varlevelsup in the + * subselect; furthermore, the subquery RTE might be + * from an outer query level, in which case the + * namespace for the subselect must have that outer + * level as parent namespace. 
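+ *
+ * Illustration (hypothetical SQL): to print the ".f1" in
+ *		SELECT (ss.c).f1 FROM (SELECT ROW(1, 2) AS c) ss
+ * we must look inside ss to see that c is a ROW() expression and take
+ * the field name from it; the extra namespace level keeps any Vars met
+ * along the way resolving against the correct query level.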
+ */ + List *save_nslist = context->namespaces; + List *parent_namespaces; + deparse_namespace mydpns; + const char *result; + + parent_namespaces = list_copy_tail(context->namespaces, + netlevelsup); + + set_deparse_for_query(&mydpns, rte->subquery, + parent_namespaces); + + context->namespaces = lcons(&mydpns, parent_namespaces); + + result = get_name_for_var_field((Var *) expr, fieldno, + 0, context); + + context->namespaces = save_nslist; + + return result; + } + /* else fall through to inspect the expression */ + } + else + { + /* + * We're deparsing a Plan tree so we don't have complete + * RTE entries (in particular, rte->subquery is NULL). But + * the only place we'd see a Var directly referencing a + * SUBQUERY RTE is in a SubqueryScan plan node, and we can + * look into the child plan's tlist instead. + */ + TargetEntry *tle; + deparse_namespace save_dpns; + const char *result; + + if (!dpns->inner_plan) + elog(ERROR, "failed to find plan for subquery %s", + rte->eref->aliasname); + tle = get_tle_by_resno(dpns->inner_tlist, attnum); + if (!tle) + elog(ERROR, "bogus varattno for subquery var: %d", + attnum); + Assert(netlevelsup == 0); + push_child_plan(dpns, dpns->inner_plan, &save_dpns); + + result = get_name_for_var_field((Var *) tle->expr, fieldno, + levelsup, context); + + pop_child_plan(dpns, &save_dpns); + return result; + } + } + break; + case RTE_JOIN: + /* Join RTE --- recursively inspect the alias variable */ + if (rte->joinaliasvars == NIL) + elog(ERROR, "cannot decompile join alias var in plan tree"); + Assert(attnum > 0 && attnum <= list_length(rte->joinaliasvars)); + expr = (Node *) list_nth(rte->joinaliasvars, attnum - 1); + Assert(expr != NULL); + /* we intentionally don't strip implicit coercions here */ + if (IsA(expr, Var)) + return get_name_for_var_field((Var *) expr, fieldno, + var->varlevelsup + levelsup, + context); + /* else fall through to inspect the expression */ + break; + case RTE_FUNCTION: + case RTE_TABLEFUNC: + + /* + * We couldn't get here unless a function is declared with one of + * its result columns as RECORD, which is not allowed. + */ + break; + case RTE_CTE: + /* CTE reference: examine subquery's output expr */ + { + CommonTableExpr *cte = NULL; + Index ctelevelsup; + ListCell *lc; + + /* + * Try to find the referenced CTE using the namespace stack. + */ + ctelevelsup = rte->ctelevelsup + netlevelsup; + if (ctelevelsup >= list_length(context->namespaces)) + lc = NULL; + else + { + deparse_namespace *ctedpns; + + ctedpns = (deparse_namespace *) + list_nth(context->namespaces, ctelevelsup); + foreach(lc, ctedpns->ctes) + { + cte = (CommonTableExpr *) lfirst(lc); + if (strcmp(cte->ctename, rte->ctename) == 0) + break; + } + } + if (lc != NULL) + { + Query *ctequery = (Query *) cte->ctequery; + TargetEntry *ste = get_tle_by_resno(GetCTETargetList(cte), + attnum); + + if (ste == NULL || ste->resjunk) + elog(ERROR, "CTE %s does not have attribute %d", + rte->eref->aliasname, attnum); + expr = (Node *) ste->expr; + if (IsA(expr, Var)) + { + /* + * Recurse into the CTE to see what its Var refers to. + * We have to build an additional level of namespace + * to keep in step with varlevelsup in the CTE; + * furthermore it could be an outer CTE (compare + * SUBQUERY case above). 
+ */ + List *save_nslist = context->namespaces; + List *parent_namespaces; + deparse_namespace mydpns; + const char *result; + + parent_namespaces = list_copy_tail(context->namespaces, + ctelevelsup); + + set_deparse_for_query(&mydpns, ctequery, + parent_namespaces); + + context->namespaces = lcons(&mydpns, parent_namespaces); + + result = get_name_for_var_field((Var *) expr, fieldno, + 0, context); + + context->namespaces = save_nslist; + + return result; + } + /* else fall through to inspect the expression */ + } + else + { + /* + * We're deparsing a Plan tree so we don't have a CTE + * list. But the only places we'd see a Var directly + * referencing a CTE RTE are in CteScan or WorkTableScan + * plan nodes. For those cases, set_deparse_plan arranged + * for dpns->inner_plan to be the plan node that emits the + * CTE or RecursiveUnion result, and we can look at its + * tlist instead. + */ + TargetEntry *tle; + deparse_namespace save_dpns; + const char *result; + + if (!dpns->inner_plan) + elog(ERROR, "failed to find plan for CTE %s", + rte->eref->aliasname); + tle = get_tle_by_resno(dpns->inner_tlist, attnum); + if (!tle) + elog(ERROR, "bogus varattno for subquery var: %d", + attnum); + Assert(netlevelsup == 0); + push_child_plan(dpns, dpns->inner_plan, &save_dpns); + + result = get_name_for_var_field((Var *) tle->expr, fieldno, + levelsup, context); + + pop_child_plan(dpns, &save_dpns); + return result; + } + } + break; + } + + /* + * We now have an expression we can't expand any more, so see if + * get_expr_result_tupdesc() can do anything with it. + */ + tupleDesc = get_expr_result_tupdesc(expr, false); + /* Got the tupdesc, so we can extract the field name */ + Assert(fieldno >= 1 && fieldno <= tupleDesc->natts); + return NameStr(TupleDescAttr(tupleDesc, fieldno - 1)->attname); +} + +/* + * Try to find the referenced expression for a PARAM_EXEC Param that might + * reference a parameter supplied by an upper NestLoop or SubPlan plan node. + * + * If successful, return the expression and set *dpns_p and *ancestor_cell_p + * appropriately for calling push_ancestor_plan(). If no referent can be + * found, return NULL. + */ +static Node * +find_param_referent(Param *param, deparse_context *context, + deparse_namespace **dpns_p, ListCell **ancestor_cell_p) +{ + /* Initialize output parameters to prevent compiler warnings */ + *dpns_p = NULL; + *ancestor_cell_p = NULL; + + /* + * If it's a PARAM_EXEC parameter, look for a matching NestLoopParam or + * SubPlan argument. This will necessarily be in some ancestor of the + * current expression's Plan node. + */ + if (param->paramkind == PARAM_EXEC) + { + deparse_namespace *dpns; + Plan *child_plan; + ListCell *lc; + + dpns = (deparse_namespace *) linitial(context->namespaces); + child_plan = dpns->plan; + + foreach(lc, dpns->ancestors) + { + Node *ancestor = (Node *) lfirst(lc); + ListCell *lc2; + + /* + * NestLoops transmit params to their inner child only. + */ + if (IsA(ancestor, NestLoop) && + child_plan == innerPlan(ancestor)) + { + NestLoop *nl = (NestLoop *) ancestor; + + foreach(lc2, nl->nestParams) + { + NestLoopParam *nlp = (NestLoopParam *) lfirst(lc2); + + if (nlp->paramno == param->paramid) + { + /* Found a match, so return it */ + *dpns_p = dpns; + *ancestor_cell_p = lc; + return (Node *) nlp->paramval; + } + } + } + + /* + * If ancestor is a SubPlan, check the arguments it provides. 
+ */ + if (IsA(ancestor, SubPlan)) + { + SubPlan *subplan = (SubPlan *) ancestor; + ListCell *lc3; + ListCell *lc4; + + forboth(lc3, subplan->parParam, lc4, subplan->args) + { + int paramid = lfirst_int(lc3); + Node *arg = (Node *) lfirst(lc4); + + if (paramid == param->paramid) + { + /* + * Found a match, so return it. But, since Vars in + * the arg are to be evaluated in the surrounding + * context, we have to point to the next ancestor item + * that is *not* a SubPlan. + */ + ListCell *rest; + + for_each_cell(rest, dpns->ancestors, + lnext(dpns->ancestors, lc)) + { + Node *ancestor2 = (Node *) lfirst(rest); + + if (!IsA(ancestor2, SubPlan)) + { + *dpns_p = dpns; + *ancestor_cell_p = rest; + return arg; + } + } + elog(ERROR, "SubPlan cannot be outermost ancestor"); + } + } + + /* SubPlan isn't a kind of Plan, so skip the rest */ + continue; + } + + /* + * We need not consider the ancestor's initPlan list, since + * initplans never have any parParams. + */ + + /* No luck, crawl up to next ancestor */ + child_plan = (Plan *) ancestor; + } + } + + /* No referent found */ + return NULL; +} + +/* + * Display a Param appropriately. + */ +static void +get_parameter(Param *param, deparse_context *context) +{ + Node *expr; + deparse_namespace *dpns; + ListCell *ancestor_cell; + + /* + * If it's a PARAM_EXEC parameter, try to locate the expression from which + * the parameter was computed. Note that failing to find a referent isn't + * an error, since the Param might well be a subplan output rather than an + * input. + */ + expr = find_param_referent(param, context, &dpns, &ancestor_cell); + if (expr) + { + /* Found a match, so print it */ + deparse_namespace save_dpns; + bool save_varprefix; + bool need_paren; + + /* Switch attention to the ancestor plan node */ + push_ancestor_plan(dpns, ancestor_cell, &save_dpns); + + /* + * Force prefixing of Vars, since they won't belong to the relation + * being scanned in the original plan node. + */ + save_varprefix = context->varprefix; + context->varprefix = true; + + /* + * A Param's expansion is typically a Var, Aggref, GroupingFunc, or + * upper-level Param, which wouldn't need extra parentheses. + * Otherwise, insert parens to ensure the expression looks atomic. + */ + need_paren = !(IsA(expr, Var) || + IsA(expr, Aggref) || + IsA(expr, GroupingFunc) || + IsA(expr, Param)); + if (need_paren) + appendStringInfoChar(context->buf, '('); + + get_rule_expr(expr, context, false); + + if (need_paren) + appendStringInfoChar(context->buf, ')'); + + context->varprefix = save_varprefix; + + pop_ancestor_plan(dpns, &save_dpns); + + return; + } + + /* + * If it's an external parameter, see if the outermost namespace provides + * function argument names. + */ + if (param->paramkind == PARAM_EXTERN && context->namespaces != NIL) + { + dpns = llast(context->namespaces); + if (dpns->argnames && + param->paramid > 0 && + param->paramid <= dpns->numargs) + { + char *argname = dpns->argnames[param->paramid - 1]; + + if (argname) + { + bool should_qualify = false; + ListCell *lc; + + /* + * Qualify the parameter name if there are any other deparse + * namespaces with range tables. This avoids qualifying in + * trivial cases like "RETURN a + b", but makes it safe in all + * other cases. 
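+ *
+ * Illustration (hypothetical SQL): when deparsing the body of a
+ * SQL-standard function f(a int), a plain "RETURN a + 1" keeps the bare
+ * "a", but a statement that reads from a table prints the parameter as
+ * "f.a" so that a column named "a" cannot capture the reference.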
+ */ + foreach(lc, context->namespaces) + { + deparse_namespace *depns = lfirst(lc); + + if (depns->rtable_names != NIL) + { + should_qualify = true; + break; + } + } + if (should_qualify) + { + appendStringInfoString(context->buf, quote_identifier(dpns->funcname)); + appendStringInfoChar(context->buf, '.'); + } + + appendStringInfoString(context->buf, quote_identifier(argname)); + return; + } + } + } + + /* + * Not PARAM_EXEC, or couldn't find referent: just print $N. + */ + appendStringInfo(context->buf, "$%d", param->paramid); +} + +/* + * get_simple_binary_op_name + * + * helper function for isSimpleNode + * will return single char binary operator name, or NULL if it's not + */ +static const char * +get_simple_binary_op_name(OpExpr *expr) +{ + List *args = expr->args; + + if (list_length(args) == 2) + { + /* binary operator */ + Node *arg1 = (Node *) linitial(args); + Node *arg2 = (Node *) lsecond(args); + const char *op; + + op = generate_operator_name(expr->opno, exprType(arg1), exprType(arg2)); + if (strlen(op) == 1) + return op; + } + return NULL; +} + + +/* + * isSimpleNode - check if given node is simple (doesn't need parenthesizing) + * + * true : simple in the context of parent node's type + * false : not simple + */ +static bool +isSimpleNode(Node *node, Node *parentNode, int prettyFlags) +{ + if (!node) + return false; + + switch (nodeTag(node)) + { + case T_Var: + case T_Const: + case T_Param: + case T_CoerceToDomainValue: + case T_SetToDefault: + case T_CurrentOfExpr: + /* single words: always simple */ + return true; + + case T_SubscriptingRef: + case T_ArrayExpr: + case T_RowExpr: + case T_CoalesceExpr: + case T_MinMaxExpr: + case T_SQLValueFunction: + case T_XmlExpr: + case T_NextValueExpr: + case T_NullIfExpr: + case T_Aggref: + case T_GroupingFunc: + case T_WindowFunc: + case T_FuncExpr: + case T_JsonConstructorExpr: + /* function-like: name(..) or name[..] */ + return true; + + /* CASE keywords act as parentheses */ + case T_CaseExpr: + return true; + + case T_FieldSelect: + + /* + * appears simple since . has top precedence, unless parent is + * T_FieldSelect itself! 
+ */ + return !IsA(parentNode, FieldSelect); + + case T_FieldStore: + + /* + * treat like FieldSelect (probably doesn't matter) + */ + return !IsA(parentNode, FieldStore); + + case T_CoerceToDomain: + /* maybe simple, check args */ + return isSimpleNode((Node *) ((CoerceToDomain *) node)->arg, + node, prettyFlags); + case T_RelabelType: + return isSimpleNode((Node *) ((RelabelType *) node)->arg, + node, prettyFlags); + case T_CoerceViaIO: + return isSimpleNode((Node *) ((CoerceViaIO *) node)->arg, + node, prettyFlags); + case T_ArrayCoerceExpr: + return isSimpleNode((Node *) ((ArrayCoerceExpr *) node)->arg, + node, prettyFlags); + case T_ConvertRowtypeExpr: + return isSimpleNode((Node *) ((ConvertRowtypeExpr *) node)->arg, + node, prettyFlags); + + case T_OpExpr: + { + /* depends on parent node type; needs further checking */ + if (prettyFlags & PRETTYFLAG_PAREN && IsA(parentNode, OpExpr)) + { + const char *op; + const char *parentOp; + bool is_lopriop; + bool is_hipriop; + bool is_lopriparent; + bool is_hipriparent; + + op = get_simple_binary_op_name((OpExpr *) node); + if (!op) + return false; + + /* We know only the basic operators + - and * / % */ + is_lopriop = (strchr("+-", *op) != NULL); + is_hipriop = (strchr("*/%", *op) != NULL); + if (!(is_lopriop || is_hipriop)) + return false; + + parentOp = get_simple_binary_op_name((OpExpr *) parentNode); + if (!parentOp) + return false; + + is_lopriparent = (strchr("+-", *parentOp) != NULL); + is_hipriparent = (strchr("*/%", *parentOp) != NULL); + if (!(is_lopriparent || is_hipriparent)) + return false; + + if (is_hipriop && is_lopriparent) + return true; /* op binds tighter than parent */ + + if (is_lopriop && is_hipriparent) + return false; + + /* + * Operators are same priority --- can skip parens only if + * we have (a - b) - c, not a - (b - c). + */ + if (node == (Node *) linitial(((OpExpr *) parentNode)->args)) + return true; + + return false; + } + /* else do the same stuff as for T_SubLink et al. 
*/ + } + /* FALLTHROUGH */ + + case T_SubLink: + case T_NullTest: + case T_BooleanTest: + case T_DistinctExpr: + case T_JsonIsPredicate: + switch (nodeTag(parentNode)) + { + case T_FuncExpr: + { + /* special handling for casts and COERCE_SQL_SYNTAX */ + CoercionForm type = ((FuncExpr *) parentNode)->funcformat; + + if (type == COERCE_EXPLICIT_CAST || + type == COERCE_IMPLICIT_CAST || + type == COERCE_SQL_SYNTAX) + return false; + return true; /* own parentheses */ + } + case T_BoolExpr: /* lower precedence */ + case T_SubscriptingRef: /* other separators */ + case T_ArrayExpr: /* other separators */ + case T_RowExpr: /* other separators */ + case T_CoalesceExpr: /* own parentheses */ + case T_MinMaxExpr: /* own parentheses */ + case T_XmlExpr: /* own parentheses */ + case T_NullIfExpr: /* other separators */ + case T_Aggref: /* own parentheses */ + case T_GroupingFunc: /* own parentheses */ + case T_WindowFunc: /* own parentheses */ + case T_CaseExpr: /* other separators */ + return true; + default: + return false; + } + + case T_BoolExpr: + switch (nodeTag(parentNode)) + { + case T_BoolExpr: + if (prettyFlags & PRETTYFLAG_PAREN) + { + BoolExprType type; + BoolExprType parentType; + + type = ((BoolExpr *) node)->boolop; + parentType = ((BoolExpr *) parentNode)->boolop; + switch (type) + { + case NOT_EXPR: + case AND_EXPR: + if (parentType == AND_EXPR || parentType == OR_EXPR) + return true; + break; + case OR_EXPR: + if (parentType == OR_EXPR) + return true; + break; + } + } + return false; + case T_FuncExpr: + { + /* special handling for casts and COERCE_SQL_SYNTAX */ + CoercionForm type = ((FuncExpr *) parentNode)->funcformat; + + if (type == COERCE_EXPLICIT_CAST || + type == COERCE_IMPLICIT_CAST || + type == COERCE_SQL_SYNTAX) + return false; + return true; /* own parentheses */ + } + case T_SubscriptingRef: /* other separators */ + case T_ArrayExpr: /* other separators */ + case T_RowExpr: /* other separators */ + case T_CoalesceExpr: /* own parentheses */ + case T_MinMaxExpr: /* own parentheses */ + case T_XmlExpr: /* own parentheses */ + case T_NullIfExpr: /* other separators */ + case T_Aggref: /* own parentheses */ + case T_GroupingFunc: /* own parentheses */ + case T_WindowFunc: /* own parentheses */ + case T_CaseExpr: /* other separators */ + return true; + default: + return false; + } + + case T_JsonValueExpr: + /* maybe simple, check args */ + return isSimpleNode((Node *) ((JsonValueExpr *) node)->raw_expr, + node, prettyFlags); + + default: + break; + } + /* those we don't know: in dubio complexo */ + return false; +} + + +/* + * appendContextKeyword - append a keyword to buffer + * + * If prettyPrint is enabled, perform a line break, and adjust indentation. + * Otherwise, just append the keyword. + */ +static void +appendContextKeyword(deparse_context *context, const char *str, + int indentBefore, int indentAfter, int indentPlus) +{ + StringInfo buf = context->buf; + + if (PRETTY_INDENT(context)) + { + int indentAmount; + + context->indentLevel += indentBefore; + + /* remove any trailing spaces currently in the buffer ... */ + removeStringInfoSpaces(buf); + /* ... then add a newline and some spaces */ + appendStringInfoChar(buf, '\n'); + + if (context->indentLevel < PRETTYINDENT_LIMIT) + indentAmount = Max(context->indentLevel, 0) + indentPlus; + else + { + /* + * If we're indented more than PRETTYINDENT_LIMIT characters, try + * to conserve horizontal space by reducing the per-level + * indentation. 
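+ *
+ * Worked example, assuming PRETTYINDENT_STD = 8 and PRETTYINDENT_LIMIT =
+ * 40 as defined earlier in this file: at indentLevel = 140, indentAmount
+ * starts as 40 + (140 - 40) / 4 = 65, wraps to 65 % 40 = 25, and then
+ * indentPlus is added.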
For best results the scale factor here should + * divide all the indent amounts that get added to indentLevel + * (PRETTYINDENT_STD, etc). It's important that the indentation + * not grow unboundedly, else deeply-nested trees use O(N^2) + * whitespace; so we also wrap modulo PRETTYINDENT_LIMIT. + */ + indentAmount = PRETTYINDENT_LIMIT + + (context->indentLevel - PRETTYINDENT_LIMIT) / + (PRETTYINDENT_STD / 2); + indentAmount %= PRETTYINDENT_LIMIT; + /* scale/wrap logic affects indentLevel, but not indentPlus */ + indentAmount += indentPlus; + } + appendStringInfoSpaces(buf, indentAmount); + + appendStringInfoString(buf, str); + + context->indentLevel += indentAfter; + if (context->indentLevel < 0) + context->indentLevel = 0; + } + else + appendStringInfoString(buf, str); +} + +/* + * removeStringInfoSpaces - delete trailing spaces from a buffer. + * + * Possibly this should move to stringinfo.c at some point. + */ +static void +removeStringInfoSpaces(StringInfo str) +{ + while (str->len > 0 && str->data[str->len - 1] == ' ') + str->data[--(str->len)] = '\0'; +} + + +/* + * get_rule_expr_paren - deparse expr using get_rule_expr, + * embracing the string with parentheses if necessary for prettyPrint. + * + * Never embrace if prettyFlags=0, because it's done in the calling node. + * + * Any node that does *not* embrace its argument node by sql syntax (with + * parentheses, non-operator keywords like CASE/WHEN/ON, or comma etc) should + * use get_rule_expr_paren instead of get_rule_expr so parentheses can be + * added. + */ +static void +get_rule_expr_paren(Node *node, deparse_context *context, + bool showimplicit, Node *parentNode) +{ + bool need_paren; + + need_paren = PRETTY_PAREN(context) && + !isSimpleNode(node, parentNode, context->prettyFlags); + + if (need_paren) + appendStringInfoChar(context->buf, '('); + + get_rule_expr(node, context, showimplicit); + + if (need_paren) + appendStringInfoChar(context->buf, ')'); +} + + +/* ---------- + * get_rule_expr - Parse back an expression + * + * Note: showimplicit determines whether we display any implicit cast that + * is present at the top of the expression tree. It is a passed argument, + * not a field of the context struct, because we change the value as we + * recurse down into the expression. In general we suppress implicit casts + * when the result type is known with certainty (eg, the arguments of an + * OR must be boolean). We display implicit casts for arguments of functions + * and operators, since this is needed to be certain that the same function + * or operator will be chosen when the expression is re-parsed. + * ---------- + */ +static void +get_rule_expr(Node *node, deparse_context *context, + bool showimplicit) +{ + StringInfo buf = context->buf; + + if (node == NULL) + return; + + /* Guard against excessively long or deeply-nested queries */ + CHECK_FOR_INTERRUPTS(); + check_stack_depth(); + + /* + * Each level of get_rule_expr must emit an indivisible term + * (parenthesized if necessary) to ensure result is reparsed into the same + * expression tree. The only exception is that when the input is a List, + * we emit the component items comma-separated with no surrounding + * decoration; this is convenient for most callers. 
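+ *
+ * For instance, the GROUPING() and NULLIF() cases below hand their
+ * argument List straight to this function and get back the
+ * comma-separated argument list with no extra decoration around it.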
+ */ + switch (nodeTag(node)) + { + case T_Var: + (void) get_variable((Var *) node, 0, false, context); + break; + + case T_Const: + get_const_expr((Const *) node, context, 0); + break; + + case T_Param: + get_parameter((Param *) node, context); + break; + + case T_Aggref: + get_agg_expr((Aggref *) node, context, (Aggref *) node); + break; + + case T_GroupingFunc: + { + GroupingFunc *gexpr = (GroupingFunc *) node; + + appendStringInfoString(buf, "GROUPING("); + get_rule_expr((Node *) gexpr->args, context, true); + appendStringInfoChar(buf, ')'); + } + break; + + case T_WindowFunc: + get_windowfunc_expr((WindowFunc *) node, context); + break; + + case T_SubscriptingRef: + { + SubscriptingRef *sbsref = (SubscriptingRef *) node; + bool need_parens; + + /* + * If the argument is a CaseTestExpr, we must be inside a + * FieldStore, ie, we are assigning to an element of an array + * within a composite column. Since we already punted on + * displaying the FieldStore's target information, just punt + * here too, and display only the assignment source + * expression. + */ + if (IsA(sbsref->refexpr, CaseTestExpr)) + { + Assert(sbsref->refassgnexpr); + get_rule_expr((Node *) sbsref->refassgnexpr, + context, showimplicit); + break; + } + + /* + * Parenthesize the argument unless it's a simple Var or a + * FieldSelect. (In particular, if it's another + * SubscriptingRef, we *must* parenthesize to avoid + * confusion.) + */ + need_parens = !IsA(sbsref->refexpr, Var) && + !IsA(sbsref->refexpr, FieldSelect); + if (need_parens) + appendStringInfoChar(buf, '('); + get_rule_expr((Node *) sbsref->refexpr, context, showimplicit); + if (need_parens) + appendStringInfoChar(buf, ')'); + + /* + * If there's a refassgnexpr, we want to print the node in the + * format "container[subscripts] := refassgnexpr". This is + * not legal SQL, so decompilation of INSERT or UPDATE + * statements should always use processIndirection as part of + * the statement-level syntax. We should only see this when + * EXPLAIN tries to print the targetlist of a plan resulting + * from such a statement. + */ + if (sbsref->refassgnexpr) + { + Node *refassgnexpr; + + /* + * Use processIndirection to print this node's subscripts + * as well as any additional field selections or + * subscripting in immediate descendants. It returns the + * RHS expr that is actually being "assigned". 
+ */ + refassgnexpr = processIndirection(node, context); + appendStringInfoString(buf, " := "); + get_rule_expr(refassgnexpr, context, showimplicit); + } + else + { + /* Just an ordinary container fetch, so print subscripts */ + printSubscripts(sbsref, context); + } + } + break; + + case T_FuncExpr: + get_func_expr((FuncExpr *) node, context, showimplicit); + break; + + case T_NamedArgExpr: + { + NamedArgExpr *na = (NamedArgExpr *) node; + + appendStringInfo(buf, "%s => ", quote_identifier(na->name)); + get_rule_expr((Node *) na->arg, context, showimplicit); + } + break; + + case T_OpExpr: + get_oper_expr((OpExpr *) node, context); + break; + + case T_DistinctExpr: + { + DistinctExpr *expr = (DistinctExpr *) node; + List *args = expr->args; + Node *arg1 = (Node *) linitial(args); + Node *arg2 = (Node *) lsecond(args); + + if (!PRETTY_PAREN(context)) + appendStringInfoChar(buf, '('); + get_rule_expr_paren(arg1, context, true, node); + appendStringInfoString(buf, " IS DISTINCT FROM "); + get_rule_expr_paren(arg2, context, true, node); + if (!PRETTY_PAREN(context)) + appendStringInfoChar(buf, ')'); + } + break; + + case T_NullIfExpr: + { + NullIfExpr *nullifexpr = (NullIfExpr *) node; + + appendStringInfoString(buf, "NULLIF("); + get_rule_expr((Node *) nullifexpr->args, context, true); + appendStringInfoChar(buf, ')'); + } + break; + + case T_ScalarArrayOpExpr: + { + ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) node; + List *args = expr->args; + Node *arg1 = (Node *) linitial(args); + Node *arg2 = (Node *) lsecond(args); + + if (!PRETTY_PAREN(context)) + appendStringInfoChar(buf, '('); + get_rule_expr_paren(arg1, context, true, node); + appendStringInfo(buf, " %s %s (", + generate_operator_name(expr->opno, + exprType(arg1), + get_base_element_type(exprType(arg2))), + expr->useOr ? "ANY" : "ALL"); + get_rule_expr_paren(arg2, context, true, node); + + /* + * There's inherent ambiguity in "x op ANY/ALL (y)" when y is + * a bare sub-SELECT. Since we're here, the sub-SELECT must + * be meant as a scalar sub-SELECT yielding an array value to + * be used in ScalarArrayOpExpr; but the grammar will + * preferentially interpret such a construct as an ANY/ALL + * SubLink. To prevent misparsing the output that way, insert + * a dummy coercion (which will be stripped by parse analysis, + * so no inefficiency is added in dump and reload). This is + * indeed most likely what the user wrote to get the construct + * accepted in the first place. 
+ */ + if (IsA(arg2, SubLink) && + ((SubLink *) arg2)->subLinkType == EXPR_SUBLINK) + appendStringInfo(buf, "::%s", + format_type_with_typemod(exprType(arg2), + exprTypmod(arg2))); + appendStringInfoChar(buf, ')'); + if (!PRETTY_PAREN(context)) + appendStringInfoChar(buf, ')'); + } + break; + + case T_BoolExpr: + { + BoolExpr *expr = (BoolExpr *) node; + Node *first_arg = linitial(expr->args); + ListCell *arg; + + switch (expr->boolop) + { + case AND_EXPR: + if (!PRETTY_PAREN(context)) + appendStringInfoChar(buf, '('); + get_rule_expr_paren(first_arg, context, + false, node); + for_each_from(arg, expr->args, 1) + { + appendStringInfoString(buf, " AND "); + get_rule_expr_paren((Node *) lfirst(arg), context, + false, node); + } + if (!PRETTY_PAREN(context)) + appendStringInfoChar(buf, ')'); + break; + + case OR_EXPR: + if (!PRETTY_PAREN(context)) + appendStringInfoChar(buf, '('); + get_rule_expr_paren(first_arg, context, + false, node); + for_each_from(arg, expr->args, 1) + { + appendStringInfoString(buf, " OR "); + get_rule_expr_paren((Node *) lfirst(arg), context, + false, node); + } + if (!PRETTY_PAREN(context)) + appendStringInfoChar(buf, ')'); + break; + + case NOT_EXPR: + if (!PRETTY_PAREN(context)) + appendStringInfoChar(buf, '('); + appendStringInfoString(buf, "NOT "); + get_rule_expr_paren(first_arg, context, + false, node); + if (!PRETTY_PAREN(context)) + appendStringInfoChar(buf, ')'); + break; + + default: + elog(ERROR, "unrecognized boolop: %d", + (int) expr->boolop); + } + } + break; + + case T_SubLink: + get_sublink_expr((SubLink *) node, context); + break; + + case T_SubPlan: + { + SubPlan *subplan = (SubPlan *) node; + + /* + * We cannot see an already-planned subplan in rule deparsing, + * only while EXPLAINing a query plan. We don't try to + * reconstruct the original SQL, just reference the subplan + * that appears elsewhere in EXPLAIN's result. + */ + if (subplan->useHashTable) + appendStringInfo(buf, "(hashed %s)", subplan->plan_name); + else + appendStringInfo(buf, "(%s)", subplan->plan_name); + } + break; + + case T_AlternativeSubPlan: + { + AlternativeSubPlan *asplan = (AlternativeSubPlan *) node; + ListCell *lc; + + /* + * This case cannot be reached in normal usage, since no + * AlternativeSubPlan can appear either in parsetrees or + * finished plan trees. We keep it just in case somebody + * wants to use this code to print planner data structures. + */ + appendStringInfoString(buf, "(alternatives: "); + foreach(lc, asplan->subplans) + { + SubPlan *splan = lfirst_node(SubPlan, lc); + + if (splan->useHashTable) + appendStringInfo(buf, "hashed %s", splan->plan_name); + else + appendStringInfoString(buf, splan->plan_name); + if (lnext(asplan->subplans, lc)) + appendStringInfoString(buf, " or "); + } + appendStringInfoChar(buf, ')'); + } + break; + + case T_FieldSelect: + { + FieldSelect *fselect = (FieldSelect *) node; + Node *arg = (Node *) fselect->arg; + int fno = fselect->fieldnum; + const char *fieldname; + bool need_parens; + + /* + * Parenthesize the argument unless it's an SubscriptingRef or + * another FieldSelect. Note in particular that it would be + * WRONG to not parenthesize a Var argument; simplicity is not + * the issue here, having the right number of names is. + */ + need_parens = !IsA(arg, SubscriptingRef) && + !IsA(arg, FieldSelect); + if (need_parens) + appendStringInfoChar(buf, '('); + get_rule_expr(arg, context, true); + if (need_parens) + appendStringInfoChar(buf, ')'); + + /* + * Get and print the field name. 
+ */ + fieldname = get_name_for_var_field((Var *) arg, fno, + 0, context); + appendStringInfo(buf, ".%s", quote_identifier(fieldname)); + } + break; + + case T_FieldStore: + { + FieldStore *fstore = (FieldStore *) node; + bool need_parens; + + /* + * There is no good way to represent a FieldStore as real SQL, + * so decompilation of INSERT or UPDATE statements should + * always use processIndirection as part of the + * statement-level syntax. We should only get here when + * EXPLAIN tries to print the targetlist of a plan resulting + * from such a statement. The plan case is even harder than + * ordinary rules would be, because the planner tries to + * collapse multiple assignments to the same field or subfield + * into one FieldStore; so we can see a list of target fields + * not just one, and the arguments could be FieldStores + * themselves. We don't bother to try to print the target + * field names; we just print the source arguments, with a + * ROW() around them if there's more than one. This isn't + * terribly complete, but it's probably good enough for + * EXPLAIN's purposes; especially since anything more would be + * either hopelessly confusing or an even poorer + * representation of what the plan is actually doing. + */ + need_parens = (list_length(fstore->newvals) != 1); + if (need_parens) + appendStringInfoString(buf, "ROW("); + get_rule_expr((Node *) fstore->newvals, context, showimplicit); + if (need_parens) + appendStringInfoChar(buf, ')'); + } + break; + + case T_RelabelType: + { + RelabelType *relabel = (RelabelType *) node; + Node *arg = (Node *) relabel->arg; + + if (relabel->relabelformat == COERCE_IMPLICIT_CAST && + !showimplicit) + { + /* don't show the implicit cast */ + get_rule_expr_paren(arg, context, false, node); + } + else + { + get_coercion_expr(arg, context, + relabel->resulttype, + relabel->resulttypmod, + node); + } + } + break; + + case T_CoerceViaIO: + { + CoerceViaIO *iocoerce = (CoerceViaIO *) node; + Node *arg = (Node *) iocoerce->arg; + + if (iocoerce->coerceformat == COERCE_IMPLICIT_CAST && + !showimplicit) + { + /* don't show the implicit cast */ + get_rule_expr_paren(arg, context, false, node); + } + else + { + get_coercion_expr(arg, context, + iocoerce->resulttype, + -1, + node); + } + } + break; + + case T_ArrayCoerceExpr: + { + ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) node; + Node *arg = (Node *) acoerce->arg; + + if (acoerce->coerceformat == COERCE_IMPLICIT_CAST && + !showimplicit) + { + /* don't show the implicit cast */ + get_rule_expr_paren(arg, context, false, node); + } + else + { + get_coercion_expr(arg, context, + acoerce->resulttype, + acoerce->resulttypmod, + node); + } + } + break; + + case T_ConvertRowtypeExpr: + { + ConvertRowtypeExpr *convert = (ConvertRowtypeExpr *) node; + Node *arg = (Node *) convert->arg; + + if (convert->convertformat == COERCE_IMPLICIT_CAST && + !showimplicit) + { + /* don't show the implicit cast */ + get_rule_expr_paren(arg, context, false, node); + } + else + { + get_coercion_expr(arg, context, + convert->resulttype, -1, + node); + } + } + break; + + case T_CollateExpr: + { + CollateExpr *collate = (CollateExpr *) node; + Node *arg = (Node *) collate->arg; + + if (!PRETTY_PAREN(context)) + appendStringInfoChar(buf, '('); + get_rule_expr_paren(arg, context, showimplicit, node); + appendStringInfo(buf, " COLLATE %s", + generate_collation_name(collate->collOid)); + if (!PRETTY_PAREN(context)) + appendStringInfoChar(buf, ')'); + } + break; + + case T_CaseExpr: + { + CaseExpr *caseexpr = (CaseExpr *) 
node; + ListCell *temp; + + appendContextKeyword(context, "CASE", + 0, PRETTYINDENT_VAR, 0); + if (caseexpr->arg) + { + appendStringInfoChar(buf, ' '); + get_rule_expr((Node *) caseexpr->arg, context, true); + } + foreach(temp, caseexpr->args) + { + CaseWhen *when = (CaseWhen *) lfirst(temp); + Node *w = (Node *) when->expr; + + if (caseexpr->arg) + { + /* + * The parser should have produced WHEN clauses of the + * form "CaseTestExpr = RHS", possibly with an + * implicit coercion inserted above the CaseTestExpr. + * For accurate decompilation of rules it's essential + * that we show just the RHS. However in an + * expression that's been through the optimizer, the + * WHEN clause could be almost anything (since the + * equality operator could have been expanded into an + * inline function). If we don't recognize the form + * of the WHEN clause, just punt and display it as-is. + */ + if (IsA(w, OpExpr)) + { + List *args = ((OpExpr *) w)->args; + + if (list_length(args) == 2 && + IsA(strip_implicit_coercions(linitial(args)), + CaseTestExpr)) + w = (Node *) lsecond(args); + } + } + + if (!PRETTY_INDENT(context)) + appendStringInfoChar(buf, ' '); + appendContextKeyword(context, "WHEN ", + 0, 0, 0); + get_rule_expr(w, context, false); + appendStringInfoString(buf, " THEN "); + get_rule_expr((Node *) when->result, context, true); + } + if (!PRETTY_INDENT(context)) + appendStringInfoChar(buf, ' '); + appendContextKeyword(context, "ELSE ", + 0, 0, 0); + get_rule_expr((Node *) caseexpr->defresult, context, true); + if (!PRETTY_INDENT(context)) + appendStringInfoChar(buf, ' '); + appendContextKeyword(context, "END", + -PRETTYINDENT_VAR, 0, 0); + } + break; + + case T_CaseTestExpr: + { + /* + * Normally we should never get here, since for expressions + * that can contain this node type we attempt to avoid + * recursing to it. But in an optimized expression we might + * be unable to avoid that (see comments for CaseExpr). If we + * do see one, print it as CASE_TEST_EXPR. + */ + appendStringInfoString(buf, "CASE_TEST_EXPR"); + } + break; + + case T_ArrayExpr: + { + ArrayExpr *arrayexpr = (ArrayExpr *) node; + + appendStringInfoString(buf, "ARRAY["); + get_rule_expr((Node *) arrayexpr->elements, context, true); + appendStringInfoChar(buf, ']'); + + /* + * If the array isn't empty, we assume its elements are + * coerced to the desired type. If it's empty, though, we + * need an explicit coercion to the array type. + */ + if (arrayexpr->elements == NIL) + appendStringInfo(buf, "::%s", + format_type_with_typemod(arrayexpr->array_typeid, -1)); + } + break; + + case T_RowExpr: + { + RowExpr *rowexpr = (RowExpr *) node; + TupleDesc tupdesc = NULL; + ListCell *arg; + int i; + char *sep; + + /* + * If it's a named type and not RECORD, we may have to skip + * dropped columns and/or claim there are NULLs for added + * columns. + */ + if (rowexpr->row_typeid != RECORDOID) + { + tupdesc = lookup_rowtype_tupdesc(rowexpr->row_typeid, -1); + Assert(list_length(rowexpr->args) <= tupdesc->natts); + } + + /* + * SQL99 allows "ROW" to be omitted when there is more than + * one column, but for simplicity we always print it. 
+ */ + appendStringInfoString(buf, "ROW("); + sep = ""; + i = 0; + foreach(arg, rowexpr->args) + { + Node *e = (Node *) lfirst(arg); + + if (tupdesc == NULL || + !TupleDescAttr(tupdesc, i)->attisdropped) + { + appendStringInfoString(buf, sep); + /* Whole-row Vars need special treatment here */ + get_rule_expr_toplevel(e, context, true); + sep = ", "; + } + i++; + } + if (tupdesc != NULL) + { + while (i < tupdesc->natts) + { + if (!TupleDescAttr(tupdesc, i)->attisdropped) + { + appendStringInfoString(buf, sep); + appendStringInfoString(buf, "NULL"); + sep = ", "; + } + i++; + } + + ReleaseTupleDesc(tupdesc); + } + appendStringInfoChar(buf, ')'); + if (rowexpr->row_format == COERCE_EXPLICIT_CAST) + appendStringInfo(buf, "::%s", + format_type_with_typemod(rowexpr->row_typeid, -1)); + } + break; + + case T_RowCompareExpr: + { + RowCompareExpr *rcexpr = (RowCompareExpr *) node; + + /* + * SQL99 allows "ROW" to be omitted when there is more than + * one column, but for simplicity we always print it. Within + * a ROW expression, whole-row Vars need special treatment, so + * use get_rule_list_toplevel. + */ + appendStringInfoString(buf, "(ROW("); + get_rule_list_toplevel(rcexpr->largs, context, true); + + /* + * We assume that the name of the first-column operator will + * do for all the rest too. This is definitely open to + * failure, eg if some but not all operators were renamed + * since the construct was parsed, but there seems no way to + * be perfect. + */ + appendStringInfo(buf, ") %s ROW(", + generate_operator_name(linitial_oid(rcexpr->opnos), + exprType(linitial(rcexpr->largs)), + exprType(linitial(rcexpr->rargs)))); + get_rule_list_toplevel(rcexpr->rargs, context, true); + appendStringInfoString(buf, "))"); + } + break; + + case T_CoalesceExpr: + { + CoalesceExpr *coalesceexpr = (CoalesceExpr *) node; + + appendStringInfoString(buf, "COALESCE("); + get_rule_expr((Node *) coalesceexpr->args, context, true); + appendStringInfoChar(buf, ')'); + } + break; + + case T_MinMaxExpr: + { + MinMaxExpr *minmaxexpr = (MinMaxExpr *) node; + + switch (minmaxexpr->op) + { + case IS_GREATEST: + appendStringInfoString(buf, "GREATEST("); + break; + case IS_LEAST: + appendStringInfoString(buf, "LEAST("); + break; + } + get_rule_expr((Node *) minmaxexpr->args, context, true); + appendStringInfoChar(buf, ')'); + } + break; + + case T_SQLValueFunction: + { + SQLValueFunction *svf = (SQLValueFunction *) node; + + /* + * Note: this code knows that typmod for time, timestamp, and + * timestamptz just prints as integer. 
+ */ + switch (svf->op) + { + case SVFOP_CURRENT_DATE: + appendStringInfoString(buf, "CURRENT_DATE"); + break; + case SVFOP_CURRENT_TIME: + appendStringInfoString(buf, "CURRENT_TIME"); + break; + case SVFOP_CURRENT_TIME_N: + appendStringInfo(buf, "CURRENT_TIME(%d)", svf->typmod); + break; + case SVFOP_CURRENT_TIMESTAMP: + appendStringInfoString(buf, "CURRENT_TIMESTAMP"); + break; + case SVFOP_CURRENT_TIMESTAMP_N: + appendStringInfo(buf, "CURRENT_TIMESTAMP(%d)", + svf->typmod); + break; + case SVFOP_LOCALTIME: + appendStringInfoString(buf, "LOCALTIME"); + break; + case SVFOP_LOCALTIME_N: + appendStringInfo(buf, "LOCALTIME(%d)", svf->typmod); + break; + case SVFOP_LOCALTIMESTAMP: + appendStringInfoString(buf, "LOCALTIMESTAMP"); + break; + case SVFOP_LOCALTIMESTAMP_N: + appendStringInfo(buf, "LOCALTIMESTAMP(%d)", + svf->typmod); + break; + case SVFOP_CURRENT_ROLE: + appendStringInfoString(buf, "CURRENT_ROLE"); + break; + case SVFOP_CURRENT_USER: + appendStringInfoString(buf, "CURRENT_USER"); + break; + case SVFOP_USER: + appendStringInfoString(buf, "USER"); + break; + case SVFOP_SESSION_USER: + appendStringInfoString(buf, "SESSION_USER"); + break; + case SVFOP_CURRENT_CATALOG: + appendStringInfoString(buf, "CURRENT_CATALOG"); + break; + case SVFOP_CURRENT_SCHEMA: + appendStringInfoString(buf, "CURRENT_SCHEMA"); + break; + } + } + break; + + case T_XmlExpr: + { + XmlExpr *xexpr = (XmlExpr *) node; + bool needcomma = false; + ListCell *arg; + ListCell *narg; + Const *con; + + switch (xexpr->op) + { + case IS_XMLCONCAT: + appendStringInfoString(buf, "XMLCONCAT("); + break; + case IS_XMLELEMENT: + appendStringInfoString(buf, "XMLELEMENT("); + break; + case IS_XMLFOREST: + appendStringInfoString(buf, "XMLFOREST("); + break; + case IS_XMLPARSE: + appendStringInfoString(buf, "XMLPARSE("); + break; + case IS_XMLPI: + appendStringInfoString(buf, "XMLPI("); + break; + case IS_XMLROOT: + appendStringInfoString(buf, "XMLROOT("); + break; + case IS_XMLSERIALIZE: + appendStringInfoString(buf, "XMLSERIALIZE("); + break; + case IS_DOCUMENT: + break; + } + if (xexpr->op == IS_XMLPARSE || xexpr->op == IS_XMLSERIALIZE) + { + if (xexpr->xmloption == XMLOPTION_DOCUMENT) + appendStringInfoString(buf, "DOCUMENT "); + else + appendStringInfoString(buf, "CONTENT "); + } + if (xexpr->name) + { + appendStringInfo(buf, "NAME %s", + quote_identifier(map_xml_name_to_sql_identifier(xexpr->name))); + needcomma = true; + } + if (xexpr->named_args) + { + if (xexpr->op != IS_XMLFOREST) + { + if (needcomma) + appendStringInfoString(buf, ", "); + appendStringInfoString(buf, "XMLATTRIBUTES("); + needcomma = false; + } + forboth(arg, xexpr->named_args, narg, xexpr->arg_names) + { + Node *e = (Node *) lfirst(arg); + char *argname = strVal(lfirst(narg)); + + if (needcomma) + appendStringInfoString(buf, ", "); + get_rule_expr((Node *) e, context, true); + appendStringInfo(buf, " AS %s", + quote_identifier(map_xml_name_to_sql_identifier(argname))); + needcomma = true; + } + if (xexpr->op != IS_XMLFOREST) + appendStringInfoChar(buf, ')'); + } + if (xexpr->args) + { + if (needcomma) + appendStringInfoString(buf, ", "); + switch (xexpr->op) + { + case IS_XMLCONCAT: + case IS_XMLELEMENT: + case IS_XMLFOREST: + case IS_XMLPI: + case IS_XMLSERIALIZE: + /* no extra decoration needed */ + get_rule_expr((Node *) xexpr->args, context, true); + break; + case IS_XMLPARSE: + Assert(list_length(xexpr->args) == 2); + + get_rule_expr((Node *) linitial(xexpr->args), + context, true); + + con = lsecond_node(Const, xexpr->args); + 
Assert(!con->constisnull); + if (DatumGetBool(con->constvalue)) + appendStringInfoString(buf, + " PRESERVE WHITESPACE"); + else + appendStringInfoString(buf, + " STRIP WHITESPACE"); + break; + case IS_XMLROOT: + Assert(list_length(xexpr->args) == 3); + + get_rule_expr((Node *) linitial(xexpr->args), + context, true); + + appendStringInfoString(buf, ", VERSION "); + con = (Const *) lsecond(xexpr->args); + if (IsA(con, Const) && + con->constisnull) + appendStringInfoString(buf, "NO VALUE"); + else + get_rule_expr((Node *) con, context, false); + + con = lthird_node(Const, xexpr->args); + if (con->constisnull) + /* suppress STANDALONE NO VALUE */ ; + else + { + switch (DatumGetInt32(con->constvalue)) + { + case XML_STANDALONE_YES: + appendStringInfoString(buf, + ", STANDALONE YES"); + break; + case XML_STANDALONE_NO: + appendStringInfoString(buf, + ", STANDALONE NO"); + break; + case XML_STANDALONE_NO_VALUE: + appendStringInfoString(buf, + ", STANDALONE NO VALUE"); + break; + default: + break; + } + } + break; + case IS_DOCUMENT: + get_rule_expr_paren((Node *) xexpr->args, context, false, node); + break; + } + } + if (xexpr->op == IS_XMLSERIALIZE) + appendStringInfo(buf, " AS %s", + format_type_with_typemod(xexpr->type, + xexpr->typmod)); + if (xexpr->op == IS_DOCUMENT) + appendStringInfoString(buf, " IS DOCUMENT"); + else + appendStringInfoChar(buf, ')'); + } + break; + + case T_NullTest: + { + NullTest *ntest = (NullTest *) node; + + if (!PRETTY_PAREN(context)) + appendStringInfoChar(buf, '('); + get_rule_expr_paren((Node *) ntest->arg, context, true, node); + + /* + * For scalar inputs, we prefer to print as IS [NOT] NULL, + * which is shorter and traditional. If it's a rowtype input + * but we're applying a scalar test, must print IS [NOT] + * DISTINCT FROM NULL to be semantically correct. 
+ */ + if (ntest->argisrow || + !type_is_rowtype(exprType((Node *) ntest->arg))) + { + switch (ntest->nulltesttype) + { + case IS_NULL: + appendStringInfoString(buf, " IS NULL"); + break; + case IS_NOT_NULL: + appendStringInfoString(buf, " IS NOT NULL"); + break; + default: + elog(ERROR, "unrecognized nulltesttype: %d", + (int) ntest->nulltesttype); + } + } + else + { + switch (ntest->nulltesttype) + { + case IS_NULL: + appendStringInfoString(buf, " IS NOT DISTINCT FROM NULL"); + break; + case IS_NOT_NULL: + appendStringInfoString(buf, " IS DISTINCT FROM NULL"); + break; + default: + elog(ERROR, "unrecognized nulltesttype: %d", + (int) ntest->nulltesttype); + } + } + if (!PRETTY_PAREN(context)) + appendStringInfoChar(buf, ')'); + } + break; + + case T_BooleanTest: + { + BooleanTest *btest = (BooleanTest *) node; + + if (!PRETTY_PAREN(context)) + appendStringInfoChar(buf, '('); + get_rule_expr_paren((Node *) btest->arg, context, false, node); + switch (btest->booltesttype) + { + case IS_TRUE: + appendStringInfoString(buf, " IS TRUE"); + break; + case IS_NOT_TRUE: + appendStringInfoString(buf, " IS NOT TRUE"); + break; + case IS_FALSE: + appendStringInfoString(buf, " IS FALSE"); + break; + case IS_NOT_FALSE: + appendStringInfoString(buf, " IS NOT FALSE"); + break; + case IS_UNKNOWN: + appendStringInfoString(buf, " IS UNKNOWN"); + break; + case IS_NOT_UNKNOWN: + appendStringInfoString(buf, " IS NOT UNKNOWN"); + break; + default: + elog(ERROR, "unrecognized booltesttype: %d", + (int) btest->booltesttype); + } + if (!PRETTY_PAREN(context)) + appendStringInfoChar(buf, ')'); + } + break; + + case T_CoerceToDomain: + { + CoerceToDomain *ctest = (CoerceToDomain *) node; + Node *arg = (Node *) ctest->arg; + + if (ctest->coercionformat == COERCE_IMPLICIT_CAST && + !showimplicit) + { + /* don't show the implicit cast */ + get_rule_expr(arg, context, false); + } + else + { + get_coercion_expr(arg, context, + ctest->resulttype, + ctest->resulttypmod, + node); + } + } + break; + + case T_CoerceToDomainValue: + appendStringInfoString(buf, "VALUE"); + break; + + case T_SetToDefault: + appendStringInfoString(buf, "DEFAULT"); + break; + + case T_CurrentOfExpr: + { + CurrentOfExpr *cexpr = (CurrentOfExpr *) node; + + if (cexpr->cursor_name) + appendStringInfo(buf, "CURRENT OF %s", + quote_identifier(cexpr->cursor_name)); + else + appendStringInfo(buf, "CURRENT OF $%d", + cexpr->cursor_param); + } + break; + + case T_NextValueExpr: + { + NextValueExpr *nvexpr = (NextValueExpr *) node; + + /* + * This isn't exactly nextval(), but that seems close enough + * for EXPLAIN's purposes. + */ + appendStringInfoString(buf, "nextval("); + simple_quote_literal(buf, + generate_relation_name(nvexpr->seqid, + NIL)); + appendStringInfoChar(buf, ')'); + } + break; + + case T_InferenceElem: + { + InferenceElem *iexpr = (InferenceElem *) node; + bool save_varprefix; + bool need_parens; + + /* + * InferenceElem can only refer to target relation, so a + * prefix is not useful, and indeed would cause parse errors. + */ + save_varprefix = context->varprefix; + context->varprefix = false; + + /* + * Parenthesize the element unless it's a simple Var or a bare + * function call. Follows pg_get_indexdef_worker(). 
+ */ + need_parens = !IsA(iexpr->expr, Var); + if (IsA(iexpr->expr, FuncExpr) && + ((FuncExpr *) iexpr->expr)->funcformat == + COERCE_EXPLICIT_CALL) + need_parens = false; + + if (need_parens) + appendStringInfoChar(buf, '('); + get_rule_expr((Node *) iexpr->expr, + context, false); + if (need_parens) + appendStringInfoChar(buf, ')'); + + context->varprefix = save_varprefix; + + if (iexpr->infercollid) + appendStringInfo(buf, " COLLATE %s", + generate_collation_name(iexpr->infercollid)); + + /* Add the operator class name, if not default */ + if (iexpr->inferopclass) + { + Oid inferopclass = iexpr->inferopclass; + Oid inferopcinputtype = get_opclass_input_type(iexpr->inferopclass); + + get_opclass_name(inferopclass, inferopcinputtype, buf); + } + } + break; + + case T_PartitionBoundSpec: + { + PartitionBoundSpec *spec = (PartitionBoundSpec *) node; + ListCell *cell; + char *sep; + + if (spec->is_default) + { + appendStringInfoString(buf, "DEFAULT"); + break; + } + + switch (spec->strategy) + { + case PARTITION_STRATEGY_HASH: + Assert(spec->modulus > 0 && spec->remainder >= 0); + Assert(spec->modulus > spec->remainder); + + appendStringInfoString(buf, "FOR VALUES"); + appendStringInfo(buf, " WITH (modulus %d, remainder %d)", + spec->modulus, spec->remainder); + break; + + case PARTITION_STRATEGY_LIST: + Assert(spec->listdatums != NIL); + + appendStringInfoString(buf, "FOR VALUES IN ("); + sep = ""; + foreach(cell, spec->listdatums) + { + Const *val = lfirst_node(Const, cell); + + appendStringInfoString(buf, sep); + get_const_expr(val, context, -1); + sep = ", "; + } + + appendStringInfoChar(buf, ')'); + break; + + case PARTITION_STRATEGY_RANGE: + Assert(spec->lowerdatums != NIL && + spec->upperdatums != NIL && + list_length(spec->lowerdatums) == + list_length(spec->upperdatums)); + + appendStringInfo(buf, "FOR VALUES FROM %s TO %s", + get_range_partbound_string(spec->lowerdatums), + get_range_partbound_string(spec->upperdatums)); + break; + + default: + elog(ERROR, "unrecognized partition strategy: %d", + (int) spec->strategy); + break; + } + } + break; + + case T_JsonValueExpr: + { + JsonValueExpr *jve = (JsonValueExpr *) node; + + get_rule_expr((Node *) jve->raw_expr, context, false); + get_json_format(jve->format, context->buf); + } + break; + + case T_JsonConstructorExpr: + get_json_constructor((JsonConstructorExpr *) node, context, false); + break; + + case T_JsonIsPredicate: + { + JsonIsPredicate *pred = (JsonIsPredicate *) node; + + if (!PRETTY_PAREN(context)) + appendStringInfoChar(context->buf, '('); + + get_rule_expr_paren(pred->expr, context, true, node); + + appendStringInfoString(context->buf, " IS JSON"); + + /* TODO: handle FORMAT clause */ + + switch (pred->item_type) + { + case JS_TYPE_SCALAR: + appendStringInfoString(context->buf, " SCALAR"); + break; + case JS_TYPE_ARRAY: + appendStringInfoString(context->buf, " ARRAY"); + break; + case JS_TYPE_OBJECT: + appendStringInfoString(context->buf, " OBJECT"); + break; + default: + break; + } + + if (pred->unique_keys) + appendStringInfoString(context->buf, " WITH UNIQUE KEYS"); + + if (!PRETTY_PAREN(context)) + appendStringInfoChar(context->buf, ')'); + } + break; + + case T_List: + { + char *sep; + ListCell *l; + + sep = ""; + foreach(l, (List *) node) + { + appendStringInfoString(buf, sep); + get_rule_expr((Node *) lfirst(l), context, showimplicit); + sep = ", "; + } + } + break; + + case T_TableFunc: + get_tablefunc((TableFunc *) node, context, showimplicit); + break; + + default: + elog(ERROR, "unrecognized node type: %d", 
(int) nodeTag(node)); + break; + } +} + +/* + * get_rule_expr_toplevel - Parse back a toplevel expression + * + * Same as get_rule_expr(), except that if the expr is just a Var, we pass + * istoplevel = true not false to get_variable(). This causes whole-row Vars + * to get printed with decoration that will prevent expansion of "*". + * We need to use this in contexts such as ROW() and VALUES(), where the + * parser would expand "foo.*" appearing at top level. (In principle we'd + * use this in get_target_list() too, but that has additional worries about + * whether to print AS, so it needs to invoke get_variable() directly anyway.) + */ +static void +get_rule_expr_toplevel(Node *node, deparse_context *context, + bool showimplicit) +{ + if (node && IsA(node, Var)) + (void) get_variable((Var *) node, 0, true, context); + else + get_rule_expr(node, context, showimplicit); +} + +/* + * get_rule_list_toplevel - Parse back a list of toplevel expressions + * + * Apply get_rule_expr_toplevel() to each element of a List. + * + * This adds commas between the expressions, but caller is responsible + * for printing surrounding decoration. + */ +static void +get_rule_list_toplevel(List *lst, deparse_context *context, + bool showimplicit) +{ + const char *sep; + ListCell *lc; + + sep = ""; + foreach(lc, lst) + { + Node *e = (Node *) lfirst(lc); + + appendStringInfoString(context->buf, sep); + get_rule_expr_toplevel(e, context, showimplicit); + sep = ", "; + } +} + +/* + * get_rule_expr_funccall - Parse back a function-call expression + * + * Same as get_rule_expr(), except that we guarantee that the output will + * look like a function call, or like one of the things the grammar treats as + * equivalent to a function call (see the func_expr_windowless production). + * This is needed in places where the grammar uses func_expr_windowless and + * you can't substitute a parenthesized a_expr. If what we have isn't going + * to look like a function call, wrap it in a dummy CAST() expression, which + * will satisfy the grammar --- and, indeed, is likely what the user wrote to + * produce such a thing. + */ +static void +get_rule_expr_funccall(Node *node, deparse_context *context, + bool showimplicit) +{ + if (looks_like_function(node)) + get_rule_expr(node, context, showimplicit); + else + { + StringInfo buf = context->buf; + + appendStringInfoString(buf, "CAST("); + /* no point in showing any top-level implicit cast */ + get_rule_expr(node, context, false); + appendStringInfo(buf, " AS %s)", + format_type_with_typemod(exprType(node), + exprTypmod(node))); + } +} + +/* + * Helper function to identify node types that satisfy func_expr_windowless. + * If in doubt, "false" is always a safe answer. 
+ */ +static bool +looks_like_function(Node *node) +{ + if (node == NULL) + return false; /* probably shouldn't happen */ + switch (nodeTag(node)) + { + case T_FuncExpr: + /* OK, unless it's going to deparse as a cast */ + return (((FuncExpr *) node)->funcformat == COERCE_EXPLICIT_CALL || + ((FuncExpr *) node)->funcformat == COERCE_SQL_SYNTAX); + case T_NullIfExpr: + case T_CoalesceExpr: + case T_MinMaxExpr: + case T_SQLValueFunction: + case T_XmlExpr: + /* these are all accepted by func_expr_common_subexpr */ + return true; + default: + break; + } + return false; +} + + +/* + * get_oper_expr - Parse back an OpExpr node + */ +static void +get_oper_expr(OpExpr *expr, deparse_context *context) +{ + StringInfo buf = context->buf; + Oid opno = expr->opno; + List *args = expr->args; + + if (!PRETTY_PAREN(context)) + appendStringInfoChar(buf, '('); + if (list_length(args) == 2) + { + /* binary operator */ + Node *arg1 = (Node *) linitial(args); + Node *arg2 = (Node *) lsecond(args); + + get_rule_expr_paren(arg1, context, true, (Node *) expr); + appendStringInfo(buf, " %s ", + generate_operator_name(opno, + exprType(arg1), + exprType(arg2))); + get_rule_expr_paren(arg2, context, true, (Node *) expr); + } + else + { + /* prefix operator */ + Node *arg = (Node *) linitial(args); + + appendStringInfo(buf, "%s ", + generate_operator_name(opno, + InvalidOid, + exprType(arg))); + get_rule_expr_paren(arg, context, true, (Node *) expr); + } + if (!PRETTY_PAREN(context)) + appendStringInfoChar(buf, ')'); +} + +/* + * get_func_expr - Parse back a FuncExpr node + */ +static void +get_func_expr(FuncExpr *expr, deparse_context *context, + bool showimplicit) +{ + StringInfo buf = context->buf; + Oid funcoid = expr->funcid; + Oid argtypes[FUNC_MAX_ARGS]; + int nargs; + List *argnames; + bool use_variadic; + ListCell *l; + + /* + * If the function call came from an implicit coercion, then just show the + * first argument --- unless caller wants to see implicit coercions. + */ + if (expr->funcformat == COERCE_IMPLICIT_CAST && !showimplicit) + { + get_rule_expr_paren((Node *) linitial(expr->args), context, + false, (Node *) expr); + return; + } + + /* + * If the function call came from a cast, then show the first argument + * plus an explicit cast operation. + */ + if (expr->funcformat == COERCE_EXPLICIT_CAST || + expr->funcformat == COERCE_IMPLICIT_CAST) + { + Node *arg = linitial(expr->args); + Oid rettype = expr->funcresulttype; + int32 coercedTypmod; + + /* Get the typmod if this is a length-coercion function */ + (void) exprIsLengthCoercion((Node *) expr, &coercedTypmod); + + get_coercion_expr(arg, context, + rettype, coercedTypmod, + (Node *) expr); + + return; + } + + /* + * If the function was called using one of the SQL spec's random special + * syntaxes, try to reproduce that. If we don't recognize the function, + * fall through. + */ + if (expr->funcformat == COERCE_SQL_SYNTAX) + { + if (get_func_sql_syntax(expr, context)) + return; + } + + /* + * Normal function: display as proname(args). First we need to extract + * the argument datatypes. 
+ */ + if (list_length(expr->args) > FUNC_MAX_ARGS) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_ARGUMENTS), + errmsg("too many arguments"))); + nargs = 0; + argnames = NIL; + foreach(l, expr->args) + { + Node *arg = (Node *) lfirst(l); + + if (IsA(arg, NamedArgExpr)) + argnames = lappend(argnames, ((NamedArgExpr *) arg)->name); + argtypes[nargs] = exprType(arg); + nargs++; + } + + appendStringInfo(buf, "%s(", + generate_function_name(funcoid, nargs, + argnames, argtypes, + expr->funcvariadic, + &use_variadic, + context->special_exprkind)); + nargs = 0; + foreach(l, expr->args) + { + if (nargs++ > 0) + appendStringInfoString(buf, ", "); + if (use_variadic && lnext(expr->args, l) == NULL) + appendStringInfoString(buf, "VARIADIC "); + get_rule_expr((Node *) lfirst(l), context, true); + } + appendStringInfoChar(buf, ')'); +} + +/* + * get_agg_expr - Parse back an Aggref node + */ +static void +get_agg_expr(Aggref *aggref, deparse_context *context, + Aggref *original_aggref) +{ + get_agg_expr_helper(aggref, context, original_aggref, NULL, NULL, + false); +} + +/* + * get_agg_expr_helper - subroutine for get_agg_expr and + * get_json_agg_constructor + */ +static void +get_agg_expr_helper(Aggref *aggref, deparse_context *context, + Aggref *original_aggref, const char *funcname, + const char *options, bool is_json_objectagg) +{ + StringInfo buf = context->buf; + Oid argtypes[FUNC_MAX_ARGS]; + int nargs; + bool use_variadic = false; + + /* + * For a combining aggregate, we look up and deparse the corresponding + * partial aggregate instead. This is necessary because our input + * argument list has been replaced; the new argument list always has just + * one element, which will point to a partial Aggref that supplies us with + * transition states to combine. + */ + if (DO_AGGSPLIT_COMBINE(aggref->aggsplit)) + { + TargetEntry *tle; + + Assert(list_length(aggref->args) == 1); + tle = linitial_node(TargetEntry, aggref->args); + resolve_special_varno((Node *) tle->expr, context, + get_agg_combine_expr, original_aggref); + return; + } + + /* + * Mark as PARTIAL, if appropriate. We look to the original aggref so as + * to avoid printing this when recursing from the code just above. + */ + if (DO_AGGSPLIT_SKIPFINAL(original_aggref->aggsplit)) + appendStringInfoString(buf, "PARTIAL "); + + /* Extract the argument types as seen by the parser */ + nargs = get_aggregate_argtypes(aggref, argtypes); + + if (!funcname) + funcname = generate_function_name(aggref->aggfnoid, nargs, NIL, + argtypes, aggref->aggvariadic, + &use_variadic, + context->special_exprkind); + + /* Print the aggregate name, schema-qualified if needed */ + appendStringInfo(buf, "%s(%s", funcname, + (aggref->aggdistinct != NIL) ? "DISTINCT " : ""); + + if (AGGKIND_IS_ORDERED_SET(aggref->aggkind)) + { + /* + * Ordered-set aggregates do not use "*" syntax. Also, we needn't + * worry about inserting VARIADIC. So we can just dump the direct + * args as-is. 
+ */ + Assert(!aggref->aggvariadic); + get_rule_expr((Node *) aggref->aggdirectargs, context, true); + Assert(aggref->aggorder != NIL); + appendStringInfoString(buf, ") WITHIN GROUP (ORDER BY "); + get_rule_orderby(aggref->aggorder, aggref->args, false, context); + } + else + { + /* aggstar can be set only in zero-argument aggregates */ + if (aggref->aggstar) + appendStringInfoChar(buf, '*'); + else + { + ListCell *l; + int i; + + i = 0; + foreach(l, aggref->args) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + Node *arg = (Node *) tle->expr; + + Assert(!IsA(arg, NamedArgExpr)); + if (tle->resjunk) + continue; + if (i++ > 0) + { + if (is_json_objectagg) + { + /* + * the ABSENT ON NULL and WITH UNIQUE args are printed + * separately, so ignore them here + */ + if (i > 2) + break; + + appendStringInfoString(buf, " : "); + } + else + appendStringInfoString(buf, ", "); + } + if (use_variadic && i == nargs) + appendStringInfoString(buf, "VARIADIC "); + get_rule_expr(arg, context, true); + } + } + + if (aggref->aggorder != NIL) + { + appendStringInfoString(buf, " ORDER BY "); + get_rule_orderby(aggref->aggorder, aggref->args, false, context); + } + } + + if (options) + appendStringInfoString(buf, options); + + if (aggref->aggfilter != NULL) + { + appendStringInfoString(buf, ") FILTER (WHERE "); + get_rule_expr((Node *) aggref->aggfilter, context, false); + } + + appendStringInfoChar(buf, ')'); +} + +/* + * This is a helper function for get_agg_expr(). It's used when we deparse + * a combining Aggref; resolve_special_varno locates the corresponding partial + * Aggref and then calls this. + */ +static void +get_agg_combine_expr(Node *node, deparse_context *context, void *callback_arg) +{ + Aggref *aggref; + Aggref *original_aggref = callback_arg; + + if (!IsA(node, Aggref)) + elog(ERROR, "combining Aggref does not point to an Aggref"); + + aggref = (Aggref *) node; + get_agg_expr(aggref, context, original_aggref); +} + +/* + * get_windowfunc_expr - Parse back a WindowFunc node + */ +static void +get_windowfunc_expr(WindowFunc *wfunc, deparse_context *context) +{ + get_windowfunc_expr_helper(wfunc, context, NULL, NULL, false); +} + + +/* + * get_windowfunc_expr_helper - subroutine for get_windowfunc_expr and + * get_json_agg_constructor + */ +static void +get_windowfunc_expr_helper(WindowFunc *wfunc, deparse_context *context, + const char *funcname, const char *options, + bool is_json_objectagg) +{ + StringInfo buf = context->buf; + Oid argtypes[FUNC_MAX_ARGS]; + int nargs; + List *argnames; + ListCell *l; + + if (list_length(wfunc->args) > FUNC_MAX_ARGS) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_ARGUMENTS), + errmsg("too many arguments"))); + nargs = 0; + argnames = NIL; + foreach(l, wfunc->args) + { + Node *arg = (Node *) lfirst(l); + + if (IsA(arg, NamedArgExpr)) + argnames = lappend(argnames, ((NamedArgExpr *) arg)->name); + argtypes[nargs] = exprType(arg); + nargs++; + } + + if (!funcname) + funcname = generate_function_name(wfunc->winfnoid, nargs, argnames, + argtypes, false, NULL, + context->special_exprkind); + + appendStringInfo(buf, "%s(", funcname); + + /* winstar can be set only in zero-argument aggregates */ + if (wfunc->winstar) + appendStringInfoChar(buf, '*'); + else + { + if (is_json_objectagg) + { + get_rule_expr((Node *) linitial(wfunc->args), context, false); + appendStringInfoString(buf, " : "); + get_rule_expr((Node *) lsecond(wfunc->args), context, false); + } + else + get_rule_expr((Node *) wfunc->args, context, true); + } + + if (options) + 
appendStringInfoString(buf, options); + + if (wfunc->aggfilter != NULL) + { + appendStringInfoString(buf, ") FILTER (WHERE "); + get_rule_expr((Node *) wfunc->aggfilter, context, false); + } + + appendStringInfoString(buf, ") OVER "); + + foreach(l, context->windowClause) + { + WindowClause *wc = (WindowClause *) lfirst(l); + + if (wc->winref == wfunc->winref) + { + if (wc->name) + appendStringInfoString(buf, quote_identifier(wc->name)); + else + get_rule_windowspec(wc, context->windowTList, context); + break; + } + } + if (l == NULL) + { + if (context->windowClause) + elog(ERROR, "could not find window clause for winref %u", + wfunc->winref); + + /* + * In EXPLAIN, we don't have window context information available, so + * we have to settle for this: + */ + appendStringInfoString(buf, "(?)"); + } +} + +/* + * get_func_sql_syntax - Parse back a SQL-syntax function call + * + * Returns true if we successfully deparsed, false if we did not + * recognize the function. + */ +static bool +get_func_sql_syntax(FuncExpr *expr, deparse_context *context) +{ + StringInfo buf = context->buf; + Oid funcoid = expr->funcid; + + switch (funcoid) + { + case F_TIMEZONE_INTERVAL_TIMESTAMP: + case F_TIMEZONE_INTERVAL_TIMESTAMPTZ: + case F_TIMEZONE_INTERVAL_TIMETZ: + case F_TIMEZONE_TEXT_TIMESTAMP: + case F_TIMEZONE_TEXT_TIMESTAMPTZ: + case F_TIMEZONE_TEXT_TIMETZ: + /* AT TIME ZONE ... note reversed argument order */ + appendStringInfoChar(buf, '('); + get_rule_expr_paren((Node *) lsecond(expr->args), context, false, + (Node *) expr); + appendStringInfoString(buf, " AT TIME ZONE "); + get_rule_expr_paren((Node *) linitial(expr->args), context, false, + (Node *) expr); + appendStringInfoChar(buf, ')'); + return true; + + case F_OVERLAPS_TIMESTAMPTZ_INTERVAL_TIMESTAMPTZ_INTERVAL: + case F_OVERLAPS_TIMESTAMPTZ_INTERVAL_TIMESTAMPTZ_TIMESTAMPTZ: + case F_OVERLAPS_TIMESTAMPTZ_TIMESTAMPTZ_TIMESTAMPTZ_INTERVAL: + case F_OVERLAPS_TIMESTAMPTZ_TIMESTAMPTZ_TIMESTAMPTZ_TIMESTAMPTZ: + case F_OVERLAPS_TIMESTAMP_INTERVAL_TIMESTAMP_INTERVAL: + case F_OVERLAPS_TIMESTAMP_INTERVAL_TIMESTAMP_TIMESTAMP: + case F_OVERLAPS_TIMESTAMP_TIMESTAMP_TIMESTAMP_INTERVAL: + case F_OVERLAPS_TIMESTAMP_TIMESTAMP_TIMESTAMP_TIMESTAMP: + case F_OVERLAPS_TIMETZ_TIMETZ_TIMETZ_TIMETZ: + case F_OVERLAPS_TIME_INTERVAL_TIME_INTERVAL: + case F_OVERLAPS_TIME_INTERVAL_TIME_TIME: + case F_OVERLAPS_TIME_TIME_TIME_INTERVAL: + case F_OVERLAPS_TIME_TIME_TIME_TIME: + /* (x1, x2) OVERLAPS (y1, y2) */ + appendStringInfoString(buf, "(("); + get_rule_expr((Node *) linitial(expr->args), context, false); + appendStringInfoString(buf, ", "); + get_rule_expr((Node *) lsecond(expr->args), context, false); + appendStringInfoString(buf, ") OVERLAPS ("); + get_rule_expr((Node *) lthird(expr->args), context, false); + appendStringInfoString(buf, ", "); + get_rule_expr((Node *) lfourth(expr->args), context, false); + appendStringInfoString(buf, "))"); + return true; + + case F_EXTRACT_TEXT_DATE: + case F_EXTRACT_TEXT_TIME: + case F_EXTRACT_TEXT_TIMETZ: + case F_EXTRACT_TEXT_TIMESTAMP: + case F_EXTRACT_TEXT_TIMESTAMPTZ: + case F_EXTRACT_TEXT_INTERVAL: + /* EXTRACT (x FROM y) */ + appendStringInfoString(buf, "EXTRACT("); + { + Const *con = (Const *) linitial(expr->args); + + Assert(IsA(con, Const) && + con->consttype == TEXTOID && + !con->constisnull); + appendStringInfoString(buf, TextDatumGetCString(con->constvalue)); + } + appendStringInfoString(buf, " FROM "); + get_rule_expr((Node *) lsecond(expr->args), context, false); + appendStringInfoChar(buf, ')'); + return true; + + 
case F_IS_NORMALIZED: + /* IS xxx NORMALIZED */ + appendStringInfoString(buf, "("); + get_rule_expr_paren((Node *) linitial(expr->args), context, false, + (Node *) expr); + appendStringInfoString(buf, " IS"); + if (list_length(expr->args) == 2) + { + Const *con = (Const *) lsecond(expr->args); + + Assert(IsA(con, Const) && + con->consttype == TEXTOID && + !con->constisnull); + appendStringInfo(buf, " %s", + TextDatumGetCString(con->constvalue)); + } + appendStringInfoString(buf, " NORMALIZED)"); + return true; + + case F_PG_COLLATION_FOR: + /* COLLATION FOR */ + appendStringInfoString(buf, "COLLATION FOR ("); + get_rule_expr((Node *) linitial(expr->args), context, false); + appendStringInfoChar(buf, ')'); + return true; + + case F_NORMALIZE: + /* NORMALIZE() */ + appendStringInfoString(buf, "NORMALIZE("); + get_rule_expr((Node *) linitial(expr->args), context, false); + if (list_length(expr->args) == 2) + { + Const *con = (Const *) lsecond(expr->args); + + Assert(IsA(con, Const) && + con->consttype == TEXTOID && + !con->constisnull); + appendStringInfo(buf, ", %s", + TextDatumGetCString(con->constvalue)); + } + appendStringInfoChar(buf, ')'); + return true; + + case F_OVERLAY_BIT_BIT_INT4: + case F_OVERLAY_BIT_BIT_INT4_INT4: + case F_OVERLAY_BYTEA_BYTEA_INT4: + case F_OVERLAY_BYTEA_BYTEA_INT4_INT4: + case F_OVERLAY_TEXT_TEXT_INT4: + case F_OVERLAY_TEXT_TEXT_INT4_INT4: + /* OVERLAY() */ + appendStringInfoString(buf, "OVERLAY("); + get_rule_expr((Node *) linitial(expr->args), context, false); + appendStringInfoString(buf, " PLACING "); + get_rule_expr((Node *) lsecond(expr->args), context, false); + appendStringInfoString(buf, " FROM "); + get_rule_expr((Node *) lthird(expr->args), context, false); + if (list_length(expr->args) == 4) + { + appendStringInfoString(buf, " FOR "); + get_rule_expr((Node *) lfourth(expr->args), context, false); + } + appendStringInfoChar(buf, ')'); + return true; + + case F_POSITION_BIT_BIT: + case F_POSITION_BYTEA_BYTEA: + case F_POSITION_TEXT_TEXT: + /* POSITION() ... 
extra parens since args are b_expr not a_expr */ + appendStringInfoString(buf, "POSITION(("); + get_rule_expr((Node *) lsecond(expr->args), context, false); + appendStringInfoString(buf, ") IN ("); + get_rule_expr((Node *) linitial(expr->args), context, false); + appendStringInfoString(buf, "))"); + return true; + + case F_SUBSTRING_BIT_INT4: + case F_SUBSTRING_BIT_INT4_INT4: + case F_SUBSTRING_BYTEA_INT4: + case F_SUBSTRING_BYTEA_INT4_INT4: + case F_SUBSTRING_TEXT_INT4: + case F_SUBSTRING_TEXT_INT4_INT4: + /* SUBSTRING FROM/FOR (i.e., integer-position variants) */ + appendStringInfoString(buf, "SUBSTRING("); + get_rule_expr((Node *) linitial(expr->args), context, false); + appendStringInfoString(buf, " FROM "); + get_rule_expr((Node *) lsecond(expr->args), context, false); + if (list_length(expr->args) == 3) + { + appendStringInfoString(buf, " FOR "); + get_rule_expr((Node *) lthird(expr->args), context, false); + } + appendStringInfoChar(buf, ')'); + return true; + + case F_SUBSTRING_TEXT_TEXT_TEXT: + /* SUBSTRING SIMILAR/ESCAPE */ + appendStringInfoString(buf, "SUBSTRING("); + get_rule_expr((Node *) linitial(expr->args), context, false); + appendStringInfoString(buf, " SIMILAR "); + get_rule_expr((Node *) lsecond(expr->args), context, false); + appendStringInfoString(buf, " ESCAPE "); + get_rule_expr((Node *) lthird(expr->args), context, false); + appendStringInfoChar(buf, ')'); + return true; + + case F_BTRIM_BYTEA_BYTEA: + case F_BTRIM_TEXT: + case F_BTRIM_TEXT_TEXT: + /* TRIM() */ + appendStringInfoString(buf, "TRIM(BOTH"); + if (list_length(expr->args) == 2) + { + appendStringInfoChar(buf, ' '); + get_rule_expr((Node *) lsecond(expr->args), context, false); + } + appendStringInfoString(buf, " FROM "); + get_rule_expr((Node *) linitial(expr->args), context, false); + appendStringInfoChar(buf, ')'); + return true; + + case F_LTRIM_BYTEA_BYTEA: + case F_LTRIM_TEXT: + case F_LTRIM_TEXT_TEXT: + /* TRIM() */ + appendStringInfoString(buf, "TRIM(LEADING"); + if (list_length(expr->args) == 2) + { + appendStringInfoChar(buf, ' '); + get_rule_expr((Node *) lsecond(expr->args), context, false); + } + appendStringInfoString(buf, " FROM "); + get_rule_expr((Node *) linitial(expr->args), context, false); + appendStringInfoChar(buf, ')'); + return true; + + case F_RTRIM_BYTEA_BYTEA: + case F_RTRIM_TEXT: + case F_RTRIM_TEXT_TEXT: + /* TRIM() */ + appendStringInfoString(buf, "TRIM(TRAILING"); + if (list_length(expr->args) == 2) + { + appendStringInfoChar(buf, ' '); + get_rule_expr((Node *) lsecond(expr->args), context, false); + } + appendStringInfoString(buf, " FROM "); + get_rule_expr((Node *) linitial(expr->args), context, false); + appendStringInfoChar(buf, ')'); + return true; + + case F_SYSTEM_USER: + appendStringInfoString(buf, "SYSTEM_USER"); + return true; + + case F_XMLEXISTS: + /* XMLEXISTS ... 
extra parens because args are c_expr */ + appendStringInfoString(buf, "XMLEXISTS(("); + get_rule_expr((Node *) linitial(expr->args), context, false); + appendStringInfoString(buf, ") PASSING ("); + get_rule_expr((Node *) lsecond(expr->args), context, false); + appendStringInfoString(buf, "))"); + return true; + } + return false; +} + +/* ---------- + * get_coercion_expr + * + * Make a string representation of a value coerced to a specific type + * ---------- + */ +static void +get_coercion_expr(Node *arg, deparse_context *context, + Oid resulttype, int32 resulttypmod, + Node *parentNode) +{ + StringInfo buf = context->buf; + + /* + * Since parse_coerce.c doesn't immediately collapse application of + * length-coercion functions to constants, what we'll typically see in + * such cases is a Const with typmod -1 and a length-coercion function + * right above it. Avoid generating redundant output. However, beware of + * suppressing casts when the user actually wrote something like + * 'foo'::text::char(3). + * + * Note: it might seem that we are missing the possibility of needing to + * print a COLLATE clause for such a Const. However, a Const could only + * have nondefault collation in a post-constant-folding tree, in which the + * length coercion would have been folded too. See also the special + * handling of CollateExpr in coerce_to_target_type(): any collation + * marking will be above the coercion node, not below it. + */ + if (arg && IsA(arg, Const) && + ((Const *) arg)->consttype == resulttype && + ((Const *) arg)->consttypmod == -1) + { + /* Show the constant without normal ::typename decoration */ + get_const_expr((Const *) arg, context, -1); + } + else + { + if (!PRETTY_PAREN(context)) + appendStringInfoChar(buf, '('); + get_rule_expr_paren(arg, context, false, parentNode); + if (!PRETTY_PAREN(context)) + appendStringInfoChar(buf, ')'); + } + + /* + * Never emit resulttype(arg) functional notation. A pg_proc entry could + * take precedence, and a resulttype in pg_temp would require schema + * qualification that format_type_with_typemod() would usually omit. We've + * standardized on arg::resulttype, but CAST(arg AS resulttype) notation + * would work fine. + */ + appendStringInfo(buf, "::%s", + format_type_with_typemod(resulttype, resulttypmod)); +} + +/* ---------- + * get_const_expr + * + * Make a string representation of a Const + * + * showtype can be -1 to never show "::typename" decoration, or +1 to always + * show it, or 0 to show it only if the constant wouldn't be assumed to be + * the right type by default. + * + * If the Const's collation isn't default for its type, show that too. + * We mustn't do this when showtype is -1 (since that means the caller will + * print "::typename", and we can't put a COLLATE clause in between). It's + * caller's responsibility that collation isn't missed in such cases. + * ---------- + */ +static void +get_const_expr(Const *constval, deparse_context *context, int showtype) +{ + StringInfo buf = context->buf; + Oid typoutput; + bool typIsVarlena; + char *extval; + bool needlabel = false; + + if (constval->constisnull) + { + /* + * Always label the type of a NULL constant to prevent misdecisions + * about type when reparsing. 
+ */ + appendStringInfoString(buf, "NULL"); + if (showtype >= 0) + { + appendStringInfo(buf, "::%s", + format_type_with_typemod(constval->consttype, + constval->consttypmod)); + get_const_collation(constval, context); + } + return; + } + + getTypeOutputInfo(constval->consttype, + &typoutput, &typIsVarlena); + + extval = OidOutputFunctionCall(typoutput, constval->constvalue); + + switch (constval->consttype) + { + case INT4OID: + + /* + * INT4 can be printed without any decoration, unless it is + * negative; in that case print it as '-nnn'::integer to ensure + * that the output will re-parse as a constant, not as a constant + * plus operator. In most cases we could get away with printing + * (-nnn) instead, because of the way that gram.y handles negative + * literals; but that doesn't work for INT_MIN, and it doesn't + * seem that much prettier anyway. + */ + if (extval[0] != '-') + appendStringInfoString(buf, extval); + else + { + appendStringInfo(buf, "'%s'", extval); + needlabel = true; /* we must attach a cast */ + } + break; + + case NUMERICOID: + + /* + * NUMERIC can be printed without quotes if it looks like a float + * constant (not an integer, and not Infinity or NaN) and doesn't + * have a leading sign (for the same reason as for INT4). + */ + if (isdigit((unsigned char) extval[0]) && + strcspn(extval, "eE.") != strlen(extval)) + { + appendStringInfoString(buf, extval); + } + else + { + appendStringInfo(buf, "'%s'", extval); + needlabel = true; /* we must attach a cast */ + } + break; + + case BOOLOID: + if (strcmp(extval, "t") == 0) + appendStringInfoString(buf, "true"); + else + appendStringInfoString(buf, "false"); + break; + + default: + simple_quote_literal(buf, extval); + break; + } + + pfree(extval); + + if (showtype < 0) + return; + + /* + * For showtype == 0, append ::typename unless the constant will be + * implicitly typed as the right type when it is read in. + * + * XXX this code has to be kept in sync with the behavior of the parser, + * especially make_const. + */ + switch (constval->consttype) + { + case BOOLOID: + case UNKNOWNOID: + /* These types can be left unlabeled */ + needlabel = false; + break; + case INT4OID: + /* We determined above whether a label is needed */ + break; + case NUMERICOID: + + /* + * Float-looking constants will be typed as numeric, which we + * checked above; but if there's a nondefault typmod we need to + * show it. + */ + needlabel |= (constval->consttypmod >= 0); + break; + default: + needlabel = true; + break; + } + if (needlabel || showtype > 0) + appendStringInfo(buf, "::%s", + format_type_with_typemod(constval->consttype, + constval->consttypmod)); + + get_const_collation(constval, context); +} + +/* + * helper for get_const_expr: append COLLATE if needed + */ +static void +get_const_collation(Const *constval, deparse_context *context) +{ + StringInfo buf = context->buf; + + if (OidIsValid(constval->constcollid)) + { + Oid typcollation = get_typcollation(constval->consttype); + + if (constval->constcollid != typcollation) + { + appendStringInfo(buf, " COLLATE %s", + generate_collation_name(constval->constcollid)); + } + } +} + +/* + * get_json_format - Parse back a JsonFormat node + */ +static void +get_json_format(JsonFormat *format, StringInfo buf) +{ + if (format->format_type == JS_FORMAT_DEFAULT) + return; + + appendStringInfoString(buf, + format->format_type == JS_FORMAT_JSONB ? 
+ " FORMAT JSONB" : " FORMAT JSON"); + + if (format->encoding != JS_ENC_DEFAULT) + { + const char *encoding; + + encoding = + format->encoding == JS_ENC_UTF16 ? "UTF16" : + format->encoding == JS_ENC_UTF32 ? "UTF32" : "UTF8"; + + appendStringInfo(buf, " ENCODING %s", encoding); + } +} + +/* + * get_json_returning - Parse back a JsonReturning structure + */ +static void +get_json_returning(JsonReturning *returning, StringInfo buf, + bool json_format_by_default) +{ + if (!OidIsValid(returning->typid)) + return; + + appendStringInfo(buf, " RETURNING %s", + format_type_with_typemod(returning->typid, + returning->typmod)); + + if (!json_format_by_default || + returning->format->format_type != + (returning->typid == JSONBOID ? JS_FORMAT_JSONB : JS_FORMAT_JSON)) + get_json_format(returning->format, buf); +} + +/* + * get_json_constructor - Parse back a JsonConstructorExpr node + */ +static void +get_json_constructor(JsonConstructorExpr *ctor, deparse_context *context, + bool showimplicit) +{ + StringInfo buf = context->buf; + const char *funcname; + bool is_json_object; + int curridx; + ListCell *lc; + + if (ctor->type == JSCTOR_JSON_OBJECTAGG) + { + get_json_agg_constructor(ctor, context, "JSON_OBJECTAGG", true); + return; + } + else if (ctor->type == JSCTOR_JSON_ARRAYAGG) + { + get_json_agg_constructor(ctor, context, "JSON_ARRAYAGG", false); + return; + } + + switch (ctor->type) + { + case JSCTOR_JSON_OBJECT: + funcname = "JSON_OBJECT"; + break; + case JSCTOR_JSON_ARRAY: + funcname = "JSON_ARRAY"; + break; + default: + elog(ERROR, "invalid JsonConstructorType %d", ctor->type); + } + + appendStringInfo(buf, "%s(", funcname); + + is_json_object = ctor->type == JSCTOR_JSON_OBJECT; + foreach(lc, ctor->args) + { + curridx = foreach_current_index(lc); + if (curridx > 0) + { + const char *sep; + + sep = (is_json_object && (curridx % 2) != 0) ? 
" : " : ", "; + appendStringInfoString(buf, sep); + } + + get_rule_expr((Node *) lfirst(lc), context, true); + } + + get_json_constructor_options(ctor, buf); + appendStringInfo(buf, ")"); +} + +/* + * Append options, if any, to the JSON constructor being deparsed + */ +static void +get_json_constructor_options(JsonConstructorExpr *ctor, StringInfo buf) +{ + if (ctor->absent_on_null) + { + if (ctor->type == JSCTOR_JSON_OBJECT || + ctor->type == JSCTOR_JSON_OBJECTAGG) + appendStringInfoString(buf, " ABSENT ON NULL"); + } + else + { + if (ctor->type == JSCTOR_JSON_ARRAY || + ctor->type == JSCTOR_JSON_ARRAYAGG) + appendStringInfoString(buf, " NULL ON NULL"); + } + + if (ctor->unique) + appendStringInfoString(buf, " WITH UNIQUE KEYS"); + + get_json_returning(ctor->returning, buf, true); +} + +/* + * get_json_agg_constructor - Parse back an aggregate JsonConstructorExpr node + */ +static void +get_json_agg_constructor(JsonConstructorExpr *ctor, deparse_context *context, + const char *funcname, bool is_json_objectagg) +{ + StringInfoData options; + + initStringInfo(&options); + get_json_constructor_options(ctor, &options); + + if (IsA(ctor->func, Aggref)) + get_agg_expr_helper((Aggref *) ctor->func, context, + (Aggref *) ctor->func, + funcname, options.data, is_json_objectagg); + else if (IsA(ctor->func, WindowFunc)) + get_windowfunc_expr_helper((WindowFunc *) ctor->func, context, + funcname, options.data, + is_json_objectagg); + else + elog(ERROR, "invalid JsonConstructorExpr underlying node type: %d", + nodeTag(ctor->func)); +} + +/* + * simple_quote_literal - Format a string as a SQL literal, append to buf + */ +static void +simple_quote_literal(StringInfo buf, const char *val) +{ + const char *valptr; + + /* + * We form the string literal according to the prevailing setting of + * standard_conforming_strings; we never use E''. User is responsible for + * making sure result is used correctly. + */ + appendStringInfoChar(buf, '\''); + for (valptr = val; *valptr; valptr++) + { + char ch = *valptr; + + if (SQL_STR_DOUBLE(ch, !standard_conforming_strings)) + appendStringInfoChar(buf, ch); + appendStringInfoChar(buf, ch); + } + appendStringInfoChar(buf, '\''); +} + + +/* ---------- + * get_sublink_expr - Parse back a sublink + * ---------- + */ +static void +get_sublink_expr(SubLink *sublink, deparse_context *context) +{ + StringInfo buf = context->buf; + Query *query = (Query *) (sublink->subselect); + char *opname = NULL; + bool need_paren; + + if (sublink->subLinkType == ARRAY_SUBLINK) + appendStringInfoString(buf, "ARRAY("); + else + appendStringInfoChar(buf, '('); + + /* + * Note that we print the name of only the first operator, when there are + * multiple combining operators. This is an approximation that could go + * wrong in various scenarios (operators in different schemas, renamed + * operators, etc) but there is not a whole lot we can do about it, since + * the syntax allows only one operator to be shown. 
+ */ + if (sublink->testexpr) + { + if (IsA(sublink->testexpr, OpExpr)) + { + /* single combining operator */ + OpExpr *opexpr = (OpExpr *) sublink->testexpr; + + get_rule_expr(linitial(opexpr->args), context, true); + opname = generate_operator_name(opexpr->opno, + exprType(linitial(opexpr->args)), + exprType(lsecond(opexpr->args))); + } + else if (IsA(sublink->testexpr, BoolExpr)) + { + /* multiple combining operators, = or <> cases */ + char *sep; + ListCell *l; + + appendStringInfoChar(buf, '('); + sep = ""; + foreach(l, ((BoolExpr *) sublink->testexpr)->args) + { + OpExpr *opexpr = lfirst_node(OpExpr, l); + + appendStringInfoString(buf, sep); + get_rule_expr(linitial(opexpr->args), context, true); + if (!opname) + opname = generate_operator_name(opexpr->opno, + exprType(linitial(opexpr->args)), + exprType(lsecond(opexpr->args))); + sep = ", "; + } + appendStringInfoChar(buf, ')'); + } + else if (IsA(sublink->testexpr, RowCompareExpr)) + { + /* multiple combining operators, < <= > >= cases */ + RowCompareExpr *rcexpr = (RowCompareExpr *) sublink->testexpr; + + appendStringInfoChar(buf, '('); + get_rule_expr((Node *) rcexpr->largs, context, true); + opname = generate_operator_name(linitial_oid(rcexpr->opnos), + exprType(linitial(rcexpr->largs)), + exprType(linitial(rcexpr->rargs))); + appendStringInfoChar(buf, ')'); + } + else + elog(ERROR, "unrecognized testexpr type: %d", + (int) nodeTag(sublink->testexpr)); + } + + need_paren = true; + + switch (sublink->subLinkType) + { + case EXISTS_SUBLINK: + appendStringInfoString(buf, "EXISTS "); + break; + + case ANY_SUBLINK: + if (strcmp(opname, "=") == 0) /* Represent = ANY as IN */ + appendStringInfoString(buf, " IN "); + else + appendStringInfo(buf, " %s ANY ", opname); + break; + + case ALL_SUBLINK: + appendStringInfo(buf, " %s ALL ", opname); + break; + + case ROWCOMPARE_SUBLINK: + appendStringInfo(buf, " %s ", opname); + break; + + case EXPR_SUBLINK: + case MULTIEXPR_SUBLINK: + case ARRAY_SUBLINK: + need_paren = false; + break; + + case CTE_SUBLINK: /* shouldn't occur in a SubLink */ + default: + elog(ERROR, "unrecognized sublink type: %d", + (int) sublink->subLinkType); + break; + } + + if (need_paren) + appendStringInfoChar(buf, '('); + + get_query_def(query, buf, context->namespaces, NULL, false, + context->prettyFlags, context->wrapColumn, + context->indentLevel); + + if (need_paren) + appendStringInfoString(buf, "))"); + else + appendStringInfoChar(buf, ')'); +} + + +/* ---------- + * get_tablefunc - Parse back a table function + * ---------- + */ +static void +get_tablefunc(TableFunc *tf, deparse_context *context, bool showimplicit) +{ + StringInfo buf = context->buf; + + /* XMLTABLE is the only existing implementation. 
*/ + + appendStringInfoString(buf, "XMLTABLE("); + + if (tf->ns_uris != NIL) + { + ListCell *lc1, + *lc2; + bool first = true; + + appendStringInfoString(buf, "XMLNAMESPACES ("); + forboth(lc1, tf->ns_uris, lc2, tf->ns_names) + { + Node *expr = (Node *) lfirst(lc1); + String *ns_node = lfirst_node(String, lc2); + + if (!first) + appendStringInfoString(buf, ", "); + else + first = false; + + if (ns_node != NULL) + { + get_rule_expr(expr, context, showimplicit); + appendStringInfo(buf, " AS %s", strVal(ns_node)); + } + else + { + appendStringInfoString(buf, "DEFAULT "); + get_rule_expr(expr, context, showimplicit); + } + } + appendStringInfoString(buf, "), "); + } + + appendStringInfoChar(buf, '('); + get_rule_expr((Node *) tf->rowexpr, context, showimplicit); + appendStringInfoString(buf, ") PASSING ("); + get_rule_expr((Node *) tf->docexpr, context, showimplicit); + appendStringInfoChar(buf, ')'); + + if (tf->colexprs != NIL) + { + ListCell *l1; + ListCell *l2; + ListCell *l3; + ListCell *l4; + ListCell *l5; + int colnum = 0; + + appendStringInfoString(buf, " COLUMNS "); + forfive(l1, tf->colnames, l2, tf->coltypes, l3, tf->coltypmods, + l4, tf->colexprs, l5, tf->coldefexprs) + { + char *colname = strVal(lfirst(l1)); + Oid typid = lfirst_oid(l2); + int32 typmod = lfirst_int(l3); + Node *colexpr = (Node *) lfirst(l4); + Node *coldefexpr = (Node *) lfirst(l5); + bool ordinality = (tf->ordinalitycol == colnum); + bool notnull = bms_is_member(colnum, tf->notnulls); + + if (colnum > 0) + appendStringInfoString(buf, ", "); + colnum++; + + appendStringInfo(buf, "%s %s", quote_identifier(colname), + ordinality ? "FOR ORDINALITY" : + format_type_with_typemod(typid, typmod)); + if (ordinality) + continue; + + if (coldefexpr != NULL) + { + appendStringInfoString(buf, " DEFAULT ("); + get_rule_expr((Node *) coldefexpr, context, showimplicit); + appendStringInfoChar(buf, ')'); + } + if (colexpr != NULL) + { + appendStringInfoString(buf, " PATH ("); + get_rule_expr((Node *) colexpr, context, showimplicit); + appendStringInfoChar(buf, ')'); + } + if (notnull) + appendStringInfoString(buf, " NOT NULL"); + } + } + + appendStringInfoChar(buf, ')'); +} + +/* ---------- + * get_from_clause - Parse back a FROM clause + * + * "prefix" is the keyword that denotes the start of the list of FROM + * elements. It is FROM when used to parse back SELECT and UPDATE, but + * is USING when parsing back DELETE. + * ---------- + */ +static void +get_from_clause(Query *query, const char *prefix, deparse_context *context) +{ + StringInfo buf = context->buf; + bool first = true; + ListCell *l; + + /* + * We use the query's jointree as a guide to what to print. However, we + * must ignore auto-added RTEs that are marked not inFromCl. (These can + * only appear at the top level of the jointree, so it's sufficient to + * check here.) This check also ensures we ignore the rule pseudo-RTEs + * for NEW and OLD. 
+ */ + foreach(l, query->jointree->fromlist) + { + Node *jtnode = (Node *) lfirst(l); + + if (IsA(jtnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jtnode)->rtindex; + RangeTblEntry *rte = rt_fetch(varno, query->rtable); + + if (!rte->inFromCl) + continue; + } + + if (first) + { + appendContextKeyword(context, prefix, + -PRETTYINDENT_STD, PRETTYINDENT_STD, 2); + first = false; + + get_from_clause_item(jtnode, query, context); + } + else + { + StringInfoData itembuf; + + appendStringInfoString(buf, ", "); + + /* + * Put the new FROM item's text into itembuf so we can decide + * after we've got it whether or not it needs to go on a new line. + */ + initStringInfo(&itembuf); + context->buf = &itembuf; + + get_from_clause_item(jtnode, query, context); + + /* Restore context's output buffer */ + context->buf = buf; + + /* Consider line-wrapping if enabled */ + if (PRETTY_INDENT(context) && context->wrapColumn >= 0) + { + /* Does the new item start with a new line? */ + if (itembuf.len > 0 && itembuf.data[0] == '\n') + { + /* If so, we shouldn't add anything */ + /* instead, remove any trailing spaces currently in buf */ + removeStringInfoSpaces(buf); + } + else + { + char *trailing_nl; + + /* Locate the start of the current line in the buffer */ + trailing_nl = strrchr(buf->data, '\n'); + if (trailing_nl == NULL) + trailing_nl = buf->data; + else + trailing_nl++; + + /* + * Add a newline, plus some indentation, if the new item + * would cause an overflow. + */ + if (strlen(trailing_nl) + itembuf.len > context->wrapColumn) + appendContextKeyword(context, "", -PRETTYINDENT_STD, + PRETTYINDENT_STD, + PRETTYINDENT_VAR); + } + } + + /* Add the new item */ + appendBinaryStringInfo(buf, itembuf.data, itembuf.len); + + /* clean up */ + pfree(itembuf.data); + } + } +} + +static void +get_from_clause_item(Node *jtnode, Query *query, deparse_context *context) +{ + StringInfo buf = context->buf; + deparse_namespace *dpns = (deparse_namespace *) linitial(context->namespaces); + + if (IsA(jtnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jtnode)->rtindex; + RangeTblEntry *rte = rt_fetch(varno, query->rtable); + deparse_columns *colinfo = deparse_columns_fetch(varno, dpns); + RangeTblFunction *rtfunc1 = NULL; + + if (rte->lateral) + appendStringInfoString(buf, "LATERAL "); + + /* Print the FROM item proper */ + switch (rte->rtekind) + { + case RTE_RELATION: + /* Normal relation RTE */ + appendStringInfo(buf, "%s%s", + only_marker(rte), + generate_relation_name(rte->relid, + context->namespaces)); + break; + case RTE_SUBQUERY: + /* Subquery RTE */ + appendStringInfoChar(buf, '('); + get_query_def(rte->subquery, buf, context->namespaces, NULL, + true, + context->prettyFlags, context->wrapColumn, + context->indentLevel); + appendStringInfoChar(buf, ')'); + break; + case RTE_FUNCTION: + /* Function RTE */ + rtfunc1 = (RangeTblFunction *) linitial(rte->functions); + + /* + * Omit ROWS FROM() syntax for just one function, unless it + * has both a coldeflist and WITH ORDINALITY. If it has both, + * we must use ROWS FROM() syntax to avoid ambiguity about + * whether the coldeflist includes the ordinality column. 
+ */ + if (list_length(rte->functions) == 1 && + (rtfunc1->funccolnames == NIL || !rte->funcordinality)) + { + get_rule_expr_funccall(rtfunc1->funcexpr, context, true); + /* we'll print the coldeflist below, if it has one */ + } + else + { + bool all_unnest; + ListCell *lc; + + /* + * If all the function calls in the list are to unnest, + * and none need a coldeflist, then collapse the list back + * down to UNNEST(args). (If we had more than one + * built-in unnest function, this would get more + * difficult.) + * + * XXX This is pretty ugly, since it makes not-terribly- + * future-proof assumptions about what the parser would do + * with the output; but the alternative is to emit our + * nonstandard ROWS FROM() notation for what might have + * been a perfectly spec-compliant multi-argument + * UNNEST(). + */ + all_unnest = true; + foreach(lc, rte->functions) + { + RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc); + + if (!IsA(rtfunc->funcexpr, FuncExpr) || + ((FuncExpr *) rtfunc->funcexpr)->funcid != F_UNNEST_ANYARRAY || + rtfunc->funccolnames != NIL) + { + all_unnest = false; + break; + } + } + + if (all_unnest) + { + List *allargs = NIL; + + foreach(lc, rte->functions) + { + RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc); + List *args = ((FuncExpr *) rtfunc->funcexpr)->args; + + allargs = list_concat(allargs, args); + } + + appendStringInfoString(buf, "UNNEST("); + get_rule_expr((Node *) allargs, context, true); + appendStringInfoChar(buf, ')'); + } + else + { + int funcno = 0; + + appendStringInfoString(buf, "ROWS FROM("); + foreach(lc, rte->functions) + { + RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc); + + if (funcno > 0) + appendStringInfoString(buf, ", "); + get_rule_expr_funccall(rtfunc->funcexpr, context, true); + if (rtfunc->funccolnames != NIL) + { + /* Reconstruct the column definition list */ + appendStringInfoString(buf, " AS "); + get_from_clause_coldeflist(rtfunc, + NULL, + context); + } + funcno++; + } + appendStringInfoChar(buf, ')'); + } + /* prevent printing duplicate coldeflist below */ + rtfunc1 = NULL; + } + if (rte->funcordinality) + appendStringInfoString(buf, " WITH ORDINALITY"); + break; + case RTE_TABLEFUNC: + get_tablefunc(rte->tablefunc, context, true); + break; + case RTE_VALUES: + /* Values list RTE */ + appendStringInfoChar(buf, '('); + get_values_def(rte->values_lists, context); + appendStringInfoChar(buf, ')'); + break; + case RTE_CTE: + appendStringInfoString(buf, quote_identifier(rte->ctename)); + break; + default: + elog(ERROR, "unrecognized RTE kind: %d", (int) rte->rtekind); + break; + } + + /* Print the relation alias, if needed */ + get_rte_alias(rte, varno, false, context); + + /* Print the column definitions or aliases, if needed */ + if (rtfunc1 && rtfunc1->funccolnames != NIL) + { + /* Reconstruct the columndef list, which is also the aliases */ + get_from_clause_coldeflist(rtfunc1, colinfo, context); + } + else + { + /* Else print column aliases as needed */ + get_column_alias_list(colinfo, context); + } + + /* Tablesample clause must go after any alias */ + if (rte->rtekind == RTE_RELATION && rte->tablesample) + get_tablesample_def(rte->tablesample, context); + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + deparse_columns *colinfo = deparse_columns_fetch(j->rtindex, dpns); + bool need_paren_on_right; + + need_paren_on_right = PRETTY_PAREN(context) && + !IsA(j->rarg, RangeTblRef) && + !(IsA(j->rarg, JoinExpr) && ((JoinExpr *) j->rarg)->alias != NULL); + + if 
(!PRETTY_PAREN(context) || j->alias != NULL) + appendStringInfoChar(buf, '('); + + get_from_clause_item(j->larg, query, context); + + switch (j->jointype) + { + case JOIN_INNER: + if (j->quals) + appendContextKeyword(context, " JOIN ", + -PRETTYINDENT_STD, + PRETTYINDENT_STD, + PRETTYINDENT_JOIN); + else + appendContextKeyword(context, " CROSS JOIN ", + -PRETTYINDENT_STD, + PRETTYINDENT_STD, + PRETTYINDENT_JOIN); + break; + case JOIN_LEFT: + appendContextKeyword(context, " LEFT JOIN ", + -PRETTYINDENT_STD, + PRETTYINDENT_STD, + PRETTYINDENT_JOIN); + break; + case JOIN_FULL: + appendContextKeyword(context, " FULL JOIN ", + -PRETTYINDENT_STD, + PRETTYINDENT_STD, + PRETTYINDENT_JOIN); + break; + case JOIN_RIGHT: + appendContextKeyword(context, " RIGHT JOIN ", + -PRETTYINDENT_STD, + PRETTYINDENT_STD, + PRETTYINDENT_JOIN); + break; + default: + elog(ERROR, "unrecognized join type: %d", + (int) j->jointype); + } + + if (need_paren_on_right) + appendStringInfoChar(buf, '('); + get_from_clause_item(j->rarg, query, context); + if (need_paren_on_right) + appendStringInfoChar(buf, ')'); + + if (j->usingClause) + { + ListCell *lc; + bool first = true; + + appendStringInfoString(buf, " USING ("); + /* Use the assigned names, not what's in usingClause */ + foreach(lc, colinfo->usingNames) + { + char *colname = (char *) lfirst(lc); + + if (first) + first = false; + else + appendStringInfoString(buf, ", "); + appendStringInfoString(buf, quote_identifier(colname)); + } + appendStringInfoChar(buf, ')'); + + if (j->join_using_alias) + appendStringInfo(buf, " AS %s", + quote_identifier(j->join_using_alias->aliasname)); + } + else if (j->quals) + { + appendStringInfoString(buf, " ON "); + if (!PRETTY_PAREN(context)) + appendStringInfoChar(buf, '('); + get_rule_expr(j->quals, context, false); + if (!PRETTY_PAREN(context)) + appendStringInfoChar(buf, ')'); + } + else if (j->jointype != JOIN_INNER) + { + /* If we didn't say CROSS JOIN above, we must provide an ON */ + appendStringInfoString(buf, " ON TRUE"); + } + + if (!PRETTY_PAREN(context) || j->alias != NULL) + appendStringInfoChar(buf, ')'); + + /* Yes, it's correct to put alias after the right paren ... */ + if (j->alias != NULL) + { + /* + * Note that it's correct to emit an alias clause if and only if + * there was one originally. Otherwise we'd be converting a named + * join to unnamed or vice versa, which creates semantic + * subtleties we don't want. However, we might print a different + * alias name than was there originally. + */ + appendStringInfo(buf, " %s", + quote_identifier(get_rtable_name(j->rtindex, + context))); + get_column_alias_list(colinfo, context); + } + } + else + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(jtnode)); +} + +/* + * get_rte_alias - print the relation's alias, if needed + * + * If printed, the alias is preceded by a space, or by " AS " if use_as is true. 
+ */ +static void +get_rte_alias(RangeTblEntry *rte, int varno, bool use_as, + deparse_context *context) +{ + deparse_namespace *dpns = (deparse_namespace *) linitial(context->namespaces); + char *refname = get_rtable_name(varno, context); + deparse_columns *colinfo = deparse_columns_fetch(varno, dpns); + bool printalias = false; + + if (rte->alias != NULL) + { + /* Always print alias if user provided one */ + printalias = true; + } + else if (colinfo->printaliases) + { + /* Always print alias if we need to print column aliases */ + printalias = true; + } + else if (rte->rtekind == RTE_RELATION) + { + /* + * No need to print alias if it's same as relation name (this would + * normally be the case, but not if set_rtable_names had to resolve a + * conflict). + */ + if (strcmp(refname, get_relation_name(rte->relid)) != 0) + printalias = true; + } + else if (rte->rtekind == RTE_FUNCTION) + { + /* + * For a function RTE, always print alias. This covers possible + * renaming of the function and/or instability of the FigureColname + * rules for things that aren't simple functions. Note we'd need to + * force it anyway for the columndef list case. + */ + printalias = true; + } + else if (rte->rtekind == RTE_SUBQUERY || + rte->rtekind == RTE_VALUES) + { + /* + * For a subquery, always print alias. This makes the output + * SQL-spec-compliant, even though we allow such aliases to be omitted + * on input. + */ + printalias = true; + } + else if (rte->rtekind == RTE_CTE) + { + /* + * No need to print alias if it's same as CTE name (this would + * normally be the case, but not if set_rtable_names had to resolve a + * conflict). + */ + if (strcmp(refname, rte->ctename) != 0) + printalias = true; + } + + if (printalias) + appendStringInfo(context->buf, "%s%s", + use_as ? " AS " : " ", + quote_identifier(refname)); +} + +/* + * get_column_alias_list - print column alias list for an RTE + * + * Caller must already have printed the relation's alias name. + */ +static void +get_column_alias_list(deparse_columns *colinfo, deparse_context *context) +{ + StringInfo buf = context->buf; + int i; + bool first = true; + + /* Don't print aliases if not needed */ + if (!colinfo->printaliases) + return; + + for (i = 0; i < colinfo->num_new_cols; i++) + { + char *colname = colinfo->new_colnames[i]; + + if (first) + { + appendStringInfoChar(buf, '('); + first = false; + } + else + appendStringInfoString(buf, ", "); + appendStringInfoString(buf, quote_identifier(colname)); + } + if (!first) + appendStringInfoChar(buf, ')'); +} + +/* + * get_from_clause_coldeflist - reproduce FROM clause coldeflist + * + * When printing a top-level coldeflist (which is syntactically also the + * relation's column alias list), use column names from colinfo. But when + * printing a coldeflist embedded inside ROWS FROM(), we prefer to use the + * original coldeflist's names, which are available in rtfunc->funccolnames. + * Pass NULL for colinfo to select the latter behavior. + * + * The coldeflist is appended immediately (no space) to buf. Caller is + * responsible for ensuring that an alias or AS is present before it. 
+ */ +static void +get_from_clause_coldeflist(RangeTblFunction *rtfunc, + deparse_columns *colinfo, + deparse_context *context) +{ + StringInfo buf = context->buf; + ListCell *l1; + ListCell *l2; + ListCell *l3; + ListCell *l4; + int i; + + appendStringInfoChar(buf, '('); + + i = 0; + forfour(l1, rtfunc->funccoltypes, + l2, rtfunc->funccoltypmods, + l3, rtfunc->funccolcollations, + l4, rtfunc->funccolnames) + { + Oid atttypid = lfirst_oid(l1); + int32 atttypmod = lfirst_int(l2); + Oid attcollation = lfirst_oid(l3); + char *attname; + + if (colinfo) + attname = colinfo->colnames[i]; + else + attname = strVal(lfirst(l4)); + + Assert(attname); /* shouldn't be any dropped columns here */ + + if (i > 0) + appendStringInfoString(buf, ", "); + appendStringInfo(buf, "%s %s", + quote_identifier(attname), + format_type_with_typemod(atttypid, atttypmod)); + if (OidIsValid(attcollation) && + attcollation != get_typcollation(atttypid)) + appendStringInfo(buf, " COLLATE %s", + generate_collation_name(attcollation)); + + i++; + } + + appendStringInfoChar(buf, ')'); +} + +/* + * get_tablesample_def - print a TableSampleClause + */ +static void +get_tablesample_def(TableSampleClause *tablesample, deparse_context *context) +{ + StringInfo buf = context->buf; + Oid argtypes[1]; + int nargs; + ListCell *l; + + /* + * We should qualify the handler's function name if it wouldn't be + * resolved by lookup in the current search path. + */ + argtypes[0] = INTERNALOID; + appendStringInfo(buf, " TABLESAMPLE %s (", + generate_function_name(tablesample->tsmhandler, 1, + NIL, argtypes, + false, NULL, EXPR_KIND_NONE)); + + nargs = 0; + foreach(l, tablesample->args) + { + if (nargs++ > 0) + appendStringInfoString(buf, ", "); + get_rule_expr((Node *) lfirst(l), context, false); + } + appendStringInfoChar(buf, ')'); + + if (tablesample->repeatable != NULL) + { + appendStringInfoString(buf, " REPEATABLE ("); + get_rule_expr((Node *) tablesample->repeatable, context, false); + appendStringInfoChar(buf, ')'); + } +} + +/* + * get_opclass_name - fetch name of an index operator class + * + * The opclass name is appended (after a space) to buf. + * + * Output is suppressed if the opclass is the default for the given + * actual_datatype. (If you don't want this behavior, just pass + * InvalidOid for actual_datatype.) + */ +static void +get_opclass_name(Oid opclass, Oid actual_datatype, + StringInfo buf) +{ + HeapTuple ht_opc; + Form_pg_opclass opcrec; + char *opcname; + char *nspname; + + ht_opc = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclass)); + if (!HeapTupleIsValid(ht_opc)) + elog(ERROR, "cache lookup failed for opclass %u", opclass); + opcrec = (Form_pg_opclass) GETSTRUCT(ht_opc); + + if (!OidIsValid(actual_datatype) || + GetDefaultOpClass(actual_datatype, opcrec->opcmethod) != opclass) + { + /* Okay, we need the opclass name. Do we need to qualify it? */ + opcname = NameStr(opcrec->opcname); + if (OpclassIsVisible(opclass)) + appendStringInfo(buf, " %s", quote_identifier(opcname)); + else + { + nspname = get_namespace_name_or_temp(opcrec->opcnamespace); + appendStringInfo(buf, " %s.%s", + quote_identifier(nspname), + quote_identifier(opcname)); + } + } + ReleaseSysCache(ht_opc); +} + +/* + * generate_opclass_name + * Compute the name to display for an opclass specified by OID + * + * The result includes all necessary quoting and schema-prefixing. 
+ */ +char * +generate_opclass_name(Oid opclass) +{ + StringInfoData buf; + + initStringInfo(&buf); + get_opclass_name(opclass, InvalidOid, &buf); + + return &buf.data[1]; /* get_opclass_name() prepends space */ +} + +/* + * processIndirection - take care of array and subfield assignment + * + * We strip any top-level FieldStore or assignment SubscriptingRef nodes that + * appear in the input, printing them as decoration for the base column + * name (which we assume the caller just printed). We might also need to + * strip CoerceToDomain nodes, but only ones that appear above assignment + * nodes. + * + * Returns the subexpression that's to be assigned. + */ +static Node * +processIndirection(Node *node, deparse_context *context) +{ + StringInfo buf = context->buf; + CoerceToDomain *cdomain = NULL; + + for (;;) + { + if (node == NULL) + break; + if (IsA(node, FieldStore)) + { + FieldStore *fstore = (FieldStore *) node; + Oid typrelid; + char *fieldname; + + /* lookup tuple type */ + typrelid = get_typ_typrelid(fstore->resulttype); + if (!OidIsValid(typrelid)) + elog(ERROR, "argument type %s of FieldStore is not a tuple type", + format_type_be(fstore->resulttype)); + + /* + * Print the field name. There should only be one target field in + * stored rules. There could be more than that in executable + * target lists, but this function cannot be used for that case. + */ + Assert(list_length(fstore->fieldnums) == 1); + fieldname = get_attname(typrelid, + linitial_int(fstore->fieldnums), false); + appendStringInfo(buf, ".%s", quote_identifier(fieldname)); + + /* + * We ignore arg since it should be an uninteresting reference to + * the target column or subcolumn. + */ + node = (Node *) linitial(fstore->newvals); + } + else if (IsA(node, SubscriptingRef)) + { + SubscriptingRef *sbsref = (SubscriptingRef *) node; + + if (sbsref->refassgnexpr == NULL) + break; + + printSubscripts(sbsref, context); + + /* + * We ignore refexpr since it should be an uninteresting reference + * to the target column or subcolumn. + */ + node = (Node *) sbsref->refassgnexpr; + } + else if (IsA(node, CoerceToDomain)) + { + cdomain = (CoerceToDomain *) node; + /* If it's an explicit domain coercion, we're done */ + if (cdomain->coercionformat != COERCE_IMPLICIT_CAST) + break; + /* Tentatively descend past the CoerceToDomain */ + node = (Node *) cdomain->arg; + } + else + break; + } + + /* + * If we descended past a CoerceToDomain whose argument turned out not to + * be a FieldStore or array assignment, back up to the CoerceToDomain. + * (This is not enough to be fully correct if there are nested implicit + * CoerceToDomains, but such cases shouldn't ever occur.) 
+ */ + if (cdomain && node == (Node *) cdomain->arg) + node = (Node *) cdomain; + + return node; +} + +static void +printSubscripts(SubscriptingRef *sbsref, deparse_context *context) +{ + StringInfo buf = context->buf; + ListCell *lowlist_item; + ListCell *uplist_item; + + lowlist_item = list_head(sbsref->reflowerindexpr); /* could be NULL */ + foreach(uplist_item, sbsref->refupperindexpr) + { + appendStringInfoChar(buf, '['); + if (lowlist_item) + { + /* If subexpression is NULL, get_rule_expr prints nothing */ + get_rule_expr((Node *) lfirst(lowlist_item), context, false); + appendStringInfoChar(buf, ':'); + lowlist_item = lnext(sbsref->reflowerindexpr, lowlist_item); + } + /* If subexpression is NULL, get_rule_expr prints nothing */ + get_rule_expr((Node *) lfirst(uplist_item), context, false); + appendStringInfoChar(buf, ']'); + } +} + +/* + * quote_identifier - Quote an identifier only if needed + * + * When quotes are needed, we palloc the required space; slightly + * space-wasteful but well worth it for notational simplicity. + */ +const char * +quote_identifier(const char *ident) +{ + /* + * Can avoid quoting if ident starts with a lowercase letter or underscore + * and contains only lowercase letters, digits, and underscores, *and* is + * not any SQL keyword. Otherwise, supply quotes. + */ + int nquotes = 0; + bool safe; + const char *ptr; + char *result; + char *optr; + + /* + * would like to use <ctype.h> macros here, but they might yield unwanted + * locale-specific results... + */ + safe = ((ident[0] >= 'a' && ident[0] <= 'z') || ident[0] == '_'); + + for (ptr = ident; *ptr; ptr++) + { + char ch = *ptr; + + if ((ch >= 'a' && ch <= 'z') || + (ch >= '0' && ch <= '9') || + (ch == '_')) + { + /* okay */ + } + else + { + safe = false; + if (ch == '"') + nquotes++; + } + } + + if (quote_all_identifiers) + safe = false; + + if (safe) + { + /* + * Check for keyword. We quote keywords except for unreserved ones. + * (In some cases we could avoid quoting a col_name or type_func_name + * keyword, but it seems much harder than it's worth to tell that.) + * + * Note: ScanKeywordLookup() does case-insensitive comparison, but + * that's fine, since we already know we have all-lower-case. + */ + int kwnum = ScanKeywordLookup(ident, &ScanKeywords); + + if (kwnum >= 0 && ScanKeywordCategories[kwnum] != UNRESERVED_KEYWORD) + safe = false; + } + + if (safe) + return ident; /* no change needed */ + + result = (char *) palloc(strlen(ident) + nquotes + 2 + 1); + + optr = result; + *optr++ = '"'; + for (ptr = ident; *ptr; ptr++) + { + char ch = *ptr; + + if (ch == '"') + *optr++ = '"'; + *optr++ = ch; + } + *optr++ = '"'; + *optr = '\0'; + + return result; +} + +/* + * quote_qualified_identifier - Quote a possibly-qualified identifier + * + * Return a name of the form qualifier.ident, or just ident if qualifier + * is NULL, quoting each component if necessary. The result is palloc'd. + */ +char * +quote_qualified_identifier(const char *qualifier, + const char *ident) +{ + StringInfoData buf; + + initStringInfo(&buf); + if (qualifier) + appendStringInfo(&buf, "%s.", quote_identifier(qualifier)); + appendStringInfoString(&buf, quote_identifier(ident)); + return buf.data; +} + +/* + * get_relation_name + * Get the unqualified name of a relation specified by OID + * + * This differs from the underlying get_rel_name() function in that it will + * throw error instead of silently returning NULL if the OID is bad. 
+ */ +static char * +get_relation_name(Oid relid) +{ + char *relname = get_rel_name(relid); + + if (!relname) + elog(ERROR, "cache lookup failed for relation %u", relid); + return relname; +} + +/* + * generate_relation_name + * Compute the name to display for a relation specified by OID + * + * The result includes all necessary quoting and schema-prefixing. + * + * If namespaces isn't NIL, it must be a list of deparse_namespace nodes. + * We will forcibly qualify the relation name if it equals any CTE name + * visible in the namespace list. + */ +static char * +generate_relation_name(Oid relid, List *namespaces) +{ + HeapTuple tp; + Form_pg_class reltup; + bool need_qual; + ListCell *nslist; + char *relname; + char *nspname; + char *result; + + tp = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for relation %u", relid); + reltup = (Form_pg_class) GETSTRUCT(tp); + relname = NameStr(reltup->relname); + + /* Check for conflicting CTE name */ + need_qual = false; + foreach(nslist, namespaces) + { + deparse_namespace *dpns = (deparse_namespace *) lfirst(nslist); + ListCell *ctlist; + + foreach(ctlist, dpns->ctes) + { + CommonTableExpr *cte = (CommonTableExpr *) lfirst(ctlist); + + if (strcmp(cte->ctename, relname) == 0) + { + need_qual = true; + break; + } + } + if (need_qual) + break; + } + + /* Otherwise, qualify the name if not visible in search path */ + if (!need_qual) + need_qual = !RelationIsVisible(relid); + + if (need_qual) + nspname = get_namespace_name_or_temp(reltup->relnamespace); + else + nspname = NULL; + + result = quote_qualified_identifier(nspname, relname); + + ReleaseSysCache(tp); + + return result; +} + +/* + * generate_qualified_relation_name + * Compute the name to display for a relation specified by OID + * + * As above, but unconditionally schema-qualify the name. + */ +static char * +generate_qualified_relation_name(Oid relid) +{ + HeapTuple tp; + Form_pg_class reltup; + char *relname; + char *nspname; + char *result; + + tp = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for relation %u", relid); + reltup = (Form_pg_class) GETSTRUCT(tp); + relname = NameStr(reltup->relname); + + nspname = get_namespace_name_or_temp(reltup->relnamespace); + if (!nspname) + elog(ERROR, "cache lookup failed for namespace %u", + reltup->relnamespace); + + result = quote_qualified_identifier(nspname, relname); + + ReleaseSysCache(tp); + + return result; +} + +/* + * generate_function_name + * Compute the name to display for a function specified by OID, + * given that it is being called with the specified actual arg names and + * types. (Those matter because of ambiguous-function resolution rules.) + * + * If we're dealing with a potentially variadic function (in practice, this + * means a FuncExpr or Aggref, not some other way of calling a function), then + * has_variadic must specify whether variadic arguments have been merged, + * and *use_variadic_p will be set to indicate whether to print VARIADIC in + * the output. For non-FuncExpr cases, has_variadic should be false and + * use_variadic_p can be NULL. + * + * The result includes all necessary quoting and schema-prefixing. 
+ */ +static char * +generate_function_name(Oid funcid, int nargs, List *argnames, Oid *argtypes, + bool has_variadic, bool *use_variadic_p, + ParseExprKind special_exprkind) +{ + char *result; + HeapTuple proctup; + Form_pg_proc procform; + char *proname; + bool use_variadic; + char *nspname; + FuncDetailCode p_result; + Oid p_funcid; + Oid p_rettype; + bool p_retset; + int p_nvargs; + Oid p_vatype; + Oid *p_true_typeids; + bool force_qualify = false; + + proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(proctup)) + elog(ERROR, "cache lookup failed for function %u", funcid); + procform = (Form_pg_proc) GETSTRUCT(proctup); + proname = NameStr(procform->proname); + + /* + * Due to parser hacks to avoid needing to reserve CUBE, we need to force + * qualification in some special cases. + */ + if (special_exprkind == EXPR_KIND_GROUP_BY) + { + if (strcmp(proname, "cube") == 0 || strcmp(proname, "rollup") == 0) + force_qualify = true; + } + + /* + * Determine whether VARIADIC should be printed. We must do this first + * since it affects the lookup rules in func_get_detail(). + * + * We always print VARIADIC if the function has a merged variadic-array + * argument. Note that this is always the case for functions taking a + * VARIADIC argument type other than VARIADIC ANY. If we omitted VARIADIC + * and printed the array elements as separate arguments, the call could + * match a newer non-VARIADIC function. + */ + if (use_variadic_p) + { + /* Parser should not have set funcvariadic unless fn is variadic */ + Assert(!has_variadic || OidIsValid(procform->provariadic)); + use_variadic = has_variadic; + *use_variadic_p = use_variadic; + } + else + { + Assert(!has_variadic); + use_variadic = false; + } + + /* + * The idea here is to schema-qualify only if the parser would fail to + * resolve the correct function given the unqualified func name with the + * specified argtypes and VARIADIC flag. But if we already decided to + * force qualification, then we can skip the lookup and pretend we didn't + * find it. + */ + if (!force_qualify) + p_result = func_get_detail(list_make1(makeString(proname)), + NIL, argnames, nargs, argtypes, + !use_variadic, true, false, + &p_funcid, &p_rettype, + &p_retset, &p_nvargs, &p_vatype, + &p_true_typeids, NULL); + else + { + p_result = FUNCDETAIL_NOTFOUND; + p_funcid = InvalidOid; + } + + if ((p_result == FUNCDETAIL_NORMAL || + p_result == FUNCDETAIL_AGGREGATE || + p_result == FUNCDETAIL_WINDOWFUNC) && + p_funcid == funcid) + nspname = NULL; + else + nspname = get_namespace_name_or_temp(procform->pronamespace); + + result = quote_qualified_identifier(nspname, proname); + + ReleaseSysCache(proctup); + + return result; +} + +/* + * generate_operator_name + * Compute the name to display for an operator specified by OID, + * given that it is being called with the specified actual arg types. + * (Arg types matter because of ambiguous-operator resolution rules. + * Pass InvalidOid for unused arg of a unary operator.) + * + * The result includes all necessary quoting and schema-prefixing, + * plus the OPERATOR() decoration needed to use a qualified operator name + * in an expression. 
+ */ +static char * +generate_operator_name(Oid operid, Oid arg1, Oid arg2) +{ + StringInfoData buf; + HeapTuple opertup; + Form_pg_operator operform; + char *oprname; + char *nspname; + Operator p_result; + + initStringInfo(&buf); + + opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(operid)); + if (!HeapTupleIsValid(opertup)) + elog(ERROR, "cache lookup failed for operator %u", operid); + operform = (Form_pg_operator) GETSTRUCT(opertup); + oprname = NameStr(operform->oprname); + + /* + * The idea here is to schema-qualify only if the parser would fail to + * resolve the correct operator given the unqualified op name with the + * specified argtypes. + */ + switch (operform->oprkind) + { + case 'b': + p_result = oper(NULL, list_make1(makeString(oprname)), arg1, arg2, + true, -1); + break; + case 'l': + p_result = left_oper(NULL, list_make1(makeString(oprname)), arg2, + true, -1); + break; + default: + elog(ERROR, "unrecognized oprkind: %d", operform->oprkind); + p_result = NULL; /* keep compiler quiet */ + break; + } + + if (p_result != NULL && oprid(p_result) == operid) + nspname = NULL; + else + { + nspname = get_namespace_name_or_temp(operform->oprnamespace); + appendStringInfo(&buf, "OPERATOR(%s.", quote_identifier(nspname)); + } + + appendStringInfoString(&buf, oprname); + + if (nspname) + appendStringInfoChar(&buf, ')'); + + if (p_result != NULL) + ReleaseSysCache(p_result); + + ReleaseSysCache(opertup); + + return buf.data; +} + +/* + * generate_operator_clause --- generate a binary-operator WHERE clause + * + * This is used for internally-generated-and-executed SQL queries, where + * precision is essential and readability is secondary. The basic + * requirement is to append "leftop op rightop" to buf, where leftop and + * rightop are given as strings and are assumed to yield types leftoptype + * and rightoptype; the operator is identified by OID. The complexity + * comes from needing to be sure that the parser will select the desired + * operator when the query is parsed. We always name the operator using + * OPERATOR(schema.op) syntax, so as to avoid search-path uncertainties. + * We have to emit casts too, if either input isn't already the input type + * of the operator; else we are at the mercy of the parser's heuristics for + * ambiguous-operator resolution. The caller must ensure that leftop and + * rightop are suitable arguments for a cast operation; it's best to insert + * parentheses if they aren't just variables or parameters. + */ +void +generate_operator_clause(StringInfo buf, + const char *leftop, Oid leftoptype, + Oid opoid, + const char *rightop, Oid rightoptype) +{ + HeapTuple opertup; + Form_pg_operator operform; + char *oprname; + char *nspname; + + opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(opoid)); + if (!HeapTupleIsValid(opertup)) + elog(ERROR, "cache lookup failed for operator %u", opoid); + operform = (Form_pg_operator) GETSTRUCT(opertup); + Assert(operform->oprkind == 'b'); + oprname = NameStr(operform->oprname); + + nspname = get_namespace_name(operform->oprnamespace); + + appendStringInfoString(buf, leftop); + if (leftoptype != operform->oprleft) + add_cast_to(buf, operform->oprleft); + appendStringInfo(buf, " OPERATOR(%s.", quote_identifier(nspname)); + appendStringInfoString(buf, oprname); + appendStringInfo(buf, ") %s", rightop); + if (rightoptype != operform->oprright) + add_cast_to(buf, operform->oprright); + + ReleaseSysCache(opertup); +} + +/* + * Add a cast specification to buf. 
We spell out the type name the hard way, + * intentionally not using format_type_be(). This is to avoid corner cases + * for CHARACTER, BIT, and perhaps other types, where specifying the type + * using SQL-standard syntax results in undesirable data truncation. By + * doing it this way we can be certain that the cast will have default (-1) + * target typmod. + */ +static void +add_cast_to(StringInfo buf, Oid typid) +{ + HeapTuple typetup; + Form_pg_type typform; + char *typname; + char *nspname; + + typetup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (!HeapTupleIsValid(typetup)) + elog(ERROR, "cache lookup failed for type %u", typid); + typform = (Form_pg_type) GETSTRUCT(typetup); + + typname = NameStr(typform->typname); + nspname = get_namespace_name_or_temp(typform->typnamespace); + + appendStringInfo(buf, "::%s.%s", + quote_identifier(nspname), quote_identifier(typname)); + + ReleaseSysCache(typetup); +} + +/* + * generate_qualified_type_name + * Compute the name to display for a type specified by OID + * + * This is different from format_type_be() in that we unconditionally + * schema-qualify the name. That also means no special syntax for + * SQL-standard type names ... although in current usage, this should + * only get used for domains, so such cases wouldn't occur anyway. + */ +static char * +generate_qualified_type_name(Oid typid) +{ + HeapTuple tp; + Form_pg_type typtup; + char *typname; + char *nspname; + char *result; + + tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for type %u", typid); + typtup = (Form_pg_type) GETSTRUCT(tp); + typname = NameStr(typtup->typname); + + nspname = get_namespace_name_or_temp(typtup->typnamespace); + if (!nspname) + elog(ERROR, "cache lookup failed for namespace %u", + typtup->typnamespace); + + result = quote_qualified_identifier(nspname, typname); + + ReleaseSysCache(tp); + + return result; +} + +/* + * generate_collation_name + * Compute the name to display for a collation specified by OID + * + * The result includes all necessary quoting and schema-prefixing. + */ +char * +generate_collation_name(Oid collid) +{ + HeapTuple tp; + Form_pg_collation colltup; + char *collname; + char *nspname; + char *result; + + tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for collation %u", collid); + colltup = (Form_pg_collation) GETSTRUCT(tp); + collname = NameStr(colltup->collname); + + if (!CollationIsVisible(collid)) + nspname = get_namespace_name_or_temp(colltup->collnamespace); + else + nspname = NULL; + + result = quote_qualified_identifier(nspname, collname); + + ReleaseSysCache(tp); + + return result; +} + +/* + * Given a C string, produce a TEXT datum. + * + * We assume that the input was palloc'd and may be freed. + */ +static text * +string_to_text(char *str) +{ + text *result; + + result = cstring_to_text(str); + pfree(str); + return result; +} + +/* + * Generate a C string representing a relation options from text[] datum. + */ +static void +get_reloptions(StringInfo buf, Datum reloptions) +{ + Datum *options; + int noptions; + int i; + + deconstruct_array_builtin(DatumGetArrayTypeP(reloptions), TEXTOID, + &options, NULL, &noptions); + + for (i = 0; i < noptions; i++) + { + char *option = TextDatumGetCString(options[i]); + char *name; + char *separator; + char *value; + + /* + * Each array element should have the form name=value. 
If the "=" is + * missing for some reason, treat it like an empty value. + */ + name = option; + separator = strchr(option, '='); + if (separator) + { + *separator = '\0'; + value = separator + 1; + } + else + value = ""; + + if (i > 0) + appendStringInfoString(buf, ", "); + appendStringInfo(buf, "%s=", quote_identifier(name)); + + /* + * In general we need to quote the value; but to avoid unnecessary + * clutter, do not quote if it is an identifier that would not need + * quoting. (We could also allow numbers, but that is a bit trickier + * than it looks --- for example, are leading zeroes significant? We + * don't want to assume very much here about what custom reloptions + * might mean.) + */ + if (quote_identifier(value) == value) + appendStringInfoString(buf, value); + else + simple_quote_literal(buf, value); + + pfree(option); + } +} + +/* + * Generate a C string representing a relation's reloptions, or NULL if none. + */ +static char * +flatten_reloptions(Oid relid) +{ + char *result = NULL; + HeapTuple tuple; + Datum reloptions; + bool isnull; + + tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for relation %u", relid); + + reloptions = SysCacheGetAttr(RELOID, tuple, + Anum_pg_class_reloptions, &isnull); + if (!isnull) + { + StringInfoData buf; + + initStringInfo(&buf); + get_reloptions(&buf, reloptions); + + result = buf.data; + } + + ReleaseSysCache(tuple); + + return result; +} + +/* + * get_range_partbound_string + * A C string representation of one range partition bound + */ +char * +get_range_partbound_string(List *bound_datums) +{ + deparse_context context; + StringInfo buf = makeStringInfo(); + ListCell *cell; + char *sep; + + memset(&context, 0, sizeof(deparse_context)); + context.buf = buf; + + appendStringInfoChar(buf, '('); + sep = ""; + foreach(cell, bound_datums) + { + PartitionRangeDatum *datum = + lfirst_node(PartitionRangeDatum, cell); + + appendStringInfoString(buf, sep); + if (datum->kind == PARTITION_RANGE_DATUM_MINVALUE) + appendStringInfoString(buf, "MINVALUE"); + else if (datum->kind == PARTITION_RANGE_DATUM_MAXVALUE) + appendStringInfoString(buf, "MAXVALUE"); + else + { + Const *val = castNode(Const, datum->value); + + get_const_expr(val, &context, -1); + } + sep = ", "; + } + appendStringInfoChar(buf, ')'); + + return buf->data; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/selfuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/selfuncs.c new file mode 100644 index 00000000000..b24bf5979d5 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/selfuncs.c @@ -0,0 +1,8030 @@ +/*------------------------------------------------------------------------- + * + * selfuncs.c + * Selectivity functions and index cost estimation functions for + * standard operators and index access methods. + * + * Selectivity routines are registered in the pg_operator catalog + * in the "oprrest" and "oprjoin" attributes. + * + * Index cost functions are located via the index AM's API struct, + * which is obtained from the handler function registered in pg_am. 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/selfuncs.c + * + *------------------------------------------------------------------------- + */ + +/*---------- + * Operator selectivity estimation functions are called to estimate the + * selectivity of WHERE clauses whose top-level operator is their operator. + * We divide the problem into two cases: + * Restriction clause estimation: the clause involves vars of just + * one relation. + * Join clause estimation: the clause involves vars of multiple rels. + * Join selectivity estimation is far more difficult and usually less accurate + * than restriction estimation. + * + * When dealing with the inner scan of a nestloop join, we consider the + * join's joinclauses as restriction clauses for the inner relation, and + * treat vars of the outer relation as parameters (a/k/a constants of unknown + * values). So, restriction estimators need to be able to accept an argument + * telling which relation is to be treated as the variable. + * + * The call convention for a restriction estimator (oprrest function) is + * + * Selectivity oprrest (PlannerInfo *root, + * Oid operator, + * List *args, + * int varRelid); + * + * root: general information about the query (rtable and RelOptInfo lists + * are particularly important for the estimator). + * operator: OID of the specific operator in question. + * args: argument list from the operator clause. + * varRelid: if not zero, the relid (rtable index) of the relation to + * be treated as the variable relation. May be zero if the args list + * is known to contain vars of only one relation. + * + * This is represented at the SQL level (in pg_proc) as + * + * float8 oprrest (internal, oid, internal, int4); + * + * The result is a selectivity, that is, a fraction (0 to 1) of the rows + * of the relation that are expected to produce a TRUE result for the + * given operator. + * + * The call convention for a join estimator (oprjoin function) is similar + * except that varRelid is not needed, and instead join information is + * supplied: + * + * Selectivity oprjoin (PlannerInfo *root, + * Oid operator, + * List *args, + * JoinType jointype, + * SpecialJoinInfo *sjinfo); + * + * float8 oprjoin (internal, oid, internal, int2, internal); + * + * (Before Postgres 8.4, join estimators had only the first four of these + * parameters. That signature is still allowed, but deprecated.) The + * relationship between jointype and sjinfo is explained in the comments for + * clause_selectivity() --- the short version is that jointype is usually + * best ignored in favor of examining sjinfo. + * + * Join selectivity for regular inner and outer joins is defined as the + * fraction (0 to 1) of the cross product of the relations that is expected + * to produce a TRUE result for the given operator. For both semi and anti + * joins, however, the selectivity is defined as the fraction of the left-hand + * side relation's rows that are expected to have a match (ie, at least one + * row with a TRUE result) in the right-hand side. + * + * For both oprrest and oprjoin functions, the operator's input collation OID + * (if any) is passed using the standard fmgr mechanism, so that the estimator + * function can fetch it with PG_GET_COLLATION(). Note, however, that all + * statistics in pg_statistic are currently built using the relevant column's + * collation. 
+ *---------- + */ + +#include "postgres.h" + +#include <ctype.h> +#include <math.h> + +#include "access/brin.h" +#include "access/brin_page.h" +#include "access/gin.h" +#include "access/table.h" +#include "access/tableam.h" +#include "access/visibilitymap.h" +#include "catalog/pg_am.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_statistic.h" +#include "catalog/pg_statistic_ext.h" +#include "executor/nodeAgg.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/optimizer.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/plancat.h" +#include "parser/parse_clause.h" +#include "parser/parsetree.h" +#include "statistics/statistics.h" +#include "storage/bufmgr.h" +#include "utils/acl.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/date.h" +#include "utils/datum.h" +#include "utils/fmgroids.h" +#include "utils/index_selfuncs.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/pg_locale.h" +#include "utils/rel.h" +#include "utils/selfuncs.h" +#include "utils/snapmgr.h" +#include "utils/spccache.h" +#include "utils/syscache.h" +#include "utils/timestamp.h" +#include "utils/typcache.h" + +#define DEFAULT_PAGE_CPU_MULTIPLIER 50.0 + +/* Hooks for plugins to get control when we ask for stats */ +__thread get_relation_stats_hook_type get_relation_stats_hook = NULL; +__thread get_index_stats_hook_type get_index_stats_hook = NULL; + +static double eqsel_internal(PG_FUNCTION_ARGS, bool negate); +static double eqjoinsel_inner(Oid opfuncoid, Oid collation, + VariableStatData *vardata1, VariableStatData *vardata2, + double nd1, double nd2, + bool isdefault1, bool isdefault2, + AttStatsSlot *sslot1, AttStatsSlot *sslot2, + Form_pg_statistic stats1, Form_pg_statistic stats2, + bool have_mcvs1, bool have_mcvs2); +static double eqjoinsel_semi(Oid opfuncoid, Oid collation, + VariableStatData *vardata1, VariableStatData *vardata2, + double nd1, double nd2, + bool isdefault1, bool isdefault2, + AttStatsSlot *sslot1, AttStatsSlot *sslot2, + Form_pg_statistic stats1, Form_pg_statistic stats2, + bool have_mcvs1, bool have_mcvs2, + RelOptInfo *inner_rel); +static bool estimate_multivariate_ndistinct(PlannerInfo *root, + RelOptInfo *rel, List **varinfos, double *ndistinct); +static bool convert_to_scalar(Datum value, Oid valuetypid, Oid collid, + double *scaledvalue, + Datum lobound, Datum hibound, Oid boundstypid, + double *scaledlobound, double *scaledhibound); +static double convert_numeric_to_scalar(Datum value, Oid typid, bool *failure); +static void convert_string_to_scalar(char *value, + double *scaledvalue, + char *lobound, + double *scaledlobound, + char *hibound, + double *scaledhibound); +static void convert_bytea_to_scalar(Datum value, + double *scaledvalue, + Datum lobound, + double *scaledlobound, + Datum hibound, + double *scaledhibound); +static double convert_one_string_to_scalar(char *value, + int rangelo, int rangehi); +static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen, + int rangelo, int rangehi); +static char *convert_string_datum(Datum value, Oid typid, Oid collid, + bool *failure); +static double convert_timevalue_to_scalar(Datum value, Oid typid, + bool *failure); +static void examine_simple_variable(PlannerInfo *root, Var *var, + VariableStatData *vardata); +static bool get_variable_range(PlannerInfo *root, 
VariableStatData *vardata, + Oid sortop, Oid collation, + Datum *min, Datum *max); +static void get_stats_slot_range(AttStatsSlot *sslot, + Oid opfuncoid, FmgrInfo *opproc, + Oid collation, int16 typLen, bool typByVal, + Datum *min, Datum *max, bool *p_have_data); +static bool get_actual_variable_range(PlannerInfo *root, + VariableStatData *vardata, + Oid sortop, Oid collation, + Datum *min, Datum *max); +static bool get_actual_variable_endpoint(Relation heapRel, + Relation indexRel, + ScanDirection indexscandir, + ScanKey scankeys, + int16 typLen, + bool typByVal, + TupleTableSlot *tableslot, + MemoryContext outercontext, + Datum *endpointDatum); +static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids); + + +/* + * eqsel - Selectivity of "=" for any data types. + * + * Note: this routine is also used to estimate selectivity for some + * operators that are not "=" but have comparable selectivity behavior, + * such as "~=" (geometric approximate-match). Even for "=", we must + * keep in mind that the left and right datatypes may differ. + */ +Datum +eqsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, false)); +} + +/* + * Common code for eqsel() and neqsel() + */ +static double +eqsel_internal(PG_FUNCTION_ARGS, bool negate) +{ + PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); + Oid operator = PG_GETARG_OID(1); + List *args = (List *) PG_GETARG_POINTER(2); + int varRelid = PG_GETARG_INT32(3); + Oid collation = PG_GET_COLLATION(); + VariableStatData vardata; + Node *other; + bool varonleft; + double selec; + + /* + * When asked about <>, we do the estimation using the corresponding = + * operator, then convert to <> via "1.0 - eq_selectivity - nullfrac". + */ + if (negate) + { + operator = get_negator(operator); + if (!OidIsValid(operator)) + { + /* Use default selectivity (should we raise an error instead?) */ + return 1.0 - DEFAULT_EQ_SEL; + } + } + + /* + * If expression is not variable = something or something = variable, then + * punt and return a default estimate. + */ + if (!get_restriction_variable(root, args, varRelid, + &vardata, &other, &varonleft)) + return negate ? (1.0 - DEFAULT_EQ_SEL) : DEFAULT_EQ_SEL; + + /* + * We can do a lot better if the something is a constant. (Note: the + * Const might result from estimation rather than being a simple constant + * in the query.) + */ + if (IsA(other, Const)) + selec = var_eq_const(&vardata, operator, collation, + ((Const *) other)->constvalue, + ((Const *) other)->constisnull, + varonleft, negate); + else + selec = var_eq_non_const(&vardata, operator, collation, other, + varonleft, negate); + + ReleaseVariableStats(vardata); + + return selec; +} + +/* + * var_eq_const --- eqsel for var = const case + * + * This is exported so that some other estimation functions can use it. + */ +double +var_eq_const(VariableStatData *vardata, Oid oproid, Oid collation, + Datum constval, bool constisnull, + bool varonleft, bool negate) +{ + double selec; + double nullfrac = 0.0; + bool isdefault; + Oid opfuncoid; + + /* + * If the constant is NULL, assume operator is strict and return zero, ie, + * operator will never return TRUE. (It's zero even for a negator op.) + */ + if (constisnull) + return 0.0; + + /* + * Grab the nullfrac for use below. Note we allow use of nullfrac + * regardless of security check. 
+ */ + if (HeapTupleIsValid(vardata->statsTuple)) + { + Form_pg_statistic stats; + + stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple); + nullfrac = stats->stanullfrac; + } + + /* + * If we matched the var to a unique index or DISTINCT clause, assume + * there is exactly one match regardless of anything else. (This is + * slightly bogus, since the index or clause's equality operator might be + * different from ours, but it's much more likely to be right than + * ignoring the information.) + */ + if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0) + { + selec = 1.0 / vardata->rel->tuples; + } + else if (HeapTupleIsValid(vardata->statsTuple) && + statistic_proc_security_check(vardata, + (opfuncoid = get_opcode(oproid)))) + { + AttStatsSlot sslot; + bool match = false; + int i; + + /* + * Is the constant "=" to any of the column's most common values? + * (Although the given operator may not really be "=", we will assume + * that seeing whether it returns TRUE is an appropriate test. If you + * don't like this, maybe you shouldn't be using eqsel for your + * operator...) + */ + if (get_attstatsslot(&sslot, vardata->statsTuple, + STATISTIC_KIND_MCV, InvalidOid, + ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)) + { + LOCAL_FCINFO(fcinfo, 2); + FmgrInfo eqproc; + + fmgr_info(opfuncoid, &eqproc); + + /* + * Save a few cycles by setting up the fcinfo struct just once. + * Using FunctionCallInvoke directly also avoids failure if the + * eqproc returns NULL, though really equality functions should + * never do that. + */ + InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation, + NULL, NULL); + fcinfo->args[0].isnull = false; + fcinfo->args[1].isnull = false; + /* be careful to apply operator right way 'round */ + if (varonleft) + fcinfo->args[1].value = constval; + else + fcinfo->args[0].value = constval; + + for (i = 0; i < sslot.nvalues; i++) + { + Datum fresult; + + if (varonleft) + fcinfo->args[0].value = sslot.values[i]; + else + fcinfo->args[1].value = sslot.values[i]; + fcinfo->isnull = false; + fresult = FunctionCallInvoke(fcinfo); + if (!fcinfo->isnull && DatumGetBool(fresult)) + { + match = true; + break; + } + } + } + else + { + /* no most-common-value info available */ + i = 0; /* keep compiler quiet */ + } + + if (match) + { + /* + * Constant is "=" to this common value. We know selectivity + * exactly (or as exactly as ANALYZE could calculate it, anyway). + */ + selec = sslot.numbers[i]; + } + else + { + /* + * Comparison is against a constant that is neither NULL nor any + * of the common values. Its selectivity cannot be more than + * this: + */ + double sumcommon = 0.0; + double otherdistinct; + + for (i = 0; i < sslot.nnumbers; i++) + sumcommon += sslot.numbers[i]; + selec = 1.0 - sumcommon - nullfrac; + CLAMP_PROBABILITY(selec); + + /* + * and in fact it's probably a good deal less. We approximate that + * all the not-common values share this remaining fraction + * equally, so we divide by the number of other distinct values. + */ + otherdistinct = get_variable_numdistinct(vardata, &isdefault) - + sslot.nnumbers; + if (otherdistinct > 1) + selec /= otherdistinct; + + /* + * Another cross-check: selectivity shouldn't be estimated as more + * than the least common "most common value". 
+ */ + if (sslot.nnumbers > 0 && selec > sslot.numbers[sslot.nnumbers - 1]) + selec = sslot.numbers[sslot.nnumbers - 1]; + } + + free_attstatsslot(&sslot); + } + else + { + /* + * No ANALYZE stats available, so make a guess using estimated number + * of distinct values and assuming they are equally common. (The guess + * is unlikely to be very good, but we do know a few special cases.) + */ + selec = 1.0 / get_variable_numdistinct(vardata, &isdefault); + } + + /* now adjust if we wanted <> rather than = */ + if (negate) + selec = 1.0 - selec - nullfrac; + + /* result should be in range, but make sure... */ + CLAMP_PROBABILITY(selec); + + return selec; +} + +/* + * var_eq_non_const --- eqsel for var = something-other-than-const case + * + * This is exported so that some other estimation functions can use it. + */ +double +var_eq_non_const(VariableStatData *vardata, Oid oproid, Oid collation, + Node *other, + bool varonleft, bool negate) +{ + double selec; + double nullfrac = 0.0; + bool isdefault; + + /* + * Grab the nullfrac for use below. + */ + if (HeapTupleIsValid(vardata->statsTuple)) + { + Form_pg_statistic stats; + + stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple); + nullfrac = stats->stanullfrac; + } + + /* + * If we matched the var to a unique index or DISTINCT clause, assume + * there is exactly one match regardless of anything else. (This is + * slightly bogus, since the index or clause's equality operator might be + * different from ours, but it's much more likely to be right than + * ignoring the information.) + */ + if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0) + { + selec = 1.0 / vardata->rel->tuples; + } + else if (HeapTupleIsValid(vardata->statsTuple)) + { + double ndistinct; + AttStatsSlot sslot; + + /* + * Search is for a value that we do not know a priori, but we will + * assume it is not NULL. Estimate the selectivity as non-null + * fraction divided by number of distinct values, so that we get a + * result averaged over all possible values whether common or + * uncommon. (Essentially, we are assuming that the not-yet-known + * comparison value is equally likely to be any of the possible + * values, regardless of their frequency in the table. Is that a good + * idea?) + */ + selec = 1.0 - nullfrac; + ndistinct = get_variable_numdistinct(vardata, &isdefault); + if (ndistinct > 1) + selec /= ndistinct; + + /* + * Cross-check: selectivity should never be estimated as more than the + * most common value's. + */ + if (get_attstatsslot(&sslot, vardata->statsTuple, + STATISTIC_KIND_MCV, InvalidOid, + ATTSTATSSLOT_NUMBERS)) + { + if (sslot.nnumbers > 0 && selec > sslot.numbers[0]) + selec = sslot.numbers[0]; + free_attstatsslot(&sslot); + } + } + else + { + /* + * No ANALYZE stats available, so make a guess using estimated number + * of distinct values and assuming they are equally common. (The guess + * is unlikely to be very good, but we do know a few special cases.) + */ + selec = 1.0 / get_variable_numdistinct(vardata, &isdefault); + } + + /* now adjust if we wanted <> rather than = */ + if (negate) + selec = 1.0 - selec - nullfrac; + + /* result should be in range, but make sure... */ + CLAMP_PROBABILITY(selec); + + return selec; +} + +/* + * neqsel - Selectivity of "!=" for any data types. + * + * This routine is also used for some operators that are not "!=" + * but have comparable selectivity behavior. See above comments + * for eqsel(). 
+ */ +Datum +neqsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, true)); +} + +/* + * scalarineqsel - Selectivity of "<", "<=", ">", ">=" for scalars. + * + * This is the guts of scalarltsel/scalarlesel/scalargtsel/scalargesel. + * The isgt and iseq flags distinguish which of the four cases apply. + * + * The caller has commuted the clause, if necessary, so that we can treat + * the variable as being on the left. The caller must also make sure that + * the other side of the clause is a non-null Const, and dissect that into + * a value and datatype. (This definition simplifies some callers that + * want to estimate against a computed value instead of a Const node.) + * + * This routine works for any datatype (or pair of datatypes) known to + * convert_to_scalar(). If it is applied to some other datatype, + * it will return an approximate estimate based on assuming that the constant + * value falls in the middle of the bin identified by binary search. + */ +static double +scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq, + Oid collation, + VariableStatData *vardata, Datum constval, Oid consttype) +{ + Form_pg_statistic stats; + FmgrInfo opproc; + double mcv_selec, + hist_selec, + sumcommon; + double selec; + + if (!HeapTupleIsValid(vardata->statsTuple)) + { + /* + * No stats are available. Typically this means we have to fall back + * on the default estimate; but if the variable is CTID then we can + * make an estimate based on comparing the constant to the table size. + */ + if (vardata->var && IsA(vardata->var, Var) && + ((Var *) vardata->var)->varattno == SelfItemPointerAttributeNumber) + { + ItemPointer itemptr; + double block; + double density; + + /* + * If the relation's empty, we're going to include all of it. + * (This is mostly to avoid divide-by-zero below.) + */ + if (vardata->rel->pages == 0) + return 1.0; + + itemptr = (ItemPointer) DatumGetPointer(constval); + block = ItemPointerGetBlockNumberNoCheck(itemptr); + + /* + * Determine the average number of tuples per page (density). + * + * Since the last page will, on average, be only half full, we can + * estimate it to have half as many tuples as earlier pages. So + * give it half the weight of a regular page. + */ + density = vardata->rel->tuples / (vardata->rel->pages - 0.5); + + /* If target is the last page, use half the density. */ + if (block >= vardata->rel->pages - 1) + density *= 0.5; + + /* + * Using the average tuples per page, calculate how far into the + * page the itemptr is likely to be and adjust block accordingly, + * by adding that fraction of a whole block (but never more than a + * whole block, no matter how high the itemptr's offset is). Here + * we are ignoring the possibility of dead-tuple line pointers, + * which is fairly bogus, but we lack the info to do better. + */ + if (density > 0.0) + { + OffsetNumber offset = ItemPointerGetOffsetNumberNoCheck(itemptr); + + block += Min(offset / density, 1.0); + } + + /* + * Convert relative block number to selectivity. Again, the last + * page has only half weight. + */ + selec = block / (vardata->rel->pages - 0.5); + + /* + * The calculation so far gave us a selectivity for the "<=" case. + * We'll have one fewer tuple for "<" and one additional tuple for + * ">=", the latter of which we'll reverse the selectivity for + * below, so we can simply subtract one tuple for both cases. The + * cases that need this adjustment can be identified by iseq being + * equal to isgt. 
+ */ + if (iseq == isgt && vardata->rel->tuples >= 1.0) + selec -= (1.0 / vardata->rel->tuples); + + /* Finally, reverse the selectivity for the ">", ">=" cases. */ + if (isgt) + selec = 1.0 - selec; + + CLAMP_PROBABILITY(selec); + return selec; + } + + /* no stats available, so default result */ + return DEFAULT_INEQ_SEL; + } + stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple); + + fmgr_info(get_opcode(operator), &opproc); + + /* + * If we have most-common-values info, add up the fractions of the MCV + * entries that satisfy MCV OP CONST. These fractions contribute directly + * to the result selectivity. Also add up the total fraction represented + * by MCV entries. + */ + mcv_selec = mcv_selectivity(vardata, &opproc, collation, constval, true, + &sumcommon); + + /* + * If there is a histogram, determine which bin the constant falls in, and + * compute the resulting contribution to selectivity. + */ + hist_selec = ineq_histogram_selectivity(root, vardata, + operator, &opproc, isgt, iseq, + collation, + constval, consttype); + + /* + * Now merge the results from the MCV and histogram calculations, + * realizing that the histogram covers only the non-null values that are + * not listed in MCV. + */ + selec = 1.0 - stats->stanullfrac - sumcommon; + + if (hist_selec >= 0.0) + selec *= hist_selec; + else + { + /* + * If no histogram but there are values not accounted for by MCV, + * arbitrarily assume half of them will match. + */ + selec *= 0.5; + } + + selec += mcv_selec; + + /* result should be in range, but make sure... */ + CLAMP_PROBABILITY(selec); + + return selec; +} + +/* + * mcv_selectivity - Examine the MCV list for selectivity estimates + * + * Determine the fraction of the variable's MCV population that satisfies + * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft. Also + * compute the fraction of the total column population represented by the MCV + * list. This code will work for any boolean-returning predicate operator. + * + * The function result is the MCV selectivity, and the fraction of the + * total population is returned into *sumcommonp. Zeroes are returned + * if there is no MCV list. + */ +double +mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, + Datum constval, bool varonleft, + double *sumcommonp) +{ + double mcv_selec, + sumcommon; + AttStatsSlot sslot; + int i; + + mcv_selec = 0.0; + sumcommon = 0.0; + + if (HeapTupleIsValid(vardata->statsTuple) && + statistic_proc_security_check(vardata, opproc->fn_oid) && + get_attstatsslot(&sslot, vardata->statsTuple, + STATISTIC_KIND_MCV, InvalidOid, + ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)) + { + LOCAL_FCINFO(fcinfo, 2); + + /* + * We invoke the opproc "by hand" so that we won't fail on NULL + * results. Such cases won't arise for normal comparison functions, + * but generic_restriction_selectivity could perhaps be used with + * operators that can return NULL. A small side benefit is to not + * need to re-initialize the fcinfo struct from scratch each time. 
+ */ + InitFunctionCallInfoData(*fcinfo, opproc, 2, collation, + NULL, NULL); + fcinfo->args[0].isnull = false; + fcinfo->args[1].isnull = false; + /* be careful to apply operator right way 'round */ + if (varonleft) + fcinfo->args[1].value = constval; + else + fcinfo->args[0].value = constval; + + for (i = 0; i < sslot.nvalues; i++) + { + Datum fresult; + + if (varonleft) + fcinfo->args[0].value = sslot.values[i]; + else + fcinfo->args[1].value = sslot.values[i]; + fcinfo->isnull = false; + fresult = FunctionCallInvoke(fcinfo); + if (!fcinfo->isnull && DatumGetBool(fresult)) + mcv_selec += sslot.numbers[i]; + sumcommon += sslot.numbers[i]; + } + free_attstatsslot(&sslot); + } + + *sumcommonp = sumcommon; + return mcv_selec; +} + +/* + * histogram_selectivity - Examine the histogram for selectivity estimates + * + * Determine the fraction of the variable's histogram entries that satisfy + * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft. + * + * This code will work for any boolean-returning predicate operator, whether + * or not it has anything to do with the histogram sort operator. We are + * essentially using the histogram just as a representative sample. However, + * small histograms are unlikely to be all that representative, so the caller + * should be prepared to fall back on some other estimation approach when the + * histogram is missing or very small. It may also be prudent to combine this + * approach with another one when the histogram is small. + * + * If the actual histogram size is not at least min_hist_size, we won't bother + * to do the calculation at all. Also, if the n_skip parameter is > 0, we + * ignore the first and last n_skip histogram elements, on the grounds that + * they are outliers and hence not very representative. Typical values for + * these parameters are 10 and 1. + * + * The function result is the selectivity, or -1 if there is no histogram + * or it's smaller than min_hist_size. + * + * The output parameter *hist_size receives the actual histogram size, + * or zero if no histogram. Callers may use this number to decide how + * much faith to put in the function result. + * + * Note that the result disregards both the most-common-values (if any) and + * null entries. The caller is expected to combine this result with + * statistics for those portions of the column population. It may also be + * prudent to clamp the result range, ie, disbelieve exact 0 or 1 outputs. + */ +double +histogram_selectivity(VariableStatData *vardata, + FmgrInfo *opproc, Oid collation, + Datum constval, bool varonleft, + int min_hist_size, int n_skip, + int *hist_size) +{ + double result; + AttStatsSlot sslot; + + /* check sanity of parameters */ + Assert(n_skip >= 0); + Assert(min_hist_size > 2 * n_skip); + + if (HeapTupleIsValid(vardata->statsTuple) && + statistic_proc_security_check(vardata, opproc->fn_oid) && + get_attstatsslot(&sslot, vardata->statsTuple, + STATISTIC_KIND_HISTOGRAM, InvalidOid, + ATTSTATSSLOT_VALUES)) + { + *hist_size = sslot.nvalues; + if (sslot.nvalues >= min_hist_size) + { + LOCAL_FCINFO(fcinfo, 2); + int nmatch = 0; + int i; + + /* + * We invoke the opproc "by hand" so that we won't fail on NULL + * results. Such cases won't arise for normal comparison + * functions, but generic_restriction_selectivity could perhaps be + * used with operators that can return NULL. A small side benefit + * is to not need to re-initialize the fcinfo struct from scratch + * each time. 
+ */ + InitFunctionCallInfoData(*fcinfo, opproc, 2, collation, + NULL, NULL); + fcinfo->args[0].isnull = false; + fcinfo->args[1].isnull = false; + /* be careful to apply operator right way 'round */ + if (varonleft) + fcinfo->args[1].value = constval; + else + fcinfo->args[0].value = constval; + + for (i = n_skip; i < sslot.nvalues - n_skip; i++) + { + Datum fresult; + + if (varonleft) + fcinfo->args[0].value = sslot.values[i]; + else + fcinfo->args[1].value = sslot.values[i]; + fcinfo->isnull = false; + fresult = FunctionCallInvoke(fcinfo); + if (!fcinfo->isnull && DatumGetBool(fresult)) + nmatch++; + } + result = ((double) nmatch) / ((double) (sslot.nvalues - 2 * n_skip)); + } + else + result = -1; + free_attstatsslot(&sslot); + } + else + { + *hist_size = 0; + result = -1; + } + + return result; +} + +/* + * generic_restriction_selectivity - Selectivity for almost anything + * + * This function estimates selectivity for operators that we don't have any + * special knowledge about, but are on data types that we collect standard + * MCV and/or histogram statistics for. (Additional assumptions are that + * the operator is strict and immutable, or at least stable.) + * + * If we have "VAR OP CONST" or "CONST OP VAR", selectivity is estimated by + * applying the operator to each element of the column's MCV and/or histogram + * stats, and merging the results using the assumption that the histogram is + * a reasonable random sample of the column's non-MCV population. Note that + * if the operator's semantics are related to the histogram ordering, this + * might not be such a great assumption; other functions such as + * scalarineqsel() are probably a better match in such cases. + * + * Otherwise, fall back to the default selectivity provided by the caller. + */ +double +generic_restriction_selectivity(PlannerInfo *root, Oid oproid, Oid collation, + List *args, int varRelid, + double default_selectivity) +{ + double selec; + VariableStatData vardata; + Node *other; + bool varonleft; + + /* + * If expression is not variable OP something or something OP variable, + * then punt and return the default estimate. + */ + if (!get_restriction_variable(root, args, varRelid, + &vardata, &other, &varonleft)) + return default_selectivity; + + /* + * If the something is a NULL constant, assume operator is strict and + * return zero, ie, operator will never return TRUE. + */ + if (IsA(other, Const) && + ((Const *) other)->constisnull) + { + ReleaseVariableStats(vardata); + return 0.0; + } + + if (IsA(other, Const)) + { + /* Variable is being compared to a known non-null constant */ + Datum constval = ((Const *) other)->constvalue; + FmgrInfo opproc; + double mcvsum; + double mcvsel; + double nullfrac; + int hist_size; + + fmgr_info(get_opcode(oproid), &opproc); + + /* + * Calculate the selectivity for the column's most common values. + */ + mcvsel = mcv_selectivity(&vardata, &opproc, collation, + constval, varonleft, + &mcvsum); + + /* + * If the histogram is large enough, see what fraction of it matches + * the query, and assume that's representative of the non-MCV + * population. Otherwise use the default selectivity for the non-MCV + * population. 
+ */ + selec = histogram_selectivity(&vardata, &opproc, collation, + constval, varonleft, + 10, 1, &hist_size); + if (selec < 0) + { + /* Nope, fall back on default */ + selec = default_selectivity; + } + else if (hist_size < 100) + { + /* + * For histogram sizes from 10 to 100, we combine the histogram + * and default selectivities, putting increasingly more trust in + * the histogram for larger sizes. + */ + double hist_weight = hist_size / 100.0; + + selec = selec * hist_weight + + default_selectivity * (1.0 - hist_weight); + } + + /* In any case, don't believe extremely small or large estimates. */ + if (selec < 0.0001) + selec = 0.0001; + else if (selec > 0.9999) + selec = 0.9999; + + /* Don't forget to account for nulls. */ + if (HeapTupleIsValid(vardata.statsTuple)) + nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac; + else + nullfrac = 0.0; + + /* + * Now merge the results from the MCV and histogram calculations, + * realizing that the histogram covers only the non-null values that + * are not listed in MCV. + */ + selec *= 1.0 - nullfrac - mcvsum; + selec += mcvsel; + } + else + { + /* Comparison value is not constant, so we can't do anything */ + selec = default_selectivity; + } + + ReleaseVariableStats(vardata); + + /* result should be in range, but make sure... */ + CLAMP_PROBABILITY(selec); + + return selec; +} + +/* + * ineq_histogram_selectivity - Examine the histogram for scalarineqsel + * + * Determine the fraction of the variable's histogram population that + * satisfies the inequality condition, ie, VAR < (or <=, >, >=) CONST. + * The isgt and iseq flags distinguish which of the four cases apply. + * + * While opproc could be looked up from the operator OID, common callers + * also need to call it separately, so we make the caller pass both. + * + * Returns -1 if there is no histogram (valid results will always be >= 0). + * + * Note that the result disregards both the most-common-values (if any) and + * null entries. The caller is expected to combine this result with + * statistics for those portions of the column population. + * + * This is exported so that some other estimation functions can use it. + */ +double +ineq_histogram_selectivity(PlannerInfo *root, + VariableStatData *vardata, + Oid opoid, FmgrInfo *opproc, bool isgt, bool iseq, + Oid collation, + Datum constval, Oid consttype) +{ + double hist_selec; + AttStatsSlot sslot; + + hist_selec = -1.0; + + /* + * Someday, ANALYZE might store more than one histogram per rel/att, + * corresponding to more than one possible sort ordering defined for the + * column type. Right now, we know there is only one, so just grab it and + * see if it matches the query. + * + * Note that we can't use opoid as search argument; the staop appearing in + * pg_statistic will be for the relevant '<' operator, but what we have + * might be some other inequality operator such as '>='. (Even if opoid + * is a '<' operator, it could be cross-type.) Hence we must use + * comparison_ops_are_compatible() to see if the operators match. 
+ */ + if (HeapTupleIsValid(vardata->statsTuple) && + statistic_proc_security_check(vardata, opproc->fn_oid) && + get_attstatsslot(&sslot, vardata->statsTuple, + STATISTIC_KIND_HISTOGRAM, InvalidOid, + ATTSTATSSLOT_VALUES)) + { + if (sslot.nvalues > 1 && + sslot.stacoll == collation && + comparison_ops_are_compatible(sslot.staop, opoid)) + { + /* + * Use binary search to find the desired location, namely the + * right end of the histogram bin containing the comparison value, + * which is the leftmost entry for which the comparison operator + * succeeds (if isgt) or fails (if !isgt). + * + * In this loop, we pay no attention to whether the operator iseq + * or not; that detail will be mopped up below. (We cannot tell, + * anyway, whether the operator thinks the values are equal.) + * + * If the binary search accesses the first or last histogram + * entry, we try to replace that endpoint with the true column min + * or max as found by get_actual_variable_range(). This + * ameliorates misestimates when the min or max is moving as a + * result of changes since the last ANALYZE. Note that this could + * result in effectively including MCVs into the histogram that + * weren't there before, but we don't try to correct for that. + */ + double histfrac; + int lobound = 0; /* first possible slot to search */ + int hibound = sslot.nvalues; /* last+1 slot to search */ + bool have_end = false; + + /* + * If there are only two histogram entries, we'll want up-to-date + * values for both. (If there are more than two, we need at most + * one of them to be updated, so we deal with that within the + * loop.) + */ + if (sslot.nvalues == 2) + have_end = get_actual_variable_range(root, + vardata, + sslot.staop, + collation, + &sslot.values[0], + &sslot.values[1]); + + while (lobound < hibound) + { + int probe = (lobound + hibound) / 2; + bool ltcmp; + + /* + * If we find ourselves about to compare to the first or last + * histogram entry, first try to replace it with the actual + * current min or max (unless we already did so above). + */ + if (probe == 0 && sslot.nvalues > 2) + have_end = get_actual_variable_range(root, + vardata, + sslot.staop, + collation, + &sslot.values[0], + NULL); + else if (probe == sslot.nvalues - 1 && sslot.nvalues > 2) + have_end = get_actual_variable_range(root, + vardata, + sslot.staop, + collation, + NULL, + &sslot.values[probe]); + + ltcmp = DatumGetBool(FunctionCall2Coll(opproc, + collation, + sslot.values[probe], + constval)); + if (isgt) + ltcmp = !ltcmp; + if (ltcmp) + lobound = probe + 1; + else + hibound = probe; + } + + if (lobound <= 0) + { + /* + * Constant is below lower histogram boundary. More + * precisely, we have found that no entry in the histogram + * satisfies the inequality clause (if !isgt) or they all do + * (if isgt). We estimate that that's true of the entire + * table, so set histfrac to 0.0 (which we'll flip to 1.0 + * below, if isgt). + */ + histfrac = 0.0; + } + else if (lobound >= sslot.nvalues) + { + /* + * Inverse case: constant is above upper histogram boundary. + */ + histfrac = 1.0; + } + else + { + /* We have values[i-1] <= constant <= values[i]. */ + int i = lobound; + double eq_selec = 0; + double val, + high, + low; + double binfrac; + + /* + * In the cases where we'll need it below, obtain an estimate + * of the selectivity of "x = constval". We use a calculation + * similar to what var_eq_const() does for a non-MCV constant, + * ie, estimate that all distinct non-MCV values occur equally + * often. 
But multiplication by "1.0 - sumcommon - nullfrac" + * will be done by our caller, so we shouldn't do that here. + * Therefore we can't try to clamp the estimate by reference + * to the least common MCV; the result would be too small. + * + * Note: since this is effectively assuming that constval + * isn't an MCV, it's logically dubious if constval in fact is + * one. But we have to apply *some* correction for equality, + * and anyway we cannot tell if constval is an MCV, since we + * don't have a suitable equality operator at hand. + */ + if (i == 1 || isgt == iseq) + { + double otherdistinct; + bool isdefault; + AttStatsSlot mcvslot; + + /* Get estimated number of distinct values */ + otherdistinct = get_variable_numdistinct(vardata, + &isdefault); + + /* Subtract off the number of known MCVs */ + if (get_attstatsslot(&mcvslot, vardata->statsTuple, + STATISTIC_KIND_MCV, InvalidOid, + ATTSTATSSLOT_NUMBERS)) + { + otherdistinct -= mcvslot.nnumbers; + free_attstatsslot(&mcvslot); + } + + /* If result doesn't seem sane, leave eq_selec at 0 */ + if (otherdistinct > 1) + eq_selec = 1.0 / otherdistinct; + } + + /* + * Convert the constant and the two nearest bin boundary + * values to a uniform comparison scale, and do a linear + * interpolation within this bin. + */ + if (convert_to_scalar(constval, consttype, collation, + &val, + sslot.values[i - 1], sslot.values[i], + vardata->vartype, + &low, &high)) + { + if (high <= low) + { + /* cope if bin boundaries appear identical */ + binfrac = 0.5; + } + else if (val <= low) + binfrac = 0.0; + else if (val >= high) + binfrac = 1.0; + else + { + binfrac = (val - low) / (high - low); + + /* + * Watch out for the possibility that we got a NaN or + * Infinity from the division. This can happen + * despite the previous checks, if for example "low" + * is -Infinity. + */ + if (isnan(binfrac) || + binfrac < 0.0 || binfrac > 1.0) + binfrac = 0.5; + } + } + else + { + /* + * Ideally we'd produce an error here, on the grounds that + * the given operator shouldn't have scalarXXsel + * registered as its selectivity func unless we can deal + * with its operand types. But currently, all manner of + * stuff is invoking scalarXXsel, so give a default + * estimate until that can be fixed. + */ + binfrac = 0.5; + } + + /* + * Now, compute the overall selectivity across the values + * represented by the histogram. We have i-1 full bins and + * binfrac partial bin below the constant. + */ + histfrac = (double) (i - 1) + binfrac; + histfrac /= (double) (sslot.nvalues - 1); + + /* + * At this point, histfrac is an estimate of the fraction of + * the population represented by the histogram that satisfies + * "x <= constval". Somewhat remarkably, this statement is + * true regardless of which operator we were doing the probes + * with, so long as convert_to_scalar() delivers reasonable + * results. If the probe constant is equal to some histogram + * entry, we would have considered the bin to the left of that + * entry if probing with "<" or ">=", or the bin to the right + * if probing with "<=" or ">"; but binfrac would have come + * out as 1.0 in the first case and 0.0 in the second, leading + * to the same histfrac in either case. For probe constants + * between histogram entries, we find the same bin and get the + * same estimate with any operator. 
+ * + * The fact that the estimate corresponds to "x <= constval" + * and not "x < constval" is because of the way that ANALYZE + * constructs the histogram: each entry is, effectively, the + * rightmost value in its sample bucket. So selectivity + * values that are exact multiples of 1/(histogram_size-1) + * should be understood as estimates including a histogram + * entry plus everything to its left. + * + * However, that breaks down for the first histogram entry, + * which necessarily is the leftmost value in its sample + * bucket. That means the first histogram bin is slightly + * narrower than the rest, by an amount equal to eq_selec. + * Another way to say that is that we want "x <= leftmost" to + * be estimated as eq_selec not zero. So, if we're dealing + * with the first bin (i==1), rescale to make that true while + * adjusting the rest of that bin linearly. + */ + if (i == 1) + histfrac += eq_selec * (1.0 - binfrac); + + /* + * "x <= constval" is good if we want an estimate for "<=" or + * ">", but if we are estimating for "<" or ">=", we now need + * to decrease the estimate by eq_selec. + */ + if (isgt == iseq) + histfrac -= eq_selec; + } + + /* + * Now the estimate is finished for "<" and "<=" cases. If we are + * estimating for ">" or ">=", flip it. + */ + hist_selec = isgt ? (1.0 - histfrac) : histfrac; + + /* + * The histogram boundaries are only approximate to begin with, + * and may well be out of date anyway. Therefore, don't believe + * extremely small or large selectivity estimates --- unless we + * got actual current endpoint values from the table, in which + * case just do the usual sanity clamp. Somewhat arbitrarily, we + * set the cutoff for other cases at a hundredth of the histogram + * resolution. + */ + if (have_end) + CLAMP_PROBABILITY(hist_selec); + else + { + double cutoff = 0.01 / (double) (sslot.nvalues - 1); + + if (hist_selec < cutoff) + hist_selec = cutoff; + else if (hist_selec > 1.0 - cutoff) + hist_selec = 1.0 - cutoff; + } + } + else if (sslot.nvalues > 1) + { + /* + * If we get here, we have a histogram but it's not sorted the way + * we want. Do a brute-force search to see how many of the + * entries satisfy the comparison condition, and take that + * fraction as our estimate. (This is identical to the inner loop + * of histogram_selectivity; maybe share code?) + */ + LOCAL_FCINFO(fcinfo, 2); + int nmatch = 0; + + InitFunctionCallInfoData(*fcinfo, opproc, 2, collation, + NULL, NULL); + fcinfo->args[0].isnull = false; + fcinfo->args[1].isnull = false; + fcinfo->args[1].value = constval; + for (int i = 0; i < sslot.nvalues; i++) + { + Datum fresult; + + fcinfo->args[0].value = sslot.values[i]; + fcinfo->isnull = false; + fresult = FunctionCallInvoke(fcinfo); + if (!fcinfo->isnull && DatumGetBool(fresult)) + nmatch++; + } + hist_selec = ((double) nmatch) / ((double) sslot.nvalues); + + /* + * As above, clamp to a hundredth of the histogram resolution. + * This case is surely even less trustworthy than the normal one, + * so we shouldn't believe exact 0 or 1 selectivity. (Maybe the + * clamp should be more restrictive in this case?) + */ + { + double cutoff = 0.01 / (double) (sslot.nvalues - 1); + + if (hist_selec < cutoff) + hist_selec = cutoff; + else if (hist_selec > 1.0 - cutoff) + hist_selec = 1.0 - cutoff; + } + } + + free_attstatsslot(&sslot); + } + + return hist_selec; +} + +/* + * Common wrapper function for the selectivity estimators that simply + * invoke scalarineqsel(). 
+ */ +static Datum +scalarineqsel_wrapper(PG_FUNCTION_ARGS, bool isgt, bool iseq) +{ + PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); + Oid operator = PG_GETARG_OID(1); + List *args = (List *) PG_GETARG_POINTER(2); + int varRelid = PG_GETARG_INT32(3); + Oid collation = PG_GET_COLLATION(); + VariableStatData vardata; + Node *other; + bool varonleft; + Datum constval; + Oid consttype; + double selec; + + /* + * If expression is not variable op something or something op variable, + * then punt and return a default estimate. + */ + if (!get_restriction_variable(root, args, varRelid, + &vardata, &other, &varonleft)) + PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); + + /* + * Can't do anything useful if the something is not a constant, either. + */ + if (!IsA(other, Const)) + { + ReleaseVariableStats(vardata); + PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); + } + + /* + * If the constant is NULL, assume operator is strict and return zero, ie, + * operator will never return TRUE. + */ + if (((Const *) other)->constisnull) + { + ReleaseVariableStats(vardata); + PG_RETURN_FLOAT8(0.0); + } + constval = ((Const *) other)->constvalue; + consttype = ((Const *) other)->consttype; + + /* + * Force the var to be on the left to simplify logic in scalarineqsel. + */ + if (!varonleft) + { + operator = get_commutator(operator); + if (!operator) + { + /* Use default selectivity (should we raise an error instead?) */ + ReleaseVariableStats(vardata); + PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); + } + isgt = !isgt; + } + + /* The rest of the work is done by scalarineqsel(). */ + selec = scalarineqsel(root, operator, isgt, iseq, collation, + &vardata, constval, consttype); + + ReleaseVariableStats(vardata); + + PG_RETURN_FLOAT8((float8) selec); +} + +/* + * scalarltsel - Selectivity of "<" for scalars. + */ +Datum +scalarltsel(PG_FUNCTION_ARGS) +{ + return scalarineqsel_wrapper(fcinfo, false, false); +} + +/* + * scalarlesel - Selectivity of "<=" for scalars. + */ +Datum +scalarlesel(PG_FUNCTION_ARGS) +{ + return scalarineqsel_wrapper(fcinfo, false, true); +} + +/* + * scalargtsel - Selectivity of ">" for scalars. + */ +Datum +scalargtsel(PG_FUNCTION_ARGS) +{ + return scalarineqsel_wrapper(fcinfo, true, false); +} + +/* + * scalargesel - Selectivity of ">=" for scalars. + */ +Datum +scalargesel(PG_FUNCTION_ARGS) +{ + return scalarineqsel_wrapper(fcinfo, true, true); +} + +/* + * boolvarsel - Selectivity of Boolean variable. + * + * This can actually be called on any boolean-valued expression. If it + * involves only Vars of the specified relation, and if there are statistics + * about the Var or expression (the latter is possible if it's indexed) then + * we'll produce a real estimate; otherwise it's just a default. + */ +Selectivity +boolvarsel(PlannerInfo *root, Node *arg, int varRelid) +{ + VariableStatData vardata; + double selec; + + examine_variable(root, arg, varRelid, &vardata); + if (HeapTupleIsValid(vardata.statsTuple)) + { + /* + * A boolean variable V is equivalent to the clause V = 't', so we + * compute the selectivity as if that is what we have. + */ + selec = var_eq_const(&vardata, BooleanEqualOperator, InvalidOid, + BoolGetDatum(true), false, true, false); + } + else + { + /* Otherwise, the default estimate is 0.5 */ + selec = 0.5; + } + ReleaseVariableStats(vardata); + return selec; +} + +/* + * booltestsel - Selectivity of BooleanTest Node. 
+ */ +Selectivity +booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg, + int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo) +{ + VariableStatData vardata; + double selec; + + examine_variable(root, arg, varRelid, &vardata); + + if (HeapTupleIsValid(vardata.statsTuple)) + { + Form_pg_statistic stats; + double freq_null; + AttStatsSlot sslot; + + stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); + freq_null = stats->stanullfrac; + + if (get_attstatsslot(&sslot, vardata.statsTuple, + STATISTIC_KIND_MCV, InvalidOid, + ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS) + && sslot.nnumbers > 0) + { + double freq_true; + double freq_false; + + /* + * Get first MCV frequency and derive frequency for true. + */ + if (DatumGetBool(sslot.values[0])) + freq_true = sslot.numbers[0]; + else + freq_true = 1.0 - sslot.numbers[0] - freq_null; + + /* + * Next derive frequency for false. Then use these as appropriate + * to derive frequency for each case. + */ + freq_false = 1.0 - freq_true - freq_null; + + switch (booltesttype) + { + case IS_UNKNOWN: + /* select only NULL values */ + selec = freq_null; + break; + case IS_NOT_UNKNOWN: + /* select non-NULL values */ + selec = 1.0 - freq_null; + break; + case IS_TRUE: + /* select only TRUE values */ + selec = freq_true; + break; + case IS_NOT_TRUE: + /* select non-TRUE values */ + selec = 1.0 - freq_true; + break; + case IS_FALSE: + /* select only FALSE values */ + selec = freq_false; + break; + case IS_NOT_FALSE: + /* select non-FALSE values */ + selec = 1.0 - freq_false; + break; + default: + elog(ERROR, "unrecognized booltesttype: %d", + (int) booltesttype); + selec = 0.0; /* Keep compiler quiet */ + break; + } + + free_attstatsslot(&sslot); + } + else + { + /* + * No most-common-value info available. Still have null fraction + * information, so use it for IS [NOT] UNKNOWN. Otherwise adjust + * for null fraction and assume a 50-50 split of TRUE and FALSE. + */ + switch (booltesttype) + { + case IS_UNKNOWN: + /* select only NULL values */ + selec = freq_null; + break; + case IS_NOT_UNKNOWN: + /* select non-NULL values */ + selec = 1.0 - freq_null; + break; + case IS_TRUE: + case IS_FALSE: + /* Assume we select half of the non-NULL values */ + selec = (1.0 - freq_null) / 2.0; + break; + case IS_NOT_TRUE: + case IS_NOT_FALSE: + /* Assume we select NULLs plus half of the non-NULLs */ + /* equiv. to freq_null + (1.0 - freq_null) / 2.0 */ + selec = (freq_null + 1.0) / 2.0; + break; + default: + elog(ERROR, "unrecognized booltesttype: %d", + (int) booltesttype); + selec = 0.0; /* Keep compiler quiet */ + break; + } + } + } + else + { + /* + * If we can't get variable statistics for the argument, perhaps + * clause_selectivity can do something with it. We ignore the + * possibility of a NULL value when using clause_selectivity, and just + * assume the value is either TRUE or FALSE. + */ + switch (booltesttype) + { + case IS_UNKNOWN: + selec = DEFAULT_UNK_SEL; + break; + case IS_NOT_UNKNOWN: + selec = DEFAULT_NOT_UNK_SEL; + break; + case IS_TRUE: + case IS_NOT_FALSE: + selec = (double) clause_selectivity(root, arg, + varRelid, + jointype, sjinfo); + break; + case IS_FALSE: + case IS_NOT_TRUE: + selec = 1.0 - (double) clause_selectivity(root, arg, + varRelid, + jointype, sjinfo); + break; + default: + elog(ERROR, "unrecognized booltesttype: %d", + (int) booltesttype); + selec = 0.0; /* Keep compiler quiet */ + break; + } + } + + ReleaseVariableStats(vardata); + + /* result should be in range, but make sure... 
*/ + CLAMP_PROBABILITY(selec); + + return (Selectivity) selec; +} + +/* + * nulltestsel - Selectivity of NullTest Node. + */ +Selectivity +nulltestsel(PlannerInfo *root, NullTestType nulltesttype, Node *arg, + int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo) +{ + VariableStatData vardata; + double selec; + + examine_variable(root, arg, varRelid, &vardata); + + if (HeapTupleIsValid(vardata.statsTuple)) + { + Form_pg_statistic stats; + double freq_null; + + stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); + freq_null = stats->stanullfrac; + + switch (nulltesttype) + { + case IS_NULL: + + /* + * Use freq_null directly. + */ + selec = freq_null; + break; + case IS_NOT_NULL: + + /* + * Select not unknown (not null) values. Calculate from + * freq_null. + */ + selec = 1.0 - freq_null; + break; + default: + elog(ERROR, "unrecognized nulltesttype: %d", + (int) nulltesttype); + return (Selectivity) 0; /* keep compiler quiet */ + } + } + else if (vardata.var && IsA(vardata.var, Var) && + ((Var *) vardata.var)->varattno < 0) + { + /* + * There are no stats for system columns, but we know they are never + * NULL. + */ + selec = (nulltesttype == IS_NULL) ? 0.0 : 1.0; + } + else + { + /* + * No ANALYZE stats available, so make a guess + */ + switch (nulltesttype) + { + case IS_NULL: + selec = DEFAULT_UNK_SEL; + break; + case IS_NOT_NULL: + selec = DEFAULT_NOT_UNK_SEL; + break; + default: + elog(ERROR, "unrecognized nulltesttype: %d", + (int) nulltesttype); + return (Selectivity) 0; /* keep compiler quiet */ + } + } + + ReleaseVariableStats(vardata); + + /* result should be in range, but make sure... */ + CLAMP_PROBABILITY(selec); + + return (Selectivity) selec; +} + +/* + * strip_array_coercion - strip binary-compatible relabeling from an array expr + * + * For array values, the parser normally generates ArrayCoerceExpr conversions, + * but it seems possible that RelabelType might show up. Also, the planner + * is not currently tense about collapsing stacked ArrayCoerceExpr nodes, + * so we need to be ready to deal with more than one level. + */ +static Node * +strip_array_coercion(Node *node) +{ + for (;;) + { + if (node && IsA(node, ArrayCoerceExpr)) + { + ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) node; + + /* + * If the per-element expression is just a RelabelType on top of + * CaseTestExpr, then we know it's a binary-compatible relabeling. + */ + if (IsA(acoerce->elemexpr, RelabelType) && + IsA(((RelabelType *) acoerce->elemexpr)->arg, CaseTestExpr)) + node = (Node *) acoerce->arg; + else + break; + } + else if (node && IsA(node, RelabelType)) + { + /* We don't really expect this case, but may as well cope */ + node = (Node *) ((RelabelType *) node)->arg; + } + else + break; + } + return node; +} + +/* + * scalararraysel - Selectivity of ScalarArrayOpExpr Node. 
+ */ +Selectivity +scalararraysel(PlannerInfo *root, + ScalarArrayOpExpr *clause, + bool is_join_clause, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo) +{ + Oid operator = clause->opno; + bool useOr = clause->useOr; + bool isEquality = false; + bool isInequality = false; + Node *leftop; + Node *rightop; + Oid nominal_element_type; + Oid nominal_element_collation; + TypeCacheEntry *typentry; + RegProcedure oprsel; + FmgrInfo oprselproc; + Selectivity s1; + Selectivity s1disjoint; + + /* First, deconstruct the expression */ + Assert(list_length(clause->args) == 2); + leftop = (Node *) linitial(clause->args); + rightop = (Node *) lsecond(clause->args); + + /* aggressively reduce both sides to constants */ + leftop = estimate_expression_value(root, leftop); + rightop = estimate_expression_value(root, rightop); + + /* get nominal (after relabeling) element type of rightop */ + nominal_element_type = get_base_element_type(exprType(rightop)); + if (!OidIsValid(nominal_element_type)) + return (Selectivity) 0.5; /* probably shouldn't happen */ + /* get nominal collation, too, for generating constants */ + nominal_element_collation = exprCollation(rightop); + + /* look through any binary-compatible relabeling of rightop */ + rightop = strip_array_coercion(rightop); + + /* + * Detect whether the operator is the default equality or inequality + * operator of the array element type. + */ + typentry = lookup_type_cache(nominal_element_type, TYPECACHE_EQ_OPR); + if (OidIsValid(typentry->eq_opr)) + { + if (operator == typentry->eq_opr) + isEquality = true; + else if (get_negator(operator) == typentry->eq_opr) + isInequality = true; + } + + /* + * If it is equality or inequality, we might be able to estimate this as a + * form of array containment; for instance "const = ANY(column)" can be + * treated as "ARRAY[const] <@ column". scalararraysel_containment tries + * that, and returns the selectivity estimate if successful, or -1 if not. + */ + if ((isEquality || isInequality) && !is_join_clause) + { + s1 = scalararraysel_containment(root, leftop, rightop, + nominal_element_type, + isEquality, useOr, varRelid); + if (s1 >= 0.0) + return s1; + } + + /* + * Look up the underlying operator's selectivity estimator. Punt if it + * hasn't got one. + */ + if (is_join_clause) + oprsel = get_oprjoin(operator); + else + oprsel = get_oprrest(operator); + if (!oprsel) + return (Selectivity) 0.5; + fmgr_info(oprsel, &oprselproc); + + /* + * In the array-containment check above, we must only believe that an + * operator is equality or inequality if it is the default btree equality + * operator (or its negator) for the element type, since those are the + * operators that array containment will use. But in what follows, we can + * be a little laxer, and also believe that any operators using eqsel() or + * neqsel() as selectivity estimator act like equality or inequality. + */ + if (oprsel == F_EQSEL || oprsel == F_EQJOINSEL) + isEquality = true; + else if (oprsel == F_NEQSEL || oprsel == F_NEQJOINSEL) + isInequality = true; + + /* + * We consider three cases: + * + * 1. rightop is an Array constant: deconstruct the array, apply the + * operator's selectivity function for each array element, and merge the + * results in the same way that clausesel.c does for AND/OR combinations. + * + * 2. rightop is an ARRAY[] construct: apply the operator's selectivity + * function for each element of the ARRAY[] construct, and merge. + * + * 3. otherwise, make a guess ... 
+ */ + if (rightop && IsA(rightop, Const)) + { + Datum arraydatum = ((Const *) rightop)->constvalue; + bool arrayisnull = ((Const *) rightop)->constisnull; + ArrayType *arrayval; + int16 elmlen; + bool elmbyval; + char elmalign; + int num_elems; + Datum *elem_values; + bool *elem_nulls; + int i; + + if (arrayisnull) /* qual can't succeed if null array */ + return (Selectivity) 0.0; + arrayval = DatumGetArrayTypeP(arraydatum); + get_typlenbyvalalign(ARR_ELEMTYPE(arrayval), + &elmlen, &elmbyval, &elmalign); + deconstruct_array(arrayval, + ARR_ELEMTYPE(arrayval), + elmlen, elmbyval, elmalign, + &elem_values, &elem_nulls, &num_elems); + + /* + * For generic operators, we assume the probability of success is + * independent for each array element. But for "= ANY" or "<> ALL", + * if the array elements are distinct (which'd typically be the case) + * then the probabilities are disjoint, and we should just sum them. + * + * If we were being really tense we would try to confirm that the + * elements are all distinct, but that would be expensive and it + * doesn't seem to be worth the cycles; it would amount to penalizing + * well-written queries in favor of poorly-written ones. However, we + * do protect ourselves a little bit by checking whether the + * disjointness assumption leads to an impossible (out of range) + * probability; if so, we fall back to the normal calculation. + */ + s1 = s1disjoint = (useOr ? 0.0 : 1.0); + + for (i = 0; i < num_elems; i++) + { + List *args; + Selectivity s2; + + args = list_make2(leftop, + makeConst(nominal_element_type, + -1, + nominal_element_collation, + elmlen, + elem_values[i], + elem_nulls[i], + elmbyval)); + if (is_join_clause) + s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc, + clause->inputcollid, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int16GetDatum(jointype), + PointerGetDatum(sjinfo))); + else + s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc, + clause->inputcollid, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int32GetDatum(varRelid))); + + if (useOr) + { + s1 = s1 + s2 - s1 * s2; + if (isEquality) + s1disjoint += s2; + } + else + { + s1 = s1 * s2; + if (isInequality) + s1disjoint += s2 - 1.0; + } + } + + /* accept disjoint-probability estimate if in range */ + if ((useOr ? isEquality : isInequality) && + s1disjoint >= 0.0 && s1disjoint <= 1.0) + s1 = s1disjoint; + } + else if (rightop && IsA(rightop, ArrayExpr) && + !((ArrayExpr *) rightop)->multidims) + { + ArrayExpr *arrayexpr = (ArrayExpr *) rightop; + int16 elmlen; + bool elmbyval; + ListCell *l; + + get_typlenbyval(arrayexpr->element_typeid, + &elmlen, &elmbyval); + + /* + * We use the assumption of disjoint probabilities here too, although + * the odds of equal array elements are rather higher if the elements + * are not all constants (which they won't be, else constant folding + * would have reduced the ArrayExpr to a Const). In this path it's + * critical to have the sanity check on the s1disjoint estimate. + */ + s1 = s1disjoint = (useOr ? 0.0 : 1.0); + + foreach(l, arrayexpr->elements) + { + Node *elem = (Node *) lfirst(l); + List *args; + Selectivity s2; + + /* + * Theoretically, if elem isn't of nominal_element_type we should + * insert a RelabelType, but it seems unlikely that any operator + * estimation function would really care ... 
+ */ + args = list_make2(leftop, elem); + if (is_join_clause) + s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc, + clause->inputcollid, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int16GetDatum(jointype), + PointerGetDatum(sjinfo))); + else + s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc, + clause->inputcollid, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int32GetDatum(varRelid))); + + if (useOr) + { + s1 = s1 + s2 - s1 * s2; + if (isEquality) + s1disjoint += s2; + } + else + { + s1 = s1 * s2; + if (isInequality) + s1disjoint += s2 - 1.0; + } + } + + /* accept disjoint-probability estimate if in range */ + if ((useOr ? isEquality : isInequality) && + s1disjoint >= 0.0 && s1disjoint <= 1.0) + s1 = s1disjoint; + } + else + { + CaseTestExpr *dummyexpr; + List *args; + Selectivity s2; + int i; + + /* + * We need a dummy rightop to pass to the operator selectivity + * routine. It can be pretty much anything that doesn't look like a + * constant; CaseTestExpr is a convenient choice. + */ + dummyexpr = makeNode(CaseTestExpr); + dummyexpr->typeId = nominal_element_type; + dummyexpr->typeMod = -1; + dummyexpr->collation = clause->inputcollid; + args = list_make2(leftop, dummyexpr); + if (is_join_clause) + s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc, + clause->inputcollid, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int16GetDatum(jointype), + PointerGetDatum(sjinfo))); + else + s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc, + clause->inputcollid, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int32GetDatum(varRelid))); + s1 = useOr ? 0.0 : 1.0; + + /* + * Arbitrarily assume 10 elements in the eventual array value (see + * also estimate_array_length). We don't risk an assumption of + * disjoint probabilities here. + */ + for (i = 0; i < 10; i++) + { + if (useOr) + s1 = s1 + s2 - s1 * s2; + else + s1 = s1 * s2; + } + } + + /* result should be in range, but make sure... */ + CLAMP_PROBABILITY(s1); + + return s1; +} + +/* + * Estimate number of elements in the array yielded by an expression. + * + * It's important that this agree with scalararraysel. + */ +int +estimate_array_length(Node *arrayexpr) +{ + /* look through any binary-compatible relabeling of arrayexpr */ + arrayexpr = strip_array_coercion(arrayexpr); + + if (arrayexpr && IsA(arrayexpr, Const)) + { + Datum arraydatum = ((Const *) arrayexpr)->constvalue; + bool arrayisnull = ((Const *) arrayexpr)->constisnull; + ArrayType *arrayval; + + if (arrayisnull) + return 0; + arrayval = DatumGetArrayTypeP(arraydatum); + return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval)); + } + else if (arrayexpr && IsA(arrayexpr, ArrayExpr) && + !((ArrayExpr *) arrayexpr)->multidims) + { + return list_length(((ArrayExpr *) arrayexpr)->elements); + } + else + { + /* default guess --- see also scalararraysel */ + return 10; + } +} + +/* + * rowcomparesel - Selectivity of RowCompareExpr Node. + * + * We estimate RowCompare selectivity by considering just the first (high + * order) columns, which makes it equivalent to an ordinary OpExpr. While + * this estimate could be refined by considering additional columns, it + * seems unlikely that we could do a lot better without multi-column + * statistics. 
+ */ +Selectivity +rowcomparesel(PlannerInfo *root, + RowCompareExpr *clause, + int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo) +{ + Selectivity s1; + Oid opno = linitial_oid(clause->opnos); + Oid inputcollid = linitial_oid(clause->inputcollids); + List *opargs; + bool is_join_clause; + + /* Build equivalent arg list for single operator */ + opargs = list_make2(linitial(clause->largs), linitial(clause->rargs)); + + /* + * Decide if it's a join clause. This should match clausesel.c's + * treat_as_join_clause(), except that we intentionally consider only the + * leading columns and not the rest of the clause. + */ + if (varRelid != 0) + { + /* + * Caller is forcing restriction mode (eg, because we are examining an + * inner indexscan qual). + */ + is_join_clause = false; + } + else if (sjinfo == NULL) + { + /* + * It must be a restriction clause, since it's being evaluated at a + * scan node. + */ + is_join_clause = false; + } + else + { + /* + * Otherwise, it's a join if there's more than one base relation used. + */ + is_join_clause = (NumRelids(root, (Node *) opargs) > 1); + } + + if (is_join_clause) + { + /* Estimate selectivity for a join clause. */ + s1 = join_selectivity(root, opno, + opargs, + inputcollid, + jointype, + sjinfo); + } + else + { + /* Estimate selectivity for a restriction clause. */ + s1 = restriction_selectivity(root, opno, + opargs, + inputcollid, + varRelid); + } + + return s1; +} + +/* + * eqjoinsel - Join selectivity of "=" + */ +Datum +eqjoinsel(PG_FUNCTION_ARGS) +{ + PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); + Oid operator = PG_GETARG_OID(1); + List *args = (List *) PG_GETARG_POINTER(2); + +#ifdef NOT_USED + JoinType jointype = (JoinType) PG_GETARG_INT16(3); +#endif + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4); + Oid collation = PG_GET_COLLATION(); + double selec; + double selec_inner; + VariableStatData vardata1; + VariableStatData vardata2; + double nd1; + double nd2; + bool isdefault1; + bool isdefault2; + Oid opfuncoid; + AttStatsSlot sslot1; + AttStatsSlot sslot2; + Form_pg_statistic stats1 = NULL; + Form_pg_statistic stats2 = NULL; + bool have_mcvs1 = false; + bool have_mcvs2 = false; + bool get_mcv_stats; + bool join_is_reversed; + RelOptInfo *inner_rel; + + get_join_variables(root, args, sjinfo, + &vardata1, &vardata2, &join_is_reversed); + + nd1 = get_variable_numdistinct(&vardata1, &isdefault1); + nd2 = get_variable_numdistinct(&vardata2, &isdefault2); + + opfuncoid = get_opcode(operator); + + memset(&sslot1, 0, sizeof(sslot1)); + memset(&sslot2, 0, sizeof(sslot2)); + + /* + * There is no use in fetching one side's MCVs if we lack MCVs for the + * other side, so do a quick check to verify that both stats exist. 
+ */ + get_mcv_stats = (HeapTupleIsValid(vardata1.statsTuple) && + HeapTupleIsValid(vardata2.statsTuple) && + get_attstatsslot(&sslot1, vardata1.statsTuple, + STATISTIC_KIND_MCV, InvalidOid, + 0) && + get_attstatsslot(&sslot2, vardata2.statsTuple, + STATISTIC_KIND_MCV, InvalidOid, + 0)); + + if (HeapTupleIsValid(vardata1.statsTuple)) + { + /* note we allow use of nullfrac regardless of security check */ + stats1 = (Form_pg_statistic) GETSTRUCT(vardata1.statsTuple); + if (get_mcv_stats && + statistic_proc_security_check(&vardata1, opfuncoid)) + have_mcvs1 = get_attstatsslot(&sslot1, vardata1.statsTuple, + STATISTIC_KIND_MCV, InvalidOid, + ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS); + } + + if (HeapTupleIsValid(vardata2.statsTuple)) + { + /* note we allow use of nullfrac regardless of security check */ + stats2 = (Form_pg_statistic) GETSTRUCT(vardata2.statsTuple); + if (get_mcv_stats && + statistic_proc_security_check(&vardata2, opfuncoid)) + have_mcvs2 = get_attstatsslot(&sslot2, vardata2.statsTuple, + STATISTIC_KIND_MCV, InvalidOid, + ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS); + } + + /* We need to compute the inner-join selectivity in all cases */ + selec_inner = eqjoinsel_inner(opfuncoid, collation, + &vardata1, &vardata2, + nd1, nd2, + isdefault1, isdefault2, + &sslot1, &sslot2, + stats1, stats2, + have_mcvs1, have_mcvs2); + + switch (sjinfo->jointype) + { + case JOIN_INNER: + case JOIN_LEFT: + case JOIN_FULL: + selec = selec_inner; + break; + case JOIN_SEMI: + case JOIN_ANTI: + + /* + * Look up the join's inner relation. min_righthand is sufficient + * information because neither SEMI nor ANTI joins permit any + * reassociation into or out of their RHS, so the righthand will + * always be exactly that set of rels. + */ + inner_rel = find_join_input_rel(root, sjinfo->min_righthand); + + if (!join_is_reversed) + selec = eqjoinsel_semi(opfuncoid, collation, + &vardata1, &vardata2, + nd1, nd2, + isdefault1, isdefault2, + &sslot1, &sslot2, + stats1, stats2, + have_mcvs1, have_mcvs2, + inner_rel); + else + { + Oid commop = get_commutator(operator); + Oid commopfuncoid = OidIsValid(commop) ? get_opcode(commop) : InvalidOid; + + selec = eqjoinsel_semi(commopfuncoid, collation, + &vardata2, &vardata1, + nd2, nd1, + isdefault2, isdefault1, + &sslot2, &sslot1, + stats2, stats1, + have_mcvs2, have_mcvs1, + inner_rel); + } + + /* + * We should never estimate the output of a semijoin to be more + * rows than we estimate for an inner join with the same input + * rels and join condition; it's obviously impossible for that to + * happen. The former estimate is N1 * Ssemi while the latter is + * N1 * N2 * Sinner, so we may clamp Ssemi <= N2 * Sinner. Doing + * this is worthwhile because of the shakier estimation rules we + * use in eqjoinsel_semi, particularly in cases where it has to + * punt entirely. + */ + selec = Min(selec, inner_rel->rows * selec_inner); + break; + default: + /* other values not expected here */ + elog(ERROR, "unrecognized join type: %d", + (int) sjinfo->jointype); + selec = 0; /* keep compiler quiet */ + break; + } + + free_attstatsslot(&sslot1); + free_attstatsslot(&sslot2); + + ReleaseVariableStats(vardata1); + ReleaseVariableStats(vardata2); + + CLAMP_PROBABILITY(selec); + + PG_RETURN_FLOAT8((float8) selec); +} + +/* + * eqjoinsel_inner --- eqjoinsel for normal inner join + * + * We also use this for LEFT/FULL outer joins; it's not presently clear + * that it's worth trying to distinguish them here. 
+ */ +static double +eqjoinsel_inner(Oid opfuncoid, Oid collation, + VariableStatData *vardata1, VariableStatData *vardata2, + double nd1, double nd2, + bool isdefault1, bool isdefault2, + AttStatsSlot *sslot1, AttStatsSlot *sslot2, + Form_pg_statistic stats1, Form_pg_statistic stats2, + bool have_mcvs1, bool have_mcvs2) +{ + double selec; + + if (have_mcvs1 && have_mcvs2) + { + /* + * We have most-common-value lists for both relations. Run through + * the lists to see which MCVs actually join to each other with the + * given operator. This allows us to determine the exact join + * selectivity for the portion of the relations represented by the MCV + * lists. We still have to estimate for the remaining population, but + * in a skewed distribution this gives us a big leg up in accuracy. + * For motivation see the analysis in Y. Ioannidis and S. + * Christodoulakis, "On the propagation of errors in the size of join + * results", Technical Report 1018, Computer Science Dept., University + * of Wisconsin, Madison, March 1991 (available from ftp.cs.wisc.edu). + */ + LOCAL_FCINFO(fcinfo, 2); + FmgrInfo eqproc; + bool *hasmatch1; + bool *hasmatch2; + double nullfrac1 = stats1->stanullfrac; + double nullfrac2 = stats2->stanullfrac; + double matchprodfreq, + matchfreq1, + matchfreq2, + unmatchfreq1, + unmatchfreq2, + otherfreq1, + otherfreq2, + totalsel1, + totalsel2; + int i, + nmatches; + + fmgr_info(opfuncoid, &eqproc); + + /* + * Save a few cycles by setting up the fcinfo struct just once. Using + * FunctionCallInvoke directly also avoids failure if the eqproc + * returns NULL, though really equality functions should never do + * that. + */ + InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation, + NULL, NULL); + fcinfo->args[0].isnull = false; + fcinfo->args[1].isnull = false; + + hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool)); + hasmatch2 = (bool *) palloc0(sslot2->nvalues * sizeof(bool)); + + /* + * Note we assume that each MCV will match at most one member of the + * other MCV list. If the operator isn't really equality, there could + * be multiple matches --- but we don't look for them, both for speed + * and because the math wouldn't add up... + */ + matchprodfreq = 0.0; + nmatches = 0; + for (i = 0; i < sslot1->nvalues; i++) + { + int j; + + fcinfo->args[0].value = sslot1->values[i]; + + for (j = 0; j < sslot2->nvalues; j++) + { + Datum fresult; + + if (hasmatch2[j]) + continue; + fcinfo->args[1].value = sslot2->values[j]; + fcinfo->isnull = false; + fresult = FunctionCallInvoke(fcinfo); + if (!fcinfo->isnull && DatumGetBool(fresult)) + { + hasmatch1[i] = hasmatch2[j] = true; + matchprodfreq += sslot1->numbers[i] * sslot2->numbers[j]; + nmatches++; + break; + } + } + } + CLAMP_PROBABILITY(matchprodfreq); + /* Sum up frequencies of matched and unmatched MCVs */ + matchfreq1 = unmatchfreq1 = 0.0; + for (i = 0; i < sslot1->nvalues; i++) + { + if (hasmatch1[i]) + matchfreq1 += sslot1->numbers[i]; + else + unmatchfreq1 += sslot1->numbers[i]; + } + CLAMP_PROBABILITY(matchfreq1); + CLAMP_PROBABILITY(unmatchfreq1); + matchfreq2 = unmatchfreq2 = 0.0; + for (i = 0; i < sslot2->nvalues; i++) + { + if (hasmatch2[i]) + matchfreq2 += sslot2->numbers[i]; + else + unmatchfreq2 += sslot2->numbers[i]; + } + CLAMP_PROBABILITY(matchfreq2); + CLAMP_PROBABILITY(unmatchfreq2); + pfree(hasmatch1); + pfree(hasmatch2); + + /* + * Compute total frequency of non-null values that are not in the MCV + * lists. 
+ */ + otherfreq1 = 1.0 - nullfrac1 - matchfreq1 - unmatchfreq1; + otherfreq2 = 1.0 - nullfrac2 - matchfreq2 - unmatchfreq2; + CLAMP_PROBABILITY(otherfreq1); + CLAMP_PROBABILITY(otherfreq2); + + /* + * We can estimate the total selectivity from the point of view of + * relation 1 as: the known selectivity for matched MCVs, plus + * unmatched MCVs that are assumed to match against random members of + * relation 2's non-MCV population, plus non-MCV values that are + * assumed to match against random members of relation 2's unmatched + * MCVs plus non-MCV values. + */ + totalsel1 = matchprodfreq; + if (nd2 > sslot2->nvalues) + totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - sslot2->nvalues); + if (nd2 > nmatches) + totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) / + (nd2 - nmatches); + /* Same estimate from the point of view of relation 2. */ + totalsel2 = matchprodfreq; + if (nd1 > sslot1->nvalues) + totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - sslot1->nvalues); + if (nd1 > nmatches) + totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) / + (nd1 - nmatches); + + /* + * Use the smaller of the two estimates. This can be justified in + * essentially the same terms as given below for the no-stats case: to + * a first approximation, we are estimating from the point of view of + * the relation with smaller nd. + */ + selec = (totalsel1 < totalsel2) ? totalsel1 : totalsel2; + } + else + { + /* + * We do not have MCV lists for both sides. Estimate the join + * selectivity as MIN(1/nd1,1/nd2)*(1-nullfrac1)*(1-nullfrac2). This + * is plausible if we assume that the join operator is strict and the + * non-null values are about equally distributed: a given non-null + * tuple of rel1 will join to either zero or N2*(1-nullfrac2)/nd2 rows + * of rel2, so total join rows are at most + * N1*(1-nullfrac1)*N2*(1-nullfrac2)/nd2 giving a join selectivity of + * not more than (1-nullfrac1)*(1-nullfrac2)/nd2. By the same logic it + * is not more than (1-nullfrac1)*(1-nullfrac2)/nd1, so the expression + * with MIN() is an upper bound. Using the MIN() means we estimate + * from the point of view of the relation with smaller nd (since the + * larger nd is determining the MIN). It is reasonable to assume that + * most tuples in this rel will have join partners, so the bound is + * probably reasonably tight and should be taken as-is. + * + * XXX Can we be smarter if we have an MCV list for just one side? It + * seems that if we assume equal distribution for the other side, we + * end up with the same answer anyway. + */ + double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0; + double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0; + + selec = (1.0 - nullfrac1) * (1.0 - nullfrac2); + if (nd1 > nd2) + selec /= nd1; + else + selec /= nd2; + } + + return selec; +} + +/* + * eqjoinsel_semi --- eqjoinsel for semi join + * + * (Also used for anti join, which we are supposed to estimate the same way.) + * Caller has ensured that vardata1 is the LHS variable. + * Unlike eqjoinsel_inner, we have to cope with opfuncoid being InvalidOid. + */ +static double +eqjoinsel_semi(Oid opfuncoid, Oid collation, + VariableStatData *vardata1, VariableStatData *vardata2, + double nd1, double nd2, + bool isdefault1, bool isdefault2, + AttStatsSlot *sslot1, AttStatsSlot *sslot2, + Form_pg_statistic stats1, Form_pg_statistic stats2, + bool have_mcvs1, bool have_mcvs2, + RelOptInfo *inner_rel) +{ + double selec; + + /* + * We clamp nd2 to be not more than what we estimate the inner relation's + * size to be. 
This is intuitively somewhat reasonable since obviously + * there can't be more than that many distinct values coming from the + * inner rel. The reason for the asymmetry (ie, that we don't clamp nd1 + * likewise) is that this is the only pathway by which restriction clauses + * applied to the inner rel will affect the join result size estimate, + * since set_joinrel_size_estimates will multiply SEMI/ANTI selectivity by + * only the outer rel's size. If we clamped nd1 we'd be double-counting + * the selectivity of outer-rel restrictions. + * + * We can apply this clamping both with respect to the base relation from + * which the join variable comes (if there is just one), and to the + * immediate inner input relation of the current join. + * + * If we clamp, we can treat nd2 as being a non-default estimate; it's not + * great, maybe, but it didn't come out of nowhere either. This is most + * helpful when the inner relation is empty and consequently has no stats. + */ + if (vardata2->rel) + { + if (nd2 >= vardata2->rel->rows) + { + nd2 = vardata2->rel->rows; + isdefault2 = false; + } + } + if (nd2 >= inner_rel->rows) + { + nd2 = inner_rel->rows; + isdefault2 = false; + } + + if (have_mcvs1 && have_mcvs2 && OidIsValid(opfuncoid)) + { + /* + * We have most-common-value lists for both relations. Run through + * the lists to see which MCVs actually join to each other with the + * given operator. This allows us to determine the exact join + * selectivity for the portion of the relations represented by the MCV + * lists. We still have to estimate for the remaining population, but + * in a skewed distribution this gives us a big leg up in accuracy. + */ + LOCAL_FCINFO(fcinfo, 2); + FmgrInfo eqproc; + bool *hasmatch1; + bool *hasmatch2; + double nullfrac1 = stats1->stanullfrac; + double matchfreq1, + uncertainfrac, + uncertain; + int i, + nmatches, + clamped_nvalues2; + + /* + * The clamping above could have resulted in nd2 being less than + * sslot2->nvalues; in which case, we assume that precisely the nd2 + * most common values in the relation will appear in the join input, + * and so compare to only the first nd2 members of the MCV list. Of + * course this is frequently wrong, but it's the best bet we can make. + */ + clamped_nvalues2 = Min(sslot2->nvalues, nd2); + + fmgr_info(opfuncoid, &eqproc); + + /* + * Save a few cycles by setting up the fcinfo struct just once. Using + * FunctionCallInvoke directly also avoids failure if the eqproc + * returns NULL, though really equality functions should never do + * that. + */ + InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation, + NULL, NULL); + fcinfo->args[0].isnull = false; + fcinfo->args[1].isnull = false; + + hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool)); + hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool)); + + /* + * Note we assume that each MCV will match at most one member of the + * other MCV list. If the operator isn't really equality, there could + * be multiple matches --- but we don't look for them, both for speed + * and because the math wouldn't add up... 
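+ *
+ * (For instance, if nd2 was clamped to 50 above while the inner MCV
+ * list holds 100 entries, only the 50 most common inner values take
+ * part in the matching loop below.)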
+ */ + nmatches = 0; + for (i = 0; i < sslot1->nvalues; i++) + { + int j; + + fcinfo->args[0].value = sslot1->values[i]; + + for (j = 0; j < clamped_nvalues2; j++) + { + Datum fresult; + + if (hasmatch2[j]) + continue; + fcinfo->args[1].value = sslot2->values[j]; + fcinfo->isnull = false; + fresult = FunctionCallInvoke(fcinfo); + if (!fcinfo->isnull && DatumGetBool(fresult)) + { + hasmatch1[i] = hasmatch2[j] = true; + nmatches++; + break; + } + } + } + /* Sum up frequencies of matched MCVs */ + matchfreq1 = 0.0; + for (i = 0; i < sslot1->nvalues; i++) + { + if (hasmatch1[i]) + matchfreq1 += sslot1->numbers[i]; + } + CLAMP_PROBABILITY(matchfreq1); + pfree(hasmatch1); + pfree(hasmatch2); + + /* + * Now we need to estimate the fraction of relation 1 that has at + * least one join partner. We know for certain that the matched MCVs + * do, so that gives us a lower bound, but we're really in the dark + * about everything else. Our crude approach is: if nd1 <= nd2 then + * assume all non-null rel1 rows have join partners, else assume for + * the uncertain rows that a fraction nd2/nd1 have join partners. We + * can discount the known-matched MCVs from the distinct-values counts + * before doing the division. + * + * Crude as the above is, it's completely useless if we don't have + * reliable ndistinct values for both sides. Hence, if either nd1 or + * nd2 is default, punt and assume half of the uncertain rows have + * join partners. + */ + if (!isdefault1 && !isdefault2) + { + nd1 -= nmatches; + nd2 -= nmatches; + if (nd1 <= nd2 || nd2 < 0) + uncertainfrac = 1.0; + else + uncertainfrac = nd2 / nd1; + } + else + uncertainfrac = 0.5; + uncertain = 1.0 - matchfreq1 - nullfrac1; + CLAMP_PROBABILITY(uncertain); + selec = matchfreq1 + uncertainfrac * uncertain; + } + else + { + /* + * Without MCV lists for both sides, we can only use the heuristic + * about nd1 vs nd2. + */ + double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0; + + if (!isdefault1 && !isdefault2) + { + if (nd1 <= nd2 || nd2 < 0) + selec = 1.0 - nullfrac1; + else + selec = (nd2 / nd1) * (1.0 - nullfrac1); + } + else + selec = 0.5 * (1.0 - nullfrac1); + } + + return selec; +} + +/* + * neqjoinsel - Join selectivity of "!=" + */ +Datum +neqjoinsel(PG_FUNCTION_ARGS) +{ + PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); + Oid operator = PG_GETARG_OID(1); + List *args = (List *) PG_GETARG_POINTER(2); + JoinType jointype = (JoinType) PG_GETARG_INT16(3); + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4); + Oid collation = PG_GET_COLLATION(); + float8 result; + + if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) + { + /* + * For semi-joins, if there is more than one distinct value in the RHS + * relation then every non-null LHS row must find a row to join since + * it can only be equal to one of them. We'll assume that there is + * always more than one distinct RHS value for the sake of stability, + * though in theory we could have special cases for empty RHS + * (selectivity = 0) and single-distinct-value RHS (selectivity = + * fraction of LHS that has the same value as the single RHS value). + * + * For anti-joins, if we use the same assumption that there is more + * than one distinct key in the RHS relation, then every non-null LHS + * row must be suppressed by the anti-join. + * + * So either way, the selectivity estimate should be 1 - nullfrac. 
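+ *
+ * As a rough worked example: if the LHS column's stanullfrac is 0.02,
+ * the "!=" semi- or anti-join selectivity computed below comes out as
+ * 1.0 - 0.02 = 0.98.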
+ */ + VariableStatData leftvar; + VariableStatData rightvar; + bool reversed; + HeapTuple statsTuple; + double nullfrac; + + get_join_variables(root, args, sjinfo, &leftvar, &rightvar, &reversed); + statsTuple = reversed ? rightvar.statsTuple : leftvar.statsTuple; + if (HeapTupleIsValid(statsTuple)) + nullfrac = ((Form_pg_statistic) GETSTRUCT(statsTuple))->stanullfrac; + else + nullfrac = 0.0; + ReleaseVariableStats(leftvar); + ReleaseVariableStats(rightvar); + + result = 1.0 - nullfrac; + } + else + { + /* + * We want 1 - eqjoinsel() where the equality operator is the one + * associated with this != operator, that is, its negator. + */ + Oid eqop = get_negator(operator); + + if (eqop) + { + result = + DatumGetFloat8(DirectFunctionCall5Coll(eqjoinsel, + collation, + PointerGetDatum(root), + ObjectIdGetDatum(eqop), + PointerGetDatum(args), + Int16GetDatum(jointype), + PointerGetDatum(sjinfo))); + } + else + { + /* Use default selectivity (should we raise an error instead?) */ + result = DEFAULT_EQ_SEL; + } + result = 1.0 - result; + } + + PG_RETURN_FLOAT8(result); +} + +/* + * scalarltjoinsel - Join selectivity of "<" for scalars + */ +Datum +scalarltjoinsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); +} + +/* + * scalarlejoinsel - Join selectivity of "<=" for scalars + */ +Datum +scalarlejoinsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); +} + +/* + * scalargtjoinsel - Join selectivity of ">" for scalars + */ +Datum +scalargtjoinsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); +} + +/* + * scalargejoinsel - Join selectivity of ">=" for scalars + */ +Datum +scalargejoinsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); +} + + +/* + * mergejoinscansel - Scan selectivity of merge join. + * + * A merge join will stop as soon as it exhausts either input stream. + * Therefore, if we can estimate the ranges of both input variables, + * we can estimate how much of the input will actually be read. This + * can have a considerable impact on the cost when using indexscans. + * + * Also, we can estimate how much of each input has to be read before the + * first join pair is found, which will affect the join's startup time. + * + * clause should be a clause already known to be mergejoinable. opfamily, + * strategy, and nulls_first specify the sort ordering being used. + * + * The outputs are: + * *leftstart is set to the fraction of the left-hand variable expected + * to be scanned before the first join pair is found (0 to 1). + * *leftend is set to the fraction of the left-hand variable expected + * to be scanned before the join terminates (0 to 1). + * *rightstart, *rightend similarly for the right-hand variable. + */ +void +mergejoinscansel(PlannerInfo *root, Node *clause, + Oid opfamily, int strategy, bool nulls_first, + Selectivity *leftstart, Selectivity *leftend, + Selectivity *rightstart, Selectivity *rightend) +{ + Node *left, + *right; + VariableStatData leftvar, + rightvar; + int op_strategy; + Oid op_lefttype; + Oid op_righttype; + Oid opno, + collation, + lsortop, + rsortop, + lstatop, + rstatop, + ltop, + leop, + revltop, + revleop; + bool isgt; + Datum leftmin, + leftmax, + rightmin, + rightmax; + double selec; + + /* Set default results if we can't figure anything out. */ + /* XXX should default "start" fraction be a bit more than 0? 
*/ + *leftstart = *rightstart = 0.0; + *leftend = *rightend = 1.0; + + /* Deconstruct the merge clause */ + if (!is_opclause(clause)) + return; /* shouldn't happen */ + opno = ((OpExpr *) clause)->opno; + collation = ((OpExpr *) clause)->inputcollid; + left = get_leftop((Expr *) clause); + right = get_rightop((Expr *) clause); + if (!right) + return; /* shouldn't happen */ + + /* Look for stats for the inputs */ + examine_variable(root, left, 0, &leftvar); + examine_variable(root, right, 0, &rightvar); + + /* Extract the operator's declared left/right datatypes */ + get_op_opfamily_properties(opno, opfamily, false, + &op_strategy, + &op_lefttype, + &op_righttype); + Assert(op_strategy == BTEqualStrategyNumber); + + /* + * Look up the various operators we need. If we don't find them all, it + * probably means the opfamily is broken, but we just fail silently. + * + * Note: we expect that pg_statistic histograms will be sorted by the '<' + * operator, regardless of which sort direction we are considering. + */ + switch (strategy) + { + case BTLessStrategyNumber: + isgt = false; + if (op_lefttype == op_righttype) + { + /* easy case */ + ltop = get_opfamily_member(opfamily, + op_lefttype, op_righttype, + BTLessStrategyNumber); + leop = get_opfamily_member(opfamily, + op_lefttype, op_righttype, + BTLessEqualStrategyNumber); + lsortop = ltop; + rsortop = ltop; + lstatop = lsortop; + rstatop = rsortop; + revltop = ltop; + revleop = leop; + } + else + { + ltop = get_opfamily_member(opfamily, + op_lefttype, op_righttype, + BTLessStrategyNumber); + leop = get_opfamily_member(opfamily, + op_lefttype, op_righttype, + BTLessEqualStrategyNumber); + lsortop = get_opfamily_member(opfamily, + op_lefttype, op_lefttype, + BTLessStrategyNumber); + rsortop = get_opfamily_member(opfamily, + op_righttype, op_righttype, + BTLessStrategyNumber); + lstatop = lsortop; + rstatop = rsortop; + revltop = get_opfamily_member(opfamily, + op_righttype, op_lefttype, + BTLessStrategyNumber); + revleop = get_opfamily_member(opfamily, + op_righttype, op_lefttype, + BTLessEqualStrategyNumber); + } + break; + case BTGreaterStrategyNumber: + /* descending-order case */ + isgt = true; + if (op_lefttype == op_righttype) + { + /* easy case */ + ltop = get_opfamily_member(opfamily, + op_lefttype, op_righttype, + BTGreaterStrategyNumber); + leop = get_opfamily_member(opfamily, + op_lefttype, op_righttype, + BTGreaterEqualStrategyNumber); + lsortop = ltop; + rsortop = ltop; + lstatop = get_opfamily_member(opfamily, + op_lefttype, op_lefttype, + BTLessStrategyNumber); + rstatop = lstatop; + revltop = ltop; + revleop = leop; + } + else + { + ltop = get_opfamily_member(opfamily, + op_lefttype, op_righttype, + BTGreaterStrategyNumber); + leop = get_opfamily_member(opfamily, + op_lefttype, op_righttype, + BTGreaterEqualStrategyNumber); + lsortop = get_opfamily_member(opfamily, + op_lefttype, op_lefttype, + BTGreaterStrategyNumber); + rsortop = get_opfamily_member(opfamily, + op_righttype, op_righttype, + BTGreaterStrategyNumber); + lstatop = get_opfamily_member(opfamily, + op_lefttype, op_lefttype, + BTLessStrategyNumber); + rstatop = get_opfamily_member(opfamily, + op_righttype, op_righttype, + BTLessStrategyNumber); + revltop = get_opfamily_member(opfamily, + op_righttype, op_lefttype, + BTGreaterStrategyNumber); + revleop = get_opfamily_member(opfamily, + op_righttype, op_lefttype, + BTGreaterEqualStrategyNumber); + } + break; + default: + goto fail; /* shouldn't get here */ + } + + if (!OidIsValid(lsortop) || + !OidIsValid(rsortop) || + 
!OidIsValid(lstatop) || + !OidIsValid(rstatop) || + !OidIsValid(ltop) || + !OidIsValid(leop) || + !OidIsValid(revltop) || + !OidIsValid(revleop)) + goto fail; /* insufficient info in catalogs */ + + /* Try to get ranges of both inputs */ + if (!isgt) + { + if (!get_variable_range(root, &leftvar, lstatop, collation, + &leftmin, &leftmax)) + goto fail; /* no range available from stats */ + if (!get_variable_range(root, &rightvar, rstatop, collation, + &rightmin, &rightmax)) + goto fail; /* no range available from stats */ + } + else + { + /* need to swap the max and min */ + if (!get_variable_range(root, &leftvar, lstatop, collation, + &leftmax, &leftmin)) + goto fail; /* no range available from stats */ + if (!get_variable_range(root, &rightvar, rstatop, collation, + &rightmax, &rightmin)) + goto fail; /* no range available from stats */ + } + + /* + * Now, the fraction of the left variable that will be scanned is the + * fraction that's <= the right-side maximum value. But only believe + * non-default estimates, else stick with our 1.0. + */ + selec = scalarineqsel(root, leop, isgt, true, collation, &leftvar, + rightmax, op_righttype); + if (selec != DEFAULT_INEQ_SEL) + *leftend = selec; + + /* And similarly for the right variable. */ + selec = scalarineqsel(root, revleop, isgt, true, collation, &rightvar, + leftmax, op_lefttype); + if (selec != DEFAULT_INEQ_SEL) + *rightend = selec; + + /* + * Only one of the two "end" fractions can really be less than 1.0; + * believe the smaller estimate and reset the other one to exactly 1.0. If + * we get exactly equal estimates (as can easily happen with self-joins), + * believe neither. + */ + if (*leftend > *rightend) + *leftend = 1.0; + else if (*leftend < *rightend) + *rightend = 1.0; + else + *leftend = *rightend = 1.0; + + /* + * Also, the fraction of the left variable that will be scanned before the + * first join pair is found is the fraction that's < the right-side + * minimum value. But only believe non-default estimates, else stick with + * our own default. + */ + selec = scalarineqsel(root, ltop, isgt, false, collation, &leftvar, + rightmin, op_righttype); + if (selec != DEFAULT_INEQ_SEL) + *leftstart = selec; + + /* And similarly for the right variable. */ + selec = scalarineqsel(root, revltop, isgt, false, collation, &rightvar, + leftmin, op_lefttype); + if (selec != DEFAULT_INEQ_SEL) + *rightstart = selec; + + /* + * Only one of the two "start" fractions can really be more than zero; + * believe the larger estimate and reset the other one to exactly 0.0. If + * we get exactly equal estimates (as can easily happen with self-joins), + * believe neither. + */ + if (*leftstart < *rightstart) + *leftstart = 0.0; + else if (*leftstart > *rightstart) + *rightstart = 0.0; + else + *leftstart = *rightstart = 0.0; + + /* + * If the sort order is nulls-first, we're going to have to skip over any + * nulls too. These would not have been counted by scalarineqsel, and we + * can safely add in this fraction regardless of whether we believe + * scalarineqsel's results or not. But be sure to clamp the sum to 1.0! 
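+ *
+ * For instance, if *leftstart was estimated as 0.30 and the left
+ * column's stanullfrac is 0.10, the nulls-first adjustment below bumps
+ * *leftstart (and likewise *leftend) by 0.10, with CLAMP_PROBABILITY
+ * keeping the results inside [0, 1].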
+ */ + if (nulls_first) + { + Form_pg_statistic stats; + + if (HeapTupleIsValid(leftvar.statsTuple)) + { + stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple); + *leftstart += stats->stanullfrac; + CLAMP_PROBABILITY(*leftstart); + *leftend += stats->stanullfrac; + CLAMP_PROBABILITY(*leftend); + } + if (HeapTupleIsValid(rightvar.statsTuple)) + { + stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple); + *rightstart += stats->stanullfrac; + CLAMP_PROBABILITY(*rightstart); + *rightend += stats->stanullfrac; + CLAMP_PROBABILITY(*rightend); + } + } + + /* Disbelieve start >= end, just in case that can happen */ + if (*leftstart >= *leftend) + { + *leftstart = 0.0; + *leftend = 1.0; + } + if (*rightstart >= *rightend) + { + *rightstart = 0.0; + *rightend = 1.0; + } + +fail: + ReleaseVariableStats(leftvar); + ReleaseVariableStats(rightvar); +} + + +/* + * matchingsel -- generic matching-operator selectivity support + * + * Use these for any operators that (a) are on data types for which we collect + * standard statistics, and (b) have behavior for which the default estimate + * (twice DEFAULT_EQ_SEL) is sane. Typically that is good for match-like + * operators. + */ + +Datum +matchingsel(PG_FUNCTION_ARGS) +{ + PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); + Oid operator = PG_GETARG_OID(1); + List *args = (List *) PG_GETARG_POINTER(2); + int varRelid = PG_GETARG_INT32(3); + Oid collation = PG_GET_COLLATION(); + double selec; + + /* Use generic restriction selectivity logic. */ + selec = generic_restriction_selectivity(root, operator, collation, + args, varRelid, + DEFAULT_MATCHING_SEL); + + PG_RETURN_FLOAT8((float8) selec); +} + +Datum +matchingjoinsel(PG_FUNCTION_ARGS) +{ + /* Just punt, for the moment. */ + PG_RETURN_FLOAT8(DEFAULT_MATCHING_SEL); +} + + +/* + * Helper routine for estimate_num_groups: add an item to a list of + * GroupVarInfos, but only if it's not known equal to any of the existing + * entries. + */ +typedef struct +{ + Node *var; /* might be an expression, not just a Var */ + RelOptInfo *rel; /* relation it belongs to */ + double ndistinct; /* # distinct values */ + bool isdefault; /* true if DEFAULT_NUM_DISTINCT was used */ +} GroupVarInfo; + +static List * +add_unique_group_var(PlannerInfo *root, List *varinfos, + Node *var, VariableStatData *vardata) +{ + GroupVarInfo *varinfo; + double ndistinct; + bool isdefault; + ListCell *lc; + + ndistinct = get_variable_numdistinct(vardata, &isdefault); + + foreach(lc, varinfos) + { + varinfo = (GroupVarInfo *) lfirst(lc); + + /* Drop exact duplicates */ + if (equal(var, varinfo->var)) + return varinfos; + + /* + * Drop known-equal vars, but only if they belong to different + * relations (see comments for estimate_num_groups) + */ + if (vardata->rel != varinfo->rel && + exprs_known_equal(root, var, varinfo->var)) + { + if (varinfo->ndistinct <= ndistinct) + { + /* Keep older item, forget new one */ + return varinfos; + } + else + { + /* Delete the older item */ + varinfos = foreach_delete_current(varinfos, lc); + } + } + } + + varinfo = (GroupVarInfo *) palloc(sizeof(GroupVarInfo)); + + varinfo->var = var; + varinfo->rel = vardata->rel; + varinfo->ndistinct = ndistinct; + varinfo->isdefault = isdefault; + varinfos = lappend(varinfos, varinfo); + return varinfos; +} + +/* + * estimate_num_groups - Estimate number of groups in a grouped query + * + * Given a query having a GROUP BY clause, estimate how many groups there + * will be --- ie, the number of distinct combinations of the GROUP BY + * expressions. 
+ * + * This routine is also used to estimate the number of rows emitted by + * a DISTINCT filtering step; that is an isomorphic problem. (Note: + * actually, we only use it for DISTINCT when there's no grouping or + * aggregation ahead of the DISTINCT.) + * + * Inputs: + * root - the query + * groupExprs - list of expressions being grouped by + * input_rows - number of rows estimated to arrive at the group/unique + * filter step + * pgset - NULL, or a List** pointing to a grouping set to filter the + * groupExprs against + * + * Outputs: + * estinfo - When passed as non-NULL, the function will set bits in the + * "flags" field in order to provide callers with additional information + * about the estimation. Currently, we only set the SELFLAG_USED_DEFAULT + * bit if we used any default values in the estimation. + * + * Given the lack of any cross-correlation statistics in the system, it's + * impossible to do anything really trustworthy with GROUP BY conditions + * involving multiple Vars. We should however avoid assuming the worst + * case (all possible cross-product terms actually appear as groups) since + * very often the grouped-by Vars are highly correlated. Our current approach + * is as follows: + * 1. Expressions yielding boolean are assumed to contribute two groups, + * independently of their content, and are ignored in the subsequent + * steps. This is mainly because tests like "col IS NULL" break the + * heuristic used in step 2 especially badly. + * 2. Reduce the given expressions to a list of unique Vars used. For + * example, GROUP BY a, a + b is treated the same as GROUP BY a, b. + * It is clearly correct not to count the same Var more than once. + * It is also reasonable to treat f(x) the same as x: f() cannot + * increase the number of distinct values (unless it is volatile, + * which we consider unlikely for grouping), but it probably won't + * reduce the number of distinct values much either. + * As a special case, if a GROUP BY expression can be matched to an + * expressional index for which we have statistics, then we treat the + * whole expression as though it were just a Var. + * 3. If the list contains Vars of different relations that are known equal + * due to equivalence classes, then drop all but one of the Vars from each + * known-equal set, keeping the one with smallest estimated # of values + * (since the extra values of the others can't appear in joined rows). + * Note the reason we only consider Vars of different relations is that + * if we considered ones of the same rel, we'd be double-counting the + * restriction selectivity of the equality in the next step. + * 4. For Vars within a single source rel, we multiply together the numbers + * of values, clamp to the number of rows in the rel (divided by 10 if + * more than one Var), and then multiply by a factor based on the + * selectivity of the restriction clauses for that rel. When there's + * more than one Var, the initial product is probably too high (it's the + * worst case) but clamping to a fraction of the rel's rows seems to be a + * helpful heuristic for not letting the estimate get out of hand. (The + * factor of 10 is derived from pre-Postgres-7.4 practice.) The factor + * we multiply by to adjust for the restriction selectivity assumes that + * the restriction clauses are independent of the grouping, which may not + * be a valid assumption, but it's hard to do better. + * 5. If there are Vars from multiple rels, we repeat step 4 for each such + * rel, and multiply the results together. 
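+ *
+ * As a rough illustration of steps 2-4 (with hypothetical numbers): for
+ * GROUP BY a, b where both columns come from one rel with 100,000 rows,
+ * ndistinct(a) = 50 and ndistinct(b) = 1,000, the initial product is
+ * 50,000, which is clamped to 100,000 / 10 = 10,000 and then scaled by
+ * a factor based on the rel's restriction selectivity.
+ *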
+ * Note that rels not containing grouped Vars are ignored completely, as are + * join clauses. Such rels cannot increase the number of groups, and we + * assume such clauses do not reduce the number either (somewhat bogus, + * but we don't have the info to do better). + */ +double +estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, + List **pgset, EstimationInfo *estinfo) +{ + List *varinfos = NIL; + double srf_multiplier = 1.0; + double numdistinct; + ListCell *l; + int i; + + /* Zero the estinfo output parameter, if non-NULL */ + if (estinfo != NULL) + memset(estinfo, 0, sizeof(EstimationInfo)); + + /* + * We don't ever want to return an estimate of zero groups, as that tends + * to lead to division-by-zero and other unpleasantness. The input_rows + * estimate is usually already at least 1, but clamp it just in case it + * isn't. + */ + input_rows = clamp_row_est(input_rows); + + /* + * If no grouping columns, there's exactly one group. (This can't happen + * for normal cases with GROUP BY or DISTINCT, but it is possible for + * corner cases with set operations.) + */ + if (groupExprs == NIL || (pgset && *pgset == NIL)) + return 1.0; + + /* + * Count groups derived from boolean grouping expressions. For other + * expressions, find the unique Vars used, treating an expression as a Var + * if we can find stats for it. For each one, record the statistical + * estimate of number of distinct values (total in its table, without + * regard for filtering). + */ + numdistinct = 1.0; + + i = 0; + foreach(l, groupExprs) + { + Node *groupexpr = (Node *) lfirst(l); + double this_srf_multiplier; + VariableStatData vardata; + List *varshere; + ListCell *l2; + + /* is expression in this grouping set? */ + if (pgset && !list_member_int(*pgset, i++)) + continue; + + /* + * Set-returning functions in grouping columns are a bit problematic. + * The code below will effectively ignore their SRF nature and come up + * with a numdistinct estimate as though they were scalar functions. + * We compensate by scaling up the end result by the largest SRF + * rowcount estimate. (This will be an overestimate if the SRF + * produces multiple copies of any output value, but it seems best to + * assume the SRF's outputs are distinct. In any case, it's probably + * pointless to worry too much about this without much better + * estimates for SRF output rowcounts than we have today.) + */ + this_srf_multiplier = expression_returns_set_rows(root, groupexpr); + if (srf_multiplier < this_srf_multiplier) + srf_multiplier = this_srf_multiplier; + + /* Short-circuit for expressions returning boolean */ + if (exprType(groupexpr) == BOOLOID) + { + numdistinct *= 2.0; + continue; + } + + /* + * If examine_variable is able to deduce anything about the GROUP BY + * expression, treat it as a single variable even if it's really more + * complicated. + * + * XXX This has the consequence that if there's a statistics object on + * the expression, we don't split it into individual Vars. This + * affects our selection of statistics in + * estimate_multivariate_ndistinct, because it's probably better to + * use more accurate estimate for each expression and treat them as + * independent, than to combine estimates for the extracted variables + * when we don't know how that relates to the expressions. 
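+ *
+ * (Illustration with hypothetical columns a and b: if extended
+ * statistics have been defined on the expression a + b, a GROUP BY
+ * a + b item is kept as a single "variable" with the expression's own
+ * ndistinct estimate, rather than being split into a and b.)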
+ */ + examine_variable(root, groupexpr, 0, &vardata); + if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique) + { + varinfos = add_unique_group_var(root, varinfos, + groupexpr, &vardata); + ReleaseVariableStats(vardata); + continue; + } + ReleaseVariableStats(vardata); + + /* + * Else pull out the component Vars. Handle PlaceHolderVars by + * recursing into their arguments (effectively assuming that the + * PlaceHolderVar doesn't change the number of groups, which boils + * down to ignoring the possible addition of nulls to the result set). + */ + varshere = pull_var_clause(groupexpr, + PVC_RECURSE_AGGREGATES | + PVC_RECURSE_WINDOWFUNCS | + PVC_RECURSE_PLACEHOLDERS); + + /* + * If we find any variable-free GROUP BY item, then either it is a + * constant (and we can ignore it) or it contains a volatile function; + * in the latter case we punt and assume that each input row will + * yield a distinct group. + */ + if (varshere == NIL) + { + if (contain_volatile_functions(groupexpr)) + return input_rows; + continue; + } + + /* + * Else add variables to varinfos list + */ + foreach(l2, varshere) + { + Node *var = (Node *) lfirst(l2); + + examine_variable(root, var, 0, &vardata); + varinfos = add_unique_group_var(root, varinfos, var, &vardata); + ReleaseVariableStats(vardata); + } + } + + /* + * If now no Vars, we must have an all-constant or all-boolean GROUP BY + * list. + */ + if (varinfos == NIL) + { + /* Apply SRF multiplier as we would do in the long path */ + numdistinct *= srf_multiplier; + /* Round off */ + numdistinct = ceil(numdistinct); + /* Guard against out-of-range answers */ + if (numdistinct > input_rows) + numdistinct = input_rows; + if (numdistinct < 1.0) + numdistinct = 1.0; + return numdistinct; + } + + /* + * Group Vars by relation and estimate total numdistinct. + * + * For each iteration of the outer loop, we process the frontmost Var in + * varinfos, plus all other Vars in the same relation. We remove these + * Vars from the newvarinfos list for the next iteration. This is the + * easiest way to group Vars of same rel together. + */ + do + { + GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos); + RelOptInfo *rel = varinfo1->rel; + double reldistinct = 1; + double relmaxndistinct = reldistinct; + int relvarcount = 0; + List *newvarinfos = NIL; + List *relvarinfos = NIL; + + /* + * Split the list of varinfos in two - one for the current rel, one + * for remaining Vars on other rels. + */ + relvarinfos = lappend(relvarinfos, varinfo1); + for_each_from(l, varinfos, 1) + { + GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l); + + if (varinfo2->rel == varinfo1->rel) + { + /* varinfos on current rel */ + relvarinfos = lappend(relvarinfos, varinfo2); + } + else + { + /* not time to process varinfo2 yet */ + newvarinfos = lappend(newvarinfos, varinfo2); + } + } + + /* + * Get the numdistinct estimate for the Vars of this rel. We + * iteratively search for multivariate n-distinct with maximum number + * of vars; assuming that each var group is independent of the others, + * we multiply them together. Any remaining relvarinfos after no more + * multivariate matches are found are assumed independent too, so + * their individual ndistinct estimates are multiplied also. + * + * While iterating, count how many separate numdistinct values we + * apply. We apply a fudge factor below, but only if we multiplied + * more than one such values. 
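+ *
+ * For example, with GROUP BY a, b, c on one rel where an extended
+ * ndistinct statistic covers (a, b) but nothing covers c, we use the
+ * multivariate estimate for (a, b), multiply by c's individual
+ * ndistinct, and end up having applied two separate numdistinct
+ * values, so the fudge factor below kicks in.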
+ */ + while (relvarinfos) + { + double mvndistinct; + + if (estimate_multivariate_ndistinct(root, rel, &relvarinfos, + &mvndistinct)) + { + reldistinct *= mvndistinct; + if (relmaxndistinct < mvndistinct) + relmaxndistinct = mvndistinct; + relvarcount++; + } + else + { + foreach(l, relvarinfos) + { + GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l); + + reldistinct *= varinfo2->ndistinct; + if (relmaxndistinct < varinfo2->ndistinct) + relmaxndistinct = varinfo2->ndistinct; + relvarcount++; + + /* + * When varinfo2's isdefault is set then we'd better set + * the SELFLAG_USED_DEFAULT bit in the EstimationInfo. + */ + if (estinfo != NULL && varinfo2->isdefault) + estinfo->flags |= SELFLAG_USED_DEFAULT; + } + + /* we're done with this relation */ + relvarinfos = NIL; + } + } + + /* + * Sanity check --- don't divide by zero if empty relation. + */ + Assert(IS_SIMPLE_REL(rel)); + if (rel->tuples > 0) + { + /* + * Clamp to size of rel, or size of rel / 10 if multiple Vars. The + * fudge factor is because the Vars are probably correlated but we + * don't know by how much. We should never clamp to less than the + * largest ndistinct value for any of the Vars, though, since + * there will surely be at least that many groups. + */ + double clamp = rel->tuples; + + if (relvarcount > 1) + { + clamp *= 0.1; + if (clamp < relmaxndistinct) + { + clamp = relmaxndistinct; + /* for sanity in case some ndistinct is too large: */ + if (clamp > rel->tuples) + clamp = rel->tuples; + } + } + if (reldistinct > clamp) + reldistinct = clamp; + + /* + * Update the estimate based on the restriction selectivity, + * guarding against division by zero when reldistinct is zero. + * Also skip this if we know that we are returning all rows. + */ + if (reldistinct > 0 && rel->rows < rel->tuples) + { + /* + * Given a table containing N rows with n distinct values in a + * uniform distribution, if we select p rows at random then + * the expected number of distinct values selected is + * + * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1)) + * + * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!) + * + * See "Approximating block accesses in database + * organizations", S. B. Yao, Communications of the ACM, + * Volume 20 Issue 4, April 1977 Pages 260-261. + * + * Alternatively, re-arranging the terms from the factorials, + * this may be written as + * + * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1)) + * + * This form of the formula is more efficient to compute in + * the common case where p is larger than N/n. Additionally, + * as pointed out by Dell'Era, if i << N for all terms in the + * product, it can be approximated by + * + * n * (1 - ((N-p)/N)^(N/n)) + * + * See "Expected distinct values when selecting from a bag + * without replacement", Alberto Dell'Era, + * http://www.adellera.it/investigations/distinct_balls/. + * + * The condition i << N is equivalent to n >> 1, so this is a + * good approximation when the number of distinct values in + * the table is large. It turns out that this formula also + * works well even when n is small. + */ + reldistinct *= + (1 - pow((rel->tuples - rel->rows) / rel->tuples, + rel->tuples / reldistinct)); + } + reldistinct = clamp_row_est(reldistinct); + + /* + * Update estimate of total distinct groups. 
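+ *
+ * Rough numeric check of the approximation above: selecting p = 100
+ * rows from N = 10,000 rows with n = 1,000 distinct values gives about
+ * 1000 * (1 - 0.99^10), i.e. roughly 96 expected distinct values.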
+ */ + numdistinct *= reldistinct; + } + + varinfos = newvarinfos; + } while (varinfos != NIL); + + /* Now we can account for the effects of any SRFs */ + numdistinct *= srf_multiplier; + + /* Round off */ + numdistinct = ceil(numdistinct); + + /* Guard against out-of-range answers */ + if (numdistinct > input_rows) + numdistinct = input_rows; + if (numdistinct < 1.0) + numdistinct = 1.0; + + return numdistinct; +} + +/* + * Estimate hash bucket statistics when the specified expression is used + * as a hash key for the given number of buckets. + * + * This attempts to determine two values: + * + * 1. The frequency of the most common value of the expression (returns + * zero into *mcv_freq if we can't get that). + * + * 2. The "bucketsize fraction", ie, average number of entries in a bucket + * divided by total tuples in relation. + * + * XXX This is really pretty bogus since we're effectively assuming that the + * distribution of hash keys will be the same after applying restriction + * clauses as it was in the underlying relation. However, we are not nearly + * smart enough to figure out how the restrict clauses might change the + * distribution, so this will have to do for now. + * + * We are passed the number of buckets the executor will use for the given + * input relation. If the data were perfectly distributed, with the same + * number of tuples going into each available bucket, then the bucketsize + * fraction would be 1/nbuckets. But this happy state of affairs will occur + * only if (a) there are at least nbuckets distinct data values, and (b) + * we have a not-too-skewed data distribution. Otherwise the buckets will + * be nonuniformly occupied. If the other relation in the join has a key + * distribution similar to this one's, then the most-loaded buckets are + * exactly those that will be probed most often. Therefore, the "average" + * bucket size for costing purposes should really be taken as something close + * to the "worst case" bucket size. We try to estimate this by adjusting the + * fraction if there are too few distinct data values, and then scaling up + * by the ratio of the most common value's frequency to the average frequency. + * + * If no statistics are available, use a default estimate of 0.1. This will + * discourage use of a hash rather strongly if the inner relation is large, + * which is what we want. We do not want to hash unless we know that the + * inner rel is well-dispersed (or the alternatives seem much worse). + * + * The caller should also check that the mcv_freq is not so large that the + * most common value would by itself require an impractically large bucket. + * In a hash join, the executor can split buckets if they get too big, but + * obviously that doesn't help for a bucket that contains many duplicates of + * the same value. + */ +void +estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets, + Selectivity *mcv_freq, + Selectivity *bucketsize_frac) +{ + VariableStatData vardata; + double estfract, + ndistinct, + stanullfrac, + avgfreq; + bool isdefault; + AttStatsSlot sslot; + + examine_variable(root, hashkey, 0, &vardata); + + /* Look up the frequency of the most common value, if available */ + *mcv_freq = 0.0; + + if (HeapTupleIsValid(vardata.statsTuple)) + { + if (get_attstatsslot(&sslot, vardata.statsTuple, + STATISTIC_KIND_MCV, InvalidOid, + ATTSTATSSLOT_NUMBERS)) + { + /* + * The first MCV stat is for the most common value. 
+ */ + if (sslot.nnumbers > 0) + *mcv_freq = sslot.numbers[0]; + free_attstatsslot(&sslot); + } + } + + /* Get number of distinct values */ + ndistinct = get_variable_numdistinct(&vardata, &isdefault); + + /* + * If ndistinct isn't real, punt. We normally return 0.1, but if the + * mcv_freq is known to be even higher than that, use it instead. + */ + if (isdefault) + { + *bucketsize_frac = (Selectivity) Max(0.1, *mcv_freq); + ReleaseVariableStats(vardata); + return; + } + + /* Get fraction that are null */ + if (HeapTupleIsValid(vardata.statsTuple)) + { + Form_pg_statistic stats; + + stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); + stanullfrac = stats->stanullfrac; + } + else + stanullfrac = 0.0; + + /* Compute avg freq of all distinct data values in raw relation */ + avgfreq = (1.0 - stanullfrac) / ndistinct; + + /* + * Adjust ndistinct to account for restriction clauses. Observe we are + * assuming that the data distribution is affected uniformly by the + * restriction clauses! + * + * XXX Possibly better way, but much more expensive: multiply by + * selectivity of rel's restriction clauses that mention the target Var. + */ + if (vardata.rel && vardata.rel->tuples > 0) + { + ndistinct *= vardata.rel->rows / vardata.rel->tuples; + ndistinct = clamp_row_est(ndistinct); + } + + /* + * Initial estimate of bucketsize fraction is 1/nbuckets as long as the + * number of buckets is less than the expected number of distinct values; + * otherwise it is 1/ndistinct. + */ + if (ndistinct > nbuckets) + estfract = 1.0 / nbuckets; + else + estfract = 1.0 / ndistinct; + + /* + * Adjust estimated bucketsize upward to account for skewed distribution. + */ + if (avgfreq > 0.0 && *mcv_freq > avgfreq) + estfract *= *mcv_freq / avgfreq; + + /* + * Clamp bucketsize to sane range (the above adjustment could easily + * produce an out-of-range result). We set the lower bound a little above + * zero, since zero isn't a very sane result. + */ + if (estfract < 1.0e-6) + estfract = 1.0e-6; + else if (estfract > 1.0) + estfract = 1.0; + + *bucketsize_frac = (Selectivity) estfract; + + ReleaseVariableStats(vardata); +} + +/* + * estimate_hashagg_tablesize + * estimate the number of bytes that a hash aggregate hashtable will + * require based on the agg_costs, path width and number of groups. + * + * We return the result as "double" to forestall any possible overflow + * problem in the multiplication by dNumGroups. + * + * XXX this may be over-estimating the size now that hashagg knows to omit + * unneeded columns from the hashtable. Also for mixed-mode grouping sets, + * grouping columns not in the hashed set are counted here even though hashagg + * won't store them. Is this a problem? + */ +double +estimate_hashagg_tablesize(PlannerInfo *root, Path *path, + const AggClauseCosts *agg_costs, double dNumGroups) +{ + Size hashentrysize; + + hashentrysize = hash_agg_entry_size(list_length(root->aggtransinfos), + path->pathtarget->width, + agg_costs->transitionSpace); + + /* + * Note that this disregards the effect of fill-factor and growth policy + * of the hash table. That's probably ok, given that the default + * fill-factor is relatively high. It'd be hard to meaningfully factor in + * "double-in-size" growth policies here. 
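+ *
+ * As a ballpark example, 1 million estimated groups at a 64-byte entry
+ * size comes out as roughly 64 MB of hashtable.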
+ */ + return hashentrysize * dNumGroups; +} + + +/*------------------------------------------------------------------------- + * + * Support routines + * + *------------------------------------------------------------------------- + */ + +/* + * Find applicable ndistinct statistics for the given list of VarInfos (which + * must all belong to the given rel), and update *ndistinct to the estimate of + * the MVNDistinctItem that best matches. If a match it found, *varinfos is + * updated to remove the list of matched varinfos. + * + * Varinfos that aren't for simple Vars are ignored. + * + * Return true if we're able to find a match, false otherwise. + */ +static bool +estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, + List **varinfos, double *ndistinct) +{ + ListCell *lc; + int nmatches_vars; + int nmatches_exprs; + Oid statOid = InvalidOid; + MVNDistinct *stats; + StatisticExtInfo *matched_info = NULL; + RangeTblEntry *rte = planner_rt_fetch(rel->relid, root); + + /* bail out immediately if the table has no extended statistics */ + if (!rel->statlist) + return false; + + /* look for the ndistinct statistics object matching the most vars */ + nmatches_vars = 0; /* we require at least two matches */ + nmatches_exprs = 0; + foreach(lc, rel->statlist) + { + ListCell *lc2; + StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc); + int nshared_vars = 0; + int nshared_exprs = 0; + + /* skip statistics of other kinds */ + if (info->kind != STATS_EXT_NDISTINCT) + continue; + + /* skip statistics with mismatching stxdinherit value */ + if (info->inherit != rte->inh) + continue; + + /* + * Determine how many expressions (and variables in non-matched + * expressions) match. We'll then use these numbers to pick the + * statistics object that best matches the clauses. + */ + foreach(lc2, *varinfos) + { + ListCell *lc3; + GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc2); + AttrNumber attnum; + + Assert(varinfo->rel == rel); + + /* simple Var, search in statistics keys directly */ + if (IsA(varinfo->var, Var)) + { + attnum = ((Var *) varinfo->var)->varattno; + + /* + * Ignore system attributes - we don't support statistics on + * them, so can't match them (and it'd fail as the values are + * negative). + */ + if (!AttrNumberIsForUserDefinedAttr(attnum)) + continue; + + if (bms_is_member(attnum, info->keys)) + nshared_vars++; + + continue; + } + + /* expression - see if it's in the statistics object */ + foreach(lc3, info->exprs) + { + Node *expr = (Node *) lfirst(lc3); + + if (equal(varinfo->var, expr)) + { + nshared_exprs++; + break; + } + } + } + + if (nshared_vars + nshared_exprs < 2) + continue; + + /* + * Does this statistics object match more columns than the currently + * best object? If so, use this one instead. + * + * XXX This should break ties using name of the object, or something + * like that, to make the outcome stable. + */ + if ((nshared_exprs > nmatches_exprs) || + (((nshared_exprs == nmatches_exprs)) && (nshared_vars > nmatches_vars))) + { + statOid = info->statOid; + nmatches_vars = nshared_vars; + nmatches_exprs = nshared_exprs; + matched_info = info; + } + } + + /* No match? */ + if (statOid == InvalidOid) + return false; + + Assert(nmatches_vars + nmatches_exprs > 1); + + stats = statext_ndistinct_load(statOid, rte->inh); + + /* + * If we have a match, search it for the specific item that matches (there + * must be one), and construct the output values. 
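+ *
+ * For instance, if the chosen statistics object covers (a, b, c) and
+ * the grouped columns are a and b, we look for the MVNDistinctItem
+ * whose attribute set is exactly {a, b}; ndistinct statistics store an
+ * item for every combination of the covered attributes, so it must be
+ * there.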
+ */ + if (stats) + { + int i; + List *newlist = NIL; + MVNDistinctItem *item = NULL; + ListCell *lc2; + Bitmapset *matched = NULL; + AttrNumber attnum_offset; + + /* + * How much we need to offset the attnums? If there are no + * expressions, no offset is needed. Otherwise offset enough to move + * the lowest one (which is equal to number of expressions) to 1. + */ + if (matched_info->exprs) + attnum_offset = (list_length(matched_info->exprs) + 1); + else + attnum_offset = 0; + + /* see what actually matched */ + foreach(lc2, *varinfos) + { + ListCell *lc3; + int idx; + bool found = false; + + GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc2); + + /* + * Process a simple Var expression, by matching it to keys + * directly. If there's a matching expression, we'll try matching + * it later. + */ + if (IsA(varinfo->var, Var)) + { + AttrNumber attnum = ((Var *) varinfo->var)->varattno; + + /* + * Ignore expressions on system attributes. Can't rely on the + * bms check for negative values. + */ + if (!AttrNumberIsForUserDefinedAttr(attnum)) + continue; + + /* Is the variable covered by the statistics object? */ + if (!bms_is_member(attnum, matched_info->keys)) + continue; + + attnum = attnum + attnum_offset; + + /* ensure sufficient offset */ + Assert(AttrNumberIsForUserDefinedAttr(attnum)); + + matched = bms_add_member(matched, attnum); + + found = true; + } + + /* + * XXX Maybe we should allow searching the expressions even if we + * found an attribute matching the expression? That would handle + * trivial expressions like "(a)" but it seems fairly useless. + */ + if (found) + continue; + + /* expression - see if it's in the statistics object */ + idx = 0; + foreach(lc3, matched_info->exprs) + { + Node *expr = (Node *) lfirst(lc3); + + if (equal(varinfo->var, expr)) + { + AttrNumber attnum = -(idx + 1); + + attnum = attnum + attnum_offset; + + /* ensure sufficient offset */ + Assert(AttrNumberIsForUserDefinedAttr(attnum)); + + matched = bms_add_member(matched, attnum); + + /* there should be just one matching expression */ + break; + } + + idx++; + } + } + + /* Find the specific item that exactly matches the combination */ + for (i = 0; i < stats->nitems; i++) + { + int j; + MVNDistinctItem *tmpitem = &stats->items[i]; + + if (tmpitem->nattributes != bms_num_members(matched)) + continue; + + /* assume it's the right item */ + item = tmpitem; + + /* check that all item attributes/expressions fit the match */ + for (j = 0; j < tmpitem->nattributes; j++) + { + AttrNumber attnum = tmpitem->attributes[j]; + + /* + * Thanks to how we constructed the matched bitmap above, we + * can just offset all attnums the same way. + */ + attnum = attnum + attnum_offset; + + if (!bms_is_member(attnum, matched)) + { + /* nah, it's not this item */ + item = NULL; + break; + } + } + + /* + * If the item has all the matched attributes, we know it's the + * right one - there can't be a better one. matching more. + */ + if (item) + break; + } + + /* + * Make sure we found an item. There has to be one, because ndistinct + * statistics includes all combinations of attributes. + */ + if (!item) + elog(ERROR, "corrupt MVNDistinct entry"); + + /* Form the output varinfo list, keeping only unmatched ones */ + foreach(lc, *varinfos) + { + GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc); + ListCell *lc3; + bool found = false; + + /* + * Let's look at plain variables first, because it's the most + * common case and the check is quite cheap. We can simply get the + * attnum and check (with an offset) matched bitmap. 
+ */ + if (IsA(varinfo->var, Var)) + { + AttrNumber attnum = ((Var *) varinfo->var)->varattno; + + /* + * If it's a system attribute, we're done. We don't support + * extended statistics on system attributes, so it's clearly + * not matched. Just keep the expression and continue. + */ + if (!AttrNumberIsForUserDefinedAttr(attnum)) + { + newlist = lappend(newlist, varinfo); + continue; + } + + /* apply the same offset as above */ + attnum += attnum_offset; + + /* if it's not matched, keep the varinfo */ + if (!bms_is_member(attnum, matched)) + newlist = lappend(newlist, varinfo); + + /* The rest of the loop deals with complex expressions. */ + continue; + } + + /* + * Process complex expressions, not just simple Vars. + * + * First, we search for an exact match of an expression. If we + * find one, we can just discard the whole GroupVarInfo, with all + * the variables we extracted from it. + * + * Otherwise we inspect the individual vars, and try matching it + * to variables in the item. + */ + foreach(lc3, matched_info->exprs) + { + Node *expr = (Node *) lfirst(lc3); + + if (equal(varinfo->var, expr)) + { + found = true; + break; + } + } + + /* found exact match, skip */ + if (found) + continue; + + newlist = lappend(newlist, varinfo); + } + + *varinfos = newlist; + *ndistinct = item->ndistinct; + return true; + } + + return false; +} + +/* + * convert_to_scalar + * Convert non-NULL values of the indicated types to the comparison + * scale needed by scalarineqsel(). + * Returns "true" if successful. + * + * XXX this routine is a hack: ideally we should look up the conversion + * subroutines in pg_type. + * + * All numeric datatypes are simply converted to their equivalent + * "double" values. (NUMERIC values that are outside the range of "double" + * are clamped to +/- HUGE_VAL.) + * + * String datatypes are converted by convert_string_to_scalar(), + * which is explained below. The reason why this routine deals with + * three values at a time, not just one, is that we need it for strings. + * + * The bytea datatype is just enough different from strings that it has + * to be treated separately. + * + * The several datatypes representing absolute times are all converted + * to Timestamp, which is actually an int64, and then we promote that to + * a double. Note this will give correct results even for the "special" + * values of Timestamp, since those are chosen to compare correctly; + * see timestamp_cmp. + * + * The several datatypes representing relative times (intervals) are all + * converted to measurements expressed in seconds. + */ +static bool +convert_to_scalar(Datum value, Oid valuetypid, Oid collid, double *scaledvalue, + Datum lobound, Datum hibound, Oid boundstypid, + double *scaledlobound, double *scaledhibound) +{ + bool failure = false; + + /* + * Both the valuetypid and the boundstypid should exactly match the + * declared input type(s) of the operator we are invoked for. However, + * extensions might try to use scalarineqsel as estimator for operators + * with input type(s) we don't handle here; in such cases, we want to + * return false, not fail. In any case, we mustn't assume that valuetypid + * and boundstypid are identical. + * + * XXX The histogram we are interpolating between points of could belong + * to a column that's only binary-compatible with the declared type. 
In + * essence we are assuming that the semantics of binary-compatible types + * are enough alike that we can use a histogram generated with one type's + * operators to estimate selectivity for the other's. This is outright + * wrong in some cases --- in particular signed versus unsigned + * interpretation could trip us up. But it's useful enough in the + * majority of cases that we do it anyway. Should think about more + * rigorous ways to do it. + */ + switch (valuetypid) + { + /* + * Built-in numeric types + */ + case BOOLOID: + case INT2OID: + case INT4OID: + case INT8OID: + case FLOAT4OID: + case FLOAT8OID: + case NUMERICOID: + case OIDOID: + case REGPROCOID: + case REGPROCEDUREOID: + case REGOPEROID: + case REGOPERATOROID: + case REGCLASSOID: + case REGTYPEOID: + case REGCOLLATIONOID: + case REGCONFIGOID: + case REGDICTIONARYOID: + case REGROLEOID: + case REGNAMESPACEOID: + *scaledvalue = convert_numeric_to_scalar(value, valuetypid, + &failure); + *scaledlobound = convert_numeric_to_scalar(lobound, boundstypid, + &failure); + *scaledhibound = convert_numeric_to_scalar(hibound, boundstypid, + &failure); + return !failure; + + /* + * Built-in string types + */ + case CHAROID: + case BPCHAROID: + case VARCHAROID: + case TEXTOID: + case NAMEOID: + { + char *valstr = convert_string_datum(value, valuetypid, + collid, &failure); + char *lostr = convert_string_datum(lobound, boundstypid, + collid, &failure); + char *histr = convert_string_datum(hibound, boundstypid, + collid, &failure); + + /* + * Bail out if any of the values is not of string type. We + * might leak converted strings for the other value(s), but + * that's not worth troubling over. + */ + if (failure) + return false; + + convert_string_to_scalar(valstr, scaledvalue, + lostr, scaledlobound, + histr, scaledhibound); + pfree(valstr); + pfree(lostr); + pfree(histr); + return true; + } + + /* + * Built-in bytea type + */ + case BYTEAOID: + { + /* We only support bytea vs bytea comparison */ + if (boundstypid != BYTEAOID) + return false; + convert_bytea_to_scalar(value, scaledvalue, + lobound, scaledlobound, + hibound, scaledhibound); + return true; + } + + /* + * Built-in time types + */ + case TIMESTAMPOID: + case TIMESTAMPTZOID: + case DATEOID: + case INTERVALOID: + case TIMEOID: + case TIMETZOID: + *scaledvalue = convert_timevalue_to_scalar(value, valuetypid, + &failure); + *scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid, + &failure); + *scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid, + &failure); + return !failure; + + /* + * Built-in network types + */ + case INETOID: + case CIDROID: + case MACADDROID: + case MACADDR8OID: + *scaledvalue = convert_network_to_scalar(value, valuetypid, + &failure); + *scaledlobound = convert_network_to_scalar(lobound, boundstypid, + &failure); + *scaledhibound = convert_network_to_scalar(hibound, boundstypid, + &failure); + return !failure; + } + /* Don't know how to convert */ + *scaledvalue = *scaledlobound = *scaledhibound = 0; + return false; +} + +/* + * Do convert_to_scalar()'s work for any numeric data type. + * + * On failure (e.g., unsupported typid), set *failure to true; + * otherwise, that variable is not changed. 
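One way to see why the value and both histogram-bin bounds are forced onto a single double scale: once all three are doubles, the caller can estimate the fraction of the bin lying below the value by simple linear interpolation. The sketch below is a simplified, standalone illustration of that idea (the guards echo the spirit of the binfrac computation in ineq_histogram_selectivity, not its exact code), and the sample numbers are invented:

#include <stdio.h>

static double
bin_fraction(double val, double lo, double hi)
{
    if (hi <= lo)
        return 0.5;                 /* degenerate bin: split the difference */
    if (val <= lo)
        return 0.0;
    if (val >= hi)
        return 1.0;
    return (val - lo) / (hi - lo);
}

int
main(void)
{
    /* a value one quarter of the way through an invented histogram bin */
    printf("%.3f\n", bin_fraction(19025.0, 19000.0, 19100.0));
    return 0;
}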
+ */ +static double +convert_numeric_to_scalar(Datum value, Oid typid, bool *failure) +{ + switch (typid) + { + case BOOLOID: + return (double) DatumGetBool(value); + case INT2OID: + return (double) DatumGetInt16(value); + case INT4OID: + return (double) DatumGetInt32(value); + case INT8OID: + return (double) DatumGetInt64(value); + case FLOAT4OID: + return (double) DatumGetFloat4(value); + case FLOAT8OID: + return (double) DatumGetFloat8(value); + case NUMERICOID: + /* Note: out-of-range values will be clamped to +-HUGE_VAL */ + return (double) + DatumGetFloat8(DirectFunctionCall1(numeric_float8_no_overflow, + value)); + case OIDOID: + case REGPROCOID: + case REGPROCEDUREOID: + case REGOPEROID: + case REGOPERATOROID: + case REGCLASSOID: + case REGTYPEOID: + case REGCOLLATIONOID: + case REGCONFIGOID: + case REGDICTIONARYOID: + case REGROLEOID: + case REGNAMESPACEOID: + /* we can treat OIDs as integers... */ + return (double) DatumGetObjectId(value); + } + + *failure = true; + return 0; +} + +/* + * Do convert_to_scalar()'s work for any character-string data type. + * + * String datatypes are converted to a scale that ranges from 0 to 1, + * where we visualize the bytes of the string as fractional digits. + * + * We do not want the base to be 256, however, since that tends to + * generate inflated selectivity estimates; few databases will have + * occurrences of all 256 possible byte values at each position. + * Instead, use the smallest and largest byte values seen in the bounds + * as the estimated range for each byte, after some fudging to deal with + * the fact that we probably aren't going to see the full range that way. + * + * An additional refinement is that we discard any common prefix of the + * three strings before computing the scaled values. This allows us to + * "zoom in" when we encounter a narrow data range. An example is a phone + * number database where all the values begin with the same area code. + * (Actually, the bounds will be adjacent histogram-bin-boundary values, + * so this is more likely to happen than you might think.) + */ +static void +convert_string_to_scalar(char *value, + double *scaledvalue, + char *lobound, + double *scaledlobound, + char *hibound, + double *scaledhibound) +{ + int rangelo, + rangehi; + char *sptr; + + rangelo = rangehi = (unsigned char) hibound[0]; + for (sptr = lobound; *sptr; sptr++) + { + if (rangelo > (unsigned char) *sptr) + rangelo = (unsigned char) *sptr; + if (rangehi < (unsigned char) *sptr) + rangehi = (unsigned char) *sptr; + } + for (sptr = hibound; *sptr; sptr++) + { + if (rangelo > (unsigned char) *sptr) + rangelo = (unsigned char) *sptr; + if (rangehi < (unsigned char) *sptr) + rangehi = (unsigned char) *sptr; + } + /* If range includes any upper-case ASCII chars, make it include all */ + if (rangelo <= 'Z' && rangehi >= 'A') + { + if (rangelo > 'A') + rangelo = 'A'; + if (rangehi < 'Z') + rangehi = 'Z'; + } + /* Ditto lower-case */ + if (rangelo <= 'z' && rangehi >= 'a') + { + if (rangelo > 'a') + rangelo = 'a'; + if (rangehi < 'z') + rangehi = 'z'; + } + /* Ditto digits */ + if (rangelo <= '9' && rangehi >= '0') + { + if (rangelo > '0') + rangelo = '0'; + if (rangehi < '9') + rangehi = '9'; + } + + /* + * If range includes less than 10 chars, assume we have not got enough + * data, and make it include regular ASCII set. + */ + if (rangehi - rangelo < 9) + { + rangelo = ' '; + rangehi = 127; + } + + /* + * Now strip any common prefix of the three strings. 
+ */ + while (*lobound) + { + if (*lobound != *hibound || *lobound != *value) + break; + lobound++, hibound++, value++; + } + + /* + * Now we can do the conversions. + */ + *scaledvalue = convert_one_string_to_scalar(value, rangelo, rangehi); + *scaledlobound = convert_one_string_to_scalar(lobound, rangelo, rangehi); + *scaledhibound = convert_one_string_to_scalar(hibound, rangelo, rangehi); +} + +static double +convert_one_string_to_scalar(char *value, int rangelo, int rangehi) +{ + int slen = strlen(value); + double num, + denom, + base; + + if (slen <= 0) + return 0.0; /* empty string has scalar value 0 */ + + /* + * There seems little point in considering more than a dozen bytes from + * the string. Since base is at least 10, that will give us nominal + * resolution of at least 12 decimal digits, which is surely far more + * precision than this estimation technique has got anyway (especially in + * non-C locales). Also, even with the maximum possible base of 256, this + * ensures denom cannot grow larger than 256^13 = 2.03e31, which will not + * overflow on any known machine. + */ + if (slen > 12) + slen = 12; + + /* Convert initial characters to fraction */ + base = rangehi - rangelo + 1; + num = 0.0; + denom = base; + while (slen-- > 0) + { + int ch = (unsigned char) *value++; + + if (ch < rangelo) + ch = rangelo - 1; + else if (ch > rangehi) + ch = rangehi + 1; + num += ((double) (ch - rangelo)) / denom; + denom *= base; + } + + return num; +} + +/* + * Convert a string-type Datum into a palloc'd, null-terminated string. + * + * On failure (e.g., unsupported typid), set *failure to true; + * otherwise, that variable is not changed. (We'll return NULL on failure.) + * + * When using a non-C locale, we must pass the string through strxfrm() + * before continuing, so as to generate correct locale-specific results. + */ +static char * +convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure) +{ + char *val; + + switch (typid) + { + case CHAROID: + val = (char *) palloc(2); + val[0] = DatumGetChar(value); + val[1] = '\0'; + break; + case BPCHAROID: + case VARCHAROID: + case TEXTOID: + val = TextDatumGetCString(value); + break; + case NAMEOID: + { + NameData *nm = (NameData *) DatumGetPointer(value); + + val = pstrdup(NameStr(*nm)); + break; + } + default: + *failure = true; + return NULL; + } + + if (!lc_collate_is_c(collid)) + { + char *xfrmstr; + size_t xfrmlen; + size_t xfrmlen2 PG_USED_FOR_ASSERTS_ONLY; + + /* + * XXX: We could guess at a suitable output buffer size and only call + * strxfrm twice if our guess is too small. + * + * XXX: strxfrm doesn't support UTF-8 encoding on Win32, it can return + * bogus data or set an error. This is not really a problem unless it + * crashes since it will only give an estimation error and nothing + * fatal. + */ + xfrmlen = strxfrm(NULL, val, 0); +#ifdef WIN32 + + /* + * On Windows, strxfrm returns INT_MAX when an error occurs. Instead + * of trying to allocate this much memory (and fail), just return the + * original string unmodified as if we were in the C locale. + */ + if (xfrmlen == INT_MAX) + return val; +#endif + xfrmstr = (char *) palloc(xfrmlen + 1); + xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1); + + /* + * Some systems (e.g., glibc) can return a smaller value from the + * second call than the first; thus the Assert must be <= not ==. + */ + Assert(xfrmlen2 <= xfrmlen); + pfree(val); + val = xfrmstr; + } + + return val; +} + +/* + * Do convert_to_scalar()'s work for any bytea data type. 
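The digit-in-base-(rangehi - rangelo + 1) construction used by convert_one_string_to_scalar above is easy to exercise on its own. The sketch below reimplements just that fraction computation outside the backend; the hard-coded 'a'..'z' range stands in for the range detection done by convert_string_to_scalar, and the test strings are arbitrary:

#include <stdio.h>
#include <string.h>

static double
string_to_fraction(const char *s, int rangelo, int rangehi)
{
    double base = (double) (rangehi - rangelo + 1);
    double num = 0.0;
    double denom = base;
    int    slen = (int) strlen(s);

    if (slen > 12)                  /* same cutoff as the code above */
        slen = 12;

    for (int i = 0; i < slen; i++)
    {
        int ch = (unsigned char) s[i];

        if (ch < rangelo)
            ch = rangelo - 1;
        else if (ch > rangehi)
            ch = rangehi + 1;
        num += ((double) (ch - rangelo)) / denom;
        denom *= base;
    }
    return num;
}

int
main(void)
{
    /* with range 'a'..'z', "apple" < "mango" < "zebra" on the 0..1 scale */
    printf("%f %f %f\n",
           string_to_fraction("apple", 'a', 'z'),
           string_to_fraction("mango", 'a', 'z'),
           string_to_fraction("zebra", 'a', 'z'));
    return 0;
}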
+ * + * Very similar to convert_string_to_scalar except we can't assume + * null-termination and therefore pass explicit lengths around. + * + * Also, assumptions about likely "normal" ranges of characters have been + * removed - a data range of 0..255 is always used, for now. (Perhaps + * someday we will add information about actual byte data range to + * pg_statistic.) + */ +static void +convert_bytea_to_scalar(Datum value, + double *scaledvalue, + Datum lobound, + double *scaledlobound, + Datum hibound, + double *scaledhibound) +{ + bytea *valuep = DatumGetByteaPP(value); + bytea *loboundp = DatumGetByteaPP(lobound); + bytea *hiboundp = DatumGetByteaPP(hibound); + int rangelo, + rangehi, + valuelen = VARSIZE_ANY_EXHDR(valuep), + loboundlen = VARSIZE_ANY_EXHDR(loboundp), + hiboundlen = VARSIZE_ANY_EXHDR(hiboundp), + i, + minlen; + unsigned char *valstr = (unsigned char *) VARDATA_ANY(valuep); + unsigned char *lostr = (unsigned char *) VARDATA_ANY(loboundp); + unsigned char *histr = (unsigned char *) VARDATA_ANY(hiboundp); + + /* + * Assume bytea data is uniformly distributed across all byte values. + */ + rangelo = 0; + rangehi = 255; + + /* + * Now strip any common prefix of the three strings. + */ + minlen = Min(Min(valuelen, loboundlen), hiboundlen); + for (i = 0; i < minlen; i++) + { + if (*lostr != *histr || *lostr != *valstr) + break; + lostr++, histr++, valstr++; + loboundlen--, hiboundlen--, valuelen--; + } + + /* + * Now we can do the conversions. + */ + *scaledvalue = convert_one_bytea_to_scalar(valstr, valuelen, rangelo, rangehi); + *scaledlobound = convert_one_bytea_to_scalar(lostr, loboundlen, rangelo, rangehi); + *scaledhibound = convert_one_bytea_to_scalar(histr, hiboundlen, rangelo, rangehi); +} + +static double +convert_one_bytea_to_scalar(unsigned char *value, int valuelen, + int rangelo, int rangehi) +{ + double num, + denom, + base; + + if (valuelen <= 0) + return 0.0; /* empty string has scalar value 0 */ + + /* + * Since base is 256, need not consider more than about 10 chars (even + * this many seems like overkill) + */ + if (valuelen > 10) + valuelen = 10; + + /* Convert initial characters to fraction */ + base = rangehi - rangelo + 1; + num = 0.0; + denom = base; + while (valuelen-- > 0) + { + int ch = *value++; + + if (ch < rangelo) + ch = rangelo - 1; + else if (ch > rangehi) + ch = rangehi + 1; + num += ((double) (ch - rangelo)) / denom; + denom *= base; + } + + return num; +} + +/* + * Do convert_to_scalar()'s work for any timevalue data type. + * + * On failure (e.g., unsupported typid), set *failure to true; + * otherwise, that variable is not changed. + */ +static double +convert_timevalue_to_scalar(Datum value, Oid typid, bool *failure) +{ + switch (typid) + { + case TIMESTAMPOID: + return DatumGetTimestamp(value); + case TIMESTAMPTZOID: + return DatumGetTimestampTz(value); + case DATEOID: + return date2timestamp_no_overflow(DatumGetDateADT(value)); + case INTERVALOID: + { + Interval *interval = DatumGetIntervalP(value); + + /* + * Convert the month part of Interval to days using assumed + * average month length of 365.25/12.0 days. Not too + * accurate, but plenty good enough for our purposes. 
+ */ + return interval->time + interval->day * (double) USECS_PER_DAY + + interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * USECS_PER_DAY); + } + case TIMEOID: + return DatumGetTimeADT(value); + case TIMETZOID: + { + TimeTzADT *timetz = DatumGetTimeTzADTP(value); + + /* use GMT-equivalent time */ + return (double) (timetz->time + (timetz->zone * 1000000.0)); + } + } + + *failure = true; + return 0; +} + + +/* + * get_restriction_variable + * Examine the args of a restriction clause to see if it's of the + * form (variable op pseudoconstant) or (pseudoconstant op variable), + * where "variable" could be either a Var or an expression in vars of a + * single relation. If so, extract information about the variable, + * and also indicate which side it was on and the other argument. + * + * Inputs: + * root: the planner info + * args: clause argument list + * varRelid: see specs for restriction selectivity functions + * + * Outputs: (these are valid only if true is returned) + * *vardata: gets information about variable (see examine_variable) + * *other: gets other clause argument, aggressively reduced to a constant + * *varonleft: set true if variable is on the left, false if on the right + * + * Returns true if a variable is identified, otherwise false. + * + * Note: if there are Vars on both sides of the clause, we must fail, because + * callers are expecting that the other side will act like a pseudoconstant. + */ +bool +get_restriction_variable(PlannerInfo *root, List *args, int varRelid, + VariableStatData *vardata, Node **other, + bool *varonleft) +{ + Node *left, + *right; + VariableStatData rdata; + + /* Fail if not a binary opclause (probably shouldn't happen) */ + if (list_length(args) != 2) + return false; + + left = (Node *) linitial(args); + right = (Node *) lsecond(args); + + /* + * Examine both sides. Note that when varRelid is nonzero, Vars of other + * relations will be treated as pseudoconstants. + */ + examine_variable(root, left, varRelid, vardata); + examine_variable(root, right, varRelid, &rdata); + + /* + * If one side is a variable and the other not, we win. + */ + if (vardata->rel && rdata.rel == NULL) + { + *varonleft = true; + *other = estimate_expression_value(root, rdata.var); + /* Assume we need no ReleaseVariableStats(rdata) here */ + return true; + } + + if (vardata->rel == NULL && rdata.rel) + { + *varonleft = false; + *other = estimate_expression_value(root, vardata->var); + /* Assume we need no ReleaseVariableStats(*vardata) here */ + *vardata = rdata; + return true; + } + + /* Oops, clause has wrong structure (probably var op var) */ + ReleaseVariableStats(*vardata); + ReleaseVariableStats(rdata); + + return false; +} + +/* + * get_join_variables + * Apply examine_variable() to each side of a join clause. + * Also, attempt to identify whether the join clause has the same + * or reversed sense compared to the SpecialJoinInfo. + * + * We consider the join clause "normal" if it is "lhs_var OP rhs_var", + * or "reversed" if it is "rhs_var OP lhs_var". In complicated cases + * where we can't tell for sure, we default to assuming it's normal. 
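Looking back at the INTERVALOID branch of convert_timevalue_to_scalar, the scaling is plain arithmetic. The standalone check below writes the datetime constants as literals (86,400 seconds per day, 365.25 / 12 days per month); the interval '1 mon 2 days 03:00:00' is an invented example:

#include <stdio.h>

int
main(void)
{
    double usecs_per_day = 86400.0 * 1000000.0;
    double time = 3.0 * 3600.0 * 1000000.0;     /* 03:00:00 as microseconds */
    double days = 2.0;
    double months = 1.0;

    double scaled = time + days * usecs_per_day +
        months * ((365.25 / 12.0) * usecs_per_day);

    /* prints roughly 2.8134e+12 usec, i.e. about 32.56 "days" */
    printf("scaled = %.0f usec (~%.4f days)\n",
           scaled, scaled / usecs_per_day);
    return 0;
}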
+ */ +void +get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo, + VariableStatData *vardata1, VariableStatData *vardata2, + bool *join_is_reversed) +{ + Node *left, + *right; + + if (list_length(args) != 2) + elog(ERROR, "join operator should take two arguments"); + + left = (Node *) linitial(args); + right = (Node *) lsecond(args); + + examine_variable(root, left, 0, vardata1); + examine_variable(root, right, 0, vardata2); + + if (vardata1->rel && + bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand)) + *join_is_reversed = true; /* var1 is on RHS */ + else if (vardata2->rel && + bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand)) + *join_is_reversed = true; /* var2 is on LHS */ + else + *join_is_reversed = false; +} + +/* statext_expressions_load copies the tuple, so just pfree it. */ +static void +ReleaseDummy(HeapTuple tuple) +{ + pfree(tuple); +} + +/* + * examine_variable + * Try to look up statistical data about an expression. + * Fill in a VariableStatData struct to describe the expression. + * + * Inputs: + * root: the planner info + * node: the expression tree to examine + * varRelid: see specs for restriction selectivity functions + * + * Outputs: *vardata is filled as follows: + * var: the input expression (with any binary relabeling stripped, if + * it is or contains a variable; but otherwise the type is preserved) + * rel: RelOptInfo for relation containing variable; NULL if expression + * contains no Vars (NOTE this could point to a RelOptInfo of a + * subquery, not one in the current query). + * statsTuple: the pg_statistic entry for the variable, if one exists; + * otherwise NULL. + * freefunc: pointer to a function to release statsTuple with. + * vartype: exposed type of the expression; this should always match + * the declared input type of the operator we are estimating for. + * atttype, atttypmod: actual type/typmod of the "var" expression. This is + * commonly the same as the exposed type of the variable argument, + * but can be different in binary-compatible-type cases. + * isunique: true if we were able to match the var to a unique index or a + * single-column DISTINCT clause, implying its values are unique for + * this query. (Caution: this should be trusted for statistical + * purposes only, since we do not check indimmediate nor verify that + * the exact same definition of equality applies.) + * acl_ok: true if current user has permission to read the column(s) + * underlying the pg_statistic entry. This is consulted by + * statistic_proc_security_check(). + * + * Caller is responsible for doing ReleaseVariableStats() before exiting. 
+ */ +void +examine_variable(PlannerInfo *root, Node *node, int varRelid, + VariableStatData *vardata) +{ + Node *basenode; + Relids varnos; + RelOptInfo *onerel; + + /* Make sure we don't return dangling pointers in vardata */ + MemSet(vardata, 0, sizeof(VariableStatData)); + + /* Save the exposed type of the expression */ + vardata->vartype = exprType(node); + + /* Look inside any binary-compatible relabeling */ + + if (IsA(node, RelabelType)) + basenode = (Node *) ((RelabelType *) node)->arg; + else + basenode = node; + + /* Fast path for a simple Var */ + + if (IsA(basenode, Var) && + (varRelid == 0 || varRelid == ((Var *) basenode)->varno)) + { + Var *var = (Var *) basenode; + + /* Set up result fields other than the stats tuple */ + vardata->var = basenode; /* return Var without relabeling */ + vardata->rel = find_base_rel(root, var->varno); + vardata->atttype = var->vartype; + vardata->atttypmod = var->vartypmod; + vardata->isunique = has_unique_index(vardata->rel, var->varattno); + + /* Try to locate some stats */ + examine_simple_variable(root, var, vardata); + + return; + } + + /* + * Okay, it's a more complicated expression. Determine variable + * membership. Note that when varRelid isn't zero, only vars of that + * relation are considered "real" vars. + */ + varnos = pull_varnos(root, basenode); + + onerel = NULL; + + switch (bms_membership(varnos)) + { + case BMS_EMPTY_SET: + /* No Vars at all ... must be pseudo-constant clause */ + break; + case BMS_SINGLETON: + if (varRelid == 0 || bms_is_member(varRelid, varnos)) + { + onerel = find_base_rel(root, + (varRelid ? varRelid : bms_singleton_member(varnos))); + vardata->rel = onerel; + node = basenode; /* strip any relabeling */ + } + /* else treat it as a constant */ + break; + case BMS_MULTIPLE: + if (varRelid == 0) + { + /* treat it as a variable of a join relation */ + vardata->rel = find_join_rel(root, varnos); + node = basenode; /* strip any relabeling */ + } + else if (bms_is_member(varRelid, varnos)) + { + /* ignore the vars belonging to other relations */ + vardata->rel = find_base_rel(root, varRelid); + node = basenode; /* strip any relabeling */ + /* note: no point in expressional-index search here */ + } + /* else treat it as a constant */ + break; + } + + bms_free(varnos); + + vardata->var = node; + vardata->atttype = exprType(node); + vardata->atttypmod = exprTypmod(node); + + if (onerel) + { + /* + * We have an expression in vars of a single relation. Try to match + * it to expressional index columns, in hopes of finding some + * statistics. + * + * Note that we consider all index columns including INCLUDE columns, + * since there could be stats for such columns. But the test for + * uniqueness needs to be warier. + * + * XXX it's conceivable that there are multiple matches with different + * index opfamilies; if so, we need to pick one that matches the + * operator we are estimating for. FIXME later. + */ + ListCell *ilist; + ListCell *slist; + Oid userid; + + /* + * Determine the user ID to use for privilege checks: either + * onerel->userid if it's set (e.g., in case we're accessing the table + * via a view), or the current user otherwise. + * + * If we drill down to child relations, we keep using the same userid: + * it's going to be the same anyway, due to how we set up the relation + * tree (q.v. build_simple_rel). + */ + userid = OidIsValid(onerel->userid) ? 
onerel->userid : GetUserId(); + + foreach(ilist, onerel->indexlist) + { + IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist); + ListCell *indexpr_item; + int pos; + + indexpr_item = list_head(index->indexprs); + if (indexpr_item == NULL) + continue; /* no expressions here... */ + + for (pos = 0; pos < index->ncolumns; pos++) + { + if (index->indexkeys[pos] == 0) + { + Node *indexkey; + + if (indexpr_item == NULL) + elog(ERROR, "too few entries in indexprs list"); + indexkey = (Node *) lfirst(indexpr_item); + if (indexkey && IsA(indexkey, RelabelType)) + indexkey = (Node *) ((RelabelType *) indexkey)->arg; + if (equal(node, indexkey)) + { + /* + * Found a match ... is it a unique index? Tests here + * should match has_unique_index(). + */ + if (index->unique && + index->nkeycolumns == 1 && + pos == 0 && + (index->indpred == NIL || index->predOK)) + vardata->isunique = true; + + /* + * Has it got stats? We only consider stats for + * non-partial indexes, since partial indexes probably + * don't reflect whole-relation statistics; the above + * check for uniqueness is the only info we take from + * a partial index. + * + * An index stats hook, however, must make its own + * decisions about what to do with partial indexes. + */ + if (get_index_stats_hook && + (*get_index_stats_hook) (root, index->indexoid, + pos + 1, vardata)) + { + /* + * The hook took control of acquiring a stats + * tuple. If it did supply a tuple, it'd better + * have supplied a freefunc. + */ + if (HeapTupleIsValid(vardata->statsTuple) && + !vardata->freefunc) + elog(ERROR, "no function provided to release variable stats with"); + } + else if (index->indpred == NIL) + { + vardata->statsTuple = + SearchSysCache3(STATRELATTINH, + ObjectIdGetDatum(index->indexoid), + Int16GetDatum(pos + 1), + BoolGetDatum(false)); + vardata->freefunc = ReleaseSysCache; + + if (HeapTupleIsValid(vardata->statsTuple)) + { + /* Get index's table for permission check */ + RangeTblEntry *rte; + + rte = planner_rt_fetch(index->rel->relid, root); + Assert(rte->rtekind == RTE_RELATION); + + /* + * For simplicity, we insist on the whole + * table being selectable, rather than trying + * to identify which column(s) the index + * depends on. Also require all rows to be + * selectable --- there must be no + * securityQuals from security barrier views + * or RLS policies. + */ + vardata->acl_ok = + rte->securityQuals == NIL && + (pg_class_aclcheck(rte->relid, userid, + ACL_SELECT) == ACLCHECK_OK); + + /* + * If the user doesn't have permissions to + * access an inheritance child relation, check + * the permissions of the table actually + * mentioned in the query, since most likely + * the user does have that permission. Note + * that whole-table select privilege on the + * parent doesn't quite guarantee that the + * user could read all columns of the child. + * But in practice it's unlikely that any + * interesting security violation could result + * from allowing access to the expression + * index's stats, so we allow it anyway. See + * similar code in examine_simple_variable() + * for additional comments. 
+ */ + if (!vardata->acl_ok && + root->append_rel_array != NULL) + { + AppendRelInfo *appinfo; + Index varno = index->rel->relid; + + appinfo = root->append_rel_array[varno]; + while (appinfo && + planner_rt_fetch(appinfo->parent_relid, + root)->rtekind == RTE_RELATION) + { + varno = appinfo->parent_relid; + appinfo = root->append_rel_array[varno]; + } + if (varno != index->rel->relid) + { + /* Repeat access check on this rel */ + rte = planner_rt_fetch(varno, root); + Assert(rte->rtekind == RTE_RELATION); + + vardata->acl_ok = + rte->securityQuals == NIL && + (pg_class_aclcheck(rte->relid, + userid, + ACL_SELECT) == ACLCHECK_OK); + } + } + } + else + { + /* suppress leakproofness checks later */ + vardata->acl_ok = true; + } + } + if (vardata->statsTuple) + break; + } + indexpr_item = lnext(index->indexprs, indexpr_item); + } + } + if (vardata->statsTuple) + break; + } + + /* + * Search extended statistics for one with a matching expression. + * There might be multiple ones, so just grab the first one. In the + * future, we might consider the statistics target (and pick the most + * accurate statistics) and maybe some other parameters. + */ + foreach(slist, onerel->statlist) + { + StatisticExtInfo *info = (StatisticExtInfo *) lfirst(slist); + RangeTblEntry *rte = planner_rt_fetch(onerel->relid, root); + ListCell *expr_item; + int pos; + + /* + * Stop once we've found statistics for the expression (either + * from extended stats, or for an index in the preceding loop). + */ + if (vardata->statsTuple) + break; + + /* skip stats without per-expression stats */ + if (info->kind != STATS_EXT_EXPRESSIONS) + continue; + + /* skip stats with mismatching stxdinherit value */ + if (info->inherit != rte->inh) + continue; + + pos = 0; + foreach(expr_item, info->exprs) + { + Node *expr = (Node *) lfirst(expr_item); + + Assert(expr); + + /* strip RelabelType before comparing it */ + if (expr && IsA(expr, RelabelType)) + expr = (Node *) ((RelabelType *) expr)->arg; + + /* found a match, see if we can extract pg_statistic row */ + if (equal(node, expr)) + { + /* + * XXX Not sure if we should cache the tuple somewhere. + * Now we just create a new copy every time. + */ + vardata->statsTuple = + statext_expressions_load(info->statOid, rte->inh, pos); + + vardata->freefunc = ReleaseDummy; + + /* + * For simplicity, we insist on the whole table being + * selectable, rather than trying to identify which + * column(s) the statistics object depends on. Also + * require all rows to be selectable --- there must be no + * securityQuals from security barrier views or RLS + * policies. + */ + vardata->acl_ok = + rte->securityQuals == NIL && + (pg_class_aclcheck(rte->relid, userid, + ACL_SELECT) == ACLCHECK_OK); + + /* + * If the user doesn't have permissions to access an + * inheritance child relation, check the permissions of + * the table actually mentioned in the query, since most + * likely the user does have that permission. Note that + * whole-table select privilege on the parent doesn't + * quite guarantee that the user could read all columns of + * the child. But in practice it's unlikely that any + * interesting security violation could result from + * allowing access to the expression stats, so we allow it + * anyway. See similar code in examine_simple_variable() + * for additional comments. 
+ */ + if (!vardata->acl_ok && + root->append_rel_array != NULL) + { + AppendRelInfo *appinfo; + Index varno = onerel->relid; + + appinfo = root->append_rel_array[varno]; + while (appinfo && + planner_rt_fetch(appinfo->parent_relid, + root)->rtekind == RTE_RELATION) + { + varno = appinfo->parent_relid; + appinfo = root->append_rel_array[varno]; + } + if (varno != onerel->relid) + { + /* Repeat access check on this rel */ + rte = planner_rt_fetch(varno, root); + Assert(rte->rtekind == RTE_RELATION); + + vardata->acl_ok = + rte->securityQuals == NIL && + (pg_class_aclcheck(rte->relid, + userid, + ACL_SELECT) == ACLCHECK_OK); + } + } + + break; + } + + pos++; + } + } + } +} + +/* + * examine_simple_variable + * Handle a simple Var for examine_variable + * + * This is split out as a subroutine so that we can recurse to deal with + * Vars referencing subqueries. + * + * We already filled in all the fields of *vardata except for the stats tuple. + */ +static void +examine_simple_variable(PlannerInfo *root, Var *var, + VariableStatData *vardata) +{ + RangeTblEntry *rte = root->simple_rte_array[var->varno]; + + Assert(IsA(rte, RangeTblEntry)); + + if (get_relation_stats_hook && + (*get_relation_stats_hook) (root, rte, var->varattno, vardata)) + { + /* + * The hook took control of acquiring a stats tuple. If it did supply + * a tuple, it'd better have supplied a freefunc. + */ + if (HeapTupleIsValid(vardata->statsTuple) && + !vardata->freefunc) + elog(ERROR, "no function provided to release variable stats with"); + } + else if (rte->rtekind == RTE_RELATION) + { + /* + * Plain table or parent of an inheritance appendrel, so look up the + * column in pg_statistic + */ + vardata->statsTuple = SearchSysCache3(STATRELATTINH, + ObjectIdGetDatum(rte->relid), + Int16GetDatum(var->varattno), + BoolGetDatum(rte->inh)); + vardata->freefunc = ReleaseSysCache; + + if (HeapTupleIsValid(vardata->statsTuple)) + { + RelOptInfo *onerel = find_base_rel(root, var->varno); + Oid userid; + + /* + * Check if user has permission to read this column. We require + * all rows to be accessible, so there must be no securityQuals + * from security barrier views or RLS policies. Use + * onerel->userid if it's set, in case we're accessing the table + * via a view. + */ + userid = OidIsValid(onerel->userid) ? onerel->userid : GetUserId(); + + vardata->acl_ok = + rte->securityQuals == NIL && + ((pg_class_aclcheck(rte->relid, userid, + ACL_SELECT) == ACLCHECK_OK) || + (pg_attribute_aclcheck(rte->relid, var->varattno, userid, + ACL_SELECT) == ACLCHECK_OK)); + + /* + * If the user doesn't have permissions to access an inheritance + * child relation or specifically this attribute, check the + * permissions of the table/column actually mentioned in the + * query, since most likely the user does have that permission + * (else the query will fail at runtime), and if the user can read + * the column there then he can get the values of the child table + * too. To do that, we must find out which of the root parent's + * attributes the child relation's attribute corresponds to. + */ + if (!vardata->acl_ok && var->varattno > 0 && + root->append_rel_array != NULL) + { + AppendRelInfo *appinfo; + Index varno = var->varno; + int varattno = var->varattno; + bool found = false; + + appinfo = root->append_rel_array[varno]; + + /* + * Partitions are mapped to their immediate parent, not the + * root parent, so must be ready to walk up multiple + * AppendRelInfos. 
But stop if we hit a parent that is not + * RTE_RELATION --- that's a flattened UNION ALL subquery, not + * an inheritance parent. + */ + while (appinfo && + planner_rt_fetch(appinfo->parent_relid, + root)->rtekind == RTE_RELATION) + { + int parent_varattno; + + found = false; + if (varattno <= 0 || varattno > appinfo->num_child_cols) + break; /* safety check */ + parent_varattno = appinfo->parent_colnos[varattno - 1]; + if (parent_varattno == 0) + break; /* Var is local to child */ + + varno = appinfo->parent_relid; + varattno = parent_varattno; + found = true; + + /* If the parent is itself a child, continue up. */ + appinfo = root->append_rel_array[varno]; + } + + /* + * In rare cases, the Var may be local to the child table, in + * which case, we've got to live with having no access to this + * column's stats. + */ + if (!found) + return; + + /* Repeat the access check on this parent rel & column */ + rte = planner_rt_fetch(varno, root); + Assert(rte->rtekind == RTE_RELATION); + + /* + * Fine to use the same userid as it's the same in all + * relations of a given inheritance tree. + */ + vardata->acl_ok = + rte->securityQuals == NIL && + ((pg_class_aclcheck(rte->relid, userid, + ACL_SELECT) == ACLCHECK_OK) || + (pg_attribute_aclcheck(rte->relid, varattno, userid, + ACL_SELECT) == ACLCHECK_OK)); + } + } + else + { + /* suppress any possible leakproofness checks later */ + vardata->acl_ok = true; + } + } + else if (rte->rtekind == RTE_SUBQUERY && !rte->inh) + { + /* + * Plain subquery (not one that was converted to an appendrel). + */ + Query *subquery = rte->subquery; + RelOptInfo *rel; + TargetEntry *ste; + + /* + * Punt if it's a whole-row var rather than a plain column reference. + */ + if (var->varattno == InvalidAttrNumber) + return; + + /* + * Punt if subquery uses set operations or GROUP BY, as these will + * mash underlying columns' stats beyond recognition. (Set ops are + * particularly nasty; if we forged ahead, we would return stats + * relevant to only the leftmost subselect...) DISTINCT is also + * problematic, but we check that later because there is a possibility + * of learning something even with it. + */ + if (subquery->setOperations || + subquery->groupClause || + subquery->groupingSets) + return; + + /* + * OK, fetch RelOptInfo for subquery. Note that we don't change the + * rel returned in vardata, since caller expects it to be a rel of the + * caller's query level. Because we might already be recursing, we + * can't use that rel pointer either, but have to look up the Var's + * rel afresh. + */ + rel = find_base_rel(root, var->varno); + + /* If the subquery hasn't been planned yet, we have to punt */ + if (rel->subroot == NULL) + return; + Assert(IsA(rel->subroot, PlannerInfo)); + + /* + * Switch our attention to the subquery as mangled by the planner. It + * was okay to look at the pre-planning version for the tests above, + * but now we need a Var that will refer to the subroot's live + * RelOptInfos. For instance, if any subquery pullup happened during + * planning, Vars in the targetlist might have gotten replaced, and we + * need to see the replacement expressions. 
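The walk up the inheritance tree a little earlier in examine_simple_variable (map varattno through parent_colnos, replace varno with parent_relid, and repeat until the root is reached or the column turns out to be child-local) can be mimicked with ordinary arrays. Everything in the sketch below is invented for illustration, including the AppInfo struct and the column mappings:

#include <stdio.h>

typedef struct
{
    int        parent_relid;    /* 0 means "no parent", i.e. stop */
    int        ncols;
    const int *parent_colnos;   /* 1-based child attno -> parent attno */
} AppInfo;

int
main(void)
{
    static const int leaf_map[] = {0, 2, 3};    /* child col 1 is local-only */
    static const int mid_map[]  = {1, 2, 3};
    AppInfo byrel[4] = {
        {0, 0, NULL},           /* relid 0: unused */
        {0, 0, NULL},           /* relid 1: root parent */
        {1, 3, mid_map},        /* relid 2: child of 1 */
        {2, 3, leaf_map},       /* relid 3: partition of 2 */
    };
    int varno = 3;
    int varattno = 2;

    while (byrel[varno].parent_relid != 0)
    {
        int mapped;

        if (varattno <= 0 || varattno > byrel[varno].ncols)
            break;              /* safety check, as in the real loop */
        mapped = byrel[varno].parent_colnos[varattno - 1];
        if (mapped == 0)
            break;              /* column is local to the child */
        varattno = mapped;
        varno = byrel[varno].parent_relid;
    }
    printf("root relid %d, attno %d\n", varno, varattno);
    return 0;
}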
+ */ + subquery = rel->subroot->parse; + Assert(IsA(subquery, Query)); + + /* Get the subquery output expression referenced by the upper Var */ + ste = get_tle_by_resno(subquery->targetList, var->varattno); + if (ste == NULL || ste->resjunk) + elog(ERROR, "subquery %s does not have attribute %d", + rte->eref->aliasname, var->varattno); + var = (Var *) ste->expr; + + /* + * If subquery uses DISTINCT, we can't make use of any stats for the + * variable ... but, if it's the only DISTINCT column, we are entitled + * to consider it unique. We do the test this way so that it works + * for cases involving DISTINCT ON. + */ + if (subquery->distinctClause) + { + if (list_length(subquery->distinctClause) == 1 && + targetIsInSortList(ste, InvalidOid, subquery->distinctClause)) + vardata->isunique = true; + /* cannot go further */ + return; + } + + /* + * If the sub-query originated from a view with the security_barrier + * attribute, we must not look at the variable's statistics, though it + * seems all right to notice the existence of a DISTINCT clause. So + * stop here. + * + * This is probably a harsher restriction than necessary; it's + * certainly OK for the selectivity estimator (which is a C function, + * and therefore omnipotent anyway) to look at the statistics. But + * many selectivity estimators will happily *invoke the operator + * function* to try to work out a good estimate - and that's not OK. + * So for now, don't dig down for stats. + */ + if (rte->security_barrier) + return; + + /* Can only handle a simple Var of subquery's query level */ + if (var && IsA(var, Var) && + var->varlevelsup == 0) + { + /* + * OK, recurse into the subquery. Note that the original setting + * of vardata->isunique (which will surely be false) is left + * unchanged in this situation. That's what we want, since even + * if the underlying column is unique, the subquery may have + * joined to other tables in a way that creates duplicates. + */ + examine_simple_variable(rel->subroot, var, vardata); + } + } + else + { + /* + * Otherwise, the Var comes from a FUNCTION, VALUES, or CTE RTE. (We + * won't see RTE_JOIN here because join alias Vars have already been + * flattened.) There's not much we can do with function outputs, but + * maybe someday try to be smarter about VALUES and/or CTEs. + */ + } +} + +/* + * Check whether it is permitted to call func_oid passing some of the + * pg_statistic data in vardata. We allow this either if the user has SELECT + * privileges on the table or column underlying the pg_statistic data or if + * the function is marked leak-proof. + */ +bool +statistic_proc_security_check(VariableStatData *vardata, Oid func_oid) +{ + if (vardata->acl_ok) + return true; + + if (!OidIsValid(func_oid)) + return false; + + if (get_func_leakproof(func_oid)) + return true; + + ereport(DEBUG2, + (errmsg_internal("not using statistics because function \"%s\" is not leak-proof", + get_func_name(func_oid)))); + return false; +} + +/* + * get_variable_numdistinct + * Estimate the number of distinct values of a variable. + * + * vardata: results of examine_variable + * *isdefault: set to true if the result is a default rather than based on + * anything meaningful. + * + * NB: be careful to produce a positive integral result, since callers may + * compare the result to exact integer counts, or might divide by it. 
+ */ +double +get_variable_numdistinct(VariableStatData *vardata, bool *isdefault) +{ + double stadistinct; + double stanullfrac = 0.0; + double ntuples; + + *isdefault = false; + + /* + * Determine the stadistinct value to use. There are cases where we can + * get an estimate even without a pg_statistic entry, or can get a better + * value than is in pg_statistic. Grab stanullfrac too if we can find it + * (otherwise, assume no nulls, for lack of any better idea). + */ + if (HeapTupleIsValid(vardata->statsTuple)) + { + /* Use the pg_statistic entry */ + Form_pg_statistic stats; + + stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple); + stadistinct = stats->stadistinct; + stanullfrac = stats->stanullfrac; + } + else if (vardata->vartype == BOOLOID) + { + /* + * Special-case boolean columns: presumably, two distinct values. + * + * Are there any other datatypes we should wire in special estimates + * for? + */ + stadistinct = 2.0; + } + else if (vardata->rel && vardata->rel->rtekind == RTE_VALUES) + { + /* + * If the Var represents a column of a VALUES RTE, assume it's unique. + * This could of course be very wrong, but it should tend to be true + * in well-written queries. We could consider examining the VALUES' + * contents to get some real statistics; but that only works if the + * entries are all constants, and it would be pretty expensive anyway. + */ + stadistinct = -1.0; /* unique (and all non null) */ + } + else + { + /* + * We don't keep statistics for system columns, but in some cases we + * can infer distinctness anyway. + */ + if (vardata->var && IsA(vardata->var, Var)) + { + switch (((Var *) vardata->var)->varattno) + { + case SelfItemPointerAttributeNumber: + stadistinct = -1.0; /* unique (and all non null) */ + break; + case TableOidAttributeNumber: + stadistinct = 1.0; /* only 1 value */ + break; + default: + stadistinct = 0.0; /* means "unknown" */ + break; + } + } + else + stadistinct = 0.0; /* means "unknown" */ + + /* + * XXX consider using estimate_num_groups on expressions? + */ + } + + /* + * If there is a unique index or DISTINCT clause for the variable, assume + * it is unique no matter what pg_statistic says; the statistics could be + * out of date, or we might have found a partial unique index that proves + * the var is unique for this query. However, we'd better still believe + * the null-fraction statistic. + */ + if (vardata->isunique) + stadistinct = -1.0 * (1.0 - stanullfrac); + + /* + * If we had an absolute estimate, use that. + */ + if (stadistinct > 0.0) + return clamp_row_est(stadistinct); + + /* + * Otherwise we need to get the relation size; punt if not available. + */ + if (vardata->rel == NULL) + { + *isdefault = true; + return DEFAULT_NUM_DISTINCT; + } + ntuples = vardata->rel->tuples; + if (ntuples <= 0.0) + { + *isdefault = true; + return DEFAULT_NUM_DISTINCT; + } + + /* + * If we had a relative estimate, use that. + */ + if (stadistinct < 0.0) + return clamp_row_est(-stadistinct * ntuples); + + /* + * With no data, estimate ndistinct = ntuples if the table is small, else + * use default. We use DEFAULT_NUM_DISTINCT as the cutoff for "small" so + * that the behavior isn't discontinuous. + */ + if (ntuples < DEFAULT_NUM_DISTINCT) + return clamp_row_est(ntuples); + + *isdefault = true; + return DEFAULT_NUM_DISTINCT; +} + +/* + * get_variable_range + * Estimate the minimum and maximum value of the specified variable. + * If successful, store values in *min and *max, and return true. + * If no data available, return false. 
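The stadistinct conventions that get_variable_numdistinct above has to juggle (positive means an absolute count, negative means a fraction of the row count, zero means unknown) reduce to a few branches. The sketch below is a deliberately simplified restatement that skips the null-fraction adjustment and clamp_row_est rounding; the 200-row fallback is assumed to match DEFAULT_NUM_DISTINCT:

#include <stdio.h>

#define DEFAULT_NUM_DISTINCT 200.0

static double
ndistinct_estimate(double stadistinct, double ntuples)
{
    if (stadistinct > 0.0)
        return stadistinct;                 /* absolute estimate */
    if (stadistinct < 0.0 && ntuples > 0.0)
        return -stadistinct * ntuples;      /* relative estimate */
    if (ntuples > 0.0 && ntuples < DEFAULT_NUM_DISTINCT)
        return ntuples;                     /* small table: assume all distinct */
    return DEFAULT_NUM_DISTINCT;            /* no data at all */
}

int
main(void)
{
    printf("%.1f\n", ndistinct_estimate(1000.0, 1e6)); /* 1000.0 */
    printf("%.1f\n", ndistinct_estimate(-0.25, 1e6));  /* 250000.0 */
    printf("%.1f\n", ndistinct_estimate(0.0, 50.0));   /* 50.0 */
    printf("%.1f\n", ndistinct_estimate(0.0, 1e6));    /* 200.0 */
    return 0;
}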
+ * + * sortop is the "<" comparison operator to use. This should generally + * be "<" not ">", as only the former is likely to be found in pg_statistic. + * The collation must be specified too. + */ +static bool +get_variable_range(PlannerInfo *root, VariableStatData *vardata, + Oid sortop, Oid collation, + Datum *min, Datum *max) +{ + Datum tmin = 0; + Datum tmax = 0; + bool have_data = false; + int16 typLen; + bool typByVal; + Oid opfuncoid; + FmgrInfo opproc; + AttStatsSlot sslot; + + /* + * XXX It's very tempting to try to use the actual column min and max, if + * we can get them relatively-cheaply with an index probe. However, since + * this function is called many times during join planning, that could + * have unpleasant effects on planning speed. Need more investigation + * before enabling this. + */ +#ifdef NOT_USED + if (get_actual_variable_range(root, vardata, sortop, collation, min, max)) + return true; +#endif + + if (!HeapTupleIsValid(vardata->statsTuple)) + { + /* no stats available, so default result */ + return false; + } + + /* + * If we can't apply the sortop to the stats data, just fail. In + * principle, if there's a histogram and no MCVs, we could return the + * histogram endpoints without ever applying the sortop ... but it's + * probably not worth trying, because whatever the caller wants to do with + * the endpoints would likely fail the security check too. + */ + if (!statistic_proc_security_check(vardata, + (opfuncoid = get_opcode(sortop)))) + return false; + + opproc.fn_oid = InvalidOid; /* mark this as not looked up yet */ + + get_typlenbyval(vardata->atttype, &typLen, &typByVal); + + /* + * If there is a histogram with the ordering we want, grab the first and + * last values. + */ + if (get_attstatsslot(&sslot, vardata->statsTuple, + STATISTIC_KIND_HISTOGRAM, sortop, + ATTSTATSSLOT_VALUES)) + { + if (sslot.stacoll == collation && sslot.nvalues > 0) + { + tmin = datumCopy(sslot.values[0], typByVal, typLen); + tmax = datumCopy(sslot.values[sslot.nvalues - 1], typByVal, typLen); + have_data = true; + } + free_attstatsslot(&sslot); + } + + /* + * Otherwise, if there is a histogram with some other ordering, scan it + * and get the min and max values according to the ordering we want. This + * of course may not find values that are really extremal according to our + * ordering, but it beats ignoring available data. + */ + if (!have_data && + get_attstatsslot(&sslot, vardata->statsTuple, + STATISTIC_KIND_HISTOGRAM, InvalidOid, + ATTSTATSSLOT_VALUES)) + { + get_stats_slot_range(&sslot, opfuncoid, &opproc, + collation, typLen, typByVal, + &tmin, &tmax, &have_data); + free_attstatsslot(&sslot); + } + + /* + * If we have most-common-values info, look for extreme MCVs. This is + * needed even if we also have a histogram, since the histogram excludes + * the MCVs. However, if we *only* have MCVs and no histogram, we should + * be pretty wary of deciding that that is a full representation of the + * data. Proceed only if the MCVs represent the whole table (to within + * roundoff error). + */ + if (get_attstatsslot(&sslot, vardata->statsTuple, + STATISTIC_KIND_MCV, InvalidOid, + have_data ? 
ATTSTATSSLOT_VALUES : + (ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))) + { + bool use_mcvs = have_data; + + if (!have_data) + { + double sumcommon = 0.0; + double nullfrac; + int i; + + for (i = 0; i < sslot.nnumbers; i++) + sumcommon += sslot.numbers[i]; + nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata->statsTuple))->stanullfrac; + if (sumcommon + nullfrac > 0.99999) + use_mcvs = true; + } + + if (use_mcvs) + get_stats_slot_range(&sslot, opfuncoid, &opproc, + collation, typLen, typByVal, + &tmin, &tmax, &have_data); + free_attstatsslot(&sslot); + } + + *min = tmin; + *max = tmax; + return have_data; +} + +/* + * get_stats_slot_range: scan sslot for min/max values + * + * Subroutine for get_variable_range: update min/max/have_data according + * to what we find in the statistics array. + */ +static void +get_stats_slot_range(AttStatsSlot *sslot, Oid opfuncoid, FmgrInfo *opproc, + Oid collation, int16 typLen, bool typByVal, + Datum *min, Datum *max, bool *p_have_data) +{ + Datum tmin = *min; + Datum tmax = *max; + bool have_data = *p_have_data; + bool found_tmin = false; + bool found_tmax = false; + + /* Look up the comparison function, if we didn't already do so */ + if (opproc->fn_oid != opfuncoid) + fmgr_info(opfuncoid, opproc); + + /* Scan all the slot's values */ + for (int i = 0; i < sslot->nvalues; i++) + { + if (!have_data) + { + tmin = tmax = sslot->values[i]; + found_tmin = found_tmax = true; + *p_have_data = have_data = true; + continue; + } + if (DatumGetBool(FunctionCall2Coll(opproc, + collation, + sslot->values[i], tmin))) + { + tmin = sslot->values[i]; + found_tmin = true; + } + if (DatumGetBool(FunctionCall2Coll(opproc, + collation, + tmax, sslot->values[i]))) + { + tmax = sslot->values[i]; + found_tmax = true; + } + } + + /* + * Copy the slot's values, if we found new extreme values. + */ + if (found_tmin) + *min = datumCopy(tmin, typByVal, typLen); + if (found_tmax) + *max = datumCopy(tmax, typByVal, typLen); +} + + +/* + * get_actual_variable_range + * Attempt to identify the current *actual* minimum and/or maximum + * of the specified variable, by looking for a suitable btree index + * and fetching its low and/or high values. + * If successful, store values in *min and *max, and return true. + * (Either pointer can be NULL if that endpoint isn't needed.) + * If unsuccessful, return false. + * + * sortop is the "<" comparison operator to use. + * collation is the required collation. + */ +static bool +get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, + Oid sortop, Oid collation, + Datum *min, Datum *max) +{ + bool have_data = false; + RelOptInfo *rel = vardata->rel; + RangeTblEntry *rte; + ListCell *lc; + + /* No hope if no relation or it doesn't have indexes */ + if (rel == NULL || rel->indexlist == NIL) + return false; + /* If it has indexes it must be a plain relation */ + rte = root->simple_rte_array[rel->relid]; + Assert(rte->rtekind == RTE_RELATION); + + /* ignore partitioned tables. Any indexes here are not real indexes */ + if (rte->relkind == RELKIND_PARTITIONED_TABLE) + return false; + + /* Search through the indexes to see if any match our problem */ + foreach(lc, rel->indexlist) + { + IndexOptInfo *index = (IndexOptInfo *) lfirst(lc); + ScanDirection indexscandir; + + /* Ignore non-btree indexes */ + if (index->relam != BTREE_AM_OID) + continue; + + /* + * Ignore partial indexes --- we only want stats that cover the entire + * relation. 
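The "MCVs only" guard near the top of this block in get_variable_range (trust the MCV list as a stand-in for the whole column only when its frequencies plus the null fraction account for essentially all rows) is easy to check in isolation. The frequency arrays below are invented; only the 0.99999 threshold is taken from the code above:

#include <stdio.h>
#include <stdbool.h>

static bool
mcvs_cover_table(const double *freqs, int n, double nullfrac)
{
    double sumcommon = 0.0;

    for (int i = 0; i < n; i++)
        sumcommon += freqs[i];
    return sumcommon + nullfrac > 0.99999;
}

int
main(void)
{
    double boolean_like[] = {0.60, 0.39};       /* e.g. a two-valued column */
    double long_tail[]    = {0.20, 0.10, 0.05}; /* plenty of non-MCV values */

    printf("%d %d\n",
           mcvs_cover_table(boolean_like, 2, 0.01),  /* 1: usable */
           mcvs_cover_table(long_tail, 3, 0.0));     /* 0: not usable */
    return 0;
}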
+ */ + if (index->indpred != NIL) + continue; + + /* + * The index list might include hypothetical indexes inserted by a + * get_relation_info hook --- don't try to access them. + */ + if (index->hypothetical) + continue; + + /* + * The first index column must match the desired variable, sortop, and + * collation --- but we can use a descending-order index. + */ + if (collation != index->indexcollations[0]) + continue; /* test first 'cause it's cheapest */ + if (!match_index_to_operand(vardata->var, 0, index)) + continue; + switch (get_op_opfamily_strategy(sortop, index->sortopfamily[0])) + { + case BTLessStrategyNumber: + if (index->reverse_sort[0]) + indexscandir = BackwardScanDirection; + else + indexscandir = ForwardScanDirection; + break; + case BTGreaterStrategyNumber: + if (index->reverse_sort[0]) + indexscandir = ForwardScanDirection; + else + indexscandir = BackwardScanDirection; + break; + default: + /* index doesn't match the sortop */ + continue; + } + + /* + * Found a suitable index to extract data from. Set up some data that + * can be used by both invocations of get_actual_variable_endpoint. + */ + { + MemoryContext tmpcontext; + MemoryContext oldcontext; + Relation heapRel; + Relation indexRel; + TupleTableSlot *slot; + int16 typLen; + bool typByVal; + ScanKeyData scankeys[1]; + + /* Make sure any cruft gets recycled when we're done */ + tmpcontext = AllocSetContextCreate(CurrentMemoryContext, + "get_actual_variable_range workspace", + ALLOCSET_DEFAULT_SIZES); + oldcontext = MemoryContextSwitchTo(tmpcontext); + + /* + * Open the table and index so we can read from them. We should + * already have some type of lock on each. + */ + heapRel = table_open(rte->relid, NoLock); + indexRel = index_open(index->indexoid, NoLock); + + /* build some stuff needed for indexscan execution */ + slot = table_slot_create(heapRel, NULL); + get_typlenbyval(vardata->atttype, &typLen, &typByVal); + + /* set up an IS NOT NULL scan key so that we ignore nulls */ + ScanKeyEntryInitialize(&scankeys[0], + SK_ISNULL | SK_SEARCHNOTNULL, + 1, /* index col to scan */ + InvalidStrategy, /* no strategy */ + InvalidOid, /* no strategy subtype */ + InvalidOid, /* no collation */ + InvalidOid, /* no reg proc for this */ + (Datum) 0); /* constant */ + + /* If min is requested ... */ + if (min) + { + have_data = get_actual_variable_endpoint(heapRel, + indexRel, + indexscandir, + scankeys, + typLen, + typByVal, + slot, + oldcontext, + min); + } + else + { + /* If min not requested, still want to fetch max */ + have_data = true; + } + + /* If max is requested, and we didn't already fail ... */ + if (max && have_data) + { + /* scan in the opposite direction; all else is the same */ + have_data = get_actual_variable_endpoint(heapRel, + indexRel, + -indexscandir, + scankeys, + typLen, + typByVal, + slot, + oldcontext, + max); + } + + /* Clean everything up */ + ExecDropSingleTupleTableSlot(slot); + + index_close(indexRel, NoLock); + table_close(heapRel, NoLock); + + MemoryContextSwitchTo(oldcontext); + MemoryContextDelete(tmpcontext); + + /* And we're done */ + break; + } + } + + return have_data; +} + +/* + * Get one endpoint datum (min or max depending on indexscandir) from the + * specified index. Return true if successful, false if not. + * On success, endpoint value is stored to *endpointDatum (and copied into + * outercontext). + * + * scankeys is a 1-element scankey array set up to reject nulls. + * typLen/typByVal describe the datatype of the index's first column. 
+ * tableslot is a slot suitable to hold table tuples, in case we need + * to probe the heap. + * (We could compute these values locally, but that would mean computing them + * twice when get_actual_variable_range needs both the min and the max.) + * + * Failure occurs either when the index is empty, or we decide that it's + * taking too long to find a suitable tuple. + */ +static bool +get_actual_variable_endpoint(Relation heapRel, + Relation indexRel, + ScanDirection indexscandir, + ScanKey scankeys, + int16 typLen, + bool typByVal, + TupleTableSlot *tableslot, + MemoryContext outercontext, + Datum *endpointDatum) +{ + bool have_data = false; + SnapshotData SnapshotNonVacuumable; + IndexScanDesc index_scan; + Buffer vmbuffer = InvalidBuffer; + BlockNumber last_heap_block = InvalidBlockNumber; + int n_visited_heap_pages = 0; + ItemPointer tid; + Datum values[INDEX_MAX_KEYS]; + bool isnull[INDEX_MAX_KEYS]; + MemoryContext oldcontext; + + /* + * We use the index-only-scan machinery for this. With mostly-static + * tables that's a win because it avoids a heap visit. It's also a win + * for dynamic data, but the reason is less obvious; read on for details. + * + * In principle, we should scan the index with our current active + * snapshot, which is the best approximation we've got to what the query + * will see when executed. But that won't be exact if a new snap is taken + * before running the query, and it can be very expensive if a lot of + * recently-dead or uncommitted rows exist at the beginning or end of the + * index (because we'll laboriously fetch each one and reject it). + * Instead, we use SnapshotNonVacuumable. That will accept recently-dead + * and uncommitted rows as well as normal visible rows. On the other + * hand, it will reject known-dead rows, and thus not give a bogus answer + * when the extreme value has been deleted (unless the deletion was quite + * recent); that case motivates not using SnapshotAny here. + * + * A crucial point here is that SnapshotNonVacuumable, with + * GlobalVisTestFor(heapRel) as horizon, yields the inverse of the + * condition that the indexscan will use to decide that index entries are + * killable (see heap_hot_search_buffer()). Therefore, if the snapshot + * rejects a tuple (or more precisely, all tuples of a HOT chain) and we + * have to continue scanning past it, we know that the indexscan will mark + * that index entry killed. That means that the next + * get_actual_variable_endpoint() call will not have to re-consider that + * index entry. In this way we avoid repetitive work when this function + * is used a lot during planning. + * + * But using SnapshotNonVacuumable creates a hazard of its own. In a + * recently-created index, some index entries may point at "broken" HOT + * chains in which not all the tuple versions contain data matching the + * index entry. The live tuple version(s) certainly do match the index, + * but SnapshotNonVacuumable can accept recently-dead tuple versions that + * don't match. Hence, if we took data from the selected heap tuple, we + * might get a bogus answer that's not close to the index extremal value, + * or could even be NULL. We avoid this hazard because we take the data + * from the index entry not the heap. + * + * Despite all this care, there are situations where we might find many + * non-visible tuples near the end of the index. We don't want to expend + * a huge amount of time here, so we give up once we've read too many heap + * pages. 
When we fail for that reason, the caller will end up using + * whatever extremal value is recorded in pg_statistic. + */ + InitNonVacuumableSnapshot(SnapshotNonVacuumable, + GlobalVisTestFor(heapRel)); + + index_scan = index_beginscan(heapRel, indexRel, + &SnapshotNonVacuumable, + 1, 0); + /* Set it up for index-only scan */ + index_scan->xs_want_itup = true; + index_rescan(index_scan, scankeys, 1, NULL, 0); + + /* Fetch first/next tuple in specified direction */ + while ((tid = index_getnext_tid(index_scan, indexscandir)) != NULL) + { + BlockNumber block = ItemPointerGetBlockNumber(tid); + + if (!VM_ALL_VISIBLE(heapRel, + block, + &vmbuffer)) + { + /* Rats, we have to visit the heap to check visibility */ + if (!index_fetch_heap(index_scan, tableslot)) + { + /* + * No visible tuple for this index entry, so we need to + * advance to the next entry. Before doing so, count heap + * page fetches and give up if we've done too many. + * + * We don't charge a page fetch if this is the same heap page + * as the previous tuple. This is on the conservative side, + * since other recently-accessed pages are probably still in + * buffers too; but it's good enough for this heuristic. + */ +#define VISITED_PAGES_LIMIT 100 + + if (block != last_heap_block) + { + last_heap_block = block; + n_visited_heap_pages++; + if (n_visited_heap_pages > VISITED_PAGES_LIMIT) + break; + } + + continue; /* no visible tuple, try next index entry */ + } + + /* We don't actually need the heap tuple for anything */ + ExecClearTuple(tableslot); + + /* + * We don't care whether there's more than one visible tuple in + * the HOT chain; if any are visible, that's good enough. + */ + } + + /* + * We expect that btree will return data in IndexTuple not HeapTuple + * format. It's not lossy either. + */ + if (!index_scan->xs_itup) + elog(ERROR, "no data returned for index-only scan"); + if (index_scan->xs_recheck) + elog(ERROR, "unexpected recheck indication from btree"); + + /* OK to deconstruct the index tuple */ + index_deform_tuple(index_scan->xs_itup, + index_scan->xs_itupdesc, + values, isnull); + + /* Shouldn't have got a null, but be careful */ + if (isnull[0]) + elog(ERROR, "found unexpected null value in index \"%s\"", + RelationGetRelationName(indexRel)); + + /* Copy the index column value out to caller's context */ + oldcontext = MemoryContextSwitchTo(outercontext); + *endpointDatum = datumCopy(values[0], typByVal, typLen); + MemoryContextSwitchTo(oldcontext); + have_data = true; + break; + } + + if (vmbuffer != InvalidBuffer) + ReleaseBuffer(vmbuffer); + index_endscan(index_scan); + + return have_data; +} + +/* + * find_join_input_rel + * Look up the input relation for a join. + * + * We assume that the input relation's RelOptInfo must have been constructed + * already. 
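The give-up heuristic in get_actual_variable_endpoint above charges one heap-page fetch only when the invisible-tuple check lands on a different block than the last one, and abandons the probe once a fixed budget is spent. A minimal imitation, with invented block numbers:

#include <stdio.h>
#include <stdbool.h>

#define VISITED_PAGES_LIMIT 100

int
main(void)
{
    unsigned blocks[] = {7, 7, 7, 8, 9, 9, 10};    /* heap blocks we had to check */
    unsigned last_block = (unsigned) -1;
    int      n_visited = 0;
    bool     gave_up = false;

    for (int i = 0; i < 7; i++)
    {
        if (blocks[i] != last_block)
        {
            last_block = blocks[i];
            if (++n_visited > VISITED_PAGES_LIMIT)
            {
                gave_up = true;
                break;
            }
        }
    }
    printf("visited %d pages, gave up: %s\n",
           n_visited, gave_up ? "yes" : "no");
    return 0;
}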
+ */ +static RelOptInfo * +find_join_input_rel(PlannerInfo *root, Relids relids) +{ + RelOptInfo *rel = NULL; + + switch (bms_membership(relids)) + { + case BMS_EMPTY_SET: + /* should not happen */ + break; + case BMS_SINGLETON: + rel = find_base_rel(root, bms_singleton_member(relids)); + break; + case BMS_MULTIPLE: + rel = find_join_rel(root, relids); + break; + } + + if (rel == NULL) + elog(ERROR, "could not find RelOptInfo for given relids"); + + return rel; +} + + +/*------------------------------------------------------------------------- + * + * Index cost estimation functions + * + *------------------------------------------------------------------------- + */ + +/* + * Extract the actual indexquals (as RestrictInfos) from an IndexClause list + */ +List * +get_quals_from_indexclauses(List *indexclauses) +{ + List *result = NIL; + ListCell *lc; + + foreach(lc, indexclauses) + { + IndexClause *iclause = lfirst_node(IndexClause, lc); + ListCell *lc2; + + foreach(lc2, iclause->indexquals) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2); + + result = lappend(result, rinfo); + } + } + return result; +} + +/* + * Compute the total evaluation cost of the comparison operands in a list + * of index qual expressions. Since we know these will be evaluated just + * once per scan, there's no need to distinguish startup from per-row cost. + * + * This can be used either on the result of get_quals_from_indexclauses(), + * or directly on an indexorderbys list. In both cases, we expect that the + * index key expression is on the left side of binary clauses. + */ +Cost +index_other_operands_eval_cost(PlannerInfo *root, List *indexquals) +{ + Cost qual_arg_cost = 0; + ListCell *lc; + + foreach(lc, indexquals) + { + Expr *clause = (Expr *) lfirst(lc); + Node *other_operand; + QualCost index_qual_cost; + + /* + * Index quals will have RestrictInfos, indexorderbys won't. Look + * through RestrictInfo if present. 
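+ * Either way the index key is expected on the left, so the operand we
+ * cost below is the right-hand argument.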
+ */ + if (IsA(clause, RestrictInfo)) + clause = ((RestrictInfo *) clause)->clause; + + if (IsA(clause, OpExpr)) + { + OpExpr *op = (OpExpr *) clause; + + other_operand = (Node *) lsecond(op->args); + } + else if (IsA(clause, RowCompareExpr)) + { + RowCompareExpr *rc = (RowCompareExpr *) clause; + + other_operand = (Node *) rc->rargs; + } + else if (IsA(clause, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause; + + other_operand = (Node *) lsecond(saop->args); + } + else if (IsA(clause, NullTest)) + { + other_operand = NULL; + } + else + { + elog(ERROR, "unsupported indexqual type: %d", + (int) nodeTag(clause)); + other_operand = NULL; /* keep compiler quiet */ + } + + cost_qual_eval_node(&index_qual_cost, other_operand, root); + qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple; + } + return qual_arg_cost; +} + +void +genericcostestimate(PlannerInfo *root, + IndexPath *path, + double loop_count, + GenericCosts *costs) +{ + IndexOptInfo *index = path->indexinfo; + List *indexQuals = get_quals_from_indexclauses(path->indexclauses); + List *indexOrderBys = path->indexorderbys; + Cost indexStartupCost; + Cost indexTotalCost; + Selectivity indexSelectivity; + double indexCorrelation; + double numIndexPages; + double numIndexTuples; + double spc_random_page_cost; + double num_sa_scans; + double num_outer_scans; + double num_scans; + double qual_op_cost; + double qual_arg_cost; + List *selectivityQuals; + ListCell *l; + + /* + * If the index is partial, AND the index predicate with the explicitly + * given indexquals to produce a more accurate idea of the index + * selectivity. + */ + selectivityQuals = add_predicate_to_index_quals(index, indexQuals); + + /* + * Check for ScalarArrayOpExpr index quals, and estimate the number of + * index scans that will be performed. + */ + num_sa_scans = 1; + foreach(l, indexQuals) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + + if (IsA(rinfo->clause, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause; + int alength = estimate_array_length(lsecond(saop->args)); + + if (alength > 1) + num_sa_scans *= alength; + } + } + + /* Estimate the fraction of main-table tuples that will be visited */ + indexSelectivity = clauselist_selectivity(root, selectivityQuals, + index->rel->relid, + JOIN_INNER, + NULL); + + /* + * If caller didn't give us an estimate, estimate the number of index + * tuples that will be visited. We do it in this rather peculiar-looking + * way in order to get the right answer for partial indexes. + */ + numIndexTuples = costs->numIndexTuples; + if (numIndexTuples <= 0.0) + { + numIndexTuples = indexSelectivity * index->rel->tuples; + + /* + * The above calculation counts all the tuples visited across all + * scans induced by ScalarArrayOpExpr nodes. We want to consider the + * average per-indexscan number, so adjust. This is a handy place to + * round to integer, too. (If caller supplied tuple estimate, it's + * responsible for handling these considerations.) + */ + numIndexTuples = rint(numIndexTuples / num_sa_scans); + } + + /* + * We can bound the number of tuples by the index size in any case. Also, + * always estimate at least one tuple is touched, even when + * indexSelectivity estimate is tiny. + */ + if (numIndexTuples > index->tuples) + numIndexTuples = index->tuples; + if (numIndexTuples < 1.0) + numIndexTuples = 1.0; + + /* + * Estimate the number of index pages that will be retrieved. 
+ * + * We use the simplistic method of taking a pro-rata fraction of the total + * number of index pages. In effect, this counts only leaf pages and not + * any overhead such as index metapage or upper tree levels. + * + * In practice access to upper index levels is often nearly free because + * those tend to stay in cache under load; moreover, the cost involved is + * highly dependent on index type. We therefore ignore such costs here + * and leave it to the caller to add a suitable charge if needed. + */ + if (index->pages > 1 && index->tuples > 1) + numIndexPages = ceil(numIndexTuples * index->pages / index->tuples); + else + numIndexPages = 1.0; + + /* fetch estimated page cost for tablespace containing index */ + get_tablespace_page_costs(index->reltablespace, + &spc_random_page_cost, + NULL); + + /* + * Now compute the disk access costs. + * + * The above calculations are all per-index-scan. However, if we are in a + * nestloop inner scan, we can expect the scan to be repeated (with + * different search keys) for each row of the outer relation. Likewise, + * ScalarArrayOpExpr quals result in multiple index scans. This creates + * the potential for cache effects to reduce the number of disk page + * fetches needed. We want to estimate the average per-scan I/O cost in + * the presence of caching. + * + * We use the Mackert-Lohman formula (see costsize.c for details) to + * estimate the total number of page fetches that occur. While this + * wasn't what it was designed for, it seems a reasonable model anyway. + * Note that we are counting pages not tuples anymore, so we take N = T = + * index size, as if there were one "tuple" per page. + */ + num_outer_scans = loop_count; + num_scans = num_sa_scans * num_outer_scans; + + if (num_scans > 1) + { + double pages_fetched; + + /* total page fetches ignoring cache effects */ + pages_fetched = numIndexPages * num_scans; + + /* use Mackert and Lohman formula to adjust for cache effects */ + pages_fetched = index_pages_fetched(pages_fetched, + index->pages, + (double) index->pages, + root); + + /* + * Now compute the total disk access cost, and then report a pro-rated + * share for each outer scan. (Don't pro-rate for ScalarArrayOpExpr, + * since that's internal to the indexscan.) + */ + indexTotalCost = (pages_fetched * spc_random_page_cost) + / num_outer_scans; + } + else + { + /* + * For a single index scan, we just charge spc_random_page_cost per + * page touched. + */ + indexTotalCost = numIndexPages * spc_random_page_cost; + } + + /* + * CPU cost: any complex expressions in the indexquals will need to be + * evaluated once at the start of the scan to reduce them to runtime keys + * to pass to the index AM (see nodeIndexscan.c). We model the per-tuple + * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per + * indexqual operator. Because we have numIndexTuples as a per-scan + * number, we have to multiply by num_sa_scans to get the correct result + * for ScalarArrayOpExpr cases. Similarly add in costs for any index + * ORDER BY expressions. + * + * Note: this neglects the possible costs of rechecking lossy operators. + * Detecting that that might be needed seems more expensive than it's + * worth, though, considering all the other inaccuracies here ... 
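+ * Concretely: qual_arg_cost is charged once at scan start, and every
+ * visited index tuple costs cpu_index_tuple_cost + qual_op_cost, scaled
+ * by num_sa_scans.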
+ */ + qual_arg_cost = index_other_operands_eval_cost(root, indexQuals) + + index_other_operands_eval_cost(root, indexOrderBys); + qual_op_cost = cpu_operator_cost * + (list_length(indexQuals) + list_length(indexOrderBys)); + + indexStartupCost = qual_arg_cost; + indexTotalCost += qual_arg_cost; + indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost); + + /* + * Generic assumption about index correlation: there isn't any. + */ + indexCorrelation = 0.0; + + /* + * Return everything to caller. + */ + costs->indexStartupCost = indexStartupCost; + costs->indexTotalCost = indexTotalCost; + costs->indexSelectivity = indexSelectivity; + costs->indexCorrelation = indexCorrelation; + costs->numIndexPages = numIndexPages; + costs->numIndexTuples = numIndexTuples; + costs->spc_random_page_cost = spc_random_page_cost; + costs->num_sa_scans = num_sa_scans; +} + +/* + * If the index is partial, add its predicate to the given qual list. + * + * ANDing the index predicate with the explicitly given indexquals produces + * a more accurate idea of the index's selectivity. However, we need to be + * careful not to insert redundant clauses, because clauselist_selectivity() + * is easily fooled into computing a too-low selectivity estimate. Our + * approach is to add only the predicate clause(s) that cannot be proven to + * be implied by the given indexquals. This successfully handles cases such + * as a qual "x = 42" used with a partial index "WHERE x >= 40 AND x < 50". + * There are many other cases where we won't detect redundancy, leading to a + * too-low selectivity estimate, which will bias the system in favor of using + * partial indexes where possible. That is not necessarily bad though. + * + * Note that indexQuals contains RestrictInfo nodes while the indpred + * does not, so the output list will be mixed. This is OK for both + * predicate_implied_by() and clauselist_selectivity(), but might be + * problematic if the result were passed to other things. + */ +List * +add_predicate_to_index_quals(IndexOptInfo *index, List *indexQuals) +{ + List *predExtraQuals = NIL; + ListCell *lc; + + if (index->indpred == NIL) + return indexQuals; + + foreach(lc, index->indpred) + { + Node *predQual = (Node *) lfirst(lc); + List *oneQual = list_make1(predQual); + + if (!predicate_implied_by(oneQual, indexQuals, false)) + predExtraQuals = list_concat(predExtraQuals, oneQual); + } + return list_concat(predExtraQuals, indexQuals); +} + + +void +btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, + Cost *indexStartupCost, Cost *indexTotalCost, + Selectivity *indexSelectivity, double *indexCorrelation, + double *indexPages) +{ + IndexOptInfo *index = path->indexinfo; + GenericCosts costs = {0}; + Oid relid; + AttrNumber colnum; + VariableStatData vardata = {0}; + double numIndexTuples; + Cost descentCost; + List *indexBoundQuals; + int indexcol; + bool eqQualHere; + bool found_saop; + bool found_is_null_op; + double num_sa_scans; + ListCell *lc; + + /* + * For a btree scan, only leading '=' quals plus inequality quals for the + * immediately next attribute contribute to index selectivity (these are + * the "boundary quals" that determine the starting and stopping points of + * the index scan). Additional quals can suppress visits to the heap, so + * it's OK to count them in indexSelectivity, but they should not count + * for estimating numIndexTuples. So we must examine the given indexquals + * to find out which ones count as boundary quals. 
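+ * For example, given an index on (a, b, c) and quals a = 1 AND b > 5
+ * AND c = 10, only the a and b quals bound the scan; the c qual merely
+ * filters rows within that range.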
We rely on the + * knowledge that they are given in index column order. + * + * For a RowCompareExpr, we consider only the first column, just as + * rowcomparesel() does. + * + * If there's a ScalarArrayOpExpr in the quals, we'll actually perform N + * index scans not one, but the ScalarArrayOpExpr's operator can be + * considered to act the same as it normally does. + */ + indexBoundQuals = NIL; + indexcol = 0; + eqQualHere = false; + found_saop = false; + found_is_null_op = false; + num_sa_scans = 1; + foreach(lc, path->indexclauses) + { + IndexClause *iclause = lfirst_node(IndexClause, lc); + ListCell *lc2; + + if (indexcol != iclause->indexcol) + { + /* Beginning of a new column's quals */ + if (!eqQualHere) + break; /* done if no '=' qual for indexcol */ + eqQualHere = false; + indexcol++; + if (indexcol != iclause->indexcol) + break; /* no quals at all for indexcol */ + } + + /* Examine each indexqual associated with this index clause */ + foreach(lc2, iclause->indexquals) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2); + Expr *clause = rinfo->clause; + Oid clause_op = InvalidOid; + int op_strategy; + + if (IsA(clause, OpExpr)) + { + OpExpr *op = (OpExpr *) clause; + + clause_op = op->opno; + } + else if (IsA(clause, RowCompareExpr)) + { + RowCompareExpr *rc = (RowCompareExpr *) clause; + + clause_op = linitial_oid(rc->opnos); + } + else if (IsA(clause, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause; + Node *other_operand = (Node *) lsecond(saop->args); + int alength = estimate_array_length(other_operand); + + clause_op = saop->opno; + found_saop = true; + /* count number of SA scans induced by indexBoundQuals only */ + if (alength > 1) + num_sa_scans *= alength; + } + else if (IsA(clause, NullTest)) + { + NullTest *nt = (NullTest *) clause; + + if (nt->nulltesttype == IS_NULL) + { + found_is_null_op = true; + /* IS NULL is like = for selectivity purposes */ + eqQualHere = true; + } + } + else + elog(ERROR, "unsupported indexqual type: %d", + (int) nodeTag(clause)); + + /* check for equality operator */ + if (OidIsValid(clause_op)) + { + op_strategy = get_op_opfamily_strategy(clause_op, + index->opfamily[indexcol]); + Assert(op_strategy != 0); /* not a member of opfamily?? */ + if (op_strategy == BTEqualStrategyNumber) + eqQualHere = true; + } + + indexBoundQuals = lappend(indexBoundQuals, rinfo); + } + } + + /* + * If index is unique and we found an '=' clause for each column, we can + * just assume numIndexTuples = 1 and skip the expensive + * clauselist_selectivity calculations. However, a ScalarArrayOp or + * NullTest invalidates that theory, even though it sets eqQualHere. + */ + if (index->unique && + indexcol == index->nkeycolumns - 1 && + eqQualHere && + !found_saop && + !found_is_null_op) + numIndexTuples = 1.0; + else + { + List *selectivityQuals; + Selectivity btreeSelectivity; + + /* + * If the index is partial, AND the index predicate with the + * index-bound quals to produce a more accurate idea of the number of + * rows covered by the bound conditions. + */ + selectivityQuals = add_predicate_to_index_quals(index, indexBoundQuals); + + btreeSelectivity = clauselist_selectivity(root, selectivityQuals, + index->rel->relid, + JOIN_INNER, + NULL); + numIndexTuples = btreeSelectivity * index->rel->tuples; + + /* + * As in genericcostestimate(), we have to adjust for any + * ScalarArrayOpExpr quals included in indexBoundQuals, and then round + * to integer. 
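+ * (num_sa_scans here reflects only the array quals that were kept in
+ * indexBoundQuals, not every array qual on the index.)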
+ */ + numIndexTuples = rint(numIndexTuples / num_sa_scans); + } + + /* + * Now do generic index cost estimation. + */ + costs.numIndexTuples = numIndexTuples; + + genericcostestimate(root, path, loop_count, &costs); + + /* + * Add a CPU-cost component to represent the costs of initial btree + * descent. We don't charge any I/O cost for touching upper btree levels, + * since they tend to stay in cache, but we still have to do about log2(N) + * comparisons to descend a btree of N leaf tuples. We charge one + * cpu_operator_cost per comparison. + * + * If there are ScalarArrayOpExprs, charge this once per SA scan. The + * ones after the first one are not startup cost so far as the overall + * plan is concerned, so add them only to "total" cost. + */ + if (index->tuples > 1) /* avoid computing log(0) */ + { + descentCost = ceil(log(index->tuples) / log(2.0)) * cpu_operator_cost; + costs.indexStartupCost += descentCost; + costs.indexTotalCost += costs.num_sa_scans * descentCost; + } + + /* + * Even though we're not charging I/O cost for touching upper btree pages, + * it's still reasonable to charge some CPU cost per page descended + * through. Moreover, if we had no such charge at all, bloated indexes + * would appear to have the same search cost as unbloated ones, at least + * in cases where only a single leaf page is expected to be visited. This + * cost is somewhat arbitrarily set at 50x cpu_operator_cost per page + * touched. The number of such pages is btree tree height plus one (ie, + * we charge for the leaf page too). As above, charge once per SA scan. + */ + descentCost = (index->tree_height + 1) * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost; + costs.indexStartupCost += descentCost; + costs.indexTotalCost += costs.num_sa_scans * descentCost; + + /* + * If we can get an estimate of the first column's ordering correlation C + * from pg_statistic, estimate the index correlation as C for a + * single-column index, or C * 0.75 for multiple columns. (The idea here + * is that multiple columns dilute the importance of the first column's + * ordering, but don't negate it entirely. Before 8.0 we divided the + * correlation by the number of columns, but that seems too strong.) + */ + if (index->indexkeys[0] != 0) + { + /* Simple variable --- look to stats for the underlying table */ + RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root); + + Assert(rte->rtekind == RTE_RELATION); + relid = rte->relid; + Assert(relid != InvalidOid); + colnum = index->indexkeys[0]; + + if (get_relation_stats_hook && + (*get_relation_stats_hook) (root, rte, colnum, &vardata)) + { + /* + * The hook took control of acquiring a stats tuple. If it did + * supply a tuple, it'd better have supplied a freefunc. + */ + if (HeapTupleIsValid(vardata.statsTuple) && + !vardata.freefunc) + elog(ERROR, "no function provided to release variable stats with"); + } + else + { + vardata.statsTuple = SearchSysCache3(STATRELATTINH, + ObjectIdGetDatum(relid), + Int16GetDatum(colnum), + BoolGetDatum(rte->inh)); + vardata.freefunc = ReleaseSysCache; + } + } + else + { + /* Expression --- maybe there are stats for the index itself */ + relid = index->indexoid; + colnum = 1; + + if (get_index_stats_hook && + (*get_index_stats_hook) (root, relid, colnum, &vardata)) + { + /* + * The hook took control of acquiring a stats tuple. If it did + * supply a tuple, it'd better have supplied a freefunc. 
+ */ + if (HeapTupleIsValid(vardata.statsTuple) && + !vardata.freefunc) + elog(ERROR, "no function provided to release variable stats with"); + } + else + { + vardata.statsTuple = SearchSysCache3(STATRELATTINH, + ObjectIdGetDatum(relid), + Int16GetDatum(colnum), + BoolGetDatum(false)); + vardata.freefunc = ReleaseSysCache; + } + } + + if (HeapTupleIsValid(vardata.statsTuple)) + { + Oid sortop; + AttStatsSlot sslot; + + sortop = get_opfamily_member(index->opfamily[0], + index->opcintype[0], + index->opcintype[0], + BTLessStrategyNumber); + if (OidIsValid(sortop) && + get_attstatsslot(&sslot, vardata.statsTuple, + STATISTIC_KIND_CORRELATION, sortop, + ATTSTATSSLOT_NUMBERS)) + { + double varCorrelation; + + Assert(sslot.nnumbers == 1); + varCorrelation = sslot.numbers[0]; + + if (index->reverse_sort[0]) + varCorrelation = -varCorrelation; + + if (index->nkeycolumns > 1) + costs.indexCorrelation = varCorrelation * 0.75; + else + costs.indexCorrelation = varCorrelation; + + free_attstatsslot(&sslot); + } + } + + ReleaseVariableStats(vardata); + + *indexStartupCost = costs.indexStartupCost; + *indexTotalCost = costs.indexTotalCost; + *indexSelectivity = costs.indexSelectivity; + *indexCorrelation = costs.indexCorrelation; + *indexPages = costs.numIndexPages; +} + +void +hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, + Cost *indexStartupCost, Cost *indexTotalCost, + Selectivity *indexSelectivity, double *indexCorrelation, + double *indexPages) +{ + GenericCosts costs = {0}; + + genericcostestimate(root, path, loop_count, &costs); + + /* + * A hash index has no descent costs as such, since the index AM can go + * directly to the target bucket after computing the hash value. There + * are a couple of other hash-specific costs that we could conceivably add + * here, though: + * + * Ideally we'd charge spc_random_page_cost for each page in the target + * bucket, not just the numIndexPages pages that genericcostestimate + * thought we'd visit. However in most cases we don't know which bucket + * that will be. There's no point in considering the average bucket size + * because the hash AM makes sure that's always one page. + * + * Likewise, we could consider charging some CPU for each index tuple in + * the bucket, if we knew how many there were. But the per-tuple cost is + * just a hash value comparison, not a general datatype-dependent + * comparison, so any such charge ought to be quite a bit less than + * cpu_operator_cost; which makes it probably not worth worrying about. + * + * A bigger issue is that chance hash-value collisions will result in + * wasted probes into the heap. We don't currently attempt to model this + * cost on the grounds that it's rare, but maybe it's not rare enough. + * (Any fix for this ought to consider the generic lossy-operator problem, + * though; it's not entirely hash-specific.) 
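+ * For now none of these refinements are attempted, and the generic
+ * estimates are returned unchanged.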
+ */ + + *indexStartupCost = costs.indexStartupCost; + *indexTotalCost = costs.indexTotalCost; + *indexSelectivity = costs.indexSelectivity; + *indexCorrelation = costs.indexCorrelation; + *indexPages = costs.numIndexPages; +} + +void +gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, + Cost *indexStartupCost, Cost *indexTotalCost, + Selectivity *indexSelectivity, double *indexCorrelation, + double *indexPages) +{ + IndexOptInfo *index = path->indexinfo; + GenericCosts costs = {0}; + Cost descentCost; + + genericcostestimate(root, path, loop_count, &costs); + + /* + * We model index descent costs similarly to those for btree, but to do + * that we first need an idea of the tree height. We somewhat arbitrarily + * assume that the fanout is 100, meaning the tree height is at most + * log100(index->pages). + * + * Although this computation isn't really expensive enough to require + * caching, we might as well use index->tree_height to cache it. + */ + if (index->tree_height < 0) /* unknown? */ + { + if (index->pages > 1) /* avoid computing log(0) */ + index->tree_height = (int) (log(index->pages) / log(100.0)); + else + index->tree_height = 0; + } + + /* + * Add a CPU-cost component to represent the costs of initial descent. We + * just use log(N) here not log2(N) since the branching factor isn't + * necessarily two anyway. As for btree, charge once per SA scan. + */ + if (index->tuples > 1) /* avoid computing log(0) */ + { + descentCost = ceil(log(index->tuples)) * cpu_operator_cost; + costs.indexStartupCost += descentCost; + costs.indexTotalCost += costs.num_sa_scans * descentCost; + } + + /* + * Likewise add a per-page charge, calculated the same as for btrees. + */ + descentCost = (index->tree_height + 1) * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost; + costs.indexStartupCost += descentCost; + costs.indexTotalCost += costs.num_sa_scans * descentCost; + + *indexStartupCost = costs.indexStartupCost; + *indexTotalCost = costs.indexTotalCost; + *indexSelectivity = costs.indexSelectivity; + *indexCorrelation = costs.indexCorrelation; + *indexPages = costs.numIndexPages; +} + +void +spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, + Cost *indexStartupCost, Cost *indexTotalCost, + Selectivity *indexSelectivity, double *indexCorrelation, + double *indexPages) +{ + IndexOptInfo *index = path->indexinfo; + GenericCosts costs = {0}; + Cost descentCost; + + genericcostestimate(root, path, loop_count, &costs); + + /* + * We model index descent costs similarly to those for btree, but to do + * that we first need an idea of the tree height. We somewhat arbitrarily + * assume that the fanout is 100, meaning the tree height is at most + * log100(index->pages). + * + * Although this computation isn't really expensive enough to require + * caching, we might as well use index->tree_height to cache it. + */ + if (index->tree_height < 0) /* unknown? */ + { + if (index->pages > 1) /* avoid computing log(0) */ + index->tree_height = (int) (log(index->pages) / log(100.0)); + else + index->tree_height = 0; + } + + /* + * Add a CPU-cost component to represent the costs of initial descent. We + * just use log(N) here not log2(N) since the branching factor isn't + * necessarily two anyway. As for btree, charge once per SA scan. 
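+ * For instance, an index with ten million tuples is charged about
+ * ceil(ln(1e7)) = 17 cpu_operator_cost units per descent.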
+ */ + if (index->tuples > 1) /* avoid computing log(0) */ + { + descentCost = ceil(log(index->tuples)) * cpu_operator_cost; + costs.indexStartupCost += descentCost; + costs.indexTotalCost += costs.num_sa_scans * descentCost; + } + + /* + * Likewise add a per-page charge, calculated the same as for btrees. + */ + descentCost = (index->tree_height + 1) * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost; + costs.indexStartupCost += descentCost; + costs.indexTotalCost += costs.num_sa_scans * descentCost; + + *indexStartupCost = costs.indexStartupCost; + *indexTotalCost = costs.indexTotalCost; + *indexSelectivity = costs.indexSelectivity; + *indexCorrelation = costs.indexCorrelation; + *indexPages = costs.numIndexPages; +} + + +/* + * Support routines for gincostestimate + */ + +typedef struct +{ + bool attHasFullScan[INDEX_MAX_KEYS]; + bool attHasNormalScan[INDEX_MAX_KEYS]; + double partialEntries; + double exactEntries; + double searchEntries; + double arrayScans; +} GinQualCounts; + +/* + * Estimate the number of index terms that need to be searched for while + * testing the given GIN query, and increment the counts in *counts + * appropriately. If the query is unsatisfiable, return false. + */ +static bool +gincost_pattern(IndexOptInfo *index, int indexcol, + Oid clause_op, Datum query, + GinQualCounts *counts) +{ + FmgrInfo flinfo; + Oid extractProcOid; + Oid collation; + int strategy_op; + Oid lefttype, + righttype; + int32 nentries = 0; + bool *partial_matches = NULL; + Pointer *extra_data = NULL; + bool *nullFlags = NULL; + int32 searchMode = GIN_SEARCH_MODE_DEFAULT; + int32 i; + + Assert(indexcol < index->nkeycolumns); + + /* + * Get the operator's strategy number and declared input data types within + * the index opfamily. (We don't need the latter, but we use + * get_op_opfamily_properties because it will throw error if it fails to + * find a matching pg_amop entry.) + */ + get_op_opfamily_properties(clause_op, index->opfamily[indexcol], false, + &strategy_op, &lefttype, &righttype); + + /* + * GIN always uses the "default" support functions, which are those with + * lefttype == righttype == the opclass' opcintype (see + * IndexSupportInitialize in relcache.c). + */ + extractProcOid = get_opfamily_proc(index->opfamily[indexcol], + index->opcintype[indexcol], + index->opcintype[indexcol], + GIN_EXTRACTQUERY_PROC); + + if (!OidIsValid(extractProcOid)) + { + /* should not happen; throw same error as index_getprocinfo */ + elog(ERROR, "missing support function %d for attribute %d of index \"%s\"", + GIN_EXTRACTQUERY_PROC, indexcol + 1, + get_rel_name(index->indexoid)); + } + + /* + * Choose collation to pass to extractProc (should match initGinState). 
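+ * That is, use the index column's collation if it has one, else the
+ * database default collation.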
+ */ + if (OidIsValid(index->indexcollations[indexcol])) + collation = index->indexcollations[indexcol]; + else + collation = DEFAULT_COLLATION_OID; + + fmgr_info(extractProcOid, &flinfo); + + set_fn_opclass_options(&flinfo, index->opclassoptions[indexcol]); + + FunctionCall7Coll(&flinfo, + collation, + query, + PointerGetDatum(&nentries), + UInt16GetDatum(strategy_op), + PointerGetDatum(&partial_matches), + PointerGetDatum(&extra_data), + PointerGetDatum(&nullFlags), + PointerGetDatum(&searchMode)); + + if (nentries <= 0 && searchMode == GIN_SEARCH_MODE_DEFAULT) + { + /* No match is possible */ + return false; + } + + for (i = 0; i < nentries; i++) + { + /* + * For partial match we haven't any information to estimate number of + * matched entries in index, so, we just estimate it as 100 + */ + if (partial_matches && partial_matches[i]) + counts->partialEntries += 100; + else + counts->exactEntries++; + + counts->searchEntries++; + } + + if (searchMode == GIN_SEARCH_MODE_DEFAULT) + { + counts->attHasNormalScan[indexcol] = true; + } + else if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY) + { + /* Treat "include empty" like an exact-match item */ + counts->attHasNormalScan[indexcol] = true; + counts->exactEntries++; + counts->searchEntries++; + } + else + { + /* It's GIN_SEARCH_MODE_ALL */ + counts->attHasFullScan[indexcol] = true; + } + + return true; +} + +/* + * Estimate the number of index terms that need to be searched for while + * testing the given GIN index clause, and increment the counts in *counts + * appropriately. If the query is unsatisfiable, return false. + */ +static bool +gincost_opexpr(PlannerInfo *root, + IndexOptInfo *index, + int indexcol, + OpExpr *clause, + GinQualCounts *counts) +{ + Oid clause_op = clause->opno; + Node *operand = (Node *) lsecond(clause->args); + + /* aggressively reduce to a constant, and look through relabeling */ + operand = estimate_expression_value(root, operand); + + if (IsA(operand, RelabelType)) + operand = (Node *) ((RelabelType *) operand)->arg; + + /* + * It's impossible to call extractQuery method for unknown operand. So + * unless operand is a Const we can't do much; just assume there will be + * one ordinary search entry from the operand at runtime. + */ + if (!IsA(operand, Const)) + { + counts->exactEntries++; + counts->searchEntries++; + return true; + } + + /* If Const is null, there can be no matches */ + if (((Const *) operand)->constisnull) + return false; + + /* Otherwise, apply extractQuery and get the actual term counts */ + return gincost_pattern(index, indexcol, clause_op, + ((Const *) operand)->constvalue, + counts); +} + +/* + * Estimate the number of index terms that need to be searched for while + * testing the given GIN index clause, and increment the counts in *counts + * appropriately. If the query is unsatisfiable, return false. + * + * A ScalarArrayOpExpr will give rise to N separate indexscans at runtime, + * each of which involves one value from the RHS array, plus all the + * non-array quals (if any). To model this, we average the counts across + * the RHS elements, and add the averages to the counts in *counts (which + * correspond to per-indexscan costs). We also multiply counts->arrayScans + * by N, causing gincostestimate to scale up its estimates accordingly. 
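+ * For example, an "indexcol op ANY (array)" qual whose constant array
+ * holds three non-null, satisfiable elements averages the three
+ * per-element counts and multiplies counts->arrayScans by 3.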
+ */ +static bool +gincost_scalararrayopexpr(PlannerInfo *root, + IndexOptInfo *index, + int indexcol, + ScalarArrayOpExpr *clause, + double numIndexEntries, + GinQualCounts *counts) +{ + Oid clause_op = clause->opno; + Node *rightop = (Node *) lsecond(clause->args); + ArrayType *arrayval; + int16 elmlen; + bool elmbyval; + char elmalign; + int numElems; + Datum *elemValues; + bool *elemNulls; + GinQualCounts arraycounts; + int numPossible = 0; + int i; + + Assert(clause->useOr); + + /* aggressively reduce to a constant, and look through relabeling */ + rightop = estimate_expression_value(root, rightop); + + if (IsA(rightop, RelabelType)) + rightop = (Node *) ((RelabelType *) rightop)->arg; + + /* + * It's impossible to call extractQuery method for unknown operand. So + * unless operand is a Const we can't do much; just assume there will be + * one ordinary search entry from each array entry at runtime, and fall + * back on a probably-bad estimate of the number of array entries. + */ + if (!IsA(rightop, Const)) + { + counts->exactEntries++; + counts->searchEntries++; + counts->arrayScans *= estimate_array_length(rightop); + return true; + } + + /* If Const is null, there can be no matches */ + if (((Const *) rightop)->constisnull) + return false; + + /* Otherwise, extract the array elements and iterate over them */ + arrayval = DatumGetArrayTypeP(((Const *) rightop)->constvalue); + get_typlenbyvalalign(ARR_ELEMTYPE(arrayval), + &elmlen, &elmbyval, &elmalign); + deconstruct_array(arrayval, + ARR_ELEMTYPE(arrayval), + elmlen, elmbyval, elmalign, + &elemValues, &elemNulls, &numElems); + + memset(&arraycounts, 0, sizeof(arraycounts)); + + for (i = 0; i < numElems; i++) + { + GinQualCounts elemcounts; + + /* NULL can't match anything, so ignore, as the executor will */ + if (elemNulls[i]) + continue; + + /* Otherwise, apply extractQuery and get the actual term counts */ + memset(&elemcounts, 0, sizeof(elemcounts)); + + if (gincost_pattern(index, indexcol, clause_op, elemValues[i], + &elemcounts)) + { + /* We ignore array elements that are unsatisfiable patterns */ + numPossible++; + + if (elemcounts.attHasFullScan[indexcol] && + !elemcounts.attHasNormalScan[indexcol]) + { + /* + * Full index scan will be required. We treat this as if + * every key in the index had been listed in the query; is + * that reasonable? + */ + elemcounts.partialEntries = 0; + elemcounts.exactEntries = numIndexEntries; + elemcounts.searchEntries = numIndexEntries; + } + arraycounts.partialEntries += elemcounts.partialEntries; + arraycounts.exactEntries += elemcounts.exactEntries; + arraycounts.searchEntries += elemcounts.searchEntries; + } + } + + if (numPossible == 0) + { + /* No satisfiable patterns in the array */ + return false; + } + + /* + * Now add the averages to the global counts. This will give us an + * estimate of the average number of terms searched for in each indexscan, + * including contributions from both array and non-array quals. 
+ */ + counts->partialEntries += arraycounts.partialEntries / numPossible; + counts->exactEntries += arraycounts.exactEntries / numPossible; + counts->searchEntries += arraycounts.searchEntries / numPossible; + + counts->arrayScans *= numPossible; + + return true; +} + +/* + * GIN has search behavior completely different from other index types + */ +void +gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, + Cost *indexStartupCost, Cost *indexTotalCost, + Selectivity *indexSelectivity, double *indexCorrelation, + double *indexPages) +{ + IndexOptInfo *index = path->indexinfo; + List *indexQuals = get_quals_from_indexclauses(path->indexclauses); + List *selectivityQuals; + double numPages = index->pages, + numTuples = index->tuples; + double numEntryPages, + numDataPages, + numPendingPages, + numEntries; + GinQualCounts counts; + bool matchPossible; + bool fullIndexScan; + double partialScale; + double entryPagesFetched, + dataPagesFetched, + dataPagesFetchedBySel; + double qual_op_cost, + qual_arg_cost, + spc_random_page_cost, + outer_scans; + Cost descentCost; + Relation indexRel; + GinStatsData ginStats; + ListCell *lc; + int i; + + /* + * Obtain statistical information from the meta page, if possible. Else + * set ginStats to zeroes, and we'll cope below. + */ + if (!index->hypothetical) + { + /* Lock should have already been obtained in plancat.c */ + indexRel = index_open(index->indexoid, NoLock); + ginGetStats(indexRel, &ginStats); + index_close(indexRel, NoLock); + } + else + { + memset(&ginStats, 0, sizeof(ginStats)); + } + + /* + * Assuming we got valid (nonzero) stats at all, nPendingPages can be + * trusted, but the other fields are data as of the last VACUUM. We can + * scale them up to account for growth since then, but that method only + * goes so far; in the worst case, the stats might be for a completely + * empty index, and scaling them will produce pretty bogus numbers. + * Somewhat arbitrarily, set the cutoff for doing scaling at 4X growth; if + * it's grown more than that, fall back to estimating things only from the + * assumed-accurate index size. But we'll trust nPendingPages in any case + * so long as it's not clearly insane, ie, more than the index size. + */ + if (ginStats.nPendingPages < numPages) + numPendingPages = ginStats.nPendingPages; + else + numPendingPages = 0; + + if (numPages > 0 && ginStats.nTotalPages <= numPages && + ginStats.nTotalPages > numPages / 4 && + ginStats.nEntryPages > 0 && ginStats.nEntries > 0) + { + /* + * OK, the stats seem close enough to sane to be trusted. But we + * still need to scale them by the ratio numPages / nTotalPages to + * account for growth since the last VACUUM. + */ + double scale = numPages / ginStats.nTotalPages; + + numEntryPages = ceil(ginStats.nEntryPages * scale); + numDataPages = ceil(ginStats.nDataPages * scale); + numEntries = ceil(ginStats.nEntries * scale); + /* ensure we didn't round up too much */ + numEntryPages = Min(numEntryPages, numPages - numPendingPages); + numDataPages = Min(numDataPages, + numPages - numPendingPages - numEntryPages); + } + else + { + /* + * We might get here because it's a hypothetical index, or an index + * created pre-9.1 and never vacuumed since upgrading (in which case + * its stats would read as zeroes), or just because it's grown too + * much since the last VACUUM for us to put our faith in scaling. + * + * Invent some plausible internal statistics based on the index page + * count (and clamp that to at least 10 pages, just in case). 
We + * estimate that 90% of the index is entry pages, and the rest is data + * pages. Estimate 100 entries per entry page; this is rather bogus + * since it'll depend on the size of the keys, but it's more robust + * than trying to predict the number of entries per heap tuple. + */ + numPages = Max(numPages, 10); + numEntryPages = floor((numPages - numPendingPages) * 0.90); + numDataPages = numPages - numPendingPages - numEntryPages; + numEntries = floor(numEntryPages * 100); + } + + /* In an empty index, numEntries could be zero. Avoid divide-by-zero */ + if (numEntries < 1) + numEntries = 1; + + /* + * If the index is partial, AND the index predicate with the index-bound + * quals to produce a more accurate idea of the number of rows covered by + * the bound conditions. + */ + selectivityQuals = add_predicate_to_index_quals(index, indexQuals); + + /* Estimate the fraction of main-table tuples that will be visited */ + *indexSelectivity = clauselist_selectivity(root, selectivityQuals, + index->rel->relid, + JOIN_INNER, + NULL); + + /* fetch estimated page cost for tablespace containing index */ + get_tablespace_page_costs(index->reltablespace, + &spc_random_page_cost, + NULL); + + /* + * Generic assumption about index correlation: there isn't any. + */ + *indexCorrelation = 0.0; + + /* + * Examine quals to estimate number of search entries & partial matches + */ + memset(&counts, 0, sizeof(counts)); + counts.arrayScans = 1; + matchPossible = true; + + foreach(lc, path->indexclauses) + { + IndexClause *iclause = lfirst_node(IndexClause, lc); + ListCell *lc2; + + foreach(lc2, iclause->indexquals) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2); + Expr *clause = rinfo->clause; + + if (IsA(clause, OpExpr)) + { + matchPossible = gincost_opexpr(root, + index, + iclause->indexcol, + (OpExpr *) clause, + &counts); + if (!matchPossible) + break; + } + else if (IsA(clause, ScalarArrayOpExpr)) + { + matchPossible = gincost_scalararrayopexpr(root, + index, + iclause->indexcol, + (ScalarArrayOpExpr *) clause, + numEntries, + &counts); + if (!matchPossible) + break; + } + else + { + /* shouldn't be anything else for a GIN index */ + elog(ERROR, "unsupported GIN indexqual type: %d", + (int) nodeTag(clause)); + } + } + } + + /* Fall out if there were any provably-unsatisfiable quals */ + if (!matchPossible) + { + *indexStartupCost = 0; + *indexTotalCost = 0; + *indexSelectivity = 0; + return; + } + + /* + * If attribute has a full scan and at the same time doesn't have normal + * scan, then we'll have to scan all non-null entries of that attribute. + * Currently, we don't have per-attribute statistics for GIN. Thus, we + * must assume the whole GIN index has to be scanned in this case. + */ + fullIndexScan = false; + for (i = 0; i < index->nkeycolumns; i++) + { + if (counts.attHasFullScan[i] && !counts.attHasNormalScan[i]) + { + fullIndexScan = true; + break; + } + } + + if (fullIndexScan || indexQuals == NIL) + { + /* + * Full index scan will be required. We treat this as if every key in + * the index had been listed in the query; is that reasonable? + */ + counts.partialEntries = 0; + counts.exactEntries = numEntries; + counts.searchEntries = numEntries; + } + + /* Will we have more than one iteration of a nestloop scan? */ + outer_scans = loop_count; + + /* + * Compute cost to begin scan, first of all, pay attention to pending + * list. + */ + entryPagesFetched = numPendingPages; + + /* + * Estimate number of entry pages read. We need to do + * counts.searchEntries searches. 
Use a power function as it should be, + * but tuples on leaf pages usually is much greater. Here we include all + * searches in entry tree, including search of first entry in partial + * match algorithm + */ + entryPagesFetched += ceil(counts.searchEntries * rint(pow(numEntryPages, 0.15))); + + /* + * Add an estimate of entry pages read by partial match algorithm. It's a + * scan over leaf pages in entry tree. We haven't any useful stats here, + * so estimate it as proportion. Because counts.partialEntries is really + * pretty bogus (see code above), it's possible that it is more than + * numEntries; clamp the proportion to ensure sanity. + */ + partialScale = counts.partialEntries / numEntries; + partialScale = Min(partialScale, 1.0); + + entryPagesFetched += ceil(numEntryPages * partialScale); + + /* + * Partial match algorithm reads all data pages before doing actual scan, + * so it's a startup cost. Again, we haven't any useful stats here, so + * estimate it as proportion. + */ + dataPagesFetched = ceil(numDataPages * partialScale); + + *indexStartupCost = 0; + *indexTotalCost = 0; + + /* + * Add a CPU-cost component to represent the costs of initial entry btree + * descent. We don't charge any I/O cost for touching upper btree levels, + * since they tend to stay in cache, but we still have to do about log2(N) + * comparisons to descend a btree of N leaf tuples. We charge one + * cpu_operator_cost per comparison. + * + * If there are ScalarArrayOpExprs, charge this once per SA scan. The + * ones after the first one are not startup cost so far as the overall + * plan is concerned, so add them only to "total" cost. + */ + if (numEntries > 1) /* avoid computing log(0) */ + { + descentCost = ceil(log(numEntries) / log(2.0)) * cpu_operator_cost; + *indexStartupCost += descentCost * counts.searchEntries; + *indexTotalCost += counts.arrayScans * descentCost * counts.searchEntries; + } + + /* + * Add a cpu cost per entry-page fetched. This is not amortized over a + * loop. + */ + *indexStartupCost += entryPagesFetched * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost; + *indexTotalCost += entryPagesFetched * counts.arrayScans * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost; + + /* + * Add a cpu cost per data-page fetched. This is also not amortized over a + * loop. Since those are the data pages from the partial match algorithm, + * charge them as startup cost. + */ + *indexStartupCost += DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost * dataPagesFetched; + + /* + * Since we add the startup cost to the total cost later on, remove the + * initial arrayscan from the total. + */ + *indexTotalCost += dataPagesFetched * (counts.arrayScans - 1) * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost; + + /* + * Calculate cache effects if more than one scan due to nestloops or array + * quals. The result is pro-rated per nestloop scan, but the array qual + * factor shouldn't be pro-rated (compare genericcostestimate). + */ + if (outer_scans > 1 || counts.arrayScans > 1) + { + entryPagesFetched *= outer_scans * counts.arrayScans; + entryPagesFetched = index_pages_fetched(entryPagesFetched, + (BlockNumber) numEntryPages, + numEntryPages, root); + entryPagesFetched /= outer_scans; + dataPagesFetched *= outer_scans * counts.arrayScans; + dataPagesFetched = index_pages_fetched(dataPagesFetched, + (BlockNumber) numDataPages, + numDataPages, root); + dataPagesFetched /= outer_scans; + } + + /* + * Here we use random page cost because logically-close pages could be far + * apart on disk. 
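+ * Both the entry pages and the partial-match data pages estimated above
+ * are therefore charged at spc_random_page_cost, as startup cost.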
+ */ + *indexStartupCost += (entryPagesFetched + dataPagesFetched) * spc_random_page_cost; + + /* + * Now compute the number of data pages fetched during the scan. + * + * We assume every entry to have the same number of items, and that there + * is no overlap between them. (XXX: tsvector and array opclasses collect + * statistics on the frequency of individual keys; it would be nice to use + * those here.) + */ + dataPagesFetched = ceil(numDataPages * counts.exactEntries / numEntries); + + /* + * If there is a lot of overlap among the entries, in particular if one of + * the entries is very frequent, the above calculation can grossly + * under-estimate. As a simple cross-check, calculate a lower bound based + * on the overall selectivity of the quals. At a minimum, we must read + * one item pointer for each matching entry. + * + * The width of each item pointer varies, based on the level of + * compression. We don't have statistics on that, but an average of + * around 3 bytes per item is fairly typical. + */ + dataPagesFetchedBySel = ceil(*indexSelectivity * + (numTuples / (BLCKSZ / 3))); + if (dataPagesFetchedBySel > dataPagesFetched) + dataPagesFetched = dataPagesFetchedBySel; + + /* Add one page cpu-cost to the startup cost */ + *indexStartupCost += DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost * counts.searchEntries; + + /* + * Add once again a CPU-cost for those data pages, before amortizing for + * cache. + */ + *indexTotalCost += dataPagesFetched * counts.arrayScans * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost; + + /* Account for cache effects, the same as above */ + if (outer_scans > 1 || counts.arrayScans > 1) + { + dataPagesFetched *= outer_scans * counts.arrayScans; + dataPagesFetched = index_pages_fetched(dataPagesFetched, + (BlockNumber) numDataPages, + numDataPages, root); + dataPagesFetched /= outer_scans; + } + + /* And apply random_page_cost as the cost per page */ + *indexTotalCost += *indexStartupCost + + dataPagesFetched * spc_random_page_cost; + + /* + * Add on index qual eval costs, much as in genericcostestimate. We charge + * cpu but we can disregard indexorderbys, since GIN doesn't support + * those. + */ + qual_arg_cost = index_other_operands_eval_cost(root, indexQuals); + qual_op_cost = cpu_operator_cost * list_length(indexQuals); + + *indexStartupCost += qual_arg_cost; + *indexTotalCost += qual_arg_cost; + + /* + * Add a cpu cost per search entry, corresponding to the actual visited + * entries. 
+ */ + *indexTotalCost += (counts.searchEntries * counts.arrayScans) * (qual_op_cost); + /* Now add a cpu cost per tuple in the posting lists / trees */ + *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost); + *indexPages = dataPagesFetched; +} + +/* + * BRIN has search behavior completely different from other index types + */ +void +brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, + Cost *indexStartupCost, Cost *indexTotalCost, + Selectivity *indexSelectivity, double *indexCorrelation, + double *indexPages) +{ + IndexOptInfo *index = path->indexinfo; + List *indexQuals = get_quals_from_indexclauses(path->indexclauses); + double numPages = index->pages; + RelOptInfo *baserel = index->rel; + RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root); + Cost spc_seq_page_cost; + Cost spc_random_page_cost; + double qual_arg_cost; + double qualSelectivity; + BrinStatsData statsData; + double indexRanges; + double minimalRanges; + double estimatedRanges; + double selec; + Relation indexRel; + ListCell *l; + VariableStatData vardata; + + Assert(rte->rtekind == RTE_RELATION); + + /* fetch estimated page cost for the tablespace containing the index */ + get_tablespace_page_costs(index->reltablespace, + &spc_random_page_cost, + &spc_seq_page_cost); + + /* + * Obtain some data from the index itself, if possible. Otherwise invent + * some plausible internal statistics based on the relation page count. + */ + if (!index->hypothetical) + { + /* + * A lock should have already been obtained on the index in plancat.c. + */ + indexRel = index_open(index->indexoid, NoLock); + brinGetStats(indexRel, &statsData); + index_close(indexRel, NoLock); + + /* work out the actual number of ranges in the index */ + indexRanges = Max(ceil((double) baserel->pages / + statsData.pagesPerRange), 1.0); + } + else + { + /* + * Assume default number of pages per range, and estimate the number + * of ranges based on that. + */ + indexRanges = Max(ceil((double) baserel->pages / + BRIN_DEFAULT_PAGES_PER_RANGE), 1.0); + + statsData.pagesPerRange = BRIN_DEFAULT_PAGES_PER_RANGE; + statsData.revmapNumPages = (indexRanges / REVMAP_PAGE_MAXITEMS) + 1; + } + + /* + * Compute index correlation + * + * Because we can use all index quals equally when scanning, we can use + * the largest correlation (in absolute value) among columns used by the + * query. Start at zero, the worst possible case. If we cannot find any + * correlation statistics, we will keep it as 0. + */ + *indexCorrelation = 0; + + foreach(l, path->indexclauses) + { + IndexClause *iclause = lfirst_node(IndexClause, l); + AttrNumber attnum = index->indexkeys[iclause->indexcol]; + + /* attempt to lookup stats in relation for this index column */ + if (attnum != 0) + { + /* Simple variable -- look to stats for the underlying table */ + if (get_relation_stats_hook && + (*get_relation_stats_hook) (root, rte, attnum, &vardata)) + { + /* + * The hook took control of acquiring a stats tuple. If it + * did supply a tuple, it'd better have supplied a freefunc. + */ + if (HeapTupleIsValid(vardata.statsTuple) && !vardata.freefunc) + elog(ERROR, + "no function provided to release variable stats with"); + } + else + { + vardata.statsTuple = + SearchSysCache3(STATRELATTINH, + ObjectIdGetDatum(rte->relid), + Int16GetDatum(attnum), + BoolGetDatum(false)); + vardata.freefunc = ReleaseSysCache; + } + } + else + { + /* + * Looks like we've found an expression column in the index. Let's + * see if there's any stats for it. 
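+ * Statistics for expression columns, when present, are stored under the
+ * index's own OID rather than the table's.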
+ */ + + /* get the attnum from the 0-based index. */ + attnum = iclause->indexcol + 1; + + if (get_index_stats_hook && + (*get_index_stats_hook) (root, index->indexoid, attnum, &vardata)) + { + /* + * The hook took control of acquiring a stats tuple. If it + * did supply a tuple, it'd better have supplied a freefunc. + */ + if (HeapTupleIsValid(vardata.statsTuple) && + !vardata.freefunc) + elog(ERROR, "no function provided to release variable stats with"); + } + else + { + vardata.statsTuple = SearchSysCache3(STATRELATTINH, + ObjectIdGetDatum(index->indexoid), + Int16GetDatum(attnum), + BoolGetDatum(false)); + vardata.freefunc = ReleaseSysCache; + } + } + + if (HeapTupleIsValid(vardata.statsTuple)) + { + AttStatsSlot sslot; + + if (get_attstatsslot(&sslot, vardata.statsTuple, + STATISTIC_KIND_CORRELATION, InvalidOid, + ATTSTATSSLOT_NUMBERS)) + { + double varCorrelation = 0.0; + + if (sslot.nnumbers > 0) + varCorrelation = fabs(sslot.numbers[0]); + + if (varCorrelation > *indexCorrelation) + *indexCorrelation = varCorrelation; + + free_attstatsslot(&sslot); + } + } + + ReleaseVariableStats(vardata); + } + + qualSelectivity = clauselist_selectivity(root, indexQuals, + baserel->relid, + JOIN_INNER, NULL); + + /* + * Now calculate the minimum possible ranges we could match with if all of + * the rows were in the perfect order in the table's heap. + */ + minimalRanges = ceil(indexRanges * qualSelectivity); + + /* + * Now estimate the number of ranges that we'll touch by using the + * indexCorrelation from the stats. Careful not to divide by zero (note + * we're using the absolute value of the correlation). + */ + if (*indexCorrelation < 1.0e-10) + estimatedRanges = indexRanges; + else + estimatedRanges = Min(minimalRanges / *indexCorrelation, indexRanges); + + /* we expect to visit this portion of the table */ + selec = estimatedRanges / indexRanges; + + CLAMP_PROBABILITY(selec); + + *indexSelectivity = selec; + + /* + * Compute the index qual costs, much as in genericcostestimate, to add to + * the index costs. We can disregard indexorderbys, since BRIN doesn't + * support those. + */ + qual_arg_cost = index_other_operands_eval_cost(root, indexQuals); + + /* + * Compute the startup cost as the cost to read the whole revmap + * sequentially, including the cost to execute the index quals. + */ + *indexStartupCost = + spc_seq_page_cost * statsData.revmapNumPages * loop_count; + *indexStartupCost += qual_arg_cost; + + /* + * To read a BRIN index there might be a bit of back and forth over + * regular pages, as revmap might point to them out of sequential order; + * calculate the total cost as reading the whole index in random order. + */ + *indexTotalCost = *indexStartupCost + + spc_random_page_cost * (numPages - statsData.revmapNumPages) * loop_count; + + /* + * Charge a small amount per range tuple which we expect to match to. This + * is meant to reflect the costs of manipulating the bitmap. The BRIN scan + * will set a bit for each page in the range when we find a matching + * range, so we must multiply the charge by the number of pages in the + * range. 
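+ * With the default of 128 pages per range, each matching range thus
+ * adds about 12.8 * cpu_operator_cost to the total.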
+ */ + *indexTotalCost += 0.1 * cpu_operator_cost * estimatedRanges * + statsData.pagesPerRange; + + *indexPages = index->pages; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tid.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tid.c new file mode 100644 index 00000000000..77fb74ab0c1 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tid.c @@ -0,0 +1,425 @@ +/*------------------------------------------------------------------------- + * + * tid.c + * Functions for the built-in type tuple id + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/tid.c + * + * NOTES + * input routine largely stolen from boxin(). + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <math.h> +#include <limits.h> + +#include "access/heapam.h" +#include "access/sysattr.h" +#include "access/tableam.h" +#include "catalog/namespace.h" +#include "catalog/pg_type.h" +#include "common/hashfn.h" +#include "libpq/pqformat.h" +#include "miscadmin.h" +#include "parser/parsetree.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/snapmgr.h" +#include "utils/varlena.h" + + +#define LDELIM '(' +#define RDELIM ')' +#define DELIM ',' +#define NTIDARGS 2 + +static ItemPointer currtid_for_view(Relation viewrel, ItemPointer tid); + +/* ---------------------------------------------------------------- + * tidin + * ---------------------------------------------------------------- + */ +Datum +tidin(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + char *p, + *coord[NTIDARGS]; + int i; + ItemPointer result; + BlockNumber blockNumber; + OffsetNumber offsetNumber; + char *badp; + unsigned long cvt; + + for (i = 0, p = str; *p && i < NTIDARGS && *p != RDELIM; p++) + if (*p == DELIM || (*p == LDELIM && i == 0)) + coord[i++] = p + 1; + + if (i < NTIDARGS) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "tid", str))); + + errno = 0; + cvt = strtoul(coord[0], &badp, 10); + if (errno || *badp != DELIM) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "tid", str))); + blockNumber = (BlockNumber) cvt; + + /* + * Cope with possibility that unsigned long is wider than BlockNumber, in + * which case strtoul will not raise an error for some values that are out + * of the range of BlockNumber. (See similar code in oidin().) 
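+ * On such platforms, accept both the zero-extended and the
+ * sign-extended representation of the 32-bit block number and reject
+ * anything else.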
+ */ +#if SIZEOF_LONG > 4 + if (cvt != (unsigned long) blockNumber && + cvt != (unsigned long) ((int32) blockNumber)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "tid", str))); +#endif + + cvt = strtoul(coord[1], &badp, 10); + if (errno || *badp != RDELIM || + cvt > USHRT_MAX) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "tid", str))); + offsetNumber = (OffsetNumber) cvt; + + result = (ItemPointer) palloc(sizeof(ItemPointerData)); + + ItemPointerSet(result, blockNumber, offsetNumber); + + PG_RETURN_ITEMPOINTER(result); +} + +/* ---------------------------------------------------------------- + * tidout + * ---------------------------------------------------------------- + */ +Datum +tidout(PG_FUNCTION_ARGS) +{ + ItemPointer itemPtr = PG_GETARG_ITEMPOINTER(0); + BlockNumber blockNumber; + OffsetNumber offsetNumber; + char buf[32]; + + blockNumber = ItemPointerGetBlockNumberNoCheck(itemPtr); + offsetNumber = ItemPointerGetOffsetNumberNoCheck(itemPtr); + + /* Perhaps someday we should output this as a record. */ + snprintf(buf, sizeof(buf), "(%u,%u)", blockNumber, offsetNumber); + + PG_RETURN_CSTRING(pstrdup(buf)); +} + +/* + * tidrecv - converts external binary format to tid + */ +Datum +tidrecv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + ItemPointer result; + BlockNumber blockNumber; + OffsetNumber offsetNumber; + + blockNumber = pq_getmsgint(buf, sizeof(blockNumber)); + offsetNumber = pq_getmsgint(buf, sizeof(offsetNumber)); + + result = (ItemPointer) palloc(sizeof(ItemPointerData)); + + ItemPointerSet(result, blockNumber, offsetNumber); + + PG_RETURN_ITEMPOINTER(result); +} + +/* + * tidsend - converts tid to binary format + */ +Datum +tidsend(PG_FUNCTION_ARGS) +{ + ItemPointer itemPtr = PG_GETARG_ITEMPOINTER(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint32(&buf, ItemPointerGetBlockNumberNoCheck(itemPtr)); + pq_sendint16(&buf, ItemPointerGetOffsetNumberNoCheck(itemPtr)); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/***************************************************************************** + * PUBLIC ROUTINES * + *****************************************************************************/ + +Datum +tideq(PG_FUNCTION_ARGS) +{ + ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0); + ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1); + + PG_RETURN_BOOL(ItemPointerCompare(arg1, arg2) == 0); +} + +Datum +tidne(PG_FUNCTION_ARGS) +{ + ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0); + ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1); + + PG_RETURN_BOOL(ItemPointerCompare(arg1, arg2) != 0); +} + +Datum +tidlt(PG_FUNCTION_ARGS) +{ + ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0); + ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1); + + PG_RETURN_BOOL(ItemPointerCompare(arg1, arg2) < 0); +} + +Datum +tidle(PG_FUNCTION_ARGS) +{ + ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0); + ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1); + + PG_RETURN_BOOL(ItemPointerCompare(arg1, arg2) <= 0); +} + +Datum +tidgt(PG_FUNCTION_ARGS) +{ + ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0); + ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1); + + PG_RETURN_BOOL(ItemPointerCompare(arg1, arg2) > 0); +} + +Datum +tidge(PG_FUNCTION_ARGS) +{ + ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0); + ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1); + + PG_RETURN_BOOL(ItemPointerCompare(arg1, arg2) >= 0); +} + +Datum +bttidcmp(PG_FUNCTION_ARGS) +{ 
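+	/* btree support function: three-way comparison of two tids */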
+ ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0); + ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1); + + PG_RETURN_INT32(ItemPointerCompare(arg1, arg2)); +} + +Datum +tidlarger(PG_FUNCTION_ARGS) +{ + ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0); + ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1); + + PG_RETURN_ITEMPOINTER(ItemPointerCompare(arg1, arg2) >= 0 ? arg1 : arg2); +} + +Datum +tidsmaller(PG_FUNCTION_ARGS) +{ + ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0); + ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1); + + PG_RETURN_ITEMPOINTER(ItemPointerCompare(arg1, arg2) <= 0 ? arg1 : arg2); +} + +Datum +hashtid(PG_FUNCTION_ARGS) +{ + ItemPointer key = PG_GETARG_ITEMPOINTER(0); + + /* + * While you'll probably have a lot of trouble with a compiler that + * insists on appending pad space to struct ItemPointerData, we can at + * least make this code work, by not using sizeof(ItemPointerData). + * Instead rely on knowing the sizes of the component fields. + */ + return hash_any((unsigned char *) key, + sizeof(BlockIdData) + sizeof(OffsetNumber)); +} + +Datum +hashtidextended(PG_FUNCTION_ARGS) +{ + ItemPointer key = PG_GETARG_ITEMPOINTER(0); + uint64 seed = PG_GETARG_INT64(1); + + /* As above */ + return hash_any_extended((unsigned char *) key, + sizeof(BlockIdData) + sizeof(OffsetNumber), + seed); +} + + +/* + * Functions to get latest tid of a specified tuple. + * + * Maybe these implementations should be moved to another place + */ + +/* + * Utility wrapper for current CTID functions. + * Returns the latest version of a tuple pointing at "tid" for + * relation "rel". + */ +static ItemPointer +currtid_internal(Relation rel, ItemPointer tid) +{ + ItemPointer result; + AclResult aclresult; + Snapshot snapshot; + TableScanDesc scan; + + result = (ItemPointer) palloc(sizeof(ItemPointerData)); + + aclresult = pg_class_aclcheck(RelationGetRelid(rel), GetUserId(), + ACL_SELECT); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, get_relkind_objtype(rel->rd_rel->relkind), + RelationGetRelationName(rel)); + + if (rel->rd_rel->relkind == RELKIND_VIEW) + return currtid_for_view(rel, tid); + + if (!RELKIND_HAS_STORAGE(rel->rd_rel->relkind)) + elog(ERROR, "cannot look at latest visible tid for relation \"%s.%s\"", + get_namespace_name(RelationGetNamespace(rel)), + RelationGetRelationName(rel)); + + ItemPointerCopy(tid, result); + + snapshot = RegisterSnapshot(GetLatestSnapshot()); + scan = table_beginscan_tid(rel, snapshot); + table_tuple_get_latest_tid(scan, result); + table_endscan(scan); + UnregisterSnapshot(snapshot); + + return result; +} + +/* + * Handle CTIDs of views. + * CTID should be defined in the view and it must + * correspond to the CTID of a base relation. 
+ */ +static ItemPointer +currtid_for_view(Relation viewrel, ItemPointer tid) +{ + TupleDesc att = RelationGetDescr(viewrel); + RuleLock *rulelock; + RewriteRule *rewrite; + int i, + natts = att->natts, + tididx = -1; + + for (i = 0; i < natts; i++) + { + Form_pg_attribute attr = TupleDescAttr(att, i); + + if (strcmp(NameStr(attr->attname), "ctid") == 0) + { + if (attr->atttypid != TIDOID) + elog(ERROR, "ctid isn't of type TID"); + tididx = i; + break; + } + } + if (tididx < 0) + elog(ERROR, "currtid cannot handle views with no CTID"); + rulelock = viewrel->rd_rules; + if (!rulelock) + elog(ERROR, "the view has no rules"); + for (i = 0; i < rulelock->numLocks; i++) + { + rewrite = rulelock->rules[i]; + if (rewrite->event == CMD_SELECT) + { + Query *query; + TargetEntry *tle; + + if (list_length(rewrite->actions) != 1) + elog(ERROR, "only one select rule is allowed in views"); + query = (Query *) linitial(rewrite->actions); + tle = get_tle_by_resno(query->targetList, tididx + 1); + if (tle && tle->expr && IsA(tle->expr, Var)) + { + Var *var = (Var *) tle->expr; + RangeTblEntry *rte; + + if (!IS_SPECIAL_VARNO(var->varno) && + var->varattno == SelfItemPointerAttributeNumber) + { + rte = rt_fetch(var->varno, query->rtable); + if (rte) + { + ItemPointer result; + Relation rel; + + rel = table_open(rte->relid, AccessShareLock); + result = currtid_internal(rel, tid); + table_close(rel, AccessShareLock); + return result; + } + } + } + break; + } + } + elog(ERROR, "currtid cannot handle this view"); + return NULL; +} + +/* + * currtid_byrelname + * Get the latest tuple version of the tuple pointing at a CTID, for a + * given relation name. + */ +Datum +currtid_byrelname(PG_FUNCTION_ARGS) +{ + text *relname = PG_GETARG_TEXT_PP(0); + ItemPointer tid = PG_GETARG_ITEMPOINTER(1); + ItemPointer result; + RangeVar *relrv; + Relation rel; + + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); + rel = table_openrv(relrv, AccessShareLock); + + /* grab the latest tuple version associated to this CTID */ + result = currtid_internal(rel, tid); + + table_close(rel, AccessShareLock); + + PG_RETURN_ITEMPOINTER(result); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/timestamp.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/timestamp.c new file mode 100644 index 00000000000..b585551bca8 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/timestamp.c @@ -0,0 +1,6015 @@ +/*------------------------------------------------------------------------- + * + * timestamp.c + * Functions for the built-in SQL types "timestamp" and "interval". 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/timestamp.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <ctype.h> +#include <math.h> +#include <limits.h> +#include <sys/time.h> + +#include "access/xact.h" +#include "catalog/pg_type.h" +#include "common/int.h" +#include "common/int128.h" +#include "funcapi.h" +#include "libpq/pqformat.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" +#include "parser/scansup.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/date.h" +#include "utils/datetime.h" +#include "utils/float.h" +#include "utils/numeric.h" +#include "utils/sortsupport.h" + +/* + * gcc's -ffast-math switch breaks routines that expect exact results from + * expressions like timeval / SECS_PER_HOUR, where timeval is double. + */ +#ifdef __FAST_MATH__ +#error -ffast-math is known to break this code +#endif + +#define SAMESIGN(a,b) (((a) < 0) == ((b) < 0)) + +/* Set at postmaster start */ +__thread TimestampTz PgStartTime; + +/* Set at configuration reload */ +__thread TimestampTz PgReloadTime; + +typedef struct +{ + Timestamp current; + Timestamp finish; + Interval step; + int step_sign; +} generate_series_timestamp_fctx; + +typedef struct +{ + TimestampTz current; + TimestampTz finish; + Interval step; + int step_sign; + pg_tz *attimezone; +} generate_series_timestamptz_fctx; + + +static TimeOffset time2t(const int hour, const int min, const int sec, const fsec_t fsec); +static Timestamp dt2local(Timestamp dt, int timezone); +static bool AdjustIntervalForTypmod(Interval *interval, int32 typmod, + Node *escontext); +static TimestampTz timestamp2timestamptz(Timestamp timestamp); +static Timestamp timestamptz2timestamp(TimestampTz timestamp); + + +/* common code for timestamptypmodin and timestamptztypmodin */ +static int32 +anytimestamp_typmodin(bool istz, ArrayType *ta) +{ + int32 *tl; + int n; + + tl = ArrayGetIntegerTypmods(ta, &n); + + /* + * we're not too tense about good error message here because grammar + * shouldn't allow wrong number of modifiers for TIMESTAMP + */ + if (n != 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid type modifier"))); + + return anytimestamp_typmod_check(istz, tl[0]); +} + +/* exported so parse_expr.c can use it */ +int32 +anytimestamp_typmod_check(bool istz, int32 typmod) +{ + if (typmod < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("TIMESTAMP(%d)%s precision must not be negative", + typmod, (istz ? " WITH TIME ZONE" : "")))); + if (typmod > MAX_TIMESTAMP_PRECISION) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("TIMESTAMP(%d)%s precision reduced to maximum allowed, %d", + typmod, (istz ? " WITH TIME ZONE" : ""), + MAX_TIMESTAMP_PRECISION))); + typmod = MAX_TIMESTAMP_PRECISION; + } + + return typmod; +} + +/* common code for timestamptypmodout and timestamptztypmodout */ +static char * +anytimestamp_typmodout(bool istz, int32 typmod) +{ + const char *tz = istz ? 
" with time zone" : " without time zone"; + + if (typmod >= 0) + return psprintf("(%d)%s", (int) typmod, tz); + else + return pstrdup(tz); +} + + +/***************************************************************************** + * USER I/O ROUTINES * + *****************************************************************************/ + +/* timestamp_in() + * Convert a string to internal form. + */ +Datum +timestamp_in(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); +#ifdef NOT_USED + Oid typelem = PG_GETARG_OID(1); +#endif + int32 typmod = PG_GETARG_INT32(2); + Node *escontext = fcinfo->context; + Timestamp result; + fsec_t fsec; + struct pg_tm tt, + *tm = &tt; + int tz; + int dtype; + int nf; + int dterr; + char *field[MAXDATEFIELDS]; + int ftype[MAXDATEFIELDS]; + char workbuf[MAXDATELEN + MAXDATEFIELDS]; + DateTimeErrorExtra extra; + + dterr = ParseDateTime(str, workbuf, sizeof(workbuf), + field, ftype, MAXDATEFIELDS, &nf); + if (dterr == 0) + dterr = DecodeDateTime(field, ftype, nf, + &dtype, tm, &fsec, &tz, &extra); + if (dterr != 0) + { + DateTimeParseError(dterr, &extra, str, "timestamp", escontext); + PG_RETURN_NULL(); + } + + switch (dtype) + { + case DTK_DATE: + if (tm2timestamp(tm, fsec, NULL, &result) != 0) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range: \"%s\"", str))); + break; + + case DTK_EPOCH: + result = SetEpochTimestamp(); + break; + + case DTK_LATE: + TIMESTAMP_NOEND(result); + break; + + case DTK_EARLY: + TIMESTAMP_NOBEGIN(result); + break; + + default: + elog(ERROR, "unexpected dtype %d while parsing timestamp \"%s\"", + dtype, str); + TIMESTAMP_NOEND(result); + } + + AdjustTimestampForTypmod(&result, typmod, escontext); + + PG_RETURN_TIMESTAMP(result); +} + +/* timestamp_out() + * Convert a timestamp to external form. 
+ */ +Datum +timestamp_out(PG_FUNCTION_ARGS) +{ + Timestamp timestamp = PG_GETARG_TIMESTAMP(0); + char *result; + struct pg_tm tt, + *tm = &tt; + fsec_t fsec; + char buf[MAXDATELEN + 1]; + + if (TIMESTAMP_NOT_FINITE(timestamp)) + EncodeSpecialTimestamp(timestamp, buf); + else if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) == 0) + EncodeDateTime(tm, fsec, false, 0, NULL, DateStyle, buf); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + result = pstrdup(buf); + PG_RETURN_CSTRING(result); +} + +/* + * timestamp_recv - converts external binary format to timestamp + */ +Datum +timestamp_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + +#ifdef NOT_USED + Oid typelem = PG_GETARG_OID(1); +#endif + int32 typmod = PG_GETARG_INT32(2); + Timestamp timestamp; + struct pg_tm tt, + *tm = &tt; + fsec_t fsec; + + timestamp = (Timestamp) pq_getmsgint64(buf); + + /* range check: see if timestamp_out would like it */ + if (TIMESTAMP_NOT_FINITE(timestamp)) + /* ok */ ; + else if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) != 0 || + !IS_VALID_TIMESTAMP(timestamp)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + AdjustTimestampForTypmod(&timestamp, typmod, NULL); + + PG_RETURN_TIMESTAMP(timestamp); +} + +/* + * timestamp_send - converts timestamp to binary format + */ +Datum +timestamp_send(PG_FUNCTION_ARGS) +{ + Timestamp timestamp = PG_GETARG_TIMESTAMP(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint64(&buf, timestamp); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +Datum +timestamptypmodin(PG_FUNCTION_ARGS) +{ + ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0); + + PG_RETURN_INT32(anytimestamp_typmodin(false, ta)); +} + +Datum +timestamptypmodout(PG_FUNCTION_ARGS) +{ + int32 typmod = PG_GETARG_INT32(0); + + PG_RETURN_CSTRING(anytimestamp_typmodout(false, typmod)); +} + + +/* + * timestamp_support() + * + * Planner support function for the timestamp_scale() and timestamptz_scale() + * length coercion functions (we need not distinguish them here). + */ +Datum +timestamp_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestSimplify)) + { + SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq; + + ret = TemporalSimplify(MAX_TIMESTAMP_PRECISION, (Node *) req->fcall); + } + + PG_RETURN_POINTER(ret); +} + +/* timestamp_scale() + * Adjust time type for specified scale factor. + * Used by PostgreSQL type system to stuff columns. + */ +Datum +timestamp_scale(PG_FUNCTION_ARGS) +{ + Timestamp timestamp = PG_GETARG_TIMESTAMP(0); + int32 typmod = PG_GETARG_INT32(1); + Timestamp result; + + result = timestamp; + + AdjustTimestampForTypmod(&result, typmod, NULL); + + PG_RETURN_TIMESTAMP(result); +} + +/* + * AdjustTimestampForTypmod --- round off a timestamp to suit given typmod + * Works for either timestamp or timestamptz. + * + * Returns true on success, false on failure (if escontext points to an + * ErrorSaveContext; otherwise errors are thrown).
+ */ +bool +AdjustTimestampForTypmod(Timestamp *time, int32 typmod, Node *escontext) +{ + static const int64 TimestampScales[MAX_TIMESTAMP_PRECISION + 1] = { + INT64CONST(1000000), + INT64CONST(100000), + INT64CONST(10000), + INT64CONST(1000), + INT64CONST(100), + INT64CONST(10), + INT64CONST(1) + }; + + static const int64 TimestampOffsets[MAX_TIMESTAMP_PRECISION + 1] = { + INT64CONST(500000), + INT64CONST(50000), + INT64CONST(5000), + INT64CONST(500), + INT64CONST(50), + INT64CONST(5), + INT64CONST(0) + }; + + if (!TIMESTAMP_NOT_FINITE(*time) + && (typmod != -1) && (typmod != MAX_TIMESTAMP_PRECISION)) + { + if (typmod < 0 || typmod > MAX_TIMESTAMP_PRECISION) + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("timestamp(%d) precision must be between %d and %d", + typmod, 0, MAX_TIMESTAMP_PRECISION))); + + if (*time >= INT64CONST(0)) + { + *time = ((*time + TimestampOffsets[typmod]) / TimestampScales[typmod]) * + TimestampScales[typmod]; + } + else + { + *time = -((((-*time) + TimestampOffsets[typmod]) / TimestampScales[typmod]) + * TimestampScales[typmod]); + } + } + + return true; +} + +/* timestamptz_in() + * Convert a string to internal form. + */ +Datum +timestamptz_in(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); +#ifdef NOT_USED + Oid typelem = PG_GETARG_OID(1); +#endif + int32 typmod = PG_GETARG_INT32(2); + Node *escontext = fcinfo->context; + TimestampTz result; + fsec_t fsec; + struct pg_tm tt, + *tm = &tt; + int tz; + int dtype; + int nf; + int dterr; + char *field[MAXDATEFIELDS]; + int ftype[MAXDATEFIELDS]; + char workbuf[MAXDATELEN + MAXDATEFIELDS]; + DateTimeErrorExtra extra; + + dterr = ParseDateTime(str, workbuf, sizeof(workbuf), + field, ftype, MAXDATEFIELDS, &nf); + if (dterr == 0) + dterr = DecodeDateTime(field, ftype, nf, + &dtype, tm, &fsec, &tz, &extra); + if (dterr != 0) + { + DateTimeParseError(dterr, &extra, str, "timestamp with time zone", + escontext); + PG_RETURN_NULL(); + } + + switch (dtype) + { + case DTK_DATE: + if (tm2timestamp(tm, fsec, &tz, &result) != 0) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range: \"%s\"", str))); + break; + + case DTK_EPOCH: + result = SetEpochTimestamp(); + break; + + case DTK_LATE: + TIMESTAMP_NOEND(result); + break; + + case DTK_EARLY: + TIMESTAMP_NOBEGIN(result); + break; + + default: + elog(ERROR, "unexpected dtype %d while parsing timestamptz \"%s\"", + dtype, str); + TIMESTAMP_NOEND(result); + } + + AdjustTimestampForTypmod(&result, typmod, escontext); + + PG_RETURN_TIMESTAMPTZ(result); +} + +/* + * Try to parse a timezone specification, and return its timezone offset value + * if it's acceptable. Otherwise, an error is thrown. + * + * Note: some code paths update tm->tm_isdst, and some don't; current callers + * don't care, so we don't bother being consistent. + */ +static int +parse_sane_timezone(struct pg_tm *tm, text *zone) +{ + char tzname[TZ_STRLEN_MAX + 1]; + int dterr; + int tz; + + text_to_cstring_buffer(zone, tzname, sizeof(tzname)); + + /* + * Look up the requested timezone. First we try to interpret it as a + * numeric timezone specification; if DecodeTimezone decides it doesn't + * like the format, we try timezone abbreviations and names. + * + * Note pg_tzset happily parses numeric input that DecodeTimezone would + * reject. To avoid having it accept input that would otherwise be seen + * as invalid, it's enough to disallow having a digit in the first + * position of our input string. 
+ */ + if (isdigit((unsigned char) *tzname)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid input syntax for type %s: \"%s\"", + "numeric time zone", tzname), + errhint("Numeric time zones must have \"-\" or \"+\" as first character."))); + + dterr = DecodeTimezone(tzname, &tz); + if (dterr != 0) + { + int type, + val; + pg_tz *tzp; + + if (dterr == DTERR_TZDISP_OVERFLOW) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("numeric time zone \"%s\" out of range", tzname))); + else if (dterr != DTERR_BAD_FORMAT) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("time zone \"%s\" not recognized", tzname))); + + type = DecodeTimezoneName(tzname, &val, &tzp); + + if (type == TZNAME_FIXED_OFFSET) + { + /* fixed-offset abbreviation */ + tz = -val; + } + else if (type == TZNAME_DYNTZ) + { + /* dynamic-offset abbreviation, resolve using specified time */ + tz = DetermineTimeZoneAbbrevOffset(tm, tzname, tzp); + } + else + { + /* full zone name */ + tz = DetermineTimeZoneOffset(tm, tzp); + } + } + + return tz; +} + +/* + * Look up the requested timezone, returning a pg_tz struct. + * + * This is the same as DecodeTimezoneNameToTz, but starting with a text Datum. + */ +static pg_tz * +lookup_timezone(text *zone) +{ + char tzname[TZ_STRLEN_MAX + 1]; + + text_to_cstring_buffer(zone, tzname, sizeof(tzname)); + + return DecodeTimezoneNameToTz(tzname); +} + +/* + * make_timestamp_internal + * workhorse for make_timestamp and make_timestamptz + */ +static Timestamp +make_timestamp_internal(int year, int month, int day, + int hour, int min, double sec) +{ + struct pg_tm tm; + TimeOffset date; + TimeOffset time; + int dterr; + bool bc = false; + Timestamp result; + + tm.tm_year = year; + tm.tm_mon = month; + tm.tm_mday = day; + + /* Handle negative years as BC */ + if (tm.tm_year < 0) + { + bc = true; + tm.tm_year = -tm.tm_year; + } + + dterr = ValidateDate(DTK_DATE_M, false, false, bc, &tm); + + if (dterr != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_FIELD_OVERFLOW), + errmsg("date field value out of range: %d-%02d-%02d", + year, month, day))); + + if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("date out of range: %d-%02d-%02d", + year, month, day))); + + date = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE; + + /* Check for time overflow */ + if (float_time_overflows(hour, min, sec)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_FIELD_OVERFLOW), + errmsg("time field value out of range: %d:%02d:%02g", + hour, min, sec))); + + /* This should match tm2time */ + time = (((hour * MINS_PER_HOUR + min) * SECS_PER_MINUTE) + * USECS_PER_SEC) + (int64) rint(sec * USECS_PER_SEC); + + result = date * USECS_PER_DAY + time; + /* check for major overflow */ + if ((result - time) / USECS_PER_DAY != date) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range: %d-%02d-%02d %d:%02d:%02g", + year, month, day, + hour, min, sec))); + + /* check for just-barely overflow (okay except time-of-day wraps) */ + /* caution: we want to allow 1999-12-31 24:00:00 */ + if ((result < 0 && date > 0) || + (result > 0 && date < -1)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range: %d-%02d-%02d %d:%02d:%02g", + year, month, day, + hour, min, sec))); + + /* final range check catches just-out-of-range timestamps */ + if (!IS_VALID_TIMESTAMP(result)) + ereport(ERROR, + 
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range: %d-%02d-%02d %d:%02d:%02g", + year, month, day, + hour, min, sec))); + + return result; +} + +/* + * make_timestamp() - timestamp constructor + */ +Datum +make_timestamp(PG_FUNCTION_ARGS) +{ + int32 year = PG_GETARG_INT32(0); + int32 month = PG_GETARG_INT32(1); + int32 mday = PG_GETARG_INT32(2); + int32 hour = PG_GETARG_INT32(3); + int32 min = PG_GETARG_INT32(4); + float8 sec = PG_GETARG_FLOAT8(5); + Timestamp result; + + result = make_timestamp_internal(year, month, mday, + hour, min, sec); + + PG_RETURN_TIMESTAMP(result); +} + +/* + * make_timestamptz() - timestamp with time zone constructor + */ +Datum +make_timestamptz(PG_FUNCTION_ARGS) +{ + int32 year = PG_GETARG_INT32(0); + int32 month = PG_GETARG_INT32(1); + int32 mday = PG_GETARG_INT32(2); + int32 hour = PG_GETARG_INT32(3); + int32 min = PG_GETARG_INT32(4); + float8 sec = PG_GETARG_FLOAT8(5); + Timestamp result; + + result = make_timestamp_internal(year, month, mday, + hour, min, sec); + + PG_RETURN_TIMESTAMPTZ(timestamp2timestamptz(result)); +} + +/* + * Construct a timestamp with time zone. + * As above, but the time zone is specified as seventh argument. + */ +Datum +make_timestamptz_at_timezone(PG_FUNCTION_ARGS) +{ + int32 year = PG_GETARG_INT32(0); + int32 month = PG_GETARG_INT32(1); + int32 mday = PG_GETARG_INT32(2); + int32 hour = PG_GETARG_INT32(3); + int32 min = PG_GETARG_INT32(4); + float8 sec = PG_GETARG_FLOAT8(5); + text *zone = PG_GETARG_TEXT_PP(6); + TimestampTz result; + Timestamp timestamp; + struct pg_tm tt; + int tz; + fsec_t fsec; + + timestamp = make_timestamp_internal(year, month, mday, + hour, min, sec); + + if (timestamp2tm(timestamp, NULL, &tt, &fsec, NULL, NULL) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + tz = parse_sane_timezone(&tt, zone); + + result = dt2local(timestamp, -tz); + + if (!IS_VALID_TIMESTAMP(result)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + PG_RETURN_TIMESTAMPTZ(result); +} + +/* + * to_timestamp(double precision) + * Convert UNIX epoch to timestamptz. + */ +Datum +float8_timestamptz(PG_FUNCTION_ARGS) +{ + float8 seconds = PG_GETARG_FLOAT8(0); + TimestampTz result; + + /* Deal with NaN and infinite inputs ... */ + if (isnan(seconds)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp cannot be NaN"))); + + if (isinf(seconds)) + { + if (seconds < 0) + TIMESTAMP_NOBEGIN(result); + else + TIMESTAMP_NOEND(result); + } + else + { + /* Out of range? */ + if (seconds < + (float8) SECS_PER_DAY * (DATETIME_MIN_JULIAN - UNIX_EPOCH_JDATE) + || seconds >= + (float8) SECS_PER_DAY * (TIMESTAMP_END_JULIAN - UNIX_EPOCH_JDATE)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range: \"%g\"", seconds))); + + /* Convert UNIX epoch to Postgres epoch */ + seconds -= ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY); + + seconds = rint(seconds * USECS_PER_SEC); + result = (int64) seconds; + + /* Recheck in case roundoff produces something just out of range */ + if (!IS_VALID_TIMESTAMP(result)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range: \"%g\"", + PG_GETARG_FLOAT8(0)))); + } + + PG_RETURN_TIMESTAMP(result); +} + +/* timestamptz_out() + * Convert a timestamp to external form. 
+ */ +Datum +timestamptz_out(PG_FUNCTION_ARGS) +{ + TimestampTz dt = PG_GETARG_TIMESTAMPTZ(0); + char *result; + int tz; + struct pg_tm tt, + *tm = &tt; + fsec_t fsec; + const char *tzn; + char buf[MAXDATELEN + 1]; + + if (TIMESTAMP_NOT_FINITE(dt)) + EncodeSpecialTimestamp(dt, buf); + else if (timestamp2tm(dt, &tz, tm, &fsec, &tzn, NULL) == 0) + EncodeDateTime(tm, fsec, true, tz, tzn, DateStyle, buf); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + result = pstrdup(buf); + PG_RETURN_CSTRING(result); +} + +/* + * timestamptz_recv - converts external binary format to timestamptz + */ +Datum +timestamptz_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + +#ifdef NOT_USED + Oid typelem = PG_GETARG_OID(1); +#endif + int32 typmod = PG_GETARG_INT32(2); + TimestampTz timestamp; + int tz; + struct pg_tm tt, + *tm = &tt; + fsec_t fsec; + + timestamp = (TimestampTz) pq_getmsgint64(buf); + + /* range check: see if timestamptz_out would like it */ + if (TIMESTAMP_NOT_FINITE(timestamp)) + /* ok */ ; + else if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0 || + !IS_VALID_TIMESTAMP(timestamp)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + AdjustTimestampForTypmod(&timestamp, typmod, NULL); + + PG_RETURN_TIMESTAMPTZ(timestamp); +} + +/* + * timestamptz_send - converts timestamptz to binary format + */ +Datum +timestamptz_send(PG_FUNCTION_ARGS) +{ + TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint64(&buf, timestamp); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +Datum +timestamptztypmodin(PG_FUNCTION_ARGS) +{ + ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0); + + PG_RETURN_INT32(anytimestamp_typmodin(true, ta)); +} + +Datum +timestamptztypmodout(PG_FUNCTION_ARGS) +{ + int32 typmod = PG_GETARG_INT32(0); + + PG_RETURN_CSTRING(anytimestamp_typmodout(true, typmod)); +} + + +/* timestamptz_scale() + * Adjust time type for specified scale factor. + * Used by PostgreSQL type system to stuff columns. + */ +Datum +timestamptz_scale(PG_FUNCTION_ARGS) +{ + TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(0); + int32 typmod = PG_GETARG_INT32(1); + TimestampTz result; + + result = timestamp; + + AdjustTimestampForTypmod(&result, typmod, NULL); + + PG_RETURN_TIMESTAMPTZ(result); +} + + +/* interval_in() + * Convert a string to internal form. + * + * External format(s): + * Uses the generic date/time parsing and decoding routines.
+ */ +Datum +interval_in(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); +#ifdef NOT_USED + Oid typelem = PG_GETARG_OID(1); +#endif + int32 typmod = PG_GETARG_INT32(2); + Node *escontext = fcinfo->context; + Interval *result; + struct pg_itm_in tt, + *itm_in = &tt; + int dtype; + int nf; + int range; + int dterr; + char *field[MAXDATEFIELDS]; + int ftype[MAXDATEFIELDS]; + char workbuf[256]; + DateTimeErrorExtra extra; + + itm_in->tm_year = 0; + itm_in->tm_mon = 0; + itm_in->tm_mday = 0; + itm_in->tm_usec = 0; + + if (typmod >= 0) + range = INTERVAL_RANGE(typmod); + else + range = INTERVAL_FULL_RANGE; + + dterr = ParseDateTime(str, workbuf, sizeof(workbuf), field, + ftype, MAXDATEFIELDS, &nf); + if (dterr == 0) + dterr = DecodeInterval(field, ftype, nf, range, + &dtype, itm_in); + + /* if those functions think it's a bad format, try ISO8601 style */ + if (dterr == DTERR_BAD_FORMAT) + dterr = DecodeISO8601Interval(str, + &dtype, itm_in); + + if (dterr != 0) + { + if (dterr == DTERR_FIELD_OVERFLOW) + dterr = DTERR_INTERVAL_OVERFLOW; + DateTimeParseError(dterr, &extra, str, "interval", escontext); + PG_RETURN_NULL(); + } + + result = (Interval *) palloc(sizeof(Interval)); + + switch (dtype) + { + case DTK_DELTA: + if (itmin2interval(itm_in, result) != 0) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + break; + + default: + elog(ERROR, "unexpected dtype %d while parsing interval \"%s\"", + dtype, str); + } + + AdjustIntervalForTypmod(result, typmod, escontext); + + PG_RETURN_INTERVAL_P(result); +} + +/* interval_out() + * Convert a time span to external form. + */ +Datum +interval_out(PG_FUNCTION_ARGS) +{ + Interval *span = PG_GETARG_INTERVAL_P(0); + char *result; + struct pg_itm tt, + *itm = &tt; + char buf[MAXDATELEN + 1]; + + interval2itm(*span, itm); + EncodeInterval(itm, IntervalStyle, buf); + + result = pstrdup(buf); + PG_RETURN_CSTRING(result); +} + +/* + * interval_recv - converts external binary format to interval + */ +Datum +interval_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + +#ifdef NOT_USED + Oid typelem = PG_GETARG_OID(1); +#endif + int32 typmod = PG_GETARG_INT32(2); + Interval *interval; + + interval = (Interval *) palloc(sizeof(Interval)); + + interval->time = pq_getmsgint64(buf); + interval->day = pq_getmsgint(buf, sizeof(interval->day)); + interval->month = pq_getmsgint(buf, sizeof(interval->month)); + + AdjustIntervalForTypmod(interval, typmod, NULL); + + PG_RETURN_INTERVAL_P(interval); +} + +/* + * interval_send - converts interval to binary format + */ +Datum +interval_send(PG_FUNCTION_ARGS) +{ + Interval *interval = PG_GETARG_INTERVAL_P(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint64(&buf, interval->time); + pq_sendint32(&buf, interval->day); + pq_sendint32(&buf, interval->month); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * The interval typmod stores a "range" in its high 16 bits and a "precision" + * in its low 16 bits. Both contribute to defining the resolution of the + * type. Range addresses resolution granules larger than one second, and + * precision specifies resolution below one second. This representation can + * express all SQL standard resolutions, but we implement them all in terms of + * truncating rightward from some position. Range is a bitmap of permitted + * fields, but only the temporally-smallest such field is significant to our + * calculations. Precision is a count of sub-second decimal places to retain. 
+ * Setting all bits (INTERVAL_FULL_PRECISION) gives the same truncation + * semantics as choosing MAX_INTERVAL_PRECISION. + */ +Datum +intervaltypmodin(PG_FUNCTION_ARGS) +{ + ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0); + int32 *tl; + int n; + int32 typmod; + + tl = ArrayGetIntegerTypmods(ta, &n); + + /* + * tl[0] - interval range (fields bitmask) tl[1] - precision (optional) + * + * Note we must validate tl[0] even though it's normally guaranteed + * correct by the grammar --- consider SELECT 'foo'::"interval"(1000). + */ + if (n > 0) + { + switch (tl[0]) + { + case INTERVAL_MASK(YEAR): + case INTERVAL_MASK(MONTH): + case INTERVAL_MASK(DAY): + case INTERVAL_MASK(HOUR): + case INTERVAL_MASK(MINUTE): + case INTERVAL_MASK(SECOND): + case INTERVAL_MASK(YEAR) | INTERVAL_MASK(MONTH): + case INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR): + case INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE): + case INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND): + case INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE): + case INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND): + case INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND): + case INTERVAL_FULL_RANGE: + /* all OK */ + break; + default: + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid INTERVAL type modifier"))); + } + } + + if (n == 1) + { + if (tl[0] != INTERVAL_FULL_RANGE) + typmod = INTERVAL_TYPMOD(INTERVAL_FULL_PRECISION, tl[0]); + else + typmod = -1; + } + else if (n == 2) + { + if (tl[1] < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("INTERVAL(%d) precision must not be negative", + tl[1]))); + if (tl[1] > MAX_INTERVAL_PRECISION) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("INTERVAL(%d) precision reduced to maximum allowed, %d", + tl[1], MAX_INTERVAL_PRECISION))); + typmod = INTERVAL_TYPMOD(MAX_INTERVAL_PRECISION, tl[0]); + } + else + typmod = INTERVAL_TYPMOD(tl[1], tl[0]); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid INTERVAL type modifier"))); + typmod = 0; /* keep compiler quiet */ + } + + PG_RETURN_INT32(typmod); +} + +Datum +intervaltypmodout(PG_FUNCTION_ARGS) +{ + int32 typmod = PG_GETARG_INT32(0); + char *res = (char *) palloc(64); + int fields; + int precision; + const char *fieldstr; + + if (typmod < 0) + { + *res = '\0'; + PG_RETURN_CSTRING(res); + } + + fields = INTERVAL_RANGE(typmod); + precision = INTERVAL_PRECISION(typmod); + + switch (fields) + { + case INTERVAL_MASK(YEAR): + fieldstr = " year"; + break; + case INTERVAL_MASK(MONTH): + fieldstr = " month"; + break; + case INTERVAL_MASK(DAY): + fieldstr = " day"; + break; + case INTERVAL_MASK(HOUR): + fieldstr = " hour"; + break; + case INTERVAL_MASK(MINUTE): + fieldstr = " minute"; + break; + case INTERVAL_MASK(SECOND): + fieldstr = " second"; + break; + case INTERVAL_MASK(YEAR) | INTERVAL_MASK(MONTH): + fieldstr = " year to month"; + break; + case INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR): + fieldstr = " day to hour"; + break; + case INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE): + fieldstr = " day to minute"; + break; + case INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND): + fieldstr = " day to second"; + break; + case INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE): + fieldstr = " hour to minute"; + break; + case INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND): + fieldstr = " hour to second"; + break; + case 
INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND): + fieldstr = " minute to second"; + break; + case INTERVAL_FULL_RANGE: + fieldstr = ""; + break; + default: + elog(ERROR, "invalid INTERVAL typmod: 0x%x", typmod); + fieldstr = ""; + break; + } + + if (precision != INTERVAL_FULL_PRECISION) + snprintf(res, 64, "%s(%d)", fieldstr, precision); + else + snprintf(res, 64, "%s", fieldstr); + + PG_RETURN_CSTRING(res); +} + +/* + * Given an interval typmod value, return a code for the least-significant + * field that the typmod allows to be nonzero, for instance given + * INTERVAL DAY TO HOUR we want to identify "hour". + * + * The results should be ordered by field significance, which means + * we can't use the dt.h macros YEAR etc, because for some odd reason + * they aren't ordered that way. Instead, arbitrarily represent + * SECOND = 0, MINUTE = 1, HOUR = 2, DAY = 3, MONTH = 4, YEAR = 5. + */ +static int +intervaltypmodleastfield(int32 typmod) +{ + if (typmod < 0) + return 0; /* SECOND */ + + switch (INTERVAL_RANGE(typmod)) + { + case INTERVAL_MASK(YEAR): + return 5; /* YEAR */ + case INTERVAL_MASK(MONTH): + return 4; /* MONTH */ + case INTERVAL_MASK(DAY): + return 3; /* DAY */ + case INTERVAL_MASK(HOUR): + return 2; /* HOUR */ + case INTERVAL_MASK(MINUTE): + return 1; /* MINUTE */ + case INTERVAL_MASK(SECOND): + return 0; /* SECOND */ + case INTERVAL_MASK(YEAR) | INTERVAL_MASK(MONTH): + return 4; /* MONTH */ + case INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR): + return 2; /* HOUR */ + case INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE): + return 1; /* MINUTE */ + case INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND): + return 0; /* SECOND */ + case INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE): + return 1; /* MINUTE */ + case INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND): + return 0; /* SECOND */ + case INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND): + return 0; /* SECOND */ + case INTERVAL_FULL_RANGE: + return 0; /* SECOND */ + default: + elog(ERROR, "invalid INTERVAL typmod: 0x%x", typmod); + break; + } + return 0; /* can't get here, but keep compiler quiet */ +} + + +/* + * interval_support() + * + * Planner support function for interval_scale(). + * + * Flatten superfluous calls to interval_scale(). The interval typmod is + * complex to permit accepting and regurgitating all SQL standard variations. + * For truncation purposes, it boils down to a single, simple granularity. 
+ */ +Datum +interval_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestSimplify)) + { + SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq; + FuncExpr *expr = req->fcall; + Node *typmod; + + Assert(list_length(expr->args) >= 2); + + typmod = (Node *) lsecond(expr->args); + + if (IsA(typmod, Const) && !((Const *) typmod)->constisnull) + { + Node *source = (Node *) linitial(expr->args); + int32 new_typmod = DatumGetInt32(((Const *) typmod)->constvalue); + bool noop; + + if (new_typmod < 0) + noop = true; + else + { + int32 old_typmod = exprTypmod(source); + int old_least_field; + int new_least_field; + int old_precis; + int new_precis; + + old_least_field = intervaltypmodleastfield(old_typmod); + new_least_field = intervaltypmodleastfield(new_typmod); + if (old_typmod < 0) + old_precis = INTERVAL_FULL_PRECISION; + else + old_precis = INTERVAL_PRECISION(old_typmod); + new_precis = INTERVAL_PRECISION(new_typmod); + + /* + * Cast is a no-op if least field stays the same or decreases + * while precision stays the same or increases. But + * precision, which is to say, sub-second precision, only + * affects ranges that include SECOND. + */ + noop = (new_least_field <= old_least_field) && + (old_least_field > 0 /* SECOND */ || + new_precis >= MAX_INTERVAL_PRECISION || + new_precis >= old_precis); + } + if (noop) + ret = relabel_to_typmod(source, new_typmod); + } + } + + PG_RETURN_POINTER(ret); +} + +/* interval_scale() + * Adjust interval type for specified fields. + * Used by PostgreSQL type system to stuff columns. + */ +Datum +interval_scale(PG_FUNCTION_ARGS) +{ + Interval *interval = PG_GETARG_INTERVAL_P(0); + int32 typmod = PG_GETARG_INT32(1); + Interval *result; + + result = palloc(sizeof(Interval)); + *result = *interval; + + AdjustIntervalForTypmod(result, typmod, NULL); + + PG_RETURN_INTERVAL_P(result); +} + +/* + * Adjust interval for specified precision, in both YEAR to SECOND + * range and sub-second precision. + * + * Returns true on success, false on failure (if escontext points to an + * ErrorSaveContext; otherwise errors are thrown). + */ +static bool +AdjustIntervalForTypmod(Interval *interval, int32 typmod, + Node *escontext) +{ + static const int64 IntervalScales[MAX_INTERVAL_PRECISION + 1] = { + INT64CONST(1000000), + INT64CONST(100000), + INT64CONST(10000), + INT64CONST(1000), + INT64CONST(100), + INT64CONST(10), + INT64CONST(1) + }; + + static const int64 IntervalOffsets[MAX_INTERVAL_PRECISION + 1] = { + INT64CONST(500000), + INT64CONST(50000), + INT64CONST(5000), + INT64CONST(500), + INT64CONST(50), + INT64CONST(5), + INT64CONST(0) + }; + + /* + * Unspecified range and precision? Then not necessary to adjust. Setting + * typmod to -1 is the convention for all data types. + */ + if (typmod >= 0) + { + int range = INTERVAL_RANGE(typmod); + int precision = INTERVAL_PRECISION(typmod); + + /* + * Our interpretation of intervals with a limited set of fields is + * that fields to the right of the last one specified are zeroed out, + * but those to the left of it remain valid. Thus for example there + * is no operational difference between INTERVAL YEAR TO MONTH and + * INTERVAL MONTH. In some cases we could meaningfully enforce that + * higher-order fields are zero; for example INTERVAL DAY could reject + * nonzero "month" field. However that seems a bit pointless when we + * can't do it consistently. 
(We cannot enforce a range limit on the + * highest expected field, since we do not have any equivalent of + * SQL's <interval leading field precision>.) If we ever decide to + * revisit this, interval_support will likely require adjusting. + * + * Note: before PG 8.4 we interpreted a limited set of fields as + * actually causing a "modulo" operation on a given value, potentially + * losing high-order as well as low-order information. But there is + * no support for such behavior in the standard, and it seems fairly + * undesirable on data consistency grounds anyway. Now we only + * perform truncation or rounding of low-order fields. + */ + if (range == INTERVAL_FULL_RANGE) + { + /* Do nothing... */ + } + else if (range == INTERVAL_MASK(YEAR)) + { + interval->month = (interval->month / MONTHS_PER_YEAR) * MONTHS_PER_YEAR; + interval->day = 0; + interval->time = 0; + } + else if (range == INTERVAL_MASK(MONTH)) + { + interval->day = 0; + interval->time = 0; + } + /* YEAR TO MONTH */ + else if (range == (INTERVAL_MASK(YEAR) | INTERVAL_MASK(MONTH))) + { + interval->day = 0; + interval->time = 0; + } + else if (range == INTERVAL_MASK(DAY)) + { + interval->time = 0; + } + else if (range == INTERVAL_MASK(HOUR)) + { + interval->time = (interval->time / USECS_PER_HOUR) * + USECS_PER_HOUR; + } + else if (range == INTERVAL_MASK(MINUTE)) + { + interval->time = (interval->time / USECS_PER_MINUTE) * + USECS_PER_MINUTE; + } + else if (range == INTERVAL_MASK(SECOND)) + { + /* fractional-second rounding will be dealt with below */ + } + /* DAY TO HOUR */ + else if (range == (INTERVAL_MASK(DAY) | + INTERVAL_MASK(HOUR))) + { + interval->time = (interval->time / USECS_PER_HOUR) * + USECS_PER_HOUR; + } + /* DAY TO MINUTE */ + else if (range == (INTERVAL_MASK(DAY) | + INTERVAL_MASK(HOUR) | + INTERVAL_MASK(MINUTE))) + { + interval->time = (interval->time / USECS_PER_MINUTE) * + USECS_PER_MINUTE; + } + /* DAY TO SECOND */ + else if (range == (INTERVAL_MASK(DAY) | + INTERVAL_MASK(HOUR) | + INTERVAL_MASK(MINUTE) | + INTERVAL_MASK(SECOND))) + { + /* fractional-second rounding will be dealt with below */ + } + /* HOUR TO MINUTE */ + else if (range == (INTERVAL_MASK(HOUR) | + INTERVAL_MASK(MINUTE))) + { + interval->time = (interval->time / USECS_PER_MINUTE) * + USECS_PER_MINUTE; + } + /* HOUR TO SECOND */ + else if (range == (INTERVAL_MASK(HOUR) | + INTERVAL_MASK(MINUTE) | + INTERVAL_MASK(SECOND))) + { + /* fractional-second rounding will be dealt with below */ + } + /* MINUTE TO SECOND */ + else if (range == (INTERVAL_MASK(MINUTE) | + INTERVAL_MASK(SECOND))) + { + /* fractional-second rounding will be dealt with below */ + } + else + elog(ERROR, "unrecognized interval typmod: %d", typmod); + + /* Need to adjust sub-second precision? 
*/ + if (precision != INTERVAL_FULL_PRECISION) + { + if (precision < 0 || precision > MAX_INTERVAL_PRECISION) + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("interval(%d) precision must be between %d and %d", + precision, 0, MAX_INTERVAL_PRECISION))); + + if (interval->time >= INT64CONST(0)) + { + interval->time = ((interval->time + + IntervalOffsets[precision]) / + IntervalScales[precision]) * + IntervalScales[precision]; + } + else + { + interval->time = -(((-interval->time + + IntervalOffsets[precision]) / + IntervalScales[precision]) * + IntervalScales[precision]); + } + } + } + + return true; +} + +/* + * make_interval - numeric Interval constructor + */ +Datum +make_interval(PG_FUNCTION_ARGS) +{ + int32 years = PG_GETARG_INT32(0); + int32 months = PG_GETARG_INT32(1); + int32 weeks = PG_GETARG_INT32(2); + int32 days = PG_GETARG_INT32(3); + int32 hours = PG_GETARG_INT32(4); + int32 mins = PG_GETARG_INT32(5); + double secs = PG_GETARG_FLOAT8(6); + Interval *result; + + /* + * Reject out-of-range inputs. We really ought to check the integer + * inputs as well, but it's not entirely clear what limits to apply. + */ + if (isinf(secs) || isnan(secs)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + + result = (Interval *) palloc(sizeof(Interval)); + result->month = years * MONTHS_PER_YEAR + months; + result->day = weeks * 7 + days; + + secs = rint(secs * USECS_PER_SEC); + result->time = hours * ((int64) SECS_PER_HOUR * USECS_PER_SEC) + + mins * ((int64) SECS_PER_MINUTE * USECS_PER_SEC) + + (int64) secs; + + PG_RETURN_INTERVAL_P(result); +} + +/* EncodeSpecialTimestamp() + * Convert reserved timestamp data type to string. + */ +void +EncodeSpecialTimestamp(Timestamp dt, char *str) +{ + if (TIMESTAMP_IS_NOBEGIN(dt)) + strcpy(str, EARLY); + else if (TIMESTAMP_IS_NOEND(dt)) + strcpy(str, LATE); + else /* shouldn't happen */ + elog(ERROR, "invalid argument for EncodeSpecialTimestamp"); +} + +Datum +now(PG_FUNCTION_ARGS) +{ + PG_RETURN_TIMESTAMPTZ(GetCurrentTransactionStartTimestamp()); +} + +Datum +statement_timestamp(PG_FUNCTION_ARGS) +{ + PG_RETURN_TIMESTAMPTZ(GetCurrentStatementStartTimestamp()); +} + +Datum +clock_timestamp(PG_FUNCTION_ARGS) +{ + PG_RETURN_TIMESTAMPTZ(GetCurrentTimestamp()); +} + +Datum +pg_postmaster_start_time(PG_FUNCTION_ARGS) +{ + PG_RETURN_TIMESTAMPTZ(PgStartTime); +} + +Datum +pg_conf_load_time(PG_FUNCTION_ARGS) +{ + PG_RETURN_TIMESTAMPTZ(PgReloadTime); +} + +/* + * GetCurrentTimestamp -- get the current operating system time + * + * Result is in the form of a TimestampTz value, and is expressed to the + * full precision of the gettimeofday() syscall + */ +TimestampTz +GetCurrentTimestamp(void) +{ + TimestampTz result; + struct timeval tp; + + gettimeofday(&tp, NULL); + + result = (TimestampTz) tp.tv_sec - + ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY); + result = (result * USECS_PER_SEC) + tp.tv_usec; + + return result; +} + +/* + * GetSQLCurrentTimestamp -- implements CURRENT_TIMESTAMP, CURRENT_TIMESTAMP(n) + */ +TimestampTz +GetSQLCurrentTimestamp(int32 typmod) +{ + TimestampTz ts; + + ts = GetCurrentTransactionStartTimestamp(); + if (typmod >= 0) + AdjustTimestampForTypmod(&ts, typmod, NULL); + return ts; +} + +/* + * GetSQLLocalTimestamp -- implements LOCALTIMESTAMP, LOCALTIMESTAMP(n) + */ +Timestamp +GetSQLLocalTimestamp(int32 typmod) +{ + Timestamp ts; + + ts = timestamptz2timestamp(GetCurrentTransactionStartTimestamp()); + if (typmod >= 0) + 
AdjustTimestampForTypmod(&ts, typmod, NULL); + return ts; +} + +/* + * timeofday(*) -- returns the current time as a text. + */ +Datum +timeofday(PG_FUNCTION_ARGS) +{ + struct timeval tp; + char templ[128]; + char buf[128]; + pg_time_t tt; + + gettimeofday(&tp, NULL); + tt = (pg_time_t) tp.tv_sec; + pg_strftime(templ, sizeof(templ), "%a %b %d %H:%M:%S.%%06d %Y %Z", + pg_localtime(&tt, session_timezone)); + snprintf(buf, sizeof(buf), templ, tp.tv_usec); + + PG_RETURN_TEXT_P(cstring_to_text(buf)); +} + +/* + * TimestampDifference -- convert the difference between two timestamps + * into integer seconds and microseconds + * + * This is typically used to calculate a wait timeout for select(2), + * which explains the otherwise-odd choice of output format. + * + * Both inputs must be ordinary finite timestamps (in current usage, + * they'll be results from GetCurrentTimestamp()). + * + * We expect start_time <= stop_time. If not, we return zeros, + * since then we're already past the previously determined stop_time. + */ +void +TimestampDifference(TimestampTz start_time, TimestampTz stop_time, + long *secs, int *microsecs) +{ + TimestampTz diff = stop_time - start_time; + + if (diff <= 0) + { + *secs = 0; + *microsecs = 0; + } + else + { + *secs = (long) (diff / USECS_PER_SEC); + *microsecs = (int) (diff % USECS_PER_SEC); + } +} + +/* + * TimestampDifferenceMilliseconds -- convert the difference between two + * timestamps into integer milliseconds + * + * This is typically used to calculate a wait timeout for WaitLatch() + * or a related function. The choice of "long" as the result type + * is to harmonize with that; furthermore, we clamp the result to at most + * INT_MAX milliseconds, because that's all that WaitLatch() allows. + * + * We expect start_time <= stop_time. If not, we return zero, + * since then we're already past the previously determined stop_time. + * + * Subtracting finite and infinite timestamps works correctly, returning + * zero or INT_MAX as appropriate. + * + * Note we round up any fractional millisecond, since waiting for just + * less than the intended timeout is undesirable. + */ +long +TimestampDifferenceMilliseconds(TimestampTz start_time, TimestampTz stop_time) +{ + TimestampTz diff; + + /* Deal with zero or negative elapsed time quickly. */ + if (start_time >= stop_time) + return 0; + /* To not fail with timestamp infinities, we must detect overflow. */ + if (pg_sub_s64_overflow(stop_time, start_time, &diff)) + return (long) INT_MAX; + if (diff >= (INT_MAX * INT64CONST(1000) - 999)) + return (long) INT_MAX; + else + return (long) ((diff + 999) / 1000); +} + +/* + * TimestampDifferenceExceeds -- report whether the difference between two + * timestamps is >= a threshold (expressed in milliseconds) + * + * Both inputs must be ordinary finite timestamps (in current usage, + * they'll be results from GetCurrentTimestamp()). + */ +bool +TimestampDifferenceExceeds(TimestampTz start_time, + TimestampTz stop_time, + int msec) +{ + TimestampTz diff = stop_time - start_time; + + return (diff >= msec * INT64CONST(1000)); +} + +/* + * Convert a time_t to TimestampTz. + * + * We do not use time_t internally in Postgres, but this is provided for use + * by functions that need to interpret, say, a stat(2) result. + * + * To avoid having the function's ABI vary depending on the width of time_t, + * we declare the argument as pg_time_t, which is cast-compatible with + * time_t but always 64 bits wide (unless the platform has no 64-bit type). 
+ * This detail should be invisible to callers, at least at source code level. + */ +TimestampTz +time_t_to_timestamptz(pg_time_t tm) +{ + TimestampTz result; + + result = (TimestampTz) tm - + ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY); + result *= USECS_PER_SEC; + + return result; +} + +/* + * Convert a TimestampTz to time_t. + * + * This too is just marginally useful, but some places need it. + * + * To avoid having the function's ABI vary depending on the width of time_t, + * we declare the result as pg_time_t, which is cast-compatible with + * time_t but always 64 bits wide (unless the platform has no 64-bit type). + * This detail should be invisible to callers, at least at source code level. + */ +pg_time_t +timestamptz_to_time_t(TimestampTz t) +{ + pg_time_t result; + + result = (pg_time_t) (t / USECS_PER_SEC + + ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY)); + + return result; +} + +/* + * Produce a C-string representation of a TimestampTz. + * + * This is mostly for use in emitting messages. The primary difference + * from timestamptz_out is that we force the output format to ISO. Note + * also that the result is in a static buffer, not pstrdup'd. + * + * See also pg_strftime. + */ +const char * +timestamptz_to_str(TimestampTz t) +{ + static __thread char buf[MAXDATELEN + 1]; + int tz; + struct pg_tm tt, + *tm = &tt; + fsec_t fsec; + const char *tzn; + + if (TIMESTAMP_NOT_FINITE(t)) + EncodeSpecialTimestamp(t, buf); + else if (timestamp2tm(t, &tz, tm, &fsec, &tzn, NULL) == 0) + EncodeDateTime(tm, fsec, true, tz, tzn, USE_ISO_DATES, buf); + else + strlcpy(buf, "(timestamp out of range)", sizeof(buf)); + + return buf; +} + + +void +dt2time(Timestamp jd, int *hour, int *min, int *sec, fsec_t *fsec) +{ + TimeOffset time; + + time = jd; + + *hour = time / USECS_PER_HOUR; + time -= (*hour) * USECS_PER_HOUR; + *min = time / USECS_PER_MINUTE; + time -= (*min) * USECS_PER_MINUTE; + *sec = time / USECS_PER_SEC; + *fsec = time - (*sec * USECS_PER_SEC); +} /* dt2time() */ + + +/* + * timestamp2tm() - Convert timestamp data type to POSIX time structure. + * + * Note that year is _not_ 1900-based, but is an explicit full value. + * Also, month is one-based, _not_ zero-based. + * Returns: + * 0 on success + * -1 on out of range + * + * If attimezone is NULL, the global timezone setting will be used. + */ +int +timestamp2tm(Timestamp dt, int *tzp, struct pg_tm *tm, fsec_t *fsec, const char **tzn, pg_tz *attimezone) +{ + Timestamp date; + Timestamp time; + pg_time_t utime; + + /* Use session timezone if caller asks for default */ + if (attimezone == NULL) + attimezone = session_timezone; + + time = dt; + TMODULO(time, date, USECS_PER_DAY); + + if (time < INT64CONST(0)) + { + time += USECS_PER_DAY; + date -= 1; + } + + /* add offset to go from J2000 back to standard Julian date */ + date += POSTGRES_EPOCH_JDATE; + + /* Julian day routine does not work for negative Julian days */ + if (date < 0 || date > (Timestamp) INT_MAX) + return -1; + + j2date((int) date, &tm->tm_year, &tm->tm_mon, &tm->tm_mday); + dt2time(time, &tm->tm_hour, &tm->tm_min, &tm->tm_sec, fsec); + + /* Done if no TZ conversion wanted */ + if (tzp == NULL) + { + tm->tm_isdst = -1; + tm->tm_gmtoff = 0; + tm->tm_zone = NULL; + if (tzn != NULL) + *tzn = NULL; + return 0; + } + + /* + * If the time falls within the range of pg_time_t, use pg_localtime() to + * rotate to the local time zone. 
+ * + * First, convert to an integral timestamp, avoiding possibly + * platform-specific roundoff-in-wrong-direction errors, and adjust to + * Unix epoch. Then see if we can convert to pg_time_t without loss. This + * coding avoids hardwiring any assumptions about the width of pg_time_t, + * so it should behave sanely on machines without int64. + */ + dt = (dt - *fsec) / USECS_PER_SEC + + (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY; + utime = (pg_time_t) dt; + if ((Timestamp) utime == dt) + { + struct pg_tm *tx = pg_localtime(&utime, attimezone); + + tm->tm_year = tx->tm_year + 1900; + tm->tm_mon = tx->tm_mon + 1; + tm->tm_mday = tx->tm_mday; + tm->tm_hour = tx->tm_hour; + tm->tm_min = tx->tm_min; + tm->tm_sec = tx->tm_sec; + tm->tm_isdst = tx->tm_isdst; + tm->tm_gmtoff = tx->tm_gmtoff; + tm->tm_zone = tx->tm_zone; + *tzp = -tm->tm_gmtoff; + if (tzn != NULL) + *tzn = tm->tm_zone; + } + else + { + /* + * When out of range of pg_time_t, treat as GMT + */ + *tzp = 0; + /* Mark this as *no* time zone available */ + tm->tm_isdst = -1; + tm->tm_gmtoff = 0; + tm->tm_zone = NULL; + if (tzn != NULL) + *tzn = NULL; + } + + return 0; +} + + +/* tm2timestamp() + * Convert a tm structure to a timestamp data type. + * Note that year is _not_ 1900-based, but is an explicit full value. + * Also, month is one-based, _not_ zero-based. + * + * Returns -1 on failure (value out of range). + */ +int +tm2timestamp(struct pg_tm *tm, fsec_t fsec, int *tzp, Timestamp *result) +{ + TimeOffset date; + TimeOffset time; + + /* Prevent overflow in Julian-day routines */ + if (!IS_VALID_JULIAN(tm->tm_year, tm->tm_mon, tm->tm_mday)) + { + *result = 0; /* keep compiler quiet */ + return -1; + } + + date = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - POSTGRES_EPOCH_JDATE; + time = time2t(tm->tm_hour, tm->tm_min, tm->tm_sec, fsec); + + *result = date * USECS_PER_DAY + time; + /* check for major overflow */ + if ((*result - time) / USECS_PER_DAY != date) + { + *result = 0; /* keep compiler quiet */ + return -1; + } + /* check for just-barely overflow (okay except time-of-day wraps) */ + /* caution: we want to allow 1999-12-31 24:00:00 */ + if ((*result < 0 && date > 0) || + (*result > 0 && date < -1)) + { + *result = 0; /* keep compiler quiet */ + return -1; + } + if (tzp != NULL) + *result = dt2local(*result, -(*tzp)); + + /* final range check catches just-out-of-range timestamps */ + if (!IS_VALID_TIMESTAMP(*result)) + { + *result = 0; /* keep compiler quiet */ + return -1; + } + + return 0; +} + + +/* interval2itm() + * Convert an Interval to a pg_itm structure. + * Note: overflow is not possible, because the pg_itm fields are + * wide enough for all possible conversion results. + */ +void +interval2itm(Interval span, struct pg_itm *itm) +{ + TimeOffset time; + TimeOffset tfrac; + + itm->tm_year = span.month / MONTHS_PER_YEAR; + itm->tm_mon = span.month % MONTHS_PER_YEAR; + itm->tm_mday = span.day; + time = span.time; + + tfrac = time / USECS_PER_HOUR; + time -= tfrac * USECS_PER_HOUR; + itm->tm_hour = tfrac; + tfrac = time / USECS_PER_MINUTE; + time -= tfrac * USECS_PER_MINUTE; + itm->tm_min = (int) tfrac; + tfrac = time / USECS_PER_SEC; + time -= tfrac * USECS_PER_SEC; + itm->tm_sec = (int) tfrac; + itm->tm_usec = (int) time; +} + +/* itm2interval() + * Convert a pg_itm structure to an Interval. + * Returns 0 if OK, -1 on overflow. 
+ */ +int +itm2interval(struct pg_itm *itm, Interval *span) +{ + int64 total_months = (int64) itm->tm_year * MONTHS_PER_YEAR + itm->tm_mon; + + if (total_months > INT_MAX || total_months < INT_MIN) + return -1; + span->month = (int32) total_months; + span->day = itm->tm_mday; + if (pg_mul_s64_overflow(itm->tm_hour, USECS_PER_HOUR, + &span->time)) + return -1; + /* tm_min, tm_sec are 32 bits, so intermediate products can't overflow */ + if (pg_add_s64_overflow(span->time, itm->tm_min * USECS_PER_MINUTE, + &span->time)) + return -1; + if (pg_add_s64_overflow(span->time, itm->tm_sec * USECS_PER_SEC, + &span->time)) + return -1; + if (pg_add_s64_overflow(span->time, itm->tm_usec, + &span->time)) + return -1; + return 0; +} + +/* itmin2interval() + * Convert a pg_itm_in structure to an Interval. + * Returns 0 if OK, -1 on overflow. + */ +int +itmin2interval(struct pg_itm_in *itm_in, Interval *span) +{ + int64 total_months = (int64) itm_in->tm_year * MONTHS_PER_YEAR + itm_in->tm_mon; + + if (total_months > INT_MAX || total_months < INT_MIN) + return -1; + span->month = (int32) total_months; + span->day = itm_in->tm_mday; + span->time = itm_in->tm_usec; + return 0; +} + +static TimeOffset +time2t(const int hour, const int min, const int sec, const fsec_t fsec) +{ + return (((((hour * MINS_PER_HOUR) + min) * SECS_PER_MINUTE) + sec) * USECS_PER_SEC) + fsec; +} + +static Timestamp +dt2local(Timestamp dt, int timezone) +{ + dt -= (timezone * USECS_PER_SEC); + return dt; +} + + +/***************************************************************************** + * PUBLIC ROUTINES * + *****************************************************************************/ + + +Datum +timestamp_finite(PG_FUNCTION_ARGS) +{ + Timestamp timestamp = PG_GETARG_TIMESTAMP(0); + + PG_RETURN_BOOL(!TIMESTAMP_NOT_FINITE(timestamp)); +} + +Datum +interval_finite(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(true); +} + + +/*---------------------------------------------------------- + * Relational operators for timestamp. + *---------------------------------------------------------*/ + +void +GetEpochTime(struct pg_tm *tm) +{ + struct pg_tm *t0; + pg_time_t epoch = 0; + + t0 = pg_gmtime(&epoch); + + if (t0 == NULL) + elog(ERROR, "could not convert epoch to timestamp: %m"); + + tm->tm_year = t0->tm_year; + tm->tm_mon = t0->tm_mon; + tm->tm_mday = t0->tm_mday; + tm->tm_hour = t0->tm_hour; + tm->tm_min = t0->tm_min; + tm->tm_sec = t0->tm_sec; + + tm->tm_year += 1900; + tm->tm_mon++; +} + +Timestamp +SetEpochTimestamp(void) +{ + Timestamp dt; + struct pg_tm tt, + *tm = &tt; + + GetEpochTime(tm); + /* we don't bother to test for failure ... */ + tm2timestamp(tm, 0, NULL, &dt); + + return dt; +} /* SetEpochTimestamp() */ + +/* + * We are currently sharing some code between timestamp and timestamptz. + * The comparison functions are among them. - thomas 2001-09-25 + * + * timestamp_relop - is timestamp1 relop timestamp2 + */ +int +timestamp_cmp_internal(Timestamp dt1, Timestamp dt2) +{ + return (dt1 < dt2) ? -1 : ((dt1 > dt2) ? 
1 : 0); +} + +Datum +timestamp_eq(PG_FUNCTION_ARGS) +{ + Timestamp dt1 = PG_GETARG_TIMESTAMP(0); + Timestamp dt2 = PG_GETARG_TIMESTAMP(1); + + PG_RETURN_BOOL(timestamp_cmp_internal(dt1, dt2) == 0); +} + +Datum +timestamp_ne(PG_FUNCTION_ARGS) +{ + Timestamp dt1 = PG_GETARG_TIMESTAMP(0); + Timestamp dt2 = PG_GETARG_TIMESTAMP(1); + + PG_RETURN_BOOL(timestamp_cmp_internal(dt1, dt2) != 0); +} + +Datum +timestamp_lt(PG_FUNCTION_ARGS) +{ + Timestamp dt1 = PG_GETARG_TIMESTAMP(0); + Timestamp dt2 = PG_GETARG_TIMESTAMP(1); + + PG_RETURN_BOOL(timestamp_cmp_internal(dt1, dt2) < 0); +} + +Datum +timestamp_gt(PG_FUNCTION_ARGS) +{ + Timestamp dt1 = PG_GETARG_TIMESTAMP(0); + Timestamp dt2 = PG_GETARG_TIMESTAMP(1); + + PG_RETURN_BOOL(timestamp_cmp_internal(dt1, dt2) > 0); +} + +Datum +timestamp_le(PG_FUNCTION_ARGS) +{ + Timestamp dt1 = PG_GETARG_TIMESTAMP(0); + Timestamp dt2 = PG_GETARG_TIMESTAMP(1); + + PG_RETURN_BOOL(timestamp_cmp_internal(dt1, dt2) <= 0); +} + +Datum +timestamp_ge(PG_FUNCTION_ARGS) +{ + Timestamp dt1 = PG_GETARG_TIMESTAMP(0); + Timestamp dt2 = PG_GETARG_TIMESTAMP(1); + + PG_RETURN_BOOL(timestamp_cmp_internal(dt1, dt2) >= 0); +} + +Datum +timestamp_cmp(PG_FUNCTION_ARGS) +{ + Timestamp dt1 = PG_GETARG_TIMESTAMP(0); + Timestamp dt2 = PG_GETARG_TIMESTAMP(1); + + PG_RETURN_INT32(timestamp_cmp_internal(dt1, dt2)); +} + +#if SIZEOF_DATUM < 8 +/* note: this is used for timestamptz also */ +static int +timestamp_fastcmp(Datum x, Datum y, SortSupport ssup) +{ + Timestamp a = DatumGetTimestamp(x); + Timestamp b = DatumGetTimestamp(y); + + return timestamp_cmp_internal(a, b); +} +#endif + +Datum +timestamp_sortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + +#if SIZEOF_DATUM >= 8 + + /* + * If this build has pass-by-value timestamps, then we can use a standard + * comparator function. + */ + ssup->comparator = ssup_datum_signed_cmp; +#else + ssup->comparator = timestamp_fastcmp; +#endif + PG_RETURN_VOID(); +} + +Datum +timestamp_hash(PG_FUNCTION_ARGS) +{ + return hashint8(fcinfo); +} + +Datum +timestamp_hash_extended(PG_FUNCTION_ARGS) +{ + return hashint8extended(fcinfo); +} + +/* + * Cross-type comparison functions for timestamp vs timestamptz + */ + +int32 +timestamp_cmp_timestamptz_internal(Timestamp timestampVal, TimestampTz dt2) +{ + TimestampTz dt1; + int overflow; + + dt1 = timestamp2timestamptz_opt_overflow(timestampVal, &overflow); + if (overflow > 0) + { + /* dt1 is larger than any finite timestamp, but less than infinity */ + return TIMESTAMP_IS_NOEND(dt2) ? -1 : +1; + } + if (overflow < 0) + { + /* dt1 is less than any finite timestamp, but more than -infinity */ + return TIMESTAMP_IS_NOBEGIN(dt2) ? 
+1 : -1; + } + + return timestamptz_cmp_internal(dt1, dt2); +} + +Datum +timestamp_eq_timestamptz(PG_FUNCTION_ARGS) +{ + Timestamp timestampVal = PG_GETARG_TIMESTAMP(0); + TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1); + + PG_RETURN_BOOL(timestamp_cmp_timestamptz_internal(timestampVal, dt2) == 0); +} + +Datum +timestamp_ne_timestamptz(PG_FUNCTION_ARGS) +{ + Timestamp timestampVal = PG_GETARG_TIMESTAMP(0); + TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1); + + PG_RETURN_BOOL(timestamp_cmp_timestamptz_internal(timestampVal, dt2) != 0); +} + +Datum +timestamp_lt_timestamptz(PG_FUNCTION_ARGS) +{ + Timestamp timestampVal = PG_GETARG_TIMESTAMP(0); + TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1); + + PG_RETURN_BOOL(timestamp_cmp_timestamptz_internal(timestampVal, dt2) < 0); +} + +Datum +timestamp_gt_timestamptz(PG_FUNCTION_ARGS) +{ + Timestamp timestampVal = PG_GETARG_TIMESTAMP(0); + TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1); + + PG_RETURN_BOOL(timestamp_cmp_timestamptz_internal(timestampVal, dt2) > 0); +} + +Datum +timestamp_le_timestamptz(PG_FUNCTION_ARGS) +{ + Timestamp timestampVal = PG_GETARG_TIMESTAMP(0); + TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1); + + PG_RETURN_BOOL(timestamp_cmp_timestamptz_internal(timestampVal, dt2) <= 0); +} + +Datum +timestamp_ge_timestamptz(PG_FUNCTION_ARGS) +{ + Timestamp timestampVal = PG_GETARG_TIMESTAMP(0); + TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1); + + PG_RETURN_BOOL(timestamp_cmp_timestamptz_internal(timestampVal, dt2) >= 0); +} + +Datum +timestamp_cmp_timestamptz(PG_FUNCTION_ARGS) +{ + Timestamp timestampVal = PG_GETARG_TIMESTAMP(0); + TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1); + + PG_RETURN_INT32(timestamp_cmp_timestamptz_internal(timestampVal, dt2)); +} + +Datum +timestamptz_eq_timestamp(PG_FUNCTION_ARGS) +{ + TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0); + Timestamp timestampVal = PG_GETARG_TIMESTAMP(1); + + PG_RETURN_BOOL(timestamp_cmp_timestamptz_internal(timestampVal, dt1) == 0); +} + +Datum +timestamptz_ne_timestamp(PG_FUNCTION_ARGS) +{ + TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0); + Timestamp timestampVal = PG_GETARG_TIMESTAMP(1); + + PG_RETURN_BOOL(timestamp_cmp_timestamptz_internal(timestampVal, dt1) != 0); +} + +Datum +timestamptz_lt_timestamp(PG_FUNCTION_ARGS) +{ + TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0); + Timestamp timestampVal = PG_GETARG_TIMESTAMP(1); + + PG_RETURN_BOOL(timestamp_cmp_timestamptz_internal(timestampVal, dt1) > 0); +} + +Datum +timestamptz_gt_timestamp(PG_FUNCTION_ARGS) +{ + TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0); + Timestamp timestampVal = PG_GETARG_TIMESTAMP(1); + + PG_RETURN_BOOL(timestamp_cmp_timestamptz_internal(timestampVal, dt1) < 0); +} + +Datum +timestamptz_le_timestamp(PG_FUNCTION_ARGS) +{ + TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0); + Timestamp timestampVal = PG_GETARG_TIMESTAMP(1); + + PG_RETURN_BOOL(timestamp_cmp_timestamptz_internal(timestampVal, dt1) >= 0); +} + +Datum +timestamptz_ge_timestamp(PG_FUNCTION_ARGS) +{ + TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0); + Timestamp timestampVal = PG_GETARG_TIMESTAMP(1); + + PG_RETURN_BOOL(timestamp_cmp_timestamptz_internal(timestampVal, dt1) <= 0); +} + +Datum +timestamptz_cmp_timestamp(PG_FUNCTION_ARGS) +{ + TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0); + Timestamp timestampVal = PG_GETARG_TIMESTAMP(1); + + PG_RETURN_INT32(-timestamp_cmp_timestamptz_internal(timestampVal, dt1)); +} + + +/* + * interval_relop - is interval1 relop interval2 + * + * Interval comparison is based on converting interval values to a linear + * representation expressed in the units of the time field 
(microseconds, + * in the case of integer timestamps) with days assumed to be always 24 hours + * and months assumed to be always 30 days. To avoid overflow, we need a + * wider-than-int64 datatype for the linear representation, so use INT128. + */ + +static inline INT128 +interval_cmp_value(const Interval *interval) +{ + INT128 span; + int64 days; + + /* + * Combine the month and day fields into an integral number of days. + * Because the inputs are int32, int64 arithmetic suffices here. + */ + days = interval->month * INT64CONST(30); + days += interval->day; + + /* Widen time field to 128 bits */ + span = int64_to_int128(interval->time); + + /* Scale up days to microseconds, forming a 128-bit product */ + int128_add_int64_mul_int64(&span, days, USECS_PER_DAY); + + return span; +} + +static int +interval_cmp_internal(const Interval *interval1, const Interval *interval2) +{ + INT128 span1 = interval_cmp_value(interval1); + INT128 span2 = interval_cmp_value(interval2); + + return int128_compare(span1, span2); +} + +Datum +interval_eq(PG_FUNCTION_ARGS) +{ + Interval *interval1 = PG_GETARG_INTERVAL_P(0); + Interval *interval2 = PG_GETARG_INTERVAL_P(1); + + PG_RETURN_BOOL(interval_cmp_internal(interval1, interval2) == 0); +} + +Datum +interval_ne(PG_FUNCTION_ARGS) +{ + Interval *interval1 = PG_GETARG_INTERVAL_P(0); + Interval *interval2 = PG_GETARG_INTERVAL_P(1); + + PG_RETURN_BOOL(interval_cmp_internal(interval1, interval2) != 0); +} + +Datum +interval_lt(PG_FUNCTION_ARGS) +{ + Interval *interval1 = PG_GETARG_INTERVAL_P(0); + Interval *interval2 = PG_GETARG_INTERVAL_P(1); + + PG_RETURN_BOOL(interval_cmp_internal(interval1, interval2) < 0); +} + +Datum +interval_gt(PG_FUNCTION_ARGS) +{ + Interval *interval1 = PG_GETARG_INTERVAL_P(0); + Interval *interval2 = PG_GETARG_INTERVAL_P(1); + + PG_RETURN_BOOL(interval_cmp_internal(interval1, interval2) > 0); +} + +Datum +interval_le(PG_FUNCTION_ARGS) +{ + Interval *interval1 = PG_GETARG_INTERVAL_P(0); + Interval *interval2 = PG_GETARG_INTERVAL_P(1); + + PG_RETURN_BOOL(interval_cmp_internal(interval1, interval2) <= 0); +} + +Datum +interval_ge(PG_FUNCTION_ARGS) +{ + Interval *interval1 = PG_GETARG_INTERVAL_P(0); + Interval *interval2 = PG_GETARG_INTERVAL_P(1); + + PG_RETURN_BOOL(interval_cmp_internal(interval1, interval2) >= 0); +} + +Datum +interval_cmp(PG_FUNCTION_ARGS) +{ + Interval *interval1 = PG_GETARG_INTERVAL_P(0); + Interval *interval2 = PG_GETARG_INTERVAL_P(1); + + PG_RETURN_INT32(interval_cmp_internal(interval1, interval2)); +} + +/* + * Hashing for intervals + * + * We must produce equal hashvals for values that interval_cmp_internal() + * considers equal. So, compute the net span the same way it does, + * and then hash that. + */ +Datum +interval_hash(PG_FUNCTION_ARGS) +{ + Interval *interval = PG_GETARG_INTERVAL_P(0); + INT128 span = interval_cmp_value(interval); + int64 span64; + + /* + * Use only the least significant 64 bits for hashing. The upper 64 bits + * seldom add any useful information, and besides we must do it like this + * for compatibility with hashes calculated before use of INT128 was + * introduced. 
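+ *
+ * As an illustration of why this matters: the comparison rules above make
+ * intervals with different fields compare equal, and such values must then
+ * hash identically (e.g. for hash joins and hash aggregation). Expected
+ * behavior under those rules:
+ *
+ *   SELECT interval '1 month' = interval '30 days';   -- t
+ *   SELECT interval '1 day'   = interval '24 hours';  -- t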
+ */ + span64 = int128_to_int64(span); + + return DirectFunctionCall1(hashint8, Int64GetDatumFast(span64)); +} + +Datum +interval_hash_extended(PG_FUNCTION_ARGS) +{ + Interval *interval = PG_GETARG_INTERVAL_P(0); + INT128 span = interval_cmp_value(interval); + int64 span64; + + /* Same approach as interval_hash */ + span64 = int128_to_int64(span); + + return DirectFunctionCall2(hashint8extended, Int64GetDatumFast(span64), + PG_GETARG_DATUM(1)); +} + +/* overlaps_timestamp() --- implements the SQL OVERLAPS operator. + * + * Algorithm is per SQL spec. This is much harder than you'd think + * because the spec requires us to deliver a non-null answer in some cases + * where some of the inputs are null. + */ +Datum +overlaps_timestamp(PG_FUNCTION_ARGS) +{ + /* + * The arguments are Timestamps, but we leave them as generic Datums to + * avoid unnecessary conversions between value and reference forms --- not + * to mention possible dereferences of null pointers. + */ + Datum ts1 = PG_GETARG_DATUM(0); + Datum te1 = PG_GETARG_DATUM(1); + Datum ts2 = PG_GETARG_DATUM(2); + Datum te2 = PG_GETARG_DATUM(3); + bool ts1IsNull = PG_ARGISNULL(0); + bool te1IsNull = PG_ARGISNULL(1); + bool ts2IsNull = PG_ARGISNULL(2); + bool te2IsNull = PG_ARGISNULL(3); + +#define TIMESTAMP_GT(t1,t2) \ + DatumGetBool(DirectFunctionCall2(timestamp_gt,t1,t2)) +#define TIMESTAMP_LT(t1,t2) \ + DatumGetBool(DirectFunctionCall2(timestamp_lt,t1,t2)) + + /* + * If both endpoints of interval 1 are null, the result is null (unknown). + * If just one endpoint is null, take ts1 as the non-null one. Otherwise, + * take ts1 as the lesser endpoint. + */ + if (ts1IsNull) + { + if (te1IsNull) + PG_RETURN_NULL(); + /* swap null for non-null */ + ts1 = te1; + te1IsNull = true; + } + else if (!te1IsNull) + { + if (TIMESTAMP_GT(ts1, te1)) + { + Datum tt = ts1; + + ts1 = te1; + te1 = tt; + } + } + + /* Likewise for interval 2. */ + if (ts2IsNull) + { + if (te2IsNull) + PG_RETURN_NULL(); + /* swap null for non-null */ + ts2 = te2; + te2IsNull = true; + } + else if (!te2IsNull) + { + if (TIMESTAMP_GT(ts2, te2)) + { + Datum tt = ts2; + + ts2 = te2; + te2 = tt; + } + } + + /* + * At this point neither ts1 nor ts2 is null, so we can consider three + * cases: ts1 > ts2, ts1 < ts2, ts1 = ts2 + */ + if (TIMESTAMP_GT(ts1, ts2)) + { + /* + * This case is ts1 < te2 OR te1 < te2, which may look redundant but + * in the presence of nulls it's not quite completely so. + */ + if (te2IsNull) + PG_RETURN_NULL(); + if (TIMESTAMP_LT(ts1, te2)) + PG_RETURN_BOOL(true); + if (te1IsNull) + PG_RETURN_NULL(); + + /* + * If te1 is not null then we had ts1 <= te1 above, and we just found + * ts1 >= te2, hence te1 >= te2. + */ + PG_RETURN_BOOL(false); + } + else if (TIMESTAMP_LT(ts1, ts2)) + { + /* This case is ts2 < te1 OR te2 < te1 */ + if (te1IsNull) + PG_RETURN_NULL(); + if (TIMESTAMP_LT(ts2, te1)) + PG_RETURN_BOOL(true); + if (te2IsNull) + PG_RETURN_NULL(); + + /* + * If te2 is not null then we had ts2 <= te2 above, and we just found + * ts2 >= te1, hence te2 >= te1. + */ + PG_RETURN_BOOL(false); + } + else + { + /* + * For ts1 = ts2 the spec says te1 <> te2 OR te1 = te2, which is a + * rather silly way of saying "true if both are non-null, else null". + */ + if (te1IsNull || te2IsNull) + PG_RETURN_NULL(); + PG_RETURN_BOOL(true); + } + +#undef TIMESTAMP_GT +#undef TIMESTAMP_LT +} + + +/*---------------------------------------------------------- + * "Arithmetic" operators on date/times. 
+ *---------------------------------------------------------*/ + +Datum +timestamp_smaller(PG_FUNCTION_ARGS) +{ + Timestamp dt1 = PG_GETARG_TIMESTAMP(0); + Timestamp dt2 = PG_GETARG_TIMESTAMP(1); + Timestamp result; + + /* use timestamp_cmp_internal to be sure this agrees with comparisons */ + if (timestamp_cmp_internal(dt1, dt2) < 0) + result = dt1; + else + result = dt2; + PG_RETURN_TIMESTAMP(result); +} + +Datum +timestamp_larger(PG_FUNCTION_ARGS) +{ + Timestamp dt1 = PG_GETARG_TIMESTAMP(0); + Timestamp dt2 = PG_GETARG_TIMESTAMP(1); + Timestamp result; + + if (timestamp_cmp_internal(dt1, dt2) > 0) + result = dt1; + else + result = dt2; + PG_RETURN_TIMESTAMP(result); +} + + +Datum +timestamp_mi(PG_FUNCTION_ARGS) +{ + Timestamp dt1 = PG_GETARG_TIMESTAMP(0); + Timestamp dt2 = PG_GETARG_TIMESTAMP(1); + Interval *result; + + result = (Interval *) palloc(sizeof(Interval)); + + if (TIMESTAMP_NOT_FINITE(dt1) || TIMESTAMP_NOT_FINITE(dt2)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("cannot subtract infinite timestamps"))); + + if (unlikely(pg_sub_s64_overflow(dt1, dt2, &result->time))) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + + result->month = 0; + result->day = 0; + + /*---------- + * This is wrong, but removing it breaks a lot of regression tests. + * For example: + * + * test=> SET timezone = 'EST5EDT'; + * test=> SELECT + * test-> ('2005-10-30 13:22:00-05'::timestamptz - + * test(> '2005-10-29 13:22:00-04'::timestamptz); + * ?column? + * ---------------- + * 1 day 01:00:00 + * (1 row) + * + * so adding that to the first timestamp gets: + * + * test=> SELECT + * test-> ('2005-10-29 13:22:00-04'::timestamptz + + * test(> ('2005-10-30 13:22:00-05'::timestamptz - + * test(> '2005-10-29 13:22:00-04'::timestamptz)) at time zone 'EST'; + * timezone + * -------------------- + * 2005-10-30 14:22:00 + * (1 row) + *---------- + */ + result = DatumGetIntervalP(DirectFunctionCall1(interval_justify_hours, + IntervalPGetDatum(result))); + + PG_RETURN_INTERVAL_P(result); +} + +/* + * interval_justify_interval() + * + * Adjust interval so 'month', 'day', and 'time' portions are within + * customary bounds. Specifically: + * + * 0 <= abs(time) < 24 hours + * 0 <= abs(day) < 30 days + * + * Also, the sign bit on all three fields is made equal, so either + * all three fields are negative or all are positive. + */ +Datum +interval_justify_interval(PG_FUNCTION_ARGS) +{ + Interval *span = PG_GETARG_INTERVAL_P(0); + Interval *result; + TimeOffset wholeday; + int32 wholemonth; + + result = (Interval *) palloc(sizeof(Interval)); + result->month = span->month; + result->day = span->day; + result->time = span->time; + + /* pre-justify days if it might prevent overflow */ + if ((result->day > 0 && result->time > 0) || + (result->day < 0 && result->time < 0)) + { + wholemonth = result->day / DAYS_PER_MONTH; + result->day -= wholemonth * DAYS_PER_MONTH; + if (pg_add_s32_overflow(result->month, wholemonth, &result->month)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + } + + /* + * Since TimeOffset is int64, abs(wholeday) can't exceed about 1.07e8. If + * we pre-justified then abs(result->day) is less than DAYS_PER_MONTH, so + * this addition can't overflow. If we didn't pre-justify, then day and + * time are of different signs, so it still can't overflow. 
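+ *
+ * The net effect of this function, and of the related justify_hours() and
+ * justify_days() below, on sample inputs as given in the PostgreSQL
+ * documentation:
+ *
+ *   SELECT justify_hours(interval '27 hours');           -- 1 day 03:00:00
+ *   SELECT justify_days(interval '35 days');             -- 1 mon 5 days
+ *   SELECT justify_interval(interval '1 mon -1 hour');   -- 29 days 23:00:00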
+ */ + TMODULO(result->time, wholeday, USECS_PER_DAY); + result->day += wholeday; + + wholemonth = result->day / DAYS_PER_MONTH; + result->day -= wholemonth * DAYS_PER_MONTH; + if (pg_add_s32_overflow(result->month, wholemonth, &result->month)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + + if (result->month > 0 && + (result->day < 0 || (result->day == 0 && result->time < 0))) + { + result->day += DAYS_PER_MONTH; + result->month--; + } + else if (result->month < 0 && + (result->day > 0 || (result->day == 0 && result->time > 0))) + { + result->day -= DAYS_PER_MONTH; + result->month++; + } + + if (result->day > 0 && result->time < 0) + { + result->time += USECS_PER_DAY; + result->day--; + } + else if (result->day < 0 && result->time > 0) + { + result->time -= USECS_PER_DAY; + result->day++; + } + + PG_RETURN_INTERVAL_P(result); +} + +/* + * interval_justify_hours() + * + * Adjust interval so 'time' contains less than a whole day, adding + * the excess to 'day'. This is useful for + * situations (such as non-TZ) where '1 day' = '24 hours' is valid, + * e.g. interval subtraction and division. + */ +Datum +interval_justify_hours(PG_FUNCTION_ARGS) +{ + Interval *span = PG_GETARG_INTERVAL_P(0); + Interval *result; + TimeOffset wholeday; + + result = (Interval *) palloc(sizeof(Interval)); + result->month = span->month; + result->day = span->day; + result->time = span->time; + + TMODULO(result->time, wholeday, USECS_PER_DAY); + if (pg_add_s32_overflow(result->day, wholeday, &result->day)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + + if (result->day > 0 && result->time < 0) + { + result->time += USECS_PER_DAY; + result->day--; + } + else if (result->day < 0 && result->time > 0) + { + result->time -= USECS_PER_DAY; + result->day++; + } + + PG_RETURN_INTERVAL_P(result); +} + +/* + * interval_justify_days() + * + * Adjust interval so 'day' contains less than 30 days, adding + * the excess to 'month'. + */ +Datum +interval_justify_days(PG_FUNCTION_ARGS) +{ + Interval *span = PG_GETARG_INTERVAL_P(0); + Interval *result; + int32 wholemonth; + + result = (Interval *) palloc(sizeof(Interval)); + result->month = span->month; + result->day = span->day; + result->time = span->time; + + wholemonth = result->day / DAYS_PER_MONTH; + result->day -= wholemonth * DAYS_PER_MONTH; + if (pg_add_s32_overflow(result->month, wholemonth, &result->month)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + + if (result->month > 0 && result->day < 0) + { + result->day += DAYS_PER_MONTH; + result->month--; + } + else if (result->month < 0 && result->day > 0) + { + result->day -= DAYS_PER_MONTH; + result->month++; + } + + PG_RETURN_INTERVAL_P(result); +} + +/* timestamp_pl_interval() + * Add an interval to a timestamp data type. + * Note that interval has provisions for qualitative year/month and day + * units, so try to do the right thing with them. + * To add a month, increment the month, and use the same day of month. + * Then, if the next month has fewer days, set the day of month + * to the last day of month. + * To add a day, increment the mday, and use the same time of day. + * Lastly, add in the "quantitative time". 
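+ *
+ * For example, the end-of-month clamping described above gives the
+ * following expected results (default DateStyle):
+ *
+ *   SELECT timestamp '2024-01-31' + interval '1 month';  -- 2024-02-29 00:00:00
+ *   SELECT timestamp '2025-01-31' + interval '1 month';  -- 2025-02-28 00:00:00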
+ */
+Datum
+timestamp_pl_interval(PG_FUNCTION_ARGS)
+{
+ Timestamp timestamp = PG_GETARG_TIMESTAMP(0);
+ Interval *span = PG_GETARG_INTERVAL_P(1);
+ Timestamp result;
+
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ result = timestamp;
+ else
+ {
+ if (span->month != 0)
+ {
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+
+ if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ if (pg_add_s32_overflow(tm->tm_mon, span->month, &tm->tm_mon))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ if (tm->tm_mon > MONTHS_PER_YEAR)
+ {
+ tm->tm_year += (tm->tm_mon - 1) / MONTHS_PER_YEAR;
+ tm->tm_mon = ((tm->tm_mon - 1) % MONTHS_PER_YEAR) + 1;
+ }
+ else if (tm->tm_mon < 1)
+ {
+ tm->tm_year += tm->tm_mon / MONTHS_PER_YEAR - 1;
+ tm->tm_mon = tm->tm_mon % MONTHS_PER_YEAR + MONTHS_PER_YEAR;
+ }
+
+ /* adjust for end of month boundary problems... */
+ if (tm->tm_mday > day_tab[isleap(tm->tm_year)][tm->tm_mon - 1])
+ tm->tm_mday = (day_tab[isleap(tm->tm_year)][tm->tm_mon - 1]);
+
+ if (tm2timestamp(tm, fsec, NULL, &timestamp) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ }
+
+ if (span->day != 0)
+ {
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+ int julian;
+
+ if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ /*
+ * Add days by converting to and from Julian. We need an overflow
+ * check here since j2date expects a non-negative integer input.
+ */
+ julian = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
+ if (pg_add_s32_overflow(julian, span->day, &julian) ||
+ julian < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ j2date(julian, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
+
+ if (tm2timestamp(tm, fsec, NULL, &timestamp) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ }
+
+ if (pg_add_s64_overflow(timestamp, span->time, &timestamp))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ if (!IS_VALID_TIMESTAMP(timestamp))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ result = timestamp;
+ }
+
+ PG_RETURN_TIMESTAMP(result);
+}
+
+Datum
+timestamp_mi_interval(PG_FUNCTION_ARGS)
+{
+ Timestamp timestamp = PG_GETARG_TIMESTAMP(0);
+ Interval *span = PG_GETARG_INTERVAL_P(1);
+ Interval tspan;
+
+ tspan.month = -span->month;
+ tspan.day = -span->day;
+ tspan.time = -span->time;
+
+ return DirectFunctionCall2(timestamp_pl_interval,
+ TimestampGetDatum(timestamp),
+ PointerGetDatum(&tspan));
+}
+
+
+/* timestamptz_pl_interval_internal()
+ * Add an interval to a timestamptz, in the given (or session) timezone.
+ *
+ * Note that interval has provisions for qualitative year/month and day
+ * units, so try to do the right thing with them.
+ * To add a month, increment the month, and use the same day of month.
+ * Then, if the next month has fewer days, set the day of month
+ * to the last day of month.
+ * To add a day, increment the mday, and use the same time of day.
+ * Lastly, add in the "quantitative time".
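+ *
+ * Because the day component is applied on the calendar in the given time
+ * zone while the time component is plain microsecond arithmetic, adding
+ * '1 day' and adding '24 hours' can differ across a DST transition.
+ * Example from the PostgreSQL documentation (TimeZone = 'America/Denver',
+ * where DST began on 2005-04-03):
+ *
+ *   SELECT timestamptz '2005-04-02 12:00:00-07' + interval '1 day';
+ *     -- 2005-04-03 12:00:00-06
+ *   SELECT timestamptz '2005-04-02 12:00:00-07' + interval '24 hours';
+ *     -- 2005-04-03 13:00:00-06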
+ */
+static TimestampTz
+timestamptz_pl_interval_internal(TimestampTz timestamp,
+ Interval *span,
+ pg_tz *attimezone)
+{
+ TimestampTz result;
+ int tz;
+
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ result = timestamp;
+ else
+ {
+ /* Use session timezone if caller asks for default */
+ if (attimezone == NULL)
+ attimezone = session_timezone;
+
+ if (span->month != 0)
+ {
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+
+ if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, attimezone) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ if (pg_add_s32_overflow(tm->tm_mon, span->month, &tm->tm_mon))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ if (tm->tm_mon > MONTHS_PER_YEAR)
+ {
+ tm->tm_year += (tm->tm_mon - 1) / MONTHS_PER_YEAR;
+ tm->tm_mon = ((tm->tm_mon - 1) % MONTHS_PER_YEAR) + 1;
+ }
+ else if (tm->tm_mon < 1)
+ {
+ tm->tm_year += tm->tm_mon / MONTHS_PER_YEAR - 1;
+ tm->tm_mon = tm->tm_mon % MONTHS_PER_YEAR + MONTHS_PER_YEAR;
+ }
+
+ /* adjust for end of month boundary problems... */
+ if (tm->tm_mday > day_tab[isleap(tm->tm_year)][tm->tm_mon - 1])
+ tm->tm_mday = (day_tab[isleap(tm->tm_year)][tm->tm_mon - 1]);
+
+ tz = DetermineTimeZoneOffset(tm, attimezone);
+
+ if (tm2timestamp(tm, fsec, &tz, &timestamp) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ }
+
+ if (span->day != 0)
+ {
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+ int julian;
+
+ if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, attimezone) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ /*
+ * Add days by converting to and from Julian. We need an overflow
+ * check here since j2date expects a non-negative integer input.
+ * In practice though, it will give correct answers for small
+ * negative Julian dates; we should allow -1 to avoid
+ * timezone-dependent failures, as discussed in timestamp.h.
+ */
+ julian = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
+ if (pg_add_s32_overflow(julian, span->day, &julian) ||
+ julian < -1)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ j2date(julian, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
+
+ tz = DetermineTimeZoneOffset(tm, attimezone);
+
+ if (tm2timestamp(tm, fsec, &tz, &timestamp) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ }
+
+ if (pg_add_s64_overflow(timestamp, span->time, &timestamp))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ if (!IS_VALID_TIMESTAMP(timestamp))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ result = timestamp;
+ }
+
+ return result;
+}
+
+/* timestamptz_mi_interval_internal()
+ * As above, but subtract the interval.
+ */
+static TimestampTz
+timestamptz_mi_interval_internal(TimestampTz timestamp,
+ Interval *span,
+ pg_tz *attimezone)
+{
+ Interval tspan;
+
+ tspan.month = -span->month;
+ tspan.day = -span->day;
+ tspan.time = -span->time;
+
+ return timestamptz_pl_interval_internal(timestamp, &tspan, attimezone);
+}
+
+/* timestamptz_pl_interval()
+ * Add an interval to a timestamptz, in the session timezone.
+ */ +Datum +timestamptz_pl_interval(PG_FUNCTION_ARGS) +{ + TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(0); + Interval *span = PG_GETARG_INTERVAL_P(1); + + PG_RETURN_TIMESTAMP(timestamptz_pl_interval_internal(timestamp, span, NULL)); +} + +Datum +timestamptz_mi_interval(PG_FUNCTION_ARGS) +{ + TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(0); + Interval *span = PG_GETARG_INTERVAL_P(1); + + PG_RETURN_TIMESTAMP(timestamptz_mi_interval_internal(timestamp, span, NULL)); +} + +/* timestamptz_pl_interval_at_zone() + * Add an interval to a timestamptz, in the specified timezone. + */ +Datum +timestamptz_pl_interval_at_zone(PG_FUNCTION_ARGS) +{ + TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(0); + Interval *span = PG_GETARG_INTERVAL_P(1); + text *zone = PG_GETARG_TEXT_PP(2); + pg_tz *attimezone = lookup_timezone(zone); + + PG_RETURN_TIMESTAMP(timestamptz_pl_interval_internal(timestamp, span, attimezone)); +} + +Datum +timestamptz_mi_interval_at_zone(PG_FUNCTION_ARGS) +{ + TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(0); + Interval *span = PG_GETARG_INTERVAL_P(1); + text *zone = PG_GETARG_TEXT_PP(2); + pg_tz *attimezone = lookup_timezone(zone); + + PG_RETURN_TIMESTAMP(timestamptz_mi_interval_internal(timestamp, span, attimezone)); +} + +Datum +interval_um(PG_FUNCTION_ARGS) +{ + Interval *interval = PG_GETARG_INTERVAL_P(0); + Interval *result; + + result = (Interval *) palloc(sizeof(Interval)); + + result->time = -interval->time; + /* overflow check copied from int4um */ + if (interval->time != 0 && SAMESIGN(result->time, interval->time)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + result->day = -interval->day; + if (interval->day != 0 && SAMESIGN(result->day, interval->day)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + result->month = -interval->month; + if (interval->month != 0 && SAMESIGN(result->month, interval->month)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + + PG_RETURN_INTERVAL_P(result); +} + + +Datum +interval_smaller(PG_FUNCTION_ARGS) +{ + Interval *interval1 = PG_GETARG_INTERVAL_P(0); + Interval *interval2 = PG_GETARG_INTERVAL_P(1); + Interval *result; + + /* use interval_cmp_internal to be sure this agrees with comparisons */ + if (interval_cmp_internal(interval1, interval2) < 0) + result = interval1; + else + result = interval2; + PG_RETURN_INTERVAL_P(result); +} + +Datum +interval_larger(PG_FUNCTION_ARGS) +{ + Interval *interval1 = PG_GETARG_INTERVAL_P(0); + Interval *interval2 = PG_GETARG_INTERVAL_P(1); + Interval *result; + + if (interval_cmp_internal(interval1, interval2) > 0) + result = interval1; + else + result = interval2; + PG_RETURN_INTERVAL_P(result); +} + +Datum +interval_pl(PG_FUNCTION_ARGS) +{ + Interval *span1 = PG_GETARG_INTERVAL_P(0); + Interval *span2 = PG_GETARG_INTERVAL_P(1); + Interval *result; + + result = (Interval *) palloc(sizeof(Interval)); + + result->month = span1->month + span2->month; + /* overflow check copied from int4pl */ + if (SAMESIGN(span1->month, span2->month) && + !SAMESIGN(result->month, span1->month)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + + result->day = span1->day + span2->day; + if (SAMESIGN(span1->day, span2->day) && + !SAMESIGN(result->day, span1->day)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + + result->time = span1->time + 
span2->time; + if (SAMESIGN(span1->time, span2->time) && + !SAMESIGN(result->time, span1->time)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + + PG_RETURN_INTERVAL_P(result); +} + +Datum +interval_mi(PG_FUNCTION_ARGS) +{ + Interval *span1 = PG_GETARG_INTERVAL_P(0); + Interval *span2 = PG_GETARG_INTERVAL_P(1); + Interval *result; + + result = (Interval *) palloc(sizeof(Interval)); + + result->month = span1->month - span2->month; + /* overflow check copied from int4mi */ + if (!SAMESIGN(span1->month, span2->month) && + !SAMESIGN(result->month, span1->month)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + + result->day = span1->day - span2->day; + if (!SAMESIGN(span1->day, span2->day) && + !SAMESIGN(result->day, span1->day)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + + result->time = span1->time - span2->time; + if (!SAMESIGN(span1->time, span2->time) && + !SAMESIGN(result->time, span1->time)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + + PG_RETURN_INTERVAL_P(result); +} + +/* + * There is no interval_abs(): it is unclear what value to return: + * http://archives.postgresql.org/pgsql-general/2009-10/msg01031.php + * http://archives.postgresql.org/pgsql-general/2009-11/msg00041.php + */ + +Datum +interval_mul(PG_FUNCTION_ARGS) +{ + Interval *span = PG_GETARG_INTERVAL_P(0); + float8 factor = PG_GETARG_FLOAT8(1); + double month_remainder_days, + sec_remainder, + result_double; + int32 orig_month = span->month, + orig_day = span->day; + Interval *result; + + result = (Interval *) palloc(sizeof(Interval)); + + result_double = span->month * factor; + if (isnan(result_double) || !FLOAT8_FITS_IN_INT32(result_double)) + goto out_of_range; + result->month = (int32) result_double; + + result_double = span->day * factor; + if (isnan(result_double) || !FLOAT8_FITS_IN_INT32(result_double)) + goto out_of_range; + result->day = (int32) result_double; + + /* + * The above correctly handles the whole-number part of the month and day + * products, but we have to do something with any fractional part + * resulting when the factor is non-integral. We cascade the fractions + * down to lower units using the conversion factors DAYS_PER_MONTH and + * SECS_PER_DAY. Note we do NOT cascade up, since we are not forced to do + * so by the representation. The user can choose to cascade up later, + * using justify_hours and/or justify_days. + */ + + /* + * Fractional months full days into days. + * + * Floating point calculation are inherently imprecise, so these + * calculations are crafted to produce the most reliable result possible. + * TSROUND() is needed to more accurately produce whole numbers where + * appropriate. + */ + month_remainder_days = (orig_month * factor - result->month) * DAYS_PER_MONTH; + month_remainder_days = TSROUND(month_remainder_days); + sec_remainder = (orig_day * factor - result->day + + month_remainder_days - (int) month_remainder_days) * SECS_PER_DAY; + sec_remainder = TSROUND(sec_remainder); + + /* + * Might have 24:00:00 hours due to rounding, or >24 hours because of time + * cascade from months and days. It might still be >24 if the combination + * of cascade and the seconds factor operation itself. 
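+ *
+ * The overall effect of this downward cascade, for example (the first
+ * value is from the PostgreSQL documentation, the second is the expected
+ * result of applying the same rules):
+ *
+ *   SELECT 3.5 * interval '1 hour';    -- 03:30:00
+ *   SELECT 1.5 * interval '1 month';   -- 1 mon 15 days
+ *                                      --   (the half month cascades down
+ *                                      --    to days, never up to months)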
+ */ + if (fabs(sec_remainder) >= SECS_PER_DAY) + { + if (pg_add_s32_overflow(result->day, + (int) (sec_remainder / SECS_PER_DAY), + &result->day)) + goto out_of_range; + sec_remainder -= (int) (sec_remainder / SECS_PER_DAY) * SECS_PER_DAY; + } + + /* cascade units down */ + if (pg_add_s32_overflow(result->day, (int32) month_remainder_days, + &result->day)) + goto out_of_range; + result_double = rint(span->time * factor + sec_remainder * USECS_PER_SEC); + if (isnan(result_double) || !FLOAT8_FITS_IN_INT64(result_double)) + goto out_of_range; + result->time = (int64) result_double; + + PG_RETURN_INTERVAL_P(result); + +out_of_range: + ereport(ERROR, + errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range")); + + PG_RETURN_NULL(); /* keep compiler quiet */ +} + +Datum +mul_d_interval(PG_FUNCTION_ARGS) +{ + /* Args are float8 and Interval *, but leave them as generic Datum */ + Datum factor = PG_GETARG_DATUM(0); + Datum span = PG_GETARG_DATUM(1); + + return DirectFunctionCall2(interval_mul, span, factor); +} + +Datum +interval_div(PG_FUNCTION_ARGS) +{ + Interval *span = PG_GETARG_INTERVAL_P(0); + float8 factor = PG_GETARG_FLOAT8(1); + double month_remainder_days, + sec_remainder, + result_double; + int32 orig_month = span->month, + orig_day = span->day; + Interval *result; + + result = (Interval *) palloc(sizeof(Interval)); + + if (factor == 0.0) + ereport(ERROR, + (errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero"))); + + result_double = span->month / factor; + if (isnan(result_double) || !FLOAT8_FITS_IN_INT32(result_double)) + goto out_of_range; + result->month = (int32) result_double; + + result_double = span->day / factor; + if (isnan(result_double) || !FLOAT8_FITS_IN_INT32(result_double)) + goto out_of_range; + result->day = (int32) result_double; + + /* + * Fractional months full days into days. See comment in interval_mul(). + */ + month_remainder_days = (orig_month / factor - result->month) * DAYS_PER_MONTH; + month_remainder_days = TSROUND(month_remainder_days); + sec_remainder = (orig_day / factor - result->day + + month_remainder_days - (int) month_remainder_days) * SECS_PER_DAY; + sec_remainder = TSROUND(sec_remainder); + if (fabs(sec_remainder) >= SECS_PER_DAY) + { + if (pg_add_s32_overflow(result->day, + (int) (sec_remainder / SECS_PER_DAY), + &result->day)) + goto out_of_range; + sec_remainder -= (int) (sec_remainder / SECS_PER_DAY) * SECS_PER_DAY; + } + + /* cascade units down */ + if (pg_add_s32_overflow(result->day, (int32) month_remainder_days, + &result->day)) + goto out_of_range; + result_double = rint(span->time / factor + sec_remainder * USECS_PER_SEC); + if (isnan(result_double) || !FLOAT8_FITS_IN_INT64(result_double)) + goto out_of_range; + result->time = (int64) result_double; + + PG_RETURN_INTERVAL_P(result); + +out_of_range: + ereport(ERROR, + errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range")); + + PG_RETURN_NULL(); /* keep compiler quiet */ +} + + +/* + * in_range support functions for timestamps and intervals. + * + * Per SQL spec, we support these with interval as the offset type. + * The spec's restriction that the offset not be negative is a bit hard to + * decipher for intervals, but we choose to interpret it the same as our + * interval comparison operators would. 
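+ *
+ * These functions back RANGE offset frames in window functions. A typical
+ * use, assuming a hypothetical table events(ts timestamptz):
+ *
+ *   SELECT ts,
+ *          count(*) OVER (ORDER BY ts
+ *                         RANGE BETWEEN interval '10 minutes' PRECEDING
+ *                                   AND CURRENT ROW)
+ *   FROM events;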
+ */ + +Datum +in_range_timestamptz_interval(PG_FUNCTION_ARGS) +{ + TimestampTz val = PG_GETARG_TIMESTAMPTZ(0); + TimestampTz base = PG_GETARG_TIMESTAMPTZ(1); + Interval *offset = PG_GETARG_INTERVAL_P(2); + bool sub = PG_GETARG_BOOL(3); + bool less = PG_GETARG_BOOL(4); + TimestampTz sum; + + if (int128_compare(interval_cmp_value(offset), int64_to_int128(0)) < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PRECEDING_OR_FOLLOWING_SIZE), + errmsg("invalid preceding or following size in window function"))); + + /* We don't currently bother to avoid overflow hazards here */ + if (sub) + sum = timestamptz_mi_interval_internal(base, offset, NULL); + else + sum = timestamptz_pl_interval_internal(base, offset, NULL); + + if (less) + PG_RETURN_BOOL(val <= sum); + else + PG_RETURN_BOOL(val >= sum); +} + +Datum +in_range_timestamp_interval(PG_FUNCTION_ARGS) +{ + Timestamp val = PG_GETARG_TIMESTAMP(0); + Timestamp base = PG_GETARG_TIMESTAMP(1); + Interval *offset = PG_GETARG_INTERVAL_P(2); + bool sub = PG_GETARG_BOOL(3); + bool less = PG_GETARG_BOOL(4); + Timestamp sum; + + if (int128_compare(interval_cmp_value(offset), int64_to_int128(0)) < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PRECEDING_OR_FOLLOWING_SIZE), + errmsg("invalid preceding or following size in window function"))); + + /* We don't currently bother to avoid overflow hazards here */ + if (sub) + sum = DatumGetTimestamp(DirectFunctionCall2(timestamp_mi_interval, + TimestampGetDatum(base), + IntervalPGetDatum(offset))); + else + sum = DatumGetTimestamp(DirectFunctionCall2(timestamp_pl_interval, + TimestampGetDatum(base), + IntervalPGetDatum(offset))); + + if (less) + PG_RETURN_BOOL(val <= sum); + else + PG_RETURN_BOOL(val >= sum); +} + +Datum +in_range_interval_interval(PG_FUNCTION_ARGS) +{ + Interval *val = PG_GETARG_INTERVAL_P(0); + Interval *base = PG_GETARG_INTERVAL_P(1); + Interval *offset = PG_GETARG_INTERVAL_P(2); + bool sub = PG_GETARG_BOOL(3); + bool less = PG_GETARG_BOOL(4); + Interval *sum; + + if (int128_compare(interval_cmp_value(offset), int64_to_int128(0)) < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PRECEDING_OR_FOLLOWING_SIZE), + errmsg("invalid preceding or following size in window function"))); + + /* We don't currently bother to avoid overflow hazards here */ + if (sub) + sum = DatumGetIntervalP(DirectFunctionCall2(interval_mi, + IntervalPGetDatum(base), + IntervalPGetDatum(offset))); + else + sum = DatumGetIntervalP(DirectFunctionCall2(interval_pl, + IntervalPGetDatum(base), + IntervalPGetDatum(offset))); + + if (less) + PG_RETURN_BOOL(interval_cmp_internal(val, sum) <= 0); + else + PG_RETURN_BOOL(interval_cmp_internal(val, sum) >= 0); +} + + +/* + * interval_accum, interval_accum_inv, and interval_avg implement the + * AVG(interval) aggregate. + * + * The transition datatype for this aggregate is a 2-element array of + * intervals, where the first is the running sum and the second contains + * the number of values so far in its 'time' field. This is a bit ugly + * but it beats inventing a specialized datatype for the purpose. 
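+ *
+ * Expected behavior of the resulting aggregate, e.g.:
+ *
+ *   SELECT avg(x) FROM (VALUES (interval '1 day'),
+ *                              (interval '2 days')) AS v(x);
+ *     -- 1 day 12:00:00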
+ */ + +Datum +interval_accum(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + Interval *newval = PG_GETARG_INTERVAL_P(1); + Datum *transdatums; + int ndatums; + Interval sumX, + N; + Interval *newsum; + ArrayType *result; + + deconstruct_array(transarray, + INTERVALOID, sizeof(Interval), false, TYPALIGN_DOUBLE, + &transdatums, NULL, &ndatums); + if (ndatums != 2) + elog(ERROR, "expected 2-element interval array"); + + sumX = *(DatumGetIntervalP(transdatums[0])); + N = *(DatumGetIntervalP(transdatums[1])); + + newsum = DatumGetIntervalP(DirectFunctionCall2(interval_pl, + IntervalPGetDatum(&sumX), + IntervalPGetDatum(newval))); + N.time += 1; + + transdatums[0] = IntervalPGetDatum(newsum); + transdatums[1] = IntervalPGetDatum(&N); + + result = construct_array(transdatums, 2, + INTERVALOID, sizeof(Interval), false, TYPALIGN_DOUBLE); + + PG_RETURN_ARRAYTYPE_P(result); +} + +Datum +interval_combine(PG_FUNCTION_ARGS) +{ + ArrayType *transarray1 = PG_GETARG_ARRAYTYPE_P(0); + ArrayType *transarray2 = PG_GETARG_ARRAYTYPE_P(1); + Datum *transdatums1; + Datum *transdatums2; + int ndatums1; + int ndatums2; + Interval sum1, + N1; + Interval sum2, + N2; + + Interval *newsum; + ArrayType *result; + + deconstruct_array(transarray1, + INTERVALOID, sizeof(Interval), false, TYPALIGN_DOUBLE, + &transdatums1, NULL, &ndatums1); + if (ndatums1 != 2) + elog(ERROR, "expected 2-element interval array"); + + sum1 = *(DatumGetIntervalP(transdatums1[0])); + N1 = *(DatumGetIntervalP(transdatums1[1])); + + deconstruct_array(transarray2, + INTERVALOID, sizeof(Interval), false, TYPALIGN_DOUBLE, + &transdatums2, NULL, &ndatums2); + if (ndatums2 != 2) + elog(ERROR, "expected 2-element interval array"); + + sum2 = *(DatumGetIntervalP(transdatums2[0])); + N2 = *(DatumGetIntervalP(transdatums2[1])); + + newsum = DatumGetIntervalP(DirectFunctionCall2(interval_pl, + IntervalPGetDatum(&sum1), + IntervalPGetDatum(&sum2))); + N1.time += N2.time; + + transdatums1[0] = IntervalPGetDatum(newsum); + transdatums1[1] = IntervalPGetDatum(&N1); + + result = construct_array(transdatums1, 2, + INTERVALOID, sizeof(Interval), false, TYPALIGN_DOUBLE); + + PG_RETURN_ARRAYTYPE_P(result); +} + +Datum +interval_accum_inv(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + Interval *newval = PG_GETARG_INTERVAL_P(1); + Datum *transdatums; + int ndatums; + Interval sumX, + N; + Interval *newsum; + ArrayType *result; + + deconstruct_array(transarray, + INTERVALOID, sizeof(Interval), false, TYPALIGN_DOUBLE, + &transdatums, NULL, &ndatums); + if (ndatums != 2) + elog(ERROR, "expected 2-element interval array"); + + sumX = *(DatumGetIntervalP(transdatums[0])); + N = *(DatumGetIntervalP(transdatums[1])); + + newsum = DatumGetIntervalP(DirectFunctionCall2(interval_mi, + IntervalPGetDatum(&sumX), + IntervalPGetDatum(newval))); + N.time -= 1; + + transdatums[0] = IntervalPGetDatum(newsum); + transdatums[1] = IntervalPGetDatum(&N); + + result = construct_array(transdatums, 2, + INTERVALOID, sizeof(Interval), false, TYPALIGN_DOUBLE); + + PG_RETURN_ARRAYTYPE_P(result); +} + +Datum +interval_avg(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + Datum *transdatums; + int ndatums; + Interval sumX, + N; + + deconstruct_array(transarray, + INTERVALOID, sizeof(Interval), false, TYPALIGN_DOUBLE, + &transdatums, NULL, &ndatums); + if (ndatums != 2) + elog(ERROR, "expected 2-element interval array"); + + sumX = *(DatumGetIntervalP(transdatums[0])); + N = *(DatumGetIntervalP(transdatums[1])); + + 
/* SQL defines AVG of no values to be NULL */ + if (N.time == 0) + PG_RETURN_NULL(); + + return DirectFunctionCall2(interval_div, + IntervalPGetDatum(&sumX), + Float8GetDatum((double) N.time)); +} + + +/* timestamp_age() + * Calculate time difference while retaining year/month fields. + * Note that this does not result in an accurate absolute time span + * since year and month are out of context once the arithmetic + * is done. + */ +Datum +timestamp_age(PG_FUNCTION_ARGS) +{ + Timestamp dt1 = PG_GETARG_TIMESTAMP(0); + Timestamp dt2 = PG_GETARG_TIMESTAMP(1); + Interval *result; + fsec_t fsec1, + fsec2; + struct pg_itm tt, + *tm = &tt; + struct pg_tm tt1, + *tm1 = &tt1; + struct pg_tm tt2, + *tm2 = &tt2; + + result = (Interval *) palloc(sizeof(Interval)); + + if (timestamp2tm(dt1, NULL, tm1, &fsec1, NULL, NULL) == 0 && + timestamp2tm(dt2, NULL, tm2, &fsec2, NULL, NULL) == 0) + { + /* form the symbolic difference */ + tm->tm_usec = fsec1 - fsec2; + tm->tm_sec = tm1->tm_sec - tm2->tm_sec; + tm->tm_min = tm1->tm_min - tm2->tm_min; + tm->tm_hour = tm1->tm_hour - tm2->tm_hour; + tm->tm_mday = tm1->tm_mday - tm2->tm_mday; + tm->tm_mon = tm1->tm_mon - tm2->tm_mon; + tm->tm_year = tm1->tm_year - tm2->tm_year; + + /* flip sign if necessary... */ + if (dt1 < dt2) + { + tm->tm_usec = -tm->tm_usec; + tm->tm_sec = -tm->tm_sec; + tm->tm_min = -tm->tm_min; + tm->tm_hour = -tm->tm_hour; + tm->tm_mday = -tm->tm_mday; + tm->tm_mon = -tm->tm_mon; + tm->tm_year = -tm->tm_year; + } + + /* propagate any negative fields into the next higher field */ + while (tm->tm_usec < 0) + { + tm->tm_usec += USECS_PER_SEC; + tm->tm_sec--; + } + + while (tm->tm_sec < 0) + { + tm->tm_sec += SECS_PER_MINUTE; + tm->tm_min--; + } + + while (tm->tm_min < 0) + { + tm->tm_min += MINS_PER_HOUR; + tm->tm_hour--; + } + + while (tm->tm_hour < 0) + { + tm->tm_hour += HOURS_PER_DAY; + tm->tm_mday--; + } + + while (tm->tm_mday < 0) + { + if (dt1 < dt2) + { + tm->tm_mday += day_tab[isleap(tm1->tm_year)][tm1->tm_mon - 1]; + tm->tm_mon--; + } + else + { + tm->tm_mday += day_tab[isleap(tm2->tm_year)][tm2->tm_mon - 1]; + tm->tm_mon--; + } + } + + while (tm->tm_mon < 0) + { + tm->tm_mon += MONTHS_PER_YEAR; + tm->tm_year--; + } + + /* recover sign if necessary... */ + if (dt1 < dt2) + { + tm->tm_usec = -tm->tm_usec; + tm->tm_sec = -tm->tm_sec; + tm->tm_min = -tm->tm_min; + tm->tm_hour = -tm->tm_hour; + tm->tm_mday = -tm->tm_mday; + tm->tm_mon = -tm->tm_mon; + tm->tm_year = -tm->tm_year; + } + + if (itm2interval(tm, result) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + } + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + PG_RETURN_INTERVAL_P(result); +} + + +/* timestamptz_age() + * Calculate time difference while retaining year/month fields. + * Note that this does not result in an accurate absolute time span + * since year and month are out of context once the arithmetic + * is done. 
+ */ +Datum +timestamptz_age(PG_FUNCTION_ARGS) +{ + TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0); + TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1); + Interval *result; + fsec_t fsec1, + fsec2; + struct pg_itm tt, + *tm = &tt; + struct pg_tm tt1, + *tm1 = &tt1; + struct pg_tm tt2, + *tm2 = &tt2; + int tz1; + int tz2; + + result = (Interval *) palloc(sizeof(Interval)); + + if (timestamp2tm(dt1, &tz1, tm1, &fsec1, NULL, NULL) == 0 && + timestamp2tm(dt2, &tz2, tm2, &fsec2, NULL, NULL) == 0) + { + /* form the symbolic difference */ + tm->tm_usec = fsec1 - fsec2; + tm->tm_sec = tm1->tm_sec - tm2->tm_sec; + tm->tm_min = tm1->tm_min - tm2->tm_min; + tm->tm_hour = tm1->tm_hour - tm2->tm_hour; + tm->tm_mday = tm1->tm_mday - tm2->tm_mday; + tm->tm_mon = tm1->tm_mon - tm2->tm_mon; + tm->tm_year = tm1->tm_year - tm2->tm_year; + + /* flip sign if necessary... */ + if (dt1 < dt2) + { + tm->tm_usec = -tm->tm_usec; + tm->tm_sec = -tm->tm_sec; + tm->tm_min = -tm->tm_min; + tm->tm_hour = -tm->tm_hour; + tm->tm_mday = -tm->tm_mday; + tm->tm_mon = -tm->tm_mon; + tm->tm_year = -tm->tm_year; + } + + /* propagate any negative fields into the next higher field */ + while (tm->tm_usec < 0) + { + tm->tm_usec += USECS_PER_SEC; + tm->tm_sec--; + } + + while (tm->tm_sec < 0) + { + tm->tm_sec += SECS_PER_MINUTE; + tm->tm_min--; + } + + while (tm->tm_min < 0) + { + tm->tm_min += MINS_PER_HOUR; + tm->tm_hour--; + } + + while (tm->tm_hour < 0) + { + tm->tm_hour += HOURS_PER_DAY; + tm->tm_mday--; + } + + while (tm->tm_mday < 0) + { + if (dt1 < dt2) + { + tm->tm_mday += day_tab[isleap(tm1->tm_year)][tm1->tm_mon - 1]; + tm->tm_mon--; + } + else + { + tm->tm_mday += day_tab[isleap(tm2->tm_year)][tm2->tm_mon - 1]; + tm->tm_mon--; + } + } + + while (tm->tm_mon < 0) + { + tm->tm_mon += MONTHS_PER_YEAR; + tm->tm_year--; + } + + /* + * Note: we deliberately ignore any difference between tz1 and tz2. + */ + + /* recover sign if necessary... */ + if (dt1 < dt2) + { + tm->tm_usec = -tm->tm_usec; + tm->tm_sec = -tm->tm_sec; + tm->tm_min = -tm->tm_min; + tm->tm_hour = -tm->tm_hour; + tm->tm_mday = -tm->tm_mday; + tm->tm_mon = -tm->tm_mon; + tm->tm_year = -tm->tm_year; + } + + if (itm2interval(tm, result) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + } + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + PG_RETURN_INTERVAL_P(result); +} + + +/*---------------------------------------------------------- + * Conversion operators. + *---------------------------------------------------------*/ + + +/* timestamp_bin() + * Bin timestamp into specified interval. 
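+ *
+ * Example from the PostgreSQL documentation:
+ *
+ *   SELECT date_bin('15 minutes', timestamp '2020-02-11 15:44:17',
+ *                   timestamp '2001-01-01');
+ *     -- 2020-02-11 15:30:00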
+ */ +Datum +timestamp_bin(PG_FUNCTION_ARGS) +{ + Interval *stride = PG_GETARG_INTERVAL_P(0); + Timestamp timestamp = PG_GETARG_TIMESTAMP(1); + Timestamp origin = PG_GETARG_TIMESTAMP(2); + Timestamp result, + stride_usecs, + tm_diff, + tm_modulo, + tm_delta; + + if (TIMESTAMP_NOT_FINITE(timestamp)) + PG_RETURN_TIMESTAMP(timestamp); + + if (TIMESTAMP_NOT_FINITE(origin)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("origin out of range"))); + + if (stride->month != 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("timestamps cannot be binned into intervals containing months or years"))); + + if (unlikely(pg_mul_s64_overflow(stride->day, USECS_PER_DAY, &stride_usecs)) || + unlikely(pg_add_s64_overflow(stride_usecs, stride->time, &stride_usecs))) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + + if (stride_usecs <= 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("stride must be greater than zero"))); + + if (unlikely(pg_sub_s64_overflow(timestamp, origin, &tm_diff))) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + + /* These calculations cannot overflow */ + tm_modulo = tm_diff % stride_usecs; + tm_delta = tm_diff - tm_modulo; + result = origin + tm_delta; + + /* + * We want to round towards -infinity, not 0, when tm_diff is negative and + * not a multiple of stride_usecs. This adjustment *can* cause overflow, + * since the result might now be out of the range origin .. timestamp. + */ + if (tm_modulo < 0) + { + if (unlikely(pg_sub_s64_overflow(result, stride_usecs, &result)) || + !IS_VALID_TIMESTAMP(result)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + } + + PG_RETURN_TIMESTAMP(result); +} + +/* timestamp_trunc() + * Truncate timestamp to specified units. + */ +Datum +timestamp_trunc(PG_FUNCTION_ARGS) +{ + text *units = PG_GETARG_TEXT_PP(0); + Timestamp timestamp = PG_GETARG_TIMESTAMP(1); + Timestamp result; + int type, + val; + char *lowunits; + fsec_t fsec; + struct pg_tm tt, + *tm = &tt; + + if (TIMESTAMP_NOT_FINITE(timestamp)) + PG_RETURN_TIMESTAMP(timestamp); + + lowunits = downcase_truncate_identifier(VARDATA_ANY(units), + VARSIZE_ANY_EXHDR(units), + false); + + type = DecodeUnits(0, lowunits, &val); + + if (type == UNITS) + { + if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + switch (val) + { + case DTK_WEEK: + { + int woy; + + woy = date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday); + + /* + * If it is week 52/53 and the month is January, then the + * week must belong to the previous year. Also, some + * December dates belong to the next year. 
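+ *
+ * For example, 2005-01-01 falls in ISO week 53 of 2004, so the expected
+ * result is the Monday of that week:
+ *
+ *   SELECT date_trunc('week', timestamp '2005-01-01');
+ *     -- 2004-12-27 00:00:00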
+ */ + if (woy >= 52 && tm->tm_mon == 1) + --tm->tm_year; + if (woy <= 1 && tm->tm_mon == MONTHS_PER_YEAR) + ++tm->tm_year; + isoweek2date(woy, &(tm->tm_year), &(tm->tm_mon), &(tm->tm_mday)); + tm->tm_hour = 0; + tm->tm_min = 0; + tm->tm_sec = 0; + fsec = 0; + break; + } + case DTK_MILLENNIUM: + /* see comments in timestamptz_trunc */ + if (tm->tm_year > 0) + tm->tm_year = ((tm->tm_year + 999) / 1000) * 1000 - 999; + else + tm->tm_year = -((999 - (tm->tm_year - 1)) / 1000) * 1000 + 1; + /* FALL THRU */ + case DTK_CENTURY: + /* see comments in timestamptz_trunc */ + if (tm->tm_year > 0) + tm->tm_year = ((tm->tm_year + 99) / 100) * 100 - 99; + else + tm->tm_year = -((99 - (tm->tm_year - 1)) / 100) * 100 + 1; + /* FALL THRU */ + case DTK_DECADE: + /* see comments in timestamptz_trunc */ + if (val != DTK_MILLENNIUM && val != DTK_CENTURY) + { + if (tm->tm_year > 0) + tm->tm_year = (tm->tm_year / 10) * 10; + else + tm->tm_year = -((8 - (tm->tm_year - 1)) / 10) * 10; + } + /* FALL THRU */ + case DTK_YEAR: + tm->tm_mon = 1; + /* FALL THRU */ + case DTK_QUARTER: + tm->tm_mon = (3 * ((tm->tm_mon - 1) / 3)) + 1; + /* FALL THRU */ + case DTK_MONTH: + tm->tm_mday = 1; + /* FALL THRU */ + case DTK_DAY: + tm->tm_hour = 0; + /* FALL THRU */ + case DTK_HOUR: + tm->tm_min = 0; + /* FALL THRU */ + case DTK_MINUTE: + tm->tm_sec = 0; + /* FALL THRU */ + case DTK_SECOND: + fsec = 0; + break; + + case DTK_MILLISEC: + fsec = (fsec / 1000) * 1000; + break; + + case DTK_MICROSEC: + break; + + default: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unit \"%s\" not supported for type %s", + lowunits, format_type_be(TIMESTAMPOID)))); + result = 0; + } + + if (tm2timestamp(tm, fsec, NULL, &result) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unit \"%s\" not recognized for type %s", + lowunits, format_type_be(TIMESTAMPOID)))); + result = 0; + } + + PG_RETURN_TIMESTAMP(result); +} + +/* timestamptz_bin() + * Bin timestamptz into specified interval using specified origin. 
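+ *
+ * The origin shifts the bin boundaries (same rule as timestamp_bin above);
+ * example from the PostgreSQL documentation:
+ *
+ *   SELECT date_bin('15 minutes', timestamp '2020-02-11 15:44:17',
+ *                   timestamp '2001-01-01 00:02:30');
+ *     -- 2020-02-11 15:32:30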
+ */ +Datum +timestamptz_bin(PG_FUNCTION_ARGS) +{ + Interval *stride = PG_GETARG_INTERVAL_P(0); + TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(1); + TimestampTz origin = PG_GETARG_TIMESTAMPTZ(2); + TimestampTz result, + stride_usecs, + tm_diff, + tm_modulo, + tm_delta; + + if (TIMESTAMP_NOT_FINITE(timestamp)) + PG_RETURN_TIMESTAMPTZ(timestamp); + + if (TIMESTAMP_NOT_FINITE(origin)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("origin out of range"))); + + if (stride->month != 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("timestamps cannot be binned into intervals containing months or years"))); + + if (unlikely(pg_mul_s64_overflow(stride->day, USECS_PER_DAY, &stride_usecs)) || + unlikely(pg_add_s64_overflow(stride_usecs, stride->time, &stride_usecs))) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + + if (stride_usecs <= 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("stride must be greater than zero"))); + + if (unlikely(pg_sub_s64_overflow(timestamp, origin, &tm_diff))) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + + /* These calculations cannot overflow */ + tm_modulo = tm_diff % stride_usecs; + tm_delta = tm_diff - tm_modulo; + result = origin + tm_delta; + + /* + * We want to round towards -infinity, not 0, when tm_diff is negative and + * not a multiple of stride_usecs. This adjustment *can* cause overflow, + * since the result might now be out of the range origin .. timestamp. + */ + if (tm_modulo < 0) + { + if (unlikely(pg_sub_s64_overflow(result, stride_usecs, &result)) || + !IS_VALID_TIMESTAMP(result)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + } + + PG_RETURN_TIMESTAMPTZ(result); +} + +/* + * Common code for timestamptz_trunc() and timestamptz_trunc_zone(). + * + * tzp identifies the zone to truncate with respect to. We assume + * infinite timestamps have already been rejected. + */ +static TimestampTz +timestamptz_trunc_internal(text *units, TimestampTz timestamp, pg_tz *tzp) +{ + TimestampTz result; + int tz; + int type, + val; + bool redotz = false; + char *lowunits; + fsec_t fsec; + struct pg_tm tt, + *tm = &tt; + + lowunits = downcase_truncate_identifier(VARDATA_ANY(units), + VARSIZE_ANY_EXHDR(units), + false); + + type = DecodeUnits(0, lowunits, &val); + + if (type == UNITS) + { + if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, tzp) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + switch (val) + { + case DTK_WEEK: + { + int woy; + + woy = date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday); + + /* + * If it is week 52/53 and the month is January, then the + * week must belong to the previous year. Also, some + * December dates belong to the next year. + */ + if (woy >= 52 && tm->tm_mon == 1) + --tm->tm_year; + if (woy <= 1 && tm->tm_mon == MONTHS_PER_YEAR) + ++tm->tm_year; + isoweek2date(woy, &(tm->tm_year), &(tm->tm_mon), &(tm->tm_mday)); + tm->tm_hour = 0; + tm->tm_min = 0; + tm->tm_sec = 0; + fsec = 0; + redotz = true; + break; + } + /* one may consider DTK_THOUSAND and DTK_HUNDRED... */ + case DTK_MILLENNIUM: + + /* + * truncating to the millennium? what is this supposed to + * mean? let us put the first year of the millennium... i.e. + * -1000, 1, 1001, 2001... 
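+ *
+ * So, for instance, the expected results with TimeZone = 'UTC' are:
+ *
+ *   SELECT date_trunc('century', timestamptz '2000-06-15 12:00+00');
+ *     -- 1901-01-01 00:00:00+00
+ *   SELECT date_trunc('millennium', timestamptz '1999-12-31 00:00+00');
+ *     -- 1001-01-01 00:00:00+00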
+ */ + if (tm->tm_year > 0) + tm->tm_year = ((tm->tm_year + 999) / 1000) * 1000 - 999; + else + tm->tm_year = -((999 - (tm->tm_year - 1)) / 1000) * 1000 + 1; + /* FALL THRU */ + case DTK_CENTURY: + /* truncating to the century? as above: -100, 1, 101... */ + if (tm->tm_year > 0) + tm->tm_year = ((tm->tm_year + 99) / 100) * 100 - 99; + else + tm->tm_year = -((99 - (tm->tm_year - 1)) / 100) * 100 + 1; + /* FALL THRU */ + case DTK_DECADE: + + /* + * truncating to the decade? first year of the decade. must + * not be applied if year was truncated before! + */ + if (val != DTK_MILLENNIUM && val != DTK_CENTURY) + { + if (tm->tm_year > 0) + tm->tm_year = (tm->tm_year / 10) * 10; + else + tm->tm_year = -((8 - (tm->tm_year - 1)) / 10) * 10; + } + /* FALL THRU */ + case DTK_YEAR: + tm->tm_mon = 1; + /* FALL THRU */ + case DTK_QUARTER: + tm->tm_mon = (3 * ((tm->tm_mon - 1) / 3)) + 1; + /* FALL THRU */ + case DTK_MONTH: + tm->tm_mday = 1; + /* FALL THRU */ + case DTK_DAY: + tm->tm_hour = 0; + redotz = true; /* for all cases >= DAY */ + /* FALL THRU */ + case DTK_HOUR: + tm->tm_min = 0; + /* FALL THRU */ + case DTK_MINUTE: + tm->tm_sec = 0; + /* FALL THRU */ + case DTK_SECOND: + fsec = 0; + break; + case DTK_MILLISEC: + fsec = (fsec / 1000) * 1000; + break; + case DTK_MICROSEC: + break; + + default: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unit \"%s\" not supported for type %s", + lowunits, format_type_be(TIMESTAMPTZOID)))); + result = 0; + } + + if (redotz) + tz = DetermineTimeZoneOffset(tm, tzp); + + if (tm2timestamp(tm, fsec, &tz, &result) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unit \"%s\" not recognized for type %s", + lowunits, format_type_be(TIMESTAMPTZOID)))); + result = 0; + } + + return result; +} + +/* timestamptz_trunc() + * Truncate timestamptz to specified units in session timezone. + */ +Datum +timestamptz_trunc(PG_FUNCTION_ARGS) +{ + text *units = PG_GETARG_TEXT_PP(0); + TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(1); + TimestampTz result; + + if (TIMESTAMP_NOT_FINITE(timestamp)) + PG_RETURN_TIMESTAMPTZ(timestamp); + + result = timestamptz_trunc_internal(units, timestamp, session_timezone); + + PG_RETURN_TIMESTAMPTZ(result); +} + +/* timestamptz_trunc_zone() + * Truncate timestamptz to specified units in specified timezone. + */ +Datum +timestamptz_trunc_zone(PG_FUNCTION_ARGS) +{ + text *units = PG_GETARG_TEXT_PP(0); + TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(1); + text *zone = PG_GETARG_TEXT_PP(2); + TimestampTz result; + pg_tz *tzp; + + /* + * timestamptz_zone() doesn't look up the zone for infinite inputs, so we + * don't do so here either. + */ + if (TIMESTAMP_NOT_FINITE(timestamp)) + PG_RETURN_TIMESTAMP(timestamp); + + /* + * Look up the requested timezone. + */ + tzp = lookup_timezone(zone); + + result = timestamptz_trunc_internal(units, timestamp, tzp); + + PG_RETURN_TIMESTAMPTZ(result); +} + +/* interval_trunc() + * Extract specified field from interval. 
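+ *
+ * (In effect this is date_trunc() for intervals: the interval is truncated
+ * to the named precision, so truncating '2 years 5 mons 13 days' to 'month'
+ * leaves '2 years 5 mons'.)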
+ */ +Datum +interval_trunc(PG_FUNCTION_ARGS) +{ + text *units = PG_GETARG_TEXT_PP(0); + Interval *interval = PG_GETARG_INTERVAL_P(1); + Interval *result; + int type, + val; + char *lowunits; + struct pg_itm tt, + *tm = &tt; + + result = (Interval *) palloc(sizeof(Interval)); + + lowunits = downcase_truncate_identifier(VARDATA_ANY(units), + VARSIZE_ANY_EXHDR(units), + false); + + type = DecodeUnits(0, lowunits, &val); + + if (type == UNITS) + { + interval2itm(*interval, tm); + switch (val) + { + case DTK_MILLENNIUM: + /* caution: C division may have negative remainder */ + tm->tm_year = (tm->tm_year / 1000) * 1000; + /* FALL THRU */ + case DTK_CENTURY: + /* caution: C division may have negative remainder */ + tm->tm_year = (tm->tm_year / 100) * 100; + /* FALL THRU */ + case DTK_DECADE: + /* caution: C division may have negative remainder */ + tm->tm_year = (tm->tm_year / 10) * 10; + /* FALL THRU */ + case DTK_YEAR: + tm->tm_mon = 0; + /* FALL THRU */ + case DTK_QUARTER: + tm->tm_mon = 3 * (tm->tm_mon / 3); + /* FALL THRU */ + case DTK_MONTH: + tm->tm_mday = 0; + /* FALL THRU */ + case DTK_DAY: + tm->tm_hour = 0; + /* FALL THRU */ + case DTK_HOUR: + tm->tm_min = 0; + /* FALL THRU */ + case DTK_MINUTE: + tm->tm_sec = 0; + /* FALL THRU */ + case DTK_SECOND: + tm->tm_usec = 0; + break; + case DTK_MILLISEC: + tm->tm_usec = (tm->tm_usec / 1000) * 1000; + break; + case DTK_MICROSEC: + break; + + default: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unit \"%s\" not supported for type %s", + lowunits, format_type_be(INTERVALOID)), + (val == DTK_WEEK) ? errdetail("Months usually have fractional weeks.") : 0)); + } + + if (itm2interval(tm, result) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unit \"%s\" not recognized for type %s", + lowunits, format_type_be(INTERVALOID)))); + } + + PG_RETURN_INTERVAL_P(result); +} + +/* isoweek2j() + * + * Return the Julian day which corresponds to the first day (Monday) of the given ISO 8601 year and week. + * Julian days are used to convert between ISO week dates and Gregorian dates. + */ +int +isoweek2j(int year, int week) +{ + int day0, + day4; + + /* fourth day of current year */ + day4 = date2j(year, 1, 4); + + /* day0 == offset to first day of week (Monday) */ + day0 = j2day(day4 - 1); + + return ((week - 1) * 7) + (day4 - day0); +} + +/* isoweek2date() + * Convert ISO week of year number to date. + * The year field must be specified with the ISO year! + * karel 2000/08/07 + */ +void +isoweek2date(int woy, int *year, int *mon, int *mday) +{ + j2date(isoweek2j(*year, woy), year, mon, mday); +} + +/* isoweekdate2date() + * + * Convert an ISO 8601 week date (ISO year, ISO week) into a Gregorian date. + * Gregorian day of week sent so weekday strings can be supplied. + * Populates year, mon, and mday with the correct Gregorian values. + * year must be passed in as the ISO year. + */ +void +isoweekdate2date(int isoweek, int wday, int *year, int *mon, int *mday) +{ + int jday; + + jday = isoweek2j(*year, isoweek); + /* convert Gregorian week start (Sunday=1) to ISO week start (Monday=1) */ + if (wday > 1) + jday += wday - 2; + else + jday += 6; + j2date(jday, year, mon, mday); +} + +/* date2isoweek() + * + * Returns ISO week number of year. 
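+ *
+ * For example, 2005-01-01 belongs to ISO week 53 of 2004, so this returns
+ * 53 for that date.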
+ */ +int +date2isoweek(int year, int mon, int mday) +{ + float8 result; + int day0, + day4, + dayn; + + /* current day */ + dayn = date2j(year, mon, mday); + + /* fourth day of current year */ + day4 = date2j(year, 1, 4); + + /* day0 == offset to first day of week (Monday) */ + day0 = j2day(day4 - 1); + + /* + * We need the first week containing a Thursday, otherwise this day falls + * into the previous year for purposes of counting weeks + */ + if (dayn < day4 - day0) + { + day4 = date2j(year - 1, 1, 4); + + /* day0 == offset to first day of week (Monday) */ + day0 = j2day(day4 - 1); + } + + result = (dayn - (day4 - day0)) / 7 + 1; + + /* + * Sometimes the last few days in a year will fall into the first week of + * the next year, so check for this. + */ + if (result >= 52) + { + day4 = date2j(year + 1, 1, 4); + + /* day0 == offset to first day of week (Monday) */ + day0 = j2day(day4 - 1); + + if (dayn >= day4 - day0) + result = (dayn - (day4 - day0)) / 7 + 1; + } + + return (int) result; +} + + +/* date2isoyear() + * + * Returns ISO 8601 year number. + * Note: zero or negative results follow the year-zero-exists convention. + */ +int +date2isoyear(int year, int mon, int mday) +{ + float8 result; + int day0, + day4, + dayn; + + /* current day */ + dayn = date2j(year, mon, mday); + + /* fourth day of current year */ + day4 = date2j(year, 1, 4); + + /* day0 == offset to first day of week (Monday) */ + day0 = j2day(day4 - 1); + + /* + * We need the first week containing a Thursday, otherwise this day falls + * into the previous year for purposes of counting weeks + */ + if (dayn < day4 - day0) + { + day4 = date2j(year - 1, 1, 4); + + /* day0 == offset to first day of week (Monday) */ + day0 = j2day(day4 - 1); + + year--; + } + + result = (dayn - (day4 - day0)) / 7 + 1; + + /* + * Sometimes the last few days in a year will fall into the first week of + * the next year, so check for this. + */ + if (result >= 52) + { + day4 = date2j(year + 1, 1, 4); + + /* day0 == offset to first day of week (Monday) */ + day0 = j2day(day4 - 1); + + if (dayn >= day4 - day0) + year++; + } + + return year; +} + + +/* date2isoyearday() + * + * Returns the ISO 8601 day-of-year, given a Gregorian year, month and day. + * Possible return values are 1 through 371 (364 in non-leap years). + */ +int +date2isoyearday(int year, int mon, int mday) +{ + return date2j(year, mon, mday) - isoweek2j(date2isoyear(year, mon, mday), 1) + 1; +} + +/* + * NonFiniteTimestampTzPart + * + * Used by timestamp_part and timestamptz_part when extracting from infinite + * timestamp[tz]. Returns +/-Infinity if that is the appropriate result, + * otherwise returns zero (which should be taken as meaning to return NULL). + * + * Errors thrown here for invalid units should exactly match those that + * would be thrown in the calling functions, else there will be unexpected + * discrepancies between finite- and infinite-input cases. + */ +static float8 +NonFiniteTimestampTzPart(int type, int unit, char *lowunits, + bool isNegative, bool isTz) +{ + if ((type != UNITS) && (type != RESERV)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unit \"%s\" not recognized for type %s", + lowunits, + format_type_be(isTz ? 
TIMESTAMPTZOID : TIMESTAMPOID)))); + + switch (unit) + { + /* Oscillating units */ + case DTK_MICROSEC: + case DTK_MILLISEC: + case DTK_SECOND: + case DTK_MINUTE: + case DTK_HOUR: + case DTK_DAY: + case DTK_MONTH: + case DTK_QUARTER: + case DTK_WEEK: + case DTK_DOW: + case DTK_ISODOW: + case DTK_DOY: + case DTK_TZ: + case DTK_TZ_MINUTE: + case DTK_TZ_HOUR: + return 0.0; + + /* Monotonically-increasing units */ + case DTK_YEAR: + case DTK_DECADE: + case DTK_CENTURY: + case DTK_MILLENNIUM: + case DTK_JULIAN: + case DTK_ISOYEAR: + case DTK_EPOCH: + if (isNegative) + return -get_float8_infinity(); + else + return get_float8_infinity(); + + default: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unit \"%s\" not supported for type %s", + lowunits, + format_type_be(isTz ? TIMESTAMPTZOID : TIMESTAMPOID)))); + return 0.0; /* keep compiler quiet */ + } +} + +/* timestamp_part() and extract_timestamp() + * Extract specified field from timestamp. + */ +static Datum +timestamp_part_common(PG_FUNCTION_ARGS, bool retnumeric) +{ + text *units = PG_GETARG_TEXT_PP(0); + Timestamp timestamp = PG_GETARG_TIMESTAMP(1); + int64 intresult; + Timestamp epoch; + int type, + val; + char *lowunits; + fsec_t fsec; + struct pg_tm tt, + *tm = &tt; + + lowunits = downcase_truncate_identifier(VARDATA_ANY(units), + VARSIZE_ANY_EXHDR(units), + false); + + type = DecodeUnits(0, lowunits, &val); + if (type == UNKNOWN_FIELD) + type = DecodeSpecial(0, lowunits, &val); + + if (TIMESTAMP_NOT_FINITE(timestamp)) + { + double r = NonFiniteTimestampTzPart(type, val, lowunits, + TIMESTAMP_IS_NOBEGIN(timestamp), + false); + + if (r) + { + if (retnumeric) + { + if (r < 0) + return DirectFunctionCall3(numeric_in, + CStringGetDatum("-Infinity"), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(-1)); + else if (r > 0) + return DirectFunctionCall3(numeric_in, + CStringGetDatum("Infinity"), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(-1)); + } + else + PG_RETURN_FLOAT8(r); + } + else + PG_RETURN_NULL(); + } + + if (type == UNITS) + { + if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + switch (val) + { + case DTK_MICROSEC: + intresult = tm->tm_sec * INT64CONST(1000000) + fsec; + break; + + case DTK_MILLISEC: + if (retnumeric) + /*--- + * tm->tm_sec * 1000 + fsec / 1000 + * = (tm->tm_sec * 1'000'000 + fsec) / 1000 + */ + PG_RETURN_NUMERIC(int64_div_fast_to_numeric(tm->tm_sec * INT64CONST(1000000) + fsec, 3)); + else + PG_RETURN_FLOAT8(tm->tm_sec * 1000.0 + fsec / 1000.0); + break; + + case DTK_SECOND: + if (retnumeric) + /*--- + * tm->tm_sec + fsec / 1'000'000 + * = (tm->tm_sec * 1'000'000 + fsec) / 1'000'000 + */ + PG_RETURN_NUMERIC(int64_div_fast_to_numeric(tm->tm_sec * INT64CONST(1000000) + fsec, 6)); + else + PG_RETURN_FLOAT8(tm->tm_sec + fsec / 1000000.0); + break; + + case DTK_MINUTE: + intresult = tm->tm_min; + break; + + case DTK_HOUR: + intresult = tm->tm_hour; + break; + + case DTK_DAY: + intresult = tm->tm_mday; + break; + + case DTK_MONTH: + intresult = tm->tm_mon; + break; + + case DTK_QUARTER: + intresult = (tm->tm_mon - 1) / 3 + 1; + break; + + case DTK_WEEK: + intresult = date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday); + break; + + case DTK_YEAR: + if (tm->tm_year > 0) + intresult = tm->tm_year; + else + /* there is no year 0, just 1 BC and 1 AD */ + intresult = tm->tm_year - 1; + break; + + case DTK_DECADE: + + /* + * what is a decade wrt dates? 
let us assume that decade 199 + * is 1990 thru 1999... decade 0 starts on year 1 BC, and -1 + * is 11 BC thru 2 BC... + */ + if (tm->tm_year >= 0) + intresult = tm->tm_year / 10; + else + intresult = -((8 - (tm->tm_year - 1)) / 10); + break; + + case DTK_CENTURY: + + /* ---- + * centuries AD, c>0: year in [ (c-1)* 100 + 1 : c*100 ] + * centuries BC, c<0: year in [ c*100 : (c+1) * 100 - 1] + * there is no number 0 century. + * ---- + */ + if (tm->tm_year > 0) + intresult = (tm->tm_year + 99) / 100; + else + /* caution: C division may have negative remainder */ + intresult = -((99 - (tm->tm_year - 1)) / 100); + break; + + case DTK_MILLENNIUM: + /* see comments above. */ + if (tm->tm_year > 0) + intresult = (tm->tm_year + 999) / 1000; + else + intresult = -((999 - (tm->tm_year - 1)) / 1000); + break; + + case DTK_JULIAN: + if (retnumeric) + PG_RETURN_NUMERIC(numeric_add_opt_error(int64_to_numeric(date2j(tm->tm_year, tm->tm_mon, tm->tm_mday)), + numeric_div_opt_error(int64_to_numeric(((((tm->tm_hour * MINS_PER_HOUR) + tm->tm_min) * SECS_PER_MINUTE) + tm->tm_sec) * INT64CONST(1000000) + fsec), + int64_to_numeric(SECS_PER_DAY * INT64CONST(1000000)), + NULL), + NULL)); + else + PG_RETURN_FLOAT8(date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) + + ((((tm->tm_hour * MINS_PER_HOUR) + tm->tm_min) * SECS_PER_MINUTE) + + tm->tm_sec + (fsec / 1000000.0)) / (double) SECS_PER_DAY); + break; + + case DTK_ISOYEAR: + intresult = date2isoyear(tm->tm_year, tm->tm_mon, tm->tm_mday); + /* Adjust BC years */ + if (intresult <= 0) + intresult -= 1; + break; + + case DTK_DOW: + case DTK_ISODOW: + intresult = j2day(date2j(tm->tm_year, tm->tm_mon, tm->tm_mday)); + if (val == DTK_ISODOW && intresult == 0) + intresult = 7; + break; + + case DTK_DOY: + intresult = (date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) + - date2j(tm->tm_year, 1, 1) + 1); + break; + + case DTK_TZ: + case DTK_TZ_MINUTE: + case DTK_TZ_HOUR: + default: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unit \"%s\" not supported for type %s", + lowunits, format_type_be(TIMESTAMPOID)))); + intresult = 0; + } + } + else if (type == RESERV) + { + switch (val) + { + case DTK_EPOCH: + epoch = SetEpochTimestamp(); + /* (timestamp - epoch) / 1000000 */ + if (retnumeric) + { + Numeric result; + + if (timestamp < (PG_INT64_MAX + epoch)) + result = int64_div_fast_to_numeric(timestamp - epoch, 6); + else + { + result = numeric_div_opt_error(numeric_sub_opt_error(int64_to_numeric(timestamp), + int64_to_numeric(epoch), + NULL), + int64_to_numeric(1000000), + NULL); + result = DatumGetNumeric(DirectFunctionCall2(numeric_round, + NumericGetDatum(result), + Int32GetDatum(6))); + } + PG_RETURN_NUMERIC(result); + } + else + { + float8 result; + + /* try to avoid precision loss in subtraction */ + if (timestamp < (PG_INT64_MAX + epoch)) + result = (timestamp - epoch) / 1000000.0; + else + result = ((float8) timestamp - epoch) / 1000000.0; + PG_RETURN_FLOAT8(result); + } + break; + + default: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unit \"%s\" not supported for type %s", + lowunits, format_type_be(TIMESTAMPOID)))); + intresult = 0; + } + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unit \"%s\" not recognized for type %s", + lowunits, format_type_be(TIMESTAMPOID)))); + intresult = 0; + } + + if (retnumeric) + PG_RETURN_NUMERIC(int64_to_numeric(intresult)); + else + PG_RETURN_FLOAT8(intresult); +} + +Datum +timestamp_part(PG_FUNCTION_ARGS) +{ + return timestamp_part_common(fcinfo, false); 
+} + +Datum +extract_timestamp(PG_FUNCTION_ARGS) +{ + return timestamp_part_common(fcinfo, true); +} + +/* timestamptz_part() and extract_timestamptz() + * Extract specified field from timestamp with time zone. + */ +static Datum +timestamptz_part_common(PG_FUNCTION_ARGS, bool retnumeric) +{ + text *units = PG_GETARG_TEXT_PP(0); + TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(1); + int64 intresult; + Timestamp epoch; + int tz; + int type, + val; + char *lowunits; + fsec_t fsec; + struct pg_tm tt, + *tm = &tt; + + lowunits = downcase_truncate_identifier(VARDATA_ANY(units), + VARSIZE_ANY_EXHDR(units), + false); + + type = DecodeUnits(0, lowunits, &val); + if (type == UNKNOWN_FIELD) + type = DecodeSpecial(0, lowunits, &val); + + if (TIMESTAMP_NOT_FINITE(timestamp)) + { + double r = NonFiniteTimestampTzPart(type, val, lowunits, + TIMESTAMP_IS_NOBEGIN(timestamp), + true); + + if (r) + { + if (retnumeric) + { + if (r < 0) + return DirectFunctionCall3(numeric_in, + CStringGetDatum("-Infinity"), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(-1)); + else if (r > 0) + return DirectFunctionCall3(numeric_in, + CStringGetDatum("Infinity"), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(-1)); + } + else + PG_RETURN_FLOAT8(r); + } + else + PG_RETURN_NULL(); + } + + if (type == UNITS) + { + if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + switch (val) + { + case DTK_TZ: + intresult = -tz; + break; + + case DTK_TZ_MINUTE: + intresult = (-tz / SECS_PER_MINUTE) % MINS_PER_HOUR; + break; + + case DTK_TZ_HOUR: + intresult = -tz / SECS_PER_HOUR; + break; + + case DTK_MICROSEC: + intresult = tm->tm_sec * INT64CONST(1000000) + fsec; + break; + + case DTK_MILLISEC: + if (retnumeric) + /*--- + * tm->tm_sec * 1000 + fsec / 1000 + * = (tm->tm_sec * 1'000'000 + fsec) / 1000 + */ + PG_RETURN_NUMERIC(int64_div_fast_to_numeric(tm->tm_sec * INT64CONST(1000000) + fsec, 3)); + else + PG_RETURN_FLOAT8(tm->tm_sec * 1000.0 + fsec / 1000.0); + break; + + case DTK_SECOND: + if (retnumeric) + /*--- + * tm->tm_sec + fsec / 1'000'000 + * = (tm->tm_sec * 1'000'000 + fsec) / 1'000'000 + */ + PG_RETURN_NUMERIC(int64_div_fast_to_numeric(tm->tm_sec * INT64CONST(1000000) + fsec, 6)); + else + PG_RETURN_FLOAT8(tm->tm_sec + fsec / 1000000.0); + break; + + case DTK_MINUTE: + intresult = tm->tm_min; + break; + + case DTK_HOUR: + intresult = tm->tm_hour; + break; + + case DTK_DAY: + intresult = tm->tm_mday; + break; + + case DTK_MONTH: + intresult = tm->tm_mon; + break; + + case DTK_QUARTER: + intresult = (tm->tm_mon - 1) / 3 + 1; + break; + + case DTK_WEEK: + intresult = date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday); + break; + + case DTK_YEAR: + if (tm->tm_year > 0) + intresult = tm->tm_year; + else + /* there is no year 0, just 1 BC and 1 AD */ + intresult = tm->tm_year - 1; + break; + + case DTK_DECADE: + /* see comments in timestamp_part */ + if (tm->tm_year > 0) + intresult = tm->tm_year / 10; + else + intresult = -((8 - (tm->tm_year - 1)) / 10); + break; + + case DTK_CENTURY: + /* see comments in timestamp_part */ + if (tm->tm_year > 0) + intresult = (tm->tm_year + 99) / 100; + else + intresult = -((99 - (tm->tm_year - 1)) / 100); + break; + + case DTK_MILLENNIUM: + /* see comments in timestamp_part */ + if (tm->tm_year > 0) + intresult = (tm->tm_year + 999) / 1000; + else + intresult = -((999 - (tm->tm_year - 1)) / 1000); + break; + + case DTK_JULIAN: + if (retnumeric) + 
PG_RETURN_NUMERIC(numeric_add_opt_error(int64_to_numeric(date2j(tm->tm_year, tm->tm_mon, tm->tm_mday)), + numeric_div_opt_error(int64_to_numeric(((((tm->tm_hour * MINS_PER_HOUR) + tm->tm_min) * SECS_PER_MINUTE) + tm->tm_sec) * INT64CONST(1000000) + fsec), + int64_to_numeric(SECS_PER_DAY * INT64CONST(1000000)), + NULL), + NULL)); + else + PG_RETURN_FLOAT8(date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) + + ((((tm->tm_hour * MINS_PER_HOUR) + tm->tm_min) * SECS_PER_MINUTE) + + tm->tm_sec + (fsec / 1000000.0)) / (double) SECS_PER_DAY); + break; + + case DTK_ISOYEAR: + intresult = date2isoyear(tm->tm_year, tm->tm_mon, tm->tm_mday); + /* Adjust BC years */ + if (intresult <= 0) + intresult -= 1; + break; + + case DTK_DOW: + case DTK_ISODOW: + intresult = j2day(date2j(tm->tm_year, tm->tm_mon, tm->tm_mday)); + if (val == DTK_ISODOW && intresult == 0) + intresult = 7; + break; + + case DTK_DOY: + intresult = (date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) + - date2j(tm->tm_year, 1, 1) + 1); + break; + + default: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unit \"%s\" not supported for type %s", + lowunits, format_type_be(TIMESTAMPTZOID)))); + intresult = 0; + } + } + else if (type == RESERV) + { + switch (val) + { + case DTK_EPOCH: + epoch = SetEpochTimestamp(); + /* (timestamp - epoch) / 1000000 */ + if (retnumeric) + { + Numeric result; + + if (timestamp < (PG_INT64_MAX + epoch)) + result = int64_div_fast_to_numeric(timestamp - epoch, 6); + else + { + result = numeric_div_opt_error(numeric_sub_opt_error(int64_to_numeric(timestamp), + int64_to_numeric(epoch), + NULL), + int64_to_numeric(1000000), + NULL); + result = DatumGetNumeric(DirectFunctionCall2(numeric_round, + NumericGetDatum(result), + Int32GetDatum(6))); + } + PG_RETURN_NUMERIC(result); + } + else + { + float8 result; + + /* try to avoid precision loss in subtraction */ + if (timestamp < (PG_INT64_MAX + epoch)) + result = (timestamp - epoch) / 1000000.0; + else + result = ((float8) timestamp - epoch) / 1000000.0; + PG_RETURN_FLOAT8(result); + } + break; + + default: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unit \"%s\" not supported for type %s", + lowunits, format_type_be(TIMESTAMPTZOID)))); + intresult = 0; + } + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unit \"%s\" not recognized for type %s", + lowunits, format_type_be(TIMESTAMPTZOID)))); + + intresult = 0; + } + + if (retnumeric) + PG_RETURN_NUMERIC(int64_to_numeric(intresult)); + else + PG_RETURN_FLOAT8(intresult); +} + +Datum +timestamptz_part(PG_FUNCTION_ARGS) +{ + return timestamptz_part_common(fcinfo, false); +} + +Datum +extract_timestamptz(PG_FUNCTION_ARGS) +{ + return timestamptz_part_common(fcinfo, true); +} + + +/* interval_part() and extract_interval() + * Extract specified field from interval. 
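+ *
+ * For example, extracting 'hour' from interval '5 days 03:30:00' yields 3;
+ * the day field is kept separate and is not folded into hours.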
+ */ +static Datum +interval_part_common(PG_FUNCTION_ARGS, bool retnumeric) +{ + text *units = PG_GETARG_TEXT_PP(0); + Interval *interval = PG_GETARG_INTERVAL_P(1); + int64 intresult; + int type, + val; + char *lowunits; + struct pg_itm tt, + *tm = &tt; + + lowunits = downcase_truncate_identifier(VARDATA_ANY(units), + VARSIZE_ANY_EXHDR(units), + false); + + type = DecodeUnits(0, lowunits, &val); + if (type == UNKNOWN_FIELD) + type = DecodeSpecial(0, lowunits, &val); + + if (type == UNITS) + { + interval2itm(*interval, tm); + switch (val) + { + case DTK_MICROSEC: + intresult = tm->tm_sec * INT64CONST(1000000) + tm->tm_usec; + break; + + case DTK_MILLISEC: + if (retnumeric) + /*--- + * tm->tm_sec * 1000 + fsec / 1000 + * = (tm->tm_sec * 1'000'000 + fsec) / 1000 + */ + PG_RETURN_NUMERIC(int64_div_fast_to_numeric(tm->tm_sec * INT64CONST(1000000) + tm->tm_usec, 3)); + else + PG_RETURN_FLOAT8(tm->tm_sec * 1000.0 + tm->tm_usec / 1000.0); + break; + + case DTK_SECOND: + if (retnumeric) + /*--- + * tm->tm_sec + fsec / 1'000'000 + * = (tm->tm_sec * 1'000'000 + fsec) / 1'000'000 + */ + PG_RETURN_NUMERIC(int64_div_fast_to_numeric(tm->tm_sec * INT64CONST(1000000) + tm->tm_usec, 6)); + else + PG_RETURN_FLOAT8(tm->tm_sec + tm->tm_usec / 1000000.0); + break; + + case DTK_MINUTE: + intresult = tm->tm_min; + break; + + case DTK_HOUR: + intresult = tm->tm_hour; + break; + + case DTK_DAY: + intresult = tm->tm_mday; + break; + + case DTK_MONTH: + intresult = tm->tm_mon; + break; + + case DTK_QUARTER: + intresult = (tm->tm_mon / 3) + 1; + break; + + case DTK_YEAR: + intresult = tm->tm_year; + break; + + case DTK_DECADE: + /* caution: C division may have negative remainder */ + intresult = tm->tm_year / 10; + break; + + case DTK_CENTURY: + /* caution: C division may have negative remainder */ + intresult = tm->tm_year / 100; + break; + + case DTK_MILLENNIUM: + /* caution: C division may have negative remainder */ + intresult = tm->tm_year / 1000; + break; + + default: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unit \"%s\" not supported for type %s", + lowunits, format_type_be(INTERVALOID)))); + intresult = 0; + } + } + else if (type == RESERV && val == DTK_EPOCH) + { + if (retnumeric) + { + Numeric result; + int64 secs_from_day_month; + int64 val; + + /* + * To do this calculation in integer arithmetic even though + * DAYS_PER_YEAR is fractional, multiply everything by 4 and then + * divide by 4 again at the end. This relies on DAYS_PER_YEAR + * being a multiple of 0.25 and on SECS_PER_DAY being a multiple + * of 4. + */ + secs_from_day_month = ((int64) (4 * DAYS_PER_YEAR) * (interval->month / MONTHS_PER_YEAR) + + (int64) (4 * DAYS_PER_MONTH) * (interval->month % MONTHS_PER_YEAR) + + (int64) 4 * interval->day) * (SECS_PER_DAY / 4); + + /*--- + * result = secs_from_day_month + interval->time / 1'000'000 + * = (secs_from_day_month * 1'000'000 + interval->time) / 1'000'000 + */ + + /* + * Try the computation inside int64; if it overflows, do it in + * numeric (slower). This overflow happens around 10^9 days, so + * not common in practice. 
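+ *
+ * For a typical input such as interval '1 year' the int64 path is taken
+ * and the result is 31557600 seconds, i.e. 365.25 days.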
+ */ + if (!pg_mul_s64_overflow(secs_from_day_month, 1000000, &val) && + !pg_add_s64_overflow(val, interval->time, &val)) + result = int64_div_fast_to_numeric(val, 6); + else + result = + numeric_add_opt_error(int64_div_fast_to_numeric(interval->time, 6), + int64_to_numeric(secs_from_day_month), + NULL); + + PG_RETURN_NUMERIC(result); + } + else + { + float8 result; + + result = interval->time / 1000000.0; + result += ((double) DAYS_PER_YEAR * SECS_PER_DAY) * (interval->month / MONTHS_PER_YEAR); + result += ((double) DAYS_PER_MONTH * SECS_PER_DAY) * (interval->month % MONTHS_PER_YEAR); + result += ((double) SECS_PER_DAY) * interval->day; + + PG_RETURN_FLOAT8(result); + } + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unit \"%s\" not recognized for type %s", + lowunits, format_type_be(INTERVALOID)))); + intresult = 0; + } + + if (retnumeric) + PG_RETURN_NUMERIC(int64_to_numeric(intresult)); + else + PG_RETURN_FLOAT8(intresult); +} + +Datum +interval_part(PG_FUNCTION_ARGS) +{ + return interval_part_common(fcinfo, false); +} + +Datum +extract_interval(PG_FUNCTION_ARGS) +{ + return interval_part_common(fcinfo, true); +} + + +/* timestamp_zone() + * Encode timestamp type with specified time zone. + * This function is just timestamp2timestamptz() except instead of + * shifting to the global timezone, we shift to the specified timezone. + * This is different from the other AT TIME ZONE cases because instead + * of shifting _to_ a new time zone, it sets the time to _be_ the + * specified timezone. + */ +Datum +timestamp_zone(PG_FUNCTION_ARGS) +{ + text *zone = PG_GETARG_TEXT_PP(0); + Timestamp timestamp = PG_GETARG_TIMESTAMP(1); + TimestampTz result; + int tz; + char tzname[TZ_STRLEN_MAX + 1]; + int type, + val; + pg_tz *tzp; + struct pg_tm tm; + fsec_t fsec; + + if (TIMESTAMP_NOT_FINITE(timestamp)) + PG_RETURN_TIMESTAMPTZ(timestamp); + + /* + * Look up the requested timezone. + */ + text_to_cstring_buffer(zone, tzname, sizeof(tzname)); + + type = DecodeTimezoneName(tzname, &val, &tzp); + + if (type == TZNAME_FIXED_OFFSET) + { + /* fixed-offset abbreviation */ + tz = val; + result = dt2local(timestamp, tz); + } + else if (type == TZNAME_DYNTZ) + { + /* dynamic-offset abbreviation, resolve using specified time */ + if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, tzp) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + tz = -DetermineTimeZoneAbbrevOffset(&tm, tzname, tzp); + result = dt2local(timestamp, tz); + } + else + { + /* full zone name, rotate to that zone */ + if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, tzp) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + tz = DetermineTimeZoneOffset(&tm, tzp); + if (tm2timestamp(&tm, fsec, &tz, &result) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + } + + if (!IS_VALID_TIMESTAMP(result)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + PG_RETURN_TIMESTAMPTZ(result); +} + +/* timestamp_izone() + * Encode timestamp type with specified time interval as time zone. 
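+ *
+ * This backs AT TIME ZONE when the zone is given as an interval; the
+ * interval is treated as a fixed UTC offset (for instance, interval
+ * '-05:00' behaves like a fixed UTC-5 zone).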
+ */ +Datum +timestamp_izone(PG_FUNCTION_ARGS) +{ + Interval *zone = PG_GETARG_INTERVAL_P(0); + Timestamp timestamp = PG_GETARG_TIMESTAMP(1); + TimestampTz result; + int tz; + + if (TIMESTAMP_NOT_FINITE(timestamp)) + PG_RETURN_TIMESTAMPTZ(timestamp); + + if (zone->month != 0 || zone->day != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("interval time zone \"%s\" must not include months or days", + DatumGetCString(DirectFunctionCall1(interval_out, + PointerGetDatum(zone)))))); + + tz = zone->time / USECS_PER_SEC; + + result = dt2local(timestamp, tz); + + if (!IS_VALID_TIMESTAMP(result)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + PG_RETURN_TIMESTAMPTZ(result); +} /* timestamp_izone() */ + +/* TimestampTimestampTzRequiresRewrite() + * + * Returns false if the TimeZone GUC setting causes timestamp_timestamptz and + * timestamptz_timestamp to be no-ops, where the return value has the same + * bits as the argument. Since project convention is to assume a GUC changes + * no more often than STABLE functions change, the answer is valid that long. + */ +bool +TimestampTimestampTzRequiresRewrite(void) +{ + long offset; + + if (pg_get_timezone_offset(session_timezone, &offset) && offset == 0) + return false; + return true; +} + +/* timestamp_timestamptz() + * Convert local timestamp to timestamp at GMT + */ +Datum +timestamp_timestamptz(PG_FUNCTION_ARGS) +{ + Timestamp timestamp = PG_GETARG_TIMESTAMP(0); + + PG_RETURN_TIMESTAMPTZ(timestamp2timestamptz(timestamp)); +} + +/* + * Convert timestamp to timestamp with time zone. + * + * On successful conversion, *overflow is set to zero if it's not NULL. + * + * If the timestamp is finite but out of the valid range for timestamptz, then: + * if overflow is NULL, we throw an out-of-range error. + * if overflow is not NULL, we store +1 or -1 there to indicate the sign + * of the overflow, and return the appropriate timestamptz infinity. + */ +TimestampTz +timestamp2timestamptz_opt_overflow(Timestamp timestamp, int *overflow) +{ + TimestampTz result; + struct pg_tm tt, + *tm = &tt; + fsec_t fsec; + int tz; + + if (overflow) + *overflow = 0; + + if (TIMESTAMP_NOT_FINITE(timestamp)) + return timestamp; + + /* We don't expect this to fail, but check it pro forma */ + if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) == 0) + { + tz = DetermineTimeZoneOffset(tm, session_timezone); + + result = dt2local(timestamp, -tz); + + if (IS_VALID_TIMESTAMP(result)) + { + return result; + } + else if (overflow) + { + if (result < MIN_TIMESTAMP) + { + *overflow = -1; + TIMESTAMP_NOBEGIN(result); + } + else + { + *overflow = 1; + TIMESTAMP_NOEND(result); + } + return result; + } + } + + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + return 0; +} + +/* + * Promote timestamp to timestamptz, throwing error for overflow. 
+ */ +static TimestampTz +timestamp2timestamptz(Timestamp timestamp) +{ + return timestamp2timestamptz_opt_overflow(timestamp, NULL); +} + +/* timestamptz_timestamp() + * Convert timestamp at GMT to local timestamp + */ +Datum +timestamptz_timestamp(PG_FUNCTION_ARGS) +{ + TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(0); + + PG_RETURN_TIMESTAMP(timestamptz2timestamp(timestamp)); +} + +static Timestamp +timestamptz2timestamp(TimestampTz timestamp) +{ + Timestamp result; + struct pg_tm tt, + *tm = &tt; + fsec_t fsec; + int tz; + + if (TIMESTAMP_NOT_FINITE(timestamp)) + result = timestamp; + else + { + if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + if (tm2timestamp(tm, fsec, NULL, &result) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + } + return result; +} + +/* timestamptz_zone() + * Evaluate timestamp with time zone type at the specified time zone. + * Returns a timestamp without time zone. + */ +Datum +timestamptz_zone(PG_FUNCTION_ARGS) +{ + text *zone = PG_GETARG_TEXT_PP(0); + TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(1); + Timestamp result; + int tz; + char tzname[TZ_STRLEN_MAX + 1]; + int type, + val; + pg_tz *tzp; + + if (TIMESTAMP_NOT_FINITE(timestamp)) + PG_RETURN_TIMESTAMP(timestamp); + + /* + * Look up the requested timezone. + */ + text_to_cstring_buffer(zone, tzname, sizeof(tzname)); + + type = DecodeTimezoneName(tzname, &val, &tzp); + + if (type == TZNAME_FIXED_OFFSET) + { + /* fixed-offset abbreviation */ + tz = -val; + result = dt2local(timestamp, tz); + } + else if (type == TZNAME_DYNTZ) + { + /* dynamic-offset abbreviation, resolve using specified time */ + int isdst; + + tz = DetermineTimeZoneAbbrevOffsetTS(timestamp, tzname, tzp, &isdst); + result = dt2local(timestamp, tz); + } + else + { + /* full zone name, rotate from that zone */ + struct pg_tm tm; + fsec_t fsec; + + if (timestamp2tm(timestamp, &tz, &tm, &fsec, NULL, tzp) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + if (tm2timestamp(&tm, fsec, NULL, &result) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + } + + if (!IS_VALID_TIMESTAMP(result)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + PG_RETURN_TIMESTAMP(result); +} + +/* timestamptz_izone() + * Encode timestamp with time zone type with specified time interval as time zone. + * Returns a timestamp without time zone. 
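+ *
+ * This is the converse of timestamp_izone() above: the timestamptz is
+ * rendered as local time in the fixed-offset zone given by the interval.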
+ */ +Datum +timestamptz_izone(PG_FUNCTION_ARGS) +{ + Interval *zone = PG_GETARG_INTERVAL_P(0); + TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(1); + Timestamp result; + int tz; + + if (TIMESTAMP_NOT_FINITE(timestamp)) + PG_RETURN_TIMESTAMP(timestamp); + + if (zone->month != 0 || zone->day != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("interval time zone \"%s\" must not include months or days", + DatumGetCString(DirectFunctionCall1(interval_out, + PointerGetDatum(zone)))))); + + tz = -(zone->time / USECS_PER_SEC); + + result = dt2local(timestamp, tz); + + if (!IS_VALID_TIMESTAMP(result)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + PG_RETURN_TIMESTAMP(result); +} + +/* generate_series_timestamp() + * Generate the set of timestamps from start to finish by step + */ +Datum +generate_series_timestamp(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + generate_series_timestamp_fctx *fctx; + Timestamp result; + + /* stuff done only on the first call of the function */ + if (SRF_IS_FIRSTCALL()) + { + Timestamp start = PG_GETARG_TIMESTAMP(0); + Timestamp finish = PG_GETARG_TIMESTAMP(1); + Interval *step = PG_GETARG_INTERVAL_P(2); + MemoryContext oldcontext; + const Interval interval_zero = {0}; + + /* create a function context for cross-call persistence */ + funcctx = SRF_FIRSTCALL_INIT(); + + /* + * switch to memory context appropriate for multiple function calls + */ + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* allocate memory for user context */ + fctx = (generate_series_timestamp_fctx *) + palloc(sizeof(generate_series_timestamp_fctx)); + + /* + * Use fctx to keep state from call to call. Seed current with the + * original start value + */ + fctx->current = start; + fctx->finish = finish; + fctx->step = *step; + + /* Determine sign of the interval */ + fctx->step_sign = interval_cmp_internal(&fctx->step, &interval_zero); + + if (fctx->step_sign == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("step size cannot equal zero"))); + + funcctx->user_fctx = fctx; + MemoryContextSwitchTo(oldcontext); + } + + /* stuff done on every call of the function */ + funcctx = SRF_PERCALL_SETUP(); + + /* + * get the saved state and use current as the result for this iteration + */ + fctx = funcctx->user_fctx; + result = fctx->current; + + if (fctx->step_sign > 0 ? + timestamp_cmp_internal(result, fctx->finish) <= 0 : + timestamp_cmp_internal(result, fctx->finish) >= 0) + { + /* increment current in preparation for next iteration */ + fctx->current = DatumGetTimestamp(DirectFunctionCall2(timestamp_pl_interval, + TimestampGetDatum(fctx->current), + PointerGetDatum(&fctx->step))); + + /* do when there is more left to send */ + SRF_RETURN_NEXT(funcctx, TimestampGetDatum(result)); + } + else + { + /* do when there is no more left */ + SRF_RETURN_DONE(funcctx); + } +} + +/* generate_series_timestamptz() + * Generate the set of timestamps from start to finish by step, + * doing arithmetic in the specified or session timezone. + */ +static Datum +generate_series_timestamptz_internal(FunctionCallInfo fcinfo) +{ + FuncCallContext *funcctx; + generate_series_timestamptz_fctx *fctx; + TimestampTz result; + + /* stuff done only on the first call of the function */ + if (SRF_IS_FIRSTCALL()) + { + TimestampTz start = PG_GETARG_TIMESTAMPTZ(0); + TimestampTz finish = PG_GETARG_TIMESTAMPTZ(1); + Interval *step = PG_GETARG_INTERVAL_P(2); + text *zone = (PG_NARGS() == 4) ? 
PG_GETARG_TEXT_PP(3) : NULL; + MemoryContext oldcontext; + const Interval interval_zero = {0}; + + /* create a function context for cross-call persistence */ + funcctx = SRF_FIRSTCALL_INIT(); + + /* + * switch to memory context appropriate for multiple function calls + */ + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* allocate memory for user context */ + fctx = (generate_series_timestamptz_fctx *) + palloc(sizeof(generate_series_timestamptz_fctx)); + + /* + * Use fctx to keep state from call to call. Seed current with the + * original start value + */ + fctx->current = start; + fctx->finish = finish; + fctx->step = *step; + fctx->attimezone = zone ? lookup_timezone(zone) : session_timezone; + + /* Determine sign of the interval */ + fctx->step_sign = interval_cmp_internal(&fctx->step, &interval_zero); + + if (fctx->step_sign == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("step size cannot equal zero"))); + + funcctx->user_fctx = fctx; + MemoryContextSwitchTo(oldcontext); + } + + /* stuff done on every call of the function */ + funcctx = SRF_PERCALL_SETUP(); + + /* + * get the saved state and use current as the result for this iteration + */ + fctx = funcctx->user_fctx; + result = fctx->current; + + if (fctx->step_sign > 0 ? + timestamp_cmp_internal(result, fctx->finish) <= 0 : + timestamp_cmp_internal(result, fctx->finish) >= 0) + { + /* increment current in preparation for next iteration */ + fctx->current = timestamptz_pl_interval_internal(fctx->current, + &fctx->step, + fctx->attimezone); + + /* do when there is more left to send */ + SRF_RETURN_NEXT(funcctx, TimestampTzGetDatum(result)); + } + else + { + /* do when there is no more left */ + SRF_RETURN_DONE(funcctx); + } +} + +Datum +generate_series_timestamptz(PG_FUNCTION_ARGS) +{ + return generate_series_timestamptz_internal(fcinfo); +} + +Datum +generate_series_timestamptz_at_zone(PG_FUNCTION_ARGS) +{ + return generate_series_timestamptz_internal(fcinfo); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/trigfuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/trigfuncs.c new file mode 100644 index 00000000000..d9a616f603d --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/trigfuncs.c @@ -0,0 +1,85 @@ +/*------------------------------------------------------------------------- + * + * trigfuncs.c + * Builtin functions for useful trigger support. + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/utils/adt/trigfuncs.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "commands/trigger.h" +#include "utils/builtins.h" +#include "utils/rel.h" + + +/* + * suppress_redundant_updates_trigger + * + * This trigger function will inhibit an update from being done + * if the OLD and NEW records are identical. 
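+ *
+ * A typical way to attach it is:
+ *   CREATE TRIGGER z_min_update
+ *   BEFORE UPDATE ON tablename
+ *   FOR EACH ROW EXECUTE FUNCTION suppress_redundant_updates_trigger();
+ * with a name chosen so it fires after any other BEFORE UPDATE triggers.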
+ */ +Datum +suppress_redundant_updates_trigger(PG_FUNCTION_ARGS) +{ + TriggerData *trigdata = (TriggerData *) fcinfo->context; + HeapTuple newtuple, + oldtuple, + rettuple; + HeapTupleHeader newheader, + oldheader; + + /* make sure it's called as a trigger */ + if (!CALLED_AS_TRIGGER(fcinfo)) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("suppress_redundant_updates_trigger: must be called as trigger"))); + + /* and that it's called on update */ + if (!TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("suppress_redundant_updates_trigger: must be called on update"))); + + /* and that it's called before update */ + if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event)) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("suppress_redundant_updates_trigger: must be called before update"))); + + /* and that it's called for each row */ + if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event)) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("suppress_redundant_updates_trigger: must be called for each row"))); + + /* get tuple data, set default result */ + rettuple = newtuple = trigdata->tg_newtuple; + oldtuple = trigdata->tg_trigtuple; + + newheader = newtuple->t_data; + oldheader = oldtuple->t_data; + + /* if the tuple payload is the same ... */ + if (newtuple->t_len == oldtuple->t_len && + newheader->t_hoff == oldheader->t_hoff && + (HeapTupleHeaderGetNatts(newheader) == + HeapTupleHeaderGetNatts(oldheader)) && + ((newheader->t_infomask & ~HEAP_XACT_MASK) == + (oldheader->t_infomask & ~HEAP_XACT_MASK)) && + memcmp(((char *) newheader) + SizeofHeapTupleHeader, + ((char *) oldheader) + SizeofHeapTupleHeader, + newtuple->t_len - SizeofHeapTupleHeader) == 0) + { + /* ... 
then suppress the update */ + rettuple = NULL; + } + + return PointerGetDatum(rettuple); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsginidx.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsginidx.c new file mode 100644 index 00000000000..484a003827d --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsginidx.c @@ -0,0 +1,355 @@ +/*------------------------------------------------------------------------- + * + * tsginidx.c + * GIN support functions for tsvector_ops + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/tsginidx.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/gin.h" +#include "access/stratnum.h" +#include "miscadmin.h" +#include "tsearch/ts_type.h" +#include "tsearch/ts_utils.h" +#include "utils/builtins.h" +#include "varatt.h" + + +Datum +gin_cmp_tslexeme(PG_FUNCTION_ARGS) +{ + text *a = PG_GETARG_TEXT_PP(0); + text *b = PG_GETARG_TEXT_PP(1); + int cmp; + + cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a), + VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b), + false); + + PG_FREE_IF_COPY(a, 0); + PG_FREE_IF_COPY(b, 1); + PG_RETURN_INT32(cmp); +} + +Datum +gin_cmp_prefix(PG_FUNCTION_ARGS) +{ + text *a = PG_GETARG_TEXT_PP(0); + text *b = PG_GETARG_TEXT_PP(1); + +#ifdef NOT_USED + StrategyNumber strategy = PG_GETARG_UINT16(2); + Pointer extra_data = PG_GETARG_POINTER(3); +#endif + int cmp; + + cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a), + VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b), + true); + + if (cmp < 0) + cmp = 1; /* prevent continue scan */ + + PG_FREE_IF_COPY(a, 0); + PG_FREE_IF_COPY(b, 1); + PG_RETURN_INT32(cmp); +} + +Datum +gin_extract_tsvector(PG_FUNCTION_ARGS) +{ + TSVector vector = PG_GETARG_TSVECTOR(0); + int32 *nentries = (int32 *) PG_GETARG_POINTER(1); + Datum *entries = NULL; + + *nentries = vector->size; + if (vector->size > 0) + { + int i; + WordEntry *we = ARRPTR(vector); + + entries = (Datum *) palloc(sizeof(Datum) * vector->size); + + for (i = 0; i < vector->size; i++) + { + text *txt; + + txt = cstring_to_text_with_len(STRPTR(vector) + we->pos, we->len); + entries[i] = PointerGetDatum(txt); + + we++; + } + } + + PG_FREE_IF_COPY(vector, 0); + PG_RETURN_POINTER(entries); +} + +Datum +gin_extract_tsquery(PG_FUNCTION_ARGS) +{ + TSQuery query = PG_GETARG_TSQUERY(0); + int32 *nentries = (int32 *) PG_GETARG_POINTER(1); + + /* StrategyNumber strategy = PG_GETARG_UINT16(2); */ + bool **ptr_partialmatch = (bool **) PG_GETARG_POINTER(3); + Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4); + + /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */ + int32 *searchMode = (int32 *) PG_GETARG_POINTER(6); + Datum *entries = NULL; + + *nentries = 0; + + if (query->size > 0) + { + QueryItem *item = GETQUERY(query); + int32 i, + j; + bool *partialmatch; + int *map_item_operand; + + /* + * If the query doesn't have any required positive matches (for + * instance, it's something like '! foo'), we have to do a full index + * scan. 
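+ *
+ * For example, '!foo' alone has no required positive match and forces the
+ * GIN_SEARCH_MODE_ALL path, whereas 'foo & !bar' still requires 'foo' and
+ * can use the default search mode.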
+ */ + if (tsquery_requires_match(item)) + *searchMode = GIN_SEARCH_MODE_DEFAULT; + else + *searchMode = GIN_SEARCH_MODE_ALL; + + /* count number of VAL items */ + j = 0; + for (i = 0; i < query->size; i++) + { + if (item[i].type == QI_VAL) + j++; + } + *nentries = j; + + entries = (Datum *) palloc(sizeof(Datum) * j); + partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * j); + + /* + * Make map to convert item's number to corresponding operand's (the + * same, entry's) number. Entry's number is used in check array in + * consistent method. We use the same map for each entry. + */ + *extra_data = (Pointer *) palloc(sizeof(Pointer) * j); + map_item_operand = (int *) palloc0(sizeof(int) * query->size); + + /* Now rescan the VAL items and fill in the arrays */ + j = 0; + for (i = 0; i < query->size; i++) + { + if (item[i].type == QI_VAL) + { + QueryOperand *val = &item[i].qoperand; + text *txt; + + txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance, + val->length); + entries[j] = PointerGetDatum(txt); + partialmatch[j] = val->prefix; + (*extra_data)[j] = (Pointer) map_item_operand; + map_item_operand[i] = j; + j++; + } + } + } + + PG_FREE_IF_COPY(query, 0); + + PG_RETURN_POINTER(entries); +} + +typedef struct +{ + QueryItem *first_item; + GinTernaryValue *check; + int *map_item_operand; +} GinChkVal; + +/* + * TS_execute callback for matching a tsquery operand to GIN index data + */ +static TSTernaryValue +checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data) +{ + GinChkVal *gcv = (GinChkVal *) checkval; + int j; + GinTernaryValue result; + + /* convert item's number to corresponding entry's (operand's) number */ + j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item]; + + /* determine presence of current entry in indexed value */ + result = gcv->check[j]; + + /* + * If any val requiring a weight is used or caller needs position + * information then we must recheck, so replace TRUE with MAYBE. + */ + if (result == GIN_TRUE) + { + if (val->weight != 0 || data != NULL) + result = GIN_MAYBE; + } + + /* + * We rely on GinTernaryValue and TSTernaryValue using equivalent value + * assignments. We could use a switch statement to map the values if that + * ever stops being true, but it seems unlikely to happen. + */ + return (TSTernaryValue) result; +} + +Datum +gin_tsquery_consistent(PG_FUNCTION_ARGS) +{ + bool *check = (bool *) PG_GETARG_POINTER(0); + + /* StrategyNumber strategy = PG_GETARG_UINT16(1); */ + TSQuery query = PG_GETARG_TSQUERY(2); + + /* int32 nkeys = PG_GETARG_INT32(3); */ + Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); + bool *recheck = (bool *) PG_GETARG_POINTER(5); + bool res = false; + + /* Initially assume query doesn't require recheck */ + *recheck = false; + + if (query->size > 0) + { + GinChkVal gcv; + + /* + * check-parameter array has one entry for each value (operand) in the + * query. 
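+ *
+ * For example, the query 'fat & (rat | cat)' produces three such entries,
+ * one per lexeme operand.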
+ */ + gcv.first_item = GETQUERY(query); + gcv.check = (GinTernaryValue *) check; + gcv.map_item_operand = (int *) (extra_data[0]); + + switch (TS_execute_ternary(GETQUERY(query), + &gcv, + TS_EXEC_PHRASE_NO_POS, + checkcondition_gin)) + { + case TS_NO: + res = false; + break; + case TS_YES: + res = true; + break; + case TS_MAYBE: + res = true; + *recheck = true; + break; + } + } + + PG_RETURN_BOOL(res); +} + +Datum +gin_tsquery_triconsistent(PG_FUNCTION_ARGS) +{ + GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0); + + /* StrategyNumber strategy = PG_GETARG_UINT16(1); */ + TSQuery query = PG_GETARG_TSQUERY(2); + + /* int32 nkeys = PG_GETARG_INT32(3); */ + Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); + GinTernaryValue res = GIN_FALSE; + + if (query->size > 0) + { + GinChkVal gcv; + + /* + * check-parameter array has one entry for each value (operand) in the + * query. + */ + gcv.first_item = GETQUERY(query); + gcv.check = check; + gcv.map_item_operand = (int *) (extra_data[0]); + + res = TS_execute_ternary(GETQUERY(query), + &gcv, + TS_EXEC_PHRASE_NO_POS, + checkcondition_gin); + } + + PG_RETURN_GIN_TERNARY_VALUE(res); +} + +/* + * Formerly, gin_extract_tsvector had only two arguments. Now it has three, + * but we still need a pg_proc entry with two args to support reloading + * pre-9.1 contrib/tsearch2 opclass declarations. This compatibility + * function should go away eventually. (Note: you might say "hey, but the + * code above is only *using* two args, so let's just declare it that way". + * If you try that you'll find the opr_sanity regression test complains.) + */ +Datum +gin_extract_tsvector_2args(PG_FUNCTION_ARGS) +{ + if (PG_NARGS() < 3) /* should not happen */ + elog(ERROR, "gin_extract_tsvector requires three arguments"); + return gin_extract_tsvector(fcinfo); +} + +/* + * Likewise, we need a stub version of gin_extract_tsquery declared with + * only five arguments. + */ +Datum +gin_extract_tsquery_5args(PG_FUNCTION_ARGS) +{ + if (PG_NARGS() < 7) /* should not happen */ + elog(ERROR, "gin_extract_tsquery requires seven arguments"); + return gin_extract_tsquery(fcinfo); +} + +/* + * Likewise, we need a stub version of gin_tsquery_consistent declared with + * only six arguments. + */ +Datum +gin_tsquery_consistent_6args(PG_FUNCTION_ARGS) +{ + if (PG_NARGS() < 8) /* should not happen */ + elog(ERROR, "gin_tsquery_consistent requires eight arguments"); + return gin_tsquery_consistent(fcinfo); +} + +/* + * Likewise, a stub version of gin_extract_tsquery declared with argument + * types that are no longer considered appropriate. + */ +Datum +gin_extract_tsquery_oldsig(PG_FUNCTION_ARGS) +{ + return gin_extract_tsquery(fcinfo); +} + +/* + * Likewise, a stub version of gin_tsquery_consistent declared with argument + * types that are no longer considered appropriate. 
+ */ +Datum +gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS) +{ + return gin_tsquery_consistent(fcinfo); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsgistidx.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsgistidx.c new file mode 100644 index 00000000000..f76fe608be2 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsgistidx.c @@ -0,0 +1,818 @@ +/*------------------------------------------------------------------------- + * + * tsgistidx.c + * GiST support functions for tsvector_ops + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/tsgistidx.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/gist.h" +#include "access/heaptoast.h" +#include "access/reloptions.h" +#include "lib/qunique.h" +#include "port/pg_bitutils.h" +#include "tsearch/ts_utils.h" +#include "utils/builtins.h" +#include "utils/pg_crc.h" + + +/* tsvector_ops opclass options */ +typedef struct +{ + int32 vl_len_; /* varlena header (do not touch directly!) */ + int siglen; /* signature length */ +} GistTsVectorOptions; + +#define SIGLEN_DEFAULT (31 * 4) +#define SIGLEN_MAX GISTMaxIndexKeySize +#define GET_SIGLEN() (PG_HAS_OPCLASS_OPTIONS() ? \ + ((GistTsVectorOptions *) PG_GET_OPCLASS_OPTIONS())->siglen : \ + SIGLEN_DEFAULT) + +#define SIGLENBIT(siglen) ((siglen) * BITS_PER_BYTE) + +typedef char *BITVECP; + +#define LOOPBYTE(siglen) \ + for (i = 0; i < siglen; i++) + +#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITS_PER_BYTE ) ) ) +#define GETBITBYTE(x,i) ( ((char)(x)) >> (i) & 0x01 ) +#define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITS_PER_BYTE ) ) +#define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITS_PER_BYTE ) ) +#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITS_PER_BYTE )) & 0x01 ) + +#define HASHVAL(val, siglen) (((unsigned int)(val)) % SIGLENBIT(siglen)) +#define HASH(sign, val, siglen) SETBIT((sign), HASHVAL(val, siglen)) + +#define GETENTRY(vec,pos) ((SignTSVector *) DatumGetPointer((vec)->vector[(pos)].key)) + +/* + * type of GiST index key + */ + +typedef struct +{ + int32 vl_len_; /* varlena header (do not touch directly!) */ + int32 flag; + char data[FLEXIBLE_ARRAY_MEMBER]; +} SignTSVector; + +#define ARRKEY 0x01 +#define SIGNKEY 0x02 +#define ALLISTRUE 0x04 + +#define ISARRKEY(x) ( ((SignTSVector*)(x))->flag & ARRKEY ) +#define ISSIGNKEY(x) ( ((SignTSVector*)(x))->flag & SIGNKEY ) +#define ISALLTRUE(x) ( ((SignTSVector*)(x))->flag & ALLISTRUE ) + +#define GTHDRSIZE ( VARHDRSZ + sizeof(int32) ) +#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int32)) : (((flag) & ALLISTRUE) ? 
0 : (len)) ) ) + +#define GETSIGN(x) ( (BITVECP)( (char*)(x)+GTHDRSIZE ) ) +#define GETSIGLEN(x)( VARSIZE(x) - GTHDRSIZE ) +#define GETARR(x) ( (int32*)( (char*)(x)+GTHDRSIZE ) ) +#define ARRNELEM(x) ( ( VARSIZE(x) - GTHDRSIZE )/sizeof(int32) ) + +static int32 sizebitvec(BITVECP sign, int siglen); + +Datum +gtsvectorin(PG_FUNCTION_ARGS) +{ + /* There's no need to support input of gtsvectors */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of type %s", "gtsvector"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + +#define SINGOUTSTR "%d true bits, %d false bits" +#define ARROUTSTR "%d unique words" +#define EXTRALEN ( 2*13 ) + +static __thread int outbuf_maxlen = 0; + +Datum +gtsvectorout(PG_FUNCTION_ARGS) +{ + SignTSVector *key = (SignTSVector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + char *outbuf; + + if (outbuf_maxlen == 0) + outbuf_maxlen = 2 * EXTRALEN + Max(strlen(SINGOUTSTR), strlen(ARROUTSTR)) + 1; + outbuf = palloc(outbuf_maxlen); + + if (ISARRKEY(key)) + sprintf(outbuf, ARROUTSTR, (int) ARRNELEM(key)); + else + { + int siglen = GETSIGLEN(key); + int cnttrue = (ISALLTRUE(key)) ? SIGLENBIT(siglen) : sizebitvec(GETSIGN(key), siglen); + + sprintf(outbuf, SINGOUTSTR, cnttrue, (int) SIGLENBIT(siglen) - cnttrue); + } + + PG_FREE_IF_COPY(key, 0); + PG_RETURN_POINTER(outbuf); +} + +static int +compareint(const void *va, const void *vb) +{ + int32 a = *((const int32 *) va); + int32 b = *((const int32 *) vb); + + if (a == b) + return 0; + return (a > b) ? 1 : -1; +} + +static void +makesign(BITVECP sign, SignTSVector *a, int siglen) +{ + int32 k, + len = ARRNELEM(a); + int32 *ptr = GETARR(a); + + MemSet(sign, 0, siglen); + for (k = 0; k < len; k++) + HASH(sign, ptr[k], siglen); +} + +static SignTSVector * +gtsvector_alloc(int flag, int len, BITVECP sign) +{ + int size = CALCGTSIZE(flag, len); + SignTSVector *res = palloc(size); + + SET_VARSIZE(res, size); + res->flag = flag; + + if ((flag & (SIGNKEY | ALLISTRUE)) == SIGNKEY && sign) + memcpy(GETSIGN(res), sign, len); + + return res; +} + + +Datum +gtsvector_compress(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + int siglen = GET_SIGLEN(); + GISTENTRY *retval = entry; + + if (entry->leafkey) + { /* tsvector */ + TSVector val = DatumGetTSVector(entry->key); + SignTSVector *res = gtsvector_alloc(ARRKEY, val->size, NULL); + int32 len; + int32 *arr; + WordEntry *ptr = ARRPTR(val); + char *words = STRPTR(val); + + arr = GETARR(res); + len = val->size; + while (len--) + { + pg_crc32 c; + + INIT_LEGACY_CRC32(c); + COMP_LEGACY_CRC32(c, words + ptr->pos, ptr->len); + FIN_LEGACY_CRC32(c); + + *arr = *(int32 *) &c; + arr++; + ptr++; + } + + qsort(GETARR(res), val->size, sizeof(int), compareint); + len = qunique(GETARR(res), val->size, sizeof(int), compareint); + if (len != val->size) + { + /* + * there is a collision of hash-function; len is always less than + * val->size + */ + len = CALCGTSIZE(ARRKEY, len); + res = (SignTSVector *) repalloc(res, len); + SET_VARSIZE(res, len); + } + + /* make signature, if array is too long */ + if (VARSIZE(res) > TOAST_INDEX_TARGET) + { + SignTSVector *ressign = gtsvector_alloc(SIGNKEY, siglen, NULL); + + makesign(GETSIGN(ressign), res, siglen); + res = ressign; + } + + retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); + gistentryinit(*retval, PointerGetDatum(res), + entry->rel, entry->page, + entry->offset, false); + } + else if (ISSIGNKEY(DatumGetPointer(entry->key)) && + !ISALLTRUE(DatumGetPointer(entry->key))) + { + int32 i; + 
SignTSVector *res; + BITVECP sign = GETSIGN(DatumGetPointer(entry->key)); + + LOOPBYTE(siglen) + { + if ((sign[i] & 0xff) != 0xff) + PG_RETURN_POINTER(retval); + } + + res = gtsvector_alloc(SIGNKEY | ALLISTRUE, siglen, sign); + retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); + gistentryinit(*retval, PointerGetDatum(res), + entry->rel, entry->page, + entry->offset, false); + } + PG_RETURN_POINTER(retval); +} + +Datum +gtsvector_decompress(PG_FUNCTION_ARGS) +{ + /* + * We need to detoast the stored value, because the other gtsvector + * support functions don't cope with toasted values. + */ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + SignTSVector *key = (SignTSVector *) PG_DETOAST_DATUM(entry->key); + + if (key != (SignTSVector *) DatumGetPointer(entry->key)) + { + GISTENTRY *retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); + + gistentryinit(*retval, PointerGetDatum(key), + entry->rel, entry->page, + entry->offset, false); + + PG_RETURN_POINTER(retval); + } + + PG_RETURN_POINTER(entry); +} + +typedef struct +{ + int32 *arrb; + int32 *arre; +} CHKVAL; + +/* + * TS_execute callback for matching a tsquery operand to GIST leaf-page data + */ +static TSTernaryValue +checkcondition_arr(void *checkval, QueryOperand *val, ExecPhraseData *data) +{ + int32 *StopLow = ((CHKVAL *) checkval)->arrb; + int32 *StopHigh = ((CHKVAL *) checkval)->arre; + int32 *StopMiddle; + + /* Loop invariant: StopLow <= val < StopHigh */ + + /* + * we are not able to find a prefix by hash value + */ + if (val->prefix) + return TS_MAYBE; + + while (StopLow < StopHigh) + { + StopMiddle = StopLow + (StopHigh - StopLow) / 2; + if (*StopMiddle == val->valcrc) + return TS_MAYBE; + else if (*StopMiddle < val->valcrc) + StopLow = StopMiddle + 1; + else + StopHigh = StopMiddle; + } + + return TS_NO; +} + +/* + * TS_execute callback for matching a tsquery operand to GIST non-leaf data + */ +static TSTernaryValue +checkcondition_bit(void *checkval, QueryOperand *val, ExecPhraseData *data) +{ + void *key = (SignTSVector *) checkval; + + /* + * we are not able to find a prefix in signature tree + */ + if (val->prefix) + return TS_MAYBE; + + if (GETBIT(GETSIGN(key), HASHVAL(val->valcrc, GETSIGLEN(key)))) + return TS_MAYBE; + else + return TS_NO; +} + +Datum +gtsvector_consistent(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + TSQuery query = PG_GETARG_TSQUERY(1); + + /* StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); */ + /* Oid subtype = PG_GETARG_OID(3); */ + bool *recheck = (bool *) PG_GETARG_POINTER(4); + SignTSVector *key = (SignTSVector *) DatumGetPointer(entry->key); + + /* All cases served by this function are inexact */ + *recheck = true; + + if (!query->size) + PG_RETURN_BOOL(false); + + if (ISSIGNKEY(key)) + { + if (ISALLTRUE(key)) + PG_RETURN_BOOL(true); + + PG_RETURN_BOOL(TS_execute(GETQUERY(query), + key, + TS_EXEC_PHRASE_NO_POS, + checkcondition_bit)); + } + else + { /* only leaf pages */ + CHKVAL chkval; + + chkval.arrb = GETARR(key); + chkval.arre = chkval.arrb + ARRNELEM(key); + PG_RETURN_BOOL(TS_execute(GETQUERY(query), + (void *) &chkval, + TS_EXEC_PHRASE_NO_POS, + checkcondition_arr)); + } +} + +static int32 +unionkey(BITVECP sbase, SignTSVector *add, int siglen) +{ + int32 i; + + if (ISSIGNKEY(add)) + { + BITVECP sadd = GETSIGN(add); + + if (ISALLTRUE(add)) + return 1; + + Assert(GETSIGLEN(add) == siglen); + + LOOPBYTE(siglen) + sbase[i] |= sadd[i]; + } + else + { + int32 *ptr = GETARR(add); + + for (i = 0; i < ARRNELEM(add); i++) + HASH(sbase, 
ptr[i], siglen); + } + return 0; +} + + +Datum +gtsvector_union(PG_FUNCTION_ARGS) +{ + GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); + int *size = (int *) PG_GETARG_POINTER(1); + int siglen = GET_SIGLEN(); + SignTSVector *result = gtsvector_alloc(SIGNKEY, siglen, NULL); + BITVECP base = GETSIGN(result); + int32 i; + + memset(base, 0, siglen); + + for (i = 0; i < entryvec->n; i++) + { + if (unionkey(base, GETENTRY(entryvec, i), siglen)) + { + result->flag |= ALLISTRUE; + SET_VARSIZE(result, CALCGTSIZE(result->flag, siglen)); + break; + } + } + + *size = VARSIZE(result); + + PG_RETURN_POINTER(result); +} + +Datum +gtsvector_same(PG_FUNCTION_ARGS) +{ + SignTSVector *a = (SignTSVector *) PG_GETARG_POINTER(0); + SignTSVector *b = (SignTSVector *) PG_GETARG_POINTER(1); + bool *result = (bool *) PG_GETARG_POINTER(2); + int siglen = GET_SIGLEN(); + + if (ISSIGNKEY(a)) + { /* then b also ISSIGNKEY */ + if (ISALLTRUE(a) && ISALLTRUE(b)) + *result = true; + else if (ISALLTRUE(a)) + *result = false; + else if (ISALLTRUE(b)) + *result = false; + else + { + int32 i; + BITVECP sa = GETSIGN(a), + sb = GETSIGN(b); + + Assert(GETSIGLEN(a) == siglen && GETSIGLEN(b) == siglen); + + *result = true; + LOOPBYTE(siglen) + { + if (sa[i] != sb[i]) + { + *result = false; + break; + } + } + } + } + else + { /* a and b ISARRKEY */ + int32 lena = ARRNELEM(a), + lenb = ARRNELEM(b); + + if (lena != lenb) + *result = false; + else + { + int32 *ptra = GETARR(a), + *ptrb = GETARR(b); + int32 i; + + *result = true; + for (i = 0; i < lena; i++) + if (ptra[i] != ptrb[i]) + { + *result = false; + break; + } + } + } + + PG_RETURN_POINTER(result); +} + +static int32 +sizebitvec(BITVECP sign, int siglen) +{ + return pg_popcount(sign, siglen); +} + +static int +hemdistsign(BITVECP a, BITVECP b, int siglen) +{ + int i, + diff, + dist = 0; + + LOOPBYTE(siglen) + { + diff = (unsigned char) (a[i] ^ b[i]); + /* Using the popcount functions here isn't likely to win */ + dist += pg_number_of_ones[diff]; + } + return dist; +} + +static int +hemdist(SignTSVector *a, SignTSVector *b) +{ + int siglena = GETSIGLEN(a); + int siglenb = GETSIGLEN(b); + + if (ISALLTRUE(a)) + { + if (ISALLTRUE(b)) + return 0; + else + return SIGLENBIT(siglenb) - sizebitvec(GETSIGN(b), siglenb); + } + else if (ISALLTRUE(b)) + return SIGLENBIT(siglena) - sizebitvec(GETSIGN(a), siglena); + + Assert(siglena == siglenb); + + return hemdistsign(GETSIGN(a), GETSIGN(b), siglena); +} + +Datum +gtsvector_penalty(PG_FUNCTION_ARGS) +{ + GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */ + GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1); + float *penalty = (float *) PG_GETARG_POINTER(2); + int siglen = GET_SIGLEN(); + SignTSVector *origval = (SignTSVector *) DatumGetPointer(origentry->key); + SignTSVector *newval = (SignTSVector *) DatumGetPointer(newentry->key); + BITVECP orig = GETSIGN(origval); + + *penalty = 0.0; + + if (ISARRKEY(newval)) + { + BITVECP sign = palloc(siglen); + + makesign(sign, newval, siglen); + + if (ISALLTRUE(origval)) + { + int siglenbit = SIGLENBIT(siglen); + + *penalty = + (float) (siglenbit - sizebitvec(sign, siglen)) / + (float) (siglenbit + 1); + } + else + *penalty = hemdistsign(sign, orig, siglen); + + pfree(sign); + } + else + *penalty = hemdist(origval, newval); + PG_RETURN_POINTER(penalty); +} + +typedef struct +{ + bool allistrue; + BITVECP sign; +} CACHESIGN; + +static void +fillcache(CACHESIGN *item, SignTSVector *key, int siglen) +{ + item->allistrue = false; + if 
(ISARRKEY(key)) + makesign(item->sign, key, siglen); + else if (ISALLTRUE(key)) + item->allistrue = true; + else + memcpy(item->sign, GETSIGN(key), siglen); +} + +#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) ) +typedef struct +{ + OffsetNumber pos; + int32 cost; +} SPLITCOST; + +static int +comparecost(const void *va, const void *vb) +{ + const SPLITCOST *a = (const SPLITCOST *) va; + const SPLITCOST *b = (const SPLITCOST *) vb; + + if (a->cost == b->cost) + return 0; + else + return (a->cost > b->cost) ? 1 : -1; +} + + +static int +hemdistcache(CACHESIGN *a, CACHESIGN *b, int siglen) +{ + if (a->allistrue) + { + if (b->allistrue) + return 0; + else + return SIGLENBIT(siglen) - sizebitvec(b->sign, siglen); + } + else if (b->allistrue) + return SIGLENBIT(siglen) - sizebitvec(a->sign, siglen); + + return hemdistsign(a->sign, b->sign, siglen); +} + +Datum +gtsvector_picksplit(PG_FUNCTION_ARGS) +{ + GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); + GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); + int siglen = GET_SIGLEN(); + OffsetNumber k, + j; + SignTSVector *datum_l, + *datum_r; + BITVECP union_l, + union_r; + int32 size_alpha, + size_beta; + int32 size_waste, + waste = -1; + int32 nbytes; + OffsetNumber seed_1 = 0, + seed_2 = 0; + OffsetNumber *left, + *right; + OffsetNumber maxoff; + BITVECP ptr; + int i; + CACHESIGN *cache; + char *cache_sign; + SPLITCOST *costvector; + + maxoff = entryvec->n - 2; + nbytes = (maxoff + 2) * sizeof(OffsetNumber); + v->spl_left = (OffsetNumber *) palloc(nbytes); + v->spl_right = (OffsetNumber *) palloc(nbytes); + + cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2)); + cache_sign = palloc(siglen * (maxoff + 2)); + + for (j = 0; j < maxoff + 2; j++) + cache[j].sign = &cache_sign[siglen * j]; + + fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber), + siglen); + + for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) + { + for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) + { + if (k == FirstOffsetNumber) + fillcache(&cache[j], GETENTRY(entryvec, j), siglen); + + size_waste = hemdistcache(&(cache[j]), &(cache[k]), siglen); + if (size_waste > waste) + { + waste = size_waste; + seed_1 = k; + seed_2 = j; + } + } + } + + left = v->spl_left; + v->spl_nleft = 0; + right = v->spl_right; + v->spl_nright = 0; + + if (seed_1 == 0 || seed_2 == 0) + { + seed_1 = 1; + seed_2 = 2; + } + + /* form initial .. */ + datum_l = gtsvector_alloc(SIGNKEY | (cache[seed_1].allistrue ? ALLISTRUE : 0), + siglen, cache[seed_1].sign); + datum_r = gtsvector_alloc(SIGNKEY | (cache[seed_2].allistrue ? ALLISTRUE : 0), + siglen, cache[seed_2].sign); + union_l = GETSIGN(datum_l); + union_r = GETSIGN(datum_r); + maxoff = OffsetNumberNext(maxoff); + fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff), siglen); + /* sort before ... 
*/ + costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff); + for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) + { + costvector[j - 1].pos = j; + size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]), siglen); + size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]), siglen); + costvector[j - 1].cost = abs(size_alpha - size_beta); + } + qsort(costvector, maxoff, sizeof(SPLITCOST), comparecost); + + for (k = 0; k < maxoff; k++) + { + j = costvector[k].pos; + if (j == seed_1) + { + *left++ = j; + v->spl_nleft++; + continue; + } + else if (j == seed_2) + { + *right++ = j; + v->spl_nright++; + continue; + } + + if (ISALLTRUE(datum_l) || cache[j].allistrue) + { + if (ISALLTRUE(datum_l) && cache[j].allistrue) + size_alpha = 0; + else + size_alpha = SIGLENBIT(siglen) - + sizebitvec((cache[j].allistrue) ? + GETSIGN(datum_l) : + cache[j].sign, + siglen); + } + else + size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l), siglen); + + if (ISALLTRUE(datum_r) || cache[j].allistrue) + { + if (ISALLTRUE(datum_r) && cache[j].allistrue) + size_beta = 0; + else + size_beta = SIGLENBIT(siglen) - + sizebitvec((cache[j].allistrue) ? + GETSIGN(datum_r) : + cache[j].sign, + siglen); + } + else + size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r), siglen); + + if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) + { + if (ISALLTRUE(datum_l) || cache[j].allistrue) + { + if (!ISALLTRUE(datum_l)) + memset(GETSIGN(datum_l), 0xff, siglen); + } + else + { + ptr = cache[j].sign; + LOOPBYTE(siglen) + union_l[i] |= ptr[i]; + } + *left++ = j; + v->spl_nleft++; + } + else + { + if (ISALLTRUE(datum_r) || cache[j].allistrue) + { + if (!ISALLTRUE(datum_r)) + memset(GETSIGN(datum_r), 0xff, siglen); + } + else + { + ptr = cache[j].sign; + LOOPBYTE(siglen) + union_r[i] |= ptr[i]; + } + *right++ = j; + v->spl_nright++; + } + } + + *right = *left = FirstOffsetNumber; + v->spl_ldatum = PointerGetDatum(datum_l); + v->spl_rdatum = PointerGetDatum(datum_r); + + PG_RETURN_POINTER(v); +} + +/* + * Formerly, gtsvector_consistent was declared in pg_proc.h with arguments + * that did not match the documented conventions for GiST support functions. + * We fixed that, but we still need a pg_proc entry with the old signature + * to support reloading pre-9.6 contrib/tsearch2 opclass declarations. + * This compatibility function should go away eventually. 
+ */ +Datum +gtsvector_consistent_oldsig(PG_FUNCTION_ARGS) +{ + return gtsvector_consistent(fcinfo); +} + +Datum +gtsvector_options(PG_FUNCTION_ARGS) +{ + local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0); + + init_local_reloptions(relopts, sizeof(GistTsVectorOptions)); + add_local_int_reloption(relopts, "siglen", "signature length", + SIGLEN_DEFAULT, 1, SIGLEN_MAX, + offsetof(GistTsVectorOptions, siglen)); + + PG_RETURN_VOID(); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsquery.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsquery.c new file mode 100644 index 00000000000..67ad876a27c --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsquery.c @@ -0,0 +1,1402 @@ +/*------------------------------------------------------------------------- + * + * tsquery.c + * I/O functions for tsquery + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/tsquery.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "libpq/pqformat.h" +#include "miscadmin.h" +#include "nodes/miscnodes.h" +#include "tsearch/ts_locale.h" +#include "tsearch/ts_type.h" +#include "tsearch/ts_utils.h" +#include "utils/builtins.h" +#include "utils/memutils.h" +#include "utils/pg_crc.h" +#include "varatt.h" + +/* FTS operator priorities, see ts_type.h */ +const int tsearch_op_priority[OP_COUNT] = +{ + 4, /* OP_NOT */ + 2, /* OP_AND */ + 1, /* OP_OR */ + 3 /* OP_PHRASE */ +}; + +/* + * parser's states + */ +typedef enum +{ + WAITOPERAND = 1, + WAITOPERATOR = 2, + WAITFIRSTOPERAND = 3 +} ts_parserstate; + +/* + * token types for parsing + */ +typedef enum +{ + PT_END = 0, + PT_ERR = 1, + PT_VAL = 2, + PT_OPR = 3, + PT_OPEN = 4, + PT_CLOSE = 5 +} ts_tokentype; + +/* + * get token from query string + * + * All arguments except "state" are output arguments. + * + * If return value is PT_OPR, then *operator is filled with an OP_* code + * and *weight will contain a distance value in case of phrase operator. + * + * If return value is PT_VAL, then *lenval, *strval, *weight, and *prefix + * are filled. + * + * If PT_ERR is returned then a soft error has occurred. If state->escontext + * isn't already filled then this should be reported as a generic parse error. + */ +typedef ts_tokentype (*ts_tokenizer) (TSQueryParserState state, int8 *operator, + int *lenval, char **strval, + int16 *weight, bool *prefix); + +struct TSQueryParserStateData +{ + /* Tokenizer used for parsing tsquery */ + ts_tokenizer gettoken; + + /* State of tokenizer function */ + char *buffer; /* entire string we are scanning */ + char *buf; /* current scan point */ + int count; /* nesting count, incremented by (, + * decremented by ) */ + ts_parserstate state; + + /* polish (prefix) notation in list, filled in by push* functions */ + List *polstr; + + /* + * Strings from operands are collected in op. curop is a pointer to the + * end of used space of op. + */ + char *op; + char *curop; + int lenop; /* allocated size of op */ + int sumlen; /* used size of op */ + + /* state for value's parser */ + TSVectorParseState valstate; + + /* context object for soft errors - must match valstate's escontext */ + Node *escontext; +}; + +/* + * subroutine to parse the modifiers (weight and prefix flag currently) + * part, like ':AB*' of a query. 
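+ * For example, ':AB*' sets the A (1 << 3) and B (1 << 2) weight bits and marks the operand as a prefix match; scanning stops at the first character that is not a recognized modifier.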
+ */ +static char * +get_modifiers(char *buf, int16 *weight, bool *prefix) +{ + *weight = 0; + *prefix = false; + + if (!t_iseq(buf, ':')) + return buf; + + buf++; + while (*buf && pg_mblen(buf) == 1) + { + switch (*buf) + { + case 'a': + case 'A': + *weight |= 1 << 3; + break; + case 'b': + case 'B': + *weight |= 1 << 2; + break; + case 'c': + case 'C': + *weight |= 1 << 1; + break; + case 'd': + case 'D': + *weight |= 1; + break; + case '*': + *prefix = true; + break; + default: + return buf; + } + buf++; + } + + return buf; +} + +/* + * Parse phrase operator. The operator + * may take the following forms: + * + * a <N> b (distance is exactly N lexemes) + * a <-> b (default distance = 1) + * + * The buffer should begin with '<' char + */ +static bool +parse_phrase_operator(TSQueryParserState pstate, int16 *distance) +{ + enum + { + PHRASE_OPEN = 0, + PHRASE_DIST, + PHRASE_CLOSE, + PHRASE_FINISH + } state = PHRASE_OPEN; + char *ptr = pstate->buf; + char *endptr; + long l = 1; /* default distance */ + + while (*ptr) + { + switch (state) + { + case PHRASE_OPEN: + if (t_iseq(ptr, '<')) + { + state = PHRASE_DIST; + ptr++; + } + else + return false; + break; + + case PHRASE_DIST: + if (t_iseq(ptr, '-')) + { + state = PHRASE_CLOSE; + ptr++; + continue; + } + + if (!t_isdigit(ptr)) + return false; + + errno = 0; + l = strtol(ptr, &endptr, 10); + if (ptr == endptr) + return false; + else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS) + ereturn(pstate->escontext, false, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("distance in phrase operator must be an integer value between zero and %d inclusive", + MAXENTRYPOS))); + else + { + state = PHRASE_CLOSE; + ptr = endptr; + } + break; + + case PHRASE_CLOSE: + if (t_iseq(ptr, '>')) + { + state = PHRASE_FINISH; + ptr++; + } + else + return false; + break; + + case PHRASE_FINISH: + *distance = (int16) l; + pstate->buf = ptr; + return true; + } + } + + return false; +} + +/* + * Parse OR operator used in websearch_to_tsquery(), returns true if we + * believe that "OR" literal could be an operator OR + */ +static bool +parse_or_operator(TSQueryParserState pstate) +{ + char *ptr = pstate->buf; + + /* it should begin with "OR" literal */ + if (pg_strncasecmp(ptr, "or", 2) != 0) + return false; + + ptr += 2; + + /* + * it shouldn't be a part of any word but somewhere later it should be + * some operand + */ + if (*ptr == '\0') /* no operand */ + return false; + + /* it shouldn't be a part of any word */ + if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalnum(ptr)) + return false; + + for (;;) + { + ptr += pg_mblen(ptr); + + if (*ptr == '\0') /* got end of string without operand */ + return false; + + /* + * Suppose, we found an operand, but could be a not correct operand. 
+ * So we still treat OR literal as operation with possibly incorrect + * operand and will not search it as lexeme + */ + if (!t_isspace(ptr)) + break; + } + + pstate->buf += 2; + return true; +} + +static ts_tokentype +gettoken_query_standard(TSQueryParserState state, int8 *operator, + int *lenval, char **strval, + int16 *weight, bool *prefix) +{ + *weight = 0; + *prefix = false; + + while (true) + { + switch (state->state) + { + case WAITFIRSTOPERAND: + case WAITOPERAND: + if (t_iseq(state->buf, '!')) + { + state->buf++; + state->state = WAITOPERAND; + *operator = OP_NOT; + return PT_OPR; + } + else if (t_iseq(state->buf, '(')) + { + state->buf++; + state->state = WAITOPERAND; + state->count++; + return PT_OPEN; + } + else if (t_iseq(state->buf, ':')) + { + /* generic syntax error message is fine */ + return PT_ERR; + } + else if (!t_isspace(state->buf)) + { + /* + * We rely on the tsvector parser to parse the value for + * us + */ + reset_tsvector_parser(state->valstate, state->buf); + if (gettoken_tsvector(state->valstate, strval, lenval, + NULL, NULL, &state->buf)) + { + state->buf = get_modifiers(state->buf, weight, prefix); + state->state = WAITOPERATOR; + return PT_VAL; + } + else if (SOFT_ERROR_OCCURRED(state->escontext)) + { + /* gettoken_tsvector reported a soft error */ + return PT_ERR; + } + else if (state->state == WAITFIRSTOPERAND) + { + return PT_END; + } + else + ereturn(state->escontext, PT_ERR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("no operand in tsquery: \"%s\"", + state->buffer))); + } + break; + + case WAITOPERATOR: + if (t_iseq(state->buf, '&')) + { + state->buf++; + state->state = WAITOPERAND; + *operator = OP_AND; + return PT_OPR; + } + else if (t_iseq(state->buf, '|')) + { + state->buf++; + state->state = WAITOPERAND; + *operator = OP_OR; + return PT_OPR; + } + else if (parse_phrase_operator(state, weight)) + { + /* weight var is used as storage for distance */ + state->state = WAITOPERAND; + *operator = OP_PHRASE; + return PT_OPR; + } + else if (SOFT_ERROR_OCCURRED(state->escontext)) + { + /* parse_phrase_operator reported a soft error */ + return PT_ERR; + } + else if (t_iseq(state->buf, ')')) + { + state->buf++; + state->count--; + return (state->count < 0) ? PT_ERR : PT_CLOSE; + } + else if (*state->buf == '\0') + { + return (state->count) ? 
PT_ERR : PT_END; + } + else if (!t_isspace(state->buf)) + { + return PT_ERR; + } + break; + } + + state->buf += pg_mblen(state->buf); + } +} + +static ts_tokentype +gettoken_query_websearch(TSQueryParserState state, int8 *operator, + int *lenval, char **strval, + int16 *weight, bool *prefix) +{ + *weight = 0; + *prefix = false; + + while (true) + { + switch (state->state) + { + case WAITFIRSTOPERAND: + case WAITOPERAND: + if (t_iseq(state->buf, '-')) + { + state->buf++; + state->state = WAITOPERAND; + + *operator = OP_NOT; + return PT_OPR; + } + else if (t_iseq(state->buf, '"')) + { + /* Everything in quotes is processed as a single token */ + + /* skip opening quote */ + state->buf++; + *strval = state->buf; + + /* iterate to the closing quote or end of the string */ + while (*state->buf != '\0' && !t_iseq(state->buf, '"')) + state->buf++; + *lenval = state->buf - *strval; + + /* skip closing quote if not end of the string */ + if (*state->buf != '\0') + state->buf++; + + state->state = WAITOPERATOR; + state->count++; + return PT_VAL; + } + else if (ISOPERATOR(state->buf)) + { + /* or else gettoken_tsvector() will raise an error */ + state->buf++; + state->state = WAITOPERAND; + continue; + } + else if (!t_isspace(state->buf)) + { + /* + * We rely on the tsvector parser to parse the value for + * us + */ + reset_tsvector_parser(state->valstate, state->buf); + if (gettoken_tsvector(state->valstate, strval, lenval, + NULL, NULL, &state->buf)) + { + state->state = WAITOPERATOR; + return PT_VAL; + } + else if (SOFT_ERROR_OCCURRED(state->escontext)) + { + /* gettoken_tsvector reported a soft error */ + return PT_ERR; + } + else if (state->state == WAITFIRSTOPERAND) + { + return PT_END; + } + else + { + /* finally, we have to provide an operand */ + pushStop(state); + return PT_END; + } + } + break; + + case WAITOPERATOR: + if (t_iseq(state->buf, '"')) + { + /* + * put implicit AND after an operand and handle this quote + * in WAITOPERAND + */ + state->state = WAITOPERAND; + *operator = OP_AND; + return PT_OPR; + } + else if (parse_or_operator(state)) + { + state->state = WAITOPERAND; + *operator = OP_OR; + return PT_OPR; + } + else if (*state->buf == '\0') + { + return PT_END; + } + else if (!t_isspace(state->buf)) + { + /* put implicit AND after an operand */ + *operator = OP_AND; + state->state = WAITOPERAND; + return PT_OPR; + } + break; + } + + state->buf += pg_mblen(state->buf); + } +} + +static ts_tokentype +gettoken_query_plain(TSQueryParserState state, int8 *operator, + int *lenval, char **strval, + int16 *weight, bool *prefix) +{ + *weight = 0; + *prefix = false; + + if (*state->buf == '\0') + return PT_END; + + *strval = state->buf; + *lenval = strlen(state->buf); + state->buf += *lenval; + state->count++; + return PT_VAL; +} + +/* + * Push an operator to state->polstr + */ +void +pushOperator(TSQueryParserState state, int8 oper, int16 distance) +{ + QueryOperator *tmp; + + Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR || oper == OP_PHRASE); + + tmp = (QueryOperator *) palloc0(sizeof(QueryOperator)); + tmp->type = QI_OPR; + tmp->oper = oper; + tmp->distance = (oper == OP_PHRASE) ? 
distance : 0; + /* left is filled in later with findoprnd */ + + state->polstr = lcons(tmp, state->polstr); +} + +static void +pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight, bool prefix) +{ + QueryOperand *tmp; + + if (distance >= MAXSTRPOS) + ereturn(state->escontext,, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("value is too big in tsquery: \"%s\"", + state->buffer))); + if (lenval >= MAXSTRLEN) + ereturn(state->escontext,, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("operand is too long in tsquery: \"%s\"", + state->buffer))); + + tmp = (QueryOperand *) palloc0(sizeof(QueryOperand)); + tmp->type = QI_VAL; + tmp->weight = weight; + tmp->prefix = prefix; + tmp->valcrc = (int32) valcrc; + tmp->length = lenval; + tmp->distance = distance; + + state->polstr = lcons(tmp, state->polstr); +} + +/* + * Push an operand to state->polstr. + * + * strval must point to a string equal to state->curop. lenval is the length + * of the string. + */ +void +pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix) +{ + pg_crc32 valcrc; + + if (lenval >= MAXSTRLEN) + ereturn(state->escontext,, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("word is too long in tsquery: \"%s\"", + state->buffer))); + + INIT_LEGACY_CRC32(valcrc); + COMP_LEGACY_CRC32(valcrc, strval, lenval); + FIN_LEGACY_CRC32(valcrc); + pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix); + + /* append the value string to state.op, enlarging buffer if needed first */ + while (state->curop - state->op + lenval + 1 >= state->lenop) + { + int used = state->curop - state->op; + + state->lenop *= 2; + state->op = (char *) repalloc(state->op, state->lenop); + state->curop = state->op + used; + } + memcpy(state->curop, strval, lenval); + state->curop += lenval; + *(state->curop) = '\0'; + state->curop++; + state->sumlen += lenval + 1 /* \0 */ ; +} + + +/* + * Push a stopword placeholder to state->polstr + */ +void +pushStop(TSQueryParserState state) +{ + QueryOperand *tmp; + + tmp = (QueryOperand *) palloc0(sizeof(QueryOperand)); + tmp->type = QI_VALSTOP; + + state->polstr = lcons(tmp, state->polstr); +} + + +#define STACKDEPTH 32 + +typedef struct OperatorElement +{ + int8 op; + int16 distance; +} OperatorElement; + +static void +pushOpStack(OperatorElement *stack, int *lenstack, int8 op, int16 distance) +{ + if (*lenstack == STACKDEPTH) /* internal error */ + elog(ERROR, "tsquery stack too small"); + + stack[*lenstack].op = op; + stack[*lenstack].distance = distance; + + (*lenstack)++; +} + +static void +cleanOpStack(TSQueryParserState state, + OperatorElement *stack, int *lenstack, int8 op) +{ + int opPriority = OP_PRIORITY(op); + + while (*lenstack) + { + /* NOT is right associative unlike to others */ + if ((op != OP_NOT && opPriority > OP_PRIORITY(stack[*lenstack - 1].op)) || + (op == OP_NOT && opPriority >= OP_PRIORITY(stack[*lenstack - 1].op))) + break; + + (*lenstack)--; + pushOperator(state, stack[*lenstack].op, + stack[*lenstack].distance); + } +} + +/* + * Make polish (prefix) notation of query. + * + * See parse_tsquery for explanation of pushval. 
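+ * Operators are collected on a small local stack and flushed by priority (see cleanOpStack), with OP_NOT treated as right-associative, so state->polstr ends up in the prefix (polish) form the rest of the code expects.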
+ */ +static void +makepol(TSQueryParserState state, + PushFunction pushval, + Datum opaque) +{ + int8 operator = 0; + ts_tokentype type; + int lenval = 0; + char *strval = NULL; + OperatorElement opstack[STACKDEPTH]; + int lenstack = 0; + int16 weight = 0; + bool prefix; + + /* since this function recurses, it could be driven to stack overflow */ + check_stack_depth(); + + while ((type = state->gettoken(state, &operator, + &lenval, &strval, + &weight, &prefix)) != PT_END) + { + switch (type) + { + case PT_VAL: + pushval(opaque, state, strval, lenval, weight, prefix); + break; + case PT_OPR: + cleanOpStack(state, opstack, &lenstack, operator); + pushOpStack(opstack, &lenstack, operator, weight); + break; + case PT_OPEN: + makepol(state, pushval, opaque); + break; + case PT_CLOSE: + cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ ); + return; + case PT_ERR: + default: + /* don't overwrite a soft error saved by gettoken function */ + if (!SOFT_ERROR_OCCURRED(state->escontext)) + errsave(state->escontext, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("syntax error in tsquery: \"%s\"", + state->buffer))); + return; + } + /* detect soft error in pushval or recursion */ + if (SOFT_ERROR_OCCURRED(state->escontext)) + return; + } + + cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ ); +} + +static void +findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup) +{ + /* since this function recurses, it could be driven to stack overflow. */ + check_stack_depth(); + + if (*pos >= nnodes) + elog(ERROR, "malformed tsquery: operand not found"); + + if (ptr[*pos].type == QI_VAL) + { + (*pos)++; + } + else if (ptr[*pos].type == QI_VALSTOP) + { + *needcleanup = true; /* we'll have to remove stop words */ + (*pos)++; + } + else + { + Assert(ptr[*pos].type == QI_OPR); + + if (ptr[*pos].qoperator.oper == OP_NOT) + { + ptr[*pos].qoperator.left = 1; /* fixed offset */ + (*pos)++; + + /* process the only argument */ + findoprnd_recurse(ptr, pos, nnodes, needcleanup); + } + else + { + QueryOperator *curitem = &ptr[*pos].qoperator; + int tmp = *pos; /* save current position */ + + Assert(curitem->oper == OP_AND || + curitem->oper == OP_OR || + curitem->oper == OP_PHRASE); + + (*pos)++; + + /* process RIGHT argument */ + findoprnd_recurse(ptr, pos, nnodes, needcleanup); + + curitem->left = *pos - tmp; /* set LEFT arg's offset */ + + /* process LEFT argument */ + findoprnd_recurse(ptr, pos, nnodes, needcleanup); + } + } +} + + +/* + * Fill in the left-fields previously left unfilled. + * The input QueryItems must be in polish (prefix) notation. + * Also, set *needcleanup to true if there are any QI_VALSTOP nodes. + */ +static void +findoprnd(QueryItem *ptr, int size, bool *needcleanup) +{ + uint32 pos; + + *needcleanup = false; + pos = 0; + findoprnd_recurse(ptr, &pos, size, needcleanup); + + if (pos != size) + elog(ERROR, "malformed tsquery: extra nodes"); +} + + +/* + * Parse the tsquery stored in "buf". + * + * Each value (operand) in the query is passed to pushval. pushval can + * transform the simple value to an arbitrarily complex expression using + * pushValue and pushOperator. It must push a single value with pushValue, + * a complete expression with all operands, or a stopword placeholder + * with pushStop, otherwise the prefix notation representation will be broken, + * having an operator with no operand. + * + * opaque is passed on to pushval as is, pushval can use it to store its + * private state. + * + * The pushval function can record soft errors via escontext. 
+ * Callers must check SOFT_ERROR_OCCURRED to detect that. + * + * A bitmask of flags (see ts_utils.h) and an error context object + * can be provided as well. If a soft error occurs, NULL is returned. + */ +TSQuery +parse_tsquery(char *buf, + PushFunction pushval, + Datum opaque, + int flags, + Node *escontext) +{ + struct TSQueryParserStateData state; + int i; + TSQuery query; + int commonlen; + QueryItem *ptr; + ListCell *cell; + bool noisy; + bool needcleanup; + int tsv_flags = P_TSV_OPR_IS_DELIM | P_TSV_IS_TSQUERY; + + /* plain should not be used with web */ + Assert((flags & (P_TSQ_PLAIN | P_TSQ_WEB)) != (P_TSQ_PLAIN | P_TSQ_WEB)); + + /* select suitable tokenizer */ + if (flags & P_TSQ_PLAIN) + state.gettoken = gettoken_query_plain; + else if (flags & P_TSQ_WEB) + { + state.gettoken = gettoken_query_websearch; + tsv_flags |= P_TSV_IS_WEB; + } + else + state.gettoken = gettoken_query_standard; + + /* emit nuisance NOTICEs only if not doing soft errors */ + noisy = !(escontext && IsA(escontext, ErrorSaveContext)); + + /* init state */ + state.buffer = buf; + state.buf = buf; + state.count = 0; + state.state = WAITFIRSTOPERAND; + state.polstr = NIL; + state.escontext = escontext; + + /* init value parser's state */ + state.valstate = init_tsvector_parser(state.buffer, tsv_flags, escontext); + + /* init list of operand */ + state.sumlen = 0; + state.lenop = 64; + state.curop = state.op = (char *) palloc(state.lenop); + *(state.curop) = '\0'; + + /* parse query & make polish notation (postfix, but in reverse order) */ + makepol(&state, pushval, opaque); + + close_tsvector_parser(state.valstate); + + if (SOFT_ERROR_OCCURRED(escontext)) + return NULL; + + if (state.polstr == NIL) + { + if (noisy) + ereport(NOTICE, + (errmsg("text-search query doesn't contain lexemes: \"%s\"", + state.buffer))); + query = (TSQuery) palloc(HDRSIZETQ); + SET_VARSIZE(query, HDRSIZETQ); + query->size = 0; + return query; + } + + if (TSQUERY_TOO_BIG(list_length(state.polstr), state.sumlen)) + ereturn(escontext, NULL, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("tsquery is too large"))); + commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen); + + /* Pack the QueryItems in the final TSQuery struct to return to caller */ + query = (TSQuery) palloc0(commonlen); + SET_VARSIZE(query, commonlen); + query->size = list_length(state.polstr); + ptr = GETQUERY(query); + + /* Copy QueryItems to TSQuery */ + i = 0; + foreach(cell, state.polstr) + { + QueryItem *item = (QueryItem *) lfirst(cell); + + switch (item->type) + { + case QI_VAL: + memcpy(&ptr[i], item, sizeof(QueryOperand)); + break; + case QI_VALSTOP: + ptr[i].type = QI_VALSTOP; + break; + case QI_OPR: + memcpy(&ptr[i], item, sizeof(QueryOperator)); + break; + default: + elog(ERROR, "unrecognized QueryItem type: %d", item->type); + } + i++; + } + + /* Copy all the operand strings to TSQuery */ + memcpy(GETOPERAND(query), state.op, state.sumlen); + pfree(state.op); + + /* + * Set left operand pointers for every operator. While we're at it, + * detect whether there are any QI_VALSTOP nodes. + */ + findoprnd(ptr, query->size, &needcleanup); + + /* + * If there are QI_VALSTOP nodes, delete them and simplify the tree. 
+ */ + if (needcleanup) + query = cleanup_tsquery_stopwords(query, noisy); + + return query; +} + +static void +pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval, + int16 weight, bool prefix) +{ + pushValue(state, strval, lenval, weight, prefix); +} + +/* + * in without morphology + */ +Datum +tsqueryin(PG_FUNCTION_ARGS) +{ + char *in = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + + PG_RETURN_TSQUERY(parse_tsquery(in, + pushval_asis, + PointerGetDatum(NULL), + 0, + escontext)); +} + +/* + * out function + */ +typedef struct +{ + QueryItem *curpol; + char *buf; + char *cur; + char *op; + int buflen; +} INFIX; + +/* Makes sure inf->buf is large enough for adding 'addsize' bytes */ +#define RESIZEBUF(inf, addsize) \ +while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \ +{ \ + int len = (inf)->cur - (inf)->buf; \ + (inf)->buflen *= 2; \ + (inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \ + (inf)->cur = (inf)->buf + len; \ +} + +/* + * recursively traverse the tree and + * print it in infix (human-readable) form + */ +static void +infix(INFIX *in, int parentPriority, bool rightPhraseOp) +{ + /* since this function recurses, it could be driven to stack overflow. */ + check_stack_depth(); + + if (in->curpol->type == QI_VAL) + { + QueryOperand *curpol = &in->curpol->qoperand; + char *op = in->op + curpol->distance; + int clen; + + RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 6); + *(in->cur) = '\''; + in->cur++; + while (*op) + { + if (t_iseq(op, '\'')) + { + *(in->cur) = '\''; + in->cur++; + } + else if (t_iseq(op, '\\')) + { + *(in->cur) = '\\'; + in->cur++; + } + COPYCHAR(in->cur, op); + + clen = pg_mblen(op); + op += clen; + in->cur += clen; + } + *(in->cur) = '\''; + in->cur++; + if (curpol->weight || curpol->prefix) + { + *(in->cur) = ':'; + in->cur++; + if (curpol->prefix) + { + *(in->cur) = '*'; + in->cur++; + } + if (curpol->weight & (1 << 3)) + { + *(in->cur) = 'A'; + in->cur++; + } + if (curpol->weight & (1 << 2)) + { + *(in->cur) = 'B'; + in->cur++; + } + if (curpol->weight & (1 << 1)) + { + *(in->cur) = 'C'; + in->cur++; + } + if (curpol->weight & 1) + { + *(in->cur) = 'D'; + in->cur++; + } + } + *(in->cur) = '\0'; + in->curpol++; + } + else if (in->curpol->qoperator.oper == OP_NOT) + { + int priority = QO_PRIORITY(in->curpol); + + if (priority < parentPriority) + { + RESIZEBUF(in, 2); + sprintf(in->cur, "( "); + in->cur = strchr(in->cur, '\0'); + } + RESIZEBUF(in, 1); + *(in->cur) = '!'; + in->cur++; + *(in->cur) = '\0'; + in->curpol++; + + infix(in, priority, false); + if (priority < parentPriority) + { + RESIZEBUF(in, 2); + sprintf(in->cur, " )"); + in->cur = strchr(in->cur, '\0'); + } + } + else + { + int8 op = in->curpol->qoperator.oper; + int priority = QO_PRIORITY(in->curpol); + int16 distance = in->curpol->qoperator.distance; + INFIX nrm; + bool needParenthesis = false; + + in->curpol++; + if (priority < parentPriority || + /* phrase operator depends on order */ + (op == OP_PHRASE && rightPhraseOp)) + { + needParenthesis = true; + RESIZEBUF(in, 2); + sprintf(in->cur, "( "); + in->cur = strchr(in->cur, '\0'); + } + + nrm.curpol = in->curpol; + nrm.op = in->op; + nrm.buflen = 16; + nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); + + /* get right operand */ + infix(&nrm, priority, (op == OP_PHRASE)); + + /* get & print left operand */ + in->curpol = nrm.curpol; + infix(in, priority, false); + + /* print operator & right operand */ + RESIZEBUF(in, 3 + 
(2 + 10 /* distance */ ) + (nrm.cur - nrm.buf)); + switch (op) + { + case OP_OR: + sprintf(in->cur, " | %s", nrm.buf); + break; + case OP_AND: + sprintf(in->cur, " & %s", nrm.buf); + break; + case OP_PHRASE: + if (distance != 1) + sprintf(in->cur, " <%d> %s", distance, nrm.buf); + else + sprintf(in->cur, " <-> %s", nrm.buf); + break; + default: + /* OP_NOT is handled in above if-branch */ + elog(ERROR, "unrecognized operator type: %d", op); + } + in->cur = strchr(in->cur, '\0'); + pfree(nrm.buf); + + if (needParenthesis) + { + RESIZEBUF(in, 2); + sprintf(in->cur, " )"); + in->cur = strchr(in->cur, '\0'); + } + } +} + +Datum +tsqueryout(PG_FUNCTION_ARGS) +{ + TSQuery query = PG_GETARG_TSQUERY(0); + INFIX nrm; + + if (query->size == 0) + { + char *b = palloc(1); + + *b = '\0'; + PG_RETURN_POINTER(b); + } + nrm.curpol = GETQUERY(query); + nrm.buflen = 32; + nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); + *(nrm.cur) = '\0'; + nrm.op = GETOPERAND(query); + infix(&nrm, -1 /* lowest priority */ , false); + + PG_FREE_IF_COPY(query, 0); + PG_RETURN_CSTRING(nrm.buf); +} + +/* + * Binary Input / Output functions. The binary format is as follows: + * + * uint32 number of operators/operands in the query + * + * Followed by the operators and operands, in prefix notation. For each + * operand: + * + * uint8 type, QI_VAL + * uint8 weight + * operand text in client encoding, null-terminated + * uint8 prefix + * + * For each operator: + * uint8 type, QI_OPR + * uint8 operator, one of OP_AND, OP_PHRASE OP_OR, OP_NOT. + * uint16 distance (only for OP_PHRASE) + */ +Datum +tsquerysend(PG_FUNCTION_ARGS) +{ + TSQuery query = PG_GETARG_TSQUERY(0); + StringInfoData buf; + int i; + QueryItem *item = GETQUERY(query); + + pq_begintypsend(&buf); + + pq_sendint32(&buf, query->size); + for (i = 0; i < query->size; i++) + { + pq_sendint8(&buf, item->type); + + switch (item->type) + { + case QI_VAL: + pq_sendint8(&buf, item->qoperand.weight); + pq_sendint8(&buf, item->qoperand.prefix); + pq_sendstring(&buf, GETOPERAND(query) + item->qoperand.distance); + break; + case QI_OPR: + pq_sendint8(&buf, item->qoperator.oper); + if (item->qoperator.oper == OP_PHRASE) + pq_sendint16(&buf, item->qoperator.distance); + break; + default: + elog(ERROR, "unrecognized tsquery node type: %d", item->type); + } + item++; + } + + PG_FREE_IF_COPY(query, 0); + + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +Datum +tsqueryrecv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + TSQuery query; + int i, + len; + QueryItem *item; + int datalen; + char *ptr; + uint32 size; + const char **operands; + bool needcleanup; + + size = pq_getmsgint(buf, sizeof(uint32)); + if (size > (MaxAllocSize / sizeof(QueryItem))) + elog(ERROR, "invalid size of tsquery"); + + /* Allocate space to temporarily hold operand strings */ + operands = palloc(size * sizeof(char *)); + + /* Allocate space for all the QueryItems. 
*/ + len = HDRSIZETQ + sizeof(QueryItem) * size; + query = (TSQuery) palloc0(len); + query->size = size; + item = GETQUERY(query); + + datalen = 0; + for (i = 0; i < size; i++) + { + item->type = (int8) pq_getmsgint(buf, sizeof(int8)); + + if (item->type == QI_VAL) + { + size_t val_len; /* length after recoding to server + * encoding */ + uint8 weight; + uint8 prefix; + const char *val; + pg_crc32 valcrc; + + weight = (uint8) pq_getmsgint(buf, sizeof(uint8)); + prefix = (uint8) pq_getmsgint(buf, sizeof(uint8)); + val = pq_getmsgstring(buf); + val_len = strlen(val); + + /* Sanity checks */ + + if (weight > 0xF) + elog(ERROR, "invalid tsquery: invalid weight bitmap"); + + if (val_len > MAXSTRLEN) + elog(ERROR, "invalid tsquery: operand too long"); + + if (datalen > MAXSTRPOS) + elog(ERROR, "invalid tsquery: total operand length exceeded"); + + /* Looks valid. */ + + INIT_LEGACY_CRC32(valcrc); + COMP_LEGACY_CRC32(valcrc, val, val_len); + FIN_LEGACY_CRC32(valcrc); + + item->qoperand.weight = weight; + item->qoperand.prefix = (prefix) ? true : false; + item->qoperand.valcrc = (int32) valcrc; + item->qoperand.length = val_len; + item->qoperand.distance = datalen; + + /* + * Operand strings are copied to the final struct after this loop; + * here we just collect them to an array + */ + operands[i] = val; + + datalen += val_len + 1; /* + 1 for the '\0' terminator */ + } + else if (item->type == QI_OPR) + { + int8 oper; + + oper = (int8) pq_getmsgint(buf, sizeof(int8)); + if (oper != OP_NOT && oper != OP_OR && oper != OP_AND && oper != OP_PHRASE) + elog(ERROR, "invalid tsquery: unrecognized operator type %d", + (int) oper); + if (i == size - 1) + elog(ERROR, "invalid pointer to right operand"); + + item->qoperator.oper = oper; + if (oper == OP_PHRASE) + item->qoperator.distance = (int16) pq_getmsgint(buf, sizeof(int16)); + } + else + elog(ERROR, "unrecognized tsquery node type: %d", item->type); + + item++; + } + + /* Enlarge buffer to make room for the operand values. */ + query = (TSQuery) repalloc(query, len + datalen); + item = GETQUERY(query); + ptr = GETOPERAND(query); + + /* + * Fill in the left-pointers. Checks that the tree is well-formed as a + * side-effect. 
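+ * findoprnd() also rejects malformed input (missing or extra nodes) with elog(ERROR).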
+ */ + findoprnd(item, size, &needcleanup); + + /* Can't have found any QI_VALSTOP nodes */ + Assert(!needcleanup); + + /* Copy operands to output struct */ + for (i = 0; i < size; i++) + { + if (item->type == QI_VAL) + { + memcpy(ptr, operands[i], item->qoperand.length + 1); + ptr += item->qoperand.length + 1; + } + item++; + } + + pfree(operands); + + Assert(ptr - GETOPERAND(query) == datalen); + + SET_VARSIZE(query, len + datalen); + + PG_RETURN_TSQUERY(query); +} + +/* + * debug function, used only for view query + * which will be executed in non-leaf pages in index + */ +Datum +tsquerytree(PG_FUNCTION_ARGS) +{ + TSQuery query = PG_GETARG_TSQUERY(0); + INFIX nrm; + text *res; + QueryItem *q; + int len; + + if (query->size == 0) + { + res = (text *) palloc(VARHDRSZ); + SET_VARSIZE(res, VARHDRSZ); + PG_RETURN_POINTER(res); + } + + q = clean_NOT(GETQUERY(query), &len); + + if (!q) + { + res = cstring_to_text("T"); + } + else + { + nrm.curpol = q; + nrm.buflen = 32; + nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); + *(nrm.cur) = '\0'; + nrm.op = GETOPERAND(query); + infix(&nrm, -1, false); + res = cstring_to_text_with_len(nrm.buf, nrm.cur - nrm.buf); + pfree(q); + } + + PG_FREE_IF_COPY(query, 0); + + PG_RETURN_TEXT_P(res); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsquery_cleanup.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsquery_cleanup.c new file mode 100644 index 00000000000..dc316657706 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsquery_cleanup.c @@ -0,0 +1,446 @@ +/*------------------------------------------------------------------------- + * + * tsquery_cleanup.c + * Cleanup query from NOT values and/or stopword + * Utility functions to correct work. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/tsquery_cleanup.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "miscadmin.h" +#include "tsearch/ts_utils.h" +#include "varatt.h" + +typedef struct NODE +{ + struct NODE *left; + struct NODE *right; + QueryItem *valnode; +} NODE; + +/* + * make query tree from plain view of query + */ +static NODE * +maketree(QueryItem *in) +{ + NODE *node = (NODE *) palloc(sizeof(NODE)); + + /* since this function recurses, it could be driven to stack overflow. */ + check_stack_depth(); + + node->valnode = in; + node->right = node->left = NULL; + if (in->type == QI_OPR) + { + node->right = maketree(in + 1); + if (in->qoperator.oper != OP_NOT) + node->left = maketree(in + in->qoperator.left); + } + return node; +} + +/* + * Internal state for plaintree and plainnode + */ +typedef struct +{ + QueryItem *ptr; + int len; /* allocated size of ptr */ + int cur; /* number of elements in ptr */ +} PLAINTREE; + +static void +plainnode(PLAINTREE *state, NODE *node) +{ + /* since this function recurses, it could be driven to stack overflow. 
*/ + check_stack_depth(); + + if (state->cur == state->len) + { + state->len *= 2; + state->ptr = (QueryItem *) repalloc(state->ptr, state->len * sizeof(QueryItem)); + } + memcpy(&(state->ptr[state->cur]), node->valnode, sizeof(QueryItem)); + if (node->valnode->type == QI_VAL) + state->cur++; + else if (node->valnode->qoperator.oper == OP_NOT) + { + state->ptr[state->cur].qoperator.left = 1; + state->cur++; + plainnode(state, node->right); + } + else + { + int cur = state->cur; + + state->cur++; + plainnode(state, node->right); + state->ptr[cur].qoperator.left = state->cur - cur; + plainnode(state, node->left); + } + pfree(node); +} + +/* + * make plain view of tree from a NODE-tree representation + */ +static QueryItem * +plaintree(NODE *root, int *len) +{ + PLAINTREE pl; + + pl.cur = 0; + pl.len = 16; + if (root && (root->valnode->type == QI_VAL || root->valnode->type == QI_OPR)) + { + pl.ptr = (QueryItem *) palloc(pl.len * sizeof(QueryItem)); + plainnode(&pl, root); + } + else + pl.ptr = NULL; + *len = pl.cur; + return pl.ptr; +} + +static void +freetree(NODE *node) +{ + /* since this function recurses, it could be driven to stack overflow. */ + check_stack_depth(); + + if (!node) + return; + if (node->left) + freetree(node->left); + if (node->right) + freetree(node->right); + pfree(node); +} + +/* + * clean tree for ! operator. + * It's useful for debug, but in + * other case, such view is used with search in index. + * Operator ! always return TRUE + */ +static NODE * +clean_NOT_intree(NODE *node) +{ + /* since this function recurses, it could be driven to stack overflow. */ + check_stack_depth(); + + if (node->valnode->type == QI_VAL) + return node; + + if (node->valnode->qoperator.oper == OP_NOT) + { + freetree(node); + return NULL; + } + + /* operator & or | */ + if (node->valnode->qoperator.oper == OP_OR) + { + if ((node->left = clean_NOT_intree(node->left)) == NULL || + (node->right = clean_NOT_intree(node->right)) == NULL) + { + freetree(node); + return NULL; + } + } + else + { + NODE *res = node; + + Assert(node->valnode->qoperator.oper == OP_AND || + node->valnode->qoperator.oper == OP_PHRASE); + + node->left = clean_NOT_intree(node->left); + node->right = clean_NOT_intree(node->right); + if (node->left == NULL && node->right == NULL) + { + pfree(node); + res = NULL; + } + else if (node->left == NULL) + { + res = node->right; + pfree(node); + } + else if (node->right == NULL) + { + res = node->left; + pfree(node); + } + return res; + } + return node; +} + +QueryItem * +clean_NOT(QueryItem *ptr, int *len) +{ + NODE *root = maketree(ptr); + + return plaintree(clean_NOT_intree(root), len); +} + + +/* + * Remove QI_VALSTOP (stopword) nodes from query tree. + * + * Returns NULL if the query degenerates to nothing. Input must not be NULL. + * + * When we remove a phrase operator due to removing one or both of its + * arguments, we might need to adjust the distance of a parent phrase + * operator. For example, 'a' is a stopword, so: + * (b <-> a) <-> c should become b <2> c + * b <-> (a <-> c) should become b <2> c + * (b <-> (a <-> a)) <-> c should become b <3> c + * b <-> ((a <-> a) <-> c) should become b <3> c + * To handle that, we define two output parameters: + * ladd: amount to add to a phrase distance to the left of this node + * radd: amount to add to a phrase distance to the right of this node + * We need two outputs because we could need to bubble up adjustments to two + * different parent phrase operators. 
Consider + * w <-> (((a <-> x) <2> (y <3> a)) <-> z) + * After we've removed the two a's and are considering the <2> node (which is + * now just x <2> y), we have an ladd distance of 1 that needs to propagate + * up to the topmost (leftmost) <->, and an radd distance of 3 that needs to + * propagate to the rightmost <->, so that we'll end up with + * w <2> ((x <2> y) <4> z) + * Near the bottom of the tree, we may have subtrees consisting only of + * stopwords. The distances of any phrase operators within such a subtree are + * summed and propagated to both ladd and radd, since we don't know which side + * of the lowest surviving phrase operator we are in. The rule is that any + * subtree that degenerates to NULL must return equal values of ladd and radd, + * and the parent node dealing with it should incorporate only one of those. + * + * Currently, we only implement this adjustment for adjacent phrase operators. + * Thus for example 'x <-> ((a <-> y) | z)' will become 'x <-> (y | z)', which + * isn't ideal, but there is no way to represent the really desired semantics + * without some redesign of the tsquery structure. Certainly it would not be + * any better to convert that to 'x <2> (y | z)'. Since this is such a weird + * corner case, let it go for now. But we can fix it in cases where the + * intervening non-phrase operator also gets removed, for example + * '((x <-> a) | a) <-> y' will become 'x <2> y'. + */ +static NODE * +clean_stopword_intree(NODE *node, int *ladd, int *radd) +{ + /* since this function recurses, it could be driven to stack overflow. */ + check_stack_depth(); + + /* default output parameters indicate no change in parent distance */ + *ladd = *radd = 0; + + if (node->valnode->type == QI_VAL) + return node; + else if (node->valnode->type == QI_VALSTOP) + { + pfree(node); + return NULL; + } + + Assert(node->valnode->type == QI_OPR); + + if (node->valnode->qoperator.oper == OP_NOT) + { + /* NOT doesn't change pattern width, so just report child distances */ + node->right = clean_stopword_intree(node->right, ladd, radd); + if (!node->right) + { + freetree(node); + return NULL; + } + } + else + { + NODE *res = node; + bool isphrase; + int ndistance, + lladd, + lradd, + rladd, + rradd; + + /* First, recurse */ + node->left = clean_stopword_intree(node->left, &lladd, &lradd); + node->right = clean_stopword_intree(node->right, &rladd, &rradd); + + /* Check if current node is OP_PHRASE, get its distance */ + isphrase = (node->valnode->qoperator.oper == OP_PHRASE); + ndistance = isphrase ? node->valnode->qoperator.distance : 0; + + if (node->left == NULL && node->right == NULL) + { + /* + * When we collapse out a phrase node entirely, propagate its own + * distance into both *ladd and *radd; it is the responsibility of + * the parent node to count it only once. Also, for a phrase + * node, distances coming from children are summed and propagated + * up to parent (we assume lladd == lradd and rladd == rradd, else + * rule was broken at a lower level). But if this isn't a phrase + * node, take the larger of the two child distances; that + * corresponds to what TS_execute will do in non-stopword cases. 
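+ * For example, a subtree 'a <2> a' whose operands are both stopwords collapses entirely and contributes 2 to both *ladd and *radd.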
+ */ + if (isphrase) + *ladd = *radd = lladd + ndistance + rladd; + else + *ladd = *radd = Max(lladd, rladd); + freetree(node); + return NULL; + } + else if (node->left == NULL) + { + /* Removing this operator and left subnode */ + /* lladd and lradd are equal/redundant, don't count both */ + if (isphrase) + { + /* operator's own distance must propagate to left */ + *ladd = lladd + ndistance + rladd; + *radd = rradd; + } + else + { + /* at non-phrase op, just forget the left subnode entirely */ + *ladd = rladd; + *radd = rradd; + } + res = node->right; + pfree(node); + } + else if (node->right == NULL) + { + /* Removing this operator and right subnode */ + /* rladd and rradd are equal/redundant, don't count both */ + if (isphrase) + { + /* operator's own distance must propagate to right */ + *ladd = lladd; + *radd = lradd + ndistance + rradd; + } + else + { + /* at non-phrase op, just forget the right subnode entirely */ + *ladd = lladd; + *radd = lradd; + } + res = node->left; + pfree(node); + } + else if (isphrase) + { + /* Absorb appropriate corrections at this level */ + node->valnode->qoperator.distance += lradd + rladd; + /* Propagate up any unaccounted-for corrections */ + *ladd = lladd; + *radd = rradd; + } + else + { + /* We're keeping a non-phrase operator, so ladd/radd remain 0 */ + } + + return res; + } + return node; +} + +/* + * Number of elements in query tree + */ +static int32 +calcstrlen(NODE *node) +{ + int32 size = 0; + + if (node->valnode->type == QI_VAL) + { + size = node->valnode->qoperand.length + 1; + } + else + { + Assert(node->valnode->type == QI_OPR); + + size = calcstrlen(node->right); + if (node->valnode->qoperator.oper != OP_NOT) + size += calcstrlen(node->left); + } + + return size; +} + +/* + * Remove QI_VALSTOP (stopword) nodes from TSQuery. 
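+ * Roughly: rebuild the NODE tree (maketree), drop stopword leaves and patch phrase distances (clean_stopword_intree), then flatten it back (plaintree) and repack the operand strings into a fresh TSQuery.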
+ */ +TSQuery +cleanup_tsquery_stopwords(TSQuery in, bool noisy) +{ + int32 len, + lenstr, + commonlen, + i; + NODE *root; + int ladd, + radd; + TSQuery out; + QueryItem *items; + char *operands; + + if (in->size == 0) + return in; + + /* eliminate stop words */ + root = clean_stopword_intree(maketree(GETQUERY(in)), &ladd, &radd); + if (root == NULL) + { + if (noisy) + ereport(NOTICE, + (errmsg("text-search query contains only stop words or doesn't contain lexemes, ignored"))); + out = palloc(HDRSIZETQ); + out->size = 0; + SET_VARSIZE(out, HDRSIZETQ); + return out; + } + + /* + * Build TSQuery from plain view + */ + + lenstr = calcstrlen(root); + items = plaintree(root, &len); + commonlen = COMPUTESIZE(len, lenstr); + + out = palloc(commonlen); + SET_VARSIZE(out, commonlen); + out->size = len; + + memcpy(GETQUERY(out), items, len * sizeof(QueryItem)); + + items = GETQUERY(out); + operands = GETOPERAND(out); + for (i = 0; i < out->size; i++) + { + QueryOperand *op = (QueryOperand *) &items[i]; + + if (op->type != QI_VAL) + continue; + + memcpy(operands, GETOPERAND(in) + op->distance, op->length); + operands[op->length] = '\0'; + op->distance = operands - GETOPERAND(out); + operands += op->length + 1; + } + + return out; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsquery_gist.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsquery_gist.c new file mode 100644 index 00000000000..7c99348d44c --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsquery_gist.c @@ -0,0 +1,277 @@ +/*------------------------------------------------------------------------- + * + * tsquery_gist.c + * GiST index support for tsquery + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/tsquery_gist.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/gist.h" +#include "access/stratnum.h" +#include "tsearch/ts_utils.h" +#include "utils/builtins.h" + +#define GETENTRY(vec,pos) DatumGetTSQuerySign((vec)->vector[pos].key) + + +Datum +gtsquery_compress(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + GISTENTRY *retval = entry; + + if (entry->leafkey) + { + TSQuerySign sign; + + retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); + sign = makeTSQuerySign(DatumGetTSQuery(entry->key)); + + gistentryinit(*retval, TSQuerySignGetDatum(sign), + entry->rel, entry->page, + entry->offset, false); + } + + PG_RETURN_POINTER(retval); +} + +/* + * We do not need a decompress function, because the other gtsquery + * support functions work with the compressed representation. 
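+ * (Here the "compressed representation" is just the TSQuerySign bit signature produced by makeTSQuerySign in gtsquery_compress.)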
+ */ + +Datum +gtsquery_consistent(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + TSQuery query = PG_GETARG_TSQUERY(1); + StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); + + /* Oid subtype = PG_GETARG_OID(3); */ + bool *recheck = (bool *) PG_GETARG_POINTER(4); + TSQuerySign key = DatumGetTSQuerySign(entry->key); + TSQuerySign sq = makeTSQuerySign(query); + bool retval; + + /* All cases served by this function are inexact */ + *recheck = true; + + switch (strategy) + { + case RTContainsStrategyNumber: + if (GIST_LEAF(entry)) + retval = (key & sq) == sq; + else + retval = (key & sq) != 0; + break; + case RTContainedByStrategyNumber: + if (GIST_LEAF(entry)) + retval = (key & sq) == key; + else + retval = (key & sq) != 0; + break; + default: + retval = false; + } + PG_RETURN_BOOL(retval); +} + +Datum +gtsquery_union(PG_FUNCTION_ARGS) +{ + GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); + int *size = (int *) PG_GETARG_POINTER(1); + TSQuerySign sign; + int i; + + sign = 0; + + for (i = 0; i < entryvec->n; i++) + sign |= GETENTRY(entryvec, i); + + *size = sizeof(TSQuerySign); + + PG_RETURN_TSQUERYSIGN(sign); +} + +Datum +gtsquery_same(PG_FUNCTION_ARGS) +{ + TSQuerySign a = PG_GETARG_TSQUERYSIGN(0); + TSQuerySign b = PG_GETARG_TSQUERYSIGN(1); + bool *result = (bool *) PG_GETARG_POINTER(2); + + *result = (a == b); + + PG_RETURN_POINTER(result); +} + +static int +sizebitvec(TSQuerySign sign) +{ + int size = 0, + i; + + for (i = 0; i < TSQS_SIGLEN; i++) + size += 0x01 & (sign >> i); + + return size; +} + +static int +hemdist(TSQuerySign a, TSQuerySign b) +{ + TSQuerySign res = a ^ b; + + return sizebitvec(res); +} + +Datum +gtsquery_penalty(PG_FUNCTION_ARGS) +{ + TSQuerySign origval = DatumGetTSQuerySign(((GISTENTRY *) PG_GETARG_POINTER(0))->key); + TSQuerySign newval = DatumGetTSQuerySign(((GISTENTRY *) PG_GETARG_POINTER(1))->key); + float *penalty = (float *) PG_GETARG_POINTER(2); + + *penalty = hemdist(origval, newval); + + PG_RETURN_POINTER(penalty); +} + + +typedef struct +{ + OffsetNumber pos; + int32 cost; +} SPLITCOST; + +static int +comparecost(const void *a, const void *b) +{ + if (((const SPLITCOST *) a)->cost == ((const SPLITCOST *) b)->cost) + return 0; + else + return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 
1 : -1; +} + +#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) ) + +Datum +gtsquery_picksplit(PG_FUNCTION_ARGS) +{ + GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); + GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); + OffsetNumber maxoff = entryvec->n - 2; + OffsetNumber k, + j; + TSQuerySign datum_l, + datum_r; + int32 size_alpha, + size_beta; + int32 size_waste, + waste = -1; + int32 nbytes; + OffsetNumber seed_1 = 0, + seed_2 = 0; + OffsetNumber *left, + *right; + + SPLITCOST *costvector; + + nbytes = (maxoff + 2) * sizeof(OffsetNumber); + left = v->spl_left = (OffsetNumber *) palloc(nbytes); + right = v->spl_right = (OffsetNumber *) palloc(nbytes); + v->spl_nleft = v->spl_nright = 0; + + for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) + for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) + { + size_waste = hemdist(GETENTRY(entryvec, j), GETENTRY(entryvec, k)); + if (size_waste > waste) + { + waste = size_waste; + seed_1 = k; + seed_2 = j; + } + } + + + if (seed_1 == 0 || seed_2 == 0) + { + seed_1 = 1; + seed_2 = 2; + } + + datum_l = GETENTRY(entryvec, seed_1); + datum_r = GETENTRY(entryvec, seed_2); + + maxoff = OffsetNumberNext(maxoff); + costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff); + for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) + { + costvector[j - 1].pos = j; + size_alpha = hemdist(GETENTRY(entryvec, seed_1), GETENTRY(entryvec, j)); + size_beta = hemdist(GETENTRY(entryvec, seed_2), GETENTRY(entryvec, j)); + costvector[j - 1].cost = abs(size_alpha - size_beta); + } + qsort(costvector, maxoff, sizeof(SPLITCOST), comparecost); + + for (k = 0; k < maxoff; k++) + { + j = costvector[k].pos; + if (j == seed_1) + { + *left++ = j; + v->spl_nleft++; + continue; + } + else if (j == seed_2) + { + *right++ = j; + v->spl_nright++; + continue; + } + size_alpha = hemdist(datum_l, GETENTRY(entryvec, j)); + size_beta = hemdist(datum_r, GETENTRY(entryvec, j)); + + if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.05)) + { + datum_l |= GETENTRY(entryvec, j); + *left++ = j; + v->spl_nleft++; + } + else + { + datum_r |= GETENTRY(entryvec, j); + *right++ = j; + v->spl_nright++; + } + } + + *right = *left = FirstOffsetNumber; + v->spl_ldatum = TSQuerySignGetDatum(datum_l); + v->spl_rdatum = TSQuerySignGetDatum(datum_r); + + PG_RETURN_POINTER(v); +} + +/* + * Formerly, gtsquery_consistent was declared in pg_proc.h with arguments + * that did not match the documented conventions for GiST support functions. + * We fixed that, but we still need a pg_proc entry with the old signature + * to support reloading pre-9.6 contrib/tsearch2 opclass declarations. + * This compatibility function should go away eventually. 
+ */ +Datum +gtsquery_consistent_oldsig(PG_FUNCTION_ARGS) +{ + return gtsquery_consistent(fcinfo); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsquery_op.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsquery_op.c new file mode 100644 index 00000000000..2bc4ec904fe --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsquery_op.c @@ -0,0 +1,359 @@ +/*------------------------------------------------------------------------- + * + * tsquery_op.c + * Various operations with tsquery + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/tsquery_op.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "lib/qunique.h" +#include "tsearch/ts_utils.h" +#include "utils/builtins.h" +#include "varatt.h" + +Datum +tsquery_numnode(PG_FUNCTION_ARGS) +{ + TSQuery query = PG_GETARG_TSQUERY(0); + int nnode = query->size; + + PG_FREE_IF_COPY(query, 0); + PG_RETURN_INT32(nnode); +} + +static QTNode * +join_tsqueries(TSQuery a, TSQuery b, int8 operator, uint16 distance) +{ + QTNode *res = (QTNode *) palloc0(sizeof(QTNode)); + + res->flags |= QTN_NEEDFREE; + + res->valnode = (QueryItem *) palloc0(sizeof(QueryItem)); + res->valnode->type = QI_OPR; + res->valnode->qoperator.oper = operator; + if (operator == OP_PHRASE) + res->valnode->qoperator.distance = distance; + + res->child = (QTNode **) palloc0(sizeof(QTNode *) * 2); + res->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b)); + res->child[1] = QT2QTN(GETQUERY(a), GETOPERAND(a)); + res->nchild = 2; + + return res; +} + +Datum +tsquery_and(PG_FUNCTION_ARGS) +{ + TSQuery a = PG_GETARG_TSQUERY_COPY(0); + TSQuery b = PG_GETARG_TSQUERY_COPY(1); + QTNode *res; + TSQuery query; + + if (a->size == 0) + { + PG_FREE_IF_COPY(a, 1); + PG_RETURN_POINTER(b); + } + else if (b->size == 0) + { + PG_FREE_IF_COPY(b, 1); + PG_RETURN_POINTER(a); + } + + res = join_tsqueries(a, b, OP_AND, 0); + + query = QTN2QT(res); + + QTNFree(res); + PG_FREE_IF_COPY(a, 0); + PG_FREE_IF_COPY(b, 1); + + PG_RETURN_TSQUERY(query); +} + +Datum +tsquery_or(PG_FUNCTION_ARGS) +{ + TSQuery a = PG_GETARG_TSQUERY_COPY(0); + TSQuery b = PG_GETARG_TSQUERY_COPY(1); + QTNode *res; + TSQuery query; + + if (a->size == 0) + { + PG_FREE_IF_COPY(a, 1); + PG_RETURN_POINTER(b); + } + else if (b->size == 0) + { + PG_FREE_IF_COPY(b, 1); + PG_RETURN_POINTER(a); + } + + res = join_tsqueries(a, b, OP_OR, 0); + + query = QTN2QT(res); + + QTNFree(res); + PG_FREE_IF_COPY(a, 0); + PG_FREE_IF_COPY(b, 1); + + PG_RETURN_TSQUERY(query); +} + +Datum +tsquery_phrase_distance(PG_FUNCTION_ARGS) +{ + TSQuery a = PG_GETARG_TSQUERY_COPY(0); + TSQuery b = PG_GETARG_TSQUERY_COPY(1); + QTNode *res; + TSQuery query; + int32 distance = PG_GETARG_INT32(2); + + if (distance < 0 || distance > MAXENTRYPOS) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("distance in phrase operator must be an integer value between zero and %d inclusive", + MAXENTRYPOS))); + if (a->size == 0) + { + PG_FREE_IF_COPY(a, 1); + PG_RETURN_POINTER(b); + } + else if (b->size == 0) + { + PG_FREE_IF_COPY(b, 1); + PG_RETURN_POINTER(a); + } + + res = join_tsqueries(a, b, OP_PHRASE, (uint16) distance); + + query = QTN2QT(res); + + QTNFree(res); + PG_FREE_IF_COPY(a, 0); + PG_FREE_IF_COPY(b, 1); + + PG_RETURN_TSQUERY(query); +} + +Datum +tsquery_phrase(PG_FUNCTION_ARGS) +{ + 
PG_RETURN_DATUM(DirectFunctionCall3(tsquery_phrase_distance, + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1), + Int32GetDatum(1))); +} + +Datum +tsquery_not(PG_FUNCTION_ARGS) +{ + TSQuery a = PG_GETARG_TSQUERY_COPY(0); + QTNode *res; + TSQuery query; + + if (a->size == 0) + PG_RETURN_POINTER(a); + + res = (QTNode *) palloc0(sizeof(QTNode)); + + res->flags |= QTN_NEEDFREE; + + res->valnode = (QueryItem *) palloc0(sizeof(QueryItem)); + res->valnode->type = QI_OPR; + res->valnode->qoperator.oper = OP_NOT; + + res->child = (QTNode **) palloc0(sizeof(QTNode *)); + res->child[0] = QT2QTN(GETQUERY(a), GETOPERAND(a)); + res->nchild = 1; + + query = QTN2QT(res); + + QTNFree(res); + PG_FREE_IF_COPY(a, 0); + + PG_RETURN_POINTER(query); +} + +static int +CompareTSQ(TSQuery a, TSQuery b) +{ + if (a->size != b->size) + { + return (a->size < b->size) ? -1 : 1; + } + else if (VARSIZE(a) != VARSIZE(b)) + { + return (VARSIZE(a) < VARSIZE(b)) ? -1 : 1; + } + else if (a->size != 0) + { + QTNode *an = QT2QTN(GETQUERY(a), GETOPERAND(a)); + QTNode *bn = QT2QTN(GETQUERY(b), GETOPERAND(b)); + int res = QTNodeCompare(an, bn); + + QTNFree(an); + QTNFree(bn); + + return res; + } + + return 0; +} + +Datum +tsquery_cmp(PG_FUNCTION_ARGS) +{ + TSQuery a = PG_GETARG_TSQUERY_COPY(0); + TSQuery b = PG_GETARG_TSQUERY_COPY(1); + int res = CompareTSQ(a, b); + + PG_FREE_IF_COPY(a, 0); + PG_FREE_IF_COPY(b, 1); + + PG_RETURN_INT32(res); +} + +#define CMPFUNC( NAME, CONDITION ) \ +Datum \ +NAME(PG_FUNCTION_ARGS) { \ + TSQuery a = PG_GETARG_TSQUERY_COPY(0); \ + TSQuery b = PG_GETARG_TSQUERY_COPY(1); \ + int res = CompareTSQ(a,b); \ + \ + PG_FREE_IF_COPY(a,0); \ + PG_FREE_IF_COPY(b,1); \ + \ + PG_RETURN_BOOL( CONDITION ); \ +} \ +/* keep compiler quiet - no extra ; */ \ +extern int no_such_variable + +CMPFUNC(tsquery_lt, res < 0); +CMPFUNC(tsquery_le, res <= 0); +CMPFUNC(tsquery_eq, res == 0); +CMPFUNC(tsquery_ge, res >= 0); +CMPFUNC(tsquery_gt, res > 0); +CMPFUNC(tsquery_ne, res != 0); + +TSQuerySign +makeTSQuerySign(TSQuery a) +{ + int i; + QueryItem *ptr = GETQUERY(a); + TSQuerySign sign = 0; + + for (i = 0; i < a->size; i++) + { + if (ptr->type == QI_VAL) + sign |= ((TSQuerySign) 1) << (((unsigned int) ptr->qoperand.valcrc) % TSQS_SIGLEN); + ptr++; + } + + return sign; +} + +static char ** +collectTSQueryValues(TSQuery a, int *nvalues_p) +{ + QueryItem *ptr = GETQUERY(a); + char *operand = GETOPERAND(a); + char **values; + int nvalues = 0; + int i; + + values = (char **) palloc(sizeof(char *) * a->size); + + for (i = 0; i < a->size; i++) + { + if (ptr->type == QI_VAL) + { + int len = ptr->qoperand.length; + char *val; + + val = palloc(len + 1); + memcpy(val, operand + ptr->qoperand.distance, len); + val[len] = '\0'; + + values[nvalues++] = val; + } + ptr++; + } + + *nvalues_p = nvalues; + return values; +} + +static int +cmp_string(const void *a, const void *b) +{ + const char *sa = *((char *const *) a); + const char *sb = *((char *const *) b); + + return strcmp(sa, sb); +} + +Datum +tsq_mcontains(PG_FUNCTION_ARGS) +{ + TSQuery query = PG_GETARG_TSQUERY(0); + TSQuery ex = PG_GETARG_TSQUERY(1); + char **query_values; + int query_nvalues; + char **ex_values; + int ex_nvalues; + bool result = true; + + /* Extract the query terms into arrays */ + query_values = collectTSQueryValues(query, &query_nvalues); + ex_values = collectTSQueryValues(ex, &ex_nvalues); + + /* Sort and remove duplicates from both arrays */ + qsort(query_values, query_nvalues, sizeof(char *), cmp_string); + query_nvalues = qunique(query_values, query_nvalues, 
sizeof(char *), + cmp_string); + qsort(ex_values, ex_nvalues, sizeof(char *), cmp_string); + ex_nvalues = qunique(ex_values, ex_nvalues, sizeof(char *), cmp_string); + + if (ex_nvalues > query_nvalues) + result = false; + else + { + int i; + int j = 0; + + for (i = 0; i < ex_nvalues; i++) + { + for (; j < query_nvalues; j++) + { + if (strcmp(ex_values[i], query_values[j]) == 0) + break; + } + if (j == query_nvalues) + { + result = false; + break; + } + } + } + + PG_RETURN_BOOL(result); +} + +Datum +tsq_mcontained(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(DirectFunctionCall2(tsq_mcontains, + PG_GETARG_DATUM(1), + PG_GETARG_DATUM(0))); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsquery_rewrite.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsquery_rewrite.c new file mode 100644 index 00000000000..7e736351628 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsquery_rewrite.c @@ -0,0 +1,462 @@ +/*------------------------------------------------------------------------- + * + * tsquery_rewrite.c + * Utilities for reconstructing tsquery + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/tsquery_rewrite.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "catalog/pg_type.h" +#include "executor/spi.h" +#include "miscadmin.h" +#include "tsearch/ts_utils.h" +#include "utils/builtins.h" + + +/* + * If "node" is equal to "ex", return a copy of "subs" instead. + * If "ex" matches a subset of node's children, return a modified version + * of "node" in which those children are replaced with a copy of "subs". + * Otherwise return "node" unmodified. + * + * The QTN_NOCHANGE bit is set in successfully modified nodes, so that + * we won't uselessly recurse into them. + * Also, set *isfind true if we make a replacement. + */ +static QTNode * +findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind) +{ + /* Can't match unless signature matches and node type matches. */ + if ((node->sign & ex->sign) != ex->sign || + node->valnode->type != ex->valnode->type) + return node; + + /* Ignore nodes marked NOCHANGE, too. */ + if (node->flags & QTN_NOCHANGE) + return node; + + if (node->valnode->type == QI_OPR) + { + /* Must be same operator. */ + if (node->valnode->qoperator.oper != ex->valnode->qoperator.oper) + return node; + + if (node->nchild == ex->nchild) + { + /* + * Simple case: when same number of children, match if equal. + * (This is reliable when the children were sorted earlier.) + */ + if (QTNEq(node, ex)) + { + /* Match; delete node and return a copy of subs instead. */ + QTNFree(node); + if (subs) + { + node = QTNCopy(subs); + node->flags |= QTN_NOCHANGE; + } + else + node = NULL; + *isfind = true; + } + } + else if (node->nchild > ex->nchild && ex->nchild > 0) + { + /* + * AND and OR are commutative/associative, so we should check if a + * subset of the children match. For example, if node is A|B|C, + * and ex is B|C, we have a match after we notionally convert node + * to A|(B|C). This does not work for NOT or PHRASE nodes, but we + * can't get here for those node types because they have a fixed + * number of children. + * + * Because we expect that the children are sorted, it suffices to + * make one pass through the two lists to find the matches. 
+ */ + bool *matched; + int nmatched; + int i, + j; + + /* Assert that the subset rule is OK */ + Assert(node->valnode->qoperator.oper == OP_AND || + node->valnode->qoperator.oper == OP_OR); + + /* matched[] will record which children of node matched */ + matched = (bool *) palloc0(node->nchild * sizeof(bool)); + nmatched = 0; + i = j = 0; + while (i < node->nchild && j < ex->nchild) + { + int cmp = QTNodeCompare(node->child[i], ex->child[j]); + + if (cmp == 0) + { + /* match! */ + matched[i] = true; + nmatched++; + i++, j++; + } + else if (cmp < 0) + { + /* node->child[i] has no match, ignore it */ + i++; + } + else + { + /* ex->child[j] has no match; we can give up immediately */ + break; + } + } + + if (nmatched == ex->nchild) + { + /* collapse out the matched children of node */ + j = 0; + for (i = 0; i < node->nchild; i++) + { + if (matched[i]) + QTNFree(node->child[i]); + else + node->child[j++] = node->child[i]; + } + + /* and instead insert a copy of subs */ + if (subs) + { + subs = QTNCopy(subs); + subs->flags |= QTN_NOCHANGE; + node->child[j++] = subs; + } + + node->nchild = j; + + /* + * At this point we might have a node with zero or one child, + * which should be simplified. But we leave it to our caller + * (dofindsubquery) to take care of that. + */ + + /* + * Re-sort the node to put new child in the right place. This + * is a bit bogus, because it won't matter for findsubquery's + * remaining processing, and it's insufficient to prepare the + * tree for another search (we would need to re-flatten as + * well, and we don't want to do that because we'd lose the + * QTN_NOCHANGE marking on the new child). But it's needed to + * keep the results the same as the regression tests expect. + */ + QTNSort(node); + + *isfind = true; + } + + pfree(matched); + } + } + else + { + Assert(node->valnode->type == QI_VAL); + + if (node->valnode->qoperand.valcrc != ex->valnode->qoperand.valcrc) + return node; + else if (QTNEq(node, ex)) + { + QTNFree(node); + if (subs) + { + node = QTNCopy(subs); + node->flags |= QTN_NOCHANGE; + } + else + { + node = NULL; + } + *isfind = true; + } + } + + return node; +} + +/* + * Recursive guts of findsubquery(): attempt to replace "ex" with "subs" + * at the root node, and if we failed to do so, recursively match against + * child nodes. + * + * Delete any void subtrees resulting from the replacement. + * In the following example '5' is replaced by empty operand: + * + * AND -> 6 + * / \ + * 5 OR + * / \ + * 6 5 + */ +static QTNode * +dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind) +{ + /* since this function recurses, it could be driven to stack overflow. */ + check_stack_depth(); + + /* also, since it's a bit expensive, let's check for query cancel. */ + CHECK_FOR_INTERRUPTS(); + + /* match at the node itself */ + root = findeq(root, ex, subs, isfind); + + /* unless we matched here, consider matches at child nodes */ + if (root && (root->flags & QTN_NOCHANGE) == 0 && + root->valnode->type == QI_OPR) + { + int i, + j = 0; + + /* + * Any subtrees that are replaced by NULL must be dropped from the + * tree. + */ + for (i = 0; i < root->nchild; i++) + { + root->child[j] = dofindsubquery(root->child[i], ex, subs, isfind); + if (root->child[j]) + j++; + } + + root->nchild = j; + + /* + * If we have just zero or one remaining child node, simplify out this + * operator node. 
+ */ + if (root->nchild == 0) + { + QTNFree(root); + root = NULL; + } + else if (root->nchild == 1 && root->valnode->qoperator.oper != OP_NOT) + { + QTNode *nroot = root->child[0]; + + pfree(root); + root = nroot; + } + } + + return root; +} + +/* + * Substitute "subs" for "ex" throughout the QTNode tree at root. + * + * If isfind isn't NULL, set *isfind to show whether we made any substitution. + * + * Both "root" and "ex" must have been through QTNTernary and QTNSort + * to ensure reliable matching. + */ +QTNode * +findsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind) +{ + bool DidFind = false; + + root = dofindsubquery(root, ex, subs, &DidFind); + + if (isfind) + *isfind = DidFind; + + return root; +} + +Datum +tsquery_rewrite_query(PG_FUNCTION_ARGS) +{ + TSQuery query = PG_GETARG_TSQUERY_COPY(0); + text *in = PG_GETARG_TEXT_PP(1); + TSQuery rewrited = query; + MemoryContext outercontext = CurrentMemoryContext; + MemoryContext oldcontext; + QTNode *tree; + char *buf; + SPIPlanPtr plan; + Portal portal; + bool isnull; + + if (query->size == 0) + { + PG_FREE_IF_COPY(in, 1); + PG_RETURN_POINTER(rewrited); + } + + tree = QT2QTN(GETQUERY(query), GETOPERAND(query)); + QTNTernary(tree); + QTNSort(tree); + + buf = text_to_cstring(in); + + SPI_connect(); + + if ((plan = SPI_prepare(buf, 0, NULL)) == NULL) + elog(ERROR, "SPI_prepare(\"%s\") failed", buf); + + if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL) + elog(ERROR, "SPI_cursor_open(\"%s\") failed", buf); + + SPI_cursor_fetch(portal, true, 100); + + if (SPI_tuptable == NULL || + SPI_tuptable->tupdesc->natts != 2 || + SPI_gettypeid(SPI_tuptable->tupdesc, 1) != TSQUERYOID || + SPI_gettypeid(SPI_tuptable->tupdesc, 2) != TSQUERYOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ts_rewrite query must return two tsquery columns"))); + + while (SPI_processed > 0 && tree) + { + uint64 i; + + for (i = 0; i < SPI_processed && tree; i++) + { + Datum qdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull); + Datum sdata; + + if (isnull) + continue; + + sdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull); + + if (!isnull) + { + TSQuery qtex = DatumGetTSQuery(qdata); + TSQuery qtsubs = DatumGetTSQuery(sdata); + QTNode *qex, + *qsubs = NULL; + + if (qtex->size == 0) + { + if (qtex != (TSQuery) DatumGetPointer(qdata)) + pfree(qtex); + if (qtsubs != (TSQuery) DatumGetPointer(sdata)) + pfree(qtsubs); + continue; + } + + qex = QT2QTN(GETQUERY(qtex), GETOPERAND(qtex)); + + QTNTernary(qex); + QTNSort(qex); + + if (qtsubs->size) + qsubs = QT2QTN(GETQUERY(qtsubs), GETOPERAND(qtsubs)); + + oldcontext = MemoryContextSwitchTo(outercontext); + tree = findsubquery(tree, qex, qsubs, NULL); + MemoryContextSwitchTo(oldcontext); + + QTNFree(qex); + if (qtex != (TSQuery) DatumGetPointer(qdata)) + pfree(qtex); + QTNFree(qsubs); + if (qtsubs != (TSQuery) DatumGetPointer(sdata)) + pfree(qtsubs); + + if (tree) + { + /* ready the tree for another pass */ + QTNClearFlags(tree, QTN_NOCHANGE); + QTNTernary(tree); + QTNSort(tree); + } + } + } + + SPI_freetuptable(SPI_tuptable); + SPI_cursor_fetch(portal, true, 100); + } + + SPI_freetuptable(SPI_tuptable); + SPI_cursor_close(portal); + SPI_freeplan(plan); + SPI_finish(); + + if (tree) + { + QTNBinary(tree); + rewrited = QTN2QT(tree); + QTNFree(tree); + PG_FREE_IF_COPY(query, 0); + } + else + { + SET_VARSIZE(rewrited, HDRSIZETQ); + rewrited->size = 0; + } + + pfree(buf); + PG_FREE_IF_COPY(in, 1); + 
PG_RETURN_POINTER(rewrited); +} + +Datum +tsquery_rewrite(PG_FUNCTION_ARGS) +{ + TSQuery query = PG_GETARG_TSQUERY_COPY(0); + TSQuery ex = PG_GETARG_TSQUERY(1); + TSQuery subst = PG_GETARG_TSQUERY(2); + TSQuery rewrited = query; + QTNode *tree, + *qex, + *subs = NULL; + + if (query->size == 0 || ex->size == 0) + { + PG_FREE_IF_COPY(ex, 1); + PG_FREE_IF_COPY(subst, 2); + PG_RETURN_POINTER(rewrited); + } + + tree = QT2QTN(GETQUERY(query), GETOPERAND(query)); + QTNTernary(tree); + QTNSort(tree); + + qex = QT2QTN(GETQUERY(ex), GETOPERAND(ex)); + QTNTernary(qex); + QTNSort(qex); + + if (subst->size) + subs = QT2QTN(GETQUERY(subst), GETOPERAND(subst)); + + tree = findsubquery(tree, qex, subs, NULL); + + QTNFree(qex); + QTNFree(subs); + + if (!tree) + { + SET_VARSIZE(rewrited, HDRSIZETQ); + rewrited->size = 0; + PG_FREE_IF_COPY(ex, 1); + PG_FREE_IF_COPY(subst, 2); + PG_RETURN_POINTER(rewrited); + } + else + { + QTNBinary(tree); + rewrited = QTN2QT(tree); + QTNFree(tree); + } + + PG_FREE_IF_COPY(query, 0); + PG_FREE_IF_COPY(ex, 1); + PG_FREE_IF_COPY(subst, 2); + PG_RETURN_POINTER(rewrited); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsquery_util.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsquery_util.c new file mode 100644 index 00000000000..7b6970a6f82 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsquery_util.c @@ -0,0 +1,448 @@ +/*------------------------------------------------------------------------- + * + * tsquery_util.c + * Utilities for tsquery datatype + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/tsquery_util.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "miscadmin.h" +#include "tsearch/ts_utils.h" +#include "varatt.h" + +/* + * Build QTNode tree for a tsquery given in QueryItem array format. + */ +QTNode * +QT2QTN(QueryItem *in, char *operand) +{ + QTNode *node = (QTNode *) palloc0(sizeof(QTNode)); + + /* since this function recurses, it could be driven to stack overflow. */ + check_stack_depth(); + + node->valnode = in; + + if (in->type == QI_OPR) + { + node->child = (QTNode **) palloc0(sizeof(QTNode *) * 2); + node->child[0] = QT2QTN(in + 1, operand); + node->sign = node->child[0]->sign; + if (in->qoperator.oper == OP_NOT) + node->nchild = 1; + else + { + node->nchild = 2; + node->child[1] = QT2QTN(in + in->qoperator.left, operand); + node->sign |= node->child[1]->sign; + } + } + else if (operand) + { + node->word = operand + in->qoperand.distance; + node->sign = ((uint32) 1) << (((unsigned int) in->qoperand.valcrc) % 32); + } + + return node; +} + +/* + * Free a QTNode tree. + * + * Referenced "word" and "valnode" items are freed if marked as transient + * by flags. + */ +void +QTNFree(QTNode *in) +{ + if (!in) + return; + + /* since this function recurses, it could be driven to stack overflow. */ + check_stack_depth(); + + if (in->valnode->type == QI_VAL && in->word && (in->flags & QTN_WORDFREE) != 0) + pfree(in->word); + + if (in->valnode->type == QI_OPR) + { + int i; + + for (i = 0; i < in->nchild; i++) + QTNFree(in->child[i]); + } + if (in->child) + pfree(in->child); + + if (in->flags & QTN_NEEDFREE) + pfree(in->valnode); + + pfree(in); +} + +/* + * Sort comparator for QTNodes. + * + * The sort order is somewhat arbitrary. 
+ */ +int +QTNodeCompare(QTNode *an, QTNode *bn) +{ + /* since this function recurses, it could be driven to stack overflow. */ + check_stack_depth(); + + if (an->valnode->type != bn->valnode->type) + return (an->valnode->type > bn->valnode->type) ? -1 : 1; + + if (an->valnode->type == QI_OPR) + { + QueryOperator *ao = &an->valnode->qoperator; + QueryOperator *bo = &bn->valnode->qoperator; + + if (ao->oper != bo->oper) + return (ao->oper > bo->oper) ? -1 : 1; + + if (an->nchild != bn->nchild) + return (an->nchild > bn->nchild) ? -1 : 1; + + { + int i, + res; + + for (i = 0; i < an->nchild; i++) + if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0) + return res; + } + + if (ao->oper == OP_PHRASE && ao->distance != bo->distance) + return (ao->distance > bo->distance) ? -1 : 1; + + return 0; + } + else if (an->valnode->type == QI_VAL) + { + QueryOperand *ao = &an->valnode->qoperand; + QueryOperand *bo = &bn->valnode->qoperand; + + if (ao->valcrc != bo->valcrc) + { + return (ao->valcrc > bo->valcrc) ? -1 : 1; + } + + return tsCompareString(an->word, ao->length, bn->word, bo->length, false); + } + else + { + elog(ERROR, "unrecognized QueryItem type: %d", an->valnode->type); + return 0; /* keep compiler quiet */ + } +} + +/* + * qsort comparator for QTNode pointers. + */ +static int +cmpQTN(const void *a, const void *b) +{ + return QTNodeCompare(*(QTNode *const *) a, *(QTNode *const *) b); +} + +/* + * Canonicalize a QTNode tree by sorting the children of AND/OR nodes + * into an arbitrary but well-defined order. + */ +void +QTNSort(QTNode *in) +{ + int i; + + /* since this function recurses, it could be driven to stack overflow. */ + check_stack_depth(); + + if (in->valnode->type != QI_OPR) + return; + + for (i = 0; i < in->nchild; i++) + QTNSort(in->child[i]); + if (in->nchild > 1 && in->valnode->qoperator.oper != OP_PHRASE) + qsort(in->child, in->nchild, sizeof(QTNode *), cmpQTN); +} + +/* + * Are two QTNode trees equal according to QTNodeCompare? + */ +bool +QTNEq(QTNode *a, QTNode *b) +{ + uint32 sign = a->sign & b->sign; + + if (!(sign == a->sign && sign == b->sign)) + return false; + + return (QTNodeCompare(a, b) == 0); +} + +/* + * Remove unnecessary intermediate nodes. For example: + * + * OR OR + * a OR -> a b c + * b c + */ +void +QTNTernary(QTNode *in) +{ + int i; + + /* since this function recurses, it could be driven to stack overflow. */ + check_stack_depth(); + + if (in->valnode->type != QI_OPR) + return; + + for (i = 0; i < in->nchild; i++) + QTNTernary(in->child[i]); + + /* Only AND and OR are associative, so don't flatten other node types */ + if (in->valnode->qoperator.oper != OP_AND && + in->valnode->qoperator.oper != OP_OR) + return; + + for (i = 0; i < in->nchild; i++) + { + QTNode *cc = in->child[i]; + + if (cc->valnode->type == QI_OPR && + in->valnode->qoperator.oper == cc->valnode->qoperator.oper) + { + int oldnchild = in->nchild; + + in->nchild += cc->nchild - 1; + in->child = (QTNode **) repalloc(in->child, in->nchild * sizeof(QTNode *)); + + if (i + 1 != oldnchild) + memmove(in->child + i + cc->nchild, in->child + i + 1, + (oldnchild - i - 1) * sizeof(QTNode *)); + + memcpy(in->child + i, cc->child, cc->nchild * sizeof(QTNode *)); + i += cc->nchild - 1; + + if (cc->flags & QTN_NEEDFREE) + pfree(cc->valnode); + pfree(cc); + } + } +} + +/* + * Convert a tree to binary tree by inserting intermediate nodes. + * (Opposite of QTNTernary) + */ +void +QTNBinary(QTNode *in) +{ + int i; + + /* since this function recurses, it could be driven to stack overflow. 
*/ + check_stack_depth(); + + if (in->valnode->type != QI_OPR) + return; + + for (i = 0; i < in->nchild; i++) + QTNBinary(in->child[i]); + + while (in->nchild > 2) + { + QTNode *nn = (QTNode *) palloc0(sizeof(QTNode)); + + nn->valnode = (QueryItem *) palloc0(sizeof(QueryItem)); + nn->child = (QTNode **) palloc0(sizeof(QTNode *) * 2); + + nn->nchild = 2; + nn->flags = QTN_NEEDFREE; + + nn->child[0] = in->child[0]; + nn->child[1] = in->child[1]; + nn->sign = nn->child[0]->sign | nn->child[1]->sign; + + nn->valnode->type = in->valnode->type; + nn->valnode->qoperator.oper = in->valnode->qoperator.oper; + + in->child[0] = nn; + in->child[1] = in->child[in->nchild - 1]; + in->nchild--; + } +} + +/* + * Count the total length of operand strings in tree (including '\0'- + * terminators) and the total number of nodes. + * Caller must initialize *sumlen and *nnode to zeroes. + */ +static void +cntsize(QTNode *in, int *sumlen, int *nnode) +{ + /* since this function recurses, it could be driven to stack overflow. */ + check_stack_depth(); + + *nnode += 1; + if (in->valnode->type == QI_OPR) + { + int i; + + for (i = 0; i < in->nchild; i++) + cntsize(in->child[i], sumlen, nnode); + } + else + { + *sumlen += in->valnode->qoperand.length + 1; + } +} + +typedef struct +{ + QueryItem *curitem; + char *operand; + char *curoperand; +} QTN2QTState; + +/* + * Recursively convert a QTNode tree into flat tsquery format. + * Caller must have allocated arrays of the correct size. + */ +static void +fillQT(QTN2QTState *state, QTNode *in) +{ + /* since this function recurses, it could be driven to stack overflow. */ + check_stack_depth(); + + if (in->valnode->type == QI_VAL) + { + memcpy(state->curitem, in->valnode, sizeof(QueryOperand)); + + memcpy(state->curoperand, in->word, in->valnode->qoperand.length); + state->curitem->qoperand.distance = state->curoperand - state->operand; + state->curoperand[in->valnode->qoperand.length] = '\0'; + state->curoperand += in->valnode->qoperand.length + 1; + state->curitem++; + } + else + { + QueryItem *curitem = state->curitem; + + Assert(in->valnode->type == QI_OPR); + + memcpy(state->curitem, in->valnode, sizeof(QueryOperator)); + + Assert(in->nchild <= 2); + state->curitem++; + + fillQT(state, in->child[0]); + + if (in->nchild == 2) + { + curitem->qoperator.left = state->curitem - curitem; + fillQT(state, in->child[1]); + } + } +} + +/* + * Build flat tsquery from a QTNode tree. + */ +TSQuery +QTN2QT(QTNode *in) +{ + TSQuery out; + int len; + int sumlen = 0, + nnode = 0; + QTN2QTState state; + + cntsize(in, &sumlen, &nnode); + + if (TSQUERY_TOO_BIG(nnode, sumlen)) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("tsquery is too large"))); + len = COMPUTESIZE(nnode, sumlen); + + out = (TSQuery) palloc0(len); + SET_VARSIZE(out, len); + out->size = nnode; + + state.curitem = GETQUERY(out); + state.operand = state.curoperand = GETOPERAND(out); + + fillQT(&state, in); + return out; +} + +/* + * Copy a QTNode tree. + * + * Modifiable copies of the words and valnodes are made, too. + */ +QTNode * +QTNCopy(QTNode *in) +{ + QTNode *out; + + /* since this function recurses, it could be driven to stack overflow. 
*/ + check_stack_depth(); + + out = (QTNode *) palloc(sizeof(QTNode)); + + *out = *in; + out->valnode = (QueryItem *) palloc(sizeof(QueryItem)); + *(out->valnode) = *(in->valnode); + out->flags |= QTN_NEEDFREE; + + if (in->valnode->type == QI_VAL) + { + out->word = palloc(in->valnode->qoperand.length + 1); + memcpy(out->word, in->word, in->valnode->qoperand.length); + out->word[in->valnode->qoperand.length] = '\0'; + out->flags |= QTN_WORDFREE; + } + else + { + int i; + + out->child = (QTNode **) palloc(sizeof(QTNode *) * in->nchild); + + for (i = 0; i < in->nchild; i++) + out->child[i] = QTNCopy(in->child[i]); + } + + return out; +} + +/* + * Clear the specified flag bit(s) in all nodes of a QTNode tree. + */ +void +QTNClearFlags(QTNode *in, uint32 flags) +{ + /* since this function recurses, it could be driven to stack overflow. */ + check_stack_depth(); + + in->flags &= ~flags; + + if (in->valnode->type != QI_VAL) + { + int i; + + for (i = 0; i < in->nchild; i++) + QTNClearFlags(in->child[i], flags); + } +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsrank.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsrank.c new file mode 100644 index 00000000000..a5db96f3c89 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsrank.c @@ -0,0 +1,1012 @@ +/*------------------------------------------------------------------------- + * + * tsrank.c + * rank tsvector by tsquery + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/tsrank.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <limits.h> +#include <math.h> + +#include "miscadmin.h" +#include "tsearch/ts_utils.h" +#include "utils/array.h" +#include "utils/builtins.h" + +static const float weights[] = {0.1f, 0.2f, 0.4f, 1.0f}; + +#define wpos(wep) ( w[ WEP_GETWEIGHT(wep) ] ) + +#define RANK_NO_NORM 0x00 +#define RANK_NORM_LOGLENGTH 0x01 +#define RANK_NORM_LENGTH 0x02 +#define RANK_NORM_EXTDIST 0x04 +#define RANK_NORM_UNIQ 0x08 +#define RANK_NORM_LOGUNIQ 0x10 +#define RANK_NORM_RDIVRPLUS1 0x20 +#define DEF_NORM_METHOD RANK_NO_NORM + +static float calc_rank_or(const float *w, TSVector t, TSQuery q); +static float calc_rank_and(const float *w, TSVector t, TSQuery q); + +/* + * Returns a weight of a word collocation + */ +static float4 +word_distance(int32 w) +{ + if (w > 100) + return 1e-30f; + + return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2)); +} + +static int +cnt_length(TSVector t) +{ + WordEntry *ptr = ARRPTR(t), + *end = (WordEntry *) STRPTR(t); + int len = 0; + + while (ptr < end) + { + int clen = POSDATALEN(t, ptr); + + if (clen == 0) + len += 1; + else + len += clen; + + ptr++; + } + + return len; +} + + +#define WordECompareQueryItem(e,q,p,i,m) \ + tsCompareString((q) + (i)->distance, (i)->length, \ + (e) + (p)->pos, (p)->len, (m)) + + +/* + * Returns a pointer to a WordEntry's array corresponding to 'item' from + * tsvector 't'. 'q' is the TSQuery containing 'item'. + * Returns NULL if not found. 
+ */ +static WordEntry * +find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem) +{ + WordEntry *StopLow = ARRPTR(t); + WordEntry *StopHigh = (WordEntry *) STRPTR(t); + WordEntry *StopMiddle = StopHigh; + int difference; + + *nitem = 0; + + /* Loop invariant: StopLow <= item < StopHigh */ + while (StopLow < StopHigh) + { + StopMiddle = StopLow + (StopHigh - StopLow) / 2; + difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false); + if (difference == 0) + { + StopHigh = StopMiddle; + *nitem = 1; + break; + } + else if (difference > 0) + StopLow = StopMiddle + 1; + else + StopHigh = StopMiddle; + } + + if (item->prefix) + { + if (StopLow >= StopHigh) + StopMiddle = StopHigh; + + *nitem = 0; + + while (StopMiddle < (WordEntry *) STRPTR(t) && + WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0) + { + (*nitem)++; + StopMiddle++; + } + } + + return (*nitem > 0) ? StopHigh : NULL; +} + + +/* + * sort QueryOperands by (length, word) + */ +static int +compareQueryOperand(const void *a, const void *b, void *arg) +{ + char *operand = (char *) arg; + QueryOperand *qa = (*(QueryOperand *const *) a); + QueryOperand *qb = (*(QueryOperand *const *) b); + + return tsCompareString(operand + qa->distance, qa->length, + operand + qb->distance, qb->length, + false); +} + +/* + * Returns a sorted, de-duplicated array of QueryOperands in a query. + * The returned QueryOperands are pointers to the original QueryOperands + * in the query. + * + * Length of the returned array is stored in *size + */ +static QueryOperand ** +SortAndUniqItems(TSQuery q, int *size) +{ + char *operand = GETOPERAND(q); + QueryItem *item = GETQUERY(q); + QueryOperand **res, + **ptr, + **prevptr; + + ptr = res = (QueryOperand **) palloc(sizeof(QueryOperand *) * *size); + + /* Collect all operands from the tree to res */ + while ((*size)--) + { + if (item->type == QI_VAL) + { + *ptr = (QueryOperand *) item; + ptr++; + } + item++; + } + + *size = ptr - res; + if (*size < 2) + return res; + + qsort_arg(res, *size, sizeof(QueryOperand *), compareQueryOperand, operand); + + ptr = res + 1; + prevptr = res; + + /* remove duplicates */ + while (ptr - res < *size) + { + if (compareQueryOperand((void *) ptr, (void *) prevptr, (void *) operand) != 0) + { + prevptr++; + *prevptr = *ptr; + } + ptr++; + } + + *size = prevptr + 1 - res; + return res; +} + +static float +calc_rank_and(const float *w, TSVector t, TSQuery q) +{ + WordEntryPosVector **pos; + WordEntryPosVector1 posnull; + WordEntryPosVector *POSNULL; + int i, + k, + l, + p; + WordEntry *entry, + *firstentry; + WordEntryPos *post, + *ct; + int32 dimt, + lenct, + dist, + nitem; + float res = -1.0; + QueryOperand **item; + int size = q->size; + + item = SortAndUniqItems(q, &size); + if (size < 2) + { + pfree(item); + return calc_rank_or(w, t, q); + } + pos = (WordEntryPosVector **) palloc0(sizeof(WordEntryPosVector *) * q->size); + + /* A dummy WordEntryPos array to use when haspos is false */ + posnull.npos = 1; + posnull.pos[0] = 0; + WEP_SETPOS(posnull.pos[0], MAXENTRYPOS - 1); + POSNULL = (WordEntryPosVector *) &posnull; + + for (i = 0; i < size; i++) + { + firstentry = entry = find_wordentry(t, q, item[i], &nitem); + if (!entry) + continue; + + while (entry - firstentry < nitem) + { + if (entry->haspos) + pos[i] = _POSVECPTR(t, entry); + else + pos[i] = POSNULL; + + dimt = pos[i]->npos; + post = pos[i]->pos; + for (k = 0; k < i; k++) + { + if (!pos[k]) + continue; + lenct = pos[k]->npos; + ct = pos[k]->pos; + for (l 
= 0; l < dimt; l++) + { + for (p = 0; p < lenct; p++) + { + dist = abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p])); + if (dist || (dist == 0 && (pos[i] == POSNULL || pos[k] == POSNULL))) + { + float curw; + + if (!dist) + dist = MAXENTRYPOS; + curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist)); + res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw); + } + } + } + } + + entry++; + } + } + pfree(pos); + pfree(item); + return res; +} + +static float +calc_rank_or(const float *w, TSVector t, TSQuery q) +{ + WordEntry *entry, + *firstentry; + WordEntryPosVector1 posnull; + WordEntryPos *post; + int32 dimt, + j, + i, + nitem; + float res = 0.0; + QueryOperand **item; + int size = q->size; + + /* A dummy WordEntryPos array to use when haspos is false */ + posnull.npos = 1; + posnull.pos[0] = 0; + + item = SortAndUniqItems(q, &size); + + for (i = 0; i < size; i++) + { + float resj, + wjm; + int32 jm; + + firstentry = entry = find_wordentry(t, q, item[i], &nitem); + if (!entry) + continue; + + while (entry - firstentry < nitem) + { + if (entry->haspos) + { + dimt = POSDATALEN(t, entry); + post = POSDATAPTR(t, entry); + } + else + { + dimt = posnull.npos; + post = posnull.pos; + } + + resj = 0.0; + wjm = -1.0; + jm = 0; + for (j = 0; j < dimt; j++) + { + resj = resj + wpos(post[j]) / ((j + 1) * (j + 1)); + if (wpos(post[j]) > wjm) + { + wjm = wpos(post[j]); + jm = j; + } + } +/* + limit (sum(1/i^2),i=1,inf) = pi^2/6 + resj = sum(wi/i^2),i=1,noccurrence, + wi - should be sorted desc, + don't sort for now, just choose maximum weight. This should be corrected + Oleg Bartunov +*/ + res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685; + + entry++; + } + } + if (size > 0) + res = res / size; + pfree(item); + return res; +} + +static float +calc_rank(const float *w, TSVector t, TSQuery q, int32 method) +{ + QueryItem *item = GETQUERY(q); + float res = 0.0; + int len; + + if (!t->size || !q->size) + return 0.0; + + /* XXX: What about NOT? */ + res = (item->type == QI_OPR && (item->qoperator.oper == OP_AND || + item->qoperator.oper == OP_PHRASE)) ? + calc_rank_and(w, t, q) : + calc_rank_or(w, t, q); + + if (res < 0) + res = 1e-20f; + + if ((method & RANK_NORM_LOGLENGTH) && t->size > 0) + res /= log((double) (cnt_length(t) + 1)) / log(2.0); + + if (method & RANK_NORM_LENGTH) + { + len = cnt_length(t); + if (len > 0) + res /= (float) len; + } + + /* RANK_NORM_EXTDIST not applicable */ + + if ((method & RANK_NORM_UNIQ) && t->size > 0) + res /= (float) (t->size); + + if ((method & RANK_NORM_LOGUNIQ) && t->size > 0) + res /= log((double) (t->size + 1)) / log(2.0); + + if (method & RANK_NORM_RDIVRPLUS1) + res /= (res + 1); + + return res; +} + +static const float * +getWeights(ArrayType *win) +{ + static __thread float ws[lengthof(weights)]; + int i; + float4 *arrdata; + + if (win == NULL) + return weights; + + if (ARR_NDIM(win) != 1) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("array of weight must be one-dimensional"))); + + if (ArrayGetNItems(ARR_NDIM(win), ARR_DIMS(win)) < lengthof(weights)) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("array of weight is too short"))); + + if (array_contains_nulls(win)) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("array of weight must not contain nulls"))); + + arrdata = (float4 *) ARR_DATA_PTR(win); + for (i = 0; i < lengthof(weights); i++) + { + ws[i] = (arrdata[i] >= 0) ? 
arrdata[i] : weights[i]; + if (ws[i] > 1.0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("weight out of range"))); + } + + return ws; +} + +Datum +ts_rank_wttf(PG_FUNCTION_ARGS) +{ + ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + TSVector txt = PG_GETARG_TSVECTOR(1); + TSQuery query = PG_GETARG_TSQUERY(2); + int method = PG_GETARG_INT32(3); + float res; + + res = calc_rank(getWeights(win), txt, query, method); + + PG_FREE_IF_COPY(win, 0); + PG_FREE_IF_COPY(txt, 1); + PG_FREE_IF_COPY(query, 2); + PG_RETURN_FLOAT4(res); +} + +Datum +ts_rank_wtt(PG_FUNCTION_ARGS) +{ + ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + TSVector txt = PG_GETARG_TSVECTOR(1); + TSQuery query = PG_GETARG_TSQUERY(2); + float res; + + res = calc_rank(getWeights(win), txt, query, DEF_NORM_METHOD); + + PG_FREE_IF_COPY(win, 0); + PG_FREE_IF_COPY(txt, 1); + PG_FREE_IF_COPY(query, 2); + PG_RETURN_FLOAT4(res); +} + +Datum +ts_rank_ttf(PG_FUNCTION_ARGS) +{ + TSVector txt = PG_GETARG_TSVECTOR(0); + TSQuery query = PG_GETARG_TSQUERY(1); + int method = PG_GETARG_INT32(2); + float res; + + res = calc_rank(getWeights(NULL), txt, query, method); + + PG_FREE_IF_COPY(txt, 0); + PG_FREE_IF_COPY(query, 1); + PG_RETURN_FLOAT4(res); +} + +Datum +ts_rank_tt(PG_FUNCTION_ARGS) +{ + TSVector txt = PG_GETARG_TSVECTOR(0); + TSQuery query = PG_GETARG_TSQUERY(1); + float res; + + res = calc_rank(getWeights(NULL), txt, query, DEF_NORM_METHOD); + + PG_FREE_IF_COPY(txt, 0); + PG_FREE_IF_COPY(query, 1); + PG_RETURN_FLOAT4(res); +} + +typedef struct +{ + union + { + struct + { /* compiled doc representation */ + QueryItem **items; + int16 nitem; + } query; + struct + { /* struct is used for preparing doc + * representation */ + QueryItem *item; + WordEntry *entry; + } map; + } data; + WordEntryPos pos; +} DocRepresentation; + +static int +compareDocR(const void *va, const void *vb) +{ + const DocRepresentation *a = (const DocRepresentation *) va; + const DocRepresentation *b = (const DocRepresentation *) vb; + + if (WEP_GETPOS(a->pos) == WEP_GETPOS(b->pos)) + { + if (WEP_GETWEIGHT(a->pos) == WEP_GETWEIGHT(b->pos)) + { + if (a->data.map.entry == b->data.map.entry) + return 0; + + return (a->data.map.entry > b->data.map.entry) ? 1 : -1; + } + + return (WEP_GETWEIGHT(a->pos) > WEP_GETWEIGHT(b->pos)) ? 1 : -1; + } + + return (WEP_GETPOS(a->pos) > WEP_GETPOS(b->pos)) ? 
1 : -1; +} + +#define MAXQROPOS MAXENTRYPOS +typedef struct +{ + bool operandexists; + bool reverseinsert; /* indicates insert order, true means + * descending order */ + uint32 npos; + WordEntryPos pos[MAXQROPOS]; +} QueryRepresentationOperand; + +typedef struct +{ + TSQuery query; + QueryRepresentationOperand *operandData; +} QueryRepresentation; + +#define QR_GET_OPERAND_DATA(q, v) \ + ( (q)->operandData + (((QueryItem*)(v)) - GETQUERY((q)->query)) ) + +/* + * TS_execute callback for matching a tsquery operand to QueryRepresentation + */ +static TSTernaryValue +checkcondition_QueryOperand(void *checkval, QueryOperand *val, + ExecPhraseData *data) +{ + QueryRepresentation *qr = (QueryRepresentation *) checkval; + QueryRepresentationOperand *opData = QR_GET_OPERAND_DATA(qr, val); + + if (!opData->operandexists) + return TS_NO; + + if (data) + { + data->npos = opData->npos; + data->pos = opData->pos; + if (opData->reverseinsert) + data->pos += MAXQROPOS - opData->npos; + } + + return TS_YES; +} + +typedef struct +{ + int pos; + int p; + int q; + DocRepresentation *begin; + DocRepresentation *end; +} CoverExt; + +static void +resetQueryRepresentation(QueryRepresentation *qr, bool reverseinsert) +{ + int i; + + for (i = 0; i < qr->query->size; i++) + { + qr->operandData[i].operandexists = false; + qr->operandData[i].reverseinsert = reverseinsert; + qr->operandData[i].npos = 0; + } +} + +static void +fillQueryRepresentationData(QueryRepresentation *qr, DocRepresentation *entry) +{ + int i; + int lastPos; + QueryRepresentationOperand *opData; + + for (i = 0; i < entry->data.query.nitem; i++) + { + if (entry->data.query.items[i]->type != QI_VAL) + continue; + + opData = QR_GET_OPERAND_DATA(qr, entry->data.query.items[i]); + + opData->operandexists = true; + + if (opData->npos == 0) + { + lastPos = (opData->reverseinsert) ? (MAXQROPOS - 1) : 0; + opData->pos[lastPos] = entry->pos; + opData->npos++; + continue; + } + + lastPos = opData->reverseinsert ? + (MAXQROPOS - opData->npos) : + (opData->npos - 1); + + if (WEP_GETPOS(opData->pos[lastPos]) != WEP_GETPOS(entry->pos)) + { + lastPos = opData->reverseinsert ? + (MAXQROPOS - 1 - opData->npos) : + (opData->npos); + + opData->pos[lastPos] = entry->pos; + opData->npos++; + } + } +} + +static bool +Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext) +{ + DocRepresentation *ptr; + int lastpos = ext->pos; + bool found = false; + + /* + * since this function recurses, it could be driven to stack overflow. + * (though any decent compiler will optimize away the tail-recursion. + */ + check_stack_depth(); + + resetQueryRepresentation(qr, false); + + ext->p = INT_MAX; + ext->q = 0; + ptr = doc + ext->pos; + + /* find upper bound of cover from current position, move up */ + while (ptr - doc < len) + { + fillQueryRepresentationData(qr, ptr); + + if (TS_execute(GETQUERY(qr->query), (void *) qr, + TS_EXEC_EMPTY, checkcondition_QueryOperand)) + { + if (WEP_GETPOS(ptr->pos) > ext->q) + { + ext->q = WEP_GETPOS(ptr->pos); + ext->end = ptr; + lastpos = ptr - doc; + found = true; + } + break; + } + ptr++; + } + + if (!found) + return false; + + resetQueryRepresentation(qr, true); + + ptr = doc + lastpos; + + /* find lower bound of cover from found upper bound, move down */ + while (ptr >= doc + ext->pos) + { + /* + * we scan doc from right to left, so pos info in reverse order! 
+ */ + fillQueryRepresentationData(qr, ptr); + + if (TS_execute(GETQUERY(qr->query), (void *) qr, + TS_EXEC_EMPTY, checkcondition_QueryOperand)) + { + if (WEP_GETPOS(ptr->pos) < ext->p) + { + ext->begin = ptr; + ext->p = WEP_GETPOS(ptr->pos); + } + break; + } + ptr--; + } + + if (ext->p <= ext->q) + { + /* + * set position for next try to next lexeme after beginning of found + * cover + */ + ext->pos = (ptr - doc) + 1; + return true; + } + + ext->pos++; + return Cover(doc, len, qr, ext); +} + +static DocRepresentation * +get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen) +{ + QueryItem *item = GETQUERY(qr->query); + WordEntry *entry, + *firstentry; + WordEntryPos *post; + int32 dimt, /* number of 'post' items */ + j, + i, + nitem; + int len = qr->query->size * 4, + cur = 0; + DocRepresentation *doc; + + doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len); + + /* + * Iterate through query to make DocRepresentation for words and it's + * entries satisfied by query + */ + for (i = 0; i < qr->query->size; i++) + { + QueryOperand *curoperand; + + if (item[i].type != QI_VAL) + continue; + + curoperand = &item[i].qoperand; + + firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem); + if (!entry) + continue; + + /* iterations over entries in tsvector */ + while (entry - firstentry < nitem) + { + if (entry->haspos) + { + dimt = POSDATALEN(txt, entry); + post = POSDATAPTR(txt, entry); + } + else + { + /* ignore words without positions */ + entry++; + continue; + } + + while (cur + dimt >= len) + { + len *= 2; + doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len); + } + + /* iterations over entry's positions */ + for (j = 0; j < dimt; j++) + { + if (curoperand->weight == 0 || + curoperand->weight & (1 << WEP_GETWEIGHT(post[j]))) + { + doc[cur].pos = post[j]; + doc[cur].data.map.entry = entry; + doc[cur].data.map.item = (QueryItem *) curoperand; + cur++; + } + } + + entry++; + } + } + + if (cur > 0) + { + DocRepresentation *rptr = doc + 1, + *wptr = doc, + storage; + + /* + * Sort representation in ascending order by pos and entry + */ + qsort(doc, cur, sizeof(DocRepresentation), compareDocR); + + /* + * Join QueryItem per WordEntry and it's position + */ + storage.pos = doc->pos; + storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size); + storage.data.query.items[0] = doc->data.map.item; + storage.data.query.nitem = 1; + + while (rptr - doc < cur) + { + if (rptr->pos == (rptr - 1)->pos && + rptr->data.map.entry == (rptr - 1)->data.map.entry) + { + storage.data.query.items[storage.data.query.nitem] = rptr->data.map.item; + storage.data.query.nitem++; + } + else + { + *wptr = storage; + wptr++; + storage.pos = rptr->pos; + storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size); + storage.data.query.items[0] = rptr->data.map.item; + storage.data.query.nitem = 1; + } + + rptr++; + } + + *wptr = storage; + wptr++; + + *doclen = wptr - doc; + return doc; + } + + pfree(doc); + return NULL; +} + +static float4 +calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method) +{ + DocRepresentation *doc; + int len, + i, + doclen = 0; + CoverExt ext; + double Wdoc = 0.0; + double invws[lengthof(weights)]; + double SumDist = 0.0, + PrevExtPos = 0.0; + int NExtent = 0; + QueryRepresentation qr; + + + for (i = 0; i < lengthof(weights); i++) + { + invws[i] = ((double) ((arrdata[i] >= 0) ? 
arrdata[i] : weights[i])); + if (invws[i] > 1.0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("weight out of range"))); + invws[i] = 1.0 / invws[i]; + } + + qr.query = query; + qr.operandData = (QueryRepresentationOperand *) + palloc0(sizeof(QueryRepresentationOperand) * query->size); + + doc = get_docrep(txt, &qr, &doclen); + if (!doc) + { + pfree(qr.operandData); + return 0.0; + } + + MemSet(&ext, 0, sizeof(CoverExt)); + while (Cover(doc, doclen, &qr, &ext)) + { + double Cpos = 0.0; + double InvSum = 0.0; + double CurExtPos; + int nNoise; + DocRepresentation *ptr = ext.begin; + + while (ptr <= ext.end) + { + InvSum += invws[WEP_GETWEIGHT(ptr->pos)]; + ptr++; + } + + Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum; + + /* + * if doc are big enough then ext.q may be equal to ext.p due to limit + * of positional information. In this case we approximate number of + * noise word as half cover's length + */ + nNoise = (ext.q - ext.p) - (ext.end - ext.begin); + if (nNoise < 0) + nNoise = (ext.end - ext.begin) / 2; + Wdoc += Cpos / ((double) (1 + nNoise)); + + CurExtPos = ((double) (ext.q + ext.p)) / 2.0; + if (NExtent > 0 && CurExtPos > PrevExtPos /* prevent division by + * zero in a case of + * multiple lexize */ ) + SumDist += 1.0 / (CurExtPos - PrevExtPos); + + PrevExtPos = CurExtPos; + NExtent++; + } + + if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0) + Wdoc /= log((double) (cnt_length(txt) + 1)); + + if (method & RANK_NORM_LENGTH) + { + len = cnt_length(txt); + if (len > 0) + Wdoc /= (double) len; + } + + if ((method & RANK_NORM_EXTDIST) && NExtent > 0 && SumDist > 0) + Wdoc /= ((double) NExtent) / SumDist; + + if ((method & RANK_NORM_UNIQ) && txt->size > 0) + Wdoc /= (double) (txt->size); + + if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0) + Wdoc /= log((double) (txt->size + 1)) / log(2.0); + + if (method & RANK_NORM_RDIVRPLUS1) + Wdoc /= (Wdoc + 1); + + pfree(doc); + + pfree(qr.operandData); + + return (float4) Wdoc; +} + +Datum +ts_rankcd_wttf(PG_FUNCTION_ARGS) +{ + ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + TSVector txt = PG_GETARG_TSVECTOR(1); + TSQuery query = PG_GETARG_TSQUERY(2); + int method = PG_GETARG_INT32(3); + float res; + + res = calc_rank_cd(getWeights(win), txt, query, method); + + PG_FREE_IF_COPY(win, 0); + PG_FREE_IF_COPY(txt, 1); + PG_FREE_IF_COPY(query, 2); + PG_RETURN_FLOAT4(res); +} + +Datum +ts_rankcd_wtt(PG_FUNCTION_ARGS) +{ + ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + TSVector txt = PG_GETARG_TSVECTOR(1); + TSQuery query = PG_GETARG_TSQUERY(2); + float res; + + res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD); + + PG_FREE_IF_COPY(win, 0); + PG_FREE_IF_COPY(txt, 1); + PG_FREE_IF_COPY(query, 2); + PG_RETURN_FLOAT4(res); +} + +Datum +ts_rankcd_ttf(PG_FUNCTION_ARGS) +{ + TSVector txt = PG_GETARG_TSVECTOR(0); + TSQuery query = PG_GETARG_TSQUERY(1); + int method = PG_GETARG_INT32(2); + float res; + + res = calc_rank_cd(getWeights(NULL), txt, query, method); + + PG_FREE_IF_COPY(txt, 0); + PG_FREE_IF_COPY(query, 1); + PG_RETURN_FLOAT4(res); +} + +Datum +ts_rankcd_tt(PG_FUNCTION_ARGS) +{ + TSVector txt = PG_GETARG_TSVECTOR(0); + TSQuery query = PG_GETARG_TSQUERY(1); + float res; + + res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD); + + PG_FREE_IF_COPY(txt, 0); + PG_FREE_IF_COPY(query, 1); + PG_RETURN_FLOAT4(res); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsvector.c 
b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsvector.c new file mode 100644 index 00000000000..dff0bfe41fc --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsvector.c @@ -0,0 +1,558 @@ +/*------------------------------------------------------------------------- + * + * tsvector.c + * I/O functions for tsvector + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/tsvector.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "libpq/pqformat.h" +#include "nodes/miscnodes.h" +#include "tsearch/ts_locale.h" +#include "tsearch/ts_utils.h" +#include "utils/builtins.h" +#include "utils/memutils.h" +#include "varatt.h" + +typedef struct +{ + WordEntry entry; /* must be first! */ + WordEntryPos *pos; + int poslen; /* number of elements in pos */ +} WordEntryIN; + + +/* Compare two WordEntryPos values for qsort */ +int +compareWordEntryPos(const void *a, const void *b) +{ + int apos = WEP_GETPOS(*(const WordEntryPos *) a); + int bpos = WEP_GETPOS(*(const WordEntryPos *) b); + + if (apos == bpos) + return 0; + return (apos > bpos) ? 1 : -1; +} + +/* + * Removes duplicate pos entries. If there's two entries with same pos but + * different weight, the higher weight is retained, so we can't use + * qunique here. + * + * Returns new length. + */ +static int +uniquePos(WordEntryPos *a, int l) +{ + WordEntryPos *ptr, + *res; + + if (l <= 1) + return l; + + qsort(a, l, sizeof(WordEntryPos), compareWordEntryPos); + + res = a; + ptr = a + 1; + while (ptr - a < l) + { + if (WEP_GETPOS(*ptr) != WEP_GETPOS(*res)) + { + res++; + *res = *ptr; + if (res - a >= MAXNUMPOS - 1 || + WEP_GETPOS(*res) == MAXENTRYPOS - 1) + break; + } + else if (WEP_GETWEIGHT(*ptr) > WEP_GETWEIGHT(*res)) + WEP_SETWEIGHT(*res, WEP_GETWEIGHT(*ptr)); + ptr++; + } + + return res + 1 - a; +} + +/* Compare two WordEntryIN values for qsort */ +static int +compareentry(const void *va, const void *vb, void *arg) +{ + const WordEntryIN *a = (const WordEntryIN *) va; + const WordEntryIN *b = (const WordEntryIN *) vb; + char *BufferStr = (char *) arg; + + return tsCompareString(&BufferStr[a->entry.pos], a->entry.len, + &BufferStr[b->entry.pos], b->entry.len, + false); +} + +/* + * Sort an array of WordEntryIN, remove duplicates. + * *outbuflen receives the amount of space needed for strings and positions. 
+ */ +static int +uniqueentry(WordEntryIN *a, int l, char *buf, int *outbuflen) +{ + int buflen; + WordEntryIN *ptr, + *res; + + Assert(l >= 1); + + if (l > 1) + qsort_arg(a, l, sizeof(WordEntryIN), compareentry, buf); + + buflen = 0; + res = a; + ptr = a + 1; + while (ptr - a < l) + { + if (!(ptr->entry.len == res->entry.len && + strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], + res->entry.len) == 0)) + { + /* done accumulating data into *res, count space needed */ + buflen += res->entry.len; + if (res->entry.haspos) + { + res->poslen = uniquePos(res->pos, res->poslen); + buflen = SHORTALIGN(buflen); + buflen += res->poslen * sizeof(WordEntryPos) + sizeof(uint16); + } + res++; + if (res != ptr) + memcpy(res, ptr, sizeof(WordEntryIN)); + } + else if (ptr->entry.haspos) + { + if (res->entry.haspos) + { + /* append ptr's positions to res's positions */ + int newlen = ptr->poslen + res->poslen; + + res->pos = (WordEntryPos *) + repalloc(res->pos, newlen * sizeof(WordEntryPos)); + memcpy(&res->pos[res->poslen], ptr->pos, + ptr->poslen * sizeof(WordEntryPos)); + res->poslen = newlen; + pfree(ptr->pos); + } + else + { + /* just give ptr's positions to pos */ + res->entry.haspos = 1; + res->pos = ptr->pos; + res->poslen = ptr->poslen; + } + } + ptr++; + } + + /* count space needed for last item */ + buflen += res->entry.len; + if (res->entry.haspos) + { + res->poslen = uniquePos(res->pos, res->poslen); + buflen = SHORTALIGN(buflen); + buflen += res->poslen * sizeof(WordEntryPos) + sizeof(uint16); + } + + *outbuflen = buflen; + return res + 1 - a; +} + +static int +WordEntryCMP(WordEntry *a, WordEntry *b, char *buf) +{ + return compareentry(a, b, buf); +} + + +Datum +tsvectorin(PG_FUNCTION_ARGS) +{ + char *buf = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + TSVectorParseState state; + WordEntryIN *arr; + int totallen; + int arrlen; /* allocated size of arr */ + WordEntry *inarr; + int len = 0; + TSVector in; + int i; + char *token; + int toklen; + WordEntryPos *pos; + int poslen; + char *strbuf; + int stroff; + + /* + * Tokens are appended to tmpbuf, cur is a pointer to the end of used + * space in tmpbuf. 
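tsvectorin() below grows tmpbuf by doubling whenever the next token would not fit, then re-anchors cur against the possibly relocated buffer. A standalone sketch of that append pattern, with malloc/realloc standing in for palloc/repalloc and error checks omitted (the TokenBuf type is invented for this sketch):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    typedef struct
    {
        char  *buf;   /* token storage          */
        char  *cur;   /* end of used space      */
        size_t len;   /* allocated size of buf  */
    } TokenBuf;

    static void token_append(TokenBuf *tb, const char *tok, size_t toklen)
    {
        /* Double the buffer until the new token fits, preserving the offset. */
        while ((size_t) (tb->cur - tb->buf) + toklen >= tb->len)
        {
            size_t used = tb->cur - tb->buf;

            tb->len *= 2;
            tb->buf = realloc(tb->buf, tb->len);   /* error check omitted */
            tb->cur = tb->buf + used;
        }
        memcpy(tb->cur, tok, toklen);
        tb->cur += toklen;
    }

    int main(void)
    {
        TokenBuf tb = {malloc(4), NULL, 4};

        tb.cur = tb.buf;
        token_append(&tb, "fat", 3);
        token_append(&tb, "cats", 4);
        printf("used %zu of %zu bytes\n", (size_t) (tb.cur - tb.buf), tb.len);
        free(tb.buf);
        return 0;
    }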
+ */ + char *tmpbuf; + char *cur; + int buflen = 256; /* allocated size of tmpbuf */ + + state = init_tsvector_parser(buf, 0, escontext); + + arrlen = 64; + arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen); + cur = tmpbuf = (char *) palloc(buflen); + + while (gettoken_tsvector(state, &token, &toklen, &pos, &poslen, NULL)) + { + if (toklen >= MAXSTRLEN) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("word is too long (%ld bytes, max %ld bytes)", + (long) toklen, + (long) (MAXSTRLEN - 1)))); + + if (cur - tmpbuf > MAXSTRPOS) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("string is too long for tsvector (%ld bytes, max %ld bytes)", + (long) (cur - tmpbuf), (long) MAXSTRPOS))); + + /* + * Enlarge buffers if needed + */ + if (len >= arrlen) + { + arrlen *= 2; + arr = (WordEntryIN *) + repalloc(arr, sizeof(WordEntryIN) * arrlen); + } + while ((cur - tmpbuf) + toklen >= buflen) + { + int dist = cur - tmpbuf; + + buflen *= 2; + tmpbuf = (char *) repalloc(tmpbuf, buflen); + cur = tmpbuf + dist; + } + arr[len].entry.len = toklen; + arr[len].entry.pos = cur - tmpbuf; + memcpy(cur, token, toklen); + cur += toklen; + + if (poslen != 0) + { + arr[len].entry.haspos = 1; + arr[len].pos = pos; + arr[len].poslen = poslen; + } + else + { + arr[len].entry.haspos = 0; + arr[len].pos = NULL; + arr[len].poslen = 0; + } + len++; + } + + close_tsvector_parser(state); + + /* Did gettoken_tsvector fail? */ + if (SOFT_ERROR_OCCURRED(escontext)) + PG_RETURN_NULL(); + + if (len > 0) + len = uniqueentry(arr, len, tmpbuf, &buflen); + else + buflen = 0; + + if (buflen > MAXSTRPOS) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("string is too long for tsvector (%d bytes, max %d bytes)", buflen, MAXSTRPOS))); + + totallen = CALCDATASIZE(len, buflen); + in = (TSVector) palloc0(totallen); + SET_VARSIZE(in, totallen); + in->size = len; + inarr = ARRPTR(in); + strbuf = STRPTR(in); + stroff = 0; + for (i = 0; i < len; i++) + { + memcpy(strbuf + stroff, &tmpbuf[arr[i].entry.pos], arr[i].entry.len); + arr[i].entry.pos = stroff; + stroff += arr[i].entry.len; + if (arr[i].entry.haspos) + { + /* This should be unreachable because of MAXNUMPOS restrictions */ + if (arr[i].poslen > 0xFFFF) + elog(ERROR, "positions array too long"); + + /* Copy number of positions */ + stroff = SHORTALIGN(stroff); + *(uint16 *) (strbuf + stroff) = (uint16) arr[i].poslen; + stroff += sizeof(uint16); + + /* Copy positions */ + memcpy(strbuf + stroff, arr[i].pos, arr[i].poslen * sizeof(WordEntryPos)); + stroff += arr[i].poslen * sizeof(WordEntryPos); + + pfree(arr[i].pos); + } + inarr[i] = arr[i].entry; + } + + Assert((strbuf + stroff - (char *) in) == totallen); + + PG_RETURN_TSVECTOR(in); +} + +Datum +tsvectorout(PG_FUNCTION_ARGS) +{ + TSVector out = PG_GETARG_TSVECTOR(0); + char *outbuf; + int32 i, + lenbuf = 0, + pp; + WordEntry *ptr = ARRPTR(out); + char *curbegin, + *curin, + *curout; + + lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ; + for (i = 0; i < out->size; i++) + { + lenbuf += ptr[i].len * 2 * pg_database_encoding_max_length() /* for escape */ ; + if (ptr[i].haspos) + lenbuf += 1 /* : */ + 7 /* int2 + , + weight */ * POSDATALEN(out, &(ptr[i])); + } + + curout = outbuf = (char *) palloc(lenbuf); + for (i = 0; i < out->size; i++) + { + curbegin = curin = STRPTR(out) + ptr->pos; + if (i != 0) + *curout++ = ' '; + *curout++ = '\''; + while (curin - curbegin < ptr->len) + { + int len = 
pg_mblen(curin); + + if (t_iseq(curin, '\'')) + *curout++ = '\''; + else if (t_iseq(curin, '\\')) + *curout++ = '\\'; + + while (len--) + *curout++ = *curin++; + } + + *curout++ = '\''; + if ((pp = POSDATALEN(out, ptr)) != 0) + { + WordEntryPos *wptr; + + *curout++ = ':'; + wptr = POSDATAPTR(out, ptr); + while (pp) + { + curout += sprintf(curout, "%d", WEP_GETPOS(*wptr)); + switch (WEP_GETWEIGHT(*wptr)) + { + case 3: + *curout++ = 'A'; + break; + case 2: + *curout++ = 'B'; + break; + case 1: + *curout++ = 'C'; + break; + case 0: + default: + break; + } + + if (pp > 1) + *curout++ = ','; + pp--; + wptr++; + } + } + ptr++; + } + + *curout = '\0'; + PG_FREE_IF_COPY(out, 0); + PG_RETURN_CSTRING(outbuf); +} + +/* + * Binary Input / Output functions. The binary format is as follows: + * + * uint32 number of lexemes + * + * for each lexeme: + * lexeme text in client encoding, null-terminated + * uint16 number of positions + * for each position: + * uint16 WordEntryPos + */ + +Datum +tsvectorsend(PG_FUNCTION_ARGS) +{ + TSVector vec = PG_GETARG_TSVECTOR(0); + StringInfoData buf; + int i, + j; + WordEntry *weptr = ARRPTR(vec); + + pq_begintypsend(&buf); + + pq_sendint32(&buf, vec->size); + for (i = 0; i < vec->size; i++) + { + uint16 npos; + + /* + * the strings in the TSVector array are not null-terminated, so we + * have to send the null-terminator separately + */ + pq_sendtext(&buf, STRPTR(vec) + weptr->pos, weptr->len); + pq_sendbyte(&buf, '\0'); + + npos = POSDATALEN(vec, weptr); + pq_sendint16(&buf, npos); + + if (npos > 0) + { + WordEntryPos *wepptr = POSDATAPTR(vec, weptr); + + for (j = 0; j < npos; j++) + pq_sendint16(&buf, wepptr[j]); + } + weptr++; + } + + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +Datum +tsvectorrecv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + TSVector vec; + int i; + int32 nentries; + int datalen; /* number of bytes used in the variable size + * area after fixed size TSVector header and + * WordEntries */ + Size hdrlen; + Size len; /* allocated size of vec */ + bool needSort = false; + + nentries = pq_getmsgint(buf, sizeof(int32)); + if (nentries < 0 || nentries > (MaxAllocSize / sizeof(WordEntry))) + elog(ERROR, "invalid size of tsvector"); + + hdrlen = DATAHDRSIZE + sizeof(WordEntry) * nentries; + + len = hdrlen * 2; /* times two to make room for lexemes */ + vec = (TSVector) palloc0(len); + vec->size = nentries; + + datalen = 0; + for (i = 0; i < nentries; i++) + { + const char *lexeme; + uint16 npos; + size_t lex_len; + + lexeme = pq_getmsgstring(buf); + npos = (uint16) pq_getmsgint(buf, sizeof(uint16)); + + /* sanity checks */ + + lex_len = strlen(lexeme); + if (lex_len > MAXSTRLEN) + elog(ERROR, "invalid tsvector: lexeme too long"); + + if (datalen > MAXSTRPOS) + elog(ERROR, "invalid tsvector: maximum total lexeme length exceeded"); + + if (npos > MAXNUMPOS) + elog(ERROR, "unexpected number of tsvector positions"); + + /* + * Looks valid. Fill the WordEntry struct, and copy lexeme. + * + * But make sure the buffer is large enough first. + */ + while (hdrlen + SHORTALIGN(datalen + lex_len) + + sizeof(uint16) + npos * sizeof(WordEntryPos) >= len) + { + len *= 2; + vec = (TSVector) repalloc(vec, len); + } + + vec->entries[i].haspos = (npos > 0) ? 
1 : 0; + vec->entries[i].len = lex_len; + vec->entries[i].pos = datalen; + + memcpy(STRPTR(vec) + datalen, lexeme, lex_len); + + datalen += lex_len; + + if (i > 0 && WordEntryCMP(&vec->entries[i], + &vec->entries[i - 1], + STRPTR(vec)) <= 0) + needSort = true; + + /* Receive positions */ + if (npos > 0) + { + uint16 j; + WordEntryPos *wepptr; + + /* + * Pad to 2-byte alignment if necessary. Though we used palloc0 + * for the initial allocation, subsequent repalloc'd memory areas + * are not initialized to zero. + */ + if (datalen != SHORTALIGN(datalen)) + { + *(STRPTR(vec) + datalen) = '\0'; + datalen = SHORTALIGN(datalen); + } + + memcpy(STRPTR(vec) + datalen, &npos, sizeof(uint16)); + + wepptr = POSDATAPTR(vec, &vec->entries[i]); + for (j = 0; j < npos; j++) + { + wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(WordEntryPos)); + if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1])) + elog(ERROR, "position information is misordered"); + } + + datalen += sizeof(uint16) + npos * sizeof(WordEntryPos); + } + } + + SET_VARSIZE(vec, hdrlen + datalen); + + if (needSort) + qsort_arg(ARRPTR(vec), vec->size, sizeof(WordEntry), + compareentry, STRPTR(vec)); + + PG_RETURN_TSVECTOR(vec); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsvector_op.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsvector_op.c new file mode 100644 index 00000000000..4457c5d4f9f --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsvector_op.c @@ -0,0 +1,2893 @@ +/*------------------------------------------------------------------------- + * + * tsvector_op.c + * operations over tsvector + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/tsvector_op.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <limits.h> + +#include "access/htup_details.h" +#include "catalog/namespace.h" +#include "catalog/pg_type.h" +#include "commands/trigger.h" +#include "executor/spi.h" +#include "funcapi.h" +#include "lib/qunique.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "parser/parse_coerce.h" +#include "tsearch/ts_utils.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/regproc.h" +#include "utils/rel.h" + + +typedef struct +{ + WordEntry *arrb; + WordEntry *arre; + char *values; + char *operand; +} CHKVAL; + + +typedef struct StatEntry +{ + uint32 ndoc; /* zero indicates that we were already here + * while walking through the tree */ + uint32 nentry; + struct StatEntry *left; + struct StatEntry *right; + uint32 lenlexeme; + char lexeme[FLEXIBLE_ARRAY_MEMBER]; +} StatEntry; + +#define STATENTRYHDRSZ (offsetof(StatEntry, lexeme)) + +typedef struct +{ + int32 weight; + + uint32 maxdepth; + + StatEntry **stack; + uint32 stackpos; + + StatEntry *root; +} TSVectorStat; + + +static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg, + uint32 flags, + TSExecuteCallback chkcond); +static bool TS_execute_locations_recurse(QueryItem *curitem, + void *arg, + TSExecuteCallback chkcond, + List **locations); +static int tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len); +static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column); + + +/* + * Order: haspos, len, word, for all positions (pos, weight) + */ +static int +silly_cmp_tsvector(const TSVector a, const TSVector b) +{ + if 
(VARSIZE(a) < VARSIZE(b)) + return -1; + else if (VARSIZE(a) > VARSIZE(b)) + return 1; + else if (a->size < b->size) + return -1; + else if (a->size > b->size) + return 1; + else + { + WordEntry *aptr = ARRPTR(a); + WordEntry *bptr = ARRPTR(b); + int i = 0; + int res; + + + for (i = 0; i < a->size; i++) + { + if (aptr->haspos != bptr->haspos) + { + return (aptr->haspos > bptr->haspos) ? -1 : 1; + } + else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0) + { + return res; + } + else if (aptr->haspos) + { + WordEntryPos *ap = POSDATAPTR(a, aptr); + WordEntryPos *bp = POSDATAPTR(b, bptr); + int j; + + if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr)) + return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1; + + for (j = 0; j < POSDATALEN(a, aptr); j++) + { + if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp)) + { + return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1; + } + else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp)) + { + return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1; + } + ap++, bp++; + } + } + + aptr++; + bptr++; + } + } + + return 0; +} + +#define TSVECTORCMPFUNC( type, action, ret ) \ +Datum \ +tsvector_##type(PG_FUNCTION_ARGS) \ +{ \ + TSVector a = PG_GETARG_TSVECTOR(0); \ + TSVector b = PG_GETARG_TSVECTOR(1); \ + int res = silly_cmp_tsvector(a, b); \ + PG_FREE_IF_COPY(a,0); \ + PG_FREE_IF_COPY(b,1); \ + PG_RETURN_##ret( res action 0 ); \ +} \ +/* keep compiler quiet - no extra ; */ \ +extern int no_such_variable + +TSVECTORCMPFUNC(lt, <, BOOL); +TSVECTORCMPFUNC(le, <=, BOOL); +TSVECTORCMPFUNC(eq, ==, BOOL); +TSVECTORCMPFUNC(ge, >=, BOOL); +TSVECTORCMPFUNC(gt, >, BOOL); +TSVECTORCMPFUNC(ne, !=, BOOL); +TSVECTORCMPFUNC(cmp, +, INT32); + +Datum +tsvector_strip(PG_FUNCTION_ARGS) +{ + TSVector in = PG_GETARG_TSVECTOR(0); + TSVector out; + int i, + len = 0; + WordEntry *arrin = ARRPTR(in), + *arrout; + char *cur; + + for (i = 0; i < in->size; i++) + len += arrin[i].len; + + len = CALCDATASIZE(in->size, len); + out = (TSVector) palloc0(len); + SET_VARSIZE(out, len); + out->size = in->size; + arrout = ARRPTR(out); + cur = STRPTR(out); + for (i = 0; i < in->size; i++) + { + memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len); + arrout[i].haspos = 0; + arrout[i].len = arrin[i].len; + arrout[i].pos = cur - STRPTR(out); + cur += arrout[i].len; + } + + PG_FREE_IF_COPY(in, 0); + PG_RETURN_POINTER(out); +} + +Datum +tsvector_length(PG_FUNCTION_ARGS) +{ + TSVector in = PG_GETARG_TSVECTOR(0); + int32 ret = in->size; + + PG_FREE_IF_COPY(in, 0); + PG_RETURN_INT32(ret); +} + +Datum +tsvector_setweight(PG_FUNCTION_ARGS) +{ + TSVector in = PG_GETARG_TSVECTOR(0); + char cw = PG_GETARG_CHAR(1); + TSVector out; + int i, + j; + WordEntry *entry; + WordEntryPos *p; + int w = 0; + + switch (cw) + { + case 'A': + case 'a': + w = 3; + break; + case 'B': + case 'b': + w = 2; + break; + case 'C': + case 'c': + w = 1; + break; + case 'D': + case 'd': + w = 0; + break; + default: + /* internal error */ + elog(ERROR, "unrecognized weight: %d", cw); + } + + out = (TSVector) palloc(VARSIZE(in)); + memcpy(out, in, VARSIZE(in)); + entry = ARRPTR(out); + i = out->size; + while (i--) + { + if ((j = POSDATALEN(out, entry)) != 0) + { + p = POSDATAPTR(out, entry); + while (j--) + { + WEP_SETWEIGHT(*p, w); + p++; + } + } + entry++; + } + + PG_FREE_IF_COPY(in, 0); + PG_RETURN_POINTER(out); +} + +/* + * setweight(tsin tsvector, char_weight "char", lexemes "text"[]) + * + * Assign weight w to elements of tsin that are listed in lexemes. 
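Both setweight variants map the user-visible weight letters onto the 2-bit level stored with each position: A/a = 3, B/b = 2, C/c = 1, D/d = 0 (the default). A minimal helper expressing just that mapping, shown only for illustration and not part of the patch:

    #include <stdio.h>

    /* Translate a tsvector weight letter into its stored 2-bit level;
     * returns -1 for anything that is not A/B/C/D (case-insensitive). */
    static int weight_level(char cw)
    {
        switch (cw)
        {
            case 'A': case 'a': return 3;
            case 'B': case 'b': return 2;
            case 'C': case 'c': return 1;
            case 'D': case 'd': return 0;
            default:            return -1;
        }
    }

    int main(void)
    {
        printf("A -> %d, d -> %d, x -> %d\n",
               weight_level('A'), weight_level('d'), weight_level('x'));
        return 0;
    }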
+ */ +Datum +tsvector_setweight_by_filter(PG_FUNCTION_ARGS) +{ + TSVector tsin = PG_GETARG_TSVECTOR(0); + char char_weight = PG_GETARG_CHAR(1); + ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(2); + + TSVector tsout; + int i, + j, + nlexemes, + weight; + WordEntry *entry; + Datum *dlexemes; + bool *nulls; + + switch (char_weight) + { + case 'A': + case 'a': + weight = 3; + break; + case 'B': + case 'b': + weight = 2; + break; + case 'C': + case 'c': + weight = 1; + break; + case 'D': + case 'd': + weight = 0; + break; + default: + /* internal error */ + elog(ERROR, "unrecognized weight: %c", char_weight); + } + + tsout = (TSVector) palloc(VARSIZE(tsin)); + memcpy(tsout, tsin, VARSIZE(tsin)); + entry = ARRPTR(tsout); + + deconstruct_array_builtin(lexemes, TEXTOID, &dlexemes, &nulls, &nlexemes); + + /* + * Assuming that lexemes array is significantly shorter than tsvector we + * can iterate through lexemes performing binary search of each lexeme + * from lexemes in tsvector. + */ + for (i = 0; i < nlexemes; i++) + { + char *lex; + int lex_len, + lex_pos; + + /* Ignore null array elements, they surely don't match */ + if (nulls[i]) + continue; + + lex = VARDATA(dlexemes[i]); + lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ; + lex_pos = tsvector_bsearch(tsout, lex, lex_len); + + if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0) + { + WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos); + + while (j--) + { + WEP_SETWEIGHT(*p, weight); + p++; + } + } + } + + PG_FREE_IF_COPY(tsin, 0); + PG_FREE_IF_COPY(lexemes, 2); + + PG_RETURN_POINTER(tsout); +} + +#define compareEntry(pa, a, pb, b) \ + tsCompareString((pa) + (a)->pos, (a)->len, \ + (pb) + (b)->pos, (b)->len, \ + false) + +/* + * Add positions from src to dest after offsetting them by maxpos. + * Return the number added (might be less than expected due to overflow) + */ +static int32 +add_pos(TSVector src, WordEntry *srcptr, + TSVector dest, WordEntry *destptr, + int32 maxpos) +{ + uint16 *clen = &_POSVECPTR(dest, destptr)->npos; + int i; + uint16 slen = POSDATALEN(src, srcptr), + startlen; + WordEntryPos *spos = POSDATAPTR(src, srcptr), + *dpos = POSDATAPTR(dest, destptr); + + if (!destptr->haspos) + *clen = 0; + + startlen = *clen; + for (i = 0; + i < slen && *clen < MAXNUMPOS && + (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1); + i++) + { + WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i])); + WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos)); + (*clen)++; + } + + if (*clen != startlen) + destptr->haspos = 1; + return *clen - startlen; +} + +/* + * Perform binary search of given lexeme in TSVector. + * Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't + * found. + */ +static int +tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len) +{ + WordEntry *arrin = ARRPTR(tsv); + int StopLow = 0, + StopHigh = tsv->size, + StopMiddle, + cmp; + + while (StopLow < StopHigh) + { + StopMiddle = (StopLow + StopHigh) / 2; + + cmp = tsCompareString(lexeme, lexeme_len, + STRPTR(tsv) + arrin[StopMiddle].pos, + arrin[StopMiddle].len, + false); + + if (cmp < 0) + StopHigh = StopMiddle; + else if (cmp > 0) + StopLow = StopMiddle + 1; + else /* found it */ + return StopMiddle; + } + + return -1; +} + +/* + * qsort comparator functions + */ + +static int +compare_int(const void *va, const void *vb) +{ + int a = *((const int *) va); + int b = *((const int *) vb); + + if (a == b) + return 0; + return (a > b) ? 
1 : -1; +} + +static int +compare_text_lexemes(const void *va, const void *vb) +{ + Datum a = *((const Datum *) va); + Datum b = *((const Datum *) vb); + char *alex = VARDATA_ANY(a); + int alex_len = VARSIZE_ANY_EXHDR(a); + char *blex = VARDATA_ANY(b); + int blex_len = VARSIZE_ANY_EXHDR(b); + + return tsCompareString(alex, alex_len, blex, blex_len, false); +} + +/* + * Internal routine to delete lexemes from TSVector by array of offsets. + * + * int *indices_to_delete -- array of lexeme offsets to delete (modified here!) + * int indices_count -- size of that array + * + * Returns new TSVector without given lexemes along with their positions + * and weights. + */ +static TSVector +tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete, + int indices_count) +{ + TSVector tsout; + WordEntry *arrin = ARRPTR(tsv), + *arrout; + char *data = STRPTR(tsv), + *dataout; + int i, /* index in arrin */ + j, /* index in arrout */ + k, /* index in indices_to_delete */ + curoff; /* index in dataout area */ + + /* + * Sort the filter array to simplify membership checks below. Also, get + * rid of any duplicate entries, so that we can assume that indices_count + * is exactly equal to the number of lexemes that will be removed. + */ + if (indices_count > 1) + { + qsort(indices_to_delete, indices_count, sizeof(int), compare_int); + indices_count = qunique(indices_to_delete, indices_count, sizeof(int), + compare_int); + } + + /* + * Here we overestimate tsout size, since we don't know how much space is + * used by the deleted lexeme(s). We will set exact size below. + */ + tsout = (TSVector) palloc0(VARSIZE(tsv)); + + /* This count must be correct because STRPTR(tsout) relies on it. */ + tsout->size = tsv->size - indices_count; + + /* + * Copy tsv to tsout, skipping lexemes listed in indices_to_delete. + */ + arrout = ARRPTR(tsout); + dataout = STRPTR(tsout); + curoff = 0; + for (i = j = k = 0; i < tsv->size; i++) + { + /* + * If current i is present in indices_to_delete, skip this lexeme. + * Since indices_to_delete is already sorted, we only need to check + * the current (k'th) entry. + */ + if (k < indices_count && i == indices_to_delete[k]) + { + k++; + continue; + } + + /* Copy lexeme and its positions and weights */ + memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len); + arrout[j].haspos = arrin[i].haspos; + arrout[j].len = arrin[i].len; + arrout[j].pos = curoff; + curoff += arrin[i].len; + if (arrin[i].haspos) + { + int len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos) + + sizeof(uint16); + + curoff = SHORTALIGN(curoff); + memcpy(dataout + curoff, + STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len), + len); + curoff += len; + } + + j++; + } + + /* + * k should now be exactly equal to indices_count. If it isn't then the + * caller provided us with indices outside of [0, tsv->size) range and + * estimation of tsout's size is wrong. + */ + Assert(k == indices_count); + + SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff)); + return tsout; +} + +/* + * Delete given lexeme from tsvector. + * Implementation of user-level ts_delete(tsvector, text). 
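tsvector_delete_by_indices() above first sorts and de-duplicates the index list, then walks the source array once, skipping an entry whenever its index equals the next pending index. The same skip-copy idea reduced to a plain int array (a standalone sketch; the names are not from the patch):

    #include <stdio.h>
    #include <stdlib.h>

    static int cmp_int(const void *a, const void *b)
    {
        int x = *(const int *) a, y = *(const int *) b;
        return (x > y) - (x < y);
    }

    /* Remove src[idx[0]], src[idx[1]], ... in one pass; returns new length. */
    static int delete_by_indices(int *src, int n, int *idx, int nidx)
    {
        int i, j = 0, k = 0;

        qsort(idx, nidx, sizeof(int), cmp_int);    /* sorted, may contain dups */
        for (i = 0; i < n; i++)
        {
            while (k < nidx && idx[k] < i)         /* also skips duplicates */
                k++;
            if (k < nidx && idx[k] == i)
            {
                k++;
                continue;                          /* drop this element */
            }
            src[j++] = src[i];
        }
        return j;
    }

    int main(void)
    {
        int v[] = {10, 20, 30, 40, 50};
        int drop[] = {3, 1, 1};
        int n = delete_by_indices(v, 5, drop, 3);

        for (int i = 0; i < n; i++)
            printf("%d ", v[i]);                   /* prints: 10 30 50 */
        printf("\n");
        return 0;
    }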
+ */ +Datum +tsvector_delete_str(PG_FUNCTION_ARGS) +{ + TSVector tsin = PG_GETARG_TSVECTOR(0), + tsout; + text *tlexeme = PG_GETARG_TEXT_PP(1); + char *lexeme = VARDATA_ANY(tlexeme); + int lexeme_len = VARSIZE_ANY_EXHDR(tlexeme), + skip_index; + + if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -1) + PG_RETURN_POINTER(tsin); + + tsout = tsvector_delete_by_indices(tsin, &skip_index, 1); + + PG_FREE_IF_COPY(tsin, 0); + PG_FREE_IF_COPY(tlexeme, 1); + PG_RETURN_POINTER(tsout); +} + +/* + * Delete given array of lexemes from tsvector. + * Implementation of user-level ts_delete(tsvector, text[]). + */ +Datum +tsvector_delete_arr(PG_FUNCTION_ARGS) +{ + TSVector tsin = PG_GETARG_TSVECTOR(0), + tsout; + ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(1); + int i, + nlex, + skip_count, + *skip_indices; + Datum *dlexemes; + bool *nulls; + + deconstruct_array_builtin(lexemes, TEXTOID, &dlexemes, &nulls, &nlex); + + /* + * In typical use case array of lexemes to delete is relatively small. So + * here we optimize things for that scenario: iterate through lexarr + * performing binary search of each lexeme from lexarr in tsvector. + */ + skip_indices = palloc0(nlex * sizeof(int)); + for (i = skip_count = 0; i < nlex; i++) + { + char *lex; + int lex_len, + lex_pos; + + /* Ignore null array elements, they surely don't match */ + if (nulls[i]) + continue; + + lex = VARDATA(dlexemes[i]); + lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ; + lex_pos = tsvector_bsearch(tsin, lex, lex_len); + + if (lex_pos >= 0) + skip_indices[skip_count++] = lex_pos; + } + + tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count); + + pfree(skip_indices); + PG_FREE_IF_COPY(tsin, 0); + PG_FREE_IF_COPY(lexemes, 1); + + PG_RETURN_POINTER(tsout); +} + +/* + * Expand tsvector as table with following columns: + * lexeme: lexeme text + * positions: integer array of lexeme positions + * weights: char array of weights corresponding to positions + */ +Datum +tsvector_unnest(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + TSVector tsin; + + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcontext; + TupleDesc tupdesc; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + tupdesc = CreateTemplateTupleDesc(3); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions", + INT2ARRAYOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights", + TEXTARRAYOID, -1, 0); + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + funcctx->tuple_desc = tupdesc; + + funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0); + + MemoryContextSwitchTo(oldcontext); + } + + funcctx = SRF_PERCALL_SETUP(); + tsin = (TSVector) funcctx->user_fctx; + + if (funcctx->call_cntr < tsin->size) + { + WordEntry *arrin = ARRPTR(tsin); + char *data = STRPTR(tsin); + HeapTuple tuple; + int j, + i = funcctx->call_cntr; + bool nulls[] = {false, false, false}; + Datum values[3]; + + values[0] = PointerGetDatum(cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len)); + + if (arrin[i].haspos) + { + WordEntryPosVector *posv; + Datum *positions; + Datum *weights; + char weight; + + /* + * Internally tsvector stores position and weight in the same + * uint16 (2 bits for weight, 14 for position). Here we extract + * that in two separate arrays. 
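The comment above describes how each position entry packs a 2-bit weight together with a 14-bit position into a single uint16. A standalone illustration of that packing; the macro names mimic, but are not, the WEP_* macros used by the patch:

    #include <stdio.h>
    #include <stdint.h>

    #define POS_MASK   0x3FFF                     /* low 14 bits            */
    #define PACK(p, w) ((uint16_t) (((w) << 14) | ((p) & POS_MASK)))
    #define POS(x)     ((x) & POS_MASK)
    #define WEIGHT(x)  ((x) >> 14)                /* 0 = D ... 3 = A        */

    int main(void)
    {
        uint16_t e = PACK(42, 3);                 /* position 42, weight 'A' */

        printf("raw=0x%04x pos=%d weight=%c\n",
               e, POS(e), "DCBA"[WEIGHT(e)]);
        return 0;
    }

The "DCBA" lookup matches the 'D' - weight conversion used by tsvector_unnest() above.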
+ */ + posv = _POSVECPTR(tsin, arrin + i); + positions = palloc(posv->npos * sizeof(Datum)); + weights = palloc(posv->npos * sizeof(Datum)); + for (j = 0; j < posv->npos; j++) + { + positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j])); + weight = 'D' - WEP_GETWEIGHT(posv->pos[j]); + weights[j] = PointerGetDatum(cstring_to_text_with_len(&weight, + 1)); + } + + values[1] = PointerGetDatum(construct_array_builtin(positions, posv->npos, INT2OID)); + values[2] = PointerGetDatum(construct_array_builtin(weights, posv->npos, TEXTOID)); + } + else + { + nulls[1] = nulls[2] = true; + } + + tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); + SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); + } + else + { + SRF_RETURN_DONE(funcctx); + } +} + +/* + * Convert tsvector to array of lexemes. + */ +Datum +tsvector_to_array(PG_FUNCTION_ARGS) +{ + TSVector tsin = PG_GETARG_TSVECTOR(0); + WordEntry *arrin = ARRPTR(tsin); + Datum *elements; + int i; + ArrayType *array; + + elements = palloc(tsin->size * sizeof(Datum)); + + for (i = 0; i < tsin->size; i++) + { + elements[i] = PointerGetDatum(cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos, + arrin[i].len)); + } + + array = construct_array_builtin(elements, tsin->size, TEXTOID); + + pfree(elements); + PG_FREE_IF_COPY(tsin, 0); + PG_RETURN_POINTER(array); +} + +/* + * Build tsvector from array of lexemes. + */ +Datum +array_to_tsvector(PG_FUNCTION_ARGS) +{ + ArrayType *v = PG_GETARG_ARRAYTYPE_P(0); + TSVector tsout; + Datum *dlexemes; + WordEntry *arrout; + bool *nulls; + int nitems, + i, + tslen, + datalen = 0; + char *cur; + + deconstruct_array_builtin(v, TEXTOID, &dlexemes, &nulls, &nitems); + + /* + * Reject nulls and zero length strings (maybe we should just ignore them, + * instead?) + */ + for (i = 0; i < nitems; i++) + { + if (nulls[i]) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("lexeme array may not contain nulls"))); + + if (VARSIZE(dlexemes[i]) - VARHDRSZ == 0) + ereport(ERROR, + (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING), + errmsg("lexeme array may not contain empty strings"))); + } + + /* Sort and de-dup, because this is required for a valid tsvector. */ + if (nitems > 1) + { + qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes); + nitems = qunique(dlexemes, nitems, sizeof(Datum), + compare_text_lexemes); + } + + /* Calculate space needed for surviving lexemes. */ + for (i = 0; i < nitems; i++) + datalen += VARSIZE(dlexemes[i]) - VARHDRSZ; + tslen = CALCDATASIZE(nitems, datalen); + + /* Allocate and fill tsvector. */ + tsout = (TSVector) palloc0(tslen); + SET_VARSIZE(tsout, tslen); + tsout->size = nitems; + + arrout = ARRPTR(tsout); + cur = STRPTR(tsout); + for (i = 0; i < nitems; i++) + { + char *lex = VARDATA(dlexemes[i]); + int lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ; + + memcpy(cur, lex, lex_len); + arrout[i].haspos = 0; + arrout[i].len = lex_len; + arrout[i].pos = cur - STRPTR(tsout); + cur += lex_len; + } + + PG_FREE_IF_COPY(v, 0); + PG_RETURN_POINTER(tsout); +} + +/* + * ts_filter(): keep only lexemes with given weights in tsvector. 
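ts_filter(), defined next, folds the requested weight letters into a 4-bit mask (A = 8, B = 4, C = 2, D = 1) and keeps a position only when the bit for its stored weight is set in the mask. A compact standalone sketch of that mask test, for illustration only:

    #include <stdio.h>
    #include <stdbool.h>

    /* Build a mask from weight letters, e.g. "ab" -> 8|4. */
    static int weight_mask(const char *letters)
    {
        int mask = 0;

        for (; *letters; letters++)
            switch (*letters)
            {
                case 'A': case 'a': mask |= 8; break;
                case 'B': case 'b': mask |= 4; break;
                case 'C': case 'c': mask |= 2; break;
                case 'D': case 'd': mask |= 1; break;
            }
        return mask;
    }

    /* weight is the stored 2-bit level: 3=A, 2=B, 1=C, 0=D. */
    static bool weight_matches(int mask, int weight)
    {
        return (mask & (1 << weight)) != 0;
    }

    int main(void)
    {
        int mask = weight_mask("ab");

        printf("A:%d B:%d C:%d D:%d\n",
               weight_matches(mask, 3), weight_matches(mask, 2),
               weight_matches(mask, 1), weight_matches(mask, 0));
        return 0;
    }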
+ */ +Datum +tsvector_filter(PG_FUNCTION_ARGS) +{ + TSVector tsin = PG_GETARG_TSVECTOR(0), + tsout; + ArrayType *weights = PG_GETARG_ARRAYTYPE_P(1); + WordEntry *arrin = ARRPTR(tsin), + *arrout; + char *datain = STRPTR(tsin), + *dataout; + Datum *dweights; + bool *nulls; + int nweights; + int i, + j; + int cur_pos = 0; + char mask = 0; + + deconstruct_array_builtin(weights, CHAROID, &dweights, &nulls, &nweights); + + for (i = 0; i < nweights; i++) + { + char char_weight; + + if (nulls[i]) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("weight array may not contain nulls"))); + + char_weight = DatumGetChar(dweights[i]); + switch (char_weight) + { + case 'A': + case 'a': + mask = mask | 8; + break; + case 'B': + case 'b': + mask = mask | 4; + break; + case 'C': + case 'c': + mask = mask | 2; + break; + case 'D': + case 'd': + mask = mask | 1; + break; + default: + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognized weight: \"%c\"", char_weight))); + } + } + + tsout = (TSVector) palloc0(VARSIZE(tsin)); + tsout->size = tsin->size; + arrout = ARRPTR(tsout); + dataout = STRPTR(tsout); + + for (i = j = 0; i < tsin->size; i++) + { + WordEntryPosVector *posvin, + *posvout; + int npos = 0; + int k; + + if (!arrin[i].haspos) + continue; + + posvin = _POSVECPTR(tsin, arrin + i); + posvout = (WordEntryPosVector *) + (dataout + SHORTALIGN(cur_pos + arrin[i].len)); + + for (k = 0; k < posvin->npos; k++) + { + if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k]))) + posvout->pos[npos++] = posvin->pos[k]; + } + + /* if no satisfactory positions found, skip lexeme */ + if (!npos) + continue; + + arrout[j].haspos = true; + arrout[j].len = arrin[i].len; + arrout[j].pos = cur_pos; + + memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len); + posvout->npos = npos; + cur_pos += SHORTALIGN(arrin[i].len); + cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) + + sizeof(uint16); + j++; + } + + tsout->size = j; + if (dataout != STRPTR(tsout)) + memmove(STRPTR(tsout), dataout, cur_pos); + + SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos)); + + PG_FREE_IF_COPY(tsin, 0); + PG_RETURN_POINTER(tsout); +} + +Datum +tsvector_concat(PG_FUNCTION_ARGS) +{ + TSVector in1 = PG_GETARG_TSVECTOR(0); + TSVector in2 = PG_GETARG_TSVECTOR(1); + TSVector out; + WordEntry *ptr; + WordEntry *ptr1, + *ptr2; + WordEntryPos *p; + int maxpos = 0, + i, + j, + i1, + i2, + dataoff, + output_bytes, + output_size; + char *data, + *data1, + *data2; + + /* Get max position in in1; we'll need this to offset in2's positions */ + ptr = ARRPTR(in1); + i = in1->size; + while (i--) + { + if ((j = POSDATALEN(in1, ptr)) != 0) + { + p = POSDATAPTR(in1, ptr); + while (j--) + { + if (WEP_GETPOS(*p) > maxpos) + maxpos = WEP_GETPOS(*p); + p++; + } + } + ptr++; + } + + ptr1 = ARRPTR(in1); + ptr2 = ARRPTR(in2); + data1 = STRPTR(in1); + data2 = STRPTR(in2); + i1 = in1->size; + i2 = in2->size; + + /* + * Conservative estimate of space needed. We might need all the data in + * both inputs, and conceivably add a pad byte before position data for + * each item where there was none before. + */ + output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2; + + out = (TSVector) palloc0(output_bytes); + SET_VARSIZE(out, output_bytes); + + /* + * We must make out->size valid so that STRPTR(out) is sensible. We'll + * collapse out any unused space at the end. 
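The concat loop that follows is a classic merge of two already-sorted lexeme arrays: advance whichever side holds the smaller entry, and when both sides hold the same lexeme emit it once while combining the position data. The control flow, reduced to sorted string arrays (a standalone sketch, not the patch's code):

    #include <stdio.h>
    #include <string.h>

    /* Merge two sorted, duplicate-free string arrays, printing each lexeme once. */
    static void merge_print(const char **a, int na, const char **b, int nb)
    {
        int i = 0, j = 0;

        while (i < na && j < nb)
        {
            int cmp = strcmp(a[i], b[j]);

            if (cmp < 0)
                printf("%s (left only)\n", a[i++]);
            else if (cmp > 0)
                printf("%s (right only)\n", b[j++]);
            else
            {
                printf("%s (both, positions would be merged)\n", a[i]);
                i++, j++;
            }
        }
        while (i < na)
            printf("%s (left only)\n", a[i++]);
        while (j < nb)
            printf("%s (right only)\n", b[j++]);
    }

    int main(void)
    {
        const char *a[] = {"cat", "fat", "rat"};
        const char *b[] = {"fat", "mat"};

        merge_print(a, 3, b, 2);
        return 0;
    }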
+ */ + out->size = in1->size + in2->size; + + ptr = ARRPTR(out); + data = STRPTR(out); + dataoff = 0; + while (i1 && i2) + { + int cmp = compareEntry(data1, ptr1, data2, ptr2); + + if (cmp < 0) + { /* in1 first */ + ptr->haspos = ptr1->haspos; + ptr->len = ptr1->len; + memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len); + ptr->pos = dataoff; + dataoff += ptr1->len; + if (ptr->haspos) + { + dataoff = SHORTALIGN(dataoff); + memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); + dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); + } + + ptr++; + ptr1++; + i1--; + } + else if (cmp > 0) + { /* in2 first */ + ptr->haspos = ptr2->haspos; + ptr->len = ptr2->len; + memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len); + ptr->pos = dataoff; + dataoff += ptr2->len; + if (ptr->haspos) + { + int addlen = add_pos(in2, ptr2, out, ptr, maxpos); + + if (addlen == 0) + ptr->haspos = 0; + else + { + dataoff = SHORTALIGN(dataoff); + dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16); + } + } + + ptr++; + ptr2++; + i2--; + } + else + { + ptr->haspos = ptr1->haspos | ptr2->haspos; + ptr->len = ptr1->len; + memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len); + ptr->pos = dataoff; + dataoff += ptr1->len; + if (ptr->haspos) + { + if (ptr1->haspos) + { + dataoff = SHORTALIGN(dataoff); + memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); + dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); + if (ptr2->haspos) + dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos); + } + else /* must have ptr2->haspos */ + { + int addlen = add_pos(in2, ptr2, out, ptr, maxpos); + + if (addlen == 0) + ptr->haspos = 0; + else + { + dataoff = SHORTALIGN(dataoff); + dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16); + } + } + } + + ptr++; + ptr1++; + ptr2++; + i1--; + i2--; + } + } + + while (i1) + { + ptr->haspos = ptr1->haspos; + ptr->len = ptr1->len; + memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len); + ptr->pos = dataoff; + dataoff += ptr1->len; + if (ptr->haspos) + { + dataoff = SHORTALIGN(dataoff); + memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); + dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); + } + + ptr++; + ptr1++; + i1--; + } + + while (i2) + { + ptr->haspos = ptr2->haspos; + ptr->len = ptr2->len; + memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len); + ptr->pos = dataoff; + dataoff += ptr2->len; + if (ptr->haspos) + { + int addlen = add_pos(in2, ptr2, out, ptr, maxpos); + + if (addlen == 0) + ptr->haspos = 0; + else + { + dataoff = SHORTALIGN(dataoff); + dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16); + } + } + + ptr++; + ptr2++; + i2--; + } + + /* + * Instead of checking each offset individually, we check for overflow of + * pos fields once at the end. + */ + if (dataoff > MAXSTRPOS) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS))); + + /* + * Adjust sizes (asserting that we didn't overrun the original estimates) + * and collapse out any unused array entries. 
+ */ + output_size = ptr - ARRPTR(out); + Assert(output_size <= out->size); + out->size = output_size; + if (data != STRPTR(out)) + memmove(STRPTR(out), data, dataoff); + output_bytes = CALCDATASIZE(out->size, dataoff); + Assert(output_bytes <= VARSIZE(out)); + SET_VARSIZE(out, output_bytes); + + PG_FREE_IF_COPY(in1, 0); + PG_FREE_IF_COPY(in2, 1); + PG_RETURN_POINTER(out); +} + +/* + * Compare two strings by tsvector rules. + * + * if prefix = true then it returns zero value iff b has prefix a + */ +int32 +tsCompareString(char *a, int lena, char *b, int lenb, bool prefix) +{ + int cmp; + + if (lena == 0) + { + if (prefix) + cmp = 0; /* empty string is prefix of anything */ + else + cmp = (lenb > 0) ? -1 : 0; + } + else if (lenb == 0) + { + cmp = (lena > 0) ? 1 : 0; + } + else + { + cmp = memcmp(a, b, Min((unsigned int) lena, (unsigned int) lenb)); + + if (prefix) + { + if (cmp == 0 && lena > lenb) + cmp = 1; /* a is longer, so not a prefix of b */ + } + else if (cmp == 0 && lena != lenb) + { + cmp = (lena < lenb) ? -1 : 1; + } + } + + return cmp; +} + +/* + * Check weight info or/and fill 'data' with the required positions + */ +static TSTernaryValue +checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val, + ExecPhraseData *data) +{ + TSTernaryValue result = TS_NO; + + Assert(data == NULL || data->npos == 0); + + if (entry->haspos) + { + WordEntryPosVector *posvec; + + /* + * We can't use the _POSVECPTR macro here because the pointer to the + * tsvector's lexeme storage is already contained in chkval->values. + */ + posvec = (WordEntryPosVector *) + (chkval->values + SHORTALIGN(entry->pos + entry->len)); + + if (val->weight && data) + { + WordEntryPos *posvec_iter = posvec->pos; + WordEntryPos *dptr; + + /* + * Filter position information by weights + */ + dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos); + data->allocated = true; + + /* Is there a position with a matching weight? */ + while (posvec_iter < posvec->pos + posvec->npos) + { + /* If true, append this position to the data->pos */ + if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter))) + { + *dptr = WEP_GETPOS(*posvec_iter); + dptr++; + } + + posvec_iter++; + } + + data->npos = dptr - data->pos; + + if (data->npos > 0) + result = TS_YES; + else + { + pfree(data->pos); + data->pos = NULL; + data->allocated = false; + } + } + else if (val->weight) + { + WordEntryPos *posvec_iter = posvec->pos; + + /* Is there a position with a matching weight? */ + while (posvec_iter < posvec->pos + posvec->npos) + { + if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter))) + { + result = TS_YES; + break; /* no need to go further */ + } + + posvec_iter++; + } + } + else if (data) + { + data->npos = posvec->npos; + data->pos = posvec->pos; + data->allocated = false; + result = TS_YES; + } + else + { + /* simplest case: no weight check, positions not needed */ + result = TS_YES; + } + } + else + { + /* + * Position info is lacking, so if the caller requires it, we can only + * say that maybe there is a match. + * + * Notice, however, that we *don't* check val->weight here. + * Historically, stripped tsvectors are considered to match queries + * whether or not the query has a weight restriction; that's a little + * dubious but we'll preserve the behavior. 
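tsCompareString() above treats an empty search string as a universal prefix and, in prefix mode, reports a match whenever the search term equals the first lena bytes of the lexeme. The same rule restated over NUL-terminated strings (a standalone sketch; PostgreSQL's version works on non-terminated byte ranges):

    #include <stdio.h>
    #include <string.h>

    /* Returns 0 if a matches b (prefix mode: a is a prefix of b),
     * otherwise a memcmp-style ordering value. */
    static int prefix_compare(const char *a, const char *b, int prefix)
    {
        size_t lena = strlen(a), lenb = strlen(b);
        size_t n = lena < lenb ? lena : lenb;
        int cmp = memcmp(a, b, n);

        if (cmp == 0)
        {
            if (prefix)
                cmp = (lena > lenb) ? 1 : 0;     /* longer a can't be a prefix */
            else if (lena != lenb)
                cmp = (lena < lenb) ? -1 : 1;
        }
        return cmp;
    }

    int main(void)
    {
        printf("%d %d %d\n",
               prefix_compare("cat", "cats", 1),   /* 0: prefix match          */
               prefix_compare("cat", "cats", 0),   /* -1: shorter sorts first  */
               prefix_compare("cats", "cat", 1));  /* 1: not a prefix          */
        return 0;
    }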
+ */ + if (data) + result = TS_MAYBE; + else + result = TS_YES; + } + + return result; +} + +/* + * TS_execute callback for matching a tsquery operand to plain tsvector data + */ +static TSTernaryValue +checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data) +{ + CHKVAL *chkval = (CHKVAL *) checkval; + WordEntry *StopLow = chkval->arrb; + WordEntry *StopHigh = chkval->arre; + WordEntry *StopMiddle = StopHigh; + TSTernaryValue res = TS_NO; + + /* Loop invariant: StopLow <= val < StopHigh */ + while (StopLow < StopHigh) + { + int difference; + + StopMiddle = StopLow + (StopHigh - StopLow) / 2; + difference = tsCompareString(chkval->operand + val->distance, + val->length, + chkval->values + StopMiddle->pos, + StopMiddle->len, + false); + + if (difference == 0) + { + /* Check weight info & fill 'data' with positions */ + res = checkclass_str(chkval, StopMiddle, val, data); + break; + } + else if (difference > 0) + StopLow = StopMiddle + 1; + else + StopHigh = StopMiddle; + } + + /* + * If it's a prefix search, we should also consider lexemes that the + * search term is a prefix of (which will necessarily immediately follow + * the place we found in the above loop). But we can skip them if there + * was a definite match on the exact term AND the caller doesn't need + * position info. + */ + if (val->prefix && (res != TS_YES || data)) + { + WordEntryPos *allpos = NULL; + int npos = 0, + totalpos = 0; + + /* adjust start position for corner case */ + if (StopLow >= StopHigh) + StopMiddle = StopHigh; + + /* we don't try to re-use any data from the initial match */ + if (data) + { + if (data->allocated) + pfree(data->pos); + data->pos = NULL; + data->allocated = false; + data->npos = 0; + } + res = TS_NO; + + while ((res != TS_YES || data) && + StopMiddle < chkval->arre && + tsCompareString(chkval->operand + val->distance, + val->length, + chkval->values + StopMiddle->pos, + StopMiddle->len, + true) == 0) + { + TSTernaryValue subres; + + subres = checkclass_str(chkval, StopMiddle, val, data); + + if (subres != TS_NO) + { + if (data) + { + /* + * We need to join position information + */ + if (subres == TS_MAYBE) + { + /* + * No position info for this match, so we must report + * MAYBE overall. + */ + res = TS_MAYBE; + /* forget any previous positions */ + npos = 0; + /* don't leak storage */ + if (allpos) + pfree(allpos); + break; + } + + while (npos + data->npos > totalpos) + { + if (totalpos == 0) + { + totalpos = 256; + allpos = palloc(sizeof(WordEntryPos) * totalpos); + } + else + { + totalpos *= 2; + allpos = repalloc(allpos, sizeof(WordEntryPos) * totalpos); + } + } + + memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos); + npos += data->npos; + + /* don't leak storage from individual matches */ + if (data->allocated) + pfree(data->pos); + data->pos = NULL; + data->allocated = false; + /* it's important to reset data->npos before next loop */ + data->npos = 0; + } + else + { + /* Don't need positions, just handle YES/MAYBE */ + if (subres == TS_YES || res == TS_NO) + res = subres; + } + } + + StopMiddle++; + } + + if (data && npos > 0) + { + /* Sort and make unique array of found positions */ + data->pos = allpos; + qsort(data->pos, npos, sizeof(WordEntryPos), compareWordEntryPos); + data->npos = qunique(data->pos, npos, sizeof(WordEntryPos), + compareWordEntryPos); + data->allocated = true; + res = TS_YES; + } + } + + return res; +} + +/* + * Compute output position list for a tsquery operator in phrase mode. 
+ * + * Merge the position lists in Ldata and Rdata as specified by "emit", + * returning the result list into *data. The input position lists must be + * sorted and unique, and the output will be as well. + * + * data: pointer to initially-all-zeroes output struct, or NULL + * Ldata, Rdata: input position lists + * emit: bitmask of TSPO_XXX flags + * Loffset: offset to be added to Ldata positions before comparing/outputting + * Roffset: offset to be added to Rdata positions before comparing/outputting + * max_npos: maximum possible required size of output position array + * + * Loffset and Roffset should not be negative, else we risk trying to output + * negative positions, which won't fit into WordEntryPos. + * + * The result is boolean (TS_YES or TS_NO), but for the caller's convenience + * we return it as TSTernaryValue. + * + * Returns TS_YES if any positions were emitted to *data; or if data is NULL, + * returns TS_YES if any positions would have been emitted. + */ +#define TSPO_L_ONLY 0x01 /* emit positions appearing only in L */ +#define TSPO_R_ONLY 0x02 /* emit positions appearing only in R */ +#define TSPO_BOTH 0x04 /* emit positions appearing in both L&R */ + +static TSTernaryValue +TS_phrase_output(ExecPhraseData *data, + ExecPhraseData *Ldata, + ExecPhraseData *Rdata, + int emit, + int Loffset, + int Roffset, + int max_npos) +{ + int Lindex, + Rindex; + + /* Loop until both inputs are exhausted */ + Lindex = Rindex = 0; + while (Lindex < Ldata->npos || Rindex < Rdata->npos) + { + int Lpos, + Rpos; + int output_pos = 0; + + /* + * Fetch current values to compare. WEP_GETPOS() is needed because + * ExecPhraseData->data can point to a tsvector's WordEntryPosVector. + */ + if (Lindex < Ldata->npos) + Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset; + else + { + /* L array exhausted, so we're done if R_ONLY isn't set */ + if (!(emit & TSPO_R_ONLY)) + break; + Lpos = INT_MAX; + } + if (Rindex < Rdata->npos) + Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset; + else + { + /* R array exhausted, so we're done if L_ONLY isn't set */ + if (!(emit & TSPO_L_ONLY)) + break; + Rpos = INT_MAX; + } + + /* Merge-join the two input lists */ + if (Lpos < Rpos) + { + /* Lpos is not matched in Rdata, should we output it? */ + if (emit & TSPO_L_ONLY) + output_pos = Lpos; + Lindex++; + } + else if (Lpos == Rpos) + { + /* Lpos and Rpos match ... should we output it? */ + if (emit & TSPO_BOTH) + output_pos = Rpos; + Lindex++; + Rindex++; + } + else /* Lpos > Rpos */ + { + /* Rpos is not matched in Ldata, should we output it? */ + if (emit & TSPO_R_ONLY) + output_pos = Rpos; + Rindex++; + } + + if (output_pos > 0) + { + if (data) + { + /* Store position, first allocating output array if needed */ + if (data->pos == NULL) + { + data->pos = (WordEntryPos *) + palloc(max_npos * sizeof(WordEntryPos)); + data->allocated = true; + } + data->pos[data->npos++] = output_pos; + } + else + { + /* + * Exact positions not needed, so return TS_YES as soon as we + * know there is at least one. + */ + return TS_YES; + } + } + } + + if (data && data->npos > 0) + { + /* Let's assert we didn't overrun the array */ + Assert(data->npos <= max_npos); + return TS_YES; + } + return TS_NO; +} + +/* + * Execute tsquery at or below an OP_PHRASE operator. + * + * This handles tsquery execution at recursion levels where we need to care + * about match locations. 
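TS_phrase_output() above is a merge-join over two sorted position lists: at each step it looks at the smaller (offset-adjusted) position, or the common one, and emits it only if the corresponding L_ONLY / R_ONLY / BOTH flag is set. A stripped-down standalone version over int arrays; the flag names mirror, but are not, the TSPO_* macros, and early termination when one flag set is exhausted is handled here with a sentinel instead:

    #include <stdio.h>
    #include <limits.h>

    #define L_ONLY 0x01
    #define R_ONLY 0x02
    #define BOTH   0x04

    /* Merge sorted lists l[] and r[]; write selected positions into out[]. */
    static int merge_positions(const int *l, int nl, const int *r, int nr,
                               int emit, int loff, int roff, int *out)
    {
        int i = 0, j = 0, n = 0;

        while (i < nl || j < nr)
        {
            int lp = (i < nl) ? l[i] + loff : INT_MAX;
            int rp = (j < nr) ? r[j] + roff : INT_MAX;

            if (lp < rp)
            {
                if (emit & L_ONLY)
                    out[n++] = lp;
                i++;
            }
            else if (lp == rp)
            {
                if (emit & BOTH)
                    out[n++] = lp;
                i++, j++;
            }
            else
            {
                if (emit & R_ONLY)
                    out[n++] = rp;
                j++;
            }
        }
        return n;
    }

    int main(void)
    {
        int l[] = {1, 4, 7}, r[] = {3, 4, 9}, out[6];
        int n = merge_positions(l, 3, r, 3, BOTH, 0, 0, out);  /* intersection */

        for (int i = 0; i < n; i++)
            printf("%d ", out[i]);                             /* prints: 4 */
        printf("\n");
        return 0;
    }

Passing BOTH | L_ONLY | R_ONLY instead would compute the union, which is how the straight-OR case above uses it.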
+ * + * In addition to the same arguments used for TS_execute, the caller may pass + * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme + * match position info on success. data == NULL if no position data need be + * returned. + * Note: the function assumes data != NULL for operators other than OP_PHRASE. + * This is OK because an outside call always starts from an OP_PHRASE node, + * and all internal recursion cases pass data != NULL. + * + * The detailed semantics of the match data, given that the function returned + * TS_YES (successful match), are: + * + * npos > 0, negate = false: + * query is matched at specified position(s) (and only those positions) + * npos > 0, negate = true: + * query is matched at all positions *except* specified position(s) + * npos = 0, negate = true: + * query is matched at all positions + * npos = 0, negate = false: + * disallowed (this should result in TS_NO or TS_MAYBE, as appropriate) + * + * Successful matches also return a "width" value which is the match width in + * lexemes, less one. Hence, "width" is zero for simple one-lexeme matches, + * and is the sum of the phrase operator distances for phrase matches. Note + * that when width > 0, the listed positions represent the ends of matches not + * the starts. (This unintuitive rule is needed to avoid possibly generating + * negative positions, which wouldn't fit into the WordEntryPos arrays.) + * + * If the TSExecuteCallback function reports that an operand is present + * but fails to provide position(s) for it, we will return TS_MAYBE when + * it is possible but not certain that the query is matched. + * + * When the function returns TS_NO or TS_MAYBE, it must return npos = 0, + * negate = false (which is the state initialized by the caller); but the + * "width" output in such cases is undefined. + */ +static TSTernaryValue +TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags, + TSExecuteCallback chkcond, + ExecPhraseData *data) +{ + ExecPhraseData Ldata, + Rdata; + TSTernaryValue lmatch, + rmatch; + int Loffset, + Roffset, + maxwidth; + + /* since this function recurses, it could be driven to stack overflow */ + check_stack_depth(); + + /* ... and let's check for query cancel while we're at it */ + CHECK_FOR_INTERRUPTS(); + + if (curitem->type == QI_VAL) + return chkcond(arg, (QueryOperand *) curitem, data); + + switch (curitem->qoperator.oper) + { + case OP_NOT: + + /* + * We need not touch data->width, since a NOT operation does not + * change the match width. 
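The match-data contract described above means a successful phrase sub-match is represented either as a finite position list or, with negate = true, as the complement of one. A tiny standalone helper showing how a single position would be tested against that representation; the struct is invented for this sketch and only mimics the relevant ExecPhraseData fields:

    #include <stdio.h>
    #include <stdbool.h>

    typedef struct
    {
        int  npos;        /* number of listed positions               */
        int *pos;         /* sorted position list                     */
        bool negate;      /* true: match everywhere except pos[]      */
    } PhraseMatch;

    /* Does position p satisfy the match description? */
    static bool matches_at(const PhraseMatch *m, int p)
    {
        bool listed = false;

        for (int i = 0; i < m->npos; i++)
            if (m->pos[i] == p)
            {
                listed = true;
                break;
            }
        return m->negate ? !listed : listed;
    }

    int main(void)
    {
        int hits[] = {2, 5};
        PhraseMatch direct = {2, hits, false};   /* matched only at 2 and 5     */
        PhraseMatch inverted = {2, hits, true};  /* matched everywhere but 2, 5 */

        printf("%d %d %d\n",
               matches_at(&direct, 5), matches_at(&inverted, 5),
               matches_at(&inverted, 7));        /* prints: 1 0 1 */
        return 0;
    }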
+ */ + if (flags & TS_EXEC_SKIP_NOT) + { + /* with SKIP_NOT, report NOT as "match everywhere" */ + Assert(data->npos == 0 && !data->negate); + data->negate = true; + return TS_YES; + } + switch (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data)) + { + case TS_NO: + /* change "match nowhere" to "match everywhere" */ + Assert(data->npos == 0 && !data->negate); + data->negate = true; + return TS_YES; + case TS_YES: + if (data->npos > 0) + { + /* we have some positions, invert negate flag */ + data->negate = !data->negate; + return TS_YES; + } + else if (data->negate) + { + /* change "match everywhere" to "match nowhere" */ + data->negate = false; + return TS_NO; + } + /* Should not get here if result was TS_YES */ + Assert(false); + break; + case TS_MAYBE: + /* match positions are, and remain, uncertain */ + return TS_MAYBE; + } + break; + + case OP_PHRASE: + case OP_AND: + memset(&Ldata, 0, sizeof(Ldata)); + memset(&Rdata, 0, sizeof(Rdata)); + + lmatch = TS_phrase_execute(curitem + curitem->qoperator.left, + arg, flags, chkcond, &Ldata); + if (lmatch == TS_NO) + return TS_NO; + + rmatch = TS_phrase_execute(curitem + 1, + arg, flags, chkcond, &Rdata); + if (rmatch == TS_NO) + return TS_NO; + + /* + * If either operand has no position information, then we can't + * return reliable position data, only a MAYBE result. + */ + if (lmatch == TS_MAYBE || rmatch == TS_MAYBE) + return TS_MAYBE; + + if (curitem->qoperator.oper == OP_PHRASE) + { + /* + * Compute Loffset and Roffset suitable for phrase match, and + * compute overall width of whole phrase match. + */ + Loffset = curitem->qoperator.distance + Rdata.width; + Roffset = 0; + if (data) + data->width = curitem->qoperator.distance + + Ldata.width + Rdata.width; + } + else + { + /* + * For OP_AND, set output width and alignment like OP_OR (see + * comment below) + */ + maxwidth = Max(Ldata.width, Rdata.width); + Loffset = maxwidth - Ldata.width; + Roffset = maxwidth - Rdata.width; + if (data) + data->width = maxwidth; + } + + if (Ldata.negate && Rdata.negate) + { + /* !L & !R: treat as !(L | R) */ + (void) TS_phrase_output(data, &Ldata, &Rdata, + TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY, + Loffset, Roffset, + Ldata.npos + Rdata.npos); + if (data) + data->negate = true; + return TS_YES; + } + else if (Ldata.negate) + { + /* !L & R */ + return TS_phrase_output(data, &Ldata, &Rdata, + TSPO_R_ONLY, + Loffset, Roffset, + Rdata.npos); + } + else if (Rdata.negate) + { + /* L & !R */ + return TS_phrase_output(data, &Ldata, &Rdata, + TSPO_L_ONLY, + Loffset, Roffset, + Ldata.npos); + } + else + { + /* straight AND */ + return TS_phrase_output(data, &Ldata, &Rdata, + TSPO_BOTH, + Loffset, Roffset, + Min(Ldata.npos, Rdata.npos)); + } + + case OP_OR: + memset(&Ldata, 0, sizeof(Ldata)); + memset(&Rdata, 0, sizeof(Rdata)); + + lmatch = TS_phrase_execute(curitem + curitem->qoperator.left, + arg, flags, chkcond, &Ldata); + rmatch = TS_phrase_execute(curitem + 1, + arg, flags, chkcond, &Rdata); + + if (lmatch == TS_NO && rmatch == TS_NO) + return TS_NO; + + /* + * If either operand has no position information, then we can't + * return reliable position data, only a MAYBE result. + */ + if (lmatch == TS_MAYBE || rmatch == TS_MAYBE) + return TS_MAYBE; + + /* + * Cope with undefined output width from failed submatch. (This + * takes less code than trying to ensure that all failure returns + * set data->width to zero.) 
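For the OP_PHRASE case above, both operands are aligned on the position where the whole match ends: left-hand positions are shifted by distance + Rdata.width, right-hand positions by 0, and the combined width becomes distance + Ldata.width + Rdata.width. A worked standalone example for 'fat <2> cat', where each operand is a single lexeme (width 0), so a match needs cat exactly two lexemes after fat; the nested loops here are a simplification of the merge-join actually used:

    #include <stdio.h>

    int main(void)
    {
        /* 'fat' found at positions 1 and 6, 'cat' found at 3, 4 and 8. */
        int fat[] = {1, 6}, cat[] = {3, 4, 8};
        int distance = 2;                 /* the <2> operator              */
        int loffset = distance + 0;       /* distance + right-hand width   */

        /* Emit positions where fat_pos + loffset == cat_pos (the BOTH case). */
        for (int i = 0; i < 2; i++)
            for (int j = 0; j < 3; j++)
                if (fat[i] + loffset == cat[j])
                    printf("phrase match ending at position %d\n", cat[j]);
        return 0;
    }

This prints matches ending at positions 3 and 8, i.e. the recorded position is that of the rightmost lexeme, as the comment above explains.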
+ */ + if (lmatch == TS_NO) + Ldata.width = 0; + if (rmatch == TS_NO) + Rdata.width = 0; + + /* + * For OP_AND and OP_OR, report the width of the wider of the two + * inputs, and align the narrower input's positions to the right + * end of that width. This rule deals at least somewhat + * reasonably with cases like "x <-> (y | z <-> q)". + */ + maxwidth = Max(Ldata.width, Rdata.width); + Loffset = maxwidth - Ldata.width; + Roffset = maxwidth - Rdata.width; + data->width = maxwidth; + + if (Ldata.negate && Rdata.negate) + { + /* !L | !R: treat as !(L & R) */ + (void) TS_phrase_output(data, &Ldata, &Rdata, + TSPO_BOTH, + Loffset, Roffset, + Min(Ldata.npos, Rdata.npos)); + data->negate = true; + return TS_YES; + } + else if (Ldata.negate) + { + /* !L | R: treat as !(L & !R) */ + (void) TS_phrase_output(data, &Ldata, &Rdata, + TSPO_L_ONLY, + Loffset, Roffset, + Ldata.npos); + data->negate = true; + return TS_YES; + } + else if (Rdata.negate) + { + /* L | !R: treat as !(!L & R) */ + (void) TS_phrase_output(data, &Ldata, &Rdata, + TSPO_R_ONLY, + Loffset, Roffset, + Rdata.npos); + data->negate = true; + return TS_YES; + } + else + { + /* straight OR */ + return TS_phrase_output(data, &Ldata, &Rdata, + TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY, + Loffset, Roffset, + Ldata.npos + Rdata.npos); + } + + default: + elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper); + } + + /* not reachable, but keep compiler quiet */ + return TS_NO; +} + + +/* + * Evaluate tsquery boolean expression. + * + * curitem: current tsquery item (initially, the first one) + * arg: opaque value to pass through to callback function + * flags: bitmask of flag bits shown in ts_utils.h + * chkcond: callback function to check whether a primitive value is present + */ +bool +TS_execute(QueryItem *curitem, void *arg, uint32 flags, + TSExecuteCallback chkcond) +{ + /* + * If we get TS_MAYBE from the recursion, return true. We could only see + * that result if the caller passed TS_EXEC_PHRASE_NO_POS, so there's no + * need to check again. + */ + return TS_execute_recurse(curitem, arg, flags, chkcond) != TS_NO; +} + +/* + * Evaluate tsquery boolean expression. + * + * This is the same as TS_execute except that TS_MAYBE is returned as-is. + */ +TSTernaryValue +TS_execute_ternary(QueryItem *curitem, void *arg, uint32 flags, + TSExecuteCallback chkcond) +{ + return TS_execute_recurse(curitem, arg, flags, chkcond); +} + +/* + * TS_execute recursion for operators above any phrase operator. Here we do + * not need to worry about lexeme positions. As soon as we hit an OP_PHRASE + * operator, we pass it off to TS_phrase_execute which does worry. + */ +static TSTernaryValue +TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags, + TSExecuteCallback chkcond) +{ + TSTernaryValue lmatch; + + /* since this function recurses, it could be driven to stack overflow */ + check_stack_depth(); + + /* ... 
and let's check for query cancel while we're at it */ + CHECK_FOR_INTERRUPTS(); + + if (curitem->type == QI_VAL) + return chkcond(arg, (QueryOperand *) curitem, + NULL /* don't need position info */ ); + + switch (curitem->qoperator.oper) + { + case OP_NOT: + if (flags & TS_EXEC_SKIP_NOT) + return TS_YES; + switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond)) + { + case TS_NO: + return TS_YES; + case TS_YES: + return TS_NO; + case TS_MAYBE: + return TS_MAYBE; + } + break; + + case OP_AND: + lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg, + flags, chkcond); + if (lmatch == TS_NO) + return TS_NO; + switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond)) + { + case TS_NO: + return TS_NO; + case TS_YES: + return lmatch; + case TS_MAYBE: + return TS_MAYBE; + } + break; + + case OP_OR: + lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg, + flags, chkcond); + if (lmatch == TS_YES) + return TS_YES; + switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond)) + { + case TS_NO: + return lmatch; + case TS_YES: + return TS_YES; + case TS_MAYBE: + return TS_MAYBE; + } + break; + + case OP_PHRASE: + + /* + * If we get a MAYBE result, and the caller doesn't want that, + * convert it to NO. It would be more consistent, perhaps, to + * return the result of TS_phrase_execute() verbatim and then + * convert MAYBE results at the top of the recursion. But + * converting at the topmost phrase operator gives results that + * are bug-compatible with the old implementation, so do it like + * this for now. + */ + switch (TS_phrase_execute(curitem, arg, flags, chkcond, NULL)) + { + case TS_NO: + return TS_NO; + case TS_YES: + return TS_YES; + case TS_MAYBE: + return (flags & TS_EXEC_PHRASE_NO_POS) ? TS_MAYBE : TS_NO; + } + break; + + default: + elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper); + } + + /* not reachable, but keep compiler quiet */ + return TS_NO; +} + +/* + * Evaluate tsquery and report locations of matching terms. + * + * This is like TS_execute except that it returns match locations not just + * success/failure status. The callback function is required to provide + * position data (we report failure if it doesn't). + * + * On successful match, the result is a List of ExecPhraseData structs, one + * for each AND'ed term or phrase operator in the query. Each struct includes + * a sorted array of lexeme positions matching that term. (Recall that for + * phrase operators, the match includes width+1 lexemes, and the recorded + * position is that of the rightmost lexeme.) + * + * OR subexpressions are handled by union'ing their match locations into a + * single List element, which is valid since any of those locations contains + * a match. However, when some of the OR'ed terms are phrase operators, we + * report the maximum width of any of the OR'ed terms, making such cases + * slightly imprecise in the conservative direction. (For example, if the + * tsquery is "(A <-> B) | C", an occurrence of C in the data would be + * reported as though it includes the lexeme to the left of C.) + * + * Locations of NOT subexpressions are not reported. (Obviously, there can + * be no successful NOT matches at top level, or the match would have failed. + * So this amounts to ignoring NOTs underneath ORs.) + * + * The result is NIL if no match, or if position data was not returned. + * + * Arguments are the same as for TS_execute, although flags is currently + * vestigial since none of the defined bits are sensible here. 
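TS_execute_recurse() above implements ordinary three-valued (Kleene) logic: NOT flips YES and NO while preserving MAYBE, AND returns the weaker of its two inputs, and OR returns the stronger. A compact standalone restatement of those combination rules; the enum is local to this sketch and only mirrors TSTernaryValue:

    #include <stdio.h>

    typedef enum { T_NO = 0, T_MAYBE = 1, T_YES = 2 } Ternary;

    static Ternary t_not(Ternary v)
    {
        return (v == T_MAYBE) ? T_MAYBE : (v == T_YES ? T_NO : T_YES);
    }

    static Ternary t_and(Ternary a, Ternary b)
    {
        return a < b ? a : b;        /* the weaker result wins   */
    }

    static Ternary t_or(Ternary a, Ternary b)
    {
        return a > b ? a : b;        /* the stronger result wins */
    }

    int main(void)
    {
        printf("NOT MAYBE     = %d\n", t_not(T_MAYBE));           /* 1 (MAYBE) */
        printf("YES AND MAYBE = %d\n", t_and(T_YES, T_MAYBE));    /* 1 (MAYBE) */
        printf("NO OR MAYBE   = %d\n", t_or(T_NO, T_MAYBE));      /* 1 (MAYBE) */
        return 0;
    }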
+ */ +List * +TS_execute_locations(QueryItem *curitem, void *arg, + uint32 flags, + TSExecuteCallback chkcond) +{ + List *result; + + /* No flags supported, as yet */ + Assert(flags == TS_EXEC_EMPTY); + if (TS_execute_locations_recurse(curitem, arg, chkcond, &result)) + return result; + return NIL; +} + +/* + * TS_execute_locations recursion for operators above any phrase operator. + * OP_PHRASE subexpressions can be passed off to TS_phrase_execute. + */ +static bool +TS_execute_locations_recurse(QueryItem *curitem, void *arg, + TSExecuteCallback chkcond, + List **locations) +{ + bool lmatch, + rmatch; + List *llocations, + *rlocations; + ExecPhraseData *data; + + /* since this function recurses, it could be driven to stack overflow */ + check_stack_depth(); + + /* ... and let's check for query cancel while we're at it */ + CHECK_FOR_INTERRUPTS(); + + /* Default locations result is empty */ + *locations = NIL; + + if (curitem->type == QI_VAL) + { + data = palloc0_object(ExecPhraseData); + if (chkcond(arg, (QueryOperand *) curitem, data) == TS_YES) + { + *locations = list_make1(data); + return true; + } + pfree(data); + return false; + } + + switch (curitem->qoperator.oper) + { + case OP_NOT: + if (!TS_execute_locations_recurse(curitem + 1, arg, chkcond, + &llocations)) + return true; /* we don't pass back any locations */ + return false; + + case OP_AND: + if (!TS_execute_locations_recurse(curitem + curitem->qoperator.left, + arg, chkcond, + &llocations)) + return false; + if (!TS_execute_locations_recurse(curitem + 1, + arg, chkcond, + &rlocations)) + return false; + *locations = list_concat(llocations, rlocations); + return true; + + case OP_OR: + lmatch = TS_execute_locations_recurse(curitem + curitem->qoperator.left, + arg, chkcond, + &llocations); + rmatch = TS_execute_locations_recurse(curitem + 1, + arg, chkcond, + &rlocations); + if (lmatch || rmatch) + { + /* + * We generate an AND'able location struct from each + * combination of sub-matches, following the disjunctive law + * (A & B) | (C & D) = (A | C) & (A | D) & (B | C) & (B | D). + * + * However, if either input didn't produce locations (i.e., it + * failed or was a NOT), we must just return the other list. + */ + if (llocations == NIL) + *locations = rlocations; + else if (rlocations == NIL) + *locations = llocations; + else + { + ListCell *ll; + + foreach(ll, llocations) + { + ExecPhraseData *ldata = (ExecPhraseData *) lfirst(ll); + ListCell *lr; + + foreach(lr, rlocations) + { + ExecPhraseData *rdata = (ExecPhraseData *) lfirst(lr); + + data = palloc0_object(ExecPhraseData); + (void) TS_phrase_output(data, ldata, rdata, + TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY, + 0, 0, + ldata->npos + rdata->npos); + /* Report the larger width, as explained above. */ + data->width = Max(ldata->width, rdata->width); + *locations = lappend(*locations, data); + } + } + } + + return true; + } + return false; + + case OP_PHRASE: + /* We can hand this off to TS_phrase_execute */ + data = palloc0_object(ExecPhraseData); + if (TS_phrase_execute(curitem, arg, TS_EXEC_EMPTY, chkcond, + data) == TS_YES) + { + if (!data->negate) + *locations = list_make1(data); + return true; + } + pfree(data); + return false; + + default: + elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper); + } + + /* not reachable, but keep compiler quiet */ + return false; +} + +/* + * Detect whether a tsquery boolean expression requires any positive matches + * to values shown in the tsquery. 
+ * + * This is needed to know whether a GIN index search requires full index scan. + * For example, 'x & !y' requires a match of x, so it's sufficient to scan + * entries for x; but 'x | !y' could match rows containing neither x nor y. + */ +bool +tsquery_requires_match(QueryItem *curitem) +{ + /* since this function recurses, it could be driven to stack overflow */ + check_stack_depth(); + + if (curitem->type == QI_VAL) + return true; + + switch (curitem->qoperator.oper) + { + case OP_NOT: + + /* + * Assume there are no required matches underneath a NOT. For + * some cases with nested NOTs, we could prove there's a required + * match, but it seems unlikely to be worth the trouble. + */ + return false; + + case OP_PHRASE: + + /* + * Treat OP_PHRASE as OP_AND here + */ + case OP_AND: + /* If either side requires a match, we're good */ + if (tsquery_requires_match(curitem + curitem->qoperator.left)) + return true; + else + return tsquery_requires_match(curitem + 1); + + case OP_OR: + /* Both sides must require a match */ + if (tsquery_requires_match(curitem + curitem->qoperator.left)) + return tsquery_requires_match(curitem + 1); + else + return false; + + default: + elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper); + } + + /* not reachable, but keep compiler quiet */ + return false; +} + +/* + * boolean operations + */ +Datum +ts_match_qv(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(DirectFunctionCall2(ts_match_vq, + PG_GETARG_DATUM(1), + PG_GETARG_DATUM(0))); +} + +Datum +ts_match_vq(PG_FUNCTION_ARGS) +{ + TSVector val = PG_GETARG_TSVECTOR(0); + TSQuery query = PG_GETARG_TSQUERY(1); + CHKVAL chkval; + bool result; + + /* empty query matches nothing */ + if (!query->size) + { + PG_FREE_IF_COPY(val, 0); + PG_FREE_IF_COPY(query, 1); + PG_RETURN_BOOL(false); + } + + chkval.arrb = ARRPTR(val); + chkval.arre = chkval.arrb + val->size; + chkval.values = STRPTR(val); + chkval.operand = GETOPERAND(query); + result = TS_execute(GETQUERY(query), + &chkval, + TS_EXEC_EMPTY, + checkcondition_str); + + PG_FREE_IF_COPY(val, 0); + PG_FREE_IF_COPY(query, 1); + PG_RETURN_BOOL(result); +} + +Datum +ts_match_tt(PG_FUNCTION_ARGS) +{ + TSVector vector; + TSQuery query; + bool res; + + vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector, + PG_GETARG_DATUM(0))); + query = DatumGetTSQuery(DirectFunctionCall1(plainto_tsquery, + PG_GETARG_DATUM(1))); + + res = DatumGetBool(DirectFunctionCall2(ts_match_vq, + TSVectorGetDatum(vector), + TSQueryGetDatum(query))); + + pfree(vector); + pfree(query); + + PG_RETURN_BOOL(res); +} + +Datum +ts_match_tq(PG_FUNCTION_ARGS) +{ + TSVector vector; + TSQuery query = PG_GETARG_TSQUERY(1); + bool res; + + vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector, + PG_GETARG_DATUM(0))); + + res = DatumGetBool(DirectFunctionCall2(ts_match_vq, + TSVectorGetDatum(vector), + TSQueryGetDatum(query))); + + pfree(vector); + PG_FREE_IF_COPY(query, 1); + + PG_RETURN_BOOL(res); +} + +/* + * ts_stat statistic function support + */ + + +/* + * Returns the number of positions in value 'wptr' within tsvector 'txt', + * that have a weight equal to one of the weights in 'weight' bitmask. 
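+ *
+ * For illustration: the bitmask uses D = 1, C = 2, B = 4 and A = 8 (the
+ * same encoding built in ts_stat_sql below), so a 'weight' of 12 counts
+ * only positions labelled A or B.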
+ */ +static int +check_weight(TSVector txt, WordEntry *wptr, int8 weight) +{ + int len = POSDATALEN(txt, wptr); + int num = 0; + WordEntryPos *ptr = POSDATAPTR(txt, wptr); + + while (len--) + { + if (weight & (1 << WEP_GETWEIGHT(*ptr))) + num++; + ptr++; + } + return num; +} + +#define compareStatWord(a,e,t) \ + tsCompareString((a)->lexeme, (a)->lenlexeme, \ + STRPTR(t) + (e)->pos, (e)->len, \ + false) + +static void +insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off) +{ + WordEntry *we = ARRPTR(txt) + off; + StatEntry *node = stat->root, + *pnode = NULL; + int n, + res = 0; + uint32 depth = 1; + + if (stat->weight == 0) + n = (we->haspos) ? POSDATALEN(txt, we) : 1; + else + n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0; + + if (n == 0) + return; /* nothing to insert */ + + while (node) + { + res = compareStatWord(node, we, txt); + + if (res == 0) + { + break; + } + else + { + pnode = node; + node = (res < 0) ? node->left : node->right; + } + depth++; + } + + if (depth > stat->maxdepth) + stat->maxdepth = depth; + + if (node == NULL) + { + node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len); + node->left = node->right = NULL; + node->ndoc = 1; + node->nentry = n; + node->lenlexeme = we->len; + memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme); + + if (pnode == NULL) + { + stat->root = node; + } + else + { + if (res < 0) + pnode->left = node; + else + pnode->right = node; + } + } + else + { + node->ndoc++; + node->nentry += n; + } +} + +static void +chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, + uint32 low, uint32 high, uint32 offset) +{ + uint32 pos; + uint32 middle = (low + high) >> 1; + + pos = (low + middle) >> 1; + if (low != middle && pos >= offset && pos - offset < txt->size) + insertStatEntry(persistentContext, stat, txt, pos - offset); + pos = (high + middle + 1) >> 1; + if (middle + 1 != high && pos >= offset && pos - offset < txt->size) + insertStatEntry(persistentContext, stat, txt, pos - offset); + + if (low != middle) + chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset); + if (high != middle + 1) + chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset); +} + +/* + * This is written like a custom aggregate function, because the + * original plan was to do just that. Unfortunately, an aggregate function + * can't return a set, so that plan was abandoned. If that limitation is + * lifted in the future, ts_stat could be a real aggregate function so that + * you could use it like this: + * + * SELECT ts_stat(vector_column) FROM vector_table; + * + * where vector_column is a tsvector-type column in vector_table. 
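+ *
+ * In its current form, ts_stat instead takes the query as a string and
+ * returns a set of (word, ndoc, nentry) rows, e.g.
+ *
+ * SELECT * FROM ts_stat('SELECT vector_column FROM vector_table');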
+ */ + +static TSVectorStat * +ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data) +{ + TSVector txt = DatumGetTSVector(data); + uint32 i, + nbit = 0, + offset; + + if (stat == NULL) + { /* Init in first */ + stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat)); + stat->maxdepth = 1; + } + + /* simple check of correctness */ + if (txt == NULL || txt->size == 0) + { + if (txt && txt != (TSVector) DatumGetPointer(data)) + pfree(txt); + return stat; + } + + i = txt->size - 1; + for (; i > 0; i >>= 1) + nbit++; + + nbit = 1 << nbit; + offset = (nbit - txt->size) / 2; + + insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset); + chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset); + + return stat; +} + +static void +ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx, + TSVectorStat *stat) +{ + TupleDesc tupdesc; + MemoryContext oldcontext; + StatEntry *node; + + funcctx->user_fctx = (void *) stat; + + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + stat->stack = palloc0(sizeof(StatEntry *) * (stat->maxdepth + 1)); + stat->stackpos = 0; + + node = stat->root; + /* find leftmost value */ + if (node == NULL) + stat->stack[stat->stackpos] = NULL; + else + for (;;) + { + stat->stack[stat->stackpos] = node; + if (node->left) + { + stat->stackpos++; + node = node->left; + } + else + break; + } + Assert(stat->stackpos <= stat->maxdepth); + + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + funcctx->tuple_desc = tupdesc; + funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); + + MemoryContextSwitchTo(oldcontext); +} + +static StatEntry * +walkStatEntryTree(TSVectorStat *stat) +{ + StatEntry *node = stat->stack[stat->stackpos]; + + if (node == NULL) + return NULL; + + if (node->ndoc != 0) + { + /* return entry itself: we already was at left sublink */ + return node; + } + else if (node->right && node->right != stat->stack[stat->stackpos + 1]) + { + /* go on right sublink */ + stat->stackpos++; + node = node->right; + + /* find most-left value */ + for (;;) + { + stat->stack[stat->stackpos] = node; + if (node->left) + { + stat->stackpos++; + node = node->left; + } + else + break; + } + Assert(stat->stackpos <= stat->maxdepth); + } + else + { + /* we already return all left subtree, itself and right subtree */ + if (stat->stackpos == 0) + return NULL; + + stat->stackpos--; + return walkStatEntryTree(stat); + } + + return node; +} + +static Datum +ts_process_call(FuncCallContext *funcctx) +{ + TSVectorStat *st; + StatEntry *entry; + + st = (TSVectorStat *) funcctx->user_fctx; + + entry = walkStatEntryTree(st); + + if (entry != NULL) + { + Datum result; + char *values[3]; + char ndoc[16]; + char nentry[16]; + HeapTuple tuple; + + values[0] = palloc(entry->lenlexeme + 1); + memcpy(values[0], entry->lexeme, entry->lenlexeme); + (values[0])[entry->lenlexeme] = '\0'; + sprintf(ndoc, "%d", entry->ndoc); + values[1] = ndoc; + sprintf(nentry, "%d", entry->nentry); + values[2] = nentry; + + tuple = BuildTupleFromCStrings(funcctx->attinmeta, values); + result = HeapTupleGetDatum(tuple); + + pfree(values[0]); + + /* mark entry as already visited */ + entry->ndoc = 0; + + return result; + } + + return (Datum) 0; +} + +static TSVectorStat * +ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws) +{ + char *query = text_to_cstring(txt); + TSVectorStat *stat; + bool isnull; + Portal portal; + SPIPlanPtr plan; + + if ((plan = 
SPI_prepare(query, 0, NULL)) == NULL) + /* internal error */ + elog(ERROR, "SPI_prepare(\"%s\") failed", query); + + if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL) + /* internal error */ + elog(ERROR, "SPI_cursor_open(\"%s\") failed", query); + + SPI_cursor_fetch(portal, true, 100); + + if (SPI_tuptable == NULL || + SPI_tuptable->tupdesc->natts != 1 || + !IsBinaryCoercible(SPI_gettypeid(SPI_tuptable->tupdesc, 1), + TSVECTOROID)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ts_stat query must return one tsvector column"))); + + stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat)); + stat->maxdepth = 1; + + if (ws) + { + char *buf; + + buf = VARDATA_ANY(ws); + while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws)) + { + if (pg_mblen(buf) == 1) + { + switch (*buf) + { + case 'A': + case 'a': + stat->weight |= 1 << 3; + break; + case 'B': + case 'b': + stat->weight |= 1 << 2; + break; + case 'C': + case 'c': + stat->weight |= 1 << 1; + break; + case 'D': + case 'd': + stat->weight |= 1; + break; + default: + stat->weight |= 0; + } + } + buf += pg_mblen(buf); + } + } + + while (SPI_processed > 0) + { + uint64 i; + + for (i = 0; i < SPI_processed; i++) + { + Datum data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull); + + if (!isnull) + stat = ts_accum(persistentContext, stat, data); + } + + SPI_freetuptable(SPI_tuptable); + SPI_cursor_fetch(portal, true, 100); + } + + SPI_freetuptable(SPI_tuptable); + SPI_cursor_close(portal); + SPI_freeplan(plan); + pfree(query); + + return stat; +} + +Datum +ts_stat1(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + Datum result; + + if (SRF_IS_FIRSTCALL()) + { + TSVectorStat *stat; + text *txt = PG_GETARG_TEXT_PP(0); + + funcctx = SRF_FIRSTCALL_INIT(); + SPI_connect(); + stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL); + PG_FREE_IF_COPY(txt, 0); + ts_setup_firstcall(fcinfo, funcctx, stat); + SPI_finish(); + } + + funcctx = SRF_PERCALL_SETUP(); + if ((result = ts_process_call(funcctx)) != (Datum) 0) + SRF_RETURN_NEXT(funcctx, result); + SRF_RETURN_DONE(funcctx); +} + +Datum +ts_stat2(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + Datum result; + + if (SRF_IS_FIRSTCALL()) + { + TSVectorStat *stat; + text *txt = PG_GETARG_TEXT_PP(0); + text *ws = PG_GETARG_TEXT_PP(1); + + funcctx = SRF_FIRSTCALL_INIT(); + SPI_connect(); + stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws); + PG_FREE_IF_COPY(txt, 0); + PG_FREE_IF_COPY(ws, 1); + ts_setup_firstcall(fcinfo, funcctx, stat); + SPI_finish(); + } + + funcctx = SRF_PERCALL_SETUP(); + if ((result = ts_process_call(funcctx)) != (Datum) 0) + SRF_RETURN_NEXT(funcctx, result); + SRF_RETURN_DONE(funcctx); +} + + +/* + * Triggers for automatic update of a tsvector column from text column(s) + * + * Trigger arguments are either + * name of tsvector col, name of tsconfig to use, name(s) of text col(s) + * name of tsvector col, name of regconfig col, name(s) of text col(s) + * ie, tsconfig can either be specified by name, or indirectly as the + * contents of a regconfig field in the row. If the name is used, it must + * be explicitly schema-qualified. 
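+ *
+ * A typical use looks like (table and column names here are illustrative):
+ *
+ * CREATE TRIGGER tsvectorupdate BEFORE INSERT OR UPDATE ON messages
+ * FOR EACH ROW EXECUTE FUNCTION
+ * tsvector_update_trigger(tsv, 'pg_catalog.english', title, body);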
+ */ +Datum +tsvector_update_trigger_byid(PG_FUNCTION_ARGS) +{ + return tsvector_update_trigger(fcinfo, false); +} + +Datum +tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS) +{ + return tsvector_update_trigger(fcinfo, true); +} + +static Datum +tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column) +{ + TriggerData *trigdata; + Trigger *trigger; + Relation rel; + HeapTuple rettuple = NULL; + int tsvector_attr_num, + i; + ParsedText prs; + Datum datum; + bool isnull; + text *txt; + Oid cfgId; + bool update_needed; + + /* Check call context */ + if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */ + elog(ERROR, "tsvector_update_trigger: not fired by trigger manager"); + + trigdata = (TriggerData *) fcinfo->context; + if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event)) + elog(ERROR, "tsvector_update_trigger: must be fired for row"); + if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event)) + elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event"); + + if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event)) + { + rettuple = trigdata->tg_trigtuple; + update_needed = true; + } + else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)) + { + rettuple = trigdata->tg_newtuple; + update_needed = false; /* computed below */ + } + else + elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE"); + + trigger = trigdata->tg_trigger; + rel = trigdata->tg_relation; + + if (trigger->tgnargs < 3) + elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)"); + + /* Find the target tsvector column */ + tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]); + if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("tsvector column \"%s\" does not exist", + trigger->tgargs[0]))); + /* This will effectively reject system columns, so no separate test: */ + if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num), + TSVECTOROID)) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("column \"%s\" is not of tsvector type", + trigger->tgargs[0]))); + + /* Find the configuration to use */ + if (config_column) + { + int config_attr_num; + + config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]); + if (config_attr_num == SPI_ERROR_NOATTRIBUTE) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("configuration column \"%s\" does not exist", + trigger->tgargs[1]))); + if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num), + REGCONFIGOID)) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("column \"%s\" is not of regconfig type", + trigger->tgargs[1]))); + + datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull); + if (isnull) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("configuration column \"%s\" must not be null", + trigger->tgargs[1]))); + cfgId = DatumGetObjectId(datum); + } + else + { + List *names; + + names = stringToQualifiedNameList(trigger->tgargs[1], NULL); + /* require a schema so that results are not search path dependent */ + if (list_length(names) < 2) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("text search configuration name \"%s\" must be schema-qualified", + trigger->tgargs[1]))); + cfgId = get_ts_config_oid(names, false); + } + + /* initialize parse state */ + prs.lenwords = 32; + prs.curwords = 0; + prs.pos = 0; + prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords); + + /* find all words in indexable column(s) */ + for (i 
= 2; i < trigger->tgnargs; i++) + { + int numattr; + + numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]); + if (numattr == SPI_ERROR_NOATTRIBUTE) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" does not exist", + trigger->tgargs[i]))); + if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID)) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("column \"%s\" is not of a character type", + trigger->tgargs[i]))); + + if (bms_is_member(numattr - FirstLowInvalidHeapAttributeNumber, trigdata->tg_updatedcols)) + update_needed = true; + + datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull); + if (isnull) + continue; + + txt = DatumGetTextPP(datum); + + parsetext(cfgId, &prs, VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt)); + + if (txt != (text *) DatumGetPointer(datum)) + pfree(txt); + } + + if (update_needed) + { + /* make tsvector value */ + datum = TSVectorGetDatum(make_tsvector(&prs)); + isnull = false; + + /* and insert it into tuple */ + rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att, + 1, &tsvector_attr_num, + &datum, &isnull); + + pfree(DatumGetPointer(datum)); + } + + return PointerGetDatum(rettuple); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsvector_parser.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsvector_parser.c new file mode 100644 index 00000000000..13e075831fe --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsvector_parser.c @@ -0,0 +1,388 @@ +/*------------------------------------------------------------------------- + * + * tsvector_parser.c + * Parser for tsvector + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/tsvector_parser.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "tsearch/ts_locale.h" +#include "tsearch/ts_utils.h" + + +/* + * Private state of tsvector parser. Note that tsquery also uses this code to + * parse its input, hence the boolean flags. The oprisdelim and is_tsquery + * flags are both true or both false in current usage, but we keep them + * separate for clarity. + * + * If oprisdelim is set, the following characters are treated as delimiters + * (in addition to whitespace): ! | & ( ) + * + * is_tsquery affects *only* the content of error messages. + * + * is_web can be true to further modify tsquery parsing. + * + * If escontext is an ErrorSaveContext node, then soft errors can be + * captured there rather than being thrown. + */ +struct TSVectorParseStateData +{ + char *prsbuf; /* next input character */ + char *bufstart; /* whole string (used only for errors) */ + char *word; /* buffer to hold the current word */ + int len; /* size in bytes allocated for 'word' */ + int eml; /* max bytes per character */ + bool oprisdelim; /* treat ! | * ( ) as delimiters? */ + bool is_tsquery; /* say "tsquery" not "tsvector" in errors? */ + bool is_web; /* we're in websearch_to_tsquery() */ + Node *escontext; /* for soft error reporting */ +}; + + +/* + * Initializes a parser state object for the given input string. + * A bitmask of flags (see ts_utils.h) and an error context object + * can be provided as well. 
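+ *
+ * The flag bits recognized here are P_TSV_OPR_IS_DELIM, P_TSV_IS_TSQUERY
+ * and P_TSV_IS_WEB (tested just below); a caller wanting plain tsvector
+ * behavior can simply pass zero for flags.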
+ */ +TSVectorParseState +init_tsvector_parser(char *input, int flags, Node *escontext) +{ + TSVectorParseState state; + + state = (TSVectorParseState) palloc(sizeof(struct TSVectorParseStateData)); + state->prsbuf = input; + state->bufstart = input; + state->len = 32; + state->word = (char *) palloc(state->len); + state->eml = pg_database_encoding_max_length(); + state->oprisdelim = (flags & P_TSV_OPR_IS_DELIM) != 0; + state->is_tsquery = (flags & P_TSV_IS_TSQUERY) != 0; + state->is_web = (flags & P_TSV_IS_WEB) != 0; + state->escontext = escontext; + + return state; +} + +/* + * Reinitializes parser to parse 'input', instead of previous input. + * + * Note that bufstart (the string reported in errors) is not changed. + */ +void +reset_tsvector_parser(TSVectorParseState state, char *input) +{ + state->prsbuf = input; +} + +/* + * Shuts down a tsvector parser. + */ +void +close_tsvector_parser(TSVectorParseState state) +{ + pfree(state->word); + pfree(state); +} + +/* increase the size of 'word' if needed to hold one more character */ +#define RESIZEPRSBUF \ +do { \ + int clen = curpos - state->word; \ + if ( clen + state->eml >= state->len ) \ + { \ + state->len *= 2; \ + state->word = (char *) repalloc(state->word, state->len); \ + curpos = state->word + clen; \ + } \ +} while (0) + +/* Fills gettoken_tsvector's output parameters, and returns true */ +#define RETURN_TOKEN \ +do { \ + if (pos_ptr != NULL) \ + { \ + *pos_ptr = pos; \ + *poslen = npos; \ + } \ + else if (pos != NULL) \ + pfree(pos); \ + \ + if (strval != NULL) \ + *strval = state->word; \ + if (lenval != NULL) \ + *lenval = curpos - state->word; \ + if (endptr != NULL) \ + *endptr = state->prsbuf; \ + return true; \ +} while(0) + + +/* State codes used in gettoken_tsvector */ +#define WAITWORD 1 +#define WAITENDWORD 2 +#define WAITNEXTCHAR 3 +#define WAITENDCMPLX 4 +#define WAITPOSINFO 5 +#define INPOSINFO 6 +#define WAITPOSDELIM 7 +#define WAITCHARCMPLX 8 + +#define PRSSYNTAXERROR return prssyntaxerror(state) + +static bool +prssyntaxerror(TSVectorParseState state) +{ + errsave(state->escontext, + (errcode(ERRCODE_SYNTAX_ERROR), + state->is_tsquery ? + errmsg("syntax error in tsquery: \"%s\"", state->bufstart) : + errmsg("syntax error in tsvector: \"%s\"", state->bufstart))); + /* In soft error situation, return false as convenience for caller */ + return false; +} + + +/* + * Get next token from string being parsed. Returns true if successful, + * false if end of input string is reached or soft error. + * + * On success, these output parameters are filled in: + * + * *strval pointer to token + * *lenval length of *strval + * *pos_ptr pointer to a palloc'd array of positions and weights + * associated with the token. If the caller is not interested + * in the information, NULL can be supplied. Otherwise + * the caller is responsible for pfreeing the array. + * *poslen number of elements in *pos_ptr + * *endptr scan resumption point + * + * Pass NULL for any unwanted output parameters. + * + * If state->escontext is an ErrorSaveContext, then caller must check + * SOFT_ERROR_OCCURRED() to determine whether a "false" result means + * error or normal end-of-string. + */ +bool +gettoken_tsvector(TSVectorParseState state, + char **strval, int *lenval, + WordEntryPos **pos_ptr, int *poslen, + char **endptr) +{ + int oldstate = 0; + char *curpos = state->word; + int statecode = WAITWORD; + + /* + * pos is for collecting the comma delimited list of positions followed by + * the actual token. 
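+ *
+ * For illustration: given the input token "word:3A,7", *strval points at
+ * "word", *poslen is 2, and the two positions carry weight A and the
+ * default weight D respectively.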
+ */ + WordEntryPos *pos = NULL; + int npos = 0; /* elements of pos used */ + int posalen = 0; /* allocated size of pos */ + + while (1) + { + if (statecode == WAITWORD) + { + if (*(state->prsbuf) == '\0') + return false; + else if (!state->is_web && t_iseq(state->prsbuf, '\'')) + statecode = WAITENDCMPLX; + else if (!state->is_web && t_iseq(state->prsbuf, '\\')) + { + statecode = WAITNEXTCHAR; + oldstate = WAITENDWORD; + } + else if ((state->oprisdelim && ISOPERATOR(state->prsbuf)) || + (state->is_web && t_iseq(state->prsbuf, '"'))) + PRSSYNTAXERROR; + else if (!t_isspace(state->prsbuf)) + { + COPYCHAR(curpos, state->prsbuf); + curpos += pg_mblen(state->prsbuf); + statecode = WAITENDWORD; + } + } + else if (statecode == WAITNEXTCHAR) + { + if (*(state->prsbuf) == '\0') + ereturn(state->escontext, false, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("there is no escaped character: \"%s\"", + state->bufstart))); + else + { + RESIZEPRSBUF; + COPYCHAR(curpos, state->prsbuf); + curpos += pg_mblen(state->prsbuf); + Assert(oldstate != 0); + statecode = oldstate; + } + } + else if (statecode == WAITENDWORD) + { + if (!state->is_web && t_iseq(state->prsbuf, '\\')) + { + statecode = WAITNEXTCHAR; + oldstate = WAITENDWORD; + } + else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' || + (state->oprisdelim && ISOPERATOR(state->prsbuf)) || + (state->is_web && t_iseq(state->prsbuf, '"'))) + { + RESIZEPRSBUF; + if (curpos == state->word) + PRSSYNTAXERROR; + *(curpos) = '\0'; + RETURN_TOKEN; + } + else if (t_iseq(state->prsbuf, ':')) + { + if (curpos == state->word) + PRSSYNTAXERROR; + *(curpos) = '\0'; + if (state->oprisdelim) + RETURN_TOKEN; + else + statecode = INPOSINFO; + } + else + { + RESIZEPRSBUF; + COPYCHAR(curpos, state->prsbuf); + curpos += pg_mblen(state->prsbuf); + } + } + else if (statecode == WAITENDCMPLX) + { + if (!state->is_web && t_iseq(state->prsbuf, '\'')) + { + statecode = WAITCHARCMPLX; + } + else if (!state->is_web && t_iseq(state->prsbuf, '\\')) + { + statecode = WAITNEXTCHAR; + oldstate = WAITENDCMPLX; + } + else if (*(state->prsbuf) == '\0') + PRSSYNTAXERROR; + else + { + RESIZEPRSBUF; + COPYCHAR(curpos, state->prsbuf); + curpos += pg_mblen(state->prsbuf); + } + } + else if (statecode == WAITCHARCMPLX) + { + if (!state->is_web && t_iseq(state->prsbuf, '\'')) + { + RESIZEPRSBUF; + COPYCHAR(curpos, state->prsbuf); + curpos += pg_mblen(state->prsbuf); + statecode = WAITENDCMPLX; + } + else + { + RESIZEPRSBUF; + *(curpos) = '\0'; + if (curpos == state->word) + PRSSYNTAXERROR; + if (state->oprisdelim) + { + /* state->prsbuf+=pg_mblen(state->prsbuf); */ + RETURN_TOKEN; + } + else + statecode = WAITPOSINFO; + continue; /* recheck current character */ + } + } + else if (statecode == WAITPOSINFO) + { + if (t_iseq(state->prsbuf, ':')) + statecode = INPOSINFO; + else + RETURN_TOKEN; + } + else if (statecode == INPOSINFO) + { + if (t_isdigit(state->prsbuf)) + { + if (posalen == 0) + { + posalen = 4; + pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * posalen); + npos = 0; + } + else if (npos + 1 >= posalen) + { + posalen *= 2; + pos = (WordEntryPos *) repalloc(pos, sizeof(WordEntryPos) * posalen); + } + npos++; + WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf))); + /* we cannot get here in tsquery, so no need for 2 errmsgs */ + if (WEP_GETPOS(pos[npos - 1]) == 0) + ereturn(state->escontext, false, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("wrong position info in tsvector: \"%s\"", + state->bufstart))); + WEP_SETWEIGHT(pos[npos - 1], 0); + statecode = WAITPOSDELIM; + } + 
else + PRSSYNTAXERROR; + } + else if (statecode == WAITPOSDELIM) + { + if (t_iseq(state->prsbuf, ',')) + statecode = INPOSINFO; + else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*')) + { + if (WEP_GETWEIGHT(pos[npos - 1])) + PRSSYNTAXERROR; + WEP_SETWEIGHT(pos[npos - 1], 3); + } + else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B')) + { + if (WEP_GETWEIGHT(pos[npos - 1])) + PRSSYNTAXERROR; + WEP_SETWEIGHT(pos[npos - 1], 2); + } + else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C')) + { + if (WEP_GETWEIGHT(pos[npos - 1])) + PRSSYNTAXERROR; + WEP_SETWEIGHT(pos[npos - 1], 1); + } + else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D')) + { + if (WEP_GETWEIGHT(pos[npos - 1])) + PRSSYNTAXERROR; + WEP_SETWEIGHT(pos[npos - 1], 0); + } + else if (t_isspace(state->prsbuf) || + *(state->prsbuf) == '\0') + RETURN_TOKEN; + else if (!t_isdigit(state->prsbuf)) + PRSSYNTAXERROR; + } + else /* internal error */ + elog(ERROR, "unrecognized state in gettoken_tsvector: %d", + statecode); + + /* get next char */ + state->prsbuf += pg_mblen(state->prsbuf); + } +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/uuid.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/uuid.c new file mode 100644 index 00000000000..4f7aa768fda --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/uuid.c @@ -0,0 +1,423 @@ +/*------------------------------------------------------------------------- + * + * uuid.c + * Functions for the built-in type "uuid". + * + * Copyright (c) 2007-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/adt/uuid.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "common/hashfn.h" +#include "lib/hyperloglog.h" +#include "libpq/pqformat.h" +#include "port/pg_bswap.h" +#include "utils/builtins.h" +#include "utils/guc.h" +#include "utils/sortsupport.h" +#include "utils/uuid.h" + +/* sortsupport for uuid */ +typedef struct +{ + int64 input_count; /* number of non-null values seen */ + bool estimating; /* true if estimating cardinality */ + + hyperLogLogState abbr_card; /* cardinality estimator */ +} uuid_sortsupport_state; + +static void string_to_uuid(const char *source, pg_uuid_t *uuid, Node *escontext); +static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2); +static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup); +static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup); +static Datum uuid_abbrev_convert(Datum original, SortSupport ssup); + +Datum +uuid_in(PG_FUNCTION_ARGS) +{ + char *uuid_str = PG_GETARG_CSTRING(0); + pg_uuid_t *uuid; + + uuid = (pg_uuid_t *) palloc(sizeof(*uuid)); + string_to_uuid(uuid_str, uuid, fcinfo->context); + PG_RETURN_UUID_P(uuid); +} + +Datum +uuid_out(PG_FUNCTION_ARGS) +{ + pg_uuid_t *uuid = PG_GETARG_UUID_P(0); + static const char hex_chars[] = "0123456789abcdef"; + StringInfoData buf; + int i; + + initStringInfo(&buf); + for (i = 0; i < UUID_LEN; i++) + { + int hi; + int lo; + + /* + * We print uuid values as a string of 8, 4, 4, 4, and then 12 + * hexadecimal characters, with each group is separated by a hyphen + * ("-"). Therefore, add the hyphens at the appropriate places here. 
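+ * The result therefore looks like, e.g.,
+ * "a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11".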
+ */ + if (i == 4 || i == 6 || i == 8 || i == 10) + appendStringInfoChar(&buf, '-'); + + hi = uuid->data[i] >> 4; + lo = uuid->data[i] & 0x0F; + + appendStringInfoChar(&buf, hex_chars[hi]); + appendStringInfoChar(&buf, hex_chars[lo]); + } + + PG_RETURN_CSTRING(buf.data); +} + +/* + * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash + * after each group of 4 hexadecimal digits, and optionally surrounded by {}. + * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal + * digits, is the only one used for output.) + */ +static void +string_to_uuid(const char *source, pg_uuid_t *uuid, Node *escontext) +{ + const char *src = source; + bool braces = false; + int i; + + if (src[0] == '{') + { + src++; + braces = true; + } + + for (i = 0; i < UUID_LEN; i++) + { + char str_buf[3]; + + if (src[0] == '\0' || src[1] == '\0') + goto syntax_error; + memcpy(str_buf, src, 2); + if (!isxdigit((unsigned char) str_buf[0]) || + !isxdigit((unsigned char) str_buf[1])) + goto syntax_error; + + str_buf[2] = '\0'; + uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16); + src += 2; + if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1) + src++; + } + + if (braces) + { + if (*src != '}') + goto syntax_error; + src++; + } + + if (*src != '\0') + goto syntax_error; + + return; + +syntax_error: + ereturn(escontext,, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "uuid", source))); +} + +Datum +uuid_recv(PG_FUNCTION_ARGS) +{ + StringInfo buffer = (StringInfo) PG_GETARG_POINTER(0); + pg_uuid_t *uuid; + + uuid = (pg_uuid_t *) palloc(UUID_LEN); + memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN); + PG_RETURN_POINTER(uuid); +} + +Datum +uuid_send(PG_FUNCTION_ARGS) +{ + pg_uuid_t *uuid = PG_GETARG_UUID_P(0); + StringInfoData buffer; + + pq_begintypsend(&buffer); + pq_sendbytes(&buffer, uuid->data, UUID_LEN); + PG_RETURN_BYTEA_P(pq_endtypsend(&buffer)); +} + +/* internal uuid compare function */ +static int +uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2) +{ + return memcmp(arg1->data, arg2->data, UUID_LEN); +} + +Datum +uuid_lt(PG_FUNCTION_ARGS) +{ + pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); + pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); + + PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0); +} + +Datum +uuid_le(PG_FUNCTION_ARGS) +{ + pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); + pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); + + PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0); +} + +Datum +uuid_eq(PG_FUNCTION_ARGS) +{ + pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); + pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); + + PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0); +} + +Datum +uuid_ge(PG_FUNCTION_ARGS) +{ + pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); + pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); + + PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0); +} + +Datum +uuid_gt(PG_FUNCTION_ARGS) +{ + pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); + pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); + + PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0); +} + +Datum +uuid_ne(PG_FUNCTION_ARGS) +{ + pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); + pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); + + PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0); +} + +/* handler for btree index operator */ +Datum +uuid_cmp(PG_FUNCTION_ARGS) +{ + pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); + pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); + + PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2)); +} + +/* + * Sort support strategy routine + */ +Datum +uuid_sortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = 
(SortSupport) PG_GETARG_POINTER(0); + + ssup->comparator = uuid_fast_cmp; + ssup->ssup_extra = NULL; + + if (ssup->abbreviate) + { + uuid_sortsupport_state *uss; + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); + + uss = palloc(sizeof(uuid_sortsupport_state)); + uss->input_count = 0; + uss->estimating = true; + initHyperLogLog(&uss->abbr_card, 10); + + ssup->ssup_extra = uss; + + ssup->comparator = ssup_datum_unsigned_cmp; + ssup->abbrev_converter = uuid_abbrev_convert; + ssup->abbrev_abort = uuid_abbrev_abort; + ssup->abbrev_full_comparator = uuid_fast_cmp; + + MemoryContextSwitchTo(oldcontext); + } + + PG_RETURN_VOID(); +} + +/* + * SortSupport comparison func + */ +static int +uuid_fast_cmp(Datum x, Datum y, SortSupport ssup) +{ + pg_uuid_t *arg1 = DatumGetUUIDP(x); + pg_uuid_t *arg2 = DatumGetUUIDP(y); + + return uuid_internal_cmp(arg1, arg2); +} + +/* + * Callback for estimating effectiveness of abbreviated key optimization. + * + * We pay no attention to the cardinality of the non-abbreviated data, because + * there is no equality fast-path within authoritative uuid comparator. + */ +static bool +uuid_abbrev_abort(int memtupcount, SortSupport ssup) +{ + uuid_sortsupport_state *uss = ssup->ssup_extra; + double abbr_card; + + if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating) + return false; + + abbr_card = estimateHyperLogLog(&uss->abbr_card); + + /* + * If we have >100k distinct values, then even if we were sorting many + * billion rows we'd likely still break even, and the penalty of undoing + * that many rows of abbrevs would probably not be worth it. Stop even + * counting at that point. + */ + if (abbr_card > 100000.0) + { +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "uuid_abbrev: estimation ends at cardinality %f" + " after " INT64_FORMAT " values (%d rows)", + abbr_card, uss->input_count, memtupcount); +#endif + uss->estimating = false; + return false; + } + + /* + * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row + * fudge factor allows us to abort earlier on genuinely pathological data + * where we've had exactly one abbreviated value in the first 2k + * (non-null) rows. + */ + if (abbr_card < uss->input_count / 2000.0 + 0.5) + { +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "uuid_abbrev: aborting abbreviation at cardinality %f" + " below threshold %f after " INT64_FORMAT " values (%d rows)", + abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count, + memtupcount); +#endif + return true; + } + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "uuid_abbrev: cardinality %f after " INT64_FORMAT + " values (%d rows)", abbr_card, uss->input_count, memtupcount); +#endif + + return false; +} + +/* + * Conversion routine for sortsupport. Converts original uuid representation + * to abbreviated key representation. Our encoding strategy is simple -- pack + * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian + * machines, the bytes are stored in reverse order), and treat it as an + * unsigned integer. 
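+ *
+ * For illustration (64-bit little-endian case): for the uuid
+ * a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11, the first eight data bytes
+ * a0 ee bc 99 9c 0b 4e f8 become, after the byteswap below, the
+ * abbreviated key 0xa0eebc999c0b4ef8, which then compares as an
+ * ordinary unsigned integer.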
+ */ +static Datum +uuid_abbrev_convert(Datum original, SortSupport ssup) +{ + uuid_sortsupport_state *uss = ssup->ssup_extra; + pg_uuid_t *authoritative = DatumGetUUIDP(original); + Datum res; + + memcpy(&res, authoritative->data, sizeof(Datum)); + uss->input_count += 1; + + if (uss->estimating) + { + uint32 tmp; + +#if SIZEOF_DATUM == 8 + tmp = (uint32) res ^ (uint32) ((uint64) res >> 32); +#else /* SIZEOF_DATUM != 8 */ + tmp = (uint32) res; +#endif + + addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); + } + + /* + * Byteswap on little-endian machines. + * + * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer + * 3-way comparator) works correctly on all platforms. If we didn't do + * this, the comparator would have to call memcmp() with a pair of + * pointers to the first byte of each abbreviated key, which is slower. + */ + res = DatumBigEndianToNative(res); + + return res; +} + +/* hash index support */ +Datum +uuid_hash(PG_FUNCTION_ARGS) +{ + pg_uuid_t *key = PG_GETARG_UUID_P(0); + + return hash_any(key->data, UUID_LEN); +} + +Datum +uuid_hash_extended(PG_FUNCTION_ARGS) +{ + pg_uuid_t *key = PG_GETARG_UUID_P(0); + + return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1)); +} + +Datum +gen_random_uuid(PG_FUNCTION_ARGS) +{ + pg_uuid_t *uuid = palloc(UUID_LEN); + + if (!pg_strong_random(uuid, UUID_LEN)) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("could not generate random values"))); + + /* + * Set magic numbers for a "version 4" (pseudorandom) UUID, see + * http://tools.ietf.org/html/rfc4122#section-4.4 + */ + uuid->data[6] = (uuid->data[6] & 0x0f) | 0x40; /* time_hi_and_version */ + uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80; /* clock_seq_hi_and_reserved */ + + PG_RETURN_UUID_P(uuid); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/varbit.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/varbit.c new file mode 100644 index 00000000000..3dbbd1207f9 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/varbit.c @@ -0,0 +1,1894 @@ +/*------------------------------------------------------------------------- + * + * varbit.c + * Functions for the SQL datatypes BIT() and BIT VARYING(). + * + * The data structure contains the following elements: + * header -- length of the whole data structure (incl header) + * in bytes (as with all varying length datatypes) + * data section -- private data section for the bits data structures + * bitlength -- length of the bit string in bits + * bitdata -- bit string, most significant byte first + * + * The length of the bitdata vector should always be exactly as many + * bytes as are needed for the given bitlength. If the bitlength is + * not a multiple of 8, the extra low-order padding bits of the last + * byte must be zeroes. + * + * attypmod is defined as the length of the bit string in bits, or for + * varying bits the maximum length. + * + * Code originally contributed by Adriaan Joubert. 
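+ *
+ * (Illustration of the layout above: B'101' is stored with bitlength 3
+ * and a single data byte 0xA0, i.e. the three bits followed by five zero
+ * padding bits.)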
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/adt/varbit.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/htup_details.h" +#include "common/int.h" +#include "libpq/pqformat.h" +#include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" +#include "port/pg_bitutils.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/varbit.h" + +#define HEXDIG(z) ((z)<10 ? ((z)+'0') : ((z)-10+'A')) + +/* Mask off any bits that should be zero in the last byte of a bitstring */ +#define VARBIT_PAD(vb) \ + do { \ + int32 pad_ = VARBITPAD(vb); \ + Assert(pad_ >= 0 && pad_ < BITS_PER_BYTE); \ + if (pad_ > 0) \ + *(VARBITS(vb) + VARBITBYTES(vb) - 1) &= BITMASK << pad_; \ + } while (0) + +/* + * Many functions work byte-by-byte, so they have a pointer handy to the + * last-plus-one byte, which saves a cycle or two. + */ +#define VARBIT_PAD_LAST(vb, ptr) \ + do { \ + int32 pad_ = VARBITPAD(vb); \ + Assert(pad_ >= 0 && pad_ < BITS_PER_BYTE); \ + if (pad_ > 0) \ + *((ptr) - 1) &= BITMASK << pad_; \ + } while (0) + +/* Assert proper padding of a bitstring */ +#ifdef USE_ASSERT_CHECKING +#define VARBIT_CORRECTLY_PADDED(vb) \ + do { \ + int32 pad_ = VARBITPAD(vb); \ + Assert(pad_ >= 0 && pad_ < BITS_PER_BYTE); \ + Assert(pad_ == 0 || \ + (*(VARBITS(vb) + VARBITBYTES(vb) - 1) & ~(BITMASK << pad_)) == 0); \ + } while (0) +#else +#define VARBIT_CORRECTLY_PADDED(vb) ((void) 0) +#endif + +static VarBit *bit_catenate(VarBit *arg1, VarBit *arg2); +static VarBit *bitsubstring(VarBit *arg, int32 s, int32 l, + bool length_not_specified); +static VarBit *bit_overlay(VarBit *t1, VarBit *t2, int sp, int sl); + + +/* + * common code for bittypmodin and varbittypmodin + */ +static int32 +anybit_typmodin(ArrayType *ta, const char *typename) +{ + int32 typmod; + int32 *tl; + int n; + + tl = ArrayGetIntegerTypmods(ta, &n); + + /* + * we're not too tense about good error message here because grammar + * shouldn't allow wrong number of modifiers for BIT + */ + if (n != 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid type modifier"))); + + if (*tl < 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("length for type %s must be at least 1", + typename))); + if (*tl > (MaxAttrSize * BITS_PER_BYTE)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("length for type %s cannot exceed %d", + typename, MaxAttrSize * BITS_PER_BYTE))); + + typmod = *tl; + + return typmod; +} + +/* + * common code for bittypmodout and varbittypmodout + */ +static char * +anybit_typmodout(int32 typmod) +{ + char *res = (char *) palloc(64); + + if (typmod >= 0) + snprintf(res, 64, "(%d)", typmod); + else + *res = '\0'; + + return res; +} + + +/* + * bit_in - + * converts a char string to the internal representation of a bitstring. + * The length is determined by the number of bits required plus + * VARHDRSZ bytes or from atttypmod. 
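+ *
+ * At this level the input string may be a plain string of 0s and 1s,
+ * optionally prefixed with 'b'/'B', or a hex string prefixed with
+ * 'x'/'X'; e.g. "b1001", "1001" and "x9" all denote the four-bit
+ * string 1001.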
+ */ +Datum +bit_in(PG_FUNCTION_ARGS) +{ + char *input_string = PG_GETARG_CSTRING(0); +#ifdef NOT_USED + Oid typelem = PG_GETARG_OID(1); +#endif + int32 atttypmod = PG_GETARG_INT32(2); + Node *escontext = fcinfo->context; + VarBit *result; /* The resulting bit string */ + char *sp; /* pointer into the character string */ + bits8 *r; /* pointer into the result */ + int len, /* Length of the whole data structure */ + bitlen, /* Number of bits in the bit string */ + slen; /* Length of the input string */ + bool bit_not_hex; /* false = hex string true = bit string */ + int bc; + bits8 x = 0; + + /* Check that the first character is a b or an x */ + if (input_string[0] == 'b' || input_string[0] == 'B') + { + bit_not_hex = true; + sp = input_string + 1; + } + else if (input_string[0] == 'x' || input_string[0] == 'X') + { + bit_not_hex = false; + sp = input_string + 1; + } + else + { + /* + * Otherwise it's binary. This allows things like cast('1001' as bit) + * to work transparently. + */ + bit_not_hex = true; + sp = input_string; + } + + /* + * Determine bitlength from input string. MaxAllocSize ensures a regular + * input is small enough, but we must check hex input. + */ + slen = strlen(sp); + if (bit_not_hex) + bitlen = slen; + else + { + if (slen > VARBITMAXLEN / 4) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("bit string length exceeds the maximum allowed (%d)", + VARBITMAXLEN))); + bitlen = slen * 4; + } + + /* + * Sometimes atttypmod is not supplied. If it is supplied we need to make + * sure that the bitstring fits. + */ + if (atttypmod <= 0) + atttypmod = bitlen; + else if (bitlen != atttypmod) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_STRING_DATA_LENGTH_MISMATCH), + errmsg("bit string length %d does not match type bit(%d)", + bitlen, atttypmod))); + + len = VARBITTOTALLEN(atttypmod); + /* set to 0 so that *r is always initialised and string is zero-padded */ + result = (VarBit *) palloc0(len); + SET_VARSIZE(result, len); + VARBITLEN(result) = atttypmod; + + r = VARBITS(result); + if (bit_not_hex) + { + /* Parse the bit representation of the string */ + /* We know it fits, as bitlen was compared to atttypmod */ + x = HIGHBIT; + for (; *sp; sp++) + { + if (*sp == '1') + *r |= x; + else if (*sp != '0') + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("\"%.*s\" is not a valid binary digit", + pg_mblen(sp), sp))); + + x >>= 1; + if (x == 0) + { + x = HIGHBIT; + r++; + } + } + } + else + { + /* Parse the hex representation of the string */ + for (bc = 0; *sp; sp++) + { + if (*sp >= '0' && *sp <= '9') + x = (bits8) (*sp - '0'); + else if (*sp >= 'A' && *sp <= 'F') + x = (bits8) (*sp - 'A') + 10; + else if (*sp >= 'a' && *sp <= 'f') + x = (bits8) (*sp - 'a') + 10; + else + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("\"%.*s\" is not a valid hexadecimal digit", + pg_mblen(sp), sp))); + + if (bc) + { + *r++ |= x; + bc = 0; + } + else + { + *r = x << 4; + bc = 1; + } + } + } + + PG_RETURN_VARBIT_P(result); +} + + +Datum +bit_out(PG_FUNCTION_ARGS) +{ +#if 1 + /* same as varbit output */ + return varbit_out(fcinfo); +#else + + /* + * This is how one would print a hex string, in case someone wants to + * write a formatting function. 
+ */ + VarBit *s = PG_GETARG_VARBIT_P(0); + char *result, + *r; + bits8 *sp; + int i, + len, + bitlen; + + /* Assertion to help catch any bit functions that don't pad correctly */ + VARBIT_CORRECTLY_PADDED(s); + + bitlen = VARBITLEN(s); + len = (bitlen + 3) / 4; + result = (char *) palloc(len + 2); + sp = VARBITS(s); + r = result; + *r++ = 'X'; + /* we cheat by knowing that we store full bytes zero padded */ + for (i = 0; i < len; i += 2, sp++) + { + *r++ = HEXDIG((*sp) >> 4); + *r++ = HEXDIG((*sp) & 0xF); + } + + /* + * Go back one step if we printed a hex number that was not part of the + * bitstring anymore + */ + if (i > len) + r--; + *r = '\0'; + + PG_RETURN_CSTRING(result); +#endif +} + +/* + * bit_recv - converts external binary format to bit + */ +Datum +bit_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + +#ifdef NOT_USED + Oid typelem = PG_GETARG_OID(1); +#endif + int32 atttypmod = PG_GETARG_INT32(2); + VarBit *result; + int len, + bitlen; + + bitlen = pq_getmsgint(buf, sizeof(int32)); + if (bitlen < 0 || bitlen > VARBITMAXLEN) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("invalid length in external bit string"))); + + /* + * Sometimes atttypmod is not supplied. If it is supplied we need to make + * sure that the bitstring fits. + */ + if (atttypmod > 0 && bitlen != atttypmod) + ereport(ERROR, + (errcode(ERRCODE_STRING_DATA_LENGTH_MISMATCH), + errmsg("bit string length %d does not match type bit(%d)", + bitlen, atttypmod))); + + len = VARBITTOTALLEN(bitlen); + result = (VarBit *) palloc(len); + SET_VARSIZE(result, len); + VARBITLEN(result) = bitlen; + + pq_copymsgbytes(buf, (char *) VARBITS(result), VARBITBYTES(result)); + + /* Make sure last byte is correctly zero-padded */ + VARBIT_PAD(result); + + PG_RETURN_VARBIT_P(result); +} + +/* + * bit_send - converts bit to binary format + */ +Datum +bit_send(PG_FUNCTION_ARGS) +{ + /* Exactly the same as varbit_send, so share code */ + return varbit_send(fcinfo); +} + +/* + * bit() + * Converts a bit() type to a specific internal length. + * len is the bitlength specified in the column definition. + * + * If doing implicit cast, raise error when source data is wrong length. + * If doing explicit cast, silently truncate or zero-pad to specified length. + */ +Datum +bit(PG_FUNCTION_ARGS) +{ + VarBit *arg = PG_GETARG_VARBIT_P(0); + int32 len = PG_GETARG_INT32(1); + bool isExplicit = PG_GETARG_BOOL(2); + VarBit *result; + int rlen; + + /* No work if typmod is invalid or supplied data matches it already */ + if (len <= 0 || len > VARBITMAXLEN || len == VARBITLEN(arg)) + PG_RETURN_VARBIT_P(arg); + + if (!isExplicit) + ereport(ERROR, + (errcode(ERRCODE_STRING_DATA_LENGTH_MISMATCH), + errmsg("bit string length %d does not match type bit(%d)", + VARBITLEN(arg), len))); + + rlen = VARBITTOTALLEN(len); + /* set to 0 so that string is zero-padded */ + result = (VarBit *) palloc0(rlen); + SET_VARSIZE(result, rlen); + VARBITLEN(result) = len; + + memcpy(VARBITS(result), VARBITS(arg), + Min(VARBITBYTES(result), VARBITBYTES(arg))); + + /* + * Make sure last byte is zero-padded if needed. This is useless but safe + * if source data was shorter than target length (we assume the last byte + * of the source data was itself correctly zero-padded). 
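+ *
+ * (Illustration of the function's contract: an explicit cast such as
+ * B'101'::bit(5) yields '10100', while the equivalent implicit coercion
+ * raises the length-mismatch error above.)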
+ */ + VARBIT_PAD(result); + + PG_RETURN_VARBIT_P(result); +} + +Datum +bittypmodin(PG_FUNCTION_ARGS) +{ + ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0); + + PG_RETURN_INT32(anybit_typmodin(ta, "bit")); +} + +Datum +bittypmodout(PG_FUNCTION_ARGS) +{ + int32 typmod = PG_GETARG_INT32(0); + + PG_RETURN_CSTRING(anybit_typmodout(typmod)); +} + + +/* + * varbit_in - + * converts a string to the internal representation of a bitstring. + * This is the same as bit_in except that atttypmod is taken as + * the maximum length, not the exact length to force the bitstring to. + */ +Datum +varbit_in(PG_FUNCTION_ARGS) +{ + char *input_string = PG_GETARG_CSTRING(0); +#ifdef NOT_USED + Oid typelem = PG_GETARG_OID(1); +#endif + int32 atttypmod = PG_GETARG_INT32(2); + Node *escontext = fcinfo->context; + VarBit *result; /* The resulting bit string */ + char *sp; /* pointer into the character string */ + bits8 *r; /* pointer into the result */ + int len, /* Length of the whole data structure */ + bitlen, /* Number of bits in the bit string */ + slen; /* Length of the input string */ + bool bit_not_hex; /* false = hex string true = bit string */ + int bc; + bits8 x = 0; + + /* Check that the first character is a b or an x */ + if (input_string[0] == 'b' || input_string[0] == 'B') + { + bit_not_hex = true; + sp = input_string + 1; + } + else if (input_string[0] == 'x' || input_string[0] == 'X') + { + bit_not_hex = false; + sp = input_string + 1; + } + else + { + bit_not_hex = true; + sp = input_string; + } + + /* + * Determine bitlength from input string. MaxAllocSize ensures a regular + * input is small enough, but we must check hex input. + */ + slen = strlen(sp); + if (bit_not_hex) + bitlen = slen; + else + { + if (slen > VARBITMAXLEN / 4) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("bit string length exceeds the maximum allowed (%d)", + VARBITMAXLEN))); + bitlen = slen * 4; + } + + /* + * Sometimes atttypmod is not supplied. If it is supplied we need to make + * sure that the bitstring fits. 
+ */ + if (atttypmod <= 0) + atttypmod = bitlen; + else if (bitlen > atttypmod) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION), + errmsg("bit string too long for type bit varying(%d)", + atttypmod))); + + len = VARBITTOTALLEN(bitlen); + /* set to 0 so that *r is always initialised and string is zero-padded */ + result = (VarBit *) palloc0(len); + SET_VARSIZE(result, len); + VARBITLEN(result) = Min(bitlen, atttypmod); + + r = VARBITS(result); + if (bit_not_hex) + { + /* Parse the bit representation of the string */ + /* We know it fits, as bitlen was compared to atttypmod */ + x = HIGHBIT; + for (; *sp; sp++) + { + if (*sp == '1') + *r |= x; + else if (*sp != '0') + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("\"%.*s\" is not a valid binary digit", + pg_mblen(sp), sp))); + + x >>= 1; + if (x == 0) + { + x = HIGHBIT; + r++; + } + } + } + else + { + /* Parse the hex representation of the string */ + for (bc = 0; *sp; sp++) + { + if (*sp >= '0' && *sp <= '9') + x = (bits8) (*sp - '0'); + else if (*sp >= 'A' && *sp <= 'F') + x = (bits8) (*sp - 'A') + 10; + else if (*sp >= 'a' && *sp <= 'f') + x = (bits8) (*sp - 'a') + 10; + else + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("\"%.*s\" is not a valid hexadecimal digit", + pg_mblen(sp), sp))); + + if (bc) + { + *r++ |= x; + bc = 0; + } + else + { + *r = x << 4; + bc = 1; + } + } + } + + PG_RETURN_VARBIT_P(result); +} + +/* + * varbit_out - + * Prints the string as bits to preserve length accurately + * + * XXX varbit_recv() and hex input to varbit_in() can load a value that this + * cannot emit. Consider using hex output for such values. + */ +Datum +varbit_out(PG_FUNCTION_ARGS) +{ + VarBit *s = PG_GETARG_VARBIT_P(0); + char *result, + *r; + bits8 *sp; + bits8 x; + int i, + k, + len; + + /* Assertion to help catch any bit functions that don't pad correctly */ + VARBIT_CORRECTLY_PADDED(s); + + len = VARBITLEN(s); + result = (char *) palloc(len + 1); + sp = VARBITS(s); + r = result; + for (i = 0; i <= len - BITS_PER_BYTE; i += BITS_PER_BYTE, sp++) + { + /* print full bytes */ + x = *sp; + for (k = 0; k < BITS_PER_BYTE; k++) + { + *r++ = IS_HIGHBIT_SET(x) ? '1' : '0'; + x <<= 1; + } + } + if (i < len) + { + /* print the last partial byte */ + x = *sp; + for (k = i; k < len; k++) + { + *r++ = IS_HIGHBIT_SET(x) ? '1' : '0'; + x <<= 1; + } + } + *r = '\0'; + + PG_RETURN_CSTRING(result); +} + +/* + * varbit_recv - converts external binary format to varbit + * + * External format is the bitlen as an int32, then the byte array. + */ +Datum +varbit_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + +#ifdef NOT_USED + Oid typelem = PG_GETARG_OID(1); +#endif + int32 atttypmod = PG_GETARG_INT32(2); + VarBit *result; + int len, + bitlen; + + bitlen = pq_getmsgint(buf, sizeof(int32)); + if (bitlen < 0 || bitlen > VARBITMAXLEN) + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("invalid length in external bit string"))); + + /* + * Sometimes atttypmod is not supplied. If it is supplied we need to make + * sure that the bitstring fits. 
+ */ + if (atttypmod > 0 && bitlen > atttypmod) + ereport(ERROR, + (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION), + errmsg("bit string too long for type bit varying(%d)", + atttypmod))); + + len = VARBITTOTALLEN(bitlen); + result = (VarBit *) palloc(len); + SET_VARSIZE(result, len); + VARBITLEN(result) = bitlen; + + pq_copymsgbytes(buf, (char *) VARBITS(result), VARBITBYTES(result)); + + /* Make sure last byte is correctly zero-padded */ + VARBIT_PAD(result); + + PG_RETURN_VARBIT_P(result); +} + +/* + * varbit_send - converts varbit to binary format + */ +Datum +varbit_send(PG_FUNCTION_ARGS) +{ + VarBit *s = PG_GETARG_VARBIT_P(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint32(&buf, VARBITLEN(s)); + pq_sendbytes(&buf, VARBITS(s), VARBITBYTES(s)); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * varbit_support() + * + * Planner support function for the varbit() length coercion function. + * + * Currently, the only interesting thing we can do is flatten calls that set + * the new maximum length >= the previous maximum length. We can ignore the + * isExplicit argument, since that only affects truncation cases. + */ +Datum +varbit_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestSimplify)) + { + SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq; + FuncExpr *expr = req->fcall; + Node *typmod; + + Assert(list_length(expr->args) >= 2); + + typmod = (Node *) lsecond(expr->args); + + if (IsA(typmod, Const) && !((Const *) typmod)->constisnull) + { + Node *source = (Node *) linitial(expr->args); + int32 new_typmod = DatumGetInt32(((Const *) typmod)->constvalue); + int32 old_max = exprTypmod(source); + int32 new_max = new_typmod; + + /* Note: varbit() treats typmod 0 as invalid, so we do too */ + if (new_max <= 0 || (old_max > 0 && old_max <= new_max)) + ret = relabel_to_typmod(source, new_typmod); + } + } + + PG_RETURN_POINTER(ret); +} + +/* + * varbit() + * Converts a varbit() type to a specific internal length. + * len is the maximum bitlength specified in the column definition. + * + * If doing implicit cast, raise error when source data is too long. + * If doing explicit cast, silently truncate to max length. + */ +Datum +varbit(PG_FUNCTION_ARGS) +{ + VarBit *arg = PG_GETARG_VARBIT_P(0); + int32 len = PG_GETARG_INT32(1); + bool isExplicit = PG_GETARG_BOOL(2); + VarBit *result; + int rlen; + + /* No work if typmod is invalid or supplied data matches it already */ + if (len <= 0 || len >= VARBITLEN(arg)) + PG_RETURN_VARBIT_P(arg); + + if (!isExplicit) + ereport(ERROR, + (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION), + errmsg("bit string too long for type bit varying(%d)", + len))); + + rlen = VARBITTOTALLEN(len); + result = (VarBit *) palloc(rlen); + SET_VARSIZE(result, rlen); + VARBITLEN(result) = len; + + memcpy(VARBITS(result), VARBITS(arg), VARBITBYTES(result)); + + /* Make sure last byte is correctly zero-padded */ + VARBIT_PAD(result); + + PG_RETURN_VARBIT_P(result); +} + +Datum +varbittypmodin(PG_FUNCTION_ARGS) +{ + ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0); + + PG_RETURN_INT32(anybit_typmodin(ta, "varbit")); +} + +Datum +varbittypmodout(PG_FUNCTION_ARGS) +{ + int32 typmod = PG_GETARG_INT32(0); + + PG_RETURN_CSTRING(anybit_typmodout(typmod)); +} + + +/* + * Comparison operators + * + * We only need one set of comparison operators for bitstrings, as the lengths + * are stored in the same way for zero-padded and varying bit strings. 
+ * + * Note that the standard is not unambiguous about the comparison between + * zero-padded bit strings and varying bitstrings. If the same value is written + * into a zero padded bitstring as into a varying bitstring, but the zero + * padded bitstring has greater length, it will be bigger. + * + * Zeros from the beginning of a bitstring cannot simply be ignored, as they + * may be part of a bit string and may be significant. + * + * Note: btree indexes need these routines not to leak memory; therefore, + * be careful to free working copies of toasted datums. Most places don't + * need to be so careful. + */ + +/* + * bit_cmp + * + * Compares two bitstrings and returns <0, 0, >0 depending on whether the first + * string is smaller, equal, or bigger than the second. All bits are considered + * and additional zero bits may make one string smaller/larger than the other, + * even if their zero-padded values would be the same. + */ +static int32 +bit_cmp(VarBit *arg1, VarBit *arg2) +{ + int bitlen1, + bytelen1, + bitlen2, + bytelen2; + int32 cmp; + + bytelen1 = VARBITBYTES(arg1); + bytelen2 = VARBITBYTES(arg2); + + cmp = memcmp(VARBITS(arg1), VARBITS(arg2), Min(bytelen1, bytelen2)); + if (cmp == 0) + { + bitlen1 = VARBITLEN(arg1); + bitlen2 = VARBITLEN(arg2); + if (bitlen1 != bitlen2) + cmp = (bitlen1 < bitlen2) ? -1 : 1; + } + return cmp; +} + +Datum +biteq(PG_FUNCTION_ARGS) +{ + VarBit *arg1 = PG_GETARG_VARBIT_P(0); + VarBit *arg2 = PG_GETARG_VARBIT_P(1); + bool result; + int bitlen1, + bitlen2; + + bitlen1 = VARBITLEN(arg1); + bitlen2 = VARBITLEN(arg2); + + /* fast path for different-length inputs */ + if (bitlen1 != bitlen2) + result = false; + else + result = (bit_cmp(arg1, arg2) == 0); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +bitne(PG_FUNCTION_ARGS) +{ + VarBit *arg1 = PG_GETARG_VARBIT_P(0); + VarBit *arg2 = PG_GETARG_VARBIT_P(1); + bool result; + int bitlen1, + bitlen2; + + bitlen1 = VARBITLEN(arg1); + bitlen2 = VARBITLEN(arg2); + + /* fast path for different-length inputs */ + if (bitlen1 != bitlen2) + result = true; + else + result = (bit_cmp(arg1, arg2) != 0); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +bitlt(PG_FUNCTION_ARGS) +{ + VarBit *arg1 = PG_GETARG_VARBIT_P(0); + VarBit *arg2 = PG_GETARG_VARBIT_P(1); + bool result; + + result = (bit_cmp(arg1, arg2) < 0); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +bitle(PG_FUNCTION_ARGS) +{ + VarBit *arg1 = PG_GETARG_VARBIT_P(0); + VarBit *arg2 = PG_GETARG_VARBIT_P(1); + bool result; + + result = (bit_cmp(arg1, arg2) <= 0); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +bitgt(PG_FUNCTION_ARGS) +{ + VarBit *arg1 = PG_GETARG_VARBIT_P(0); + VarBit *arg2 = PG_GETARG_VARBIT_P(1); + bool result; + + result = (bit_cmp(arg1, arg2) > 0); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +bitge(PG_FUNCTION_ARGS) +{ + VarBit *arg1 = PG_GETARG_VARBIT_P(0); + VarBit *arg2 = PG_GETARG_VARBIT_P(1); + bool result; + + result = (bit_cmp(arg1, arg2) >= 0); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +bitcmp(PG_FUNCTION_ARGS) +{ + VarBit *arg1 = PG_GETARG_VARBIT_P(0); + VarBit *arg2 = PG_GETARG_VARBIT_P(1); + int32 result; + + result = bit_cmp(arg1, arg2); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_INT32(result); 
+} + +/* + * bitcat + * Concatenation of bit strings + */ +Datum +bitcat(PG_FUNCTION_ARGS) +{ + VarBit *arg1 = PG_GETARG_VARBIT_P(0); + VarBit *arg2 = PG_GETARG_VARBIT_P(1); + + PG_RETURN_VARBIT_P(bit_catenate(arg1, arg2)); +} + +static VarBit * +bit_catenate(VarBit *arg1, VarBit *arg2) +{ + VarBit *result; + int bitlen1, + bitlen2, + bytelen, + bit1pad, + bit2shift; + bits8 *pr, + *pa; + + bitlen1 = VARBITLEN(arg1); + bitlen2 = VARBITLEN(arg2); + + if (bitlen1 > VARBITMAXLEN - bitlen2) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("bit string length exceeds the maximum allowed (%d)", + VARBITMAXLEN))); + bytelen = VARBITTOTALLEN(bitlen1 + bitlen2); + + result = (VarBit *) palloc(bytelen); + SET_VARSIZE(result, bytelen); + VARBITLEN(result) = bitlen1 + bitlen2; + + /* Copy the first bitstring in */ + memcpy(VARBITS(result), VARBITS(arg1), VARBITBYTES(arg1)); + + /* Copy the second bit string */ + bit1pad = VARBITPAD(arg1); + if (bit1pad == 0) + { + memcpy(VARBITS(result) + VARBITBYTES(arg1), VARBITS(arg2), + VARBITBYTES(arg2)); + } + else if (bitlen2 > 0) + { + /* We need to shift all the bits to fit */ + bit2shift = BITS_PER_BYTE - bit1pad; + pr = VARBITS(result) + VARBITBYTES(arg1) - 1; + for (pa = VARBITS(arg2); pa < VARBITEND(arg2); pa++) + { + *pr |= ((*pa >> bit2shift) & BITMASK); + pr++; + if (pr < VARBITEND(result)) + *pr = (*pa << bit1pad) & BITMASK; + } + } + + /* The pad bits should be already zero at this point */ + + return result; +} + +/* + * bitsubstr + * retrieve a substring from the bit string. + * Note, s is 1-based. + * SQL draft 6.10 9) + */ +Datum +bitsubstr(PG_FUNCTION_ARGS) +{ + PG_RETURN_VARBIT_P(bitsubstring(PG_GETARG_VARBIT_P(0), + PG_GETARG_INT32(1), + PG_GETARG_INT32(2), + false)); +} + +Datum +bitsubstr_no_len(PG_FUNCTION_ARGS) +{ + PG_RETURN_VARBIT_P(bitsubstring(PG_GETARG_VARBIT_P(0), + PG_GETARG_INT32(1), + -1, true)); +} + +static VarBit * +bitsubstring(VarBit *arg, int32 s, int32 l, bool length_not_specified) +{ + VarBit *result; + int bitlen, + rbitlen, + len, + ishift, + i; + int32 e, + s1, + e1; + bits8 *r, + *ps; + + bitlen = VARBITLEN(arg); + s1 = Max(s, 1); + /* If we do not have an upper bound, use end of string */ + if (length_not_specified) + { + e1 = bitlen + 1; + } + else if (l < 0) + { + /* SQL99 says to throw an error for E < S, i.e., negative length */ + ereport(ERROR, + (errcode(ERRCODE_SUBSTRING_ERROR), + errmsg("negative substring length not allowed"))); + e1 = -1; /* silence stupider compilers */ + } + else if (pg_add_s32_overflow(s, l, &e)) + { + /* + * L could be large enough for S + L to overflow, in which case the + * substring must run to end of string. + */ + e1 = bitlen + 1; + } + else + { + e1 = Min(e, bitlen + 1); + } + if (s1 > bitlen || e1 <= s1) + { + /* Need to return a zero-length bitstring */ + len = VARBITTOTALLEN(0); + result = (VarBit *) palloc(len); + SET_VARSIZE(result, len); + VARBITLEN(result) = 0; + } + else + { + /* + * OK, we've got a true substring starting at position s1-1 and ending + * at position e1-1 + */ + rbitlen = e1 - s1; + len = VARBITTOTALLEN(rbitlen); + result = (VarBit *) palloc(len); + SET_VARSIZE(result, len); + VARBITLEN(result) = rbitlen; + len -= VARHDRSZ + VARBITHDRSZ; + /* Are we copying from a byte boundary? 
*/ + if ((s1 - 1) % BITS_PER_BYTE == 0) + { + /* Yep, we are copying bytes */ + memcpy(VARBITS(result), VARBITS(arg) + (s1 - 1) / BITS_PER_BYTE, + len); + } + else + { + /* Figure out how much we need to shift the sequence by */ + ishift = (s1 - 1) % BITS_PER_BYTE; + r = VARBITS(result); + ps = VARBITS(arg) + (s1 - 1) / BITS_PER_BYTE; + for (i = 0; i < len; i++) + { + *r = (*ps << ishift) & BITMASK; + if ((++ps) < VARBITEND(arg)) + *r |= *ps >> (BITS_PER_BYTE - ishift); + r++; + } + } + + /* Make sure last byte is correctly zero-padded */ + VARBIT_PAD(result); + } + + return result; +} + +/* + * bitoverlay + * Replace specified substring of first string with second + * + * The SQL standard defines OVERLAY() in terms of substring and concatenation. + * This code is a direct implementation of what the standard says. + */ +Datum +bitoverlay(PG_FUNCTION_ARGS) +{ + VarBit *t1 = PG_GETARG_VARBIT_P(0); + VarBit *t2 = PG_GETARG_VARBIT_P(1); + int sp = PG_GETARG_INT32(2); /* substring start position */ + int sl = PG_GETARG_INT32(3); /* substring length */ + + PG_RETURN_VARBIT_P(bit_overlay(t1, t2, sp, sl)); +} + +Datum +bitoverlay_no_len(PG_FUNCTION_ARGS) +{ + VarBit *t1 = PG_GETARG_VARBIT_P(0); + VarBit *t2 = PG_GETARG_VARBIT_P(1); + int sp = PG_GETARG_INT32(2); /* substring start position */ + int sl; + + sl = VARBITLEN(t2); /* defaults to length(t2) */ + PG_RETURN_VARBIT_P(bit_overlay(t1, t2, sp, sl)); +} + +static VarBit * +bit_overlay(VarBit *t1, VarBit *t2, int sp, int sl) +{ + VarBit *result; + VarBit *s1; + VarBit *s2; + int sp_pl_sl; + + /* + * Check for possible integer-overflow cases. For negative sp, throw a + * "substring length" error because that's what should be expected + * according to the spec's definition of OVERLAY(). + */ + if (sp <= 0) + ereport(ERROR, + (errcode(ERRCODE_SUBSTRING_ERROR), + errmsg("negative substring length not allowed"))); + if (pg_add_s32_overflow(sp, sl, &sp_pl_sl)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + + s1 = bitsubstring(t1, 1, sp - 1, false); + s2 = bitsubstring(t1, sp_pl_sl, -1, true); + result = bit_catenate(s1, t2); + result = bit_catenate(result, s2); + + return result; +} + +/* + * bit_count + * + * Returns the number of bits set in a bit string. + */ +Datum +bit_bit_count(PG_FUNCTION_ARGS) +{ + VarBit *arg = PG_GETARG_VARBIT_P(0); + + PG_RETURN_INT64(pg_popcount((char *) VARBITS(arg), VARBITBYTES(arg))); +} + +/* + * bitlength, bitoctetlength + * Return the length of a bit string + */ +Datum +bitlength(PG_FUNCTION_ARGS) +{ + VarBit *arg = PG_GETARG_VARBIT_P(0); + + PG_RETURN_INT32(VARBITLEN(arg)); +} + +Datum +bitoctetlength(PG_FUNCTION_ARGS) +{ + VarBit *arg = PG_GETARG_VARBIT_P(0); + + PG_RETURN_INT32(VARBITBYTES(arg)); +} + +/* + * bit_and + * perform a logical AND on two bit strings. 
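+ * The arguments must have the same length; for example, B'1101' & B'0110'
+ * yields B'0100', while B'1101' & B'01' raises an error.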
+ */ +Datum +bit_and(PG_FUNCTION_ARGS) +{ + VarBit *arg1 = PG_GETARG_VARBIT_P(0); + VarBit *arg2 = PG_GETARG_VARBIT_P(1); + VarBit *result; + int len, + bitlen1, + bitlen2, + i; + bits8 *p1, + *p2, + *r; + + bitlen1 = VARBITLEN(arg1); + bitlen2 = VARBITLEN(arg2); + if (bitlen1 != bitlen2) + ereport(ERROR, + (errcode(ERRCODE_STRING_DATA_LENGTH_MISMATCH), + errmsg("cannot AND bit strings of different sizes"))); + + len = VARSIZE(arg1); + result = (VarBit *) palloc(len); + SET_VARSIZE(result, len); + VARBITLEN(result) = bitlen1; + + p1 = VARBITS(arg1); + p2 = VARBITS(arg2); + r = VARBITS(result); + for (i = 0; i < VARBITBYTES(arg1); i++) + *r++ = *p1++ & *p2++; + + /* Padding is not needed as & of 0 pads is 0 */ + + PG_RETURN_VARBIT_P(result); +} + +/* + * bit_or + * perform a logical OR on two bit strings. + */ +Datum +bit_or(PG_FUNCTION_ARGS) +{ + VarBit *arg1 = PG_GETARG_VARBIT_P(0); + VarBit *arg2 = PG_GETARG_VARBIT_P(1); + VarBit *result; + int len, + bitlen1, + bitlen2, + i; + bits8 *p1, + *p2, + *r; + + bitlen1 = VARBITLEN(arg1); + bitlen2 = VARBITLEN(arg2); + if (bitlen1 != bitlen2) + ereport(ERROR, + (errcode(ERRCODE_STRING_DATA_LENGTH_MISMATCH), + errmsg("cannot OR bit strings of different sizes"))); + len = VARSIZE(arg1); + result = (VarBit *) palloc(len); + SET_VARSIZE(result, len); + VARBITLEN(result) = bitlen1; + + p1 = VARBITS(arg1); + p2 = VARBITS(arg2); + r = VARBITS(result); + for (i = 0; i < VARBITBYTES(arg1); i++) + *r++ = *p1++ | *p2++; + + /* Padding is not needed as | of 0 pads is 0 */ + + PG_RETURN_VARBIT_P(result); +} + +/* + * bitxor + * perform a logical XOR on two bit strings. + */ +Datum +bitxor(PG_FUNCTION_ARGS) +{ + VarBit *arg1 = PG_GETARG_VARBIT_P(0); + VarBit *arg2 = PG_GETARG_VARBIT_P(1); + VarBit *result; + int len, + bitlen1, + bitlen2, + i; + bits8 *p1, + *p2, + *r; + + bitlen1 = VARBITLEN(arg1); + bitlen2 = VARBITLEN(arg2); + if (bitlen1 != bitlen2) + ereport(ERROR, + (errcode(ERRCODE_STRING_DATA_LENGTH_MISMATCH), + errmsg("cannot XOR bit strings of different sizes"))); + + len = VARSIZE(arg1); + result = (VarBit *) palloc(len); + SET_VARSIZE(result, len); + VARBITLEN(result) = bitlen1; + + p1 = VARBITS(arg1); + p2 = VARBITS(arg2); + r = VARBITS(result); + for (i = 0; i < VARBITBYTES(arg1); i++) + *r++ = *p1++ ^ *p2++; + + /* Padding is not needed as ^ of 0 pads is 0 */ + + PG_RETURN_VARBIT_P(result); +} + +/* + * bitnot + * perform a logical NOT on a bit string. + */ +Datum +bitnot(PG_FUNCTION_ARGS) +{ + VarBit *arg = PG_GETARG_VARBIT_P(0); + VarBit *result; + bits8 *p, + *r; + + result = (VarBit *) palloc(VARSIZE(arg)); + SET_VARSIZE(result, VARSIZE(arg)); + VARBITLEN(result) = VARBITLEN(arg); + + p = VARBITS(arg); + r = VARBITS(result); + for (; p < VARBITEND(arg); p++) + *r++ = ~*p; + + /* Must zero-pad the result, because extra bits are surely 1's here */ + VARBIT_PAD_LAST(result, r); + + PG_RETURN_VARBIT_P(result); +} + +/* + * bitshiftleft + * do a left shift (i.e. 
towards the beginning of the string) + */ +Datum +bitshiftleft(PG_FUNCTION_ARGS) +{ + VarBit *arg = PG_GETARG_VARBIT_P(0); + int32 shft = PG_GETARG_INT32(1); + VarBit *result; + int byte_shift, + ishift, + len; + bits8 *p, + *r; + + /* Negative shift is a shift to the right */ + if (shft < 0) + { + /* Prevent integer overflow in negation */ + if (shft < -VARBITMAXLEN) + shft = -VARBITMAXLEN; + PG_RETURN_DATUM(DirectFunctionCall2(bitshiftright, + VarBitPGetDatum(arg), + Int32GetDatum(-shft))); + } + + result = (VarBit *) palloc(VARSIZE(arg)); + SET_VARSIZE(result, VARSIZE(arg)); + VARBITLEN(result) = VARBITLEN(arg); + r = VARBITS(result); + + /* If we shifted all the bits out, return an all-zero string */ + if (shft >= VARBITLEN(arg)) + { + MemSet(r, 0, VARBITBYTES(arg)); + PG_RETURN_VARBIT_P(result); + } + + byte_shift = shft / BITS_PER_BYTE; + ishift = shft % BITS_PER_BYTE; + p = VARBITS(arg) + byte_shift; + + if (ishift == 0) + { + /* Special case: we can do a memcpy */ + len = VARBITBYTES(arg) - byte_shift; + memcpy(r, p, len); + MemSet(r + len, 0, byte_shift); + } + else + { + for (; p < VARBITEND(arg); r++) + { + *r = *p << ishift; + if ((++p) < VARBITEND(arg)) + *r |= *p >> (BITS_PER_BYTE - ishift); + } + for (; r < VARBITEND(result); r++) + *r = 0; + } + + /* The pad bits should be already zero at this point */ + + PG_RETURN_VARBIT_P(result); +} + +/* + * bitshiftright + * do a right shift (i.e. towards the end of the string) + */ +Datum +bitshiftright(PG_FUNCTION_ARGS) +{ + VarBit *arg = PG_GETARG_VARBIT_P(0); + int32 shft = PG_GETARG_INT32(1); + VarBit *result; + int byte_shift, + ishift, + len; + bits8 *p, + *r; + + /* Negative shift is a shift to the left */ + if (shft < 0) + { + /* Prevent integer overflow in negation */ + if (shft < -VARBITMAXLEN) + shft = -VARBITMAXLEN; + PG_RETURN_DATUM(DirectFunctionCall2(bitshiftleft, + VarBitPGetDatum(arg), + Int32GetDatum(-shft))); + } + + result = (VarBit *) palloc(VARSIZE(arg)); + SET_VARSIZE(result, VARSIZE(arg)); + VARBITLEN(result) = VARBITLEN(arg); + r = VARBITS(result); + + /* If we shifted all the bits out, return an all-zero string */ + if (shft >= VARBITLEN(arg)) + { + MemSet(r, 0, VARBITBYTES(arg)); + PG_RETURN_VARBIT_P(result); + } + + byte_shift = shft / BITS_PER_BYTE; + ishift = shft % BITS_PER_BYTE; + p = VARBITS(arg); + + /* Set the first part of the result to 0 */ + MemSet(r, 0, byte_shift); + r += byte_shift; + + if (ishift == 0) + { + /* Special case: we can do a memcpy */ + len = VARBITBYTES(arg) - byte_shift; + memcpy(r, p, len); + r += len; + } + else + { + if (r < VARBITEND(result)) + *r = 0; /* initialize first byte */ + for (; r < VARBITEND(result); p++) + { + *r |= *p >> ishift; + if ((++r) < VARBITEND(result)) + *r = (*p << (BITS_PER_BYTE - ishift)) & BITMASK; + } + } + + /* We may have shifted 1's into the pad bits, so fix that */ + VARBIT_PAD_LAST(result, r); + + PG_RETURN_VARBIT_P(result); +} + +/* + * This is not defined in any standard. We retain the natural ordering of + * bits here, as it just seems more intuitive. 
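+ * Casting an integer to bit(n) keeps the rightmost bits of its two's-complement
+ * representation, sign-extending on the left when n exceeds the source width;
+ * for example, 5::bit(4) yields B'0101' and (-1)::bit(4) yields B'1111'.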
+ */ +Datum +bitfromint4(PG_FUNCTION_ARGS) +{ + int32 a = PG_GETARG_INT32(0); + int32 typmod = PG_GETARG_INT32(1); + VarBit *result; + bits8 *r; + int rlen; + int destbitsleft, + srcbitsleft; + + if (typmod <= 0 || typmod > VARBITMAXLEN) + typmod = 1; /* default bit length */ + + rlen = VARBITTOTALLEN(typmod); + result = (VarBit *) palloc(rlen); + SET_VARSIZE(result, rlen); + VARBITLEN(result) = typmod; + + r = VARBITS(result); + destbitsleft = typmod; + srcbitsleft = 32; + /* drop any input bits that don't fit */ + srcbitsleft = Min(srcbitsleft, destbitsleft); + /* sign-fill any excess bytes in output */ + while (destbitsleft >= srcbitsleft + 8) + { + *r++ = (bits8) ((a < 0) ? BITMASK : 0); + destbitsleft -= 8; + } + /* store first fractional byte */ + if (destbitsleft > srcbitsleft) + { + unsigned int val = (unsigned int) (a >> (destbitsleft - 8)); + + /* Force sign-fill in case the compiler implements >> as zero-fill */ + if (a < 0) + val |= ((unsigned int) -1) << (srcbitsleft + 8 - destbitsleft); + *r++ = (bits8) (val & BITMASK); + destbitsleft -= 8; + } + /* Now srcbitsleft and destbitsleft are the same, need not track both */ + /* store whole bytes */ + while (destbitsleft >= 8) + { + *r++ = (bits8) ((a >> (destbitsleft - 8)) & BITMASK); + destbitsleft -= 8; + } + /* store last fractional byte */ + if (destbitsleft > 0) + *r = (bits8) ((a << (8 - destbitsleft)) & BITMASK); + + PG_RETURN_VARBIT_P(result); +} + +Datum +bittoint4(PG_FUNCTION_ARGS) +{ + VarBit *arg = PG_GETARG_VARBIT_P(0); + uint32 result; + bits8 *r; + + /* Check that the bit string is not too long */ + if (VARBITLEN(arg) > sizeof(result) * BITS_PER_BYTE) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + + result = 0; + for (r = VARBITS(arg); r < VARBITEND(arg); r++) + { + result <<= BITS_PER_BYTE; + result |= *r; + } + /* Now shift the result to take account of the padding at the end */ + result >>= VARBITPAD(arg); + + PG_RETURN_INT32(result); +} + +Datum +bitfromint8(PG_FUNCTION_ARGS) +{ + int64 a = PG_GETARG_INT64(0); + int32 typmod = PG_GETARG_INT32(1); + VarBit *result; + bits8 *r; + int rlen; + int destbitsleft, + srcbitsleft; + + if (typmod <= 0 || typmod > VARBITMAXLEN) + typmod = 1; /* default bit length */ + + rlen = VARBITTOTALLEN(typmod); + result = (VarBit *) palloc(rlen); + SET_VARSIZE(result, rlen); + VARBITLEN(result) = typmod; + + r = VARBITS(result); + destbitsleft = typmod; + srcbitsleft = 64; + /* drop any input bits that don't fit */ + srcbitsleft = Min(srcbitsleft, destbitsleft); + /* sign-fill any excess bytes in output */ + while (destbitsleft >= srcbitsleft + 8) + { + *r++ = (bits8) ((a < 0) ? 
BITMASK : 0); + destbitsleft -= 8; + } + /* store first fractional byte */ + if (destbitsleft > srcbitsleft) + { + unsigned int val = (unsigned int) (a >> (destbitsleft - 8)); + + /* Force sign-fill in case the compiler implements >> as zero-fill */ + if (a < 0) + val |= ((unsigned int) -1) << (srcbitsleft + 8 - destbitsleft); + *r++ = (bits8) (val & BITMASK); + destbitsleft -= 8; + } + /* Now srcbitsleft and destbitsleft are the same, need not track both */ + /* store whole bytes */ + while (destbitsleft >= 8) + { + *r++ = (bits8) ((a >> (destbitsleft - 8)) & BITMASK); + destbitsleft -= 8; + } + /* store last fractional byte */ + if (destbitsleft > 0) + *r = (bits8) ((a << (8 - destbitsleft)) & BITMASK); + + PG_RETURN_VARBIT_P(result); +} + +Datum +bittoint8(PG_FUNCTION_ARGS) +{ + VarBit *arg = PG_GETARG_VARBIT_P(0); + uint64 result; + bits8 *r; + + /* Check that the bit string is not too long */ + if (VARBITLEN(arg) > sizeof(result) * BITS_PER_BYTE) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + + result = 0; + for (r = VARBITS(arg); r < VARBITEND(arg); r++) + { + result <<= BITS_PER_BYTE; + result |= *r; + } + /* Now shift the result to take account of the padding at the end */ + result >>= VARBITPAD(arg); + + PG_RETURN_INT64(result); +} + + +/* + * Determines the position of S2 in the bitstring S1 (1-based string). + * If S2 does not appear in S1 this function returns 0. + * If S2 is of length 0 this function returns 1. + * Compatible in usage with POSITION() functions for other data types. + */ +Datum +bitposition(PG_FUNCTION_ARGS) +{ + VarBit *str = PG_GETARG_VARBIT_P(0); + VarBit *substr = PG_GETARG_VARBIT_P(1); + int substr_length, + str_length, + i, + is; + bits8 *s, /* pointer into substring */ + *p; /* pointer into str */ + bits8 cmp, /* shifted substring byte to compare */ + mask1, /* mask for substring byte shifted right */ + mask2, /* mask for substring byte shifted left */ + end_mask, /* pad mask for last substring byte */ + str_mask; /* pad mask for last string byte */ + bool is_match; + + /* Get the substring length */ + substr_length = VARBITLEN(substr); + str_length = VARBITLEN(str); + + /* String has zero length or substring longer than string, return 0 */ + if ((str_length == 0) || (substr_length > str_length)) + PG_RETURN_INT32(0); + + /* zero-length substring means return 1 */ + if (substr_length == 0) + PG_RETURN_INT32(1); + + /* Initialise the padding masks */ + end_mask = BITMASK << VARBITPAD(substr); + str_mask = BITMASK << VARBITPAD(str); + for (i = 0; i < VARBITBYTES(str) - VARBITBYTES(substr) + 1; i++) + { + for (is = 0; is < BITS_PER_BYTE; is++) + { + is_match = true; + p = VARBITS(str) + i; + mask1 = BITMASK >> is; + mask2 = ~mask1; + for (s = VARBITS(substr); + is_match && s < VARBITEND(substr); s++) + { + cmp = *s >> is; + if (s == VARBITEND(substr) - 1) + { + mask1 &= end_mask >> is; + if (p == VARBITEND(str) - 1) + { + /* Check that there is enough of str left */ + if (mask1 & ~str_mask) + { + is_match = false; + break; + } + mask1 &= str_mask; + } + } + is_match = ((cmp ^ *p) & mask1) == 0; + if (!is_match) + break; + /* Move on to the next byte */ + p++; + if (p == VARBITEND(str)) + { + mask2 = end_mask << (BITS_PER_BYTE - is); + is_match = mask2 == 0; +#if 0 + elog(DEBUG4, "S. 
%d %d em=%2x sm=%2x r=%d", + i, is, end_mask, mask2, is_match); +#endif + break; + } + cmp = *s << (BITS_PER_BYTE - is); + if (s == VARBITEND(substr) - 1) + { + mask2 &= end_mask << (BITS_PER_BYTE - is); + if (p == VARBITEND(str) - 1) + { + if (mask2 & ~str_mask) + { + is_match = false; + break; + } + mask2 &= str_mask; + } + } + is_match = ((cmp ^ *p) & mask2) == 0; + } + /* Have we found a match? */ + if (is_match) + PG_RETURN_INT32(i * BITS_PER_BYTE + is + 1); + } + } + PG_RETURN_INT32(0); +} + + +/* + * bitsetbit + * + * Given an instance of type 'bit' creates a new one with + * the Nth bit set to the given value. + * + * The bit location is specified left-to-right in a zero-based fashion + * consistent with the other get_bit and set_bit functions, but + * inconsistent with the standard substring, position, overlay functions + */ +Datum +bitsetbit(PG_FUNCTION_ARGS) +{ + VarBit *arg1 = PG_GETARG_VARBIT_P(0); + int32 n = PG_GETARG_INT32(1); + int32 newBit = PG_GETARG_INT32(2); + VarBit *result; + int len, + bitlen; + bits8 *r, + *p; + int byteNo, + bitNo; + + bitlen = VARBITLEN(arg1); + if (n < 0 || n >= bitlen) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("bit index %d out of valid range (0..%d)", + n, bitlen - 1))); + + /* + * sanity check! + */ + if (newBit != 0 && newBit != 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("new bit must be 0 or 1"))); + + len = VARSIZE(arg1); + result = (VarBit *) palloc(len); + SET_VARSIZE(result, len); + VARBITLEN(result) = bitlen; + + p = VARBITS(arg1); + r = VARBITS(result); + + memcpy(r, p, VARBITBYTES(arg1)); + + byteNo = n / BITS_PER_BYTE; + bitNo = BITS_PER_BYTE - 1 - (n % BITS_PER_BYTE); + + /* + * Update the byte. + */ + if (newBit == 0) + r[byteNo] &= (~(1 << bitNo)); + else + r[byteNo] |= (1 << bitNo); + + PG_RETURN_VARBIT_P(result); +} + +/* + * bitgetbit + * + * returns the value of the Nth bit of a bit array (0 or 1). + * + * The bit location is specified left-to-right in a zero-based fashion + * consistent with the other get_bit and set_bit functions, but + * inconsistent with the standard substring, position, overlay functions + */ +Datum +bitgetbit(PG_FUNCTION_ARGS) +{ + VarBit *arg1 = PG_GETARG_VARBIT_P(0); + int32 n = PG_GETARG_INT32(1); + int bitlen; + bits8 *p; + int byteNo, + bitNo; + + bitlen = VARBITLEN(arg1); + if (n < 0 || n >= bitlen) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("bit index %d out of valid range (0..%d)", + n, bitlen - 1))); + + p = VARBITS(arg1); + + byteNo = n / BITS_PER_BYTE; + bitNo = BITS_PER_BYTE - 1 - (n % BITS_PER_BYTE); + + if (p[byteNo] & (1 << bitNo)) + PG_RETURN_INT32(1); + else + PG_RETURN_INT32(0); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/varchar.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/varchar.c new file mode 100644 index 00000000000..b92ff4d266e --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/varchar.c @@ -0,0 +1,1231 @@ +/*------------------------------------------------------------------------- + * + * varchar.c + * Functions for the built-in types char(n) and varchar(n). 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/varchar.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/detoast.h" +#include "access/htup_details.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_type.h" +#include "common/hashfn.h" +#include "libpq/pqformat.h" +#include "mb/pg_wchar.h" +#include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/pg_locale.h" +#include "utils/varlena.h" + +/* common code for bpchartypmodin and varchartypmodin */ +static int32 +anychar_typmodin(ArrayType *ta, const char *typename) +{ + int32 typmod; + int32 *tl; + int n; + + tl = ArrayGetIntegerTypmods(ta, &n); + + /* + * we're not too tense about good error message here because grammar + * shouldn't allow wrong number of modifiers for CHAR + */ + if (n != 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid type modifier"))); + + if (*tl < 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("length for type %s must be at least 1", typename))); + if (*tl > MaxAttrSize) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("length for type %s cannot exceed %d", + typename, MaxAttrSize))); + + /* + * For largely historical reasons, the typmod is VARHDRSZ plus the number + * of characters; there is enough client-side code that knows about that + * that we'd better not change it. + */ + typmod = VARHDRSZ + *tl; + + return typmod; +} + +/* common code for bpchartypmodout and varchartypmodout */ +static char * +anychar_typmodout(int32 typmod) +{ + char *res = (char *) palloc(64); + + if (typmod > VARHDRSZ) + snprintf(res, 64, "(%d)", (int) (typmod - VARHDRSZ)); + else + *res = '\0'; + + return res; +} + + +/* + * CHAR() and VARCHAR() types are part of the SQL standard. CHAR() + * is for blank-padded string whose length is specified in CREATE TABLE. + * VARCHAR is for storing string whose length is at most the length specified + * at CREATE TABLE time. + * + * It's hard to implement these types because we cannot figure out + * the length of the type from the type itself. I changed (hopefully all) the + * fmgr calls that invoke input functions of a data type to supply the + * length also. (eg. in INSERTs, we have the tupleDescriptor which contains + * the length of the attributes and hence the exact length of the char() or + * varchar(). We pass this to bpcharin() or varcharin().) In the case where + * we cannot determine the length, we pass in -1 instead and the input + * converter does not enforce any length check. + * + * We actually implement this as a varlena so that we don't have to pass in + * the length for the comparison functions. (The difference between these + * types and "text" is that we truncate and possibly blank-pad the string + * at insertion time.) 
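+ * (Thus 'ab'::char(4) is stored blank-padded as 'ab  ', while 'ab'::varchar(4)
+ * is stored as just 'ab'.)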
+ * + * - ay 6/95 + */ + + +/***************************************************************************** + * bpchar - char() * + *****************************************************************************/ + +/* + * bpchar_input -- common guts of bpcharin and bpcharrecv + * + * s is the input text of length len (may not be null-terminated) + * atttypmod is the typmod value to apply + * + * Note that atttypmod is measured in characters, which + * is not necessarily the same as the number of bytes. + * + * If the input string is too long, raise an error, unless the extra + * characters are spaces, in which case they're truncated. (per SQL) + * + * If escontext points to an ErrorSaveContext node, that is filled instead + * of throwing an error; the caller must check SOFT_ERROR_OCCURRED() + * to detect errors. + */ +static BpChar * +bpchar_input(const char *s, size_t len, int32 atttypmod, Node *escontext) +{ + BpChar *result; + char *r; + size_t maxlen; + + /* If typmod is -1 (or invalid), use the actual string length */ + if (atttypmod < (int32) VARHDRSZ) + maxlen = len; + else + { + size_t charlen; /* number of CHARACTERS in the input */ + + maxlen = atttypmod - VARHDRSZ; + charlen = pg_mbstrlen_with_len(s, len); + if (charlen > maxlen) + { + /* Verify that extra characters are spaces, and clip them off */ + size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen); + size_t j; + + /* + * at this point, len is the actual BYTE length of the input + * string, maxlen is the max number of CHARACTERS allowed for this + * bpchar type, mbmaxlen is the length in BYTES of those chars. + */ + for (j = mbmaxlen; j < len; j++) + { + if (s[j] != ' ') + ereturn(escontext, NULL, + (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION), + errmsg("value too long for type character(%d)", + (int) maxlen))); + } + + /* + * Now we set maxlen to the necessary byte length, not the number + * of CHARACTERS! + */ + maxlen = len = mbmaxlen; + } + else + { + /* + * Now we set maxlen to the necessary byte length, not the number + * of CHARACTERS! + */ + maxlen = len + (maxlen - charlen); + } + } + + result = (BpChar *) palloc(maxlen + VARHDRSZ); + SET_VARSIZE(result, maxlen + VARHDRSZ); + r = VARDATA(result); + memcpy(r, s, len); + + /* blank pad the string if necessary */ + if (maxlen > len) + memset(r + len, ' ', maxlen - len); + + return result; +} + +/* + * Convert a C string to CHARACTER internal representation. atttypmod + * is the declared length of the type plus VARHDRSZ. + */ +Datum +bpcharin(PG_FUNCTION_ARGS) +{ + char *s = PG_GETARG_CSTRING(0); +#ifdef NOT_USED + Oid typelem = PG_GETARG_OID(1); +#endif + int32 atttypmod = PG_GETARG_INT32(2); + BpChar *result; + + result = bpchar_input(s, strlen(s), atttypmod, fcinfo->context); + PG_RETURN_BPCHAR_P(result); +} + + +/* + * Convert a CHARACTER value to a C string. + * + * Uses the text conversion functions, which is only appropriate if BpChar + * and text are equivalent types. 
+ */ +Datum +bpcharout(PG_FUNCTION_ARGS) +{ + Datum txt = PG_GETARG_DATUM(0); + + PG_RETURN_CSTRING(TextDatumGetCString(txt)); +} + +/* + * bpcharrecv - converts external binary format to bpchar + */ +Datum +bpcharrecv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); +#ifdef NOT_USED + Oid typelem = PG_GETARG_OID(1); +#endif + int32 atttypmod = PG_GETARG_INT32(2); + BpChar *result; + char *str; + int nbytes; + + str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); + result = bpchar_input(str, nbytes, atttypmod, NULL); + pfree(str); + PG_RETURN_BPCHAR_P(result); +} + +/* + * bpcharsend - converts bpchar to binary format + */ +Datum +bpcharsend(PG_FUNCTION_ARGS) +{ + /* Exactly the same as textsend, so share code */ + return textsend(fcinfo); +} + + +/* + * Converts a CHARACTER type to the specified size. + * + * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes. + * isExplicit is true if this is for an explicit cast to char(N). + * + * Truncation rules: for an explicit cast, silently truncate to the given + * length; for an implicit cast, raise error unless extra characters are + * all spaces. (This is sort-of per SQL: the spec would actually have us + * raise a "completion condition" for the explicit cast case, but Postgres + * hasn't got such a concept.) + */ +Datum +bpchar(PG_FUNCTION_ARGS) +{ + BpChar *source = PG_GETARG_BPCHAR_PP(0); + int32 maxlen = PG_GETARG_INT32(1); + bool isExplicit = PG_GETARG_BOOL(2); + BpChar *result; + int32 len; + char *r; + char *s; + int i; + int charlen; /* number of characters in the input string + + * VARHDRSZ */ + + /* No work if typmod is invalid */ + if (maxlen < (int32) VARHDRSZ) + PG_RETURN_BPCHAR_P(source); + + maxlen -= VARHDRSZ; + + len = VARSIZE_ANY_EXHDR(source); + s = VARDATA_ANY(source); + + charlen = pg_mbstrlen_with_len(s, len); + + /* No work if supplied data matches typmod already */ + if (charlen == maxlen) + PG_RETURN_BPCHAR_P(source); + + if (charlen > maxlen) + { + /* Verify that extra characters are spaces, and clip them off */ + size_t maxmblen; + + maxmblen = pg_mbcharcliplen(s, len, maxlen); + + if (!isExplicit) + { + for (i = maxmblen; i < len; i++) + if (s[i] != ' ') + ereport(ERROR, + (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION), + errmsg("value too long for type character(%d)", + maxlen))); + } + + len = maxmblen; + + /* + * At this point, maxlen is the necessary byte length, not the number + * of CHARACTERS! + */ + maxlen = len; + } + else + { + /* + * At this point, maxlen is the necessary byte length, not the number + * of CHARACTERS! + */ + maxlen = len + (maxlen - charlen); + } + + Assert(maxlen >= len); + + result = palloc(maxlen + VARHDRSZ); + SET_VARSIZE(result, maxlen + VARHDRSZ); + r = VARDATA(result); + + memcpy(r, s, len); + + /* blank pad the string if necessary */ + if (maxlen > len) + memset(r + len, ' ', maxlen - len); + + PG_RETURN_BPCHAR_P(result); +} + + +/* char_bpchar() + * Convert char to bpchar(1). + */ +Datum +char_bpchar(PG_FUNCTION_ARGS) +{ + char c = PG_GETARG_CHAR(0); + BpChar *result; + + result = (BpChar *) palloc(VARHDRSZ + 1); + + SET_VARSIZE(result, VARHDRSZ + 1); + *(VARDATA(result)) = c; + + PG_RETURN_BPCHAR_P(result); +} + + +/* bpchar_name() + * Converts a bpchar() type to a NameData type. 
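+ * Trailing blanks are stripped, and overlength input is silently truncated
+ * to fit in NAMEDATALEN-1 bytes.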
+ */ +Datum +bpchar_name(PG_FUNCTION_ARGS) +{ + BpChar *s = PG_GETARG_BPCHAR_PP(0); + char *s_data; + Name result; + int len; + + len = VARSIZE_ANY_EXHDR(s); + s_data = VARDATA_ANY(s); + + /* Truncate oversize input */ + if (len >= NAMEDATALEN) + len = pg_mbcliplen(s_data, len, NAMEDATALEN - 1); + + /* Remove trailing blanks */ + while (len > 0) + { + if (s_data[len - 1] != ' ') + break; + len--; + } + + /* We use palloc0 here to ensure result is zero-padded */ + result = (Name) palloc0(NAMEDATALEN); + memcpy(NameStr(*result), s_data, len); + + PG_RETURN_NAME(result); +} + +/* name_bpchar() + * Converts a NameData type to a bpchar type. + * + * Uses the text conversion functions, which is only appropriate if BpChar + * and text are equivalent types. + */ +Datum +name_bpchar(PG_FUNCTION_ARGS) +{ + Name s = PG_GETARG_NAME(0); + BpChar *result; + + result = (BpChar *) cstring_to_text(NameStr(*s)); + PG_RETURN_BPCHAR_P(result); +} + +Datum +bpchartypmodin(PG_FUNCTION_ARGS) +{ + ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0); + + PG_RETURN_INT32(anychar_typmodin(ta, "char")); +} + +Datum +bpchartypmodout(PG_FUNCTION_ARGS) +{ + int32 typmod = PG_GETARG_INT32(0); + + PG_RETURN_CSTRING(anychar_typmodout(typmod)); +} + + +/***************************************************************************** + * varchar - varchar(n) + * + * Note: varchar piggybacks on type text for most operations, and so has no + * C-coded functions except for I/O and typmod checking. + *****************************************************************************/ + +/* + * varchar_input -- common guts of varcharin and varcharrecv + * + * s is the input text of length len (may not be null-terminated) + * atttypmod is the typmod value to apply + * + * Note that atttypmod is measured in characters, which + * is not necessarily the same as the number of bytes. + * + * If the input string is too long, raise an error, unless the extra + * characters are spaces, in which case they're truncated. (per SQL) + * + * If escontext points to an ErrorSaveContext node, that is filled instead + * of throwing an error; the caller must check SOFT_ERROR_OCCURRED() + * to detect errors. + */ +static VarChar * +varchar_input(const char *s, size_t len, int32 atttypmod, Node *escontext) +{ + VarChar *result; + size_t maxlen; + + maxlen = atttypmod - VARHDRSZ; + + if (atttypmod >= (int32) VARHDRSZ && len > maxlen) + { + /* Verify that extra characters are spaces, and clip them off */ + size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen); + size_t j; + + for (j = mbmaxlen; j < len; j++) + { + if (s[j] != ' ') + ereturn(escontext, NULL, + (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION), + errmsg("value too long for type character varying(%d)", + (int) maxlen))); + } + + len = mbmaxlen; + } + + /* + * We can use cstring_to_text_with_len because VarChar and text are + * binary-compatible types. + */ + result = (VarChar *) cstring_to_text_with_len(s, len); + return result; +} + +/* + * Convert a C string to VARCHAR internal representation. atttypmod + * is the declared length of the type plus VARHDRSZ. + */ +Datum +varcharin(PG_FUNCTION_ARGS) +{ + char *s = PG_GETARG_CSTRING(0); +#ifdef NOT_USED + Oid typelem = PG_GETARG_OID(1); +#endif + int32 atttypmod = PG_GETARG_INT32(2); + VarChar *result; + + result = varchar_input(s, strlen(s), atttypmod, fcinfo->context); + PG_RETURN_VARCHAR_P(result); +} + + +/* + * Convert a VARCHAR value to a C string. 
+ * + * Uses the text to C string conversion function, which is only appropriate + * if VarChar and text are equivalent types. + */ +Datum +varcharout(PG_FUNCTION_ARGS) +{ + Datum txt = PG_GETARG_DATUM(0); + + PG_RETURN_CSTRING(TextDatumGetCString(txt)); +} + +/* + * varcharrecv - converts external binary format to varchar + */ +Datum +varcharrecv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); +#ifdef NOT_USED + Oid typelem = PG_GETARG_OID(1); +#endif + int32 atttypmod = PG_GETARG_INT32(2); + VarChar *result; + char *str; + int nbytes; + + str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); + result = varchar_input(str, nbytes, atttypmod, NULL); + pfree(str); + PG_RETURN_VARCHAR_P(result); +} + +/* + * varcharsend - converts varchar to binary format + */ +Datum +varcharsend(PG_FUNCTION_ARGS) +{ + /* Exactly the same as textsend, so share code */ + return textsend(fcinfo); +} + + +/* + * varchar_support() + * + * Planner support function for the varchar() length coercion function. + * + * Currently, the only interesting thing we can do is flatten calls that set + * the new maximum length >= the previous maximum length. We can ignore the + * isExplicit argument, since that only affects truncation cases. + */ +Datum +varchar_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestSimplify)) + { + SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq; + FuncExpr *expr = req->fcall; + Node *typmod; + + Assert(list_length(expr->args) >= 2); + + typmod = (Node *) lsecond(expr->args); + + if (IsA(typmod, Const) && !((Const *) typmod)->constisnull) + { + Node *source = (Node *) linitial(expr->args); + int32 old_typmod = exprTypmod(source); + int32 new_typmod = DatumGetInt32(((Const *) typmod)->constvalue); + int32 old_max = old_typmod - VARHDRSZ; + int32 new_max = new_typmod - VARHDRSZ; + + if (new_typmod < 0 || (old_typmod >= 0 && old_max <= new_max)) + ret = relabel_to_typmod(source, new_typmod); + } + } + + PG_RETURN_POINTER(ret); +} + +/* + * Converts a VARCHAR type to the specified size. + * + * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes. + * isExplicit is true if this is for an explicit cast to varchar(N). + * + * Truncation rules: for an explicit cast, silently truncate to the given + * length; for an implicit cast, raise error unless extra characters are + * all spaces. (This is sort-of per SQL: the spec would actually have us + * raise a "completion condition" for the explicit cast case, but Postgres + * hasn't got such a concept.) + */ +Datum +varchar(PG_FUNCTION_ARGS) +{ + VarChar *source = PG_GETARG_VARCHAR_PP(0); + int32 typmod = PG_GETARG_INT32(1); + bool isExplicit = PG_GETARG_BOOL(2); + int32 len, + maxlen; + size_t maxmblen; + int i; + char *s_data; + + len = VARSIZE_ANY_EXHDR(source); + s_data = VARDATA_ANY(source); + maxlen = typmod - VARHDRSZ; + + /* No work if typmod is invalid or supplied data fits it already */ + if (maxlen < 0 || len <= maxlen) + PG_RETURN_VARCHAR_P(source); + + /* only reach here if string is too long... 
*/ + + /* truncate multibyte string preserving multibyte boundary */ + maxmblen = pg_mbcharcliplen(s_data, len, maxlen); + + if (!isExplicit) + { + for (i = maxmblen; i < len; i++) + if (s_data[i] != ' ') + ereport(ERROR, + (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION), + errmsg("value too long for type character varying(%d)", + maxlen))); + } + + PG_RETURN_VARCHAR_P((VarChar *) cstring_to_text_with_len(s_data, + maxmblen)); +} + +Datum +varchartypmodin(PG_FUNCTION_ARGS) +{ + ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0); + + PG_RETURN_INT32(anychar_typmodin(ta, "varchar")); +} + +Datum +varchartypmodout(PG_FUNCTION_ARGS) +{ + int32 typmod = PG_GETARG_INT32(0); + + PG_RETURN_CSTRING(anychar_typmodout(typmod)); +} + + +/***************************************************************************** + * Exported functions + *****************************************************************************/ + +/* "True" length (not counting trailing blanks) of a BpChar */ +static inline int +bcTruelen(BpChar *arg) +{ + return bpchartruelen(VARDATA_ANY(arg), VARSIZE_ANY_EXHDR(arg)); +} + +int +bpchartruelen(char *s, int len) +{ + int i; + + /* + * Note that we rely on the assumption that ' ' is a singleton unit on + * every supported multibyte server encoding. + */ + for (i = len - 1; i >= 0; i--) + { + if (s[i] != ' ') + break; + } + return i + 1; +} + +Datum +bpcharlen(PG_FUNCTION_ARGS) +{ + BpChar *arg = PG_GETARG_BPCHAR_PP(0); + int len; + + /* get number of bytes, ignoring trailing spaces */ + len = bcTruelen(arg); + + /* in multibyte encoding, convert to number of characters */ + if (pg_database_encoding_max_length() != 1) + len = pg_mbstrlen_with_len(VARDATA_ANY(arg), len); + + PG_RETURN_INT32(len); +} + +Datum +bpcharoctetlen(PG_FUNCTION_ARGS) +{ + Datum arg = PG_GETARG_DATUM(0); + + /* We need not detoast the input at all */ + PG_RETURN_INT32(toast_raw_datum_size(arg) - VARHDRSZ); +} + + +/***************************************************************************** + * Comparison Functions used for bpchar + * + * Note: btree indexes need these routines not to leak memory; therefore, + * be careful to free working copies of toasted datums. Most places don't + * need to be so careful. + *****************************************************************************/ + +static void +check_collation_set(Oid collid) +{ + if (!OidIsValid(collid)) + { + /* + * This typically means that the parser could not resolve a conflict + * of implicit collations, so report it that way. + */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string comparison"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } +} + +Datum +bpchareq(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int len1, + len2; + bool result; + Oid collid = PG_GET_COLLATION(); + bool locale_is_c = false; + pg_locale_t mylocale = 0; + + check_collation_set(collid); + + len1 = bcTruelen(arg1); + len2 = bcTruelen(arg2); + + if (lc_collate_is_c(collid)) + locale_is_c = true; + else + mylocale = pg_newlocale_from_collation(collid); + + if (locale_is_c || pg_locale_deterministic(mylocale)) + { + /* + * Since we only care about equality or not-equality, we can avoid all + * the expense of strcoll() here, and just do bitwise comparison. 
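+ * With a nondeterministic collation, byte-wise different strings can still
+ * compare equal, so we must fall back to varstr_cmp() in that case.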
+ */ + if (len1 != len2) + result = false; + else + result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0); + } + else + { + result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + collid) == 0); + } + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +bpcharne(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int len1, + len2; + bool result; + Oid collid = PG_GET_COLLATION(); + bool locale_is_c = false; + pg_locale_t mylocale = 0; + + check_collation_set(collid); + + len1 = bcTruelen(arg1); + len2 = bcTruelen(arg2); + + if (lc_collate_is_c(collid)) + locale_is_c = true; + else + mylocale = pg_newlocale_from_collation(collid); + + if (locale_is_c || pg_locale_deterministic(mylocale)) + { + /* + * Since we only care about equality or not-equality, we can avoid all + * the expense of strcoll() here, and just do bitwise comparison. + */ + if (len1 != len2) + result = true; + else + result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0); + } + else + { + result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + collid) != 0); + } + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +bpcharlt(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int len1, + len2; + int cmp; + + len1 = bcTruelen(arg1); + len2 = bcTruelen(arg2); + + cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + PG_GET_COLLATION()); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(cmp < 0); +} + +Datum +bpcharle(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int len1, + len2; + int cmp; + + len1 = bcTruelen(arg1); + len2 = bcTruelen(arg2); + + cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + PG_GET_COLLATION()); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(cmp <= 0); +} + +Datum +bpchargt(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int len1, + len2; + int cmp; + + len1 = bcTruelen(arg1); + len2 = bcTruelen(arg2); + + cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + PG_GET_COLLATION()); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(cmp > 0); +} + +Datum +bpcharge(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int len1, + len2; + int cmp; + + len1 = bcTruelen(arg1); + len2 = bcTruelen(arg2); + + cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + PG_GET_COLLATION()); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(cmp >= 0); +} + +Datum +bpcharcmp(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int len1, + len2; + int cmp; + + len1 = bcTruelen(arg1); + len2 = bcTruelen(arg2); + + cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + PG_GET_COLLATION()); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_INT32(cmp); +} + +Datum +bpchar_sortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + Oid collid = ssup->ssup_collation; + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); + + /* Use generic string SortSupport */ + varstr_sortsupport(ssup, BPCHAROID, 
collid); + + MemoryContextSwitchTo(oldcontext); + + PG_RETURN_VOID(); +} + +Datum +bpchar_larger(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int len1, + len2; + int cmp; + + len1 = bcTruelen(arg1); + len2 = bcTruelen(arg2); + + cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + PG_GET_COLLATION()); + + PG_RETURN_BPCHAR_P((cmp >= 0) ? arg1 : arg2); +} + +Datum +bpchar_smaller(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int len1, + len2; + int cmp; + + len1 = bcTruelen(arg1); + len2 = bcTruelen(arg2); + + cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + PG_GET_COLLATION()); + + PG_RETURN_BPCHAR_P((cmp <= 0) ? arg1 : arg2); +} + + +/* + * bpchar needs a specialized hash function because we want to ignore + * trailing blanks in comparisons. + */ +Datum +hashbpchar(PG_FUNCTION_ARGS) +{ + BpChar *key = PG_GETARG_BPCHAR_PP(0); + Oid collid = PG_GET_COLLATION(); + char *keydata; + int keylen; + pg_locale_t mylocale = 0; + Datum result; + + if (!collid) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string hashing"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + + keydata = VARDATA_ANY(key); + keylen = bcTruelen(key); + + if (!lc_collate_is_c(collid)) + mylocale = pg_newlocale_from_collation(collid); + + if (pg_locale_deterministic(mylocale)) + { + result = hash_any((unsigned char *) keydata, keylen); + } + else + { + Size bsize, + rsize; + char *buf; + + bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale); + buf = palloc(bsize + 1); + + rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale); + if (rsize != bsize) + elog(ERROR, "pg_strnxfrm() returned unexpected result"); + + /* + * In principle, there's no reason to include the terminating NUL + * character in the hash, but it was done before and the behavior must + * be preserved. + */ + result = hash_any((uint8_t *) buf, bsize + 1); + + pfree(buf); + } + + /* Avoid leaking memory for toasted inputs */ + PG_FREE_IF_COPY(key, 0); + + return result; +} + +Datum +hashbpcharextended(PG_FUNCTION_ARGS) +{ + BpChar *key = PG_GETARG_BPCHAR_PP(0); + Oid collid = PG_GET_COLLATION(); + char *keydata; + int keylen; + pg_locale_t mylocale = 0; + Datum result; + + if (!collid) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string hashing"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + + keydata = VARDATA_ANY(key); + keylen = bcTruelen(key); + + if (!lc_collate_is_c(collid)) + mylocale = pg_newlocale_from_collation(collid); + + if (pg_locale_deterministic(mylocale)) + { + result = hash_any_extended((unsigned char *) keydata, keylen, + PG_GETARG_INT64(1)); + } + else + { + Size bsize, + rsize; + char *buf; + + bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale); + buf = palloc(bsize + 1); + + rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale); + if (rsize != bsize) + elog(ERROR, "pg_strnxfrm() returned unexpected result"); + + /* + * In principle, there's no reason to include the terminating NUL + * character in the hash, but it was done before and the behavior must + * be preserved. 
+ */ + result = hash_any_extended((uint8_t *) buf, bsize + 1, + PG_GETARG_INT64(1)); + + pfree(buf); + } + + PG_FREE_IF_COPY(key, 0); + + return result; +} + +/* + * The following operators support character-by-character comparison + * of bpchar datums, to allow building indexes suitable for LIKE clauses. + * Note that the regular bpchareq/bpcharne comparison operators, and + * regular support functions 1 and 2 with "C" collation are assumed to be + * compatible with these! + */ + +static int +internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2) +{ + int result; + int len1, + len2; + + len1 = bcTruelen(arg1); + len2 = bcTruelen(arg2); + + result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + if (result != 0) + return result; + else if (len1 < len2) + return -1; + else if (len1 > len2) + return 1; + else + return 0; +} + + +Datum +bpchar_pattern_lt(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int result; + + result = internal_bpchar_pattern_compare(arg1, arg2); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result < 0); +} + + +Datum +bpchar_pattern_le(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int result; + + result = internal_bpchar_pattern_compare(arg1, arg2); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result <= 0); +} + + +Datum +bpchar_pattern_ge(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int result; + + result = internal_bpchar_pattern_compare(arg1, arg2); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result >= 0); +} + + +Datum +bpchar_pattern_gt(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int result; + + result = internal_bpchar_pattern_compare(arg1, arg2); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result > 0); +} + + +Datum +btbpchar_pattern_cmp(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int result; + + result = internal_bpchar_pattern_compare(arg1, arg2); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_INT32(result); +} + + +Datum +btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); + + /* Use generic string SortSupport, forcing "C" collation */ + varstr_sortsupport(ssup, BPCHAROID, C_COLLATION_OID); + + MemoryContextSwitchTo(oldcontext); + + PG_RETURN_VOID(); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/varlena.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/varlena.c new file mode 100644 index 00000000000..06cc9fdd41a --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/varlena.c @@ -0,0 +1,6532 @@ +/*------------------------------------------------------------------------- + * + * varlena.c + * Functions for the variable-length built-in types. 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/varlena.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <ctype.h> +#include <limits.h> + +#include "access/detoast.h" +#include "access/toast_compression.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_type.h" +#include "common/hashfn.h" +#include "common/int.h" +#include "common/unicode_norm.h" +#include "funcapi.h" +#include "lib/hyperloglog.h" +#include "libpq/pqformat.h" +#include "miscadmin.h" +#include "nodes/execnodes.h" +#include "parser/scansup.h" +#include "port/pg_bswap.h" +#include "regex/regex.h" +#include "utils/builtins.h" +#include "utils/bytea.h" +#include "utils/guc.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/pg_locale.h" +#include "utils/sortsupport.h" +#include "utils/varlena.h" + + +/* GUC variable */ +__thread int bytea_output = BYTEA_OUTPUT_HEX; + +typedef struct varlena VarString; + +/* + * State for text_position_* functions. + */ +typedef struct +{ + bool is_multibyte_char_in_char; /* need to check char boundaries? */ + + char *str1; /* haystack string */ + char *str2; /* needle string */ + int len1; /* string lengths in bytes */ + int len2; + + /* Skip table for Boyer-Moore-Horspool search algorithm: */ + int skiptablemask; /* mask for ANDing with skiptable subscripts */ + int skiptable[256]; /* skip distance for given mismatched char */ + + char *last_match; /* pointer to last match in 'str1' */ + + /* + * Sometimes we need to convert the byte position of a match to a + * character position. These store the last position that was converted, + * so that on the next call, we can continue from that point, rather than + * count characters from the very beginning. + */ + char *refpoint; /* pointer within original haystack string */ + int refpos; /* 0-based character offset of the same point */ +} TextPositionState; + +typedef struct +{ + char *buf1; /* 1st string, or abbreviation original string + * buf */ + char *buf2; /* 2nd string, or abbreviation strxfrm() buf */ + int buflen1; /* Allocated length of buf1 */ + int buflen2; /* Allocated length of buf2 */ + int last_len1; /* Length of last buf1 string/strxfrm() input */ + int last_len2; /* Length of last buf2 string/strxfrm() blob */ + int last_returned; /* Last comparison result (cache) */ + bool cache_blob; /* Does buf2 contain strxfrm() blob, etc? */ + bool collate_c; + Oid typid; /* Actual datatype (text/bpchar/bytea/name) */ + hyperLogLogState abbr_card; /* Abbreviated key cardinality state */ + hyperLogLogState full_card; /* Full key cardinality state */ + double prop_card; /* Required cardinality proportion */ + pg_locale_t locale; +} VarStringSortSupport; + +/* + * Output data for split_text(): we output either to an array or a table. + * tupstore and tupdesc must be set up in advance to output to a table. 
+ */ +typedef struct +{ + ArrayBuildState *astate; + Tuplestorestate *tupstore; + TupleDesc tupdesc; +} SplitTextOutputData; + +/* + * This should be large enough that most strings will fit, but small enough + * that we feel comfortable putting it on the stack + */ +#define TEXTBUFLEN 1024 + +#define DatumGetVarStringP(X) ((VarString *) PG_DETOAST_DATUM(X)) +#define DatumGetVarStringPP(X) ((VarString *) PG_DETOAST_DATUM_PACKED(X)) + +static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup); +static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup); +static int namefastcmp_c(Datum x, Datum y, SortSupport ssup); +static int varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup); +static int namefastcmp_locale(Datum x, Datum y, SortSupport ssup); +static int varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup); +static Datum varstr_abbrev_convert(Datum original, SortSupport ssup); +static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup); +static int32 text_length(Datum str); +static text *text_catenate(text *t1, text *t2); +static text *text_substring(Datum str, + int32 start, + int32 length, + bool length_not_specified); +static text *text_overlay(text *t1, text *t2, int sp, int sl); +static int text_position(text *t1, text *t2, Oid collid); +static void text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state); +static bool text_position_next(TextPositionState *state); +static char *text_position_next_internal(char *start_ptr, TextPositionState *state); +static char *text_position_get_match_ptr(TextPositionState *state); +static int text_position_get_match_pos(TextPositionState *state); +static void text_position_cleanup(TextPositionState *state); +static void check_collation_set(Oid collid); +static int text_cmp(text *arg1, text *arg2, Oid collid); +static bytea *bytea_catenate(bytea *t1, bytea *t2); +static bytea *bytea_substring(Datum str, + int S, + int L, + bool length_not_specified); +static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl); +static void appendStringInfoText(StringInfo str, const text *t); +static bool split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate); +static void split_text_accum_result(SplitTextOutputData *tstate, + text *field_value, + text *null_string, + Oid collation); +static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v, + const char *fldsep, const char *null_string); +static StringInfo makeStringAggState(FunctionCallInfo fcinfo); +static bool text_format_parse_digits(const char **ptr, const char *end_ptr, + int *value); +static const char *text_format_parse_format(const char *start_ptr, + const char *end_ptr, + int *argpos, int *widthpos, + int *flags, int *width); +static void text_format_string_conversion(StringInfo buf, char conversion, + FmgrInfo *typOutputInfo, + Datum value, bool isNull, + int flags, int width); +static void text_format_append_string(StringInfo buf, const char *str, + int flags, int width); + + +/***************************************************************************** + * CONVERSION ROUTINES EXPORTED FOR USE BY C CODE * + *****************************************************************************/ + +/* + * cstring_to_text + * + * Create a text value from a null-terminated C string. + * + * The new text value is freshly palloc'd with a full-size VARHDR. 
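+ *
+ * (For instance, cstring_to_text("abc") allocates VARHDRSZ + 3 bytes and
+ * copies the three characters in; the stored data is not NUL-terminated.)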
+ */ +text * +cstring_to_text(const char *s) +{ + return cstring_to_text_with_len(s, strlen(s)); +} + +/* + * cstring_to_text_with_len + * + * Same as cstring_to_text except the caller specifies the string length; + * the string need not be null_terminated. + */ +text * +cstring_to_text_with_len(const char *s, int len) +{ + text *result = (text *) palloc(len + VARHDRSZ); + + SET_VARSIZE(result, len + VARHDRSZ); + memcpy(VARDATA(result), s, len); + + return result; +} + +/* + * text_to_cstring + * + * Create a palloc'd, null-terminated C string from a text value. + * + * We support being passed a compressed or toasted text value. + * This is a bit bogus since such values shouldn't really be referred to as + * "text *", but it seems useful for robustness. If we didn't handle that + * case here, we'd need another routine that did, anyway. + */ +char * +text_to_cstring(const text *t) +{ + /* must cast away the const, unfortunately */ + text *tunpacked = pg_detoast_datum_packed(unconstify(text *, t)); + int len = VARSIZE_ANY_EXHDR(tunpacked); + char *result; + + result = (char *) palloc(len + 1); + memcpy(result, VARDATA_ANY(tunpacked), len); + result[len] = '\0'; + + if (tunpacked != t) + pfree(tunpacked); + + return result; +} + +/* + * text_to_cstring_buffer + * + * Copy a text value into a caller-supplied buffer of size dst_len. + * + * The text string is truncated if necessary to fit. The result is + * guaranteed null-terminated (unless dst_len == 0). + * + * We support being passed a compressed or toasted text value. + * This is a bit bogus since such values shouldn't really be referred to as + * "text *", but it seems useful for robustness. If we didn't handle that + * case here, we'd need another routine that did, anyway. + */ +void +text_to_cstring_buffer(const text *src, char *dst, size_t dst_len) +{ + /* must cast away the const, unfortunately */ + text *srcunpacked = pg_detoast_datum_packed(unconstify(text *, src)); + size_t src_len = VARSIZE_ANY_EXHDR(srcunpacked); + + if (dst_len > 0) + { + dst_len--; + if (dst_len >= src_len) + dst_len = src_len; + else /* ensure truncation is encoding-safe */ + dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len); + memcpy(dst, VARDATA_ANY(srcunpacked), dst_len); + dst[dst_len] = '\0'; + } + + if (srcunpacked != src) + pfree(srcunpacked); +} + + +/***************************************************************************** + * USER I/O ROUTINES * + *****************************************************************************/ + + +#define VAL(CH) ((CH) - '0') +#define DIG(VAL) ((VAL) + '0') + +/* + * byteain - converts from printable representation of byte array + * + * Non-printable characters must be passed as '\nnn' (octal) and are + * converted to internal form. '\' must be passed as '\\'. + * ereport(ERROR, ...) if bad form. + * + * BUGS: + * The input is scanned twice. + * The error checking of input is minimal. 
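+ *
+ * (Worked example, for illustration: the escaped input "a\001\\" decodes
+ * to the three bytes 0x61 0x01 0x5C, while the hex input "\x6162" decodes
+ * to 0x61 0x62.)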
+ */ +Datum +byteain(PG_FUNCTION_ARGS) +{ + char *inputText = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + char *tp; + char *rp; + int bc; + bytea *result; + + /* Recognize hex input */ + if (inputText[0] == '\\' && inputText[1] == 'x') + { + size_t len = strlen(inputText); + + bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */ + result = palloc(bc); + bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result), + escontext); + SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */ + + PG_RETURN_BYTEA_P(result); + } + + /* Else, it's the traditional escaped style */ + for (bc = 0, tp = inputText; *tp != '\0'; bc++) + { + if (tp[0] != '\\') + tp++; + else if ((tp[0] == '\\') && + (tp[1] >= '0' && tp[1] <= '3') && + (tp[2] >= '0' && tp[2] <= '7') && + (tp[3] >= '0' && tp[3] <= '7')) + tp += 4; + else if ((tp[0] == '\\') && + (tp[1] == '\\')) + tp += 2; + else + { + /* + * one backslash, not followed by another or ### valid octal + */ + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "bytea"))); + } + } + + bc += VARHDRSZ; + + result = (bytea *) palloc(bc); + SET_VARSIZE(result, bc); + + tp = inputText; + rp = VARDATA(result); + while (*tp != '\0') + { + if (tp[0] != '\\') + *rp++ = *tp++; + else if ((tp[0] == '\\') && + (tp[1] >= '0' && tp[1] <= '3') && + (tp[2] >= '0' && tp[2] <= '7') && + (tp[3] >= '0' && tp[3] <= '7')) + { + bc = VAL(tp[1]); + bc <<= 3; + bc += VAL(tp[2]); + bc <<= 3; + *rp++ = bc + VAL(tp[3]); + + tp += 4; + } + else if ((tp[0] == '\\') && + (tp[1] == '\\')) + { + *rp++ = '\\'; + tp += 2; + } + else + { + /* + * We should never get here. The first pass should not allow it. + */ + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "bytea"))); + } + } + + PG_RETURN_BYTEA_P(result); +} + +/* + * byteaout - converts to printable representation of byte array + * + * In the traditional escaped format, non-printable characters are + * printed as '\nnn' (octal) and '\' as '\\'. + */ +Datum +byteaout(PG_FUNCTION_ARGS) +{ + bytea *vlena = PG_GETARG_BYTEA_PP(0); + char *result; + char *rp; + + if (bytea_output == BYTEA_OUTPUT_HEX) + { + /* Print hex format */ + rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1); + *rp++ = '\\'; + *rp++ = 'x'; + rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp); + } + else if (bytea_output == BYTEA_OUTPUT_ESCAPE) + { + /* Print traditional escaped format */ + char *vp; + uint64 len; + int i; + + len = 1; /* empty string has 1 char */ + vp = VARDATA_ANY(vlena); + for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++) + { + if (*vp == '\\') + len += 2; + else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) + len += 4; + else + len++; + } + + /* + * In principle len can't overflow uint32 if the input fit in 1GB, but + * for safety let's check rather than relying on palloc's internal + * check. 
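+		 *
+		 * (Rough arithmetic: in escaped form each input byte can expand
+		 * to as many as 4 output characters, so a ~300 MB value made up
+		 * entirely of non-printable bytes would need ~1.2 GB of output,
+		 * exceeding MaxAllocSize even though the input itself was
+		 * comfortably allocatable.)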
+ */ + if (len > MaxAllocSize) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg_internal("result of bytea output conversion is too large"))); + rp = result = (char *) palloc(len); + + vp = VARDATA_ANY(vlena); + for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++) + { + if (*vp == '\\') + { + *rp++ = '\\'; + *rp++ = '\\'; + } + else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) + { + int val; /* holds unprintable chars */ + + val = *vp; + rp[0] = '\\'; + rp[3] = DIG(val & 07); + val >>= 3; + rp[2] = DIG(val & 07); + val >>= 3; + rp[1] = DIG(val & 03); + rp += 4; + } + else + *rp++ = *vp; + } + } + else + { + elog(ERROR, "unrecognized bytea_output setting: %d", + bytea_output); + rp = result = NULL; /* keep compiler quiet */ + } + *rp = '\0'; + PG_RETURN_CSTRING(result); +} + +/* + * bytearecv - converts external binary format to bytea + */ +Datum +bytearecv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + bytea *result; + int nbytes; + + nbytes = buf->len - buf->cursor; + result = (bytea *) palloc(nbytes + VARHDRSZ); + SET_VARSIZE(result, nbytes + VARHDRSZ); + pq_copymsgbytes(buf, VARDATA(result), nbytes); + PG_RETURN_BYTEA_P(result); +} + +/* + * byteasend - converts bytea to binary format + * + * This is a special case: just copy the input... + */ +Datum +byteasend(PG_FUNCTION_ARGS) +{ + bytea *vlena = PG_GETARG_BYTEA_P_COPY(0); + + PG_RETURN_BYTEA_P(vlena); +} + +Datum +bytea_string_agg_transfn(PG_FUNCTION_ARGS) +{ + StringInfo state; + + state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0); + + /* Append the value unless null, preceding it with the delimiter. */ + if (!PG_ARGISNULL(1)) + { + bytea *value = PG_GETARG_BYTEA_PP(1); + bool isfirst = false; + + /* + * You might think we can just throw away the first delimiter, however + * we must keep it as we may be a parallel worker doing partial + * aggregation building a state to send to the main process. We need + * to keep the delimiter of every aggregation so that the combine + * function can properly join up the strings of two separately + * partially aggregated results. The first delimiter is only stripped + * off in the final function. To know how much to strip off the front + * of the string, we store the length of the first delimiter in the + * StringInfo's cursor field, which we don't otherwise need here. + */ + if (state == NULL) + { + state = makeStringAggState(fcinfo); + isfirst = true; + } + + if (!PG_ARGISNULL(2)) + { + bytea *delim = PG_GETARG_BYTEA_PP(2); + + appendBinaryStringInfo(state, VARDATA_ANY(delim), + VARSIZE_ANY_EXHDR(delim)); + if (isfirst) + state->cursor = VARSIZE_ANY_EXHDR(delim); + } + + appendBinaryStringInfo(state, VARDATA_ANY(value), + VARSIZE_ANY_EXHDR(value)); + } + + /* + * The transition type for string_agg() is declared to be "internal", + * which is a pass-by-value type the same size as a pointer. + */ + if (state) + PG_RETURN_POINTER(state); + PG_RETURN_NULL(); +} + +Datum +bytea_string_agg_finalfn(PG_FUNCTION_ARGS) +{ + StringInfo state; + + /* cannot be called directly because of internal-type argument */ + Assert(AggCheckCallContext(fcinfo, NULL)); + + state = PG_ARGISNULL(0) ? 
NULL : (StringInfo) PG_GETARG_POINTER(0); + + if (state != NULL) + { + /* As per comment in transfn, strip data before the cursor position */ + bytea *result; + int strippedlen = state->len - state->cursor; + + result = (bytea *) palloc(strippedlen + VARHDRSZ); + SET_VARSIZE(result, strippedlen + VARHDRSZ); + memcpy(VARDATA(result), &state->data[state->cursor], strippedlen); + PG_RETURN_BYTEA_P(result); + } + else + PG_RETURN_NULL(); +} + +/* + * textin - converts cstring to internal representation + */ +Datum +textin(PG_FUNCTION_ARGS) +{ + char *inputText = PG_GETARG_CSTRING(0); + + PG_RETURN_TEXT_P(cstring_to_text(inputText)); +} + +/* + * textout - converts internal representation to cstring + */ +Datum +textout(PG_FUNCTION_ARGS) +{ + Datum txt = PG_GETARG_DATUM(0); + + PG_RETURN_CSTRING(TextDatumGetCString(txt)); +} + +/* + * textrecv - converts external binary format to text + */ +Datum +textrecv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + text *result; + char *str; + int nbytes; + + str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); + + result = cstring_to_text_with_len(str, nbytes); + pfree(str); + PG_RETURN_TEXT_P(result); +} + +/* + * textsend - converts text to binary format + */ +Datum +textsend(PG_FUNCTION_ARGS) +{ + text *t = PG_GETARG_TEXT_PP(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t)); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + + +/* + * unknownin - converts cstring to internal representation + */ +Datum +unknownin(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); + + /* representation is same as cstring */ + PG_RETURN_CSTRING(pstrdup(str)); +} + +/* + * unknownout - converts internal representation to cstring + */ +Datum +unknownout(PG_FUNCTION_ARGS) +{ + /* representation is same as cstring */ + char *str = PG_GETARG_CSTRING(0); + + PG_RETURN_CSTRING(pstrdup(str)); +} + +/* + * unknownrecv - converts external binary format to unknown + */ +Datum +unknownrecv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + char *str; + int nbytes; + + str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); + /* representation is same as cstring */ + PG_RETURN_CSTRING(str); +} + +/* + * unknownsend - converts unknown to binary format + */ +Datum +unknownsend(PG_FUNCTION_ARGS) +{ + /* representation is same as cstring */ + char *str = PG_GETARG_CSTRING(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendtext(&buf, str, strlen(str)); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + + +/* ========== PUBLIC ROUTINES ========== */ + +/* + * textlen - + * returns the logical length of a text* + * (which is less than the VARSIZE of the text*) + */ +Datum +textlen(PG_FUNCTION_ARGS) +{ + Datum str = PG_GETARG_DATUM(0); + + /* try to avoid decompressing argument */ + PG_RETURN_INT32(text_length(str)); +} + +/* + * text_length - + * Does the real work for textlen() + * + * This is broken out so it can be called directly by other string processing + * functions. Note that the argument is passed as a Datum, to indicate that + * it may still be in compressed form. We can avoid decompressing it at all + * in some cases. 
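+ *
+ * (For example, in a single-byte encoding such as LATIN1 the character
+ * count equals the byte count, so it can be computed from the raw toast
+ * size alone, without detoasting the value.)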
+ */ +static int32 +text_length(Datum str) +{ + /* fastpath when max encoding length is one */ + if (pg_database_encoding_max_length() == 1) + PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ); + else + { + text *t = DatumGetTextPP(str); + + PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA_ANY(t), + VARSIZE_ANY_EXHDR(t))); + } +} + +/* + * textoctetlen - + * returns the physical length of a text* + * (which is less than the VARSIZE of the text*) + */ +Datum +textoctetlen(PG_FUNCTION_ARGS) +{ + Datum str = PG_GETARG_DATUM(0); + + /* We need not detoast the input at all */ + PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ); +} + +/* + * textcat - + * takes two text* and returns a text* that is the concatenation of + * the two. + * + * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96. + * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10. + * Allocate space for output in all cases. + * XXX - thomas 1997-07-10 + */ +Datum +textcat(PG_FUNCTION_ARGS) +{ + text *t1 = PG_GETARG_TEXT_PP(0); + text *t2 = PG_GETARG_TEXT_PP(1); + + PG_RETURN_TEXT_P(text_catenate(t1, t2)); +} + +/* + * text_catenate + * Guts of textcat(), broken out so it can be used by other functions + * + * Arguments can be in short-header form, but not compressed or out-of-line + */ +static text * +text_catenate(text *t1, text *t2) +{ + text *result; + int len1, + len2, + len; + char *ptr; + + len1 = VARSIZE_ANY_EXHDR(t1); + len2 = VARSIZE_ANY_EXHDR(t2); + + /* paranoia ... probably should throw error instead? */ + if (len1 < 0) + len1 = 0; + if (len2 < 0) + len2 = 0; + + len = len1 + len2 + VARHDRSZ; + result = (text *) palloc(len); + + /* Set size of result string... */ + SET_VARSIZE(result, len); + + /* Fill data field of result string... */ + ptr = VARDATA(result); + if (len1 > 0) + memcpy(ptr, VARDATA_ANY(t1), len1); + if (len2 > 0) + memcpy(ptr + len1, VARDATA_ANY(t2), len2); + + return result; +} + +/* + * charlen_to_bytelen() + * Compute the number of bytes occupied by n characters starting at *p + * + * It is caller's responsibility that there actually are n characters; + * the string need not be null-terminated. + */ +static int +charlen_to_bytelen(const char *p, int n) +{ + if (pg_database_encoding_max_length() == 1) + { + /* Optimization for single-byte encodings */ + return n; + } + else + { + const char *s; + + for (s = p; n > 0; n--) + s += pg_mblen(s); + + return s - p; + } +} + +/* + * text_substr() + * Return a substring starting at the specified position. + * - thomas 1997-12-31 + * + * Input: + * - string + * - starting position (is one-based) + * - string length + * + * If the starting position is zero or less, then return from the start of the string + * adjusting the length to be consistent with the "negative start" per SQL. + * If the length is less than zero, return the remaining string. + * + * Added multibyte support. + * - Tatsuo Ishii 1998-4-21 + * Changed behavior if starting position is less than one to conform to SQL behavior. + * Formerly returned the entire string; now returns a portion. + * - Thomas Lockhart 1998-12-10 + * Now uses faster TOAST-slicing interface + * - John Gray 2002-02-22 + * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change + * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw + * error; if E < 1, return '', not entire string). Fixed MB related bug when + * S > LC and < LC + 4 sometimes garbage characters are returned. 
+ * - Joe Conway 2002-08-10 + */ +Datum +text_substr(PG_FUNCTION_ARGS) +{ + PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0), + PG_GETARG_INT32(1), + PG_GETARG_INT32(2), + false)); +} + +/* + * text_substr_no_len - + * Wrapper to avoid opr_sanity failure due to + * one function accepting a different number of args. + */ +Datum +text_substr_no_len(PG_FUNCTION_ARGS) +{ + PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0), + PG_GETARG_INT32(1), + -1, true)); +} + +/* + * text_substring - + * Does the real work for text_substr() and text_substr_no_len() + * + * This is broken out so it can be called directly by other string processing + * functions. Note that the argument is passed as a Datum, to indicate that + * it may still be in compressed/toasted form. We can avoid detoasting all + * of it in some cases. + * + * The result is always a freshly palloc'd datum. + */ +static text * +text_substring(Datum str, int32 start, int32 length, bool length_not_specified) +{ + int32 eml = pg_database_encoding_max_length(); + int32 S = start; /* start position */ + int32 S1; /* adjusted start position */ + int32 L1; /* adjusted substring length */ + int32 E; /* end position */ + + /* + * SQL99 says S can be zero or negative, but we still must fetch from the + * start of the string. + */ + S1 = Max(S, 1); + + /* life is easy if the encoding max length is 1 */ + if (eml == 1) + { + if (length_not_specified) /* special case - get length to end of + * string */ + L1 = -1; + else if (length < 0) + { + /* SQL99 says to throw an error for E < S, i.e., negative length */ + ereport(ERROR, + (errcode(ERRCODE_SUBSTRING_ERROR), + errmsg("negative substring length not allowed"))); + L1 = -1; /* silence stupider compilers */ + } + else if (pg_add_s32_overflow(S, length, &E)) + { + /* + * L could be large enough for S + L to overflow, in which case + * the substring must run to end of string. + */ + L1 = -1; + } + else + { + /* + * A zero or negative value for the end position can happen if the + * start was negative or one. SQL99 says to return a zero-length + * string. + */ + if (E < 1) + return cstring_to_text(""); + + L1 = E - S1; + } + + /* + * If the start position is past the end of the string, SQL99 says to + * return a zero-length string -- DatumGetTextPSlice() will do that + * for us. We need only convert S1 to zero-based starting position. + */ + return DatumGetTextPSlice(str, S1 - 1, L1); + } + else if (eml > 1) + { + /* + * When encoding max length is > 1, we can't get LC without + * detoasting, so we'll grab a conservatively large slice now and go + * back later to do the right thing + */ + int32 slice_start; + int32 slice_size; + int32 slice_strlen; + text *slice; + int32 E1; + int32 i; + char *p; + char *s; + text *ret; + + /* + * We need to start at position zero because there is no way to know + * in advance which byte offset corresponds to the supplied start + * position. + */ + slice_start = 0; + + if (length_not_specified) /* special case - get length to end of + * string */ + slice_size = L1 = -1; + else if (length < 0) + { + /* SQL99 says to throw an error for E < S, i.e., negative length */ + ereport(ERROR, + (errcode(ERRCODE_SUBSTRING_ERROR), + errmsg("negative substring length not allowed"))); + slice_size = L1 = -1; /* silence stupider compilers */ + } + else if (pg_add_s32_overflow(S, length, &E)) + { + /* + * L could be large enough for S + L to overflow, in which case + * the substring must run to end of string. 
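+			 *
+			 * (For instance, with start = 10 and length = INT_MAX the
+			 * sum overflows int32, and the right result is simply
+			 * everything from character position 10 through the end of
+			 * the string.)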
+ */ + slice_size = L1 = -1; + } + else + { + /* + * A zero or negative value for the end position can happen if the + * start was negative or one. SQL99 says to return a zero-length + * string. + */ + if (E < 1) + return cstring_to_text(""); + + /* + * if E is past the end of the string, the tuple toaster will + * truncate the length for us + */ + L1 = E - S1; + + /* + * Total slice size in bytes can't be any longer than the start + * position plus substring length times the encoding max length. + * If that overflows, we can just use -1. + */ + if (pg_mul_s32_overflow(E, eml, &slice_size)) + slice_size = -1; + } + + /* + * If we're working with an untoasted source, no need to do an extra + * copying step. + */ + if (VARATT_IS_COMPRESSED(DatumGetPointer(str)) || + VARATT_IS_EXTERNAL(DatumGetPointer(str))) + slice = DatumGetTextPSlice(str, slice_start, slice_size); + else + slice = (text *) DatumGetPointer(str); + + /* see if we got back an empty string */ + if (VARSIZE_ANY_EXHDR(slice) == 0) + { + if (slice != (text *) DatumGetPointer(str)) + pfree(slice); + return cstring_to_text(""); + } + + /* Now we can get the actual length of the slice in MB characters */ + slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice), + VARSIZE_ANY_EXHDR(slice)); + + /* + * Check that the start position wasn't > slice_strlen. If so, SQL99 + * says to return a zero-length string. + */ + if (S1 > slice_strlen) + { + if (slice != (text *) DatumGetPointer(str)) + pfree(slice); + return cstring_to_text(""); + } + + /* + * Adjust L1 and E1 now that we know the slice string length. Again + * remember that S1 is one based, and slice_start is zero based. + */ + if (L1 > -1) + E1 = Min(S1 + L1, slice_start + 1 + slice_strlen); + else + E1 = slice_start + 1 + slice_strlen; + + /* + * Find the start position in the slice; remember S1 is not zero based + */ + p = VARDATA_ANY(slice); + for (i = 0; i < S1 - 1; i++) + p += pg_mblen(p); + + /* hang onto a pointer to our start position */ + s = p; + + /* + * Count the actual bytes used by the substring of the requested + * length. + */ + for (i = S1; i < E1; i++) + p += pg_mblen(p); + + ret = (text *) palloc(VARHDRSZ + (p - s)); + SET_VARSIZE(ret, VARHDRSZ + (p - s)); + memcpy(VARDATA(ret), s, (p - s)); + + if (slice != (text *) DatumGetPointer(str)) + pfree(slice); + + return ret; + } + else + elog(ERROR, "invalid backend encoding: encoding max length < 1"); + + /* not reached: suppress compiler warning */ + return NULL; +} + +/* + * textoverlay + * Replace specified substring of first string with second + * + * The SQL standard defines OVERLAY() in terms of substring and concatenation. + * This code is a direct implementation of what the standard says. + */ +Datum +textoverlay(PG_FUNCTION_ARGS) +{ + text *t1 = PG_GETARG_TEXT_PP(0); + text *t2 = PG_GETARG_TEXT_PP(1); + int sp = PG_GETARG_INT32(2); /* substring start position */ + int sl = PG_GETARG_INT32(3); /* substring length */ + + PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl)); +} + +Datum +textoverlay_no_len(PG_FUNCTION_ARGS) +{ + text *t1 = PG_GETARG_TEXT_PP(0); + text *t2 = PG_GETARG_TEXT_PP(1); + int sp = PG_GETARG_INT32(2); /* substring start position */ + int sl; + + sl = text_length(PointerGetDatum(t2)); /* defaults to length(t2) */ + PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl)); +} + +static text * +text_overlay(text *t1, text *t2, int sp, int sl) +{ + text *result; + text *s1; + text *s2; + int sp_pl_sl; + + /* + * Check for possible integer-overflow cases. 
For negative sp, throw a + * "substring length" error because that's what should be expected + * according to the spec's definition of OVERLAY(). + */ + if (sp <= 0) + ereport(ERROR, + (errcode(ERRCODE_SUBSTRING_ERROR), + errmsg("negative substring length not allowed"))); + if (pg_add_s32_overflow(sp, sl, &sp_pl_sl)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + + s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false); + s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true); + result = text_catenate(s1, t2); + result = text_catenate(result, s2); + + return result; +} + +/* + * textpos - + * Return the position of the specified substring. + * Implements the SQL POSITION() function. + * Ref: A Guide To The SQL Standard, Date & Darwen, 1997 + * - thomas 1997-07-27 + */ +Datum +textpos(PG_FUNCTION_ARGS) +{ + text *str = PG_GETARG_TEXT_PP(0); + text *search_str = PG_GETARG_TEXT_PP(1); + + PG_RETURN_INT32((int32) text_position(str, search_str, PG_GET_COLLATION())); +} + +/* + * text_position - + * Does the real work for textpos() + * + * Inputs: + * t1 - string to be searched + * t2 - pattern to match within t1 + * Result: + * Character index of the first matched char, starting from 1, + * or 0 if no match. + * + * This is broken out so it can be called directly by other string processing + * functions. + */ +static int +text_position(text *t1, text *t2, Oid collid) +{ + TextPositionState state; + int result; + + /* Empty needle always matches at position 1 */ + if (VARSIZE_ANY_EXHDR(t2) < 1) + return 1; + + /* Otherwise, can't match if haystack is shorter than needle */ + if (VARSIZE_ANY_EXHDR(t1) < VARSIZE_ANY_EXHDR(t2)) + return 0; + + text_position_setup(t1, t2, collid, &state); + if (!text_position_next(&state)) + result = 0; + else + result = text_position_get_match_pos(&state); + text_position_cleanup(&state); + return result; +} + + +/* + * text_position_setup, text_position_next, text_position_cleanup - + * Component steps of text_position() + * + * These are broken out so that a string can be efficiently searched for + * multiple occurrences of the same pattern. text_position_next may be + * called multiple times, and it advances to the next match on each call. + * text_position_get_match_ptr() and text_position_get_match_pos() return + * a pointer or 1-based character position of the last match, respectively. + * + * The "state" variable is normally just a local variable in the caller. + * + * NOTE: text_position_next skips over the matched portion. For example, + * searching for "xx" in "xxx" returns only one match, not two. + */ + +static void +text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state) +{ + int len1 = VARSIZE_ANY_EXHDR(t1); + int len2 = VARSIZE_ANY_EXHDR(t2); + pg_locale_t mylocale = 0; + + check_collation_set(collid); + + if (!lc_collate_is_c(collid)) + mylocale = pg_newlocale_from_collation(collid); + + if (!pg_locale_deterministic(mylocale)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for substring searches"))); + + Assert(len1 > 0); + Assert(len2 > 0); + + /* + * Even with a multi-byte encoding, we perform the search using the raw + * byte sequence, ignoring multibyte issues. For UTF-8, that works fine, + * because in UTF-8 the byte sequence of one character cannot contain + * another character. 
For other multi-byte encodings, we do the search + * initially as a simple byte search, ignoring multibyte issues, but + * verify afterwards that the match we found is at a character boundary, + * and continue the search if it was a false match. + */ + if (pg_database_encoding_max_length() == 1) + state->is_multibyte_char_in_char = false; + else if (GetDatabaseEncoding() == PG_UTF8) + state->is_multibyte_char_in_char = false; + else + state->is_multibyte_char_in_char = true; + + state->str1 = VARDATA_ANY(t1); + state->str2 = VARDATA_ANY(t2); + state->len1 = len1; + state->len2 = len2; + state->last_match = NULL; + state->refpoint = state->str1; + state->refpos = 0; + + /* + * Prepare the skip table for Boyer-Moore-Horspool searching. In these + * notes we use the terminology that the "haystack" is the string to be + * searched (t1) and the "needle" is the pattern being sought (t2). + * + * If the needle is empty or bigger than the haystack then there is no + * point in wasting cycles initializing the table. We also choose not to + * use B-M-H for needles of length 1, since the skip table can't possibly + * save anything in that case. + */ + if (len1 >= len2 && len2 > 1) + { + int searchlength = len1 - len2; + int skiptablemask; + int last; + int i; + const char *str2 = state->str2; + + /* + * First we must determine how much of the skip table to use. The + * declaration of TextPositionState allows up to 256 elements, but for + * short search problems we don't really want to have to initialize so + * many elements --- it would take too long in comparison to the + * actual search time. So we choose a useful skip table size based on + * the haystack length minus the needle length. The closer the needle + * length is to the haystack length the less useful skipping becomes. + * + * Note: since we use bit-masking to select table elements, the skip + * table size MUST be a power of 2, and so the mask must be 2^N-1. + */ + if (searchlength < 16) + skiptablemask = 3; + else if (searchlength < 64) + skiptablemask = 7; + else if (searchlength < 128) + skiptablemask = 15; + else if (searchlength < 512) + skiptablemask = 31; + else if (searchlength < 2048) + skiptablemask = 63; + else if (searchlength < 4096) + skiptablemask = 127; + else + skiptablemask = 255; + state->skiptablemask = skiptablemask; + + /* + * Initialize the skip table. We set all elements to the needle + * length, since this is the correct skip distance for any character + * not found in the needle. + */ + for (i = 0; i <= skiptablemask; i++) + state->skiptable[i] = len2; + + /* + * Now examine the needle. For each character except the last one, + * set the corresponding table element to the appropriate skip + * distance. Note that when two characters share the same skip table + * entry, the one later in the needle must determine the skip + * distance. + */ + last = len2 - 1; + + for (i = 0; i < last; i++) + state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i; + } +} + +/* + * Advance to the next match, starting from the end of the previous match + * (or the beginning of the string, on first call). Returns true if a match + * is found. + * + * Note that this refuses to match an empty-string needle. Most callers + * will have handled that case specially and we'll never see it here. 
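+ *
+ * (For instance, searching for "ana" in "banana" finds a match at
+ * character position 2 on the first call and nothing on the second,
+ * because the search resumes after the end of the first match.)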
+ */ +static bool +text_position_next(TextPositionState *state) +{ + int needle_len = state->len2; + char *start_ptr; + char *matchptr; + + if (needle_len <= 0) + return false; /* result for empty pattern */ + + /* Start from the point right after the previous match. */ + if (state->last_match) + start_ptr = state->last_match + needle_len; + else + start_ptr = state->str1; + +retry: + matchptr = text_position_next_internal(start_ptr, state); + + if (!matchptr) + return false; + + /* + * Found a match for the byte sequence. If this is a multibyte encoding, + * where one character's byte sequence can appear inside a longer + * multi-byte character, we need to verify that the match was at a + * character boundary, not in the middle of a multi-byte character. + */ + if (state->is_multibyte_char_in_char) + { + /* Walk one character at a time, until we reach the match. */ + + /* the search should never move backwards. */ + Assert(state->refpoint <= matchptr); + + while (state->refpoint < matchptr) + { + /* step to next character. */ + state->refpoint += pg_mblen(state->refpoint); + state->refpos++; + + /* + * If we stepped over the match's start position, then it was a + * false positive, where the byte sequence appeared in the middle + * of a multi-byte character. Skip it, and continue the search at + * the next character boundary. + */ + if (state->refpoint > matchptr) + { + start_ptr = state->refpoint; + goto retry; + } + } + } + + state->last_match = matchptr; + return true; +} + +/* + * Subroutine of text_position_next(). This searches for the raw byte + * sequence, ignoring any multi-byte encoding issues. Returns the first + * match starting at 'start_ptr', or NULL if no match is found. + */ +static char * +text_position_next_internal(char *start_ptr, TextPositionState *state) +{ + int haystack_len = state->len1; + int needle_len = state->len2; + int skiptablemask = state->skiptablemask; + const char *haystack = state->str1; + const char *needle = state->str2; + const char *haystack_end = &haystack[haystack_len]; + const char *hptr; + + Assert(start_ptr >= haystack && start_ptr <= haystack_end); + + if (needle_len == 1) + { + /* No point in using B-M-H for a one-character needle */ + char nchar = *needle; + + hptr = start_ptr; + while (hptr < haystack_end) + { + if (*hptr == nchar) + return (char *) hptr; + hptr++; + } + } + else + { + const char *needle_last = &needle[needle_len - 1]; + + /* Start at startpos plus the length of the needle */ + hptr = start_ptr + needle_len - 1; + while (hptr < haystack_end) + { + /* Match the needle scanning *backward* */ + const char *nptr; + const char *p; + + nptr = needle_last; + p = hptr; + while (*nptr == *p) + { + /* Matched it all? If so, return 1-based position */ + if (nptr == needle) + return (char *) p; + nptr--, p--; + } + + /* + * No match, so use the haystack char at hptr to decide how far to + * advance. If the needle had any occurrence of that character + * (or more precisely, one sharing the same skiptable entry) + * before its last character, then we advance far enough to align + * the last such needle character with that haystack position. + * Otherwise we can advance by the whole needle length. + */ + hptr += state->skiptable[(unsigned char) *hptr & skiptablemask]; + } + } + + return 0; /* not found */ +} + +/* + * Return a pointer to the current match. + * + * The returned pointer points into the original haystack string. 
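+ *
+ * (That is, it points into the t1 given to text_position_setup; it stays
+ * valid only as long as that value does, and the caller must not pfree()
+ * it separately.)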
+ */ +static char * +text_position_get_match_ptr(TextPositionState *state) +{ + return state->last_match; +} + +/* + * Return the offset of the current match. + * + * The offset is in characters, 1-based. + */ +static int +text_position_get_match_pos(TextPositionState *state) +{ + /* Convert the byte position to char position. */ + state->refpos += pg_mbstrlen_with_len(state->refpoint, + state->last_match - state->refpoint); + state->refpoint = state->last_match; + return state->refpos + 1; +} + +/* + * Reset search state to the initial state installed by text_position_setup. + * + * The next call to text_position_next will search from the beginning + * of the string. + */ +static void +text_position_reset(TextPositionState *state) +{ + state->last_match = NULL; + state->refpoint = state->str1; + state->refpos = 0; +} + +static void +text_position_cleanup(TextPositionState *state) +{ + /* no cleanup needed */ +} + + +static void +check_collation_set(Oid collid) +{ + if (!OidIsValid(collid)) + { + /* + * This typically means that the parser could not resolve a conflict + * of implicit collations, so report it that way. + */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string comparison"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } +} + +/* varstr_cmp() + * Comparison function for text strings with given lengths. + * Includes locale support, but must copy strings to temporary memory + * to allow null-termination for inputs to strcoll(). + * Returns an integer less than, equal to, or greater than zero, indicating + * whether arg1 is less than, equal to, or greater than arg2. + * + * Note: many functions that depend on this are marked leakproof; therefore, + * avoid reporting the actual contents of the input when throwing errors. + * All errors herein should be things that can't happen except on corrupt + * data, anyway; otherwise we will have trouble with indexing strings that + * would cause them. + */ +int +varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) +{ + int result; + + check_collation_set(collid); + + /* + * Unfortunately, there is no strncoll(), so in the non-C locale case we + * have to do some memory copying. This turns out to be significantly + * slower, so we optimize the case where LC_COLLATE is C. We also try to + * optimize relatively-short strings by avoiding palloc/pfree overhead. + */ + if (lc_collate_is_c(collid)) + { + result = memcmp(arg1, arg2, Min(len1, len2)); + if ((result == 0) && (len1 != len2)) + result = (len1 < len2) ? -1 : 1; + } + else + { + pg_locale_t mylocale; + + mylocale = pg_newlocale_from_collation(collid); + + /* + * memcmp() can't tell us which of two unequal strings sorts first, + * but it's a cheap way to tell if they're equal. Testing shows that + * memcmp() followed by strcoll() is only trivially slower than + * strcoll() by itself, so we don't lose much if this doesn't work out + * very often, and if it does - for example, because there are many + * equal strings in the input - then we win big by avoiding expensive + * collation-aware comparisons. + */ + if (len1 == len2 && memcmp(arg1, arg2, len1) == 0) + return 0; + + result = pg_strncoll(arg1, len1, arg2, len2, mylocale); + + /* Break tie if necessary. */ + if (result == 0 && pg_locale_deterministic(mylocale)) + { + result = memcmp(arg1, arg2, Min(len1, len2)); + if ((result == 0) && (len1 != len2)) + result = (len1 < len2) ? 
-1 : 1; + } + } + + return result; +} + +/* text_cmp() + * Internal comparison function for text strings. + * Returns -1, 0 or 1 + */ +static int +text_cmp(text *arg1, text *arg2, Oid collid) +{ + char *a1p, + *a2p; + int len1, + len2; + + a1p = VARDATA_ANY(arg1); + a2p = VARDATA_ANY(arg2); + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + return varstr_cmp(a1p, len1, a2p, len2, collid); +} + +/* + * Comparison functions for text strings. + * + * Note: btree indexes need these routines not to leak memory; therefore, + * be careful to free working copies of toasted datums. Most places don't + * need to be so careful. + */ + +Datum +texteq(PG_FUNCTION_ARGS) +{ + Oid collid = PG_GET_COLLATION(); + bool locale_is_c = false; + pg_locale_t mylocale = 0; + bool result; + + check_collation_set(collid); + + if (lc_collate_is_c(collid)) + locale_is_c = true; + else + mylocale = pg_newlocale_from_collation(collid); + + if (locale_is_c || pg_locale_deterministic(mylocale)) + { + Datum arg1 = PG_GETARG_DATUM(0); + Datum arg2 = PG_GETARG_DATUM(1); + Size len1, + len2; + + /* + * Since we only care about equality or not-equality, we can avoid all + * the expense of strcoll() here, and just do bitwise comparison. In + * fact, we don't even have to do a bitwise comparison if we can show + * the lengths of the strings are unequal; which might save us from + * having to detoast one or both values. + */ + len1 = toast_raw_datum_size(arg1); + len2 = toast_raw_datum_size(arg2); + if (len1 != len2) + result = false; + else + { + text *targ1 = DatumGetTextPP(arg1); + text *targ2 = DatumGetTextPP(arg2); + + result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2), + len1 - VARHDRSZ) == 0); + + PG_FREE_IF_COPY(targ1, 0); + PG_FREE_IF_COPY(targ2, 1); + } + } + else + { + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + + result = (text_cmp(arg1, arg2, collid) == 0); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + } + + PG_RETURN_BOOL(result); +} + +Datum +textne(PG_FUNCTION_ARGS) +{ + Oid collid = PG_GET_COLLATION(); + bool locale_is_c = false; + pg_locale_t mylocale = 0; + bool result; + + check_collation_set(collid); + + if (lc_collate_is_c(collid)) + locale_is_c = true; + else + mylocale = pg_newlocale_from_collation(collid); + + if (locale_is_c || pg_locale_deterministic(mylocale)) + { + Datum arg1 = PG_GETARG_DATUM(0); + Datum arg2 = PG_GETARG_DATUM(1); + Size len1, + len2; + + /* See comment in texteq() */ + len1 = toast_raw_datum_size(arg1); + len2 = toast_raw_datum_size(arg2); + if (len1 != len2) + result = true; + else + { + text *targ1 = DatumGetTextPP(arg1); + text *targ2 = DatumGetTextPP(arg2); + + result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2), + len1 - VARHDRSZ) != 0); + + PG_FREE_IF_COPY(targ1, 0); + PG_FREE_IF_COPY(targ2, 1); + } + } + else + { + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + + result = (text_cmp(arg1, arg2, collid) != 0); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + } + + PG_RETURN_BOOL(result); +} + +Datum +text_lt(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + bool result; + + result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +text_le(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + bool result; + + result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0); + + 
PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +text_gt(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + bool result; + + result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +text_ge(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + bool result; + + result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +text_starts_with(PG_FUNCTION_ARGS) +{ + Datum arg1 = PG_GETARG_DATUM(0); + Datum arg2 = PG_GETARG_DATUM(1); + Oid collid = PG_GET_COLLATION(); + pg_locale_t mylocale = 0; + bool result; + Size len1, + len2; + + check_collation_set(collid); + + if (!lc_collate_is_c(collid)) + mylocale = pg_newlocale_from_collation(collid); + + if (!pg_locale_deterministic(mylocale)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for substring searches"))); + + len1 = toast_raw_datum_size(arg1); + len2 = toast_raw_datum_size(arg2); + if (len2 > len1) + result = false; + else + { + text *targ1 = text_substring(arg1, 1, len2, false); + text *targ2 = DatumGetTextPP(arg2); + + result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2), + VARSIZE_ANY_EXHDR(targ2)) == 0); + + PG_FREE_IF_COPY(targ1, 0); + PG_FREE_IF_COPY(targ2, 1); + } + + PG_RETURN_BOOL(result); +} + +Datum +bttextcmp(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + int32 result; + + result = text_cmp(arg1, arg2, PG_GET_COLLATION()); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_INT32(result); +} + +Datum +bttextsortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + Oid collid = ssup->ssup_collation; + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); + + /* Use generic string SortSupport */ + varstr_sortsupport(ssup, TEXTOID, collid); + + MemoryContextSwitchTo(oldcontext); + + PG_RETURN_VOID(); +} + +/* + * Generic sortsupport interface for character type's operator classes. + * Includes locale support, and support for BpChar semantics (i.e. removing + * trailing spaces before comparison). + * + * Relies on the assumption that text, VarChar, BpChar, and bytea all have the + * same representation. Callers that always use the C collation (e.g. + * non-collatable type callers like bytea) may have NUL bytes in their strings; + * this will not work with any other collation, though. + */ +void +varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid) +{ + bool abbreviate = ssup->abbreviate; + bool collate_c = false; + VarStringSortSupport *sss; + pg_locale_t locale = 0; + + check_collation_set(collid); + + /* + * If possible, set ssup->comparator to a function which can be used to + * directly compare two datums. If we can do this, we'll avoid the + * overhead of a trip through the fmgr layer for every comparison, which + * can be substantial. + * + * Most typically, we'll set the comparator to varlenafastcmp_locale, + * which uses strcoll() to perform comparisons. We use that for the + * BpChar case too, but type NAME uses namefastcmp_locale. 
However, if + * LC_COLLATE = C, we can make things quite a bit faster with + * varstrfastcmp_c, bpcharfastcmp_c, or namefastcmp_c, all of which use + * memcmp() rather than strcoll(). + */ + if (lc_collate_is_c(collid)) + { + if (typid == BPCHAROID) + ssup->comparator = bpcharfastcmp_c; + else if (typid == NAMEOID) + { + ssup->comparator = namefastcmp_c; + /* Not supporting abbreviation with type NAME, for now */ + abbreviate = false; + } + else + ssup->comparator = varstrfastcmp_c; + + collate_c = true; + } + else + { + /* + * We need a collation-sensitive comparison. To make things faster, + * we'll figure out the collation based on the locale id and cache the + * result. + */ + locale = pg_newlocale_from_collation(collid); + + /* + * We use varlenafastcmp_locale except for type NAME. + */ + if (typid == NAMEOID) + { + ssup->comparator = namefastcmp_locale; + /* Not supporting abbreviation with type NAME, for now */ + abbreviate = false; + } + else + ssup->comparator = varlenafastcmp_locale; + } + + /* + * Unfortunately, it seems that abbreviation for non-C collations is + * broken on many common platforms; see pg_strxfrm_enabled(). + * + * Even apart from the risk of broken locales, it's possible that there + * are platforms where the use of abbreviated keys should be disabled at + * compile time. Having only 4 byte datums could make worst-case + * performance drastically more likely, for example. Moreover, macOS's + * strxfrm() implementation is known to not effectively concentrate a + * significant amount of entropy from the original string in earlier + * transformed blobs. It's possible that other supported platforms are + * similarly encumbered. So, if we ever get past disabling this + * categorically, we may still want or need to disable it for particular + * platforms. + */ + if (!collate_c && !pg_strxfrm_enabled(locale)) + abbreviate = false; + + /* + * If we're using abbreviated keys, or if we're using a locale-aware + * comparison, we need to initialize a VarStringSortSupport object. Both + * cases will make use of the temporary buffers we initialize here for + * scratch space (and to detect requirement for BpChar semantics from + * caller), and the abbreviation case requires additional state. + */ + if (abbreviate || !collate_c) + { + sss = palloc(sizeof(VarStringSortSupport)); + sss->buf1 = palloc(TEXTBUFLEN); + sss->buflen1 = TEXTBUFLEN; + sss->buf2 = palloc(TEXTBUFLEN); + sss->buflen2 = TEXTBUFLEN; + /* Start with invalid values */ + sss->last_len1 = -1; + sss->last_len2 = -1; + /* Initialize */ + sss->last_returned = 0; + sss->locale = locale; + + /* + * To avoid somehow confusing a strxfrm() blob and an original string, + * constantly keep track of the variety of data that buf1 and buf2 + * currently contain. + * + * Comparisons may be interleaved with conversion calls. Frequently, + * conversions and comparisons are batched into two distinct phases, + * but the correctness of caching cannot hinge upon this. For + * comparison caching, buffer state is only trusted if cache_blob is + * found set to false, whereas strxfrm() caching only trusts the state + * when cache_blob is found set to true. + * + * Arbitrarily initialize cache_blob to true. + */ + sss->cache_blob = true; + sss->collate_c = collate_c; + sss->typid = typid; + ssup->ssup_extra = sss; + + /* + * If possible, plan to use the abbreviated keys optimization. The + * core code may switch back to authoritative comparator should + * abbreviation be aborted. 
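+	 *
+	 * (Roughly: with abbreviation in effect, the sort first compares
+	 * sizeof(Datum)-byte prefixes built by varstr_abbrev_convert() using
+	 * ssup_datum_unsigned_cmp(), and only apparent ties fall back to the
+	 * full comparator saved in abbrev_full_comparator.)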
+ */ + if (abbreviate) + { + sss->prop_card = 0.20; + initHyperLogLog(&sss->abbr_card, 10); + initHyperLogLog(&sss->full_card, 10); + ssup->abbrev_full_comparator = ssup->comparator; + ssup->comparator = ssup_datum_unsigned_cmp; + ssup->abbrev_converter = varstr_abbrev_convert; + ssup->abbrev_abort = varstr_abbrev_abort; + } + } +} + +/* + * sortsupport comparison func (for C locale case) + */ +static int +varstrfastcmp_c(Datum x, Datum y, SortSupport ssup) +{ + VarString *arg1 = DatumGetVarStringPP(x); + VarString *arg2 = DatumGetVarStringPP(y); + char *a1p, + *a2p; + int len1, + len2, + result; + + a1p = VARDATA_ANY(arg1); + a2p = VARDATA_ANY(arg2); + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + result = memcmp(a1p, a2p, Min(len1, len2)); + if ((result == 0) && (len1 != len2)) + result = (len1 < len2) ? -1 : 1; + + /* We can't afford to leak memory here. */ + if (PointerGetDatum(arg1) != x) + pfree(arg1); + if (PointerGetDatum(arg2) != y) + pfree(arg2); + + return result; +} + +/* + * sortsupport comparison func (for BpChar C locale case) + * + * BpChar outsources its sortsupport to this module. Specialization for the + * varstr_sortsupport BpChar case, modeled on + * internal_bpchar_pattern_compare(). + */ +static int +bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup) +{ + BpChar *arg1 = DatumGetBpCharPP(x); + BpChar *arg2 = DatumGetBpCharPP(y); + char *a1p, + *a2p; + int len1, + len2, + result; + + a1p = VARDATA_ANY(arg1); + a2p = VARDATA_ANY(arg2); + + len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1)); + len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2)); + + result = memcmp(a1p, a2p, Min(len1, len2)); + if ((result == 0) && (len1 != len2)) + result = (len1 < len2) ? -1 : 1; + + /* We can't afford to leak memory here. */ + if (PointerGetDatum(arg1) != x) + pfree(arg1); + if (PointerGetDatum(arg2) != y) + pfree(arg2); + + return result; +} + +/* + * sortsupport comparison func (for NAME C locale case) + */ +static int +namefastcmp_c(Datum x, Datum y, SortSupport ssup) +{ + Name arg1 = DatumGetName(x); + Name arg2 = DatumGetName(y); + + return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN); +} + +/* + * sortsupport comparison func (for locale case with all varlena types) + */ +static int +varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup) +{ + VarString *arg1 = DatumGetVarStringPP(x); + VarString *arg2 = DatumGetVarStringPP(y); + char *a1p, + *a2p; + int len1, + len2, + result; + + a1p = VARDATA_ANY(arg1); + a2p = VARDATA_ANY(arg2); + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + result = varstrfastcmp_locale(a1p, len1, a2p, len2, ssup); + + /* We can't afford to leak memory here. 
*/ + if (PointerGetDatum(arg1) != x) + pfree(arg1); + if (PointerGetDatum(arg2) != y) + pfree(arg2); + + return result; +} + +/* + * sortsupport comparison func (for locale case with NAME type) + */ +static int +namefastcmp_locale(Datum x, Datum y, SortSupport ssup) +{ + Name arg1 = DatumGetName(x); + Name arg2 = DatumGetName(y); + + return varstrfastcmp_locale(NameStr(*arg1), strlen(NameStr(*arg1)), + NameStr(*arg2), strlen(NameStr(*arg2)), + ssup); +} + +/* + * sortsupport comparison func for locale cases + */ +static int +varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup) +{ + VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra; + int result; + bool arg1_match; + + /* Fast pre-check for equality, as discussed in varstr_cmp() */ + if (len1 == len2 && memcmp(a1p, a2p, len1) == 0) + { + /* + * No change in buf1 or buf2 contents, so avoid changing last_len1 or + * last_len2. Existing contents of buffers might still be used by + * next call. + * + * It's fine to allow the comparison of BpChar padding bytes here, + * even though that implies that the memcmp() will usually be + * performed for BpChar callers (though multibyte characters could + * still prevent that from occurring). The memcmp() is still very + * cheap, and BpChar's funny semantics have us remove trailing spaces + * (not limited to padding), so we need make no distinction between + * padding space characters and "real" space characters. + */ + return 0; + } + + if (sss->typid == BPCHAROID) + { + /* Get true number of bytes, ignoring trailing spaces */ + len1 = bpchartruelen(a1p, len1); + len2 = bpchartruelen(a2p, len2); + } + + if (len1 >= sss->buflen1) + { + sss->buflen1 = Max(len1 + 1, Min(sss->buflen1 * 2, MaxAllocSize)); + sss->buf1 = repalloc(sss->buf1, sss->buflen1); + } + if (len2 >= sss->buflen2) + { + sss->buflen2 = Max(len2 + 1, Min(sss->buflen2 * 2, MaxAllocSize)); + sss->buf2 = repalloc(sss->buf2, sss->buflen2); + } + + /* + * We're likely to be asked to compare the same strings repeatedly, and + * memcmp() is so much cheaper than strcoll() that it pays to try to cache + * comparisons, even though in general there is no reason to think that + * that will work out (every string datum may be unique). Caching does + * not slow things down measurably when it doesn't work out, and can speed + * things up by rather a lot when it does. In part, this is because the + * memcmp() compares data from cachelines that are needed in L1 cache even + * when the last comparison's result cannot be reused. + */ + arg1_match = true; + if (len1 != sss->last_len1 || memcmp(sss->buf1, a1p, len1) != 0) + { + arg1_match = false; + memcpy(sss->buf1, a1p, len1); + sss->buf1[len1] = '\0'; + sss->last_len1 = len1; + } + + /* + * If we're comparing the same two strings as last time, we can return the + * same answer without calling strcoll() again. This is more likely than + * it seems (at least with moderate to low cardinality sets), because + * quicksort compares the same pivot against many values. + */ + if (len2 != sss->last_len2 || memcmp(sss->buf2, a2p, len2) != 0) + { + memcpy(sss->buf2, a2p, len2); + sss->buf2[len2] = '\0'; + sss->last_len2 = len2; + } + else if (arg1_match && !sss->cache_blob) + { + /* Use result cached following last actual strcoll() call */ + return sss->last_returned; + } + + result = pg_strcoll(sss->buf1, sss->buf2, sss->locale); + + /* Break tie if necessary. 
*/ + if (result == 0 && pg_locale_deterministic(sss->locale)) + result = strcmp(sss->buf1, sss->buf2); + + /* Cache result, perhaps saving an expensive strcoll() call next time */ + sss->cache_blob = false; + sss->last_returned = result; + return result; +} + +/* + * Conversion routine for sortsupport. Converts original to abbreviated key + * representation. Our encoding strategy is simple -- pack the first 8 bytes + * of a strxfrm() blob into a Datum (on little-endian machines, the 8 bytes are + * stored in reverse order), and treat it as an unsigned integer. When the "C" + * locale is used, or in case of bytea, just memcpy() from original instead. + */ +static Datum +varstr_abbrev_convert(Datum original, SortSupport ssup) +{ + const size_t max_prefix_bytes = sizeof(Datum); + VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra; + VarString *authoritative = DatumGetVarStringPP(original); + char *authoritative_data = VARDATA_ANY(authoritative); + + /* working state */ + Datum res; + char *pres; + int len; + uint32 hash; + + pres = (char *) &res; + /* memset(), so any non-overwritten bytes are NUL */ + memset(pres, 0, max_prefix_bytes); + len = VARSIZE_ANY_EXHDR(authoritative); + + /* Get number of bytes, ignoring trailing spaces */ + if (sss->typid == BPCHAROID) + len = bpchartruelen(authoritative_data, len); + + /* + * If we're using the C collation, use memcpy(), rather than strxfrm(), to + * abbreviate keys. The full comparator for the C locale is always + * memcmp(). It would be incorrect to allow bytea callers (callers that + * always force the C collation -- bytea isn't a collatable type, but this + * approach is convenient) to use strxfrm(). This is because bytea + * strings may contain NUL bytes. Besides, this should be faster, too. + * + * More generally, it's okay that bytea callers can have NUL bytes in + * strings because abbreviated cmp need not make a distinction between + * terminating NUL bytes, and NUL bytes representing actual NULs in the + * authoritative representation. Hopefully a comparison at or past one + * abbreviated key's terminating NUL byte will resolve the comparison + * without consulting the authoritative representation; specifically, some + * later non-NUL byte in the longer string can resolve the comparison + * against a subsequent terminating NUL in the shorter string. There will + * usually be what is effectively a "length-wise" resolution there and + * then. + * + * If that doesn't work out -- if all bytes in the longer string + * positioned at or past the offset of the smaller string's (first) + * terminating NUL are actually representative of NUL bytes in the + * authoritative binary string (perhaps with some *terminating* NUL bytes + * towards the end of the longer string iff it happens to still be small) + * -- then an authoritative tie-breaker will happen, and do the right + * thing: explicitly consider string length. + */ + if (sss->collate_c) + memcpy(pres, authoritative_data, Min(len, max_prefix_bytes)); + else + { + Size bsize; + + /* + * We're not using the C collation, so fall back on strxfrm or ICU + * analogs. 
+ */ + + /* By convention, we use buffer 1 to store and NUL-terminate */ + if (len >= sss->buflen1) + { + sss->buflen1 = Max(len + 1, Min(sss->buflen1 * 2, MaxAllocSize)); + sss->buf1 = repalloc(sss->buf1, sss->buflen1); + } + + /* Might be able to reuse strxfrm() blob from last call */ + if (sss->last_len1 == len && sss->cache_blob && + memcmp(sss->buf1, authoritative_data, len) == 0) + { + memcpy(pres, sss->buf2, Min(max_prefix_bytes, sss->last_len2)); + /* No change affecting cardinality, so no hashing required */ + goto done; + } + + memcpy(sss->buf1, authoritative_data, len); + + /* + * pg_strxfrm() and pg_strxfrm_prefix expect NUL-terminated strings. + */ + sss->buf1[len] = '\0'; + sss->last_len1 = len; + + if (pg_strxfrm_prefix_enabled(sss->locale)) + { + if (sss->buflen2 < max_prefix_bytes) + { + sss->buflen2 = Max(max_prefix_bytes, + Min(sss->buflen2 * 2, MaxAllocSize)); + sss->buf2 = repalloc(sss->buf2, sss->buflen2); + } + + bsize = pg_strxfrm_prefix(sss->buf2, sss->buf1, + max_prefix_bytes, sss->locale); + sss->last_len2 = bsize; + } + else + { + /* + * Loop: Call pg_strxfrm(), possibly enlarge buffer, and try + * again. The pg_strxfrm() function leaves the result buffer + * content undefined if the result did not fit, so we need to + * retry until everything fits, even though we only need the first + * few bytes in the end. + */ + for (;;) + { + bsize = pg_strxfrm(sss->buf2, sss->buf1, sss->buflen2, + sss->locale); + + sss->last_len2 = bsize; + if (bsize < sss->buflen2) + break; + + /* + * Grow buffer and retry. + */ + sss->buflen2 = Max(bsize + 1, + Min(sss->buflen2 * 2, MaxAllocSize)); + sss->buf2 = repalloc(sss->buf2, sss->buflen2); + } + } + + /* + * Every Datum byte is always compared. This is safe because the + * strxfrm() blob is itself NUL terminated, leaving no danger of + * misinterpreting any NUL bytes not intended to be interpreted as + * logically representing termination. + * + * (Actually, even if there were NUL bytes in the blob it would be + * okay. See remarks on bytea case above.) + */ + memcpy(pres, sss->buf2, Min(max_prefix_bytes, bsize)); + } + + /* + * Maintain approximate cardinality of both abbreviated keys and original, + * authoritative keys using HyperLogLog. Used as cheap insurance against + * the worst case, where we do many string transformations for no saving + * in full strcoll()-based comparisons. These statistics are used by + * varstr_abbrev_abort(). + * + * First, Hash key proper, or a significant fraction of it. Mix in length + * in order to compensate for cases where differences are past + * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing. + */ + hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data, + Min(len, PG_CACHE_LINE_SIZE))); + + if (len > PG_CACHE_LINE_SIZE) + hash ^= DatumGetUInt32(hash_uint32((uint32) len)); + + addHyperLogLog(&sss->full_card, hash); + + /* Hash abbreviated key */ +#if SIZEOF_DATUM == 8 + { + uint32 lohalf, + hihalf; + + lohalf = (uint32) res; + hihalf = (uint32) (res >> 32); + hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf)); + } +#else /* SIZEOF_DATUM != 8 */ + hash = DatumGetUInt32(hash_uint32((uint32) res)); +#endif + + addHyperLogLog(&sss->abbr_card, hash); + + /* Cache result, perhaps saving an expensive strxfrm() call next time */ + sss->cache_blob = true; +done: + + /* + * Byteswap on little-endian machines. + * + * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer + * 3-way comparator) works correctly on all platforms. 
If we didn't do + * this, the comparator would have to call memcmp() with a pair of + * pointers to the first byte of each abbreviated key, which is slower. + */ + res = DatumBigEndianToNative(res); + + /* Don't leak memory here */ + if (PointerGetDatum(authoritative) != original) + pfree(authoritative); + + return res; +} + +/* + * Callback for estimating effectiveness of abbreviated key optimization, using + * heuristic rules. Returns value indicating if the abbreviation optimization + * should be aborted, based on its projected effectiveness. + */ +static bool +varstr_abbrev_abort(int memtupcount, SortSupport ssup) +{ + VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra; + double abbrev_distinct, + key_distinct; + + Assert(ssup->abbreviate); + + /* Have a little patience */ + if (memtupcount < 100) + return false; + + abbrev_distinct = estimateHyperLogLog(&sss->abbr_card); + key_distinct = estimateHyperLogLog(&sss->full_card); + + /* + * Clamp cardinality estimates to at least one distinct value. While + * NULLs are generally disregarded, if only NULL values were seen so far, + * that might misrepresent costs if we failed to clamp. + */ + if (abbrev_distinct <= 1.0) + abbrev_distinct = 1.0; + + if (key_distinct <= 1.0) + key_distinct = 1.0; + + /* + * In the worst case all abbreviated keys are identical, while at the same + * time there are differences within full key strings not captured in + * abbreviations. + */ +#ifdef TRACE_SORT + if (trace_sort) + { + double norm_abbrev_card = abbrev_distinct / (double) memtupcount; + + elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f " + "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)", + memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card, + sss->prop_card); + } +#endif + + /* + * If the number of distinct abbreviated keys approximately matches the + * number of distinct authoritative original keys, that's reason enough to + * proceed. We can win even with a very low cardinality set if most + * tie-breakers only memcmp(). This is by far the most important + * consideration. + * + * While comparisons that are resolved at the abbreviated key level are + * considerably cheaper than tie-breakers resolved with memcmp(), both of + * those two outcomes are so much cheaper than a full strcoll() once + * sorting is underway that it doesn't seem worth it to weigh abbreviated + * cardinality against the overall size of the set in order to more + * accurately model costs. Assume that an abbreviated comparison, and an + * abbreviated comparison with a cheap memcmp()-based authoritative + * resolution are equivalent. + */ + if (abbrev_distinct > key_distinct * sss->prop_card) + { + /* + * When we have exceeded 10,000 tuples, decay required cardinality + * aggressively for next call. + * + * This is useful because the number of comparisons required on + * average increases at a linearithmic rate, and at roughly 10,000 + * tuples that factor will start to dominate over the linear costs of + * string transformation (this is a conservative estimate). The decay + * rate is chosen to be a little less aggressive than halving -- which + * (since we're called at points at which memtupcount has doubled) + * would never see the cost model actually abort past the first call + * following a decay. This decay rate is mostly a precaution against + * a sudden, violent swing in how well abbreviated cardinality tracks + * full key cardinality. 
The decay also serves to prevent a marginal + * case from being aborted too late, when too much has already been + * invested in string transformation. + * + * It's possible for sets of several million distinct strings with + * mere tens of thousands of distinct abbreviated keys to still + * benefit very significantly. This will generally occur provided + * each abbreviated key is a proxy for a roughly uniform number of the + * set's full keys. If it isn't so, we hope to catch that early and + * abort. If it isn't caught early, by the time the problem is + * apparent it's probably not worth aborting. + */ + if (memtupcount > 10000) + sss->prop_card *= 0.65; + + return false; + } + + /* + * Abort abbreviation strategy. + * + * The worst case, where all abbreviated keys are identical while all + * original strings differ will typically only see a regression of about + * 10% in execution time for small to medium sized lists of strings. + * Whereas on modern CPUs where cache stalls are the dominant cost, we can + * often expect very large improvements, particularly with sets of strings + * of moderately high to high abbreviated cardinality. There is little to + * lose but much to gain, which our strategy reflects. + */ +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "varstr_abbrev: aborted abbreviation at %d " + "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)", + memtupcount, abbrev_distinct, key_distinct, sss->prop_card); +#endif + + return true; +} + +/* + * Generic equalimage support function for character type's operator classes. + * Disables the use of deduplication with nondeterministic collations. + */ +Datum +btvarstrequalimage(PG_FUNCTION_ARGS) +{ + /* Oid opcintype = PG_GETARG_OID(0); */ + Oid collid = PG_GET_COLLATION(); + + check_collation_set(collid); + + if (lc_collate_is_c(collid) || + collid == DEFAULT_COLLATION_OID || + get_collation_isdeterministic(collid)) + PG_RETURN_BOOL(true); + else + PG_RETURN_BOOL(false); +} + +Datum +text_larger(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + text *result; + + result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2); + + PG_RETURN_TEXT_P(result); +} + +Datum +text_smaller(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + text *result; + + result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2); + + PG_RETURN_TEXT_P(result); +} + + +/* + * Cross-type comparison functions for types text and name. 
+ */ + +Datum +nameeqtext(PG_FUNCTION_ARGS) +{ + Name arg1 = PG_GETARG_NAME(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + size_t len1 = strlen(NameStr(*arg1)); + size_t len2 = VARSIZE_ANY_EXHDR(arg2); + Oid collid = PG_GET_COLLATION(); + bool result; + + check_collation_set(collid); + + if (collid == C_COLLATION_OID) + result = (len1 == len2 && + memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0); + else + result = (varstr_cmp(NameStr(*arg1), len1, + VARDATA_ANY(arg2), len2, + collid) == 0); + + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +texteqname(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_PP(0); + Name arg2 = PG_GETARG_NAME(1); + size_t len1 = VARSIZE_ANY_EXHDR(arg1); + size_t len2 = strlen(NameStr(*arg2)); + Oid collid = PG_GET_COLLATION(); + bool result; + + check_collation_set(collid); + + if (collid == C_COLLATION_OID) + result = (len1 == len2 && + memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0); + else + result = (varstr_cmp(VARDATA_ANY(arg1), len1, + NameStr(*arg2), len2, + collid) == 0); + + PG_FREE_IF_COPY(arg1, 0); + + PG_RETURN_BOOL(result); +} + +Datum +namenetext(PG_FUNCTION_ARGS) +{ + Name arg1 = PG_GETARG_NAME(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + size_t len1 = strlen(NameStr(*arg1)); + size_t len2 = VARSIZE_ANY_EXHDR(arg2); + Oid collid = PG_GET_COLLATION(); + bool result; + + check_collation_set(collid); + + if (collid == C_COLLATION_OID) + result = !(len1 == len2 && + memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0); + else + result = !(varstr_cmp(NameStr(*arg1), len1, + VARDATA_ANY(arg2), len2, + collid) == 0); + + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +textnename(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_PP(0); + Name arg2 = PG_GETARG_NAME(1); + size_t len1 = VARSIZE_ANY_EXHDR(arg1); + size_t len2 = strlen(NameStr(*arg2)); + Oid collid = PG_GET_COLLATION(); + bool result; + + check_collation_set(collid); + + if (collid == C_COLLATION_OID) + result = !(len1 == len2 && + memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0); + else + result = !(varstr_cmp(VARDATA_ANY(arg1), len1, + NameStr(*arg2), len2, + collid) == 0); + + PG_FREE_IF_COPY(arg1, 0); + + PG_RETURN_BOOL(result); +} + +Datum +btnametextcmp(PG_FUNCTION_ARGS) +{ + Name arg1 = PG_GETARG_NAME(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + int32 result; + + result = varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)), + VARDATA_ANY(arg2), VARSIZE_ANY_EXHDR(arg2), + PG_GET_COLLATION()); + + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_INT32(result); +} + +Datum +bttextnamecmp(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_PP(0); + Name arg2 = PG_GETARG_NAME(1); + int32 result; + + result = varstr_cmp(VARDATA_ANY(arg1), VARSIZE_ANY_EXHDR(arg1), + NameStr(*arg2), strlen(NameStr(*arg2)), + PG_GET_COLLATION()); + + PG_FREE_IF_COPY(arg1, 0); + + PG_RETURN_INT32(result); +} + +#define CmpCall(cmpfunc) \ + DatumGetInt32(DirectFunctionCall2Coll(cmpfunc, \ + PG_GET_COLLATION(), \ + PG_GETARG_DATUM(0), \ + PG_GETARG_DATUM(1))) + +Datum +namelttext(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(CmpCall(btnametextcmp) < 0); +} + +Datum +nameletext(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(CmpCall(btnametextcmp) <= 0); +} + +Datum +namegttext(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(CmpCall(btnametextcmp) > 0); +} + +Datum +namegetext(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(CmpCall(btnametextcmp) >= 0); +} + +Datum +textltname(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(CmpCall(bttextnamecmp) < 0); +} + +Datum +textlename(PG_FUNCTION_ARGS) +{ + 
PG_RETURN_BOOL(CmpCall(bttextnamecmp) <= 0); +} + +Datum +textgtname(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(CmpCall(bttextnamecmp) > 0); +} + +Datum +textgename(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(CmpCall(bttextnamecmp) >= 0); +} + +#undef CmpCall + + +/* + * The following operators support character-by-character comparison + * of text datums, to allow building indexes suitable for LIKE clauses. + * Note that the regular texteq/textne comparison operators, and regular + * support functions 1 and 2 with "C" collation are assumed to be + * compatible with these! + */ + +static int +internal_text_pattern_compare(text *arg1, text *arg2) +{ + int result; + int len1, + len2; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + if (result != 0) + return result; + else if (len1 < len2) + return -1; + else if (len1 > len2) + return 1; + else + return 0; +} + + +Datum +text_pattern_lt(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + int result; + + result = internal_text_pattern_compare(arg1, arg2); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result < 0); +} + + +Datum +text_pattern_le(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + int result; + + result = internal_text_pattern_compare(arg1, arg2); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result <= 0); +} + + +Datum +text_pattern_ge(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + int result; + + result = internal_text_pattern_compare(arg1, arg2); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result >= 0); +} + + +Datum +text_pattern_gt(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + int result; + + result = internal_text_pattern_compare(arg1, arg2); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result > 0); +} + + +Datum +bttext_pattern_cmp(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + int result; + + result = internal_text_pattern_compare(arg1, arg2); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_INT32(result); +} + + +Datum +bttext_pattern_sortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); + + /* Use generic string SortSupport, forcing "C" collation */ + varstr_sortsupport(ssup, TEXTOID, C_COLLATION_OID); + + MemoryContextSwitchTo(oldcontext); + + PG_RETURN_VOID(); +} + + +/*------------------------------------------------------------- + * byteaoctetlen + * + * get the number of bytes contained in an instance of type 'bytea' + *------------------------------------------------------------- + */ +Datum +byteaoctetlen(PG_FUNCTION_ARGS) +{ + Datum str = PG_GETARG_DATUM(0); + + /* We need not detoast the input at all */ + PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ); +} + +/* + * byteacat - + * takes two bytea* and returns a bytea* that is the concatenation of + * the two. + * + * Cloned from textcat and modified as required. 
+ */ +Datum +byteacat(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + bytea *t2 = PG_GETARG_BYTEA_PP(1); + + PG_RETURN_BYTEA_P(bytea_catenate(t1, t2)); +} + +/* + * bytea_catenate + * Guts of byteacat(), broken out so it can be used by other functions + * + * Arguments can be in short-header form, but not compressed or out-of-line + */ +static bytea * +bytea_catenate(bytea *t1, bytea *t2) +{ + bytea *result; + int len1, + len2, + len; + char *ptr; + + len1 = VARSIZE_ANY_EXHDR(t1); + len2 = VARSIZE_ANY_EXHDR(t2); + + /* paranoia ... probably should throw error instead? */ + if (len1 < 0) + len1 = 0; + if (len2 < 0) + len2 = 0; + + len = len1 + len2 + VARHDRSZ; + result = (bytea *) palloc(len); + + /* Set size of result string... */ + SET_VARSIZE(result, len); + + /* Fill data field of result string... */ + ptr = VARDATA(result); + if (len1 > 0) + memcpy(ptr, VARDATA_ANY(t1), len1); + if (len2 > 0) + memcpy(ptr + len1, VARDATA_ANY(t2), len2); + + return result; +} + +#define PG_STR_GET_BYTEA(str_) \ + DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_))) + +/* + * bytea_substr() + * Return a substring starting at the specified position. + * Cloned from text_substr and modified as required. + * + * Input: + * - string + * - starting position (is one-based) + * - string length (optional) + * + * If the starting position is zero or less, then return from the start of the string + * adjusting the length to be consistent with the "negative start" per SQL. + * If the length is less than zero, an ERROR is thrown. If no third argument + * (length) is provided, the length to the end of the string is assumed. + */ +Datum +bytea_substr(PG_FUNCTION_ARGS) +{ + PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0), + PG_GETARG_INT32(1), + PG_GETARG_INT32(2), + false)); +} + +/* + * bytea_substr_no_len - + * Wrapper to avoid opr_sanity failure due to + * one function accepting a different number of args. + */ +Datum +bytea_substr_no_len(PG_FUNCTION_ARGS) +{ + PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0), + PG_GETARG_INT32(1), + -1, + true)); +} + +static bytea * +bytea_substring(Datum str, + int S, + int L, + bool length_not_specified) +{ + int32 S1; /* adjusted start position */ + int32 L1; /* adjusted substring length */ + int32 E; /* end position */ + + /* + * The logic here should generally match text_substring(). + */ + S1 = Max(S, 1); + + if (length_not_specified) + { + /* + * Not passed a length - DatumGetByteaPSlice() grabs everything to the + * end of the string if we pass it a negative value for length. + */ + L1 = -1; + } + else if (L < 0) + { + /* SQL99 says to throw an error for E < S, i.e., negative length */ + ereport(ERROR, + (errcode(ERRCODE_SUBSTRING_ERROR), + errmsg("negative substring length not allowed"))); + L1 = -1; /* silence stupider compilers */ + } + else if (pg_add_s32_overflow(S, L, &E)) + { + /* + * L could be large enough for S + L to overflow, in which case the + * substring must run to end of string. + */ + L1 = -1; + } + else + { + /* + * A zero or negative value for the end position can happen if the + * start was negative or one. SQL99 says to return a zero-length + * string. + */ + if (E < 1) + return PG_STR_GET_BYTEA(""); + + L1 = E - S1; + } + + /* + * If the start position is past the end of the string, SQL99 says to + * return a zero-length string -- DatumGetByteaPSlice() will do that for + * us. We need only convert S1 to zero-based starting position. 
+ */ + return DatumGetByteaPSlice(str, S1 - 1, L1); +} + +/* + * byteaoverlay + * Replace specified substring of first string with second + * + * The SQL standard defines OVERLAY() in terms of substring and concatenation. + * This code is a direct implementation of what the standard says. + */ +Datum +byteaoverlay(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + bytea *t2 = PG_GETARG_BYTEA_PP(1); + int sp = PG_GETARG_INT32(2); /* substring start position */ + int sl = PG_GETARG_INT32(3); /* substring length */ + + PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl)); +} + +Datum +byteaoverlay_no_len(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + bytea *t2 = PG_GETARG_BYTEA_PP(1); + int sp = PG_GETARG_INT32(2); /* substring start position */ + int sl; + + sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */ + PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl)); +} + +static bytea * +bytea_overlay(bytea *t1, bytea *t2, int sp, int sl) +{ + bytea *result; + bytea *s1; + bytea *s2; + int sp_pl_sl; + + /* + * Check for possible integer-overflow cases. For negative sp, throw a + * "substring length" error because that's what should be expected + * according to the spec's definition of OVERLAY(). + */ + if (sp <= 0) + ereport(ERROR, + (errcode(ERRCODE_SUBSTRING_ERROR), + errmsg("negative substring length not allowed"))); + if (pg_add_s32_overflow(sp, sl, &sp_pl_sl)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + + s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false); + s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true); + result = bytea_catenate(s1, t2); + result = bytea_catenate(result, s2); + + return result; +} + +/* + * bit_count + */ +Datum +bytea_bit_count(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + + PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1))); +} + +/* + * byteapos - + * Return the position of the specified substring. + * Implements the SQL POSITION() function. + * Cloned from textpos and modified as required. + */ +Datum +byteapos(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + bytea *t2 = PG_GETARG_BYTEA_PP(1); + int pos; + int px, + p; + int len1, + len2; + char *p1, + *p2; + + len1 = VARSIZE_ANY_EXHDR(t1); + len2 = VARSIZE_ANY_EXHDR(t2); + + if (len2 <= 0) + PG_RETURN_INT32(1); /* result for empty pattern */ + + p1 = VARDATA_ANY(t1); + p2 = VARDATA_ANY(t2); + + pos = 0; + px = (len1 - len2); + for (p = 0; p <= px; p++) + { + if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0)) + { + pos = p + 1; + break; + }; + p1++; + }; + + PG_RETURN_INT32(pos); +} + +/*------------------------------------------------------------- + * byteaGetByte + * + * this routine treats "bytea" as an array of bytes. + * It returns the Nth byte (a number between 0 and 255). + *------------------------------------------------------------- + */ +Datum +byteaGetByte(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int32 n = PG_GETARG_INT32(1); + int len; + int byte; + + len = VARSIZE_ANY_EXHDR(v); + + if (n < 0 || n >= len) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("index %d out of valid range, 0..%d", + n, len - 1))); + + byte = ((unsigned char *) VARDATA_ANY(v))[n]; + + PG_RETURN_INT32(byte); +} + +/*------------------------------------------------------------- + * byteaGetBit + * + * This routine treats a "bytea" type like an array of bits. + * It returns the value of the Nth bit (0 or 1). 
+ * + *------------------------------------------------------------- + */ +Datum +byteaGetBit(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int64 n = PG_GETARG_INT64(1); + int byteNo, + bitNo; + int len; + int byte; + + len = VARSIZE_ANY_EXHDR(v); + + if (n < 0 || n >= (int64) len * 8) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("index %lld out of valid range, 0..%lld", + (long long) n, (long long) len * 8 - 1))); + + /* n/8 is now known < len, so safe to cast to int */ + byteNo = (int) (n / 8); + bitNo = (int) (n % 8); + + byte = ((unsigned char *) VARDATA_ANY(v))[byteNo]; + + if (byte & (1 << bitNo)) + PG_RETURN_INT32(1); + else + PG_RETURN_INT32(0); +} + +/*------------------------------------------------------------- + * byteaSetByte + * + * Given an instance of type 'bytea' creates a new one with + * the Nth byte set to the given value. + * + *------------------------------------------------------------- + */ +Datum +byteaSetByte(PG_FUNCTION_ARGS) +{ + bytea *res = PG_GETARG_BYTEA_P_COPY(0); + int32 n = PG_GETARG_INT32(1); + int32 newByte = PG_GETARG_INT32(2); + int len; + + len = VARSIZE(res) - VARHDRSZ; + + if (n < 0 || n >= len) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("index %d out of valid range, 0..%d", + n, len - 1))); + + /* + * Now set the byte. + */ + ((unsigned char *) VARDATA(res))[n] = newByte; + + PG_RETURN_BYTEA_P(res); +} + +/*------------------------------------------------------------- + * byteaSetBit + * + * Given an instance of type 'bytea' creates a new one with + * the Nth bit set to the given value. + * + *------------------------------------------------------------- + */ +Datum +byteaSetBit(PG_FUNCTION_ARGS) +{ + bytea *res = PG_GETARG_BYTEA_P_COPY(0); + int64 n = PG_GETARG_INT64(1); + int32 newBit = PG_GETARG_INT32(2); + int len; + int oldByte, + newByte; + int byteNo, + bitNo; + + len = VARSIZE(res) - VARHDRSZ; + + if (n < 0 || n >= (int64) len * 8) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("index %lld out of valid range, 0..%lld", + (long long) n, (long long) len * 8 - 1))); + + /* n/8 is now known < len, so safe to cast to int */ + byteNo = (int) (n / 8); + bitNo = (int) (n % 8); + + /* + * sanity check! + */ + if (newBit != 0 && newBit != 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("new bit must be 0 or 1"))); + + /* + * Update the byte. + */ + oldByte = ((unsigned char *) VARDATA(res))[byteNo]; + + if (newBit == 0) + newByte = oldByte & (~(1 << bitNo)); + else + newByte = oldByte | (1 << bitNo); + + ((unsigned char *) VARDATA(res))[byteNo] = newByte; + + PG_RETURN_BYTEA_P(res); +} + + +/* text_name() + * Converts a text type to a Name type. + */ +Datum +text_name(PG_FUNCTION_ARGS) +{ + text *s = PG_GETARG_TEXT_PP(0); + Name result; + int len; + + len = VARSIZE_ANY_EXHDR(s); + + /* Truncate oversize input */ + if (len >= NAMEDATALEN) + len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1); + + /* We use palloc0 here to ensure result is zero-padded */ + result = (Name) palloc0(NAMEDATALEN); + memcpy(NameStr(*result), VARDATA_ANY(s), len); + + PG_RETURN_NAME(result); +} + +/* name_text() + * Converts a Name type to a text type. 
+ */ +Datum +name_text(PG_FUNCTION_ARGS) +{ + Name s = PG_GETARG_NAME(0); + + PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s))); +} + + +/* + * textToQualifiedNameList - convert a text object to list of names + * + * This implements the input parsing needed by nextval() and other + * functions that take a text parameter representing a qualified name. + * We split the name at dots, downcase if not double-quoted, and + * truncate names if they're too long. + */ +List * +textToQualifiedNameList(text *textval) +{ + char *rawname; + List *result = NIL; + List *namelist; + ListCell *l; + + /* Convert to C string (handles possible detoasting). */ + /* Note we rely on being able to modify rawname below. */ + rawname = text_to_cstring(textval); + + if (!SplitIdentifierString(rawname, '.', &namelist)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_NAME), + errmsg("invalid name syntax"))); + + if (namelist == NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_NAME), + errmsg("invalid name syntax"))); + + foreach(l, namelist) + { + char *curname = (char *) lfirst(l); + + result = lappend(result, makeString(pstrdup(curname))); + } + + pfree(rawname); + list_free(namelist); + + return result; +} + +/* + * SplitIdentifierString --- parse a string containing identifiers + * + * This is the guts of textToQualifiedNameList, and is exported for use in + * other situations such as parsing GUC variables. In the GUC case, it's + * important to avoid memory leaks, so the API is designed to minimize the + * amount of stuff that needs to be allocated and freed. + * + * Inputs: + * rawstring: the input string; must be overwritable! On return, it's + * been modified to contain the separated identifiers. + * separator: the separator punctuation expected between identifiers + * (typically '.' or ','). Whitespace may also appear around + * identifiers. + * Outputs: + * namelist: filled with a palloc'd list of pointers to identifiers within + * rawstring. Caller should list_free() this even on error return. + * + * Returns true if okay, false if there is a syntax error in the string. + * + * Note that an empty string is considered okay here, though not in + * textToQualifiedNameList. + */ +bool +SplitIdentifierString(char *rawstring, char separator, + List **namelist) +{ + char *nextp = rawstring; + bool done = false; + + *namelist = NIL; + + while (scanner_isspace(*nextp)) + nextp++; /* skip leading whitespace */ + + if (*nextp == '\0') + return true; /* allow empty string */ + + /* At the top of the loop, we are at start of a new identifier. */ + do + { + char *curname; + char *endp; + + if (*nextp == '"') + { + /* Quoted name --- collapse quote-quote pairs, no downcasing */ + curname = nextp + 1; + for (;;) + { + endp = strchr(nextp + 1, '"'); + if (endp == NULL) + return false; /* mismatched quotes */ + if (endp[1] != '"') + break; /* found end of quoted name */ + /* Collapse adjacent quotes into one quote, and look again */ + memmove(endp, endp + 1, strlen(endp)); + nextp = endp; + } + /* endp now points at the terminating quote */ + nextp = endp + 1; + } + else + { + /* Unquoted name --- extends to separator or whitespace */ + char *downname; + int len; + + curname = nextp; + while (*nextp && *nextp != separator && + !scanner_isspace(*nextp)) + nextp++; + endp = nextp; + if (curname == nextp) + return false; /* empty unquoted name not allowed */ + + /* + * Downcase the identifier, using same code as main lexer does. 
+ * + * XXX because we want to overwrite the input in-place, we cannot + * support a downcasing transformation that increases the string + * length. This is not a problem given the current implementation + * of downcase_truncate_identifier, but we'll probably have to do + * something about this someday. + */ + len = endp - curname; + downname = downcase_truncate_identifier(curname, len, false); + Assert(strlen(downname) <= len); + strncpy(curname, downname, len); /* strncpy is required here */ + pfree(downname); + } + + while (scanner_isspace(*nextp)) + nextp++; /* skip trailing whitespace */ + + if (*nextp == separator) + { + nextp++; + while (scanner_isspace(*nextp)) + nextp++; /* skip leading whitespace for next */ + /* we expect another name, so done remains false */ + } + else if (*nextp == '\0') + done = true; + else + return false; /* invalid syntax */ + + /* Now safe to overwrite separator with a null */ + *endp = '\0'; + + /* Truncate name if it's overlength */ + truncate_identifier(curname, strlen(curname), false); + + /* + * Finished isolating current name --- add it to list + */ + *namelist = lappend(*namelist, curname); + + /* Loop back if we didn't reach end of string */ + } while (!done); + + return true; +} + + +/* + * SplitDirectoriesString --- parse a string containing file/directory names + * + * This works fine on file names too; the function name is historical. + * + * This is similar to SplitIdentifierString, except that the parsing + * rules are meant to handle pathnames instead of identifiers: there is + * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1, + * and we apply canonicalize_path() to each extracted string. Because of the + * last, the returned strings are separately palloc'd rather than being + * pointers into rawstring --- but we still scribble on rawstring. + * + * Inputs: + * rawstring: the input string; must be modifiable! + * separator: the separator punctuation expected between directories + * (typically ',' or ';'). Whitespace may also appear around + * directories. + * Outputs: + * namelist: filled with a palloc'd list of directory names. + * Caller should list_free_deep() this even on error return. + * + * Returns true if okay, false if there is a syntax error in the string. + * + * Note that an empty string is considered okay here. + */ +bool +SplitDirectoriesString(char *rawstring, char separator, + List **namelist) +{ + char *nextp = rawstring; + bool done = false; + + *namelist = NIL; + + while (scanner_isspace(*nextp)) + nextp++; /* skip leading whitespace */ + + if (*nextp == '\0') + return true; /* allow empty string */ + + /* At the top of the loop, we are at start of a new directory. 
*/ + do + { + char *curname; + char *endp; + + if (*nextp == '"') + { + /* Quoted name --- collapse quote-quote pairs */ + curname = nextp + 1; + for (;;) + { + endp = strchr(nextp + 1, '"'); + if (endp == NULL) + return false; /* mismatched quotes */ + if (endp[1] != '"') + break; /* found end of quoted name */ + /* Collapse adjacent quotes into one quote, and look again */ + memmove(endp, endp + 1, strlen(endp)); + nextp = endp; + } + /* endp now points at the terminating quote */ + nextp = endp + 1; + } + else + { + /* Unquoted name --- extends to separator or end of string */ + curname = endp = nextp; + while (*nextp && *nextp != separator) + { + /* trailing whitespace should not be included in name */ + if (!scanner_isspace(*nextp)) + endp = nextp + 1; + nextp++; + } + if (curname == endp) + return false; /* empty unquoted name not allowed */ + } + + while (scanner_isspace(*nextp)) + nextp++; /* skip trailing whitespace */ + + if (*nextp == separator) + { + nextp++; + while (scanner_isspace(*nextp)) + nextp++; /* skip leading whitespace for next */ + /* we expect another name, so done remains false */ + } + else if (*nextp == '\0') + done = true; + else + return false; /* invalid syntax */ + + /* Now safe to overwrite separator with a null */ + *endp = '\0'; + + /* Truncate path if it's overlength */ + if (strlen(curname) >= MAXPGPATH) + curname[MAXPGPATH - 1] = '\0'; + + /* + * Finished isolating current name --- add it to list + */ + curname = pstrdup(curname); + canonicalize_path(curname); + *namelist = lappend(*namelist, curname); + + /* Loop back if we didn't reach end of string */ + } while (!done); + + return true; +} + + +/* + * SplitGUCList --- parse a string containing identifiers or file names + * + * This is used to split the value of a GUC_LIST_QUOTE GUC variable, without + * presuming whether the elements will be taken as identifiers or file names. + * We assume the input has already been through flatten_set_variable_args(), + * so that we need never downcase (if appropriate, that was done already). + * Nor do we ever truncate, since we don't know the correct max length. + * We disallow embedded whitespace for simplicity (it shouldn't matter, + * because any embedded whitespace should have led to double-quoting). + * Otherwise the API is identical to SplitIdentifierString. + * + * XXX it's annoying to have so many copies of this string-splitting logic. + * However, it's not clear that having one function with a bunch of option + * flags would be much better. + * + * XXX there is a version of this function in src/bin/pg_dump/dumputils.c. + * Be sure to update that if you have to change this. + * + * Inputs: + * rawstring: the input string; must be overwritable! On return, it's + * been modified to contain the separated identifiers. + * separator: the separator punctuation expected between identifiers + * (typically '.' or ','). Whitespace may also appear around + * identifiers. + * Outputs: + * namelist: filled with a palloc'd list of pointers to identifiers within + * rawstring. Caller should list_free() this even on error return. + * + * Returns true if okay, false if there is a syntax error in the string. + */ +bool +SplitGUCList(char *rawstring, char separator, + List **namelist) +{ + char *nextp = rawstring; + bool done = false; + + *namelist = NIL; + + while (scanner_isspace(*nextp)) + nextp++; /* skip leading whitespace */ + + if (*nextp == '\0') + return true; /* allow empty string */ + + /* At the top of the loop, we are at start of a new identifier. 
*/ + do + { + char *curname; + char *endp; + + if (*nextp == '"') + { + /* Quoted name --- collapse quote-quote pairs */ + curname = nextp + 1; + for (;;) + { + endp = strchr(nextp + 1, '"'); + if (endp == NULL) + return false; /* mismatched quotes */ + if (endp[1] != '"') + break; /* found end of quoted name */ + /* Collapse adjacent quotes into one quote, and look again */ + memmove(endp, endp + 1, strlen(endp)); + nextp = endp; + } + /* endp now points at the terminating quote */ + nextp = endp + 1; + } + else + { + /* Unquoted name --- extends to separator or whitespace */ + curname = nextp; + while (*nextp && *nextp != separator && + !scanner_isspace(*nextp)) + nextp++; + endp = nextp; + if (curname == nextp) + return false; /* empty unquoted name not allowed */ + } + + while (scanner_isspace(*nextp)) + nextp++; /* skip trailing whitespace */ + + if (*nextp == separator) + { + nextp++; + while (scanner_isspace(*nextp)) + nextp++; /* skip leading whitespace for next */ + /* we expect another name, so done remains false */ + } + else if (*nextp == '\0') + done = true; + else + return false; /* invalid syntax */ + + /* Now safe to overwrite separator with a null */ + *endp = '\0'; + + /* + * Finished isolating current name --- add it to list + */ + *namelist = lappend(*namelist, curname); + + /* Loop back if we didn't reach end of string */ + } while (!done); + + return true; +} + + +/***************************************************************************** + * Comparison Functions used for bytea + * + * Note: btree indexes need these routines not to leak memory; therefore, + * be careful to free working copies of toasted datums. Most places don't + * need to be so careful. + *****************************************************************************/ + +Datum +byteaeq(PG_FUNCTION_ARGS) +{ + Datum arg1 = PG_GETARG_DATUM(0); + Datum arg2 = PG_GETARG_DATUM(1); + bool result; + Size len1, + len2; + + /* + * We can use a fast path for unequal lengths, which might save us from + * having to detoast one or both values. + */ + len1 = toast_raw_datum_size(arg1); + len2 = toast_raw_datum_size(arg2); + if (len1 != len2) + result = false; + else + { + bytea *barg1 = DatumGetByteaPP(arg1); + bytea *barg2 = DatumGetByteaPP(arg2); + + result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2), + len1 - VARHDRSZ) == 0); + + PG_FREE_IF_COPY(barg1, 0); + PG_FREE_IF_COPY(barg2, 1); + } + + PG_RETURN_BOOL(result); +} + +Datum +byteane(PG_FUNCTION_ARGS) +{ + Datum arg1 = PG_GETARG_DATUM(0); + Datum arg2 = PG_GETARG_DATUM(1); + bool result; + Size len1, + len2; + + /* + * We can use a fast path for unequal lengths, which might save us from + * having to detoast one or both values. 
+ */ + len1 = toast_raw_datum_size(arg1); + len2 = toast_raw_datum_size(arg2); + if (len1 != len2) + result = true; + else + { + bytea *barg1 = DatumGetByteaPP(arg1); + bytea *barg2 = DatumGetByteaPP(arg2); + + result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2), + len1 - VARHDRSZ) != 0); + + PG_FREE_IF_COPY(barg1, 0); + PG_FREE_IF_COPY(barg2, 1); + } + + PG_RETURN_BOOL(result); +} + +Datum +bytealt(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2))); +} + +Datum +byteale(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2))); +} + +Datum +byteagt(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2))); +} + +Datum +byteage(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2))); +} + +Datum +byteacmp(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + if ((cmp == 0) && (len1 != len2)) + cmp = (len1 < len2) ? -1 : 1; + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_INT32(cmp); +} + +Datum +bytea_sortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); + + /* Use generic string SortSupport, forcing "C" collation */ + varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID); + + MemoryContextSwitchTo(oldcontext); + + PG_RETURN_VOID(); +} + +/* + * appendStringInfoText + * + * Append a text to str. + * Like appendStringInfoString(str, text_to_cstring(t)) but faster. 
+ */ +static void +appendStringInfoText(StringInfo str, const text *t) +{ + appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t)); +} + +/* + * replace_text + * replace all occurrences of 'old_sub_str' in 'orig_str' + * with 'new_sub_str' to form 'new_str' + * + * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == '' + * otherwise returns 'new_str' + */ +Datum +replace_text(PG_FUNCTION_ARGS) +{ + text *src_text = PG_GETARG_TEXT_PP(0); + text *from_sub_text = PG_GETARG_TEXT_PP(1); + text *to_sub_text = PG_GETARG_TEXT_PP(2); + int src_text_len; + int from_sub_text_len; + TextPositionState state; + text *ret_text; + int chunk_len; + char *curr_ptr; + char *start_ptr; + StringInfoData str; + bool found; + + src_text_len = VARSIZE_ANY_EXHDR(src_text); + from_sub_text_len = VARSIZE_ANY_EXHDR(from_sub_text); + + /* Return unmodified source string if empty source or pattern */ + if (src_text_len < 1 || from_sub_text_len < 1) + { + PG_RETURN_TEXT_P(src_text); + } + + text_position_setup(src_text, from_sub_text, PG_GET_COLLATION(), &state); + + found = text_position_next(&state); + + /* When the from_sub_text is not found, there is nothing to do. */ + if (!found) + { + text_position_cleanup(&state); + PG_RETURN_TEXT_P(src_text); + } + curr_ptr = text_position_get_match_ptr(&state); + start_ptr = VARDATA_ANY(src_text); + + initStringInfo(&str); + + do + { + CHECK_FOR_INTERRUPTS(); + + /* copy the data skipped over by last text_position_next() */ + chunk_len = curr_ptr - start_ptr; + appendBinaryStringInfo(&str, start_ptr, chunk_len); + + appendStringInfoText(&str, to_sub_text); + + start_ptr = curr_ptr + from_sub_text_len; + + found = text_position_next(&state); + if (found) + curr_ptr = text_position_get_match_ptr(&state); + } + while (found); + + /* copy trailing data */ + chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr; + appendBinaryStringInfo(&str, start_ptr, chunk_len); + + text_position_cleanup(&state); + + ret_text = cstring_to_text_with_len(str.data, str.len); + pfree(str.data); + + PG_RETURN_TEXT_P(ret_text); +} + +/* + * check_replace_text_has_escape + * + * Returns 0 if text contains no backslashes that need processing. + * Returns 1 if text contains backslashes, but not regexp submatch specifiers. + * Returns 2 if text contains regexp submatch specifiers (\1 .. \9). + */ +static int +check_replace_text_has_escape(const text *replace_text) +{ + int result = 0; + const char *p = VARDATA_ANY(replace_text); + const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text); + + while (p < p_end) + { + /* Find next escape char, if any. */ + p = memchr(p, '\\', p_end - p); + if (p == NULL) + break; + p++; + /* Note: a backslash at the end doesn't require extra processing. */ + if (p < p_end) + { + if (*p >= '1' && *p <= '9') + return 2; /* Found a submatch specifier, so done */ + result = 1; /* Found some other sequence, keep looking */ + p++; + } + } + return result; +} + +/* + * appendStringInfoRegexpSubstr + * + * Append replace_text to str, substituting regexp back references for + * \n escapes. start_ptr is the start of the match in the source string, + * at logical character position data_pos. + */ +static void +appendStringInfoRegexpSubstr(StringInfo str, text *replace_text, + regmatch_t *pmatch, + char *start_ptr, int data_pos) +{ + const char *p = VARDATA_ANY(replace_text); + const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text); + + while (p < p_end) + { + const char *chunk_start = p; + int so; + int eo; + + /* Find next escape char, if any. 
*/ + p = memchr(p, '\\', p_end - p); + if (p == NULL) + p = p_end; + + /* Copy the text we just scanned over, if any. */ + if (p > chunk_start) + appendBinaryStringInfo(str, chunk_start, p - chunk_start); + + /* Done if at end of string, else advance over escape char. */ + if (p >= p_end) + break; + p++; + + if (p >= p_end) + { + /* Escape at very end of input. Treat same as unexpected char */ + appendStringInfoChar(str, '\\'); + break; + } + + if (*p >= '1' && *p <= '9') + { + /* Use the back reference of regexp. */ + int idx = *p - '0'; + + so = pmatch[idx].rm_so; + eo = pmatch[idx].rm_eo; + p++; + } + else if (*p == '&') + { + /* Use the entire matched string. */ + so = pmatch[0].rm_so; + eo = pmatch[0].rm_eo; + p++; + } + else if (*p == '\\') + { + /* \\ means transfer one \ to output. */ + appendStringInfoChar(str, '\\'); + p++; + continue; + } + else + { + /* + * If escape char is not followed by any expected char, just treat + * it as ordinary data to copy. (XXX would it be better to throw + * an error?) + */ + appendStringInfoChar(str, '\\'); + continue; + } + + if (so >= 0 && eo >= 0) + { + /* + * Copy the text that is back reference of regexp. Note so and eo + * are counted in characters not bytes. + */ + char *chunk_start; + int chunk_len; + + Assert(so >= data_pos); + chunk_start = start_ptr; + chunk_start += charlen_to_bytelen(chunk_start, so - data_pos); + chunk_len = charlen_to_bytelen(chunk_start, eo - so); + appendBinaryStringInfo(str, chunk_start, chunk_len); + } + } +} + +/* + * replace_text_regexp + * + * replace substring(s) in src_text that match pattern with replace_text. + * The replace_text can contain backslash markers to substitute + * (parts of) the matched text. + * + * cflags: regexp compile flags. + * collation: collation to use. + * search_start: the character (not byte) offset in src_text at which to + * begin searching. + * n: if 0, replace all matches; if > 0, replace only the N'th match. + */ +text * +replace_text_regexp(text *src_text, text *pattern_text, + text *replace_text, + int cflags, Oid collation, + int search_start, int n) +{ + text *ret_text; + regex_t *re; + int src_text_len = VARSIZE_ANY_EXHDR(src_text); + int nmatches = 0; + StringInfoData buf; + regmatch_t pmatch[10]; /* main match, plus \1 to \9 */ + int nmatch = lengthof(pmatch); + pg_wchar *data; + size_t data_len; + int data_pos; + char *start_ptr; + int escape_status; + + initStringInfo(&buf); + + /* Convert data string to wide characters. */ + data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar)); + data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len); + + /* Check whether replace_text has escapes, especially regexp submatches. */ + escape_status = check_replace_text_has_escape(replace_text); + + /* If no regexp submatches, we can use REG_NOSUB. */ + if (escape_status < 2) + { + cflags |= REG_NOSUB; + /* Also tell pg_regexec we only want the whole-match location. */ + nmatch = 1; + } + + /* Prepare the regexp. 
*/ + re = RE_compile_and_cache(pattern_text, cflags, collation); + + /* start_ptr points to the data_pos'th character of src_text */ + start_ptr = (char *) VARDATA_ANY(src_text); + data_pos = 0; + + while (search_start <= data_len) + { + int regexec_result; + + CHECK_FOR_INTERRUPTS(); + + regexec_result = pg_regexec(re, + data, + data_len, + search_start, + NULL, /* no details */ + nmatch, + pmatch, + 0); + + if (regexec_result == REG_NOMATCH) + break; + + if (regexec_result != REG_OKAY) + { + char errMsg[100]; + + pg_regerror(regexec_result, re, errMsg, sizeof(errMsg)); + ereport(ERROR, + (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), + errmsg("regular expression failed: %s", errMsg))); + } + + /* + * Count matches, and decide whether to replace this match. + */ + nmatches++; + if (n > 0 && nmatches != n) + { + /* + * No, so advance search_start, but not start_ptr/data_pos. (Thus, + * we treat the matched text as if it weren't matched, and copy it + * to the output later.) + */ + search_start = pmatch[0].rm_eo; + if (pmatch[0].rm_so == pmatch[0].rm_eo) + search_start++; + continue; + } + + /* + * Copy the text to the left of the match position. Note we are given + * character not byte indexes. + */ + if (pmatch[0].rm_so - data_pos > 0) + { + int chunk_len; + + chunk_len = charlen_to_bytelen(start_ptr, + pmatch[0].rm_so - data_pos); + appendBinaryStringInfo(&buf, start_ptr, chunk_len); + + /* + * Advance start_ptr over that text, to avoid multiple rescans of + * it if the replace_text contains multiple back-references. + */ + start_ptr += chunk_len; + data_pos = pmatch[0].rm_so; + } + + /* + * Copy the replace_text, processing escapes if any are present. + */ + if (escape_status > 0) + appendStringInfoRegexpSubstr(&buf, replace_text, pmatch, + start_ptr, data_pos); + else + appendStringInfoText(&buf, replace_text); + + /* Advance start_ptr and data_pos over the matched text. */ + start_ptr += charlen_to_bytelen(start_ptr, + pmatch[0].rm_eo - data_pos); + data_pos = pmatch[0].rm_eo; + + /* + * If we only want to replace one occurrence, we're done. + */ + if (n > 0) + break; + + /* + * Advance search position. Normally we start the next search at the + * end of the previous match; but if the match was of zero length, we + * have to advance by one character, or we'd just find the same match + * again. + */ + search_start = data_pos; + if (pmatch[0].rm_so == pmatch[0].rm_eo) + search_start++; + } + + /* + * Copy the text to the right of the last match. 
+ */ + if (data_pos < data_len) + { + int chunk_len; + + chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr; + appendBinaryStringInfo(&buf, start_ptr, chunk_len); + } + + ret_text = cstring_to_text_with_len(buf.data, buf.len); + pfree(buf.data); + pfree(data); + + return ret_text; +} + +/* + * split_part + * parse input string based on provided field separator + * return N'th item (1 based, negative counts from end) + */ +Datum +split_part(PG_FUNCTION_ARGS) +{ + text *inputstring = PG_GETARG_TEXT_PP(0); + text *fldsep = PG_GETARG_TEXT_PP(1); + int fldnum = PG_GETARG_INT32(2); + int inputstring_len; + int fldsep_len; + TextPositionState state; + char *start_ptr; + char *end_ptr; + text *result_text; + bool found; + + /* field number is 1 based */ + if (fldnum == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("field position must not be zero"))); + + inputstring_len = VARSIZE_ANY_EXHDR(inputstring); + fldsep_len = VARSIZE_ANY_EXHDR(fldsep); + + /* return empty string for empty input string */ + if (inputstring_len < 1) + PG_RETURN_TEXT_P(cstring_to_text("")); + + /* handle empty field separator */ + if (fldsep_len < 1) + { + /* if first or last field, return input string, else empty string */ + if (fldnum == 1 || fldnum == -1) + PG_RETURN_TEXT_P(inputstring); + else + PG_RETURN_TEXT_P(cstring_to_text("")); + } + + /* find the first field separator */ + text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state); + + found = text_position_next(&state); + + /* special case if fldsep not found at all */ + if (!found) + { + text_position_cleanup(&state); + /* if first or last field, return input string, else empty string */ + if (fldnum == 1 || fldnum == -1) + PG_RETURN_TEXT_P(inputstring); + else + PG_RETURN_TEXT_P(cstring_to_text("")); + } + + /* + * take care of a negative field number (i.e. 
count from the right) by + * converting to a positive field number; we need total number of fields + */ + if (fldnum < 0) + { + /* we found a fldsep, so there are at least two fields */ + int numfields = 2; + + while (text_position_next(&state)) + numfields++; + + /* special case of last field does not require an extra pass */ + if (fldnum == -1) + { + start_ptr = text_position_get_match_ptr(&state) + fldsep_len; + end_ptr = VARDATA_ANY(inputstring) + inputstring_len; + text_position_cleanup(&state); + PG_RETURN_TEXT_P(cstring_to_text_with_len(start_ptr, + end_ptr - start_ptr)); + } + + /* else, convert fldnum to positive notation */ + fldnum += numfields + 1; + + /* if nonexistent field, return empty string */ + if (fldnum <= 0) + { + text_position_cleanup(&state); + PG_RETURN_TEXT_P(cstring_to_text("")); + } + + /* reset to pointing at first match, but now with positive fldnum */ + text_position_reset(&state); + found = text_position_next(&state); + Assert(found); + } + + /* identify bounds of first field */ + start_ptr = VARDATA_ANY(inputstring); + end_ptr = text_position_get_match_ptr(&state); + + while (found && --fldnum > 0) + { + /* identify bounds of next field */ + start_ptr = end_ptr + fldsep_len; + found = text_position_next(&state); + if (found) + end_ptr = text_position_get_match_ptr(&state); + } + + text_position_cleanup(&state); + + if (fldnum > 0) + { + /* N'th field separator not found */ + /* if last field requested, return it, else empty string */ + if (fldnum == 1) + { + int last_len = start_ptr - VARDATA_ANY(inputstring); + + result_text = cstring_to_text_with_len(start_ptr, + inputstring_len - last_len); + } + else + result_text = cstring_to_text(""); + } + else + { + /* non-last field requested */ + result_text = cstring_to_text_with_len(start_ptr, end_ptr - start_ptr); + } + + PG_RETURN_TEXT_P(result_text); +} + +/* + * Convenience function to return true when two text params are equal. + */ +static bool +text_isequal(text *txt1, text *txt2, Oid collid) +{ + return DatumGetBool(DirectFunctionCall2Coll(texteq, + collid, + PointerGetDatum(txt1), + PointerGetDatum(txt2))); +} + +/* + * text_to_array + * parse input string and return text array of elements, + * based on provided field separator + */ +Datum +text_to_array(PG_FUNCTION_ARGS) +{ + SplitTextOutputData tstate; + + /* For array output, tstate should start as all zeroes */ + memset(&tstate, 0, sizeof(tstate)); + + if (!split_text(fcinfo, &tstate)) + PG_RETURN_NULL(); + + if (tstate.astate == NULL) + PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID)); + + PG_RETURN_DATUM(makeArrayResult(tstate.astate, + CurrentMemoryContext)); +} + +/* + * text_to_array_null + * parse input string and return text array of elements, + * based on provided field separator and null string + * + * This is a separate entry point only to prevent the regression tests from + * complaining about different argument sets for the same internal function. 
+ */ +Datum +text_to_array_null(PG_FUNCTION_ARGS) +{ + return text_to_array(fcinfo); +} + +/* + * text_to_table + * parse input string and return table of elements, + * based on provided field separator + */ +Datum +text_to_table(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsi = (ReturnSetInfo *) fcinfo->resultinfo; + SplitTextOutputData tstate; + + tstate.astate = NULL; + InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC); + tstate.tupstore = rsi->setResult; + tstate.tupdesc = rsi->setDesc; + + (void) split_text(fcinfo, &tstate); + + return (Datum) 0; +} + +/* + * text_to_table_null + * parse input string and return table of elements, + * based on provided field separator and null string + * + * This is a separate entry point only to prevent the regression tests from + * complaining about different argument sets for the same internal function. + */ +Datum +text_to_table_null(PG_FUNCTION_ARGS) +{ + return text_to_table(fcinfo); +} + +/* + * Common code for text_to_array, text_to_array_null, text_to_table + * and text_to_table_null functions. + * + * These are not strict so we have to test for null inputs explicitly. + * Returns false if result is to be null, else returns true. + * + * Note that if the result is valid but empty (zero elements), we return + * without changing *tstate --- caller must handle that case, too. + */ +static bool +split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate) +{ + text *inputstring; + text *fldsep; + text *null_string; + Oid collation = PG_GET_COLLATION(); + int inputstring_len; + int fldsep_len; + char *start_ptr; + text *result_text; + + /* when input string is NULL, then result is NULL too */ + if (PG_ARGISNULL(0)) + return false; + + inputstring = PG_GETARG_TEXT_PP(0); + + /* fldsep can be NULL */ + if (!PG_ARGISNULL(1)) + fldsep = PG_GETARG_TEXT_PP(1); + else + fldsep = NULL; + + /* null_string can be NULL or omitted */ + if (PG_NARGS() > 2 && !PG_ARGISNULL(2)) + null_string = PG_GETARG_TEXT_PP(2); + else + null_string = NULL; + + if (fldsep != NULL) + { + /* + * Normal case with non-null fldsep. Use the text_position machinery + * to search for occurrences of fldsep. 
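The split_text() loop that follows repeatedly locates the next separator, emits the chunk before it, and finally emits the remainder as the last field. A rough standalone approximation in plain C, with strstr() standing in for the text_position machinery (assumes a non-empty separator; not the backend implementation):

#include <stdio.h>
#include <string.h>

/* Print each field of 'input', splitting on the substring 'sep' (non-empty). */
static void
split_and_print(const char *input, const char *sep)
{
    const char *start = input;
    size_t      seplen = strlen(sep);
    const char *hit;

    while ((hit = strstr(start, sep)) != NULL)
    {
        printf("field: \"%.*s\"\n", (int) (hit - start), start);
        start = hit + seplen;               /* continue after the separator */
    }
    printf("field: \"%s\"\n", start);       /* the trailing field */
}

int
main(void)
{
    split_and_print("one::two::three", "::");
    return 0;
}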
+ */ + TextPositionState state; + + inputstring_len = VARSIZE_ANY_EXHDR(inputstring); + fldsep_len = VARSIZE_ANY_EXHDR(fldsep); + + /* return empty set for empty input string */ + if (inputstring_len < 1) + return true; + + /* empty field separator: return input string as a one-element set */ + if (fldsep_len < 1) + { + split_text_accum_result(tstate, inputstring, + null_string, collation); + return true; + } + + text_position_setup(inputstring, fldsep, collation, &state); + + start_ptr = VARDATA_ANY(inputstring); + + for (;;) + { + bool found; + char *end_ptr; + int chunk_len; + + CHECK_FOR_INTERRUPTS(); + + found = text_position_next(&state); + if (!found) + { + /* fetch last field */ + chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr; + end_ptr = NULL; /* not used, but some compilers complain */ + } + else + { + /* fetch non-last field */ + end_ptr = text_position_get_match_ptr(&state); + chunk_len = end_ptr - start_ptr; + } + + /* build a temp text datum to pass to split_text_accum_result */ + result_text = cstring_to_text_with_len(start_ptr, chunk_len); + + /* stash away this field */ + split_text_accum_result(tstate, result_text, + null_string, collation); + + pfree(result_text); + + if (!found) + break; + + start_ptr = end_ptr + fldsep_len; + } + + text_position_cleanup(&state); + } + else + { + /* + * When fldsep is NULL, each character in the input string becomes a + * separate element in the result set. The separator is effectively + * the space between characters. + */ + inputstring_len = VARSIZE_ANY_EXHDR(inputstring); + + start_ptr = VARDATA_ANY(inputstring); + + while (inputstring_len > 0) + { + int chunk_len = pg_mblen(start_ptr); + + CHECK_FOR_INTERRUPTS(); + + /* build a temp text datum to pass to split_text_accum_result */ + result_text = cstring_to_text_with_len(start_ptr, chunk_len); + + /* stash away this field */ + split_text_accum_result(tstate, result_text, + null_string, collation); + + pfree(result_text); + + start_ptr += chunk_len; + inputstring_len -= chunk_len; + } + } + + return true; +} + +/* + * Add text item to result set (table or array). + * + * This is also responsible for checking to see if the item matches + * the null_string, in which case we should emit NULL instead. + */ +static void +split_text_accum_result(SplitTextOutputData *tstate, + text *field_value, + text *null_string, + Oid collation) +{ + bool is_null = false; + + if (null_string && text_isequal(field_value, null_string, collation)) + is_null = true; + + if (tstate->tupstore) + { + Datum values[1]; + bool nulls[1]; + + values[0] = PointerGetDatum(field_value); + nulls[0] = is_null; + + tuplestore_putvalues(tstate->tupstore, + tstate->tupdesc, + values, + nulls); + } + else + { + tstate->astate = accumArrayResult(tstate->astate, + PointerGetDatum(field_value), + is_null, + TEXTOID, + CurrentMemoryContext); + } +} + +/* + * array_to_text + * concatenate Cstring representation of input array elements + * using provided field separator + */ +Datum +array_to_text(PG_FUNCTION_ARGS) +{ + ArrayType *v = PG_GETARG_ARRAYTYPE_P(0); + char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1)); + + PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL)); +} + +/* + * array_to_text_null + * concatenate Cstring representation of input array elements + * using provided field separator and null string + * + * This version is not strict so we have to test for null inputs explicitly. 
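When fldsep is NULL, the branch above walks the input one character at a time rather than one byte at a time, via pg_mblen(). A self-contained approximation of that walk for UTF-8 input, with a simplified length helper (assumes valid UTF-8; not the server's encoding support):

#include <stdio.h>

/* Rough UTF-8 sequence length from the lead byte (assumes valid input). */
static int
utf8_len(unsigned char c)
{
    if (c < 0x80)
        return 1;
    if (c < 0xE0)
        return 2;
    if (c < 0xF0)
        return 3;
    return 4;
}

int
main(void)
{
    const char *s = "ab\xC3\xA9";           /* "ab" plus e-acute in UTF-8 */

    while (*s)
    {
        int len = utf8_len((unsigned char) *s);

        printf("element: \"%.*s\"\n", len, s);  /* one element per character */
        s += len;
    }
    return 0;
}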
+ */ +Datum +array_to_text_null(PG_FUNCTION_ARGS) +{ + ArrayType *v; + char *fldsep; + char *null_string; + + /* returns NULL when first or second parameter is NULL */ + if (PG_ARGISNULL(0) || PG_ARGISNULL(1)) + PG_RETURN_NULL(); + + v = PG_GETARG_ARRAYTYPE_P(0); + fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1)); + + /* NULL null string is passed through as a null pointer */ + if (!PG_ARGISNULL(2)) + null_string = text_to_cstring(PG_GETARG_TEXT_PP(2)); + else + null_string = NULL; + + PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string)); +} + +/* + * common code for array_to_text and array_to_text_null functions + */ +static text * +array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v, + const char *fldsep, const char *null_string) +{ + text *result; + int nitems, + *dims, + ndims; + Oid element_type; + int typlen; + bool typbyval; + char typalign; + StringInfoData buf; + bool printed = false; + char *p; + bits8 *bitmap; + int bitmask; + int i; + ArrayMetaState *my_extra; + + ndims = ARR_NDIM(v); + dims = ARR_DIMS(v); + nitems = ArrayGetNItems(ndims, dims); + + /* if there are no elements, return an empty string */ + if (nitems == 0) + return cstring_to_text_with_len("", 0); + + element_type = ARR_ELEMTYPE(v); + initStringInfo(&buf); + + /* + * We arrange to look up info about element type, including its output + * conversion proc, only once per series of calls, assuming the element + * type doesn't change underneath us. + */ + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL) + { + fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(ArrayMetaState)); + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + my_extra->element_type = ~element_type; + } + + if (my_extra->element_type != element_type) + { + /* + * Get info about element type, including its output conversion proc + */ + get_type_io_data(element_type, IOFunc_output, + &my_extra->typlen, &my_extra->typbyval, + &my_extra->typalign, &my_extra->typdelim, + &my_extra->typioparam, &my_extra->typiofunc); + fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc, + fcinfo->flinfo->fn_mcxt); + my_extra->element_type = element_type; + } + typlen = my_extra->typlen; + typbyval = my_extra->typbyval; + typalign = my_extra->typalign; + + p = ARR_DATA_PTR(v); + bitmap = ARR_NULLBITMAP(v); + bitmask = 1; + + for (i = 0; i < nitems; i++) + { + Datum itemvalue; + char *value; + + /* Get source element, checking for NULL */ + if (bitmap && (*bitmap & bitmask) == 0) + { + /* if null_string is NULL, we just ignore null elements */ + if (null_string != NULL) + { + if (printed) + appendStringInfo(&buf, "%s%s", fldsep, null_string); + else + appendStringInfoString(&buf, null_string); + printed = true; + } + } + else + { + itemvalue = fetch_att(p, typbyval, typlen); + + value = OutputFunctionCall(&my_extra->proc, itemvalue); + + if (printed) + appendStringInfo(&buf, "%s%s", fldsep, value); + else + appendStringInfoString(&buf, value); + printed = true; + + p = att_addlength_pointer(p, typlen, p); + p = (char *) att_align_nominal(p, typalign); + } + + /* advance bitmap pointer if any */ + if (bitmap) + { + bitmask <<= 1; + if (bitmask == 0x100) + { + bitmap++; + bitmask = 1; + } + } + } + + result = cstring_to_text_with_len(buf.data, buf.len); + pfree(buf.data); + + return result; +} + +#define HEXBASE 16 +/* + * Convert an int32 to a string containing a base 16 (hex) representation of + * the number. 
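The to_hex functions that follow fill a buffer from the end using a lookup table of hex digits. A minimal standalone version of the same loop (plain C, not the backend code):

#include <stdio.h>

/* Write the hex representation of 'value' into buf; returns the start. */
static char *
to_hex(unsigned int value, char *buf, int buflen)
{
    const char *digits = "0123456789abcdef";
    char       *ptr = buf + buflen - 1;

    *ptr = '\0';
    do
    {
        *--ptr = digits[value % 16];
        value /= 16;
    } while (ptr > buf && value);

    return ptr;
}

int
main(void)
{
    char buf[16];

    printf("%s\n", to_hex(3735928559u, buf, (int) sizeof(buf)));    /* deadbeef */
    return 0;
}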
+ */ +Datum +to_hex32(PG_FUNCTION_ARGS) +{ + uint32 value = (uint32) PG_GETARG_INT32(0); + char *ptr; + const char *digits = "0123456789abcdef"; + char buf[32]; /* bigger than needed, but reasonable */ + + ptr = buf + sizeof(buf) - 1; + *ptr = '\0'; + + do + { + *--ptr = digits[value % HEXBASE]; + value /= HEXBASE; + } while (ptr > buf && value); + + PG_RETURN_TEXT_P(cstring_to_text(ptr)); +} + +/* + * Convert an int64 to a string containing a base 16 (hex) representation of + * the number. + */ +Datum +to_hex64(PG_FUNCTION_ARGS) +{ + uint64 value = (uint64) PG_GETARG_INT64(0); + char *ptr; + const char *digits = "0123456789abcdef"; + char buf[32]; /* bigger than needed, but reasonable */ + + ptr = buf + sizeof(buf) - 1; + *ptr = '\0'; + + do + { + *--ptr = digits[value % HEXBASE]; + value /= HEXBASE; + } while (ptr > buf && value); + + PG_RETURN_TEXT_P(cstring_to_text(ptr)); +} + +/* + * Return the size of a datum, possibly compressed + * + * Works on any data type + */ +Datum +pg_column_size(PG_FUNCTION_ARGS) +{ + Datum value = PG_GETARG_DATUM(0); + int32 result; + int typlen; + + /* On first call, get the input type's typlen, and save at *fn_extra */ + if (fcinfo->flinfo->fn_extra == NULL) + { + /* Lookup the datatype of the supplied argument */ + Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0); + + typlen = get_typlen(argtypeid); + if (typlen == 0) /* should not happen */ + elog(ERROR, "cache lookup failed for type %u", argtypeid); + + fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(int)); + *((int *) fcinfo->flinfo->fn_extra) = typlen; + } + else + typlen = *((int *) fcinfo->flinfo->fn_extra); + + if (typlen == -1) + { + /* varlena type, possibly toasted */ + result = toast_datum_size(value); + } + else if (typlen == -2) + { + /* cstring */ + result = strlen(DatumGetCString(value)) + 1; + } + else + { + /* ordinary fixed-width type */ + result = typlen; + } + + PG_RETURN_INT32(result); +} + +/* + * Return the compression method stored in the compressed attribute. Return + * NULL for non varlena type or uncompressed data. + */ +Datum +pg_column_compression(PG_FUNCTION_ARGS) +{ + int typlen; + char *result; + ToastCompressionId cmid; + + /* On first call, get the input type's typlen, and save at *fn_extra */ + if (fcinfo->flinfo->fn_extra == NULL) + { + /* Lookup the datatype of the supplied argument */ + Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0); + + typlen = get_typlen(argtypeid); + if (typlen == 0) /* should not happen */ + elog(ERROR, "cache lookup failed for type %u", argtypeid); + + fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(int)); + *((int *) fcinfo->flinfo->fn_extra) = typlen; + } + else + typlen = *((int *) fcinfo->flinfo->fn_extra); + + if (typlen != -1) + PG_RETURN_NULL(); + + /* get the compression method id stored in the compressed varlena */ + cmid = toast_get_compression_id((struct varlena *) + DatumGetPointer(PG_GETARG_DATUM(0))); + if (cmid == TOAST_INVALID_COMPRESSION_ID) + PG_RETURN_NULL(); + + /* convert compression method id to compression method name */ + switch (cmid) + { + case TOAST_PGLZ_COMPRESSION_ID: + result = "pglz"; + break; + case TOAST_LZ4_COMPRESSION_ID: + result = "lz4"; + break; + default: + elog(ERROR, "invalid compression method id %d", cmid); + } + + PG_RETURN_TEXT_P(cstring_to_text(result)); +} + +/* + * string_agg - Concatenates values and returns string. 
+ * + * Syntax: string_agg(value text, delimiter text) RETURNS text + * + * Note: Any NULL values are ignored. The first-call delimiter isn't + * actually used at all, and on subsequent calls the delimiter precedes + * the associated value. + */ + +/* subroutine to initialize state */ +static StringInfo +makeStringAggState(FunctionCallInfo fcinfo) +{ + StringInfo state; + MemoryContext aggcontext; + MemoryContext oldcontext; + + if (!AggCheckCallContext(fcinfo, &aggcontext)) + { + /* cannot be called directly because of internal-type argument */ + elog(ERROR, "string_agg_transfn called in non-aggregate context"); + } + + /* + * Create state in aggregate context. It'll stay there across subsequent + * calls. + */ + oldcontext = MemoryContextSwitchTo(aggcontext); + state = makeStringInfo(); + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Datum +string_agg_transfn(PG_FUNCTION_ARGS) +{ + StringInfo state; + + state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0); + + /* Append the value unless null, preceding it with the delimiter. */ + if (!PG_ARGISNULL(1)) + { + text *value = PG_GETARG_TEXT_PP(1); + bool isfirst = false; + + /* + * You might think we can just throw away the first delimiter, however + * we must keep it as we may be a parallel worker doing partial + * aggregation building a state to send to the main process. We need + * to keep the delimiter of every aggregation so that the combine + * function can properly join up the strings of two separately + * partially aggregated results. The first delimiter is only stripped + * off in the final function. To know how much to strip off the front + * of the string, we store the length of the first delimiter in the + * StringInfo's cursor field, which we don't otherwise need here. + */ + if (state == NULL) + { + state = makeStringAggState(fcinfo); + isfirst = true; + } + + if (!PG_ARGISNULL(2)) + { + text *delim = PG_GETARG_TEXT_PP(2); + + appendStringInfoText(state, delim); + if (isfirst) + state->cursor = VARSIZE_ANY_EXHDR(delim); + } + + appendStringInfoText(state, value); + } + + /* + * The transition type for string_agg() is declared to be "internal", + * which is a pass-by-value type the same size as a pointer. + */ + if (state) + PG_RETURN_POINTER(state); + PG_RETURN_NULL(); +} + +/* + * string_agg_combine + * Aggregate combine function for string_agg(text) and string_agg(bytea) + */ +Datum +string_agg_combine(PG_FUNCTION_ARGS) +{ + StringInfo state1; + StringInfo state2; + MemoryContext agg_context; + + if (!AggCheckCallContext(fcinfo, &agg_context)) + elog(ERROR, "aggregate function called in non-aggregate context"); + + state1 = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0); + state2 = PG_ARGISNULL(1) ? NULL : (StringInfo) PG_GETARG_POINTER(1); + + if (state2 == NULL) + { + /* + * NULL state2 is easy, just return state1, which we know is already + * in the agg_context + */ + if (state1 == NULL) + PG_RETURN_NULL(); + PG_RETURN_POINTER(state1); + } + + if (state1 == NULL) + { + /* We must copy state2's data into the agg_context */ + MemoryContext old_context; + + old_context = MemoryContextSwitchTo(agg_context); + state1 = makeStringAggState(fcinfo); + appendBinaryStringInfo(state1, state2->data, state2->len); + state1->cursor = state2->cursor; + MemoryContextSwitchTo(old_context); + } + else if (state2->len > 0) + { + /* Combine ... 
state1->cursor does not change in this case */ + appendBinaryStringInfo(state1, state2->data, state2->len); + } + + PG_RETURN_POINTER(state1); +} + +/* + * string_agg_serialize + * Aggregate serialize function for string_agg(text) and string_agg(bytea) + * + * This is strict, so we need not handle NULL input + */ +Datum +string_agg_serialize(PG_FUNCTION_ARGS) +{ + StringInfo state; + StringInfoData buf; + bytea *result; + + /* cannot be called directly because of internal-type argument */ + Assert(AggCheckCallContext(fcinfo, NULL)); + + state = (StringInfo) PG_GETARG_POINTER(0); + + pq_begintypsend(&buf); + + /* cursor */ + pq_sendint(&buf, state->cursor, 4); + + /* data */ + pq_sendbytes(&buf, state->data, state->len); + + result = pq_endtypsend(&buf); + + PG_RETURN_BYTEA_P(result); +} + +/* + * string_agg_deserialize + * Aggregate deserial function for string_agg(text) and string_agg(bytea) + * + * This is strict, so we need not handle NULL input + */ +Datum +string_agg_deserialize(PG_FUNCTION_ARGS) +{ + bytea *sstate; + StringInfo result; + StringInfoData buf; + char *data; + int datalen; + + /* cannot be called directly because of internal-type argument */ + Assert(AggCheckCallContext(fcinfo, NULL)); + + sstate = PG_GETARG_BYTEA_PP(0); + + /* + * Copy the bytea into a StringInfo so that we can "receive" it using the + * standard recv-function infrastructure. + */ + initStringInfo(&buf); + appendBinaryStringInfo(&buf, + VARDATA_ANY(sstate), VARSIZE_ANY_EXHDR(sstate)); + + result = makeStringAggState(fcinfo); + + /* cursor */ + result->cursor = pq_getmsgint(&buf, 4); + + /* data */ + datalen = VARSIZE_ANY_EXHDR(sstate) - 4; + data = (char *) pq_getmsgbytes(&buf, datalen); + appendBinaryStringInfo(result, data, datalen); + + pq_getmsgend(&buf); + pfree(buf.data); + + PG_RETURN_POINTER(result); +} + +Datum +string_agg_finalfn(PG_FUNCTION_ARGS) +{ + StringInfo state; + + /* cannot be called directly because of internal-type argument */ + Assert(AggCheckCallContext(fcinfo, NULL)); + + state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0); + + if (state != NULL) + { + /* As per comment in transfn, strip data before the cursor position */ + PG_RETURN_TEXT_P(cstring_to_text_with_len(&state->data[state->cursor], + state->len - state->cursor)); + } + else + PG_RETURN_NULL(); +} + +/* + * Prepare cache with fmgr info for the output functions of the datatypes of + * the arguments of a concat-like function, beginning with argument "argidx". + * (Arguments before that will have corresponding slots in the resulting + * FmgrInfo array, but we don't fill those slots.) + */ +static FmgrInfo * +build_concat_foutcache(FunctionCallInfo fcinfo, int argidx) +{ + FmgrInfo *foutcache; + int i; + + /* We keep the info in fn_mcxt so it survives across calls */ + foutcache = (FmgrInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + PG_NARGS() * sizeof(FmgrInfo)); + + for (i = argidx; i < PG_NARGS(); i++) + { + Oid valtype; + Oid typOutput; + bool typIsVarlena; + + valtype = get_fn_expr_argtype(fcinfo->flinfo, i); + if (!OidIsValid(valtype)) + elog(ERROR, "could not determine data type of concat() input"); + + getTypeOutputInfo(valtype, &typOutput, &typIsVarlena); + fmgr_info_cxt(typOutput, &foutcache[i], fcinfo->flinfo->fn_mcxt); + } + + fcinfo->flinfo->fn_extra = foutcache; + + return foutcache; +} + +/* + * Implementation of both concat() and concat_ws(). + * + * sepstr is the separator string to place between values. 
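The delimiter-and-cursor trick used by the string_agg() machinery above can be seen in miniature below: each partial state keeps its own leading delimiter, the combine step simply concatenates states, and the final step strips everything before the stored cursor. This is a toy model with fixed-size buffers, not the aggregate code itself:

#include <stdio.h>
#include <string.h>

/*
 * Toy partial-aggregate state: accumulated bytes plus the length of the
 * leading delimiter to strip in the final step (the "cursor").
 */
typedef struct
{
    char data[128];
    int  len;
    int  cursor;
} agg_state;

static void
transfn(agg_state *st, const char *delim, const char *value)
{
    int first = (st->len == 0);

    st->len += sprintf(st->data + st->len, "%s%s", delim, value);
    if (first)
        st->cursor = (int) strlen(delim);   /* remember how much to strip */
}

int
main(void)
{
    agg_state a = {"", 0, 0};
    agg_state b = {"", 0, 0};

    /* two workers each build a partial state, keeping their first delimiter */
    transfn(&a, ",", "x");
    transfn(&a, ",", "y");
    transfn(&b, ",", "z");

    /* combine: append b after a; a's cursor is unchanged */
    memcpy(a.data + a.len, b.data, (size_t) b.len + 1);
    a.len += b.len;

    /* final: strip everything before the cursor */
    printf("%s\n", a.data + a.cursor);      /* prints "x,y,z" */
    return 0;
}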
+ * argidx identifies the first argument to concatenate (counting from zero); + * note that this must be constant across any one series of calls. + * + * Returns NULL if result should be NULL, else text value. + */ +static text * +concat_internal(const char *sepstr, int argidx, + FunctionCallInfo fcinfo) +{ + text *result; + StringInfoData str; + FmgrInfo *foutcache; + bool first_arg = true; + int i; + + /* + * concat(VARIADIC some-array) is essentially equivalent to + * array_to_text(), ie concat the array elements with the given separator. + * So we just pass the case off to that code. + */ + if (get_fn_expr_variadic(fcinfo->flinfo)) + { + ArrayType *arr; + + /* Should have just the one argument */ + Assert(argidx == PG_NARGS() - 1); + + /* concat(VARIADIC NULL) is defined as NULL */ + if (PG_ARGISNULL(argidx)) + return NULL; + + /* + * Non-null argument had better be an array. We assume that any call + * context that could let get_fn_expr_variadic return true will have + * checked that a VARIADIC-labeled parameter actually is an array. So + * it should be okay to just Assert that it's an array rather than + * doing a full-fledged error check. + */ + Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, argidx)))); + + /* OK, safe to fetch the array value */ + arr = PG_GETARG_ARRAYTYPE_P(argidx); + + /* + * And serialize the array. We tell array_to_text to ignore null + * elements, which matches the behavior of the loop below. + */ + return array_to_text_internal(fcinfo, arr, sepstr, NULL); + } + + /* Normal case without explicit VARIADIC marker */ + initStringInfo(&str); + + /* Get output function info, building it if first time through */ + foutcache = (FmgrInfo *) fcinfo->flinfo->fn_extra; + if (foutcache == NULL) + foutcache = build_concat_foutcache(fcinfo, argidx); + + for (i = argidx; i < PG_NARGS(); i++) + { + if (!PG_ARGISNULL(i)) + { + Datum value = PG_GETARG_DATUM(i); + + /* add separator if appropriate */ + if (first_arg) + first_arg = false; + else + appendStringInfoString(&str, sepstr); + + /* call the appropriate type output function, append the result */ + appendStringInfoString(&str, + OutputFunctionCall(&foutcache[i], value)); + } + } + + result = cstring_to_text_with_len(str.data, str.len); + pfree(str.data); + + return result; +} + +/* + * Concatenate all arguments. NULL arguments are ignored. + */ +Datum +text_concat(PG_FUNCTION_ARGS) +{ + text *result; + + result = concat_internal("", 0, fcinfo); + if (result == NULL) + PG_RETURN_NULL(); + PG_RETURN_TEXT_P(result); +} + +/* + * Concatenate all but first argument value with separators. The first + * parameter is used as the separator. NULL arguments are ignored. + */ +Datum +text_concat_ws(PG_FUNCTION_ARGS) +{ + char *sep; + text *result; + + /* return NULL when separator is NULL */ + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + sep = text_to_cstring(PG_GETARG_TEXT_PP(0)); + + result = concat_internal(sep, 1, fcinfo); + if (result == NULL) + PG_RETURN_NULL(); + PG_RETURN_TEXT_P(result); +} + +/* + * Return first n characters in the string. When n is negative, + * return all but last |n| characters. 
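The left()/right() sign convention described above amounts to adding a negative n to the character count. A byte-oriented sketch (the real code clips at character boundaries with pg_mbcharcliplen(); this simplification assumes single-byte characters):

#include <stdio.h>
#include <string.h>

/* Print the first n characters of s; negative n means all but the last |n|. */
static void
print_left(const char *s, int n)
{
    int len = (int) strlen(s);

    if (n < 0)
        n = len + n;            /* drop the last |n| characters */
    if (n < 0)
        n = 0;
    if (n > len)
        n = len;
    printf("\"%.*s\"\n", n, s);
}

int
main(void)
{
    print_left("abcdef", 2);    /* "ab"   */
    print_left("abcdef", -2);   /* "abcd" */
    return 0;
}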
+ */ +Datum +text_left(PG_FUNCTION_ARGS) +{ + int n = PG_GETARG_INT32(1); + + if (n < 0) + { + text *str = PG_GETARG_TEXT_PP(0); + const char *p = VARDATA_ANY(str); + int len = VARSIZE_ANY_EXHDR(str); + int rlen; + + n = pg_mbstrlen_with_len(p, len) + n; + rlen = pg_mbcharcliplen(p, len, n); + PG_RETURN_TEXT_P(cstring_to_text_with_len(p, rlen)); + } + else + PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0), 1, n, false)); +} + +/* + * Return last n characters in the string. When n is negative, + * return all but first |n| characters. + */ +Datum +text_right(PG_FUNCTION_ARGS) +{ + text *str = PG_GETARG_TEXT_PP(0); + const char *p = VARDATA_ANY(str); + int len = VARSIZE_ANY_EXHDR(str); + int n = PG_GETARG_INT32(1); + int off; + + if (n < 0) + n = -n; + else + n = pg_mbstrlen_with_len(p, len) - n; + off = pg_mbcharcliplen(p, len, n); + + PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off)); +} + +/* + * Return reversed string + */ +Datum +text_reverse(PG_FUNCTION_ARGS) +{ + text *str = PG_GETARG_TEXT_PP(0); + const char *p = VARDATA_ANY(str); + int len = VARSIZE_ANY_EXHDR(str); + const char *endp = p + len; + text *result; + char *dst; + + result = palloc(len + VARHDRSZ); + dst = (char *) VARDATA(result) + len; + SET_VARSIZE(result, len + VARHDRSZ); + + if (pg_database_encoding_max_length() > 1) + { + /* multibyte version */ + while (p < endp) + { + int sz; + + sz = pg_mblen(p); + dst -= sz; + memcpy(dst, p, sz); + p += sz; + } + } + else + { + /* single byte version */ + while (p < endp) + *(--dst) = *p++; + } + + PG_RETURN_TEXT_P(result); +} + + +/* + * Support macros for text_format() + */ +#define TEXT_FORMAT_FLAG_MINUS 0x0001 /* is minus flag present? */ + +#define ADVANCE_PARSE_POINTER(ptr,end_ptr) \ + do { \ + if (++(ptr) >= (end_ptr)) \ + ereport(ERROR, \ + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \ + errmsg("unterminated format() type specifier"), \ + errhint("For a single \"%%\" use \"%%%%\"."))); \ + } while (0) + +/* + * Returns a formatted string + */ +Datum +text_format(PG_FUNCTION_ARGS) +{ + text *fmt; + StringInfoData str; + const char *cp; + const char *start_ptr; + const char *end_ptr; + text *result; + int arg; + bool funcvariadic; + int nargs; + Datum *elements = NULL; + bool *nulls = NULL; + Oid element_type = InvalidOid; + Oid prev_type = InvalidOid; + Oid prev_width_type = InvalidOid; + FmgrInfo typoutputfinfo; + FmgrInfo typoutputinfo_width; + + /* When format string is null, immediately return null */ + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + + /* If argument is marked VARIADIC, expand array into elements */ + if (get_fn_expr_variadic(fcinfo->flinfo)) + { + ArrayType *arr; + int16 elmlen; + bool elmbyval; + char elmalign; + int nitems; + + /* Should have just the one argument */ + Assert(PG_NARGS() == 2); + + /* If argument is NULL, we treat it as zero-length array */ + if (PG_ARGISNULL(1)) + nitems = 0; + else + { + /* + * Non-null argument had better be an array. We assume that any + * call context that could let get_fn_expr_variadic return true + * will have checked that a VARIADIC-labeled parameter actually is + * an array. So it should be okay to just Assert that it's an + * array rather than doing a full-fledged error check. 
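text_reverse() above copies whole multibyte characters while walking the destination backwards, so multibyte sequences stay intact. A standalone approximation for UTF-8 strings, again with a simplified sequence-length helper (assumes valid UTF-8 input):

#include <stdio.h>
#include <string.h>

/* Rough UTF-8 sequence length from the lead byte (assumes valid input). */
static int
utf8_len(unsigned char c)
{
    if (c < 0x80)
        return 1;
    if (c < 0xE0)
        return 2;
    if (c < 0xF0)
        return 3;
    return 4;
}

/* Reverse 'src' into 'dst' by whole UTF-8 characters (dst at least as big). */
static void
utf8_reverse(const char *src, char *dst)
{
    size_t      len = strlen(src);
    const char *p = src;
    char       *out = dst + len;

    *out = '\0';
    while (*p)
    {
        int sz = utf8_len((unsigned char) *p);

        out -= sz;
        memcpy(out, p, sz);
        p += sz;
    }
}

int
main(void)
{
    char buf[32];

    utf8_reverse("ab\xC3\xA9" "c", buf);    /* multibyte chars stay intact */
    printf("%s\n", buf);
    return 0;
}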
+ */ + Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, 1)))); + + /* OK, safe to fetch the array value */ + arr = PG_GETARG_ARRAYTYPE_P(1); + + /* Get info about array element type */ + element_type = ARR_ELEMTYPE(arr); + get_typlenbyvalalign(element_type, + &elmlen, &elmbyval, &elmalign); + + /* Extract all array elements */ + deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign, + &elements, &nulls, &nitems); + } + + nargs = nitems + 1; + funcvariadic = true; + } + else + { + /* Non-variadic case, we'll process the arguments individually */ + nargs = PG_NARGS(); + funcvariadic = false; + } + + /* Setup for main loop. */ + fmt = PG_GETARG_TEXT_PP(0); + start_ptr = VARDATA_ANY(fmt); + end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt); + initStringInfo(&str); + arg = 1; /* next argument position to print */ + + /* Scan format string, looking for conversion specifiers. */ + for (cp = start_ptr; cp < end_ptr; cp++) + { + int argpos; + int widthpos; + int flags; + int width; + Datum value; + bool isNull; + Oid typid; + + /* + * If it's not the start of a conversion specifier, just copy it to + * the output buffer. + */ + if (*cp != '%') + { + appendStringInfoCharMacro(&str, *cp); + continue; + } + + ADVANCE_PARSE_POINTER(cp, end_ptr); + + /* Easy case: %% outputs a single % */ + if (*cp == '%') + { + appendStringInfoCharMacro(&str, *cp); + continue; + } + + /* Parse the optional portions of the format specifier */ + cp = text_format_parse_format(cp, end_ptr, + &argpos, &widthpos, + &flags, &width); + + /* + * Next we should see the main conversion specifier. Whether or not + * an argument position was present, it's known that at least one + * character remains in the string at this point. Experience suggests + * that it's worth checking that that character is one of the expected + * ones before we try to fetch arguments, so as to produce the least + * confusing response to a mis-formatted specifier. 
+ */ + if (strchr("sIL", *cp) == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognized format() type specifier \"%.*s\"", + pg_mblen(cp), cp), + errhint("For a single \"%%\" use \"%%%%\"."))); + + /* If indirect width was specified, get its value */ + if (widthpos >= 0) + { + /* Collect the specified or next argument position */ + if (widthpos > 0) + arg = widthpos; + if (arg >= nargs) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("too few arguments for format()"))); + + /* Get the value and type of the selected argument */ + if (!funcvariadic) + { + value = PG_GETARG_DATUM(arg); + isNull = PG_ARGISNULL(arg); + typid = get_fn_expr_argtype(fcinfo->flinfo, arg); + } + else + { + value = elements[arg - 1]; + isNull = nulls[arg - 1]; + typid = element_type; + } + if (!OidIsValid(typid)) + elog(ERROR, "could not determine data type of format() input"); + + arg++; + + /* We can treat NULL width the same as zero */ + if (isNull) + width = 0; + else if (typid == INT4OID) + width = DatumGetInt32(value); + else if (typid == INT2OID) + width = DatumGetInt16(value); + else + { + /* For less-usual datatypes, convert to text then to int */ + char *str; + + if (typid != prev_width_type) + { + Oid typoutputfunc; + bool typIsVarlena; + + getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena); + fmgr_info(typoutputfunc, &typoutputinfo_width); + prev_width_type = typid; + } + + str = OutputFunctionCall(&typoutputinfo_width, value); + + /* pg_strtoint32 will complain about bad data or overflow */ + width = pg_strtoint32(str); + + pfree(str); + } + } + + /* Collect the specified or next argument position */ + if (argpos > 0) + arg = argpos; + if (arg >= nargs) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("too few arguments for format()"))); + + /* Get the value and type of the selected argument */ + if (!funcvariadic) + { + value = PG_GETARG_DATUM(arg); + isNull = PG_ARGISNULL(arg); + typid = get_fn_expr_argtype(fcinfo->flinfo, arg); + } + else + { + value = elements[arg - 1]; + isNull = nulls[arg - 1]; + typid = element_type; + } + if (!OidIsValid(typid)) + elog(ERROR, "could not determine data type of format() input"); + + arg++; + + /* + * Get the appropriate typOutput function, reusing previous one if + * same type as previous argument. That's particularly useful in the + * variadic-array case, but often saves work even for ordinary calls. + */ + if (typid != prev_type) + { + Oid typoutputfunc; + bool typIsVarlena; + + getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena); + fmgr_info(typoutputfunc, &typoutputfinfo); + prev_type = typid; + } + + /* + * And now we can format the value. + */ + switch (*cp) + { + case 's': + case 'I': + case 'L': + text_format_string_conversion(&str, *cp, &typoutputfinfo, + value, isNull, + flags, width); + break; + default: + /* should not get here, because of previous check */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognized format() type specifier \"%.*s\"", + pg_mblen(cp), cp), + errhint("For a single \"%%\" use \"%%%%\"."))); + break; + } + } + + /* Don't need deconstruct_array results anymore. */ + if (elements != NULL) + pfree(elements); + if (nulls != NULL) + pfree(nulls); + + /* Generate results. */ + result = cstring_to_text_with_len(str.data, str.len); + pfree(str.data); + + PG_RETURN_TEXT_P(result); +} + +/* + * Parse contiguous digits as a decimal number. + * + * Returns true if some digits could be parsed. 
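A standalone version of the overflow-checked digit accumulation used by text_format_parse_digits(), with the GCC/Clang overflow builtins standing in for pg_mul_s32_overflow()/pg_add_s32_overflow() (a toolchain assumption, and the error handling is simplified to a boolean return):

#include <stdbool.h>
#include <stdio.h>

/* Accumulate decimal digits from *s into *value; false on overflow or no digits. */
static bool
parse_digits(const char **s, int *value)
{
    bool found = false;
    int  val = 0;

    while (**s >= '0' && **s <= '9')
    {
        int digit = **s - '0';

        if (__builtin_mul_overflow(val, 10, &val) ||
            __builtin_add_overflow(val, digit, &val))
            return false;       /* the backend reports "number is out of range" here */
        (*s)++;
        found = true;
    }
    *value = val;
    return found;
}

int
main(void)
{
    const char *p = "123$-5s";
    int         n;

    if (parse_digits(&p, &n))
        printf("parsed %d, next char '%c'\n", n, *p);   /* 123 and '$' */
    return 0;
}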
+ * The value is returned into *value, and *ptr is advanced to the next + * character to be parsed. + * + * Note parsing invariant: at least one character is known available before + * string end (end_ptr) at entry, and this is still true at exit. + */ +static bool +text_format_parse_digits(const char **ptr, const char *end_ptr, int *value) +{ + bool found = false; + const char *cp = *ptr; + int val = 0; + + while (*cp >= '0' && *cp <= '9') + { + int8 digit = (*cp - '0'); + + if (unlikely(pg_mul_s32_overflow(val, 10, &val)) || + unlikely(pg_add_s32_overflow(val, digit, &val))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("number is out of range"))); + ADVANCE_PARSE_POINTER(cp, end_ptr); + found = true; + } + + *ptr = cp; + *value = val; + + return found; +} + +/* + * Parse a format specifier (generally following the SUS printf spec). + * + * We have already advanced over the initial '%', and we are looking for + * [argpos][flags][width]type (but the type character is not consumed here). + * + * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1). + * Output parameters: + * argpos: argument position for value to be printed. -1 means unspecified. + * widthpos: argument position for width. Zero means the argument position + * was unspecified (ie, take the next arg) and -1 means no width + * argument (width was omitted or specified as a constant). + * flags: bitmask of flags. + * width: directly-specified width value. Zero means the width was omitted + * (note it's not necessary to distinguish this case from an explicit + * zero width value). + * + * The function result is the next character position to be parsed, ie, the + * location where the type character is/should be. + * + * Note parsing invariant: at least one character is known available before + * string end (end_ptr) at entry, and this is still true at exit. 
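For concreteness, a few format specifiers and how this routine decomposes them, following the output-parameter semantics documented above (shown as argpos / widthpos / flags / width):

    %s       ->  argpos -1, widthpos -1, flags 0,     width 0
    %10s     ->  argpos -1, widthpos -1, flags 0,     width 10
    %-10s    ->  argpos -1, widthpos -1, flags MINUS, width 10
    %3$s     ->  argpos  3, widthpos -1, flags 0,     width 0
    %3$*2$s  ->  argpos  3, widthpos  2, flags 0,     width 0
    %*s      ->  argpos -1, widthpos  0, flags 0,     width 0  (width taken from the next argument)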
+ */ +static const char * +text_format_parse_format(const char *start_ptr, const char *end_ptr, + int *argpos, int *widthpos, + int *flags, int *width) +{ + const char *cp = start_ptr; + int n; + + /* set defaults for output parameters */ + *argpos = -1; + *widthpos = -1; + *flags = 0; + *width = 0; + + /* try to identify first number */ + if (text_format_parse_digits(&cp, end_ptr, &n)) + { + if (*cp != '$') + { + /* Must be just a width and a type, so we're done */ + *width = n; + return cp; + } + /* The number was argument position */ + *argpos = n; + /* Explicit 0 for argument index is immediately refused */ + if (n == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("format specifies argument 0, but arguments are numbered from 1"))); + ADVANCE_PARSE_POINTER(cp, end_ptr); + } + + /* Handle flags (only minus is supported now) */ + while (*cp == '-') + { + *flags |= TEXT_FORMAT_FLAG_MINUS; + ADVANCE_PARSE_POINTER(cp, end_ptr); + } + + if (*cp == '*') + { + /* Handle indirect width */ + ADVANCE_PARSE_POINTER(cp, end_ptr); + if (text_format_parse_digits(&cp, end_ptr, &n)) + { + /* number in this position must be closed by $ */ + if (*cp != '$') + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("width argument position must be ended by \"$\""))); + /* The number was width argument position */ + *widthpos = n; + /* Explicit 0 for argument index is immediately refused */ + if (n == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("format specifies argument 0, but arguments are numbered from 1"))); + ADVANCE_PARSE_POINTER(cp, end_ptr); + } + else + *widthpos = 0; /* width's argument position is unspecified */ + } + else + { + /* Check for direct width specification */ + if (text_format_parse_digits(&cp, end_ptr, &n)) + *width = n; + } + + /* cp should now be pointing at type character */ + return cp; +} + +/* + * Format a %s, %I, or %L conversion + */ +static void +text_format_string_conversion(StringInfo buf, char conversion, + FmgrInfo *typOutputInfo, + Datum value, bool isNull, + int flags, int width) +{ + char *str; + + /* Handle NULL arguments before trying to stringify the value. */ + if (isNull) + { + if (conversion == 's') + text_format_append_string(buf, "", flags, width); + else if (conversion == 'L') + text_format_append_string(buf, "NULL", flags, width); + else if (conversion == 'I') + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("null values cannot be formatted as an SQL identifier"))); + return; + } + + /* Stringify. */ + str = OutputFunctionCall(typOutputInfo, value); + + /* Escape. */ + if (conversion == 'I') + { + /* quote_identifier may or may not allocate a new string. */ + text_format_append_string(buf, quote_identifier(str), flags, width); + } + else if (conversion == 'L') + { + char *qstr = quote_literal_cstr(str); + + text_format_append_string(buf, qstr, flags, width); + /* quote_literal_cstr() always allocates a new string */ + pfree(qstr); + } + else + text_format_append_string(buf, str, flags, width); + + /* Cleanup. 
*/ + pfree(str); +} + +/* + * Append str to buf, padding as directed by flags/width + */ +static void +text_format_append_string(StringInfo buf, const char *str, + int flags, int width) +{ + bool align_to_left = false; + int len; + + /* fast path for typical easy case */ + if (width == 0) + { + appendStringInfoString(buf, str); + return; + } + + if (width < 0) + { + /* Negative width: implicit '-' flag, then take absolute value */ + align_to_left = true; + /* -INT_MIN is undefined */ + if (width <= INT_MIN) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("number is out of range"))); + width = -width; + } + else if (flags & TEXT_FORMAT_FLAG_MINUS) + align_to_left = true; + + len = pg_mbstrlen(str); + if (align_to_left) + { + /* left justify */ + appendStringInfoString(buf, str); + if (len < width) + appendStringInfoSpaces(buf, width - len); + } + else + { + /* right justify */ + if (len < width) + appendStringInfoSpaces(buf, width - len); + appendStringInfoString(buf, str); + } +} + +/* + * text_format_nv - nonvariadic wrapper for text_format function. + * + * note: this wrapper is necessary to pass the sanity check in opr_sanity, + * which checks that all built-in functions that share the implementing C + * function take the same number of arguments. + */ +Datum +text_format_nv(PG_FUNCTION_ARGS) +{ + return text_format(fcinfo); +} + +/* + * Helper function for Levenshtein distance functions. Faster than memcmp(), + * for this use case. + */ +static inline bool +rest_of_char_same(const char *s1, const char *s2, int len) +{ + while (len > 0) + { + len--; + if (s1[len] != s2[len]) + return false; + } + return true; +} + +/* Expand each Levenshtein distance variant */ +#include "levenshtein.c" +#define LEVENSHTEIN_LESS_EQUAL +#include "levenshtein.c" + + +/* + * The following *ClosestMatch() functions can be used to determine whether a + * user-provided string resembles any known valid values, which is useful for + * providing hints in log messages, among other things. Use these functions + * like so: + * + * initClosestMatch(&state, source_string, max_distance); + * + * for (int i = 0; i < num_valid_strings; i++) + * updateClosestMatch(&state, valid_strings[i]); + * + * closestMatch = getClosestMatch(&state); + */ + +/* + * Initialize the given state with the source string and maximum Levenshtein + * distance to consider. + */ +void +initClosestMatch(ClosestMatchState *state, const char *source, int max_d) +{ + Assert(state); + Assert(max_d >= 0); + + state->source = source; + state->min_d = -1; + state->max_d = max_d; + state->match = NULL; +} + +/* + * If the candidate string is a closer match than the current one saved (or + * there is no match saved), save it as the closest match. + * + * If the source or candidate string is NULL, empty, or too long, this function + * takes no action. Likewise, if the Levenshtein distance exceeds the maximum + * allowed or more than half the characters are different, no action is taken. + */ +void +updateClosestMatch(ClosestMatchState *state, const char *candidate) +{ + int dist; + + Assert(state); + + if (state->source == NULL || state->source[0] == '\0' || + candidate == NULL || candidate[0] == '\0') + return; + + /* + * To avoid ERROR-ing, we check the lengths here instead of setting + * 'trusted' to false in the call to varstr_levenshtein_less_equal(). 
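The varstr_levenshtein machinery included above supports per-operation costs and an early-exit bound; the classic unit-cost distance it generalizes fits in a few lines. A single-row dynamic-programming sketch (plain C, assumes the second string is at most MAXLEN bytes; not the backend implementation):

#include <stdio.h>
#include <string.h>

#define MAXLEN 64               /* assumed upper bound on strlen(b) */

/* Classic single-row dynamic-programming Levenshtein distance, unit costs. */
static int
levenshtein(const char *a, const char *b)
{
    int m = (int) strlen(a);
    int n = (int) strlen(b);
    int row[MAXLEN + 1];

    for (int j = 0; j <= n; j++)
        row[j] = j;

    for (int i = 1; i <= m; i++)
    {
        int prev = row[0];      /* holds d[i-1][j-1] for the inner loop */

        row[0] = i;
        for (int j = 1; j <= n; j++)
        {
            int cur = row[j];   /* d[i-1][j] */
            int cost = (a[i - 1] == b[j - 1]) ? 0 : 1;
            int best = prev + cost;             /* substitution / match */

            if (cur + 1 < best)
                best = cur + 1;                 /* deletion */
            if (row[j - 1] + 1 < best)
                best = row[j - 1] + 1;          /* insertion */
            row[j] = best;
            prev = cur;
        }
    }
    return row[n];
}

int
main(void)
{
    printf("%d\n", levenshtein("wrok_mem", "work_mem"));    /* prints 2 */
    return 0;
}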
+ */ + if (strlen(state->source) > MAX_LEVENSHTEIN_STRLEN || + strlen(candidate) > MAX_LEVENSHTEIN_STRLEN) + return; + + dist = varstr_levenshtein_less_equal(state->source, strlen(state->source), + candidate, strlen(candidate), 1, 1, 1, + state->max_d, true); + if (dist <= state->max_d && + dist <= strlen(state->source) / 2 && + (state->min_d == -1 || dist < state->min_d)) + { + state->min_d = dist; + state->match = candidate; + } +} + +/* + * Return the closest match. If no suitable candidates were provided via + * updateClosestMatch(), return NULL. + */ +const char * +getClosestMatch(ClosestMatchState *state) +{ + Assert(state); + + return state->match; +} + + +/* + * Unicode support + */ + +static UnicodeNormalizationForm +unicode_norm_form_from_string(const char *formstr) +{ + UnicodeNormalizationForm form = -1; + + /* + * Might as well check this while we're here. + */ + if (GetDatabaseEncoding() != PG_UTF8) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("Unicode normalization can only be performed if server encoding is UTF8"))); + + if (pg_strcasecmp(formstr, "NFC") == 0) + form = UNICODE_NFC; + else if (pg_strcasecmp(formstr, "NFD") == 0) + form = UNICODE_NFD; + else if (pg_strcasecmp(formstr, "NFKC") == 0) + form = UNICODE_NFKC; + else if (pg_strcasecmp(formstr, "NFKD") == 0) + form = UNICODE_NFKD; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid normalization form: %s", formstr))); + + return form; +} + +Datum +unicode_normalize_func(PG_FUNCTION_ARGS) +{ + text *input = PG_GETARG_TEXT_PP(0); + char *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1)); + UnicodeNormalizationForm form; + int size; + pg_wchar *input_chars; + pg_wchar *output_chars; + unsigned char *p; + text *result; + int i; + + form = unicode_norm_form_from_string(formstr); + + /* convert to pg_wchar */ + size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input)); + input_chars = palloc((size + 1) * sizeof(pg_wchar)); + p = (unsigned char *) VARDATA_ANY(input); + for (i = 0; i < size; i++) + { + input_chars[i] = utf8_to_unicode(p); + p += pg_utf_mblen(p); + } + input_chars[i] = (pg_wchar) '\0'; + Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input)); + + /* action */ + output_chars = unicode_normalize(form, input_chars); + + /* convert back to UTF-8 string */ + size = 0; + for (pg_wchar *wp = output_chars; *wp; wp++) + { + unsigned char buf[4]; + + unicode_to_utf8(*wp, buf); + size += pg_utf_mblen(buf); + } + + result = palloc(size + VARHDRSZ); + SET_VARSIZE(result, size + VARHDRSZ); + + p = (unsigned char *) VARDATA_ANY(result); + for (pg_wchar *wp = output_chars; *wp; wp++) + { + unicode_to_utf8(*wp, p); + p += pg_utf_mblen(p); + } + Assert((char *) p == (char *) result + size + VARHDRSZ); + + PG_RETURN_TEXT_P(result); +} + +/* + * Check whether the string is in the specified Unicode normalization form. + * + * This is done by converting the string to the specified normal form and then + * comparing that to the original string. To speed that up, we also apply the + * "quick check" algorithm specified in UAX #15, which can give a yes or no + * answer for many strings by just scanning the string once. + * + * This function should generally be optimized for the case where the string + * is in fact normalized. In that case, we'll end up looking at the entire + * string, so it's probably not worth doing any incremental conversion etc. 
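unicode_normalize_func() above first decodes the UTF-8 input into an array of code points before running normalization over them. A simplified standalone decoder showing that first step (assumes valid UTF-8; the backend uses utf8_to_unicode() and pg_utf_mblen()):

#include <stdio.h>

/* Decode one UTF-8 sequence at p into a code point; store its byte length. */
static unsigned int
utf8_decode(const unsigned char *p, int *len)
{
    if (p[0] < 0x80)
    {
        *len = 1;
        return p[0];
    }
    if ((p[0] & 0xE0) == 0xC0)
    {
        *len = 2;
        return ((p[0] & 0x1Fu) << 6) | (p[1] & 0x3Fu);
    }
    if ((p[0] & 0xF0) == 0xE0)
    {
        *len = 3;
        return ((p[0] & 0x0Fu) << 12) | ((p[1] & 0x3Fu) << 6) | (p[2] & 0x3Fu);
    }
    *len = 4;
    return ((p[0] & 0x07u) << 18) | ((p[1] & 0x3Fu) << 12) |
           ((p[2] & 0x3Fu) << 6) | (p[3] & 0x3Fu);
}

int
main(void)
{
    const unsigned char *s =
        (const unsigned char *) "A\xC3\xA9\xE2\x82\xAC";    /* A, e-acute, euro sign */
    int len;

    while (*s)
    {
        printf("U+%04X\n", utf8_decode(s, &len));   /* 0041, 00E9, 20AC */
        s += len;
    }
    return 0;
}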
+ */ +Datum +unicode_is_normalized(PG_FUNCTION_ARGS) +{ + text *input = PG_GETARG_TEXT_PP(0); + char *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1)); + UnicodeNormalizationForm form; + int size; + pg_wchar *input_chars; + pg_wchar *output_chars; + unsigned char *p; + int i; + UnicodeNormalizationQC quickcheck; + int output_size; + bool result; + + form = unicode_norm_form_from_string(formstr); + + /* convert to pg_wchar */ + size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input)); + input_chars = palloc((size + 1) * sizeof(pg_wchar)); + p = (unsigned char *) VARDATA_ANY(input); + for (i = 0; i < size; i++) + { + input_chars[i] = utf8_to_unicode(p); + p += pg_utf_mblen(p); + } + input_chars[i] = (pg_wchar) '\0'; + Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input)); + + /* quick check (see UAX #15) */ + quickcheck = unicode_is_normalized_quickcheck(form, input_chars); + if (quickcheck == UNICODE_NORM_QC_YES) + PG_RETURN_BOOL(true); + else if (quickcheck == UNICODE_NORM_QC_NO) + PG_RETURN_BOOL(false); + + /* normalize and compare with original */ + output_chars = unicode_normalize(form, input_chars); + + output_size = 0; + for (pg_wchar *wp = output_chars; *wp; wp++) + output_size++; + + result = (size == output_size) && + (memcmp(input_chars, output_chars, size * sizeof(pg_wchar)) == 0); + + PG_RETURN_BOOL(result); +} + +/* + * Check if first n chars are hexadecimal digits + */ +static bool +isxdigits_n(const char *instr, size_t n) +{ + for (size_t i = 0; i < n; i++) + if (!isxdigit((unsigned char) instr[i])) + return false; + + return true; +} + +static unsigned int +hexval(unsigned char c) +{ + if (c >= '0' && c <= '9') + return c - '0'; + if (c >= 'a' && c <= 'f') + return c - 'a' + 0xA; + if (c >= 'A' && c <= 'F') + return c - 'A' + 0xA; + elog(ERROR, "invalid hexadecimal digit"); + return 0; /* not reached */ +} + +/* + * Translate string with hexadecimal digits to number + */ +static unsigned int +hexval_n(const char *instr, size_t n) +{ + unsigned int result = 0; + + for (size_t i = 0; i < n; i++) + result += hexval(instr[i]) << (4 * (n - i - 1)); + + return result; +} + +/* + * Replaces Unicode escape sequences by Unicode characters + */ +Datum +unistr(PG_FUNCTION_ARGS) +{ + text *input_text = PG_GETARG_TEXT_PP(0); + char *instr; + int len; + StringInfoData str; + text *result; + pg_wchar pair_first = 0; + char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1]; + + instr = VARDATA_ANY(input_text); + len = VARSIZE_ANY_EXHDR(input_text); + + initStringInfo(&str); + + while (len > 0) + { + if (instr[0] == '\\') + { + if (len >= 2 && + instr[1] == '\\') + { + if (pair_first) + goto invalid_pair; + appendStringInfoChar(&str, '\\'); + instr += 2; + len -= 2; + } + else if ((len >= 5 && isxdigits_n(instr + 1, 4)) || + (len >= 6 && instr[1] == 'u' && isxdigits_n(instr + 2, 4))) + { + pg_wchar unicode; + int offset = instr[1] == 'u' ? 
2 : 1; + + unicode = hexval_n(instr + offset, 4); + + if (!is_valid_unicode_codepoint(unicode)) + ereport(ERROR, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid Unicode code point: %04X", unicode)); + + if (pair_first) + { + if (is_utf16_surrogate_second(unicode)) + { + unicode = surrogate_pair_to_codepoint(pair_first, unicode); + pair_first = 0; + } + else + goto invalid_pair; + } + else if (is_utf16_surrogate_second(unicode)) + goto invalid_pair; + + if (is_utf16_surrogate_first(unicode)) + pair_first = unicode; + else + { + pg_unicode_to_server(unicode, (unsigned char *) cbuf); + appendStringInfoString(&str, cbuf); + } + + instr += 4 + offset; + len -= 4 + offset; + } + else if (len >= 8 && instr[1] == '+' && isxdigits_n(instr + 2, 6)) + { + pg_wchar unicode; + + unicode = hexval_n(instr + 2, 6); + + if (!is_valid_unicode_codepoint(unicode)) + ereport(ERROR, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid Unicode code point: %04X", unicode)); + + if (pair_first) + { + if (is_utf16_surrogate_second(unicode)) + { + unicode = surrogate_pair_to_codepoint(pair_first, unicode); + pair_first = 0; + } + else + goto invalid_pair; + } + else if (is_utf16_surrogate_second(unicode)) + goto invalid_pair; + + if (is_utf16_surrogate_first(unicode)) + pair_first = unicode; + else + { + pg_unicode_to_server(unicode, (unsigned char *) cbuf); + appendStringInfoString(&str, cbuf); + } + + instr += 8; + len -= 8; + } + else if (len >= 10 && instr[1] == 'U' && isxdigits_n(instr + 2, 8)) + { + pg_wchar unicode; + + unicode = hexval_n(instr + 2, 8); + + if (!is_valid_unicode_codepoint(unicode)) + ereport(ERROR, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid Unicode code point: %04X", unicode)); + + if (pair_first) + { + if (is_utf16_surrogate_second(unicode)) + { + unicode = surrogate_pair_to_codepoint(pair_first, unicode); + pair_first = 0; + } + else + goto invalid_pair; + } + else if (is_utf16_surrogate_second(unicode)) + goto invalid_pair; + + if (is_utf16_surrogate_first(unicode)) + pair_first = unicode; + else + { + pg_unicode_to_server(unicode, (unsigned char *) cbuf); + appendStringInfoString(&str, cbuf); + } + + instr += 10; + len -= 10; + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid Unicode escape"), + errhint("Unicode escapes must be \\XXXX, \\+XXXXXX, \\uXXXX, or \\UXXXXXXXX."))); + } + else + { + if (pair_first) + goto invalid_pair; + + appendStringInfoChar(&str, *instr++); + len--; + } + } + + /* unfinished surrogate pair? 
*/ + if (pair_first) + goto invalid_pair; + + result = cstring_to_text_with_len(str.data, str.len); + pfree(str.data); + + PG_RETURN_TEXT_P(result); + +invalid_pair: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid Unicode surrogate pair"))); + PG_RETURN_NULL(); /* keep compiler quiet */ +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/version.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/version.c new file mode 100644 index 00000000000..30edac59302 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/version.c @@ -0,0 +1,24 @@ +/*------------------------------------------------------------------------- + * + * version.c + * Returns the PostgreSQL version string + * + * Copyright (c) 1998-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * + * src/backend/utils/adt/version.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "utils/builtins.h" + + +Datum +pgsql_version(PG_FUNCTION_ARGS) +{ + PG_RETURN_TEXT_P(cstring_to_text(PG_VERSION_STR)); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/windowfuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/windowfuncs.c new file mode 100644 index 00000000000..0c7cc55845a --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/windowfuncs.c @@ -0,0 +1,732 @@ +/*------------------------------------------------------------------------- + * + * windowfuncs.c + * Standard window functions defined in SQL spec. + * + * Portions Copyright (c) 2000-2023, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/windowfuncs.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "nodes/supportnodes.h" +#include "optimizer/optimizer.h" +#include "utils/builtins.h" +#include "windowapi.h" + +/* + * ranking process information + */ +typedef struct rank_context +{ + int64 rank; /* current rank */ +} rank_context; + +/* + * ntile process information + */ +typedef struct +{ + int32 ntile; /* current result */ + int64 rows_per_bucket; /* row number of current bucket */ + int64 boundary; /* how many rows should be in the bucket */ + int64 remainder; /* (total rows) % (bucket num) */ +} ntile_context; + +static bool rank_up(WindowObject winobj); +static Datum leadlag_common(FunctionCallInfo fcinfo, + bool forward, bool withoffset, bool withdefault); + + +/* + * utility routine for *_rank functions. + */ +static bool +rank_up(WindowObject winobj) +{ + bool up = false; /* should rank increase? */ + int64 curpos = WinGetCurrentPosition(winobj); + rank_context *context; + + context = (rank_context *) + WinGetPartitionLocalMemory(winobj, sizeof(rank_context)); + + if (context->rank == 0) + { + /* first call: rank of first row is always 1 */ + Assert(curpos == 0); + context->rank = 1; + } + else + { + Assert(curpos > 0); + /* do current and prior tuples match by ORDER BY clause? */ + if (!WinRowsArePeers(winobj, curpos - 1, curpos)) + up = true; + } + + /* We can advance the mark, but only *after* access to prior row */ + WinSetMarkPosition(winobj, curpos); + + return up; +} + + +/* + * row_number + * just increment up from 1 until current partition finishes. 
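The rank_up() helper above hinges on WinRowsArePeers(): rank() jumps to the current row number when a new peer group starts, while dense_rank() just increments. A toy illustration over the sorted keys of one partition (illustrative only, not the executor code):

#include <stdio.h>

int
main(void)
{
    /* Sorted ORDER BY keys of one partition; equal keys are "peers". */
    int  keys[] = {10, 20, 20, 30, 30, 30};
    int  n = 6;
    long rank = 0;
    long dense_rank = 0;

    for (int i = 0; i < n; i++)
    {
        int new_peer_group = (i == 0 || keys[i] != keys[i - 1]);

        if (new_peer_group)
        {
            rank = i + 1;       /* rank() jumps to the current row number */
            dense_rank++;       /* dense_rank() just increments */
        }
        printf("row %d  key %d  row_number %d  rank %ld  dense_rank %ld\n",
               i + 1, keys[i], i + 1, rank, dense_rank);
    }
    return 0;
}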
+ */ +Datum +window_row_number(PG_FUNCTION_ARGS) +{ + WindowObject winobj = PG_WINDOW_OBJECT(); + int64 curpos = WinGetCurrentPosition(winobj); + + WinSetMarkPosition(winobj, curpos); + PG_RETURN_INT64(curpos + 1); +} + +/* + * window_row_number_support + * prosupport function for window_row_number() + */ +Datum +window_row_number_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + if (IsA(rawreq, SupportRequestWFuncMonotonic)) + { + SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq; + + /* row_number() is monotonically increasing */ + req->monotonic = MONOTONICFUNC_INCREASING; + PG_RETURN_POINTER(req); + } + + if (IsA(rawreq, SupportRequestOptimizeWindowClause)) + { + SupportRequestOptimizeWindowClause *req = (SupportRequestOptimizeWindowClause *) rawreq; + + /* + * The frame options can always become "ROWS BETWEEN UNBOUNDED + * PRECEDING AND CURRENT ROW". row_number() always just increments by + * 1 with each row in the partition. Using ROWS instead of RANGE + * saves effort checking peer rows during execution. + */ + req->frameOptions = (FRAMEOPTION_NONDEFAULT | + FRAMEOPTION_ROWS | + FRAMEOPTION_START_UNBOUNDED_PRECEDING | + FRAMEOPTION_END_CURRENT_ROW); + + PG_RETURN_POINTER(req); + } + + PG_RETURN_POINTER(NULL); +} + +/* + * rank + * Rank changes when key columns change. + * The new rank number is the current row number. + */ +Datum +window_rank(PG_FUNCTION_ARGS) +{ + WindowObject winobj = PG_WINDOW_OBJECT(); + rank_context *context; + bool up; + + up = rank_up(winobj); + context = (rank_context *) + WinGetPartitionLocalMemory(winobj, sizeof(rank_context)); + if (up) + context->rank = WinGetCurrentPosition(winobj) + 1; + + PG_RETURN_INT64(context->rank); +} + +/* + * window_rank_support + * prosupport function for window_rank() + */ +Datum +window_rank_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + if (IsA(rawreq, SupportRequestWFuncMonotonic)) + { + SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq; + + /* rank() is monotonically increasing */ + req->monotonic = MONOTONICFUNC_INCREASING; + PG_RETURN_POINTER(req); + } + + if (IsA(rawreq, SupportRequestOptimizeWindowClause)) + { + SupportRequestOptimizeWindowClause *req = (SupportRequestOptimizeWindowClause *) rawreq; + + /* + * rank() is coded in such a way that it returns "(COUNT (*) OVER + * (<opt> RANGE UNBOUNDED PRECEDING) - COUNT (*) OVER (<opt> RANGE + * CURRENT ROW) + 1)" regardless of the frame options. We'll set the + * frame options to "ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW" + * so they agree with what window_row_number_support() optimized the + * frame options to be. Using ROWS instead of RANGE saves from doing + * peer row checks during execution. + */ + req->frameOptions = (FRAMEOPTION_NONDEFAULT | + FRAMEOPTION_ROWS | + FRAMEOPTION_START_UNBOUNDED_PRECEDING | + FRAMEOPTION_END_CURRENT_ROW); + + PG_RETURN_POINTER(req); + } + + PG_RETURN_POINTER(NULL); +} + +/* + * dense_rank + * Rank increases by 1 when key columns change. 
+ */ +Datum +window_dense_rank(PG_FUNCTION_ARGS) +{ + WindowObject winobj = PG_WINDOW_OBJECT(); + rank_context *context; + bool up; + + up = rank_up(winobj); + context = (rank_context *) + WinGetPartitionLocalMemory(winobj, sizeof(rank_context)); + if (up) + context->rank++; + + PG_RETURN_INT64(context->rank); +} + +/* + * window_dense_rank_support + * prosupport function for window_dense_rank() + */ +Datum +window_dense_rank_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + if (IsA(rawreq, SupportRequestWFuncMonotonic)) + { + SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq; + + /* dense_rank() is monotonically increasing */ + req->monotonic = MONOTONICFUNC_INCREASING; + PG_RETURN_POINTER(req); + } + + if (IsA(rawreq, SupportRequestOptimizeWindowClause)) + { + SupportRequestOptimizeWindowClause *req = (SupportRequestOptimizeWindowClause *) rawreq; + + /* + * dense_rank() is unaffected by the frame options. Here we set the + * frame options to match what's done in row_number's support + * function. Using ROWS instead of RANGE (the default) saves the + * executor from having to check for peer rows. + */ + req->frameOptions = (FRAMEOPTION_NONDEFAULT | + FRAMEOPTION_ROWS | + FRAMEOPTION_START_UNBOUNDED_PRECEDING | + FRAMEOPTION_END_CURRENT_ROW); + + PG_RETURN_POINTER(req); + } + + PG_RETURN_POINTER(NULL); +} + +/* + * percent_rank + * return fraction between 0 and 1 inclusive, + * which is described as (RK - 1) / (NR - 1), where RK is the current row's + * rank and NR is the total number of rows, per spec. + */ +Datum +window_percent_rank(PG_FUNCTION_ARGS) +{ + WindowObject winobj = PG_WINDOW_OBJECT(); + rank_context *context; + bool up; + int64 totalrows = WinGetPartitionRowCount(winobj); + + Assert(totalrows > 0); + + up = rank_up(winobj); + context = (rank_context *) + WinGetPartitionLocalMemory(winobj, sizeof(rank_context)); + if (up) + context->rank = WinGetCurrentPosition(winobj) + 1; + + /* return zero if there's only one row, per spec */ + if (totalrows <= 1) + PG_RETURN_FLOAT8(0.0); + + PG_RETURN_FLOAT8((float8) (context->rank - 1) / (float8) (totalrows - 1)); +} + +/* + * window_percent_rank_support + * prosupport function for window_percent_rank() + */ +Datum +window_percent_rank_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + if (IsA(rawreq, SupportRequestWFuncMonotonic)) + { + SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq; + + /* percent_rank() is monotonically increasing */ + req->monotonic = MONOTONICFUNC_INCREASING; + PG_RETURN_POINTER(req); + } + + if (IsA(rawreq, SupportRequestOptimizeWindowClause)) + { + SupportRequestOptimizeWindowClause *req = (SupportRequestOptimizeWindowClause *) rawreq; + + /* + * percent_rank() is unaffected by the frame options. Here we set the + * frame options to match what's done in row_number's support + * function. Using ROWS instead of RANGE (the default) saves the + * executor from having to check for peer rows. + */ + req->frameOptions = (FRAMEOPTION_NONDEFAULT | + FRAMEOPTION_ROWS | + FRAMEOPTION_START_UNBOUNDED_PRECEDING | + FRAMEOPTION_END_CURRENT_ROW); + + PG_RETURN_POINTER(req); + } + + PG_RETURN_POINTER(NULL); +} + + +/* + * cume_dist + * return fraction between 0 and 1 inclusive, + * which is described as NP / NR, where NP is the number of rows preceding or + * peers to the current row, and NR is the total number of rows, per spec. 
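Using the same toy partition, percent_rank() and cume_dist() are simple ratios over the rank and the number of rows preceding or peer to the current row. A standalone sketch of the arithmetic described above (illustrative only):

#include <stdio.h>

int
main(void)
{
    int keys[] = {10, 20, 20, 30, 30, 30};
    int n = 6;

    for (int i = 0; i < n; i++)
    {
        int rank = i + 1;
        int np;                 /* rows preceding or peer to the current row */

        /* back up to the rank of the first row of this peer group */
        while (rank > 1 && keys[rank - 2] == keys[i])
            rank--;

        /* count forward through the current peer group */
        np = rank - 1;
        while (np < n && keys[np] == keys[i])
            np++;

        printf("key %d  percent_rank %.3f  cume_dist %.3f\n",
               keys[i],
               n > 1 ? (double) (rank - 1) / (n - 1) : 0.0,
               (double) np / n);
    }
    return 0;
}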
+ */ +Datum +window_cume_dist(PG_FUNCTION_ARGS) +{ + WindowObject winobj = PG_WINDOW_OBJECT(); + rank_context *context; + bool up; + int64 totalrows = WinGetPartitionRowCount(winobj); + + Assert(totalrows > 0); + + up = rank_up(winobj); + context = (rank_context *) + WinGetPartitionLocalMemory(winobj, sizeof(rank_context)); + if (up || context->rank == 1) + { + /* + * The current row is not peer to prior row or is just the first, so + * count up the number of rows that are peer to the current. + */ + int64 row; + + context->rank = WinGetCurrentPosition(winobj) + 1; + + /* + * start from current + 1 + */ + for (row = context->rank; row < totalrows; row++) + { + if (!WinRowsArePeers(winobj, row - 1, row)) + break; + context->rank++; + } + } + + PG_RETURN_FLOAT8((float8) context->rank / (float8) totalrows); +} + +/* + * window_cume_dist_support + * prosupport function for window_cume_dist() + */ +Datum +window_cume_dist_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + if (IsA(rawreq, SupportRequestWFuncMonotonic)) + { + SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq; + + /* cume_dist() is monotonically increasing */ + req->monotonic = MONOTONICFUNC_INCREASING; + PG_RETURN_POINTER(req); + } + + if (IsA(rawreq, SupportRequestOptimizeWindowClause)) + { + SupportRequestOptimizeWindowClause *req = (SupportRequestOptimizeWindowClause *) rawreq; + + /* + * cume_dist() is unaffected by the frame options. Here we set the + * frame options to match what's done in row_number's support + * function. Using ROWS instead of RANGE (the default) saves the + * executor from having to check for peer rows. + */ + req->frameOptions = (FRAMEOPTION_NONDEFAULT | + FRAMEOPTION_ROWS | + FRAMEOPTION_START_UNBOUNDED_PRECEDING | + FRAMEOPTION_END_CURRENT_ROW); + + PG_RETURN_POINTER(req); + } + + PG_RETURN_POINTER(NULL); +} + +/* + * ntile + * compute an exact numeric value with scale 0 (zero), + * ranging from 1 (one) to n, per spec. + */ +Datum +window_ntile(PG_FUNCTION_ARGS) +{ + WindowObject winobj = PG_WINDOW_OBJECT(); + ntile_context *context; + + context = (ntile_context *) + WinGetPartitionLocalMemory(winobj, sizeof(ntile_context)); + + if (context->ntile == 0) + { + /* first call */ + int64 total; + int32 nbuckets; + bool isnull; + + total = WinGetPartitionRowCount(winobj); + nbuckets = DatumGetInt32(WinGetFuncArgCurrent(winobj, 0, &isnull)); + + /* + * per spec: If NT is the null value, then the result is the null + * value. + */ + if (isnull) + PG_RETURN_NULL(); + + /* + * per spec: If NT is less than or equal to 0 (zero), then an + * exception condition is raised. + */ + if (nbuckets <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_NTILE), + errmsg("argument of ntile must be greater than zero"))); + + context->ntile = 1; + context->rows_per_bucket = 0; + context->boundary = total / nbuckets; + if (context->boundary <= 0) + context->boundary = 1; + else + { + /* + * If the total number is not divisible, add 1 row to leading + * buckets. 
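+			 * For example, ntile(3) over 10 rows yields bucket sizes of
+			 * 4, 3 and 3.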
+ */ + context->remainder = total % nbuckets; + if (context->remainder != 0) + context->boundary++; + } + } + + context->rows_per_bucket++; + if (context->boundary < context->rows_per_bucket) + { + /* ntile up */ + if (context->remainder != 0 && context->ntile == context->remainder) + { + context->remainder = 0; + context->boundary -= 1; + } + context->ntile += 1; + context->rows_per_bucket = 1; + } + + PG_RETURN_INT32(context->ntile); +} + +/* + * window_ntile_support + * prosupport function for window_ntile() + */ +Datum +window_ntile_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + if (IsA(rawreq, SupportRequestWFuncMonotonic)) + { + SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq; + WindowFunc *wfunc = req->window_func; + + if (list_length(wfunc->args) == 1) + { + Node *expr = eval_const_expressions(NULL, linitial(wfunc->args)); + + /* + * Due to the Node representation of WindowClause runConditions in + * version prior to v17, we need to insist that ntile arg is Const + * to allow safe application of the runCondition optimization. + */ + if (IsA(expr, Const)) + { + /* + * ntile() is monotonically increasing as the number of + * buckets cannot change after the first call + */ + req->monotonic = MONOTONICFUNC_INCREASING; + PG_RETURN_POINTER(req); + } + } + + PG_RETURN_POINTER(NULL); + } + + if (IsA(rawreq, SupportRequestOptimizeWindowClause)) + { + SupportRequestOptimizeWindowClause *req = (SupportRequestOptimizeWindowClause *) rawreq; + + /* + * ntile() is unaffected by the frame options. Here we set the frame + * options to match what's done in row_number's support function. + * Using ROWS instead of RANGE (the default) saves the executor from + * having to check for peer rows. + */ + req->frameOptions = (FRAMEOPTION_NONDEFAULT | + FRAMEOPTION_ROWS | + FRAMEOPTION_START_UNBOUNDED_PRECEDING | + FRAMEOPTION_END_CURRENT_ROW); + + PG_RETURN_POINTER(req); + } + + PG_RETURN_POINTER(NULL); +} + +/* + * leadlag_common + * common operation of lead() and lag() + * For lead() forward is true, whereas for lag() it is false. + * withoffset indicates we have an offset second argument. + * withdefault indicates we have a default third argument. + */ +static Datum +leadlag_common(FunctionCallInfo fcinfo, + bool forward, bool withoffset, bool withdefault) +{ + WindowObject winobj = PG_WINDOW_OBJECT(); + int32 offset; + bool const_offset; + Datum result; + bool isnull; + bool isout; + + if (withoffset) + { + offset = DatumGetInt32(WinGetFuncArgCurrent(winobj, 1, &isnull)); + if (isnull) + PG_RETURN_NULL(); + const_offset = get_fn_expr_arg_stable(fcinfo->flinfo, 1); + } + else + { + offset = 1; + const_offset = true; + } + + result = WinGetFuncArgInPartition(winobj, 0, + (forward ? offset : -offset), + WINDOW_SEEK_CURRENT, + const_offset, + &isnull, &isout); + + if (isout) + { + /* + * target row is out of the partition; supply default value if + * provided. otherwise it'll stay NULL + */ + if (withdefault) + result = WinGetFuncArgCurrent(winobj, 2, &isnull); + } + + if (isnull) + PG_RETURN_NULL(); + + PG_RETURN_DATUM(result); +} + +/* + * lag + * returns the value of VE evaluated on a row that is 1 + * row before the current row within a partition, + * per spec. + */ +Datum +window_lag(PG_FUNCTION_ARGS) +{ + return leadlag_common(fcinfo, false, false, false); +} + +/* + * lag_with_offset + * returns the value of VE evaluated on a row that is OFFSET + * rows before the current row within a partition, + * per spec. 
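+ *		If that row falls outside the partition the result is NULL; the
+ *		three-argument variant below substitutes its default value instead.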
+ */ +Datum +window_lag_with_offset(PG_FUNCTION_ARGS) +{ + return leadlag_common(fcinfo, false, true, false); +} + +/* + * lag_with_offset_and_default + * same as lag_with_offset but accepts default value + * as its third argument. + */ +Datum +window_lag_with_offset_and_default(PG_FUNCTION_ARGS) +{ + return leadlag_common(fcinfo, false, true, true); +} + +/* + * lead + * returns the value of VE evaluated on a row that is 1 + * row after the current row within a partition, + * per spec. + */ +Datum +window_lead(PG_FUNCTION_ARGS) +{ + return leadlag_common(fcinfo, true, false, false); +} + +/* + * lead_with_offset + * returns the value of VE evaluated on a row that is OFFSET + * number of rows after the current row within a partition, + * per spec. + */ +Datum +window_lead_with_offset(PG_FUNCTION_ARGS) +{ + return leadlag_common(fcinfo, true, true, false); +} + +/* + * lead_with_offset_and_default + * same as lead_with_offset but accepts default value + * as its third argument. + */ +Datum +window_lead_with_offset_and_default(PG_FUNCTION_ARGS) +{ + return leadlag_common(fcinfo, true, true, true); +} + +/* + * first_value + * return the value of VE evaluated on the first row of the + * window frame, per spec. + */ +Datum +window_first_value(PG_FUNCTION_ARGS) +{ + WindowObject winobj = PG_WINDOW_OBJECT(); + Datum result; + bool isnull; + + result = WinGetFuncArgInFrame(winobj, 0, + 0, WINDOW_SEEK_HEAD, true, + &isnull, NULL); + if (isnull) + PG_RETURN_NULL(); + + PG_RETURN_DATUM(result); +} + +/* + * last_value + * return the value of VE evaluated on the last row of the + * window frame, per spec. + */ +Datum +window_last_value(PG_FUNCTION_ARGS) +{ + WindowObject winobj = PG_WINDOW_OBJECT(); + Datum result; + bool isnull; + + result = WinGetFuncArgInFrame(winobj, 0, + 0, WINDOW_SEEK_TAIL, true, + &isnull, NULL); + if (isnull) + PG_RETURN_NULL(); + + PG_RETURN_DATUM(result); +} + +/* + * nth_value + * return the value of VE evaluated on the n-th row from the first + * row of the window frame, per spec. + */ +Datum +window_nth_value(PG_FUNCTION_ARGS) +{ + WindowObject winobj = PG_WINDOW_OBJECT(); + bool const_offset; + Datum result; + bool isnull; + int32 nth; + + nth = DatumGetInt32(WinGetFuncArgCurrent(winobj, 1, &isnull)); + if (isnull) + PG_RETURN_NULL(); + const_offset = get_fn_expr_arg_stable(fcinfo->flinfo, 1); + + if (nth <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_NTH_VALUE), + errmsg("argument of nth_value must be greater than zero"))); + + result = WinGetFuncArgInFrame(winobj, 0, + nth - 1, WINDOW_SEEK_HEAD, const_offset, + &isnull, NULL); + if (isnull) + PG_RETURN_NULL(); + + PG_RETURN_DATUM(result); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/xid.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/xid.c new file mode 100644 index 00000000000..8ac1679c381 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/xid.c @@ -0,0 +1,379 @@ +/*------------------------------------------------------------------------- + * + * xid.c + * POSTGRES transaction identifier and command identifier datatypes. 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/xid.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <limits.h> + +#include "access/multixact.h" +#include "access/transam.h" +#include "access/xact.h" +#include "libpq/pqformat.h" +#include "utils/builtins.h" +#include "utils/xid8.h" + +#define PG_GETARG_COMMANDID(n) DatumGetCommandId(PG_GETARG_DATUM(n)) +#define PG_RETURN_COMMANDID(x) return CommandIdGetDatum(x) + + +Datum +xidin(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); + TransactionId result; + + result = uint32in_subr(str, NULL, "xid", fcinfo->context); + PG_RETURN_TRANSACTIONID(result); +} + +Datum +xidout(PG_FUNCTION_ARGS) +{ + TransactionId transactionId = PG_GETARG_TRANSACTIONID(0); + char *result = (char *) palloc(16); + + snprintf(result, 16, "%lu", (unsigned long) transactionId); + PG_RETURN_CSTRING(result); +} + +/* + * xidrecv - converts external binary format to xid + */ +Datum +xidrecv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + + PG_RETURN_TRANSACTIONID((TransactionId) pq_getmsgint(buf, sizeof(TransactionId))); +} + +/* + * xidsend - converts xid to binary format + */ +Datum +xidsend(PG_FUNCTION_ARGS) +{ + TransactionId arg1 = PG_GETARG_TRANSACTIONID(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint32(&buf, arg1); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * xideq - are two xids equal? + */ +Datum +xideq(PG_FUNCTION_ARGS) +{ + TransactionId xid1 = PG_GETARG_TRANSACTIONID(0); + TransactionId xid2 = PG_GETARG_TRANSACTIONID(1); + + PG_RETURN_BOOL(TransactionIdEquals(xid1, xid2)); +} + +/* + * xidneq - are two xids different? + */ +Datum +xidneq(PG_FUNCTION_ARGS) +{ + TransactionId xid1 = PG_GETARG_TRANSACTIONID(0); + TransactionId xid2 = PG_GETARG_TRANSACTIONID(1); + + PG_RETURN_BOOL(!TransactionIdEquals(xid1, xid2)); +} + +/* + * xid_age - compute age of an XID (relative to latest stable xid) + */ +Datum +xid_age(PG_FUNCTION_ARGS) +{ + TransactionId xid = PG_GETARG_TRANSACTIONID(0); + TransactionId now = GetStableLatestTransactionId(); + + /* Permanent XIDs are always infinitely old */ + if (!TransactionIdIsNormal(xid)) + PG_RETURN_INT32(INT_MAX); + + PG_RETURN_INT32((int32) (now - xid)); +} + +/* + * mxid_age - compute age of a multi XID (relative to latest stable mxid) + */ +Datum +mxid_age(PG_FUNCTION_ARGS) +{ + TransactionId xid = PG_GETARG_TRANSACTIONID(0); + MultiXactId now = ReadNextMultiXactId(); + + if (!MultiXactIdIsValid(xid)) + PG_RETURN_INT32(INT_MAX); + + PG_RETURN_INT32((int32) (now - xid)); +} + +/* + * xidComparator + * qsort comparison function for XIDs + * + * We can't use wraparound comparison for XIDs because that does not respect + * the triangle inequality! Any old sort order will do. + */ +int +xidComparator(const void *arg1, const void *arg2) +{ + TransactionId xid1 = *(const TransactionId *) arg1; + TransactionId xid2 = *(const TransactionId *) arg2; + + if (xid1 > xid2) + return 1; + if (xid1 < xid2) + return -1; + return 0; +} + +/* + * xidLogicalComparator + * qsort comparison function for XIDs + * + * This is used to compare only XIDs from the same epoch (e.g. for backends + * running at the same time). So there must be only normal XIDs, so there's + * no issue with triangle inequality. 
+ */ +int +xidLogicalComparator(const void *arg1, const void *arg2) +{ + TransactionId xid1 = *(const TransactionId *) arg1; + TransactionId xid2 = *(const TransactionId *) arg2; + + Assert(TransactionIdIsNormal(xid1)); + Assert(TransactionIdIsNormal(xid2)); + + if (TransactionIdPrecedes(xid1, xid2)) + return -1; + + if (TransactionIdPrecedes(xid2, xid1)) + return 1; + + return 0; +} + +Datum +xid8toxid(PG_FUNCTION_ARGS) +{ + FullTransactionId fxid = PG_GETARG_FULLTRANSACTIONID(0); + + PG_RETURN_TRANSACTIONID(XidFromFullTransactionId(fxid)); +} + +Datum +xid8in(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); + uint64 result; + + result = uint64in_subr(str, NULL, "xid8", fcinfo->context); + PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromU64(result)); +} + +Datum +xid8out(PG_FUNCTION_ARGS) +{ + FullTransactionId fxid = PG_GETARG_FULLTRANSACTIONID(0); + char *result = (char *) palloc(21); + + snprintf(result, 21, UINT64_FORMAT, U64FromFullTransactionId(fxid)); + PG_RETURN_CSTRING(result); +} + +Datum +xid8recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + uint64 value; + + value = (uint64) pq_getmsgint64(buf); + PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromU64(value)); +} + +Datum +xid8send(PG_FUNCTION_ARGS) +{ + FullTransactionId arg1 = PG_GETARG_FULLTRANSACTIONID(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint64(&buf, (uint64) U64FromFullTransactionId(arg1)); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +Datum +xid8eq(PG_FUNCTION_ARGS) +{ + FullTransactionId fxid1 = PG_GETARG_FULLTRANSACTIONID(0); + FullTransactionId fxid2 = PG_GETARG_FULLTRANSACTIONID(1); + + PG_RETURN_BOOL(FullTransactionIdEquals(fxid1, fxid2)); +} + +Datum +xid8ne(PG_FUNCTION_ARGS) +{ + FullTransactionId fxid1 = PG_GETARG_FULLTRANSACTIONID(0); + FullTransactionId fxid2 = PG_GETARG_FULLTRANSACTIONID(1); + + PG_RETURN_BOOL(!FullTransactionIdEquals(fxid1, fxid2)); +} + +Datum +xid8lt(PG_FUNCTION_ARGS) +{ + FullTransactionId fxid1 = PG_GETARG_FULLTRANSACTIONID(0); + FullTransactionId fxid2 = PG_GETARG_FULLTRANSACTIONID(1); + + PG_RETURN_BOOL(FullTransactionIdPrecedes(fxid1, fxid2)); +} + +Datum +xid8gt(PG_FUNCTION_ARGS) +{ + FullTransactionId fxid1 = PG_GETARG_FULLTRANSACTIONID(0); + FullTransactionId fxid2 = PG_GETARG_FULLTRANSACTIONID(1); + + PG_RETURN_BOOL(FullTransactionIdFollows(fxid1, fxid2)); +} + +Datum +xid8le(PG_FUNCTION_ARGS) +{ + FullTransactionId fxid1 = PG_GETARG_FULLTRANSACTIONID(0); + FullTransactionId fxid2 = PG_GETARG_FULLTRANSACTIONID(1); + + PG_RETURN_BOOL(FullTransactionIdPrecedesOrEquals(fxid1, fxid2)); +} + +Datum +xid8ge(PG_FUNCTION_ARGS) +{ + FullTransactionId fxid1 = PG_GETARG_FULLTRANSACTIONID(0); + FullTransactionId fxid2 = PG_GETARG_FULLTRANSACTIONID(1); + + PG_RETURN_BOOL(FullTransactionIdFollowsOrEquals(fxid1, fxid2)); +} + +Datum +xid8cmp(PG_FUNCTION_ARGS) +{ + FullTransactionId fxid1 = PG_GETARG_FULLTRANSACTIONID(0); + FullTransactionId fxid2 = PG_GETARG_FULLTRANSACTIONID(1); + + if (FullTransactionIdFollows(fxid1, fxid2)) + PG_RETURN_INT32(1); + else if (FullTransactionIdEquals(fxid1, fxid2)) + PG_RETURN_INT32(0); + else + PG_RETURN_INT32(-1); +} + +Datum +xid8_larger(PG_FUNCTION_ARGS) +{ + FullTransactionId fxid1 = PG_GETARG_FULLTRANSACTIONID(0); + FullTransactionId fxid2 = PG_GETARG_FULLTRANSACTIONID(1); + + if (FullTransactionIdFollows(fxid1, fxid2)) + PG_RETURN_FULLTRANSACTIONID(fxid1); + else + PG_RETURN_FULLTRANSACTIONID(fxid2); +} + +Datum +xid8_smaller(PG_FUNCTION_ARGS) +{ + FullTransactionId fxid1 = 
PG_GETARG_FULLTRANSACTIONID(0); + FullTransactionId fxid2 = PG_GETARG_FULLTRANSACTIONID(1); + + if (FullTransactionIdPrecedes(fxid1, fxid2)) + PG_RETURN_FULLTRANSACTIONID(fxid1); + else + PG_RETURN_FULLTRANSACTIONID(fxid2); +} + +/***************************************************************************** + * COMMAND IDENTIFIER ROUTINES * + *****************************************************************************/ + +/* + * cidin - converts CommandId to internal representation. + */ +Datum +cidin(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); + CommandId result; + + result = uint32in_subr(str, NULL, "cid", fcinfo->context); + PG_RETURN_COMMANDID(result); +} + +/* + * cidout - converts a cid to external representation. + */ +Datum +cidout(PG_FUNCTION_ARGS) +{ + CommandId c = PG_GETARG_COMMANDID(0); + char *result = (char *) palloc(16); + + snprintf(result, 16, "%lu", (unsigned long) c); + PG_RETURN_CSTRING(result); +} + +/* + * cidrecv - converts external binary format to cid + */ +Datum +cidrecv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + + PG_RETURN_COMMANDID((CommandId) pq_getmsgint(buf, sizeof(CommandId))); +} + +/* + * cidsend - converts cid to binary format + */ +Datum +cidsend(PG_FUNCTION_ARGS) +{ + CommandId arg1 = PG_GETARG_COMMANDID(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint32(&buf, arg1); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +Datum +cideq(PG_FUNCTION_ARGS) +{ + CommandId arg1 = PG_GETARG_COMMANDID(0); + CommandId arg2 = PG_GETARG_COMMANDID(1); + + PG_RETURN_BOOL(arg1 == arg2); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/xid8funcs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/xid8funcs.c new file mode 100644 index 00000000000..6fbfb3a1cc2 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/xid8funcs.c @@ -0,0 +1,716 @@ +/*------------------------------------------------------------------------- + * xid8funcs.c + * + * Export internal transaction IDs to user level. + * + * Note that only top-level transaction IDs are exposed to user sessions. + * This is important because xid8s frequently persist beyond the global + * xmin horizon, or may even be shipped to other machines, so we cannot + * rely on being able to correlate subtransaction IDs with their parents + * via functions such as SubTransGetTopmostTransaction(). + * + * These functions are used to support the txid_XXX functions and the newer + * pg_current_xact_id, pg_current_snapshot and related fmgr functions, since + * the only difference between them is whether they expose xid8 or int8 values + * to users. The txid_XXX variants should eventually be dropped. + * + * + * Copyright (c) 2003-2023, PostgreSQL Global Development Group + * Author: Jan Wieck, Afilias USA INC. 
+ * 64-bit txids: Marko Kreen, Skype Technologies + * + * src/backend/utils/adt/xid8funcs.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/clog.h" +#include "access/transam.h" +#include "access/xact.h" +#include "access/xlog.h" +#include "funcapi.h" +#include "lib/qunique.h" +#include "libpq/pqformat.h" +#include "miscadmin.h" +#include "postmaster/postmaster.h" +#include "storage/lwlock.h" +#include "storage/procarray.h" +#include "utils/builtins.h" +#include "utils/memutils.h" +#include "utils/snapmgr.h" +#include "utils/xid8.h" + + +/* + * If defined, use bsearch() function for searching for xid8s in snapshots + * that have more than the specified number of values. + */ +#define USE_BSEARCH_IF_NXIP_GREATER 30 + + +/* + * Snapshot containing FullTransactionIds. + */ +typedef struct +{ + /* + * 4-byte length hdr, should not be touched directly. + * + * Explicit embedding is ok as we want always correct alignment anyway. + */ + int32 __varsz; + + uint32 nxip; /* number of fxids in xip array */ + FullTransactionId xmin; + FullTransactionId xmax; + /* in-progress fxids, xmin <= xip[i] < xmax: */ + FullTransactionId xip[FLEXIBLE_ARRAY_MEMBER]; +} pg_snapshot; + +#define PG_SNAPSHOT_SIZE(nxip) \ + (offsetof(pg_snapshot, xip) + sizeof(FullTransactionId) * (nxip)) +#define PG_SNAPSHOT_MAX_NXIP \ + ((MaxAllocSize - offsetof(pg_snapshot, xip)) / sizeof(FullTransactionId)) + +/* + * Compile-time limits on the procarray (MAX_BACKENDS processes plus + * MAX_BACKENDS prepared transactions) guarantee nxip won't be too large. + */ +StaticAssertDecl(MAX_BACKENDS * 2 <= PG_SNAPSHOT_MAX_NXIP, + "possible overflow in pg_current_snapshot()"); + + +/* + * Helper to get a TransactionId from a 64-bit xid with wraparound detection. + * + * It is an ERROR if the xid is in the future. Otherwise, returns true if + * the transaction is still new enough that we can determine whether it + * committed and false otherwise. If *extracted_xid is not NULL, it is set + * to the low 32 bits of the transaction ID (i.e. the actual XID, without the + * epoch). + * + * The caller must hold XactTruncationLock since it's dealing with arbitrary + * XIDs, and must continue to hold it until it's done with any clog lookups + * relating to those XIDs. + */ +static bool +TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid) +{ + TransactionId xid = XidFromFullTransactionId(fxid); + uint32 now_epoch; + TransactionId now_epoch_next_xid; + FullTransactionId now_fullxid; + TransactionId oldest_xid; + FullTransactionId oldest_fxid; + + now_fullxid = ReadNextFullTransactionId(); + now_epoch_next_xid = XidFromFullTransactionId(now_fullxid); + now_epoch = EpochFromFullTransactionId(now_fullxid); + + if (extracted_xid != NULL) + *extracted_xid = xid; + + if (!TransactionIdIsValid(xid)) + return false; + + /* For non-normal transaction IDs, we can ignore the epoch. */ + if (!TransactionIdIsNormal(xid)) + return true; + + /* If the transaction ID is in the future, throw an error. */ + if (!FullTransactionIdPrecedes(fxid, now_fullxid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("transaction ID %llu is in the future", + (unsigned long long) U64FromFullTransactionId(fxid)))); + + /* + * ShmemVariableCache->oldestClogXid is protected by XactTruncationLock, + * but we don't acquire that lock here. Instead, we require the caller to + * acquire it, because the caller is presumably going to look up the + * returned XID. 
If we took and released the lock within this function, a + * CLOG truncation could occur before the caller finished with the XID. + */ + Assert(LWLockHeldByMe(XactTruncationLock)); + + /* + * If fxid is not older than ShmemVariableCache->oldestClogXid, the + * relevant CLOG entry is guaranteed to still exist. Convert + * ShmemVariableCache->oldestClogXid into a FullTransactionId to compare + * it with fxid. Determine the right epoch knowing that oldest_fxid + * shouldn't be more than 2^31 older than now_fullxid. + */ + oldest_xid = ShmemVariableCache->oldestClogXid; + Assert(TransactionIdPrecedesOrEquals(oldest_xid, now_epoch_next_xid)); + if (oldest_xid <= now_epoch_next_xid) + { + oldest_fxid = FullTransactionIdFromEpochAndXid(now_epoch, oldest_xid); + } + else + { + Assert(now_epoch > 0); + oldest_fxid = FullTransactionIdFromEpochAndXid(now_epoch - 1, oldest_xid); + } + return !FullTransactionIdPrecedes(fxid, oldest_fxid); +} + +/* + * Convert a TransactionId obtained from a snapshot held by the caller to a + * FullTransactionId. Use next_fxid as a reference FullTransactionId, so that + * we can compute the high order bits. It must have been obtained by the + * caller with ReadNextFullTransactionId() after the snapshot was created. + */ +static FullTransactionId +widen_snapshot_xid(TransactionId xid, FullTransactionId next_fxid) +{ + TransactionId next_xid = XidFromFullTransactionId(next_fxid); + uint32 epoch = EpochFromFullTransactionId(next_fxid); + + /* Special transaction ID. */ + if (!TransactionIdIsNormal(xid)) + return FullTransactionIdFromEpochAndXid(0, xid); + + /* + * The 64 bit result must be <= next_fxid, since next_fxid hadn't been + * issued yet when the snapshot was created. Every TransactionId in the + * snapshot must therefore be from the same epoch as next_fxid, or the + * epoch before. We know this because next_fxid is never allow to get + * more than one epoch ahead of the TransactionIds in any snapshot. + */ + if (xid > next_xid) + epoch--; + + return FullTransactionIdFromEpochAndXid(epoch, xid); +} + +/* + * txid comparator for qsort/bsearch + */ +static int +cmp_fxid(const void *aa, const void *bb) +{ + FullTransactionId a = *(const FullTransactionId *) aa; + FullTransactionId b = *(const FullTransactionId *) bb; + + if (FullTransactionIdPrecedes(a, b)) + return -1; + if (FullTransactionIdPrecedes(b, a)) + return 1; + return 0; +} + +/* + * Sort a snapshot's txids, so we can use bsearch() later. Also remove + * any duplicates. + * + * For consistency of on-disk representation, we always sort even if bsearch + * will not be used. + */ +static void +sort_snapshot(pg_snapshot *snap) +{ + if (snap->nxip > 1) + { + qsort(snap->xip, snap->nxip, sizeof(FullTransactionId), cmp_fxid); + snap->nxip = qunique(snap->xip, snap->nxip, sizeof(FullTransactionId), + cmp_fxid); + } +} + +/* + * check fxid visibility. + */ +static bool +is_visible_fxid(FullTransactionId value, const pg_snapshot *snap) +{ + if (FullTransactionIdPrecedes(value, snap->xmin)) + return true; + else if (!FullTransactionIdPrecedes(value, snap->xmax)) + return false; +#ifdef USE_BSEARCH_IF_NXIP_GREATER + else if (snap->nxip > USE_BSEARCH_IF_NXIP_GREATER) + { + void *res; + + res = bsearch(&value, snap->xip, snap->nxip, sizeof(FullTransactionId), + cmp_fxid); + /* if found, transaction is still in progress */ + return (res) ? 
false : true; + } +#endif + else + { + uint32 i; + + for (i = 0; i < snap->nxip; i++) + { + if (FullTransactionIdEquals(value, snap->xip[i])) + return false; + } + return true; + } +} + +/* + * helper functions to use StringInfo for pg_snapshot creation. + */ + +static StringInfo +buf_init(FullTransactionId xmin, FullTransactionId xmax) +{ + pg_snapshot snap; + StringInfo buf; + + snap.xmin = xmin; + snap.xmax = xmax; + snap.nxip = 0; + + buf = makeStringInfo(); + appendBinaryStringInfo(buf, &snap, PG_SNAPSHOT_SIZE(0)); + return buf; +} + +static void +buf_add_txid(StringInfo buf, FullTransactionId fxid) +{ + pg_snapshot *snap = (pg_snapshot *) buf->data; + + /* do this before possible realloc */ + snap->nxip++; + + appendBinaryStringInfo(buf, &fxid, sizeof(fxid)); +} + +static pg_snapshot * +buf_finalize(StringInfo buf) +{ + pg_snapshot *snap = (pg_snapshot *) buf->data; + + SET_VARSIZE(snap, buf->len); + + /* buf is not needed anymore */ + buf->data = NULL; + pfree(buf); + + return snap; +} + +/* + * parse snapshot from cstring + */ +static pg_snapshot * +parse_snapshot(const char *str, Node *escontext) +{ + FullTransactionId xmin; + FullTransactionId xmax; + FullTransactionId last_val = InvalidFullTransactionId; + FullTransactionId val; + const char *str_start = str; + char *endp; + StringInfo buf; + + xmin = FullTransactionIdFromU64(strtou64(str, &endp, 10)); + if (*endp != ':') + goto bad_format; + str = endp + 1; + + xmax = FullTransactionIdFromU64(strtou64(str, &endp, 10)); + if (*endp != ':') + goto bad_format; + str = endp + 1; + + /* it should look sane */ + if (!FullTransactionIdIsValid(xmin) || + !FullTransactionIdIsValid(xmax) || + FullTransactionIdPrecedes(xmax, xmin)) + goto bad_format; + + /* allocate buffer */ + buf = buf_init(xmin, xmax); + + /* loop over values */ + while (*str != '\0') + { + /* read next value */ + val = FullTransactionIdFromU64(strtou64(str, &endp, 10)); + str = endp; + + /* require the input to be in order */ + if (FullTransactionIdPrecedes(val, xmin) || + FullTransactionIdFollowsOrEquals(val, xmax) || + FullTransactionIdPrecedes(val, last_val)) + goto bad_format; + + /* skip duplicates */ + if (!FullTransactionIdEquals(val, last_val)) + buf_add_txid(buf, val); + last_val = val; + + if (*str == ',') + str++; + else if (*str != '\0') + goto bad_format; + } + + return buf_finalize(buf); + +bad_format: + ereturn(escontext, NULL, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "pg_snapshot", str_start))); +} + +/* + * pg_current_xact_id() returns xid8 + * + * Return the current toplevel full transaction ID. + * If the current transaction does not have one, one is assigned. + */ +Datum +pg_current_xact_id(PG_FUNCTION_ARGS) +{ + /* + * Must prevent during recovery because if an xid is not assigned we try + * to assign one, which would fail. Programs already rely on this function + * to always return a valid current xid, so we should not change this to + * return NULL or similar invalid xid. + */ + PreventCommandDuringRecovery("pg_current_xact_id()"); + + PG_RETURN_FULLTRANSACTIONID(GetTopFullTransactionId()); +} + +/* + * Same as pg_current_xact_id() but doesn't assign a new xid if there + * isn't one yet. 
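+ * Returns NULL in that case, so read-only transactions can call it without
+ * forcing an XID to be assigned.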
+ */ +Datum +pg_current_xact_id_if_assigned(PG_FUNCTION_ARGS) +{ + FullTransactionId topfxid = GetTopFullTransactionIdIfAny(); + + if (!FullTransactionIdIsValid(topfxid)) + PG_RETURN_NULL(); + + PG_RETURN_FULLTRANSACTIONID(topfxid); +} + +/* + * pg_current_snapshot() returns pg_snapshot + * + * Return current snapshot + * + * Note that only top-transaction XIDs are included in the snapshot. + */ +Datum +pg_current_snapshot(PG_FUNCTION_ARGS) +{ + pg_snapshot *snap; + uint32 nxip, + i; + Snapshot cur; + FullTransactionId next_fxid = ReadNextFullTransactionId(); + + cur = GetActiveSnapshot(); + if (cur == NULL) + elog(ERROR, "no active snapshot set"); + + /* allocate */ + nxip = cur->xcnt; + snap = palloc(PG_SNAPSHOT_SIZE(nxip)); + + /* fill */ + snap->xmin = widen_snapshot_xid(cur->xmin, next_fxid); + snap->xmax = widen_snapshot_xid(cur->xmax, next_fxid); + snap->nxip = nxip; + for (i = 0; i < nxip; i++) + snap->xip[i] = widen_snapshot_xid(cur->xip[i], next_fxid); + + /* + * We want them guaranteed to be in ascending order. This also removes + * any duplicate xids. Normally, an XID can only be assigned to one + * backend, but when preparing a transaction for two-phase commit, there + * is a transient state when both the original backend and the dummy + * PGPROC entry reserved for the prepared transaction hold the same XID. + */ + sort_snapshot(snap); + + /* set size after sorting, because it may have removed duplicate xips */ + SET_VARSIZE(snap, PG_SNAPSHOT_SIZE(snap->nxip)); + + PG_RETURN_POINTER(snap); +} + +/* + * pg_snapshot_in(cstring) returns pg_snapshot + * + * input function for type pg_snapshot + */ +Datum +pg_snapshot_in(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); + pg_snapshot *snap; + + snap = parse_snapshot(str, fcinfo->context); + + PG_RETURN_POINTER(snap); +} + +/* + * pg_snapshot_out(pg_snapshot) returns cstring + * + * output function for type pg_snapshot + */ +Datum +pg_snapshot_out(PG_FUNCTION_ARGS) +{ + pg_snapshot *snap = (pg_snapshot *) PG_GETARG_VARLENA_P(0); + StringInfoData str; + uint32 i; + + initStringInfo(&str); + + appendStringInfo(&str, UINT64_FORMAT ":", + U64FromFullTransactionId(snap->xmin)); + appendStringInfo(&str, UINT64_FORMAT ":", + U64FromFullTransactionId(snap->xmax)); + + for (i = 0; i < snap->nxip; i++) + { + if (i > 0) + appendStringInfoChar(&str, ','); + appendStringInfo(&str, UINT64_FORMAT, + U64FromFullTransactionId(snap->xip[i])); + } + + PG_RETURN_CSTRING(str.data); +} + +/* + * pg_snapshot_recv(internal) returns pg_snapshot + * + * binary input function for type pg_snapshot + * + * format: int4 nxip, int8 xmin, int8 xmax, int8 xip + */ +Datum +pg_snapshot_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + pg_snapshot *snap; + FullTransactionId last = InvalidFullTransactionId; + int nxip; + int i; + FullTransactionId xmin; + FullTransactionId xmax; + + /* load and validate nxip */ + nxip = pq_getmsgint(buf, 4); + if (nxip < 0 || nxip > PG_SNAPSHOT_MAX_NXIP) + goto bad_format; + + xmin = FullTransactionIdFromU64((uint64) pq_getmsgint64(buf)); + xmax = FullTransactionIdFromU64((uint64) pq_getmsgint64(buf)); + if (!FullTransactionIdIsValid(xmin) || + !FullTransactionIdIsValid(xmax) || + FullTransactionIdPrecedes(xmax, xmin)) + goto bad_format; + + snap = palloc(PG_SNAPSHOT_SIZE(nxip)); + snap->xmin = xmin; + snap->xmax = xmax; + + for (i = 0; i < nxip; i++) + { + FullTransactionId cur = + FullTransactionIdFromU64((uint64) pq_getmsgint64(buf)); + + if (FullTransactionIdPrecedes(cur, last) || + 
FullTransactionIdPrecedes(cur, xmin) || + FullTransactionIdPrecedes(xmax, cur)) + goto bad_format; + + /* skip duplicate xips */ + if (FullTransactionIdEquals(cur, last)) + { + i--; + nxip--; + continue; + } + + snap->xip[i] = cur; + last = cur; + } + snap->nxip = nxip; + SET_VARSIZE(snap, PG_SNAPSHOT_SIZE(nxip)); + PG_RETURN_POINTER(snap); + +bad_format: + ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("invalid external pg_snapshot data"))); + PG_RETURN_POINTER(NULL); /* keep compiler quiet */ +} + +/* + * pg_snapshot_send(pg_snapshot) returns bytea + * + * binary output function for type pg_snapshot + * + * format: int4 nxip, u64 xmin, u64 xmax, u64 xip... + */ +Datum +pg_snapshot_send(PG_FUNCTION_ARGS) +{ + pg_snapshot *snap = (pg_snapshot *) PG_GETARG_VARLENA_P(0); + StringInfoData buf; + uint32 i; + + pq_begintypsend(&buf); + pq_sendint32(&buf, snap->nxip); + pq_sendint64(&buf, (int64) U64FromFullTransactionId(snap->xmin)); + pq_sendint64(&buf, (int64) U64FromFullTransactionId(snap->xmax)); + for (i = 0; i < snap->nxip; i++) + pq_sendint64(&buf, (int64) U64FromFullTransactionId(snap->xip[i])); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * pg_visible_in_snapshot(xid8, pg_snapshot) returns bool + * + * is txid visible in snapshot ? + */ +Datum +pg_visible_in_snapshot(PG_FUNCTION_ARGS) +{ + FullTransactionId value = PG_GETARG_FULLTRANSACTIONID(0); + pg_snapshot *snap = (pg_snapshot *) PG_GETARG_VARLENA_P(1); + + PG_RETURN_BOOL(is_visible_fxid(value, snap)); +} + +/* + * pg_snapshot_xmin(pg_snapshot) returns xid8 + * + * return snapshot's xmin + */ +Datum +pg_snapshot_xmin(PG_FUNCTION_ARGS) +{ + pg_snapshot *snap = (pg_snapshot *) PG_GETARG_VARLENA_P(0); + + PG_RETURN_FULLTRANSACTIONID(snap->xmin); +} + +/* + * pg_snapshot_xmax(pg_snapshot) returns xid8 + * + * return snapshot's xmax + */ +Datum +pg_snapshot_xmax(PG_FUNCTION_ARGS) +{ + pg_snapshot *snap = (pg_snapshot *) PG_GETARG_VARLENA_P(0); + + PG_RETURN_FULLTRANSACTIONID(snap->xmax); +} + +/* + * pg_snapshot_xip(pg_snapshot) returns setof xid8 + * + * return in-progress xid8s in snapshot. + */ +Datum +pg_snapshot_xip(PG_FUNCTION_ARGS) +{ + FuncCallContext *fctx; + pg_snapshot *snap; + FullTransactionId value; + + /* on first call initialize fctx and get copy of snapshot */ + if (SRF_IS_FIRSTCALL()) + { + pg_snapshot *arg = (pg_snapshot *) PG_GETARG_VARLENA_P(0); + + fctx = SRF_FIRSTCALL_INIT(); + + /* make a copy of user snapshot */ + snap = MemoryContextAlloc(fctx->multi_call_memory_ctx, VARSIZE(arg)); + memcpy(snap, arg, VARSIZE(arg)); + + fctx->user_fctx = snap; + } + + /* return values one-by-one */ + fctx = SRF_PERCALL_SETUP(); + snap = fctx->user_fctx; + if (fctx->call_cntr < snap->nxip) + { + value = snap->xip[fctx->call_cntr]; + SRF_RETURN_NEXT(fctx, FullTransactionIdGetDatum(value)); + } + else + { + SRF_RETURN_DONE(fctx); + } +} + +/* + * Report the status of a recent transaction ID, or null for wrapped, + * truncated away or otherwise too old XIDs. + * + * The passed epoch-qualified xid is treated as a normal xid, not a + * multixact id. + * + * If it points to a committed subxact the result is the subxact status even + * though the parent xact may still be in progress or may have aborted. + */ +Datum +pg_xact_status(PG_FUNCTION_ARGS) +{ + const char *status; + FullTransactionId fxid = PG_GETARG_FULLTRANSACTIONID(0); + TransactionId xid; + + /* + * We must protect against concurrent truncation of clog entries to avoid + * an I/O error on SLRU lookup. 
+ */ + LWLockAcquire(XactTruncationLock, LW_SHARED); + if (TransactionIdInRecentPast(fxid, &xid)) + { + Assert(TransactionIdIsValid(xid)); + + /* + * Like when doing visibility checks on a row, check whether the + * transaction is still in progress before looking into the CLOG. + * Otherwise we would incorrectly return "committed" for a transaction + * that is committing and has already updated the CLOG, but hasn't + * removed its XID from the proc array yet. (See comment on that race + * condition at the top of heapam_visibility.c) + */ + if (TransactionIdIsInProgress(xid)) + status = "in progress"; + else if (TransactionIdDidCommit(xid)) + status = "committed"; + else + { + /* it must have aborted or crashed */ + status = "aborted"; + } + } + else + { + status = NULL; + } + LWLockRelease(XactTruncationLock); + + if (status == NULL) + PG_RETURN_NULL(); + else + PG_RETURN_TEXT_P(cstring_to_text(status)); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/xml.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/xml.c new file mode 100644 index 00000000000..64c632c07f6 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/xml.c @@ -0,0 +1,5022 @@ +/*------------------------------------------------------------------------- + * + * xml.c + * XML data type support. + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/utils/adt/xml.c + * + *------------------------------------------------------------------------- + */ + +/* + * Generally, XML type support is only available when libxml use was + * configured during the build. But even if that is not done, the + * type and all the functions are available, but most of them will + * fail. For one thing, this avoids having to manage variant catalog + * installations. But it also has nice effects such as that you can + * dump a database containing XML type data even if the server is not + * linked with libxml. Thus, make sure xml_out() works even if nothing + * else does. + */ + +/* + * Notes on memory management: + * + * Sometimes libxml allocates global structures in the hope that it can reuse + * them later on. This makes it impractical to change the xmlMemSetup + * functions on-the-fly; that is likely to lead to trying to pfree() chunks + * allocated with malloc() or vice versa. Since libxml might be used by + * loadable modules, eg libperl, our only safe choices are to change the + * functions at postmaster/backend launch or not at all. Since we'd rather + * not activate libxml in sessions that might never use it, the latter choice + * is the preferred one. However, for debugging purposes it can be awfully + * handy to constrain libxml's allocations to be done in a specific palloc + * context, where they're easy to track. Therefore there is code here that + * can be enabled in debug builds to redirect libxml's allocations into a + * special context LibxmlContext. It's not recommended to turn this on in + * a production build because of the possibility of bad interactions with + * external modules. 
+ */ +/* #define USE_LIBXMLCONTEXT */ + +#include "postgres.h" + +#ifdef USE_LIBXML +#include <libxml/chvalid.h> +#include <libxml/parser.h> +#include <libxml/parserInternals.h> +#include <libxml/tree.h> +#include <libxml/uri.h> +#include <libxml/xmlerror.h> +#include <libxml/xmlsave.h> +#include <libxml/xmlversion.h> +#include <libxml/xmlwriter.h> +#include <libxml/xpath.h> +#include <libxml/xpathInternals.h> + +/* + * We used to check for xmlStructuredErrorContext via a configure test; but + * that doesn't work on Windows, so instead use this grottier method of + * testing the library version number. + */ +#if LIBXML_VERSION >= 20704 +#define HAVE_XMLSTRUCTUREDERRORCONTEXT 1 +#endif + +/* + * libxml2 2.12 decided to insert "const" into the error handler API. + */ +#if LIBXML_VERSION >= 21200 +#define PgXmlErrorPtr const xmlError * +#else +#define PgXmlErrorPtr xmlErrorPtr +#endif + +#endif /* USE_LIBXML */ + +#include "access/htup_details.h" +#include "access/table.h" +#include "catalog/namespace.h" +#include "catalog/pg_class.h" +#include "catalog/pg_type.h" +#include "commands/dbcommands.h" +#include "executor/spi.h" +#include "executor/tablefunc.h" +#include "fmgr.h" +#include "lib/stringinfo.h" +#include "libpq/pqformat.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "nodes/execnodes.h" +#include "nodes/miscnodes.h" +#include "nodes/nodeFuncs.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/date.h" +#include "utils/datetime.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/syscache.h" +#include "utils/xml.h" + + +/* GUC variables */ +__thread int xmlbinary = XMLBINARY_BASE64; +__thread int xmloption = XMLOPTION_CONTENT; + +#ifdef USE_LIBXML + +/* random number to identify PgXmlErrorContext */ +#define ERRCXT_MAGIC 68275028 + +struct PgXmlErrorContext +{ + int magic; + /* strictness argument passed to pg_xml_init */ + PgXmlStrictness strictness; + /* current error status and accumulated message, if any */ + bool err_occurred; + StringInfoData err_buf; + /* previous libxml error handling state (saved by pg_xml_init) */ + xmlStructuredErrorFunc saved_errfunc; + void *saved_errcxt; + /* previous libxml entity handler (saved by pg_xml_init) */ + xmlExternalEntityLoader saved_entityfunc; +}; + +static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID, + xmlParserCtxtPtr ctxt); +static void xml_errsave(Node *escontext, PgXmlErrorContext *errcxt, + int sqlcode, const char *msg); +static void xml_errorHandler(void *data, PgXmlErrorPtr error); +static int errdetail_for_xml_code(int code); +static void chopStringInfoNewlines(StringInfo str); +static void appendStringInfoLineSeparator(StringInfo str); + +#ifdef USE_LIBXMLCONTEXT + +static MemoryContext LibxmlContext = NULL; + +static void xml_memory_init(void); +static void *xml_palloc(size_t size); +static void *xml_repalloc(void *ptr, size_t size); +static void xml_pfree(void *ptr); +static char *xml_pstrdup(const char *string); +#endif /* USE_LIBXMLCONTEXT */ + +static xmlChar *xml_text2xmlChar(text *in); +static int parse_xml_decl(const xmlChar *str, size_t *lenp, + xmlChar **version, xmlChar **encoding, int *standalone); +static bool print_xml_decl(StringInfo buf, const xmlChar *version, + pg_enc encoding, int standalone); +static bool xml_doctype_in_content(const xmlChar *str); +static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg, + bool preserve_whitespace, int encoding, + XmlOptionType *parsed_xmloptiontype, + 
xmlNodePtr *parsed_nodes, + Node *escontext); +static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt); +static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj, + ArrayBuildState *astate, + PgXmlErrorContext *xmlerrcxt); +static xmlChar *pg_xmlCharStrndup(const char *str, size_t len); +#endif /* USE_LIBXML */ + +static void xmldata_root_element_start(StringInfo result, const char *eltname, + const char *xmlschema, const char *targetns, + bool top_level); +static void xmldata_root_element_end(StringInfo result, const char *eltname); +static StringInfo query_to_xml_internal(const char *query, char *tablename, + const char *xmlschema, bool nulls, bool tableforest, + const char *targetns, bool top_level); +static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, + bool nulls, bool tableforest, const char *targetns); +static const char *map_sql_schema_to_xmlschema_types(Oid nspid, + List *relid_list, bool nulls, + bool tableforest, const char *targetns); +static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list, + bool nulls, bool tableforest, + const char *targetns); +static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod); +static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list); +static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod); +static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, + char *tablename, bool nulls, bool tableforest, + const char *targetns, bool top_level); + +/* XMLTABLE support */ +#ifdef USE_LIBXML +/* random number to identify XmlTableContext */ +#define XMLTABLE_CONTEXT_MAGIC 46922182 +typedef struct XmlTableBuilderData +{ + int magic; + int natts; + long int row_count; + PgXmlErrorContext *xmlerrcxt; + xmlParserCtxtPtr ctxt; + xmlDocPtr doc; + xmlXPathContextPtr xpathcxt; + xmlXPathCompExprPtr xpathcomp; + xmlXPathObjectPtr xpathobj; + xmlXPathCompExprPtr *xpathscomp; +} XmlTableBuilderData; +#endif + +static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts); +static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value); +static void XmlTableSetNamespace(struct TableFuncScanState *state, const char *name, + const char *uri); +static void XmlTableSetRowFilter(struct TableFuncScanState *state, const char *path); +static void XmlTableSetColumnFilter(struct TableFuncScanState *state, + const char *path, int colnum); +static bool XmlTableFetchRow(struct TableFuncScanState *state); +static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum, + Oid typid, int32 typmod, bool *isnull); +static void XmlTableDestroyOpaque(struct TableFuncScanState *state); + +const TableFuncRoutine XmlTableRoutine = +{ + XmlTableInitOpaque, + XmlTableSetDocument, + XmlTableSetNamespace, + XmlTableSetRowFilter, + XmlTableSetColumnFilter, + XmlTableFetchRow, + XmlTableGetValue, + XmlTableDestroyOpaque +}; + +#define NO_XML_SUPPORT() \ + ereport(ERROR, \ + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \ + errmsg("unsupported XML feature"), \ + errdetail("This functionality requires the server to be built with libxml support."))) + + +/* from SQL/XML:2008 section 4.9 */ +#define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema" +#define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance" +#define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml" + + +#ifdef USE_LIBXML + +static int +xmlChar_to_encoding(const xmlChar *encoding_name) +{ + int encoding = pg_char_to_encoding((const char *) 
encoding_name); + + if (encoding < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid encoding name \"%s\"", + (const char *) encoding_name))); + return encoding; +} +#endif + + +/* + * xml_in uses a plain C string to VARDATA conversion, so for the time being + * we use the conversion function for the text datatype. + * + * This is only acceptable so long as xmltype and text use the same + * representation. + */ +Datum +xml_in(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + char *s = PG_GETARG_CSTRING(0); + xmltype *vardata; + xmlDocPtr doc; + + /* Build the result object. */ + vardata = (xmltype *) cstring_to_text(s); + + /* + * Parse the data to check if it is well-formed XML data. + * + * Note: we don't need to worry about whether a soft error is detected. + */ + doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding(), + NULL, NULL, fcinfo->context); + if (doc != NULL) + xmlFreeDoc(doc); + + PG_RETURN_XML_P(vardata); +#else + NO_XML_SUPPORT(); + return 0; +#endif +} + + +#define PG_XML_DEFAULT_VERSION "1.0" + + +/* + * xml_out_internal uses a plain VARDATA to C string conversion, so for the + * time being we use the conversion function for the text datatype. + * + * This is only acceptable so long as xmltype and text use the same + * representation. + */ +static char * +xml_out_internal(xmltype *x, pg_enc target_encoding) +{ + char *str = text_to_cstring((text *) x); + +#ifdef USE_LIBXML + size_t len = strlen(str); + xmlChar *version; + int standalone; + int res_code; + + if ((res_code = parse_xml_decl((xmlChar *) str, + &len, &version, NULL, &standalone)) == 0) + { + StringInfoData buf; + + initStringInfo(&buf); + + if (!print_xml_decl(&buf, version, target_encoding, standalone)) + { + /* + * If we are not going to produce an XML declaration, eat a single + * newline in the original string to prevent empty first lines in + * the output. + */ + if (*(str + len) == '\n') + len += 1; + } + appendStringInfoString(&buf, str + len); + + pfree(str); + + return buf.data; + } + + ereport(WARNING, + errcode(ERRCODE_INTERNAL_ERROR), + errmsg_internal("could not parse XML declaration in stored value"), + errdetail_for_xml_code(res_code)); +#endif + return str; +} + + +Datum +xml_out(PG_FUNCTION_ARGS) +{ + xmltype *x = PG_GETARG_XML_P(0); + + /* + * xml_out removes the encoding property in all cases. This is because we + * cannot control from here whether the datum will be converted to a + * different client encoding, so we'd do more harm than good by including + * it. + */ + PG_RETURN_CSTRING(xml_out_internal(x, 0)); +} + + +Datum +xml_recv(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + xmltype *result; + char *str; + char *newstr; + int nbytes; + xmlDocPtr doc; + xmlChar *encodingStr = NULL; + int encoding; + + /* + * Read the data in raw format. We don't know yet what the encoding is, as + * that information is embedded in the xml declaration; so we have to + * parse that before converting to server encoding. + */ + nbytes = buf->len - buf->cursor; + str = (char *) pq_getmsgbytes(buf, nbytes); + + /* + * We need a null-terminated string to pass to parse_xml_decl(). Rather + * than make a separate copy, make the temporary result one byte bigger + * than it needs to be. 
+ */ + result = palloc(nbytes + 1 + VARHDRSZ); + SET_VARSIZE(result, nbytes + VARHDRSZ); + memcpy(VARDATA(result), str, nbytes); + str = VARDATA(result); + str[nbytes] = '\0'; + + parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL); + + /* + * If encoding wasn't explicitly specified in the XML header, treat it as + * UTF-8, as that's the default in XML. This is different from xml_in(), + * where the input has to go through the normal client to server encoding + * conversion. + */ + encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8; + + /* + * Parse the data to check if it is well-formed XML data. Assume that + * xml_parse will throw ERROR if not. + */ + doc = xml_parse(result, xmloption, true, encoding, NULL, NULL, NULL); + xmlFreeDoc(doc); + + /* Now that we know what we're dealing with, convert to server encoding */ + newstr = pg_any_to_server(str, nbytes, encoding); + + if (newstr != str) + { + pfree(result); + result = (xmltype *) cstring_to_text(newstr); + pfree(newstr); + } + + PG_RETURN_XML_P(result); +#else + NO_XML_SUPPORT(); + return 0; +#endif +} + + +Datum +xml_send(PG_FUNCTION_ARGS) +{ + xmltype *x = PG_GETARG_XML_P(0); + char *outval; + StringInfoData buf; + + /* + * xml_out_internal doesn't convert the encoding, it just prints the right + * declaration. pq_sendtext will do the conversion. + */ + outval = xml_out_internal(x, pg_get_client_encoding()); + + pq_begintypsend(&buf); + pq_sendtext(&buf, outval, strlen(outval)); + pfree(outval); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + + +#ifdef USE_LIBXML +static void +appendStringInfoText(StringInfo str, const text *t) +{ + appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t)); +} +#endif + + +static xmltype * +stringinfo_to_xmltype(StringInfo buf) +{ + return (xmltype *) cstring_to_text_with_len(buf->data, buf->len); +} + + +static xmltype * +cstring_to_xmltype(const char *string) +{ + return (xmltype *) cstring_to_text(string); +} + + +#ifdef USE_LIBXML +static xmltype * +xmlBuffer_to_xmltype(xmlBufferPtr buf) +{ + return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf), + xmlBufferLength(buf)); +} +#endif + + +Datum +xmlcomment(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + text *arg = PG_GETARG_TEXT_PP(0); + char *argdata = VARDATA_ANY(arg); + int len = VARSIZE_ANY_EXHDR(arg); + StringInfoData buf; + int i; + + /* check for "--" in string or "-" at the end */ + for (i = 1; i < len; i++) + { + if (argdata[i] == '-' && argdata[i - 1] == '-') + ereport(ERROR, + (errcode(ERRCODE_INVALID_XML_COMMENT), + errmsg("invalid XML comment"))); + } + if (len > 0 && argdata[len - 1] == '-') + ereport(ERROR, + (errcode(ERRCODE_INVALID_XML_COMMENT), + errmsg("invalid XML comment"))); + + initStringInfo(&buf); + appendStringInfoString(&buf, "<!--"); + appendStringInfoText(&buf, arg); + appendStringInfoString(&buf, "-->"); + + PG_RETURN_XML_P(stringinfo_to_xmltype(&buf)); +#else + NO_XML_SUPPORT(); + return 0; +#endif +} + + + +/* + * TODO: xmlconcat needs to merge the notations and unparsed entities + * of the argument values. Not very important in practice, though. 
+ */ +xmltype * +xmlconcat(List *args) +{ +#ifdef USE_LIBXML + int global_standalone = 1; + xmlChar *global_version = NULL; + bool global_version_no_value = false; + StringInfoData buf; + ListCell *v; + + initStringInfo(&buf); + foreach(v, args) + { + xmltype *x = DatumGetXmlP(PointerGetDatum(lfirst(v))); + size_t len; + xmlChar *version; + int standalone; + char *str; + + len = VARSIZE(x) - VARHDRSZ; + str = text_to_cstring((text *) x); + + parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone); + + if (standalone == 0 && global_standalone == 1) + global_standalone = 0; + if (standalone < 0) + global_standalone = -1; + + if (!version) + global_version_no_value = true; + else if (!global_version) + global_version = version; + else if (xmlStrcmp(version, global_version) != 0) + global_version_no_value = true; + + appendStringInfoString(&buf, str + len); + pfree(str); + } + + if (!global_version_no_value || global_standalone >= 0) + { + StringInfoData buf2; + + initStringInfo(&buf2); + + print_xml_decl(&buf2, + (!global_version_no_value) ? global_version : NULL, + 0, + global_standalone); + + appendBinaryStringInfo(&buf2, buf.data, buf.len); + buf = buf2; + } + + return stringinfo_to_xmltype(&buf); +#else + NO_XML_SUPPORT(); + return NULL; +#endif +} + + +/* + * XMLAGG support + */ +Datum +xmlconcat2(PG_FUNCTION_ARGS) +{ + if (PG_ARGISNULL(0)) + { + if (PG_ARGISNULL(1)) + PG_RETURN_NULL(); + else + PG_RETURN_XML_P(PG_GETARG_XML_P(1)); + } + else if (PG_ARGISNULL(1)) + PG_RETURN_XML_P(PG_GETARG_XML_P(0)); + else + PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0), + PG_GETARG_XML_P(1)))); +} + + +Datum +texttoxml(PG_FUNCTION_ARGS) +{ + text *data = PG_GETARG_TEXT_PP(0); + + PG_RETURN_XML_P(xmlparse(data, xmloption, true)); +} + + +Datum +xmltotext(PG_FUNCTION_ARGS) +{ + xmltype *data = PG_GETARG_XML_P(0); + + /* It's actually binary compatible. */ + PG_RETURN_TEXT_P((text *) data); +} + + +text * +xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent) +{ +#ifdef USE_LIBXML + text *volatile result; + xmlDocPtr doc; + XmlOptionType parsed_xmloptiontype; + xmlNodePtr content_nodes; + volatile xmlBufferPtr buf = NULL; + volatile xmlSaveCtxtPtr ctxt = NULL; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + PgXmlErrorContext *xmlerrcxt; +#endif + + if (xmloption_arg != XMLOPTION_DOCUMENT && !indent) + { + /* + * We don't actually need to do anything, so just return the + * binary-compatible input. For backwards-compatibility reasons, + * allow such cases to succeed even without USE_LIBXML. + */ + return (text *) data; + } + +#ifdef USE_LIBXML + /* Parse the input according to the xmloption */ + doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(), + &parsed_xmloptiontype, &content_nodes, + (Node *) &escontext); + if (doc == NULL || escontext.error_occurred) + { + if (doc) + xmlFreeDoc(doc); + /* A soft error must be failure to conform to XMLOPTION_DOCUMENT */ + ereport(ERROR, + (errcode(ERRCODE_NOT_AN_XML_DOCUMENT), + errmsg("not an XML document"))); + } + + /* If we weren't asked to indent, we're done. */ + if (!indent) + { + xmlFreeDoc(doc); + return (text *) data; + } + + /* Otherwise, we gotta spin up some error handling. */ + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); + + PG_TRY(); + { + size_t decl_len = 0; + + /* The serialized data will go into this buffer. 
*/ + buf = xmlBufferCreate(); + + if (buf == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xmlBuffer"); + + /* Detect whether there's an XML declaration */ + parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL); + + /* + * Emit declaration only if the input had one. Note: some versions of + * xmlSaveToBuffer leak memory if a non-null encoding argument is + * passed, so don't do that. We don't want any encoding conversion + * anyway. + */ + if (decl_len == 0) + ctxt = xmlSaveToBuffer(buf, NULL, + XML_SAVE_NO_DECL | XML_SAVE_FORMAT); + else + ctxt = xmlSaveToBuffer(buf, NULL, + XML_SAVE_FORMAT); + + if (ctxt == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xmlSaveCtxt"); + + if (parsed_xmloptiontype == XMLOPTION_DOCUMENT) + { + /* If it's a document, saving is easy. */ + if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not save document to xmlBuffer"); + } + else if (content_nodes != NULL) + { + /* + * Deal with the case where we have non-singly-rooted XML. + * libxml's dump functions don't work well for that without help. + * We build a fake root node that serves as a container for the + * content nodes, and then iterate over the nodes. + */ + xmlNodePtr root; + xmlNodePtr newline; + + root = xmlNewNode(NULL, (const xmlChar *) "content-root"); + if (root == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xml node"); + + /* This attaches root to doc, so we need not free it separately. */ + xmlDocSetRootElement(doc, root); + xmlAddChild(root, content_nodes); + + /* + * We use this node to insert newlines in the dump. Note: in at + * least some libxml versions, xmlNewDocText would not attach the + * node to the document even if we passed it. Therefore, manage + * freeing of this node manually, and pass NULL here to make sure + * there's not a dangling link. 
+ */ + newline = xmlNewDocText(NULL, (const xmlChar *) "\n"); + if (newline == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xml node"); + + for (xmlNodePtr node = root->children; node; node = node->next) + { + /* insert newlines between nodes */ + if (node->type != XML_TEXT_NODE && node->prev != NULL) + { + if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred) + { + xmlFreeNode(newline); + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not save newline to xmlBuffer"); + } + } + + if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred) + { + xmlFreeNode(newline); + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not save content to xmlBuffer"); + } + } + + xmlFreeNode(newline); + } + + if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred) + { + ctxt = NULL; /* don't try to close it again */ + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not close xmlSaveCtxtPtr"); + } + + result = (text *) xmlBuffer_to_xmltype(buf); + } + PG_CATCH(); + { + if (ctxt) + xmlSaveClose(ctxt); + if (buf) + xmlBufferFree(buf); + if (doc) + xmlFreeDoc(doc); + + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); + } + PG_END_TRY(); + + xmlBufferFree(buf); + xmlFreeDoc(doc); + + pg_xml_done(xmlerrcxt, false); + + return result; +#else + NO_XML_SUPPORT(); + return NULL; +#endif +} + + +xmltype * +xmlelement(XmlExpr *xexpr, + Datum *named_argvalue, bool *named_argnull, + Datum *argvalue, bool *argnull) +{ +#ifdef USE_LIBXML + xmltype *result; + List *named_arg_strings; + List *arg_strings; + int i; + ListCell *arg; + ListCell *narg; + PgXmlErrorContext *xmlerrcxt; + volatile xmlBufferPtr buf = NULL; + volatile xmlTextWriterPtr writer = NULL; + + /* + * All arguments are already evaluated, and their values are passed in the + * named_argvalue/named_argnull or argvalue/argnull arrays. This avoids + * issues if one of the arguments involves a call to some other function + * or subsystem that wants to use libxml on its own terms. We examine the + * original XmlExpr to identify the numbers and types of the arguments. 
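+ * + * For example, XMLELEMENT(NAME foo, XMLATTRIBUTES(1 AS bar), 'baz') arrives here with one named argument carrying the attribute value and one ordinary argument carrying the content, and produces <foo bar="1">baz</foo>.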
+ */ + named_arg_strings = NIL; + i = 0; + foreach(arg, xexpr->named_args) + { + Expr *e = (Expr *) lfirst(arg); + char *str; + + if (named_argnull[i]) + str = NULL; + else + str = map_sql_value_to_xml_value(named_argvalue[i], + exprType((Node *) e), + false); + named_arg_strings = lappend(named_arg_strings, str); + i++; + } + + arg_strings = NIL; + i = 0; + foreach(arg, xexpr->args) + { + Expr *e = (Expr *) lfirst(arg); + char *str; + + /* here we can just forget NULL elements immediately */ + if (!argnull[i]) + { + str = map_sql_value_to_xml_value(argvalue[i], + exprType((Node *) e), + true); + arg_strings = lappend(arg_strings, str); + } + i++; + } + + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); + + PG_TRY(); + { + buf = xmlBufferCreate(); + if (buf == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xmlBuffer"); + writer = xmlNewTextWriterMemory(buf, 0); + if (writer == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xmlTextWriter"); + + xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name); + + forboth(arg, named_arg_strings, narg, xexpr->arg_names) + { + char *str = (char *) lfirst(arg); + char *argname = strVal(lfirst(narg)); + + if (str) + xmlTextWriterWriteAttribute(writer, + (xmlChar *) argname, + (xmlChar *) str); + } + + foreach(arg, arg_strings) + { + char *str = (char *) lfirst(arg); + + xmlTextWriterWriteRaw(writer, (xmlChar *) str); + } + + xmlTextWriterEndElement(writer); + + /* we MUST do this now to flush data out to the buffer ... */ + xmlFreeTextWriter(writer); + writer = NULL; + + result = xmlBuffer_to_xmltype(buf); + } + PG_CATCH(); + { + if (writer) + xmlFreeTextWriter(writer); + if (buf) + xmlBufferFree(buf); + + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); + } + PG_END_TRY(); + + xmlBufferFree(buf); + + pg_xml_done(xmlerrcxt, false); + + return result; +#else + NO_XML_SUPPORT(); + return NULL; +#endif +} + + +xmltype * +xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace) +{ +#ifdef USE_LIBXML + xmlDocPtr doc; + + doc = xml_parse(data, xmloption_arg, preserve_whitespace, + GetDatabaseEncoding(), NULL, NULL, NULL); + xmlFreeDoc(doc); + + return (xmltype *) data; +#else + NO_XML_SUPPORT(); + return NULL; +#endif +} + + +xmltype * +xmlpi(const char *target, text *arg, bool arg_is_null, bool *result_is_null) +{ +#ifdef USE_LIBXML + xmltype *result; + StringInfoData buf; + + if (pg_strcasecmp(target, "xml") == 0) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), /* really */ + errmsg("invalid XML processing instruction"), + errdetail("XML processing instruction target name cannot be \"%s\".", target))); + + /* + * Following the SQL standard, the null check comes after the syntax check + * above. 
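+ * Hence the target "xml" is rejected even when the argument is null, while a + * valid target with a null argument, e.g. XMLPI(NAME php, NULL), simply + * yields a null result.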
+ */ + *result_is_null = arg_is_null; + if (*result_is_null) + return NULL; + + initStringInfo(&buf); + + appendStringInfo(&buf, "<?%s", target); + + if (arg != NULL) + { + char *string; + + string = text_to_cstring(arg); + if (strstr(string, "?>") != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION), + errmsg("invalid XML processing instruction"), + errdetail("XML processing instruction cannot contain \"?>\"."))); + + appendStringInfoChar(&buf, ' '); + appendStringInfoString(&buf, string + strspn(string, " ")); + pfree(string); + } + appendStringInfoString(&buf, "?>"); + + result = stringinfo_to_xmltype(&buf); + pfree(buf.data); + return result; +#else + NO_XML_SUPPORT(); + return NULL; +#endif +} + + +xmltype * +xmlroot(xmltype *data, text *version, int standalone) +{ +#ifdef USE_LIBXML + char *str; + size_t len; + xmlChar *orig_version; + int orig_standalone; + StringInfoData buf; + + len = VARSIZE(data) - VARHDRSZ; + str = text_to_cstring((text *) data); + + parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone); + + if (version) + orig_version = xml_text2xmlChar(version); + else + orig_version = NULL; + + switch (standalone) + { + case XML_STANDALONE_YES: + orig_standalone = 1; + break; + case XML_STANDALONE_NO: + orig_standalone = 0; + break; + case XML_STANDALONE_NO_VALUE: + orig_standalone = -1; + break; + case XML_STANDALONE_OMITTED: + /* leave original value */ + break; + } + + initStringInfo(&buf); + print_xml_decl(&buf, orig_version, 0, orig_standalone); + appendStringInfoString(&buf, str + len); + + return stringinfo_to_xmltype(&buf); +#else + NO_XML_SUPPORT(); + return NULL; +#endif +} + + +/* + * Validate document (given as string) against DTD (given as external link) + * + * This has been removed because it is a security hole: unprivileged users + * should not be able to use Postgres to fetch arbitrary external files, + * which unfortunately is exactly what libxml is willing to do with the DTD + * parameter. + */ +Datum +xmlvalidate(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("xmlvalidate is not implemented"))); + return 0; +} + + +bool +xml_is_document(xmltype *arg) +{ +#ifdef USE_LIBXML + xmlDocPtr doc; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + /* + * We'll report "true" if no soft error is reported by xml_parse(). + */ + doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true, + GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext); + if (doc) + xmlFreeDoc(doc); + + return !escontext.error_occurred; +#else /* not USE_LIBXML */ + NO_XML_SUPPORT(); + return false; +#endif /* not USE_LIBXML */ +} + + +#ifdef USE_LIBXML + +/* + * pg_xml_init_library --- set up for use of libxml + * + * This should be called by each function that is about to use libxml + * facilities but doesn't require error handling. It initializes libxml + * and verifies compatibility with the loaded libxml version. These are + * once-per-session activities. + * + * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and + * check) + */ +void +pg_xml_init_library(void) +{ + static __thread bool first_time = true; + + if (first_time) + { + /* Stuff we need do only once per session */ + + /* + * Currently, we have no pure UTF-8 support for internals -- check if + * we can work. 
+ */ + if (sizeof(char) != sizeof(xmlChar)) + ereport(ERROR, + (errmsg("could not initialize XML library"), + errdetail("libxml2 has incompatible char type: sizeof(char)=%zu, sizeof(xmlChar)=%zu.", + sizeof(char), sizeof(xmlChar)))); + +#ifdef USE_LIBXMLCONTEXT + /* Set up libxml's memory allocation our way */ + xml_memory_init(); +#endif + + /* Check library compatibility */ + LIBXML_TEST_VERSION; + + first_time = false; + } +} + +/* + * pg_xml_init --- set up for use of libxml and register an error handler + * + * This should be called by each function that is about to use libxml + * facilities and requires error handling. It initializes libxml with + * pg_xml_init_library() and establishes our libxml error handler. + * + * strictness determines which errors are reported and which are ignored. + * + * Calls to this function MUST be followed by a PG_TRY block that guarantees + * that pg_xml_done() is called during either normal or error exit. + * + * This is exported for use by contrib/xml2, as well as other code that might + * wish to share use of this module's libxml error handler. + */ +PgXmlErrorContext * +pg_xml_init(PgXmlStrictness strictness) +{ + PgXmlErrorContext *errcxt; + void *new_errcxt; + + /* Do one-time setup if needed */ + pg_xml_init_library(); + + /* Create error handling context structure */ + errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext)); + errcxt->magic = ERRCXT_MAGIC; + errcxt->strictness = strictness; + errcxt->err_occurred = false; + initStringInfo(&errcxt->err_buf); + + /* + * Save original error handler and install ours. libxml originally didn't + * distinguish between the contexts for generic and for structured error + * handlers. If we're using an old libxml version, we must thus save the + * generic error context, even though we're using a structured error + * handler. + */ + errcxt->saved_errfunc = xmlStructuredError; + +#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT + errcxt->saved_errcxt = xmlStructuredErrorContext; +#else + errcxt->saved_errcxt = xmlGenericErrorContext; +#endif + + xmlSetStructuredErrorFunc((void *) errcxt, xml_errorHandler); + + /* + * Verify that xmlSetStructuredErrorFunc set the context variable we + * expected it to. If not, the error context pointer we just saved is not + * the correct thing to restore, and since that leaves us without a way to + * restore the context in pg_xml_done, we must fail. + * + * The only known situation in which this test fails is if we compile with + * headers from a libxml2 that doesn't track the structured error context + * separately (< 2.7.4), but at runtime use a version that does, or vice + * versa. The libxml2 authors did not treat that change as constituting + * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library + * fails to protect us from this. + */ + +#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT + new_errcxt = xmlStructuredErrorContext; +#else + new_errcxt = xmlGenericErrorContext; +#endif + + if (new_errcxt != (void *) errcxt) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not set up XML error handler"), + errhint("This probably indicates that the version of libxml2" + " being used is not compatible with the libxml2" + " header files that PostgreSQL was built with."))); + + /* + * Also, install an entity loader to prevent unwanted fetches of external + * files and URLs. 
+ */ + errcxt->saved_entityfunc = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader(xmlPgEntityLoader); + + return errcxt; +} + + +/* + * pg_xml_done --- restore previous libxml error handling + * + * Resets libxml's global error-handling state to what it was before + * pg_xml_init() was called. + * + * This routine verifies that all pending errors have been dealt with + * (in assert-enabled builds, anyway). + */ +void +pg_xml_done(PgXmlErrorContext *errcxt, bool isError) +{ + void *cur_errcxt; + + /* An assert seems like enough protection here */ + Assert(errcxt->magic == ERRCXT_MAGIC); + + /* + * In a normal exit, there should be no un-handled libxml errors. But we + * shouldn't try to enforce this during error recovery, since the longjmp + * could have been thrown before xml_ereport had a chance to run. + */ + Assert(!errcxt->err_occurred || isError); + + /* + * Check that libxml's global state is correct, warn if not. This is a + * real test and not an Assert because it has a higher probability of + * happening. + */ +#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT + cur_errcxt = xmlStructuredErrorContext; +#else + cur_errcxt = xmlGenericErrorContext; +#endif + + if (cur_errcxt != (void *) errcxt) + elog(WARNING, "libxml error handling state is out of sync with xml.c"); + + /* Restore the saved handlers */ + xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc); + xmlSetExternalEntityLoader(errcxt->saved_entityfunc); + + /* + * Mark the struct as invalid, just in case somebody somehow manages to + * call xml_errorHandler or xml_ereport with it. + */ + errcxt->magic = 0; + + /* Release memory */ + pfree(errcxt->err_buf.data); + pfree(errcxt); +} + + +/* + * pg_xml_error_occurred() --- test the error flag + */ +bool +pg_xml_error_occurred(PgXmlErrorContext *errcxt) +{ + return errcxt->err_occurred; +} + + +/* + * SQL/XML allows storing "XML documents" or "XML content". "XML + * documents" are specified by the XML specification and are parsed + * easily by libxml. "XML content" is specified by SQL/XML as the + * production "XMLDecl? content". But libxml can only parse the + * "content" part, so we have to parse the XML declaration ourselves + * to complete this. + */ + +#define CHECK_XML_SPACE(p) \ + do { \ + if (!xmlIsBlank_ch(*(p))) \ + return XML_ERR_SPACE_REQUIRED; \ + } while (0) + +#define SKIP_XML_SPACE(p) \ + while (xmlIsBlank_ch(*(p))) (p)++ + +/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */ +/* Beware of multiple evaluations of argument! */ +#define PG_XMLISNAMECHAR(c) \ + (xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \ + || xmlIsDigit_ch(c) \ + || c == '.' || c == '-' || c == '_' || c == ':' \ + || xmlIsCombiningQ(c) \ + || xmlIsExtender_ch(c)) + +/* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */ +static xmlChar * +xml_pnstrdup(const xmlChar *str, size_t len) +{ + xmlChar *result; + + result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar)); + memcpy(result, str, len * sizeof(xmlChar)); + result[len] = 0; + return result; +} + +/* Ditto, except input is char* */ +static xmlChar * +pg_xmlCharStrndup(const char *str, size_t len) +{ + xmlChar *result; + + result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar)); + memcpy(result, str, len); + result[len] = '\0'; + + return result; +} + +/* + * Copy xmlChar string to PostgreSQL-owned memory, freeing the input. + * + * The input xmlChar is freed regardless of success of the copy. 
+ */ +static char * +xml_pstrdup_and_free(xmlChar *str) +{ + char *result; + + if (str) + { + PG_TRY(); + { + result = pstrdup((char *) str); + } + PG_FINALLY(); + { + xmlFree(str); + } + PG_END_TRY(); + } + else + result = NULL; + + return result; +} + +/* + * str is the null-terminated input string. Remaining arguments are + * output arguments; each can be NULL if value is not wanted. + * version and encoding are returned as locally-palloc'd strings. + * Result is 0 if OK, an error code if not. + */ +static int +parse_xml_decl(const xmlChar *str, size_t *lenp, + xmlChar **version, xmlChar **encoding, int *standalone) +{ + const xmlChar *p; + const xmlChar *save_p; + size_t len; + int utf8char; + int utf8len; + + /* + * Only initialize libxml. We don't need error handling here, but we do + * need to make sure libxml is initialized before calling any of its + * functions. Note that this is safe (and a no-op) if caller has already + * done pg_xml_init(). + */ + pg_xml_init_library(); + + /* Initialize output arguments to "not present" */ + if (version) + *version = NULL; + if (encoding) + *encoding = NULL; + if (standalone) + *standalone = -1; + + p = str; + + if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0) + goto finished; + + /* + * If next char is a name char, it's a PI like <?xml-stylesheet ...?> + * rather than an XMLDecl, so we have done what we came to do and found no + * XMLDecl. + * + * We need an input length value for xmlGetUTF8Char, but there's no need + * to count the whole document size, so use strnlen not strlen. + */ + utf8len = strnlen((const char *) (p + 5), MAX_MULTIBYTE_CHAR_LEN); + utf8char = xmlGetUTF8Char(p + 5, &utf8len); + if (PG_XMLISNAMECHAR(utf8char)) + goto finished; + + p += 5; + + /* version */ + CHECK_XML_SPACE(p); + SKIP_XML_SPACE(p); + if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0) + return XML_ERR_VERSION_MISSING; + p += 7; + SKIP_XML_SPACE(p); + if (*p != '=') + return XML_ERR_VERSION_MISSING; + p += 1; + SKIP_XML_SPACE(p); + + if (*p == '\'' || *p == '"') + { + const xmlChar *q; + + q = xmlStrchr(p + 1, *p); + if (!q) + return XML_ERR_VERSION_MISSING; + + if (version) + *version = xml_pnstrdup(p + 1, q - p - 1); + p = q + 1; + } + else + return XML_ERR_VERSION_MISSING; + + /* encoding */ + save_p = p; + SKIP_XML_SPACE(p); + if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0) + { + CHECK_XML_SPACE(save_p); + p += 8; + SKIP_XML_SPACE(p); + if (*p != '=') + return XML_ERR_MISSING_ENCODING; + p += 1; + SKIP_XML_SPACE(p); + + if (*p == '\'' || *p == '"') + { + const xmlChar *q; + + q = xmlStrchr(p + 1, *p); + if (!q) + return XML_ERR_MISSING_ENCODING; + + if (encoding) + *encoding = xml_pnstrdup(p + 1, q - p - 1); + p = q + 1; + } + else + return XML_ERR_MISSING_ENCODING; + } + else + { + p = save_p; + } + + /* standalone */ + save_p = p; + SKIP_XML_SPACE(p); + if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0) + { + CHECK_XML_SPACE(save_p); + p += 10; + SKIP_XML_SPACE(p); + if (*p != '=') + return XML_ERR_STANDALONE_VALUE; + p += 1; + SKIP_XML_SPACE(p); + if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 || + xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0) + { + if (standalone) + *standalone = 1; + p += 5; + } + else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 || + xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0) + { + if (standalone) + *standalone = 0; + p += 4; + } + else + return XML_ERR_STANDALONE_VALUE; + } + else + { + p = save_p; + } + + SKIP_XML_SPACE(p); + if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0) + return XML_ERR_XMLDECL_NOT_FINISHED; + p += 2; + 
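+ /* + * Falling through to here (rather than jumping to "finished" early) means + * a complete declaration such as <?xml version="1.0" encoding="UTF-8" + * standalone="yes"?> has been consumed, and p now points just past the + * closing "?>". + */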
+finished: + len = p - str; + + for (p = str; p < str + len; p++) + if (*p > 127) + return XML_ERR_INVALID_CHAR; + + if (lenp) + *lenp = len; + + return XML_ERR_OK; +} + + +/* + * Write an XML declaration. On output, we adjust the XML declaration + * as follows. (These rules are the moral equivalent of the clause + * "Serialization of an XML value" in the SQL standard.) + * + * We try to avoid generating an XML declaration if possible. This is + * so that you don't get trivial things like xml '<foo/>' resulting in + * '<?xml version="1.0"?><foo/>', which would surely be annoying. We + * must provide a declaration if the standalone property is specified + * or if we include an encoding declaration. If we have a + * declaration, we must specify a version (XML requires this). + * Otherwise we only make a declaration if the version is not "1.0", + * which is the default version specified in SQL:2003. + */ +static bool +print_xml_decl(StringInfo buf, const xmlChar *version, + pg_enc encoding, int standalone) +{ + if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0) + || (encoding && encoding != PG_UTF8) + || standalone != -1) + { + appendStringInfoString(buf, "<?xml"); + + if (version) + appendStringInfo(buf, " version=\"%s\"", version); + else + appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION); + + if (encoding && encoding != PG_UTF8) + { + /* + * XXX might be useful to convert this to IANA names (ISO-8859-1 + * instead of LATIN1 etc.); needs field experience + */ + appendStringInfo(buf, " encoding=\"%s\"", + pg_encoding_to_char(encoding)); + } + + if (standalone == 1) + appendStringInfoString(buf, " standalone=\"yes\""); + else if (standalone == 0) + appendStringInfoString(buf, " standalone=\"no\""); + appendStringInfoString(buf, "?>"); + + return true; + } + else + return false; +} + +/* + * Test whether an input that is to be parsed as CONTENT contains a DTD. + * + * The SQL/XML:2003 definition of CONTENT ("XMLDecl? content") is not + * satisfied by a document with a DTD, which is a bit of a wart, as it means + * the CONTENT type is not a proper superset of DOCUMENT. SQL/XML:2006 and + * later fix that, by redefining content with reference to the "more + * permissive" Document Node of the XQuery/XPath Data Model, such that any + * DOCUMENT value is indeed also a CONTENT value. That definition is more + * useful, as CONTENT becomes usable for parsing input of unknown form (think + * pg_restore). + * + * As used below in parse_xml when parsing for CONTENT, libxml does not give + * us the 2006+ behavior, but only the 2003; it will choke if the input has + * a DTD. But we can provide the 2006+ definition of CONTENT easily enough, + * by detecting this case first and simply doing the parse as DOCUMENT. + * + * A DTD can be found arbitrarily far in, but that would be a contrived case; + * it will ordinarily start within a few dozen characters. The only things + * that can precede it are an XMLDecl (here, the caller will have called + * parse_xml_decl already), whitespace, comments, and processing instructions. + * This function need only return true if it sees a valid sequence of such + * things leading to <!DOCTYPE. It can simply return false in any other + * cases, including malformed input; that will mean the input gets parsed as + * CONTENT as originally planned, with libxml reporting any errors. + * + * This is only to be called from xml_parse, when pg_xml_init has already + * been called. The input is already in UTF8 encoding. 
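+ * + * For example, " <!-- note --> <?pi stuff?> <!DOCTYPE doc>" makes this function return true, while "<foo/>" or "<?xml-stylesheet href='x'?><foo/>" makes it return false.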
+ */ +static bool +xml_doctype_in_content(const xmlChar *str) +{ + const xmlChar *p = str; + + for (;;) + { + const xmlChar *e; + + SKIP_XML_SPACE(p); + if (*p != '<') + return false; + p++; + + if (*p == '!') + { + p++; + + /* if we see <!DOCTYPE, we can return true */ + if (xmlStrncmp(p, (xmlChar *) "DOCTYPE", 7) == 0) + return true; + + /* otherwise, if it's not a comment, fail */ + if (xmlStrncmp(p, (xmlChar *) "--", 2) != 0) + return false; + /* find end of comment: find -- and a > must follow */ + p = xmlStrstr(p + 2, (xmlChar *) "--"); + if (!p || p[2] != '>') + return false; + /* advance over comment, and keep scanning */ + p += 3; + continue; + } + + /* otherwise, if it's not a PI <?target something?>, fail */ + if (*p != '?') + return false; + p++; + + /* find end of PI (the string ?> is forbidden within a PI) */ + e = xmlStrstr(p, (xmlChar *) "?>"); + if (!e) + return false; + + /* advance over PI, keep scanning */ + p = e + 2; + } +} + + +/* + * Convert a text object to XML internal representation + * + * data is the source data (must not be toasted!), encoding is its encoding, + * and xmloption_arg and preserve_whitespace are options for the + * transformation. + * + * If parsed_xmloptiontype isn't NULL, *parsed_xmloptiontype is set to the + * XmlOptionType actually used to parse the input (typically the same as + * xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode). + * + * If parsed_nodes isn't NULL and the input is not an XML document, the list + * of parsed nodes from the xmlParseBalancedChunkMemory call will be returned + * to *parsed_nodes. + * + * Errors normally result in ereport(ERROR), but if escontext is an + * ErrorSaveContext, then "safe" errors are reported there instead, and the + * caller must check SOFT_ERROR_OCCURRED() to see whether that happened. + * + * Note: it is caller's responsibility to xmlFreeDoc() the result, + * else a permanent memory leak will ensue! But note the result could + * be NULL after a soft error. + * + * TODO maybe libxml2's xmlreader is better? (do not construct DOM, + * yet do not use SAX - see xmlreader.c) + */ +static xmlDocPtr +xml_parse(text *data, XmlOptionType xmloption_arg, + bool preserve_whitespace, int encoding, + XmlOptionType *parsed_xmloptiontype, xmlNodePtr *parsed_nodes, + Node *escontext) +{ + int32 len; + xmlChar *string; + xmlChar *utf8string; + PgXmlErrorContext *xmlerrcxt; + volatile xmlParserCtxtPtr ctxt = NULL; + volatile xmlDocPtr doc = NULL; + + /* + * This step looks annoyingly redundant, but we must do it to have a + * null-terminated string in case encoding conversion isn't required. + */ + len = VARSIZE_ANY_EXHDR(data); /* will be useful later */ + string = xml_text2xmlChar(data); + + /* + * If the data isn't UTF8, we must translate before giving it to libxml. + * + * XXX ideally, we'd catch any encoding conversion failure and return a + * soft error. However, failure to convert to UTF8 should be pretty darn + * rare, so for now this is left undone. 
+ */ + utf8string = pg_do_encoding_conversion(string, + len, + encoding, + PG_UTF8); + + /* Start up libxml and its parser */ + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED); + + /* Use a TRY block to ensure we clean up correctly */ + PG_TRY(); + { + bool parse_as_document = false; + int res_code; + size_t count = 0; + xmlChar *version = NULL; + int standalone = 0; + + /* Any errors here are reported as hard ereport's */ + xmlInitParser(); + + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate parser context"); + + /* Decide whether to parse as document or content */ + if (xmloption_arg == XMLOPTION_DOCUMENT) + parse_as_document = true; + else + { + /* Parse and skip over the XML declaration, if any */ + res_code = parse_xml_decl(utf8string, + &count, &version, NULL, &standalone); + if (res_code != 0) + { + errsave(escontext, + errcode(ERRCODE_INVALID_XML_CONTENT), + errmsg_internal("invalid XML content: invalid XML declaration"), + errdetail_for_xml_code(res_code)); + goto fail; + } + + /* Is there a DOCTYPE element? */ + if (xml_doctype_in_content(utf8string + count)) + parse_as_document = true; + } + + /* initialize output parameters */ + if (parsed_xmloptiontype != NULL) + *parsed_xmloptiontype = parse_as_document ? XMLOPTION_DOCUMENT : + XMLOPTION_CONTENT; + if (parsed_nodes != NULL) + *parsed_nodes = NULL; + + if (parse_as_document) + { + /* + * Note, that here we try to apply DTD defaults + * (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d: + * 'Default values defined by internal DTD are applied'. As for + * external DTDs, we try to support them too, (see SQL/XML:2008 GR + * 10.16.7.e) + */ + doc = xmlCtxtReadDoc(ctxt, utf8string, + NULL, + "UTF-8", + XML_PARSE_DTDATTR // XML_PARSE_NOENT removed to make coverity happy + | (preserve_whitespace ? 
0 : XML_PARSE_NOBLANKS)); + if (doc == NULL || xmlerrcxt->err_occurred) + { + /* Use original option to decide which error code to report */ + if (xmloption_arg == XMLOPTION_DOCUMENT) + xml_errsave(escontext, xmlerrcxt, + ERRCODE_INVALID_XML_DOCUMENT, + "invalid XML document"); + else + xml_errsave(escontext, xmlerrcxt, + ERRCODE_INVALID_XML_CONTENT, + "invalid XML content"); + goto fail; + } + } + else + { + doc = xmlNewDoc(version); + if (doc == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate XML document"); + + Assert(doc->encoding == NULL); + doc->encoding = xmlStrdup((const xmlChar *) "UTF-8"); + if (doc->encoding == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate XML document"); + doc->standalone = standalone; + + /* allow empty content */ + if (*(utf8string + count)) + { + res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, + utf8string + count, + parsed_nodes); + if (res_code != 0 || xmlerrcxt->err_occurred) + { + xml_errsave(escontext, xmlerrcxt, + ERRCODE_INVALID_XML_CONTENT, + "invalid XML content"); + goto fail; + } + } + } + +fail: + ; + } + PG_CATCH(); + { + if (doc != NULL) + xmlFreeDoc(doc); + if (ctxt != NULL) + xmlFreeParserCtxt(ctxt); + + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); + } + PG_END_TRY(); + + xmlFreeParserCtxt(ctxt); + + pg_xml_done(xmlerrcxt, false); + + return doc; +} + + +/* + * xmlChar<->text conversions + */ +static xmlChar * +xml_text2xmlChar(text *in) +{ + return (xmlChar *) text_to_cstring(in); +} + + +#ifdef USE_LIBXMLCONTEXT + +/* + * Manage the special context used for all libxml allocations (but only + * in special debug builds; see notes at top of file) + */ +static void +xml_memory_init(void) +{ + /* Create memory context if not there already */ + if (LibxmlContext == NULL) + LibxmlContext = AllocSetContextCreate(TopMemoryContext, + "Libxml context", + ALLOCSET_DEFAULT_SIZES); + + /* Re-establish the callbacks even if already set */ + xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup); +} + +/* + * Wrappers for memory management functions + */ +static void * +xml_palloc(size_t size) +{ + return MemoryContextAlloc(LibxmlContext, size); +} + + +static void * +xml_repalloc(void *ptr, size_t size) +{ + return repalloc(ptr, size); +} + + +static void +xml_pfree(void *ptr) +{ + /* At least some parts of libxml assume xmlFree(NULL) is allowed */ + if (ptr) + pfree(ptr); +} + + +static char * +xml_pstrdup(const char *string) +{ + return MemoryContextStrdup(LibxmlContext, string); +} +#endif /* USE_LIBXMLCONTEXT */ + + +/* + * xmlPgEntityLoader --- entity loader callback function + * + * Silently prevent any external entity URL from being loaded. We don't want + * to throw an error, so instead make the entity appear to expand to an empty + * string. + * + * We would prefer to allow loading entities that exist in the system's + * global XML catalog; but the available libxml2 APIs make that a complex + * and fragile task. For now, just shut down all external access. + */ +static xmlParserInputPtr +xmlPgEntityLoader(const char *URL, const char *ID, + xmlParserCtxtPtr ctxt) +{ + return xmlNewStringInputStream(ctxt, (const xmlChar *) ""); +} + + +/* + * xml_ereport --- report an XML-related error + * + * The "msg" is the SQL-level message; some can be adopted from the SQL/XML + * standard. This function adds libxml's native error message, if any, as + * detail. 
+ * + * This is exported for modules that want to share the core libxml error + * handler. Note that pg_xml_init() *must* have been called previously. + */ +void +xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg) +{ + char *detail; + + /* Defend against someone passing us a bogus context struct */ + if (errcxt->magic != ERRCXT_MAGIC) + elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext"); + + /* Flag that the current libxml error has been reported */ + errcxt->err_occurred = false; + + /* Include detail only if we have some text from libxml */ + if (errcxt->err_buf.len > 0) + detail = errcxt->err_buf.data; + else + detail = NULL; + + ereport(level, + (errcode(sqlcode), + errmsg_internal("%s", msg), + detail ? errdetail_internal("%s", detail) : 0)); +} + + +/* + * xml_errsave --- save an XML-related error + * + * If escontext is an ErrorSaveContext, error details are saved into it, + * and control returns normally. + * + * Otherwise, the error is thrown, so that this is equivalent to + * xml_ereport() with level == ERROR. + * + * This should be used only for errors that we're sure we do not need + * a transaction abort to clean up after. + */ +static void +xml_errsave(Node *escontext, PgXmlErrorContext *errcxt, + int sqlcode, const char *msg) +{ + char *detail; + + /* Defend against someone passing us a bogus context struct */ + if (errcxt->magic != ERRCXT_MAGIC) + elog(ERROR, "xml_errsave called with invalid PgXmlErrorContext"); + + /* Flag that the current libxml error has been reported */ + errcxt->err_occurred = false; + + /* Include detail only if we have some text from libxml */ + if (errcxt->err_buf.len > 0) + detail = errcxt->err_buf.data; + else + detail = NULL; + + errsave(escontext, + (errcode(sqlcode), + errmsg_internal("%s", msg), + detail ? errdetail_internal("%s", detail) : 0)); +} + + +/* + * Error handler for libxml errors and warnings + */ +static void +xml_errorHandler(void *data, PgXmlErrorPtr error) +{ + PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data; + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt; + xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL; + xmlNodePtr node = error->node; + const xmlChar *name = (node != NULL && + node->type == XML_ELEMENT_NODE) ? node->name : NULL; + int domain = error->domain; + int level = error->level; + StringInfo errorBuf; + + /* + * Defend against someone passing us a bogus context struct. + * + * We force a backend exit if this check fails because longjmp'ing out of + * libxml would likely render it unsafe to use further. + */ + if (xmlerrcxt->magic != ERRCXT_MAGIC) + elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext"); + + /*---------- + * Older libxml versions report some errors differently. + * First, some errors were previously reported as coming from the parser + * domain but are now reported as coming from the namespace domain. + * Second, some warnings were upgraded to errors. + * We attempt to compensate for that here. 
+ *---------- + */ + switch (error->code) + { + case XML_WAR_NS_URI: + level = XML_ERR_ERROR; + domain = XML_FROM_NAMESPACE; + break; + + case XML_ERR_NS_DECL_ERROR: + case XML_WAR_NS_URI_RELATIVE: + case XML_WAR_NS_COLUMN: + case XML_NS_ERR_XML_NAMESPACE: + case XML_NS_ERR_UNDEFINED_NAMESPACE: + case XML_NS_ERR_QNAME: + case XML_NS_ERR_ATTRIBUTE_REDEFINED: + case XML_NS_ERR_EMPTY: + domain = XML_FROM_NAMESPACE; + break; + } + + /* Decide whether to act on the error or not */ + switch (domain) + { + case XML_FROM_PARSER: + case XML_FROM_NONE: + case XML_FROM_MEMORY: + case XML_FROM_IO: + + /* + * Suppress warnings about undeclared entities. We need to do + * this to avoid problems due to not loading DTD definitions. + */ + if (error->code == XML_WAR_UNDECLARED_ENTITY) + return; + + /* Otherwise, accept error regardless of the parsing purpose */ + break; + + default: + /* Ignore error if only doing well-formedness check */ + if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED) + return; + break; + } + + /* Prepare error message in errorBuf */ + errorBuf = makeStringInfo(); + + if (error->line > 0) + appendStringInfo(errorBuf, "line %d: ", error->line); + if (name != NULL) + appendStringInfo(errorBuf, "element %s: ", name); + if (error->message != NULL) + appendStringInfoString(errorBuf, error->message); + else + appendStringInfoString(errorBuf, "(no message provided)"); + + /* + * Append context information to errorBuf. + * + * xmlParserPrintFileContext() uses libxml's "generic" error handler to + * write the context. Since we don't want to duplicate libxml + * functionality here, we set up a generic error handler temporarily. + * + * We use appendStringInfo() directly as libxml's generic error handler. + * This should work because it has essentially the same signature as + * libxml expects, namely (void *ptr, const char *msg, ...). + */ + if (input != NULL) + { + xmlGenericErrorFunc errFuncSaved = xmlGenericError; + void *errCtxSaved = xmlGenericErrorContext; + + xmlSetGenericErrorFunc((void *) errorBuf, + (xmlGenericErrorFunc) appendStringInfo); + + /* Add context information to errorBuf */ + appendStringInfoLineSeparator(errorBuf); + + xmlParserPrintFileContext(input); + + /* Restore generic error func */ + xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved); + } + + /* Get rid of any trailing newlines in errorBuf */ + chopStringInfoNewlines(errorBuf); + + /* + * Legacy error handling mode. err_occurred is never set, we just add the + * message to err_buf. This mode exists because the xml2 contrib module + * uses our error-handling infrastructure, but we don't want to change its + * behaviour since it's deprecated anyway. This is also why we don't + * distinguish between notices, warnings and errors here --- the old-style + * generic error handler wouldn't have done that either. + */ + if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY) + { + appendStringInfoLineSeparator(&xmlerrcxt->err_buf); + appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data, + errorBuf->len); + + pfree(errorBuf->data); + pfree(errorBuf); + return; + } + + /* + * We don't want to ereport() here because that'd probably leave libxml in + * an inconsistent state. Instead, we remember the error and ereport() + * from xml_ereport(). + * + * Warnings and notices can be reported immediately since they won't cause + * a longjmp() out of libxml. 
+ */ + if (level >= XML_ERR_ERROR) + { + appendStringInfoLineSeparator(&xmlerrcxt->err_buf); + appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data, + errorBuf->len); + + xmlerrcxt->err_occurred = true; + } + else if (level >= XML_ERR_WARNING) + { + ereport(WARNING, + (errmsg_internal("%s", errorBuf->data))); + } + else + { + ereport(NOTICE, + (errmsg_internal("%s", errorBuf->data))); + } + + pfree(errorBuf->data); + pfree(errorBuf); +} + + +/* + * Convert libxml error codes into textual errdetail messages. + * + * This should be called within an ereport or errsave invocation, + * just as errdetail would be. + * + * At the moment, we only need to cover those codes that we + * may raise in this file. + */ +static int +errdetail_for_xml_code(int code) +{ + const char *det; + + switch (code) + { + case XML_ERR_INVALID_CHAR: + det = gettext_noop("Invalid character value."); + break; + case XML_ERR_SPACE_REQUIRED: + det = gettext_noop("Space required."); + break; + case XML_ERR_STANDALONE_VALUE: + det = gettext_noop("standalone accepts only 'yes' or 'no'."); + break; + case XML_ERR_VERSION_MISSING: + det = gettext_noop("Malformed declaration: missing version."); + break; + case XML_ERR_MISSING_ENCODING: + det = gettext_noop("Missing encoding in text declaration."); + break; + case XML_ERR_XMLDECL_NOT_FINISHED: + det = gettext_noop("Parsing XML declaration: '?>' expected."); + break; + default: + det = gettext_noop("Unrecognized libxml error code: %d."); + break; + } + + return errdetail(det, code); +} + + +/* + * Remove all trailing newlines from a StringInfo string + */ +static void +chopStringInfoNewlines(StringInfo str) +{ + while (str->len > 0 && str->data[str->len - 1] == '\n') + str->data[--str->len] = '\0'; +} + + +/* + * Append a newline after removing any existing trailing newlines + */ +static void +appendStringInfoLineSeparator(StringInfo str) +{ + chopStringInfoNewlines(str); + if (str->len > 0) + appendStringInfoChar(str, '\n'); +} + + +/* + * Convert one char in the current server encoding to a Unicode codepoint. + */ +static pg_wchar +sqlchar_to_unicode(const char *s) +{ + char *utf8string; + pg_wchar ret[2]; /* need space for trailing zero */ + + /* note we're not assuming s is null-terminated */ + utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8); + + pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret, + pg_encoding_mblen(PG_UTF8, utf8string)); + + if (utf8string != s) + pfree(utf8string); + + return ret[0]; +} + + +static bool +is_valid_xml_namefirst(pg_wchar c) +{ + /* (Letter | '_' | ':') */ + return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c) + || c == '_' || c == ':'); +} + + +static bool +is_valid_xml_namechar(pg_wchar c) +{ + /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */ + return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c) + || xmlIsDigitQ(c) + || c == '.' || c == '-' || c == '_' || c == ':' + || xmlIsCombiningQ(c) + || xmlIsExtenderQ(c)); +} +#endif /* USE_LIBXML */ + + +/* + * Map SQL identifier to XML name; see SQL/XML:2008 section 9.1. + */ +char * +map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped, + bool escape_period) +{ +#ifdef USE_LIBXML + StringInfoData buf; + const char *p; + + /* + * SQL/XML doesn't make use of this case anywhere, so it's probably a + * mistake. 
+ */ + Assert(fully_escaped || !escape_period); + + initStringInfo(&buf); + + for (p = ident; *p; p += pg_mblen(p)) + { + if (*p == ':' && (p == ident || fully_escaped)) + appendStringInfoString(&buf, "_x003A_"); + else if (*p == '_' && *(p + 1) == 'x') + appendStringInfoString(&buf, "_x005F_"); + else if (fully_escaped && p == ident && + pg_strncasecmp(p, "xml", 3) == 0) + { + if (*p == 'x') + appendStringInfoString(&buf, "_x0078_"); + else + appendStringInfoString(&buf, "_x0058_"); + } + else if (escape_period && *p == '.') + appendStringInfoString(&buf, "_x002E_"); + else + { + pg_wchar u = sqlchar_to_unicode(p); + + if ((p == ident) + ? !is_valid_xml_namefirst(u) + : !is_valid_xml_namechar(u)) + appendStringInfo(&buf, "_x%04X_", (unsigned int) u); + else + appendBinaryStringInfo(&buf, p, pg_mblen(p)); + } + } + + return buf.data; +#else /* not USE_LIBXML */ + NO_XML_SUPPORT(); + return NULL; +#endif /* not USE_LIBXML */ +} + + +/* + * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3. + */ +char * +map_xml_name_to_sql_identifier(const char *name) +{ + StringInfoData buf; + const char *p; + + initStringInfo(&buf); + + for (p = name; *p; p += pg_mblen(p)) + { + if (*p == '_' && *(p + 1) == 'x' + && isxdigit((unsigned char) *(p + 2)) + && isxdigit((unsigned char) *(p + 3)) + && isxdigit((unsigned char) *(p + 4)) + && isxdigit((unsigned char) *(p + 5)) + && *(p + 6) == '_') + { + char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1]; + unsigned int u; + + sscanf(p + 2, "%X", &u); + pg_unicode_to_server(u, (unsigned char *) cbuf); + appendStringInfoString(&buf, cbuf); + p += 6; + } + else + appendBinaryStringInfo(&buf, p, pg_mblen(p)); + } + + return buf.data; +} + +/* + * Map SQL value to XML value; see SQL/XML:2008 section 9.8. + * + * When xml_escape_strings is true, then certain characters in string + * values are replaced by entity references (< etc.), as specified + * in SQL/XML:2008 section 9.8 GR 9) a) iii). This is normally what is + * wanted. The false case is mainly useful when the resulting value + * is used with xmlTextWriterWriteAttribute() to write out an + * attribute, because that function does the escaping itself. + */ +char * +map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings) +{ + if (type_is_array_domain(type)) + { + ArrayType *array; + Oid elmtype; + int16 elmlen; + bool elmbyval; + char elmalign; + int num_elems; + Datum *elem_values; + bool *elem_nulls; + StringInfoData buf; + int i; + + array = DatumGetArrayTypeP(value); + elmtype = ARR_ELEMTYPE(array); + get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign); + + deconstruct_array(array, elmtype, + elmlen, elmbyval, elmalign, + &elem_values, &elem_nulls, + &num_elems); + + initStringInfo(&buf); + + for (i = 0; i < num_elems; i++) + { + if (elem_nulls[i]) + continue; + appendStringInfoString(&buf, "<element>"); + appendStringInfoString(&buf, + map_sql_value_to_xml_value(elem_values[i], + elmtype, true)); + appendStringInfoString(&buf, "</element>"); + } + + pfree(elem_values); + pfree(elem_nulls); + + return buf.data; + } + else + { + Oid typeOut; + bool isvarlena; + char *str; + + /* + * Flatten domains; the special-case treatments below should apply to, + * eg, domains over boolean not just boolean. 
+ */ + type = getBaseType(type); + + /* + * Special XSD formatting for some data types + */ + switch (type) + { + case BOOLOID: + if (DatumGetBool(value)) + return "true"; + else + return "false"; + + case DATEOID: + { + DateADT date; + struct pg_tm tm; + char buf[MAXDATELEN + 1]; + + date = DatumGetDateADT(value); + /* XSD doesn't support infinite values */ + if (DATE_NOT_FINITE(date)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("date out of range"), + errdetail("XML does not support infinite date values."))); + j2date(date + POSTGRES_EPOCH_JDATE, + &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday)); + EncodeDateOnly(&tm, USE_XSD_DATES, buf); + + return pstrdup(buf); + } + + case TIMESTAMPOID: + { + Timestamp timestamp; + struct pg_tm tm; + fsec_t fsec; + char buf[MAXDATELEN + 1]; + + timestamp = DatumGetTimestamp(value); + + /* XSD doesn't support infinite values */ + if (TIMESTAMP_NOT_FINITE(timestamp)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"), + errdetail("XML does not support infinite timestamp values."))); + else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0) + EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + return pstrdup(buf); + } + + case TIMESTAMPTZOID: + { + TimestampTz timestamp; + struct pg_tm tm; + int tz; + fsec_t fsec; + const char *tzn = NULL; + char buf[MAXDATELEN + 1]; + + timestamp = DatumGetTimestamp(value); + + /* XSD doesn't support infinite values */ + if (TIMESTAMP_NOT_FINITE(timestamp)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"), + errdetail("XML does not support infinite timestamp values."))); + else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0) + EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + return pstrdup(buf); + } + +#ifdef USE_LIBXML + case BYTEAOID: + { + bytea *bstr = DatumGetByteaPP(value); + PgXmlErrorContext *xmlerrcxt; + volatile xmlBufferPtr buf = NULL; + volatile xmlTextWriterPtr writer = NULL; + char *result; + + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); + + PG_TRY(); + { + buf = xmlBufferCreate(); + if (buf == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xmlBuffer"); + writer = xmlNewTextWriterMemory(buf, 0); + if (writer == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xmlTextWriter"); + + if (xmlbinary == XMLBINARY_BASE64) + xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr), + 0, VARSIZE_ANY_EXHDR(bstr)); + else + xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr), + 0, VARSIZE_ANY_EXHDR(bstr)); + + /* we MUST do this now to flush data out to the buffer */ + xmlFreeTextWriter(writer); + writer = NULL; + + result = pstrdup((const char *) xmlBufferContent(buf)); + } + PG_CATCH(); + { + if (writer) + xmlFreeTextWriter(writer); + if (buf) + xmlBufferFree(buf); + + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); + } + PG_END_TRY(); + + xmlBufferFree(buf); + + pg_xml_done(xmlerrcxt, false); + + return result; + } +#endif /* USE_LIBXML */ + + } + + /* + * otherwise, just use the type's native text representation + */ + getTypeOutputInfo(type, &typeOut, &isvarlena); + str = 
OidOutputFunctionCall(typeOut, value); + + /* ... exactly as-is for XML, and when escaping is not wanted */ + if (type == XMLOID || !xml_escape_strings) + return str; + + /* otherwise, translate special characters as needed */ + return escape_xml(str); + } +} + + +/* + * Escape characters in text that have special meanings in XML. + * + * Returns a palloc'd string. + * + * NB: this is intentionally not dependent on libxml. + */ +char * +escape_xml(const char *str) +{ + StringInfoData buf; + const char *p; + + initStringInfo(&buf); + for (p = str; *p; p++) + { + switch (*p) + { + case '&': + appendStringInfoString(&buf, "&amp;"); + break; + case '<': + appendStringInfoString(&buf, "&lt;"); + break; + case '>': + appendStringInfoString(&buf, "&gt;"); + break; + case '\r': + appendStringInfoString(&buf, "&#x0d;
"); + break; + default: + appendStringInfoCharMacro(&buf, *p); + break; + } + } + return buf.data; +} + + +static char * +_SPI_strdup(const char *s) +{ + size_t len = strlen(s) + 1; + char *ret = SPI_palloc(len); + + memcpy(ret, s, len); + return ret; +} + + +/* + * SQL to XML mapping functions + * + * What follows below was at one point intentionally organized so that + * you can read along in the SQL/XML standard. The functions are + * mostly split up the way the clauses lay out in the standards + * document, and the identifiers are also aligned with the standard + * text. Unfortunately, SQL/XML:2006 reordered the clauses + * differently than SQL/XML:2003, so the order below doesn't make much + * sense anymore. + * + * There are many things going on there: + * + * There are two kinds of mappings: Mapping SQL data (table contents) + * to XML documents, and mapping SQL structure (the "schema") to XML + * Schema. And there are functions that do both at the same time. + * + * Then you can map a database, a schema, or a table, each in both + * ways. This breaks down recursively: Mapping a database invokes + * mapping schemas, which invokes mapping tables, which invokes + * mapping rows, which invokes mapping columns, although you can't + * call the last two from the outside. Because of this, there are a + * number of xyz_internal() functions which are to be called both from + * the function manager wrapper and from some upper layer in a + * recursive call. + * + * See the documentation about what the common function arguments + * nulls, tableforest, and targetns mean. + * + * Some style guidelines for XML output: Use double quotes for quoting + * XML attributes. Indent XML elements by two spaces, but remember + * that a lot of code is called recursively at different levels, so + * it's better not to indent rather than create output that indents + * and outdents weirdly. Add newlines to make the output look nice. + */ + + +/* + * Visibility of objects for XML mappings; see SQL/XML:2008 section + * 4.10.8. + */ + +/* + * Given a query, which must return type oid as first column, produce + * a list of Oids with the query results. + */ +static List * +query_to_oid_list(const char *query) +{ + uint64 i; + List *list = NIL; + int spi_result; + + spi_result = SPI_execute(query, true, 0); + if (spi_result != SPI_OK_SELECT) + elog(ERROR, "SPI_execute returned %s for %s", + SPI_result_code_string(spi_result), query); + + for (i = 0; i < SPI_processed; i++) + { + Datum oid; + bool isnull; + + oid = SPI_getbinval(SPI_tuptable->vals[i], + SPI_tuptable->tupdesc, + 1, + &isnull); + if (!isnull) + list = lappend_oid(list, DatumGetObjectId(oid)); + } + + return list; +} + + +static List * +schema_get_xml_visible_tables(Oid nspid) +{ + StringInfoData query; + + initStringInfo(&query); + appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class" + " WHERE relnamespace = %u AND relkind IN (" + CppAsString2(RELKIND_RELATION) "," + CppAsString2(RELKIND_MATVIEW) "," + CppAsString2(RELKIND_VIEW) ")" + " AND pg_catalog.has_table_privilege (oid, 'SELECT')" + " ORDER BY relname;", nspid); + + return query_to_oid_list(query.data); +} + + +/* + * Including the system schemas is probably not useful for a database + * mapping. 
+ */ +#define XML_VISIBLE_SCHEMAS_EXCLUDE "(nspname ~ '^pg_' OR nspname = 'information_schema')" + +#define XML_VISIBLE_SCHEMAS "SELECT oid FROM pg_catalog.pg_namespace WHERE pg_catalog.has_schema_privilege (oid, 'USAGE') AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE + + +static List * +database_get_xml_visible_schemas(void) +{ + return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;"); +} + + +static List * +database_get_xml_visible_tables(void) +{ + /* At the moment there is no order required here. */ + return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class" + " WHERE relkind IN (" + CppAsString2(RELKIND_RELATION) "," + CppAsString2(RELKIND_MATVIEW) "," + CppAsString2(RELKIND_VIEW) ")" + " AND pg_catalog.has_table_privilege(pg_class.oid, 'SELECT')" + " AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");"); +} + + +/* + * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008 + * section 9.11. + */ + +static StringInfo +table_to_xml_internal(Oid relid, + const char *xmlschema, bool nulls, bool tableforest, + const char *targetns, bool top_level) +{ + StringInfoData query; + + initStringInfo(&query); + appendStringInfo(&query, "SELECT * FROM %s", + DatumGetCString(DirectFunctionCall1(regclassout, + ObjectIdGetDatum(relid)))); + return query_to_xml_internal(query.data, get_rel_name(relid), + xmlschema, nulls, tableforest, + targetns, top_level); +} + + +Datum +table_to_xml(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + bool nulls = PG_GETARG_BOOL(1); + bool tableforest = PG_GETARG_BOOL(2); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); + + PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL, + nulls, tableforest, + targetns, true))); +} + + +Datum +query_to_xml(PG_FUNCTION_ARGS) +{ + char *query = text_to_cstring(PG_GETARG_TEXT_PP(0)); + bool nulls = PG_GETARG_BOOL(1); + bool tableforest = PG_GETARG_BOOL(2); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); + + PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL, + NULL, nulls, tableforest, + targetns, true))); +} + + +Datum +cursor_to_xml(PG_FUNCTION_ARGS) +{ + char *name = text_to_cstring(PG_GETARG_TEXT_PP(0)); + int32 count = PG_GETARG_INT32(1); + bool nulls = PG_GETARG_BOOL(2); + bool tableforest = PG_GETARG_BOOL(3); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4)); + + StringInfoData result; + Portal portal; + uint64 i; + + initStringInfo(&result); + + if (!tableforest) + { + xmldata_root_element_start(&result, "table", NULL, targetns, true); + appendStringInfoChar(&result, '\n'); + } + + SPI_connect(); + portal = SPI_cursor_find(name); + if (portal == NULL) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_CURSOR), + errmsg("cursor \"%s\" does not exist", name))); + + SPI_cursor_fetch(portal, true, count); + for (i = 0; i < SPI_processed; i++) + SPI_sql_row_to_xmlelement(i, &result, NULL, nulls, + tableforest, targetns, true); + + SPI_finish(); + + if (!tableforest) + xmldata_root_element_end(&result, "table"); + + PG_RETURN_XML_P(stringinfo_to_xmltype(&result)); +} + + +/* + * Write the start tag of the root element of a data mapping. + * + * top_level means that this is the very top level of the eventual + * output. For example, when the user calls table_to_xml, then a call + * with a table name to this function is the top level. When the user + * calls database_to_xml, then a call with a schema name to this + * function is not the top level. 
If top_level is false, then the XML + * namespace declarations are omitted, because they supposedly already + * appeared earlier in the output. Repeating them is not wrong, but + * it looks ugly. + */ +static void +xmldata_root_element_start(StringInfo result, const char *eltname, + const char *xmlschema, const char *targetns, + bool top_level) +{ + /* This isn't really wrong but currently makes no sense. */ + Assert(top_level || !xmlschema); + + appendStringInfo(result, "<%s", eltname); + if (top_level) + { + appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\""); + if (strlen(targetns) > 0) + appendStringInfo(result, " xmlns=\"%s\"", targetns); + } + if (xmlschema) + { + /* FIXME: better targets */ + if (strlen(targetns) > 0) + appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns); + else + appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\""); + } + appendStringInfoString(result, ">\n"); +} + + +static void +xmldata_root_element_end(StringInfo result, const char *eltname) +{ + appendStringInfo(result, "</%s>\n", eltname); +} + + +static StringInfo +query_to_xml_internal(const char *query, char *tablename, + const char *xmlschema, bool nulls, bool tableforest, + const char *targetns, bool top_level) +{ + StringInfo result; + char *xmltn; + uint64 i; + + if (tablename) + xmltn = map_sql_identifier_to_xml_name(tablename, true, false); + else + xmltn = "table"; + + result = makeStringInfo(); + + SPI_connect(); + if (SPI_execute(query, true, 0) != SPI_OK_SELECT) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("invalid query"))); + + if (!tableforest) + { + xmldata_root_element_start(result, xmltn, xmlschema, + targetns, top_level); + appendStringInfoChar(result, '\n'); + } + + if (xmlschema) + appendStringInfo(result, "%s\n\n", xmlschema); + + for (i = 0; i < SPI_processed; i++) + SPI_sql_row_to_xmlelement(i, result, tablename, nulls, + tableforest, targetns, top_level); + + if (!tableforest) + xmldata_root_element_end(result, xmltn); + + SPI_finish(); + + return result; +} + + +Datum +table_to_xmlschema(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + bool nulls = PG_GETARG_BOOL(1); + bool tableforest = PG_GETARG_BOOL(2); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); + const char *result; + Relation rel; + + rel = table_open(relid, AccessShareLock); + result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls, + tableforest, targetns); + table_close(rel, NoLock); + + PG_RETURN_XML_P(cstring_to_xmltype(result)); +} + + +Datum +query_to_xmlschema(PG_FUNCTION_ARGS) +{ + char *query = text_to_cstring(PG_GETARG_TEXT_PP(0)); + bool nulls = PG_GETARG_BOOL(1); + bool tableforest = PG_GETARG_BOOL(2); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); + const char *result; + SPIPlanPtr plan; + Portal portal; + + SPI_connect(); + + if ((plan = SPI_prepare(query, 0, NULL)) == NULL) + elog(ERROR, "SPI_prepare(\"%s\") failed", query); + + if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL) + elog(ERROR, "SPI_cursor_open(\"%s\") failed", query); + + result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc, + InvalidOid, nulls, + tableforest, targetns)); + SPI_cursor_close(portal); + SPI_finish(); + + PG_RETURN_XML_P(cstring_to_xmltype(result)); +} + + +Datum +cursor_to_xmlschema(PG_FUNCTION_ARGS) +{ + char *name = text_to_cstring(PG_GETARG_TEXT_PP(0)); + bool nulls = PG_GETARG_BOOL(1); + bool tableforest = PG_GETARG_BOOL(2); + const char *targetns = 
text_to_cstring(PG_GETARG_TEXT_PP(3)); + const char *xmlschema; + Portal portal; + + SPI_connect(); + portal = SPI_cursor_find(name); + if (portal == NULL) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_CURSOR), + errmsg("cursor \"%s\" does not exist", name))); + if (portal->tupDesc == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_CURSOR_STATE), + errmsg("portal \"%s\" does not return tuples", name))); + + xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc, + InvalidOid, nulls, + tableforest, targetns)); + SPI_finish(); + + PG_RETURN_XML_P(cstring_to_xmltype(xmlschema)); +} + + +Datum +table_to_xml_and_xmlschema(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + bool nulls = PG_GETARG_BOOL(1); + bool tableforest = PG_GETARG_BOOL(2); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); + Relation rel; + const char *xmlschema; + + rel = table_open(relid, AccessShareLock); + xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls, + tableforest, targetns); + table_close(rel, NoLock); + + PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, + xmlschema, nulls, tableforest, + targetns, true))); +} + + +Datum +query_to_xml_and_xmlschema(PG_FUNCTION_ARGS) +{ + char *query = text_to_cstring(PG_GETARG_TEXT_PP(0)); + bool nulls = PG_GETARG_BOOL(1); + bool tableforest = PG_GETARG_BOOL(2); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); + + const char *xmlschema; + SPIPlanPtr plan; + Portal portal; + + SPI_connect(); + + if ((plan = SPI_prepare(query, 0, NULL)) == NULL) + elog(ERROR, "SPI_prepare(\"%s\") failed", query); + + if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL) + elog(ERROR, "SPI_cursor_open(\"%s\") failed", query); + + xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc, + InvalidOid, nulls, tableforest, targetns)); + SPI_cursor_close(portal); + SPI_finish(); + + PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL, + xmlschema, nulls, tableforest, + targetns, true))); +} + + +/* + * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008 + * sections 9.13, 9.14. 
+ */ + +static StringInfo +schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls, + bool tableforest, const char *targetns, bool top_level) +{ + StringInfo result; + char *xmlsn; + List *relid_list; + ListCell *cell; + + xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid), + true, false); + result = makeStringInfo(); + + xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level); + appendStringInfoChar(result, '\n'); + + if (xmlschema) + appendStringInfo(result, "%s\n\n", xmlschema); + + SPI_connect(); + + relid_list = schema_get_xml_visible_tables(nspid); + + foreach(cell, relid_list) + { + Oid relid = lfirst_oid(cell); + StringInfo subres; + + subres = table_to_xml_internal(relid, NULL, nulls, tableforest, + targetns, false); + + appendBinaryStringInfo(result, subres->data, subres->len); + appendStringInfoChar(result, '\n'); + } + + SPI_finish(); + + xmldata_root_element_end(result, xmlsn); + + return result; +} + + +Datum +schema_to_xml(PG_FUNCTION_ARGS) +{ + Name name = PG_GETARG_NAME(0); + bool nulls = PG_GETARG_BOOL(1); + bool tableforest = PG_GETARG_BOOL(2); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); + + char *schemaname; + Oid nspid; + + schemaname = NameStr(*name); + nspid = LookupExplicitNamespace(schemaname, false); + + PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL, + nulls, tableforest, targetns, true))); +} + + +/* + * Write the start element of the root element of an XML Schema mapping. + */ +static void +xsd_schema_element_start(StringInfo result, const char *targetns) +{ + appendStringInfoString(result, + "<xsd:schema\n" + " xmlns:xsd=\"" NAMESPACE_XSD "\""); + if (strlen(targetns) > 0) + appendStringInfo(result, + "\n" + " targetNamespace=\"%s\"\n" + " elementFormDefault=\"qualified\"", + targetns); + appendStringInfoString(result, + ">\n\n"); +} + + +static void +xsd_schema_element_end(StringInfo result) +{ + appendStringInfoString(result, "</xsd:schema>"); +} + + +static StringInfo +schema_to_xmlschema_internal(const char *schemaname, bool nulls, + bool tableforest, const char *targetns) +{ + Oid nspid; + List *relid_list; + List *tupdesc_list; + ListCell *cell; + StringInfo result; + + result = makeStringInfo(); + + nspid = LookupExplicitNamespace(schemaname, false); + + xsd_schema_element_start(result, targetns); + + SPI_connect(); + + relid_list = schema_get_xml_visible_tables(nspid); + + tupdesc_list = NIL; + foreach(cell, relid_list) + { + Relation rel; + + rel = table_open(lfirst_oid(cell), AccessShareLock); + tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att)); + table_close(rel, NoLock); + } + + appendStringInfoString(result, + map_sql_typecoll_to_xmlschema_types(tupdesc_list)); + + appendStringInfoString(result, + map_sql_schema_to_xmlschema_types(nspid, relid_list, + nulls, tableforest, targetns)); + + xsd_schema_element_end(result); + + SPI_finish(); + + return result; +} + + +Datum +schema_to_xmlschema(PG_FUNCTION_ARGS) +{ + Name name = PG_GETARG_NAME(0); + bool nulls = PG_GETARG_BOOL(1); + bool tableforest = PG_GETARG_BOOL(2); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); + + PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name), + nulls, tableforest, targetns))); +} + + +Datum +schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS) +{ + Name name = PG_GETARG_NAME(0); + bool nulls = PG_GETARG_BOOL(1); + bool tableforest = PG_GETARG_BOOL(2); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); + char 
*schemaname; + Oid nspid; + StringInfo xmlschema; + + schemaname = NameStr(*name); + nspid = LookupExplicitNamespace(schemaname, false); + + xmlschema = schema_to_xmlschema_internal(schemaname, nulls, + tableforest, targetns); + + PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, + xmlschema->data, nulls, + tableforest, targetns, true))); +} + + +/* + * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008 + * sections 9.16, 9.17. + */ + +static StringInfo +database_to_xml_internal(const char *xmlschema, bool nulls, + bool tableforest, const char *targetns) +{ + StringInfo result; + List *nspid_list; + ListCell *cell; + char *xmlcn; + + xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId), + true, false); + result = makeStringInfo(); + + xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true); + appendStringInfoChar(result, '\n'); + + if (xmlschema) + appendStringInfo(result, "%s\n\n", xmlschema); + + SPI_connect(); + + nspid_list = database_get_xml_visible_schemas(); + + foreach(cell, nspid_list) + { + Oid nspid = lfirst_oid(cell); + StringInfo subres; + + subres = schema_to_xml_internal(nspid, NULL, nulls, + tableforest, targetns, false); + + appendBinaryStringInfo(result, subres->data, subres->len); + appendStringInfoChar(result, '\n'); + } + + SPI_finish(); + + xmldata_root_element_end(result, xmlcn); + + return result; +} + + +Datum +database_to_xml(PG_FUNCTION_ARGS) +{ + bool nulls = PG_GETARG_BOOL(0); + bool tableforest = PG_GETARG_BOOL(1); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2)); + + PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls, + tableforest, targetns))); +} + + +static StringInfo +database_to_xmlschema_internal(bool nulls, bool tableforest, + const char *targetns) +{ + List *relid_list; + List *nspid_list; + List *tupdesc_list; + ListCell *cell; + StringInfo result; + + result = makeStringInfo(); + + xsd_schema_element_start(result, targetns); + + SPI_connect(); + + relid_list = database_get_xml_visible_tables(); + nspid_list = database_get_xml_visible_schemas(); + + tupdesc_list = NIL; + foreach(cell, relid_list) + { + Relation rel; + + rel = table_open(lfirst_oid(cell), AccessShareLock); + tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att)); + table_close(rel, NoLock); + } + + appendStringInfoString(result, + map_sql_typecoll_to_xmlschema_types(tupdesc_list)); + + appendStringInfoString(result, + map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns)); + + xsd_schema_element_end(result); + + SPI_finish(); + + return result; +} + + +Datum +database_to_xmlschema(PG_FUNCTION_ARGS) +{ + bool nulls = PG_GETARG_BOOL(0); + bool tableforest = PG_GETARG_BOOL(1); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2)); + + PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls, + tableforest, targetns))); +} + + +Datum +database_to_xml_and_xmlschema(PG_FUNCTION_ARGS) +{ + bool nulls = PG_GETARG_BOOL(0); + bool tableforest = PG_GETARG_BOOL(1); + const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2)); + StringInfo xmlschema; + + xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns); + + PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data, + nulls, tableforest, targetns))); +} + + +/* + * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section + * 9.2. 
+ */ +static char * +map_multipart_sql_identifier_to_xml_name(const char *a, const char *b, const char *c, const char *d) +{ + StringInfoData result; + + initStringInfo(&result); + + if (a) + appendStringInfoString(&result, + map_sql_identifier_to_xml_name(a, true, true)); + if (b) + appendStringInfo(&result, ".%s", + map_sql_identifier_to_xml_name(b, true, true)); + if (c) + appendStringInfo(&result, ".%s", + map_sql_identifier_to_xml_name(c, true, true)); + if (d) + appendStringInfo(&result, ".%s", + map_sql_identifier_to_xml_name(d, true, true)); + + return result.data; +} + + +/* + * Map an SQL table to an XML Schema document; see SQL/XML:2008 + * section 9.11. + * + * Map an SQL table to XML Schema data types; see SQL/XML:2008 section + * 9.9. + */ +static const char * +map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls, + bool tableforest, const char *targetns) +{ + int i; + char *xmltn; + char *tabletypename; + char *rowtypename; + StringInfoData result; + + initStringInfo(&result); + + if (OidIsValid(relid)) + { + HeapTuple tuple; + Form_pg_class reltuple; + + tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for relation %u", relid); + reltuple = (Form_pg_class) GETSTRUCT(tuple); + + xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname), + true, false); + + tabletypename = map_multipart_sql_identifier_to_xml_name("TableType", + get_database_name(MyDatabaseId), + get_namespace_name(reltuple->relnamespace), + NameStr(reltuple->relname)); + + rowtypename = map_multipart_sql_identifier_to_xml_name("RowType", + get_database_name(MyDatabaseId), + get_namespace_name(reltuple->relnamespace), + NameStr(reltuple->relname)); + + ReleaseSysCache(tuple); + } + else + { + if (tableforest) + xmltn = "row"; + else + xmltn = "table"; + + tabletypename = "TableType"; + rowtypename = "RowType"; + } + + xsd_schema_element_start(&result, targetns); + + appendStringInfoString(&result, + map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc))); + + appendStringInfo(&result, + "<xsd:complexType name=\"%s\">\n" + " <xsd:sequence>\n", + rowtypename); + + for (i = 0; i < tupdesc->natts; i++) + { + Form_pg_attribute att = TupleDescAttr(tupdesc, i); + + if (att->attisdropped) + continue; + appendStringInfo(&result, + " <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n", + map_sql_identifier_to_xml_name(NameStr(att->attname), + true, false), + map_sql_type_to_xml_name(att->atttypid, -1), + nulls ? " nillable=\"true\"" : " minOccurs=\"0\""); + } + + appendStringInfoString(&result, + " </xsd:sequence>\n" + "</xsd:complexType>\n\n"); + + if (!tableforest) + { + appendStringInfo(&result, + "<xsd:complexType name=\"%s\">\n" + " <xsd:sequence>\n" + " <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n" + " </xsd:sequence>\n" + "</xsd:complexType>\n\n", + tabletypename, rowtypename); + + appendStringInfo(&result, + "<xsd:element name=\"%s\" type=\"%s\"/>\n\n", + xmltn, tabletypename); + } + else + appendStringInfo(&result, + "<xsd:element name=\"%s\" type=\"%s\"/>\n\n", + xmltn, rowtypename); + + xsd_schema_element_end(&result); + + return result.data; +} + + +/* + * Map an SQL schema to XML Schema data types; see SQL/XML:2008 + * section 9.12. 
+ */ +static const char * +map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls, + bool tableforest, const char *targetns) +{ + char *dbname; + char *nspname; + char *xmlsn; + char *schematypename; + StringInfoData result; + ListCell *cell; + + dbname = get_database_name(MyDatabaseId); + nspname = get_namespace_name(nspid); + + initStringInfo(&result); + + xmlsn = map_sql_identifier_to_xml_name(nspname, true, false); + + schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType", + dbname, + nspname, + NULL); + + appendStringInfo(&result, + "<xsd:complexType name=\"%s\">\n", schematypename); + if (!tableforest) + appendStringInfoString(&result, + " <xsd:all>\n"); + else + appendStringInfoString(&result, + " <xsd:sequence>\n"); + + foreach(cell, relid_list) + { + Oid relid = lfirst_oid(cell); + char *relname = get_rel_name(relid); + char *xmltn = map_sql_identifier_to_xml_name(relname, true, false); + char *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType", + dbname, + nspname, + relname); + + if (!tableforest) + appendStringInfo(&result, + " <xsd:element name=\"%s\" type=\"%s\"/>\n", + xmltn, tabletypename); + else + appendStringInfo(&result, + " <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n", + xmltn, tabletypename); + } + + if (!tableforest) + appendStringInfoString(&result, + " </xsd:all>\n"); + else + appendStringInfoString(&result, + " </xsd:sequence>\n"); + appendStringInfoString(&result, + "</xsd:complexType>\n\n"); + + appendStringInfo(&result, + "<xsd:element name=\"%s\" type=\"%s\"/>\n\n", + xmlsn, schematypename); + + return result.data; +} + + +/* + * Map an SQL catalog to XML Schema data types; see SQL/XML:2008 + * section 9.15. + */ +static const char * +map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls, + bool tableforest, const char *targetns) +{ + char *dbname; + char *xmlcn; + char *catalogtypename; + StringInfoData result; + ListCell *cell; + + dbname = get_database_name(MyDatabaseId); + + initStringInfo(&result); + + xmlcn = map_sql_identifier_to_xml_name(dbname, true, false); + + catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType", + dbname, + NULL, + NULL); + + appendStringInfo(&result, + "<xsd:complexType name=\"%s\">\n", catalogtypename); + appendStringInfoString(&result, + " <xsd:all>\n"); + + foreach(cell, nspid_list) + { + Oid nspid = lfirst_oid(cell); + char *nspname = get_namespace_name(nspid); + char *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false); + char *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType", + dbname, + nspname, + NULL); + + appendStringInfo(&result, + " <xsd:element name=\"%s\" type=\"%s\"/>\n", + xmlsn, schematypename); + } + + appendStringInfoString(&result, + " </xsd:all>\n"); + appendStringInfoString(&result, + "</xsd:complexType>\n\n"); + + appendStringInfo(&result, + "<xsd:element name=\"%s\" type=\"%s\"/>\n\n", + xmlcn, catalogtypename); + + return result.data; +} + + +/* + * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4. 
+ */ +static const char * +map_sql_type_to_xml_name(Oid typeoid, int typmod) +{ + StringInfoData result; + + initStringInfo(&result); + + switch (typeoid) + { + case BPCHAROID: + if (typmod == -1) + appendStringInfoString(&result, "CHAR"); + else + appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ); + break; + case VARCHAROID: + if (typmod == -1) + appendStringInfoString(&result, "VARCHAR"); + else + appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ); + break; + case NUMERICOID: + if (typmod == -1) + appendStringInfoString(&result, "NUMERIC"); + else + appendStringInfo(&result, "NUMERIC_%d_%d", + ((typmod - VARHDRSZ) >> 16) & 0xffff, + (typmod - VARHDRSZ) & 0xffff); + break; + case INT4OID: + appendStringInfoString(&result, "INTEGER"); + break; + case INT2OID: + appendStringInfoString(&result, "SMALLINT"); + break; + case INT8OID: + appendStringInfoString(&result, "BIGINT"); + break; + case FLOAT4OID: + appendStringInfoString(&result, "REAL"); + break; + case FLOAT8OID: + appendStringInfoString(&result, "DOUBLE"); + break; + case BOOLOID: + appendStringInfoString(&result, "BOOLEAN"); + break; + case TIMEOID: + if (typmod == -1) + appendStringInfoString(&result, "TIME"); + else + appendStringInfo(&result, "TIME_%d", typmod); + break; + case TIMETZOID: + if (typmod == -1) + appendStringInfoString(&result, "TIME_WTZ"); + else + appendStringInfo(&result, "TIME_WTZ_%d", typmod); + break; + case TIMESTAMPOID: + if (typmod == -1) + appendStringInfoString(&result, "TIMESTAMP"); + else + appendStringInfo(&result, "TIMESTAMP_%d", typmod); + break; + case TIMESTAMPTZOID: + if (typmod == -1) + appendStringInfoString(&result, "TIMESTAMP_WTZ"); + else + appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod); + break; + case DATEOID: + appendStringInfoString(&result, "DATE"); + break; + case XMLOID: + appendStringInfoString(&result, "XML"); + break; + default: + { + HeapTuple tuple; + Form_pg_type typtuple; + + tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for type %u", typeoid); + typtuple = (Form_pg_type) GETSTRUCT(tuple); + + appendStringInfoString(&result, + map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT", + get_database_name(MyDatabaseId), + get_namespace_name(typtuple->typnamespace), + NameStr(typtuple->typname))); + + ReleaseSysCache(tuple); + } + } + + return result.data; +} + + +/* + * Map a collection of SQL data types to XML Schema data types; see + * SQL/XML:2008 section 9.7. 
+ */ +static const char * +map_sql_typecoll_to_xmlschema_types(List *tupdesc_list) +{ + List *uniquetypes = NIL; + int i; + StringInfoData result; + ListCell *cell0; + + /* extract all column types used in the set of TupleDescs */ + foreach(cell0, tupdesc_list) + { + TupleDesc tupdesc = (TupleDesc) lfirst(cell0); + + for (i = 0; i < tupdesc->natts; i++) + { + Form_pg_attribute att = TupleDescAttr(tupdesc, i); + + if (att->attisdropped) + continue; + uniquetypes = list_append_unique_oid(uniquetypes, att->atttypid); + } + } + + /* add base types of domains */ + foreach(cell0, uniquetypes) + { + Oid typid = lfirst_oid(cell0); + Oid basetypid = getBaseType(typid); + + if (basetypid != typid) + uniquetypes = list_append_unique_oid(uniquetypes, basetypid); + } + + /* Convert to textual form */ + initStringInfo(&result); + + foreach(cell0, uniquetypes) + { + appendStringInfo(&result, "%s\n", + map_sql_type_to_xmlschema_type(lfirst_oid(cell0), + -1)); + } + + return result.data; +} + + +/* + * Map an SQL data type to a named XML Schema data type; see + * SQL/XML:2008 sections 9.5 and 9.6. + * + * (The distinction between 9.5 and 9.6 is basically that 9.6 adds + * a name attribute, which this function does. The name-less version + * 9.5 doesn't appear to be required anywhere.) + */ +static const char * +map_sql_type_to_xmlschema_type(Oid typeoid, int typmod) +{ + StringInfoData result; + const char *typename = map_sql_type_to_xml_name(typeoid, typmod); + + initStringInfo(&result); + + if (typeoid == XMLOID) + { + appendStringInfoString(&result, + "<xsd:complexType mixed=\"true\">\n" + " <xsd:sequence>\n" + " <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n" + " </xsd:sequence>\n" + "</xsd:complexType>\n"); + } + else + { + appendStringInfo(&result, + "<xsd:simpleType name=\"%s\">\n", typename); + + switch (typeoid) + { + case BPCHAROID: + case VARCHAROID: + case TEXTOID: + appendStringInfoString(&result, + " <xsd:restriction base=\"xsd:string\">\n"); + if (typmod != -1) + appendStringInfo(&result, + " <xsd:maxLength value=\"%d\"/>\n", + typmod - VARHDRSZ); + appendStringInfoString(&result, " </xsd:restriction>\n"); + break; + + case BYTEAOID: + appendStringInfo(&result, + " <xsd:restriction base=\"xsd:%s\">\n" + " </xsd:restriction>\n", + xmlbinary == XMLBINARY_BASE64 ? 
"base64Binary" : "hexBinary"); + break; + + case NUMERICOID: + if (typmod != -1) + appendStringInfo(&result, + " <xsd:restriction base=\"xsd:decimal\">\n" + " <xsd:totalDigits value=\"%d\"/>\n" + " <xsd:fractionDigits value=\"%d\"/>\n" + " </xsd:restriction>\n", + ((typmod - VARHDRSZ) >> 16) & 0xffff, + (typmod - VARHDRSZ) & 0xffff); + break; + + case INT2OID: + appendStringInfo(&result, + " <xsd:restriction base=\"xsd:short\">\n" + " <xsd:maxInclusive value=\"%d\"/>\n" + " <xsd:minInclusive value=\"%d\"/>\n" + " </xsd:restriction>\n", + SHRT_MAX, SHRT_MIN); + break; + + case INT4OID: + appendStringInfo(&result, + " <xsd:restriction base=\"xsd:int\">\n" + " <xsd:maxInclusive value=\"%d\"/>\n" + " <xsd:minInclusive value=\"%d\"/>\n" + " </xsd:restriction>\n", + INT_MAX, INT_MIN); + break; + + case INT8OID: + appendStringInfo(&result, + " <xsd:restriction base=\"xsd:long\">\n" + " <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n" + " <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n" + " </xsd:restriction>\n", + PG_INT64_MAX, + PG_INT64_MIN); + break; + + case FLOAT4OID: + appendStringInfoString(&result, + " <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n"); + break; + + case FLOAT8OID: + appendStringInfoString(&result, + " <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n"); + break; + + case BOOLOID: + appendStringInfoString(&result, + " <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n"); + break; + + case TIMEOID: + case TIMETZOID: + { + const char *tz = (typeoid == TIMETZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : ""); + + if (typmod == -1) + appendStringInfo(&result, + " <xsd:restriction base=\"xsd:time\">\n" + " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n" + " </xsd:restriction>\n", tz); + else if (typmod == 0) + appendStringInfo(&result, + " <xsd:restriction base=\"xsd:time\">\n" + " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n" + " </xsd:restriction>\n", tz); + else + appendStringInfo(&result, + " <xsd:restriction base=\"xsd:time\">\n" + " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n" + " </xsd:restriction>\n", typmod - VARHDRSZ, tz); + break; + } + + case TIMESTAMPOID: + case TIMESTAMPTZOID: + { + const char *tz = (typeoid == TIMESTAMPTZOID ? 
"(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : ""); + + if (typmod == -1) + appendStringInfo(&result, + " <xsd:restriction base=\"xsd:dateTime\">\n" + " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n" + " </xsd:restriction>\n", tz); + else if (typmod == 0) + appendStringInfo(&result, + " <xsd:restriction base=\"xsd:dateTime\">\n" + " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n" + " </xsd:restriction>\n", tz); + else + appendStringInfo(&result, + " <xsd:restriction base=\"xsd:dateTime\">\n" + " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n" + " </xsd:restriction>\n", typmod - VARHDRSZ, tz); + break; + } + + case DATEOID: + appendStringInfoString(&result, + " <xsd:restriction base=\"xsd:date\">\n" + " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n" + " </xsd:restriction>\n"); + break; + + default: + if (get_typtype(typeoid) == TYPTYPE_DOMAIN) + { + Oid base_typeoid; + int32 base_typmod = -1; + + base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod); + + appendStringInfo(&result, + " <xsd:restriction base=\"%s\"/>\n", + map_sql_type_to_xml_name(base_typeoid, base_typmod)); + } + break; + } + appendStringInfoString(&result, "</xsd:simpleType>\n"); + } + + return result.data; +} + + +/* + * Map an SQL row to an XML element, taking the row from the active + * SPI cursor. See also SQL/XML:2008 section 9.10. + */ +static void +SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename, + bool nulls, bool tableforest, + const char *targetns, bool top_level) +{ + int i; + char *xmltn; + + if (tablename) + xmltn = map_sql_identifier_to_xml_name(tablename, true, false); + else + { + if (tableforest) + xmltn = "row"; + else + xmltn = "table"; + } + + if (tableforest) + xmldata_root_element_start(result, xmltn, NULL, targetns, top_level); + else + appendStringInfoString(result, "<row>\n"); + + for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++) + { + char *colname; + Datum colval; + bool isnull; + + colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i), + true, false); + colval = SPI_getbinval(SPI_tuptable->vals[rownum], + SPI_tuptable->tupdesc, + i, + &isnull); + if (isnull) + { + if (nulls) + appendStringInfo(result, " <%s xsi:nil=\"true\"/>\n", colname); + } + else + appendStringInfo(result, " <%s>%s</%s>\n", + colname, + map_sql_value_to_xml_value(colval, + SPI_gettypeid(SPI_tuptable->tupdesc, i), true), + colname); + } + + if (tableforest) + { + xmldata_root_element_end(result, xmltn); + appendStringInfoChar(result, '\n'); + } + else + appendStringInfoString(result, "</row>\n\n"); +} + + +/* + * XPath related functions + */ + +#ifdef USE_LIBXML + +/* + * Convert XML node to text. + * + * For attribute and text nodes, return the escaped text. For anything else, + * dump the whole subtree. + */ +static text * +xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt) +{ + xmltype *result = NULL; + + if (cur->type != XML_ATTRIBUTE_NODE && cur->type != XML_TEXT_NODE) + { + void (*volatile nodefree) (xmlNodePtr) = NULL; + volatile xmlBufferPtr buf = NULL; + volatile xmlNodePtr cur_copy = NULL; + + PG_TRY(); + { + int bytes; + + buf = xmlBufferCreate(); + if (buf == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xmlBuffer"); + + /* + * Produce a dump of the node that we can serialize. 
xmlNodeDump + * does that, but the result of that function won't contain + * namespace definitions from ancestor nodes, so we first do a + * xmlCopyNode() which duplicates the node along with its required + * namespace definitions. + * + * Some old libxml2 versions such as 2.7.6 produce partially + * broken XML_DOCUMENT_NODE nodes (unset content field) when + * copying them. xmlNodeDump of such a node works fine, but + * xmlFreeNode crashes; set us up to call xmlFreeDoc instead. + */ + cur_copy = xmlCopyNode(cur, 1); + if (cur_copy == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not copy node"); + nodefree = (cur_copy->type == XML_DOCUMENT_NODE) ? + (void (*) (xmlNodePtr)) xmlFreeDoc : xmlFreeNode; + + bytes = xmlNodeDump(buf, NULL, cur_copy, 0, 0); + if (bytes == -1 || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not dump node"); + + result = xmlBuffer_to_xmltype(buf); + } + PG_FINALLY(); + { + if (nodefree) + nodefree(cur_copy); + if (buf) + xmlBufferFree(buf); + } + PG_END_TRY(); + } + else + { + xmlChar *str; + + str = xmlXPathCastNodeToString(cur); + PG_TRY(); + { + /* Here we rely on XML having the same representation as TEXT */ + char *escaped = escape_xml((char *) str); + + result = (xmltype *) cstring_to_text(escaped); + pfree(escaped); + } + PG_FINALLY(); + { + xmlFree(str); + } + PG_END_TRY(); + } + + return result; +} + +/* + * Convert an XML XPath object (the result of evaluating an XPath expression) + * to an array of xml values, which are appended to astate. The function + * result value is the number of elements in the array. + * + * If "astate" is NULL then we don't generate the array value, but we still + * return the number of elements it would have had. + * + * Nodesets are converted to an array containing the nodes' textual + * representations. Primitive values (float, double, string) are converted + * to a single-element array containing the value's string representation. 
+ */ +static int +xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj, + ArrayBuildState *astate, + PgXmlErrorContext *xmlerrcxt) +{ + int result = 0; + Datum datum; + Oid datumtype; + char *result_str; + + switch (xpathobj->type) + { + case XPATH_NODESET: + if (xpathobj->nodesetval != NULL) + { + result = xpathobj->nodesetval->nodeNr; + if (astate != NULL) + { + int i; + + for (i = 0; i < result; i++) + { + datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i], + xmlerrcxt)); + (void) accumArrayResult(astate, datum, false, + XMLOID, CurrentMemoryContext); + } + } + } + return result; + + case XPATH_BOOLEAN: + if (astate == NULL) + return 1; + datum = BoolGetDatum(xpathobj->boolval); + datumtype = BOOLOID; + break; + + case XPATH_NUMBER: + if (astate == NULL) + return 1; + datum = Float8GetDatum(xpathobj->floatval); + datumtype = FLOAT8OID; + break; + + case XPATH_STRING: + if (astate == NULL) + return 1; + datum = CStringGetDatum((char *) xpathobj->stringval); + datumtype = CSTRINGOID; + break; + + default: + elog(ERROR, "xpath expression result type %d is unsupported", + xpathobj->type); + return 0; /* keep compiler quiet */ + } + + /* Common code for scalar-value cases */ + result_str = map_sql_value_to_xml_value(datum, datumtype, true); + datum = PointerGetDatum(cstring_to_xmltype(result_str)); + (void) accumArrayResult(astate, datum, false, + XMLOID, CurrentMemoryContext); + return 1; +} + + +/* + * Common code for xpath() and xmlexists() + * + * Evaluate XPath expression and return number of nodes in res_nitems + * and array of XML values in astate. Either of those pointers can be + * NULL if the corresponding result isn't wanted. + * + * It is up to the user to ensure that the XML passed is in fact + * an XML document - XPath doesn't work easily on fragments without + * a context node being known. + */ +static void +xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces, + int *res_nitems, ArrayBuildState *astate) +{ + PgXmlErrorContext *xmlerrcxt; + volatile xmlParserCtxtPtr ctxt = NULL; + volatile xmlDocPtr doc = NULL; + volatile xmlXPathContextPtr xpathctx = NULL; + volatile xmlXPathCompExprPtr xpathcomp = NULL; + volatile xmlXPathObjectPtr xpathobj = NULL; + char *datastr; + int32 len; + int32 xpath_len; + xmlChar *string; + xmlChar *xpath_expr; + size_t xmldecl_len = 0; + int i; + int ndim; + Datum *ns_names_uris; + bool *ns_names_uris_nulls; + int ns_count; + + /* + * Namespace mappings are passed as text[]. If an empty array is passed + * (ndim = 0, "0-dimensional"), then there are no namespace mappings. + * Else, a 2-dimensional array with length of the second axis being equal + * to 2 should be passed, i.e., every subarray contains 2 elements, the + * first element defining the name, the second one the URI. Example: + * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2', + * 'http://example2.com']]. + */ + ndim = namespaces ? 
ARR_NDIM(namespaces) : 0; + if (ndim != 0) + { + int *dims; + + dims = ARR_DIMS(namespaces); + + if (ndim != 2 || dims[1] != 2) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("invalid array for XML namespace mapping"), + errdetail("The array must be two-dimensional with length of the second axis equal to 2."))); + + Assert(ARR_ELEMTYPE(namespaces) == TEXTOID); + + deconstruct_array_builtin(namespaces, TEXTOID, + &ns_names_uris, &ns_names_uris_nulls, + &ns_count); + + Assert((ns_count % 2) == 0); /* checked above */ + ns_count /= 2; /* count pairs only */ + } + else + { + ns_names_uris = NULL; + ns_names_uris_nulls = NULL; + ns_count = 0; + } + + datastr = VARDATA(data); + len = VARSIZE(data) - VARHDRSZ; + xpath_len = VARSIZE_ANY_EXHDR(xpath_expr_text); + if (xpath_len == 0) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("empty XPath expression"))); + + string = pg_xmlCharStrndup(datastr, len); + xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len); + + /* + * In a UTF8 database, skip any xml declaration, which might assert + * another encoding. Ignore parse_xml_decl() failure, letting + * xmlCtxtReadMemory() report parse errors. Documentation disclaims + * xpath() support for non-ASCII data in non-UTF8 databases, so leave + * those scenarios bug-compatible with historical behavior. + */ + if (GetDatabaseEncoding() == PG_UTF8) + parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL); + + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); + + PG_TRY(); + { + xmlInitParser(); + + /* + * redundant XML parsing (two parsings for the same value during one + * command execution are possible) + */ + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate parser context"); + doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len, + len - xmldecl_len, NULL, NULL, 0); + if (doc == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT, + "could not parse XML document"); + xpathctx = xmlXPathNewContext(doc); + if (xpathctx == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate XPath context"); + xpathctx->node = (xmlNodePtr) doc; + + /* register namespaces, if any */ + if (ns_count > 0) + { + for (i = 0; i < ns_count; i++) + { + char *ns_name; + char *ns_uri; + + if (ns_names_uris_nulls[i * 2] || + ns_names_uris_nulls[i * 2 + 1]) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("neither namespace name nor URI may be null"))); + ns_name = TextDatumGetCString(ns_names_uris[i * 2]); + ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]); + if (xmlXPathRegisterNs(xpathctx, + (xmlChar *) ns_name, + (xmlChar *) ns_uri) != 0) + ereport(ERROR, /* is this an internal error??? */ + (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"", + ns_name, ns_uri))); + } + } + + xpathcomp = xmlXPathCompile(xpath_expr); + if (xpathcomp == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "invalid XPath expression"); + + /* + * Version 2.6.27 introduces a function named + * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists, + * but we can derive the existence by whether any nodes are returned, + * thereby preventing a library version upgrade and keeping the code + * the same. 
+ */ + xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx); + if (xpathobj == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not create XPath object"); + + /* + * Extract the results as requested. + */ + if (res_nitems != NULL) + *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt); + else + (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt); + } + PG_CATCH(); + { + if (xpathobj) + xmlXPathFreeObject(xpathobj); + if (xpathcomp) + xmlXPathFreeCompExpr(xpathcomp); + if (xpathctx) + xmlXPathFreeContext(xpathctx); + if (doc) + xmlFreeDoc(doc); + if (ctxt) + xmlFreeParserCtxt(ctxt); + + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); + } + PG_END_TRY(); + + xmlXPathFreeObject(xpathobj); + xmlXPathFreeCompExpr(xpathcomp); + xmlXPathFreeContext(xpathctx); + xmlFreeDoc(doc); + xmlFreeParserCtxt(ctxt); + + pg_xml_done(xmlerrcxt, false); +} +#endif /* USE_LIBXML */ + +/* + * Evaluate XPath expression and return array of XML values. + * + * As we have no support of XQuery sequences yet, this function seems + * to be the most useful one (array of XML functions plays a role of + * some kind of substitution for XQuery sequences). + */ +Datum +xpath(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + text *xpath_expr_text = PG_GETARG_TEXT_PP(0); + xmltype *data = PG_GETARG_XML_P(1); + ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2); + ArrayBuildState *astate; + + astate = initArrayResult(XMLOID, CurrentMemoryContext, true); + xpath_internal(xpath_expr_text, data, namespaces, + NULL, astate); + PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext)); +#else + NO_XML_SUPPORT(); + return 0; +#endif +} + +/* + * Determines if the node specified by the supplied XPath exists + * in a given XML document, returning a boolean. + */ +Datum +xmlexists(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + text *xpath_expr_text = PG_GETARG_TEXT_PP(0); + xmltype *data = PG_GETARG_XML_P(1); + int res_nitems; + + xpath_internal(xpath_expr_text, data, NULL, + &res_nitems, NULL); + + PG_RETURN_BOOL(res_nitems > 0); +#else + NO_XML_SUPPORT(); + return 0; +#endif +} + +/* + * Determines if the node specified by the supplied XPath exists + * in a given XML document, returning a boolean. Differs from + * xmlexists as it supports namespaces and is not defined in SQL/XML. + */ +Datum +xpath_exists(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + text *xpath_expr_text = PG_GETARG_TEXT_PP(0); + xmltype *data = PG_GETARG_XML_P(1); + ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2); + int res_nitems; + + xpath_internal(xpath_expr_text, data, namespaces, + &res_nitems, NULL); + + PG_RETURN_BOOL(res_nitems > 0); +#else + NO_XML_SUPPORT(); + return 0; +#endif +} + +/* + * Functions for checking well-formed-ness + */ + +#ifdef USE_LIBXML +static bool +wellformed_xml(text *data, XmlOptionType xmloption_arg) +{ + xmlDocPtr doc; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + /* + * We'll report "true" if no soft error is reported by xml_parse(). 
+ */ + doc = xml_parse(data, xmloption_arg, true, + GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext); + if (doc) + xmlFreeDoc(doc); + + return !escontext.error_occurred; +} +#endif + +Datum +xml_is_well_formed(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + text *data = PG_GETARG_TEXT_PP(0); + + PG_RETURN_BOOL(wellformed_xml(data, xmloption)); +#else + NO_XML_SUPPORT(); + return 0; +#endif /* not USE_LIBXML */ +} + +Datum +xml_is_well_formed_document(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + text *data = PG_GETARG_TEXT_PP(0); + + PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT)); +#else + NO_XML_SUPPORT(); + return 0; +#endif /* not USE_LIBXML */ +} + +Datum +xml_is_well_formed_content(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + text *data = PG_GETARG_TEXT_PP(0); + + PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT)); +#else + NO_XML_SUPPORT(); + return 0; +#endif /* not USE_LIBXML */ +} + +/* + * support functions for XMLTABLE + * + */ +#ifdef USE_LIBXML + +/* + * Returns private data from executor state. Ensure validity by check with + * MAGIC number. + */ +static inline XmlTableBuilderData * +GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname) +{ + XmlTableBuilderData *result; + + if (!IsA(state, TableFuncScanState)) + elog(ERROR, "%s called with invalid TableFuncScanState", fname); + result = (XmlTableBuilderData *) state->opaque; + if (result->magic != XMLTABLE_CONTEXT_MAGIC) + elog(ERROR, "%s called with invalid TableFuncScanState", fname); + + return result; +} +#endif + +/* + * XmlTableInitOpaque + * Fill in TableFuncScanState->opaque for XmlTable processor; initialize + * the XML parser. + * + * Note: Because we call pg_xml_init() here and pg_xml_done() in + * XmlTableDestroyOpaque, it is critical for robustness that no other + * executor nodes run until this node is processed to completion. Caller + * must execute this to completion (probably filling a tuplestore to exhaust + * this node in a single pass) instead of using row-per-call mode. + */ +static void +XmlTableInitOpaque(TableFuncScanState *state, int natts) +{ +#ifdef USE_LIBXML + volatile xmlParserCtxtPtr ctxt = NULL; + XmlTableBuilderData *xtCxt; + PgXmlErrorContext *xmlerrcxt; + + xtCxt = palloc0(sizeof(XmlTableBuilderData)); + xtCxt->magic = XMLTABLE_CONTEXT_MAGIC; + xtCxt->natts = natts; + xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts); + + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); + + PG_TRY(); + { + xmlInitParser(); + + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate parser context"); + } + PG_CATCH(); + { + if (ctxt != NULL) + xmlFreeParserCtxt(ctxt); + + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); + } + PG_END_TRY(); + + xtCxt->xmlerrcxt = xmlerrcxt; + xtCxt->ctxt = ctxt; + + state->opaque = xtCxt; +#else + NO_XML_SUPPORT(); +#endif /* not USE_LIBXML */ +} + +/* + * XmlTableSetDocument + * Install the input document + */ +static void +XmlTableSetDocument(TableFuncScanState *state, Datum value) +{ +#ifdef USE_LIBXML + XmlTableBuilderData *xtCxt; + xmltype *xmlval = DatumGetXmlP(value); + char *str; + xmlChar *xstr; + int length; + volatile xmlDocPtr doc = NULL; + volatile xmlXPathContextPtr xpathcxt = NULL; + + xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument"); + + /* + * Use out function for casting to string (remove encoding property). See + * comment in xml_out. 
+ */ + str = xml_out_internal(xmlval, 0); + + length = strlen(str); + xstr = pg_xmlCharStrndup(str, length); + + PG_TRY(); + { + doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0); + if (doc == NULL || xtCxt->xmlerrcxt->err_occurred) + xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT, + "could not parse XML document"); + xpathcxt = xmlXPathNewContext(doc); + if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred) + xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate XPath context"); + xpathcxt->node = (xmlNodePtr) doc; + } + PG_CATCH(); + { + if (xpathcxt != NULL) + xmlXPathFreeContext(xpathcxt); + if (doc != NULL) + xmlFreeDoc(doc); + + PG_RE_THROW(); + } + PG_END_TRY(); + + xtCxt->doc = doc; + xtCxt->xpathcxt = xpathcxt; +#else + NO_XML_SUPPORT(); +#endif /* not USE_LIBXML */ +} + +/* + * XmlTableSetNamespace + * Add a namespace declaration + */ +static void +XmlTableSetNamespace(TableFuncScanState *state, const char *name, const char *uri) +{ +#ifdef USE_LIBXML + XmlTableBuilderData *xtCxt; + + if (name == NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("DEFAULT namespace is not supported"))); + xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace"); + + if (xmlXPathRegisterNs(xtCxt->xpathcxt, + pg_xmlCharStrndup(name, strlen(name)), + pg_xmlCharStrndup(uri, strlen(uri)))) + xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION, + "could not set XML namespace"); +#else + NO_XML_SUPPORT(); +#endif /* not USE_LIBXML */ +} + +/* + * XmlTableSetRowFilter + * Install the row-filter Xpath expression. + */ +static void +XmlTableSetRowFilter(TableFuncScanState *state, const char *path) +{ +#ifdef USE_LIBXML + XmlTableBuilderData *xtCxt; + xmlChar *xstr; + + xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter"); + + if (*path == '\0') + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("row path filter must not be empty string"))); + + xstr = pg_xmlCharStrndup(path, strlen(path)); + + xtCxt->xpathcomp = xmlXPathCompile(xstr); + if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred) + xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_SYNTAX_ERROR, + "invalid XPath expression"); +#else + NO_XML_SUPPORT(); +#endif /* not USE_LIBXML */ +} + +/* + * XmlTableSetColumnFilter + * Install the column-filter Xpath expression, for the given column. + */ +static void +XmlTableSetColumnFilter(TableFuncScanState *state, const char *path, int colnum) +{ +#ifdef USE_LIBXML + XmlTableBuilderData *xtCxt; + xmlChar *xstr; + + Assert(PointerIsValid(path)); + + xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter"); + + if (*path == '\0') + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("column path filter must not be empty string"))); + + xstr = pg_xmlCharStrndup(path, strlen(path)); + + xtCxt->xpathscomp[colnum] = xmlXPathCompile(xstr); + if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred) + xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION, + "invalid XPath expression"); +#else + NO_XML_SUPPORT(); +#endif /* not USE_LIBXML */ +} + +/* + * XmlTableFetchRow + * Prepare the next "current" tuple for upcoming GetValue calls. + * Returns false if the row-filter expression returned no more rows. 
+ */ +static bool +XmlTableFetchRow(TableFuncScanState *state) +{ +#ifdef USE_LIBXML + XmlTableBuilderData *xtCxt; + + xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow"); + + /* Propagate our own error context to libxml2 */ + xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler); + + if (xtCxt->xpathobj == NULL) + { + xtCxt->xpathobj = xmlXPathCompiledEval(xtCxt->xpathcomp, xtCxt->xpathcxt); + if (xtCxt->xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred) + xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not create XPath object"); + + xtCxt->row_count = 0; + } + + if (xtCxt->xpathobj->type == XPATH_NODESET) + { + if (xtCxt->xpathobj->nodesetval != NULL) + { + if (xtCxt->row_count++ < xtCxt->xpathobj->nodesetval->nodeNr) + return true; + } + } + + return false; +#else + NO_XML_SUPPORT(); + return false; +#endif /* not USE_LIBXML */ +} + +/* + * XmlTableGetValue + * Return the value for column number 'colnum' for the current row. If + * column -1 is requested, return representation of the whole row. + * + * This leaks memory, so be sure to reset often the context in which it's + * called. + */ +static Datum +XmlTableGetValue(TableFuncScanState *state, int colnum, + Oid typid, int32 typmod, bool *isnull) +{ +#ifdef USE_LIBXML + XmlTableBuilderData *xtCxt; + Datum result = (Datum) 0; + xmlNodePtr cur; + char *cstr = NULL; + volatile xmlXPathObjectPtr xpathobj = NULL; + + xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue"); + + Assert(xtCxt->xpathobj && + xtCxt->xpathobj->type == XPATH_NODESET && + xtCxt->xpathobj->nodesetval != NULL); + + /* Propagate our own error context to libxml2 */ + xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler); + + *isnull = false; + + cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1]; + + Assert(xtCxt->xpathscomp[colnum] != NULL); + + PG_TRY(); + { + /* Set current node as entry point for XPath evaluation */ + xtCxt->xpathcxt->node = cur; + + /* Evaluate column path */ + xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt); + if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred) + xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not create XPath object"); + + /* + * There are four possible cases, depending on the number of nodes + * returned by the XPath expression and the type of the target column: + * a) XPath returns no nodes. b) The target type is XML (return all + * as XML). For non-XML return types: c) One node (return content). + * d) Multiple nodes (error). + */ + if (xpathobj->type == XPATH_NODESET) + { + int count = 0; + + if (xpathobj->nodesetval != NULL) + count = xpathobj->nodesetval->nodeNr; + + if (xpathobj->nodesetval == NULL || count == 0) + { + *isnull = true; + } + else + { + if (typid == XMLOID) + { + text *textstr; + StringInfoData str; + + /* Concatenate serialized values */ + initStringInfo(&str); + for (int i = 0; i < count; i++) + { + textstr = + xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i], + xtCxt->xmlerrcxt); + + appendStringInfoText(&str, textstr); + } + cstr = str.data; + } + else + { + xmlChar *str; + + if (count > 1) + ereport(ERROR, + (errcode(ERRCODE_CARDINALITY_VIOLATION), + errmsg("more than one value returned by column XPath expression"))); + + str = xmlXPathCastNodeSetToString(xpathobj->nodesetval); + cstr = str ? 
xml_pstrdup_and_free(str) : ""; + } + } + } + else if (xpathobj->type == XPATH_STRING) + { + /* Content should be escaped when target will be XML */ + if (typid == XMLOID) + cstr = escape_xml((char *) xpathobj->stringval); + else + cstr = (char *) xpathobj->stringval; + } + else if (xpathobj->type == XPATH_BOOLEAN) + { + char typcategory; + bool typispreferred; + xmlChar *str; + + /* Allow implicit casting from boolean to numbers */ + get_type_category_preferred(typid, &typcategory, &typispreferred); + + if (typcategory != TYPCATEGORY_NUMERIC) + str = xmlXPathCastBooleanToString(xpathobj->boolval); + else + str = xmlXPathCastNumberToString(xmlXPathCastBooleanToNumber(xpathobj->boolval)); + + cstr = xml_pstrdup_and_free(str); + } + else if (xpathobj->type == XPATH_NUMBER) + { + xmlChar *str; + + str = xmlXPathCastNumberToString(xpathobj->floatval); + cstr = xml_pstrdup_and_free(str); + } + else + elog(ERROR, "unexpected XPath object type %u", xpathobj->type); + + /* + * By here, either cstr contains the result value, or the isnull flag + * has been set. + */ + Assert(cstr || *isnull); + + if (!*isnull) + result = InputFunctionCall(&state->in_functions[colnum], + cstr, + state->typioparams[colnum], + typmod); + } + PG_FINALLY(); + { + if (xpathobj != NULL) + xmlXPathFreeObject(xpathobj); + } + PG_END_TRY(); + + return result; +#else + NO_XML_SUPPORT(); + return 0; +#endif /* not USE_LIBXML */ +} + +/* + * XmlTableDestroyOpaque + * Release all libxml2 resources + */ +static void +XmlTableDestroyOpaque(TableFuncScanState *state) +{ +#ifdef USE_LIBXML + XmlTableBuilderData *xtCxt; + + xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque"); + + /* Propagate our own error context to libxml2 */ + xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler); + + if (xtCxt->xpathscomp != NULL) + { + int i; + + for (i = 0; i < xtCxt->natts; i++) + if (xtCxt->xpathscomp[i] != NULL) + xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]); + } + + if (xtCxt->xpathobj != NULL) + xmlXPathFreeObject(xtCxt->xpathobj); + if (xtCxt->xpathcomp != NULL) + xmlXPathFreeCompExpr(xtCxt->xpathcomp); + if (xtCxt->xpathcxt != NULL) + xmlXPathFreeContext(xtCxt->xpathcxt); + if (xtCxt->doc != NULL) + xmlFreeDoc(xtCxt->doc); + if (xtCxt->ctxt != NULL) + xmlFreeParserCtxt(xtCxt->ctxt); + + pg_xml_done(xtCxt->xmlerrcxt, true); + + /* not valid anymore */ + xtCxt->magic = 0; + state->opaque = NULL; + +#else + NO_XML_SUPPORT(); +#endif /* not USE_LIBXML */ +} |
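The schema-mapping routines in this file (map_sql_type_to_xml_name and map_sql_type_to_xmlschema_type) both decode a NUMERIC typmod into precision and scale with ((typmod - VARHDRSZ) >> 16) & 0xffff and (typmod - VARHDRSZ) & 0xffff. Below is a minimal standalone sketch of that round trip, added here only as an illustration and not part of the imported file; it assumes the conventional ((precision << 16) | scale) + VARHDRSZ encoding produced by the numeric type's typmod input, and it redefines VARHDRSZ locally so it compiles without the backend headers.

/*
 * Illustrative sketch (not part of the patch): round-trip the NUMERIC
 * typmod encoding assumed by the XML schema mapping code above.
 */
#include <stdio.h>

#define VARHDRSZ 4    /* mirrors the backend definition */

int
main(void)
{
    int precision = 10;
    int scale = 2;

    /* assumed encoding for NUMERIC(10,2): ((precision << 16) | scale) + VARHDRSZ */
    int typmod = ((precision << 16) | scale) + VARHDRSZ;

    /* the decode performed in map_sql_type_to_xml_name() */
    int dec_precision = ((typmod - VARHDRSZ) >> 16) & 0xffff;
    int dec_scale = (typmod - VARHDRSZ) & 0xffff;

    /* prints "NUMERIC_10_2", matching the generated XML type name */
    printf("NUMERIC_%d_%d\n", dec_precision, dec_scale);
    return 0;
}

For an unconstrained column (typmod of -1) the mapping code skips this decode entirely and emits the bare name "NUMERIC", as the switch in map_sql_type_to_xml_name shows.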