diff options
author | vvvv <vvvv@yandex-team.com> | 2024-11-07 12:29:36 +0300 |
---|---|---|
committer | vvvv <vvvv@yandex-team.com> | 2024-11-07 13:49:47 +0300 |
commit | d4c258e9431675bab6745c8638df6e3dfd4dca6b (patch) | |
tree | b5efcfa11351152a4c872fccaea35749141c0b11 /yql/essentials/parser/pg_wrapper/postgresql/src/backend/nodes/queryjumblefuncs.c | |
parent | 13a4f274caef5cfdaf0263b24e4d6bdd5521472b (diff) | |
download | ydb-d4c258e9431675bab6745c8638df6e3dfd4dca6b.tar.gz |
Moved other yql/essentials libs YQL-19206
init
commit_hash:7d4c435602078407bbf20dd3c32f9c90d2bbcbc0
Diffstat (limited to 'yql/essentials/parser/pg_wrapper/postgresql/src/backend/nodes/queryjumblefuncs.c')
-rw-r--r-- | yql/essentials/parser/pg_wrapper/postgresql/src/backend/nodes/queryjumblefuncs.c | 397 |
1 files changed, 397 insertions, 0 deletions
diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/nodes/queryjumblefuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/nodes/queryjumblefuncs.c new file mode 100644 index 00000000000..be4cb66066b --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/nodes/queryjumblefuncs.c @@ -0,0 +1,397 @@ +/*------------------------------------------------------------------------- + * + * queryjumblefuncs.c + * Query normalization and fingerprinting. + * + * Normalization is a process whereby similar queries, typically differing only + * in their constants (though the exact rules are somewhat more subtle than + * that) are recognized as equivalent, and are tracked as a single entry. This + * is particularly useful for non-prepared queries. + * + * Normalization is implemented by fingerprinting queries, selectively + * serializing those fields of each query tree's nodes that are judged to be + * essential to the query. This is referred to as a query jumble. This is + * distinct from a regular serialization in that various extraneous + * information is ignored as irrelevant or not essential to the query, such + * as the collations of Vars and, most notably, the values of constants. + * + * This jumble is acquired at the end of parse analysis of each query, and + * a 64-bit hash of it is stored into the query's Query.queryId field. + * The server then copies this value around, making it available in plan + * tree(s) generated from the query. The executor can then use this value + * to blame query costs on the proper queryId. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/nodes/queryjumblefuncs.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "common/hashfn.h" +#include "miscadmin.h" +#include "nodes/queryjumble.h" +#include "parser/scansup.h" + +#define JUMBLE_SIZE 1024 /* query serialization buffer size */ + +/* GUC parameters */ +__thread int compute_query_id = COMPUTE_QUERY_ID_AUTO; + +/* True when compute_query_id is ON, or AUTO and a module requests them */ +__thread bool query_id_enabled = false; + +static void AppendJumble(JumbleState *jstate, + const unsigned char *item, Size size); +static void RecordConstLocation(JumbleState *jstate, int location); +static void _jumbleNode(JumbleState *jstate, Node *node); +static void _jumbleA_Const(JumbleState *jstate, Node *node); +static void _jumbleList(JumbleState *jstate, Node *node); +static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node); + +/* + * Given a possibly multi-statement source string, confine our attention to the + * relevant part of the string. + */ +const char * +CleanQuerytext(const char *query, int *location, int *len) +{ + int query_location = *location; + int query_len = *len; + + /* First apply starting offset, unless it's -1 (unknown). */ + if (query_location >= 0) + { + Assert(query_location <= strlen(query)); + query += query_location; + /* Length of 0 (or -1) means "rest of string" */ + if (query_len <= 0) + query_len = strlen(query); + else + Assert(query_len <= strlen(query)); + } + else + { + /* If query location is unknown, distrust query_len as well */ + query_location = 0; + query_len = strlen(query); + } + + /* + * Discard leading and trailing whitespace, too. Use scanner_isspace() + * not libc's isspace(), because we want to match the lexer's behavior. + */ + while (query_len > 0 && scanner_isspace(query[0])) + query++, query_location++, query_len--; + while (query_len > 0 && scanner_isspace(query[query_len - 1])) + query_len--; + + *location = query_location; + *len = query_len; + + return query; +} + +JumbleState * +JumbleQuery(Query *query) +{ + JumbleState *jstate = NULL; + + Assert(IsQueryIdEnabled()); + + jstate = (JumbleState *) palloc(sizeof(JumbleState)); + + /* Set up workspace for query jumbling */ + jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE); + jstate->jumble_len = 0; + jstate->clocations_buf_size = 32; + jstate->clocations = (LocationLen *) + palloc(jstate->clocations_buf_size * sizeof(LocationLen)); + jstate->clocations_count = 0; + jstate->highest_extern_param_id = 0; + + /* Compute query ID and mark the Query node with it */ + _jumbleNode(jstate, (Node *) query); + query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble, + jstate->jumble_len, + 0)); + + /* + * If we are unlucky enough to get a hash of zero, use 1 instead for + * normal statements and 2 for utility queries. + */ + if (query->queryId == UINT64CONST(0)) + { + if (query->utilityStmt) + query->queryId = UINT64CONST(2); + else + query->queryId = UINT64CONST(1); + } + + return jstate; +} + +/* + * Enables query identifier computation. + * + * Third-party plugins can use this function to inform core that they require + * a query identifier to be computed. + */ +void +EnableQueryId(void) +{ + if (compute_query_id != COMPUTE_QUERY_ID_OFF) + query_id_enabled = true; +} + +/* + * AppendJumble: Append a value that is substantive in a given query to + * the current jumble. + */ +static void +AppendJumble(JumbleState *jstate, const unsigned char *item, Size size) +{ + unsigned char *jumble = jstate->jumble; + Size jumble_len = jstate->jumble_len; + + /* + * Whenever the jumble buffer is full, we hash the current contents and + * reset the buffer to contain just that hash value, thus relying on the + * hash to summarize everything so far. + */ + while (size > 0) + { + Size part_size; + + if (jumble_len >= JUMBLE_SIZE) + { + uint64 start_hash; + + start_hash = DatumGetUInt64(hash_any_extended(jumble, + JUMBLE_SIZE, 0)); + memcpy(jumble, &start_hash, sizeof(start_hash)); + jumble_len = sizeof(start_hash); + } + part_size = Min(size, JUMBLE_SIZE - jumble_len); + memcpy(jumble + jumble_len, item, part_size); + jumble_len += part_size; + item += part_size; + size -= part_size; + } + jstate->jumble_len = jumble_len; +} + +/* + * Record location of constant within query string of query tree + * that is currently being walked. + */ +static void +RecordConstLocation(JumbleState *jstate, int location) +{ + /* -1 indicates unknown or undefined location */ + if (location >= 0) + { + /* enlarge array if needed */ + if (jstate->clocations_count >= jstate->clocations_buf_size) + { + jstate->clocations_buf_size *= 2; + jstate->clocations = (LocationLen *) + repalloc(jstate->clocations, + jstate->clocations_buf_size * + sizeof(LocationLen)); + } + jstate->clocations[jstate->clocations_count].location = location; + /* initialize lengths to -1 to simplify third-party module usage */ + jstate->clocations[jstate->clocations_count].length = -1; + jstate->clocations_count++; + } +} + +#define JUMBLE_NODE(item) \ + _jumbleNode(jstate, (Node *) expr->item) +#define JUMBLE_LOCATION(location) \ + RecordConstLocation(jstate, expr->location) +#define JUMBLE_FIELD(item) \ + AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item)) +#define JUMBLE_FIELD_SINGLE(item) \ + AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item)) +#define JUMBLE_STRING(str) \ +do { \ + if (expr->str) \ + AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \ +} while(0) + +#include "queryjumblefuncs.funcs.c" + +static void +_jumbleNode(JumbleState *jstate, Node *node) +{ + Node *expr = node; + + if (expr == NULL) + return; + + /* Guard against stack overflow due to overly complex expressions */ + check_stack_depth(); + + /* + * We always emit the node's NodeTag, then any additional fields that are + * considered significant, and then we recurse to any child nodes. + */ + JUMBLE_FIELD(type); + + switch (nodeTag(expr)) + { +#include "queryjumblefuncs.switch.c" + + case T_List: + case T_IntList: + case T_OidList: + case T_XidList: + _jumbleList(jstate, expr); + break; + + default: + /* Only a warning, since we can stumble along anyway */ + elog(WARNING, "unrecognized node type: %d", + (int) nodeTag(expr)); + break; + } + + /* Special cases to handle outside the automated code */ + switch (nodeTag(expr)) + { + case T_Param: + { + Param *p = (Param *) node; + + /* + * Update the highest Param id seen, in order to start + * normalization correctly. + */ + if (p->paramkind == PARAM_EXTERN && + p->paramid > jstate->highest_extern_param_id) + jstate->highest_extern_param_id = p->paramid; + } + break; + default: + break; + } +} + +static void +_jumbleList(JumbleState *jstate, Node *node) +{ + List *expr = (List *) node; + ListCell *l; + + switch (expr->type) + { + case T_List: + foreach(l, expr) + _jumbleNode(jstate, lfirst(l)); + break; + case T_IntList: + foreach(l, expr) + JUMBLE_FIELD_SINGLE(lfirst_int(l)); + break; + case T_OidList: + foreach(l, expr) + JUMBLE_FIELD_SINGLE(lfirst_oid(l)); + break; + case T_XidList: + foreach(l, expr) + JUMBLE_FIELD_SINGLE(lfirst_xid(l)); + break; + default: + elog(ERROR, "unrecognized list node type: %d", + (int) expr->type); + return; + } +} + +static void +_jumbleA_Const(JumbleState *jstate, Node *node) +{ + A_Const *expr = (A_Const *) node; + + JUMBLE_FIELD(isnull); + if (!expr->isnull) + { + JUMBLE_FIELD(val.node.type); + switch (nodeTag(&expr->val)) + { + case T_Integer: + JUMBLE_FIELD(val.ival.ival); + break; + case T_Float: + JUMBLE_STRING(val.fval.fval); + break; + case T_Boolean: + JUMBLE_FIELD(val.boolval.boolval); + break; + case T_String: + JUMBLE_STRING(val.sval.sval); + break; + case T_BitString: + JUMBLE_STRING(val.bsval.bsval); + break; + default: + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(&expr->val)); + break; + } + } +} + +static void +_jumbleRangeTblEntry(JumbleState *jstate, Node *node) +{ + RangeTblEntry *expr = (RangeTblEntry *) node; + + JUMBLE_FIELD(rtekind); + switch (expr->rtekind) + { + case RTE_RELATION: + JUMBLE_FIELD(relid); + JUMBLE_NODE(tablesample); + JUMBLE_FIELD(inh); + break; + case RTE_SUBQUERY: + JUMBLE_NODE(subquery); + break; + case RTE_JOIN: + JUMBLE_FIELD(jointype); + break; + case RTE_FUNCTION: + JUMBLE_NODE(functions); + break; + case RTE_TABLEFUNC: + JUMBLE_NODE(tablefunc); + break; + case RTE_VALUES: + JUMBLE_NODE(values_lists); + break; + case RTE_CTE: + + /* + * Depending on the CTE name here isn't ideal, but it's the only + * info we have to identify the referenced WITH item. + */ + JUMBLE_STRING(ctename); + JUMBLE_FIELD(ctelevelsup); + break; + case RTE_NAMEDTUPLESTORE: + JUMBLE_STRING(enrname); + break; + case RTE_RESULT: + break; + default: + elog(ERROR, "unrecognized RTE kind: %d", (int) expr->rtekind); + break; + } +} |