aboutsummaryrefslogtreecommitdiffstats
path: root/yql/essentials/parser/pg_wrapper/postgresql/src/backend/nodes/queryjumblefuncs.c
diff options
context:
space:
mode:
authorvvvv <vvvv@yandex-team.com>2024-11-07 12:29:36 +0300
committervvvv <vvvv@yandex-team.com>2024-11-07 13:49:47 +0300
commitd4c258e9431675bab6745c8638df6e3dfd4dca6b (patch)
treeb5efcfa11351152a4c872fccaea35749141c0b11 /yql/essentials/parser/pg_wrapper/postgresql/src/backend/nodes/queryjumblefuncs.c
parent13a4f274caef5cfdaf0263b24e4d6bdd5521472b (diff)
downloadydb-d4c258e9431675bab6745c8638df6e3dfd4dca6b.tar.gz
Moved other yql/essentials libs YQL-19206
init commit_hash:7d4c435602078407bbf20dd3c32f9c90d2bbcbc0
Diffstat (limited to 'yql/essentials/parser/pg_wrapper/postgresql/src/backend/nodes/queryjumblefuncs.c')
-rw-r--r--yql/essentials/parser/pg_wrapper/postgresql/src/backend/nodes/queryjumblefuncs.c397
1 files changed, 397 insertions, 0 deletions
diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/nodes/queryjumblefuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/nodes/queryjumblefuncs.c
new file mode 100644
index 00000000000..be4cb66066b
--- /dev/null
+++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/nodes/queryjumblefuncs.c
@@ -0,0 +1,397 @@
+/*-------------------------------------------------------------------------
+ *
+ * queryjumblefuncs.c
+ * Query normalization and fingerprinting.
+ *
+ * Normalization is a process whereby similar queries, typically differing only
+ * in their constants (though the exact rules are somewhat more subtle than
+ * that) are recognized as equivalent, and are tracked as a single entry. This
+ * is particularly useful for non-prepared queries.
+ *
+ * Normalization is implemented by fingerprinting queries, selectively
+ * serializing those fields of each query tree's nodes that are judged to be
+ * essential to the query. This is referred to as a query jumble. This is
+ * distinct from a regular serialization in that various extraneous
+ * information is ignored as irrelevant or not essential to the query, such
+ * as the collations of Vars and, most notably, the values of constants.
+ *
+ * This jumble is acquired at the end of parse analysis of each query, and
+ * a 64-bit hash of it is stored into the query's Query.queryId field.
+ * The server then copies this value around, making it available in plan
+ * tree(s) generated from the query. The executor can then use this value
+ * to blame query costs on the proper queryId.
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/nodes/queryjumblefuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "common/hashfn.h"
+#include "miscadmin.h"
+#include "nodes/queryjumble.h"
+#include "parser/scansup.h"
+
+#define JUMBLE_SIZE 1024 /* query serialization buffer size */
+
+/* GUC parameters */
+__thread int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+
+/* True when compute_query_id is ON, or AUTO and a module requests them */
+__thread bool query_id_enabled = false;
+
+static void AppendJumble(JumbleState *jstate,
+ const unsigned char *item, Size size);
+static void RecordConstLocation(JumbleState *jstate, int location);
+static void _jumbleNode(JumbleState *jstate, Node *node);
+static void _jumbleA_Const(JumbleState *jstate, Node *node);
+static void _jumbleList(JumbleState *jstate, Node *node);
+static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node);
+
+/*
+ * Given a possibly multi-statement source string, confine our attention to the
+ * relevant part of the string.
+ */
+const char *
+CleanQuerytext(const char *query, int *location, int *len)
+{
+ int query_location = *location;
+ int query_len = *len;
+
+ /* First apply starting offset, unless it's -1 (unknown). */
+ if (query_location >= 0)
+ {
+ Assert(query_location <= strlen(query));
+ query += query_location;
+ /* Length of 0 (or -1) means "rest of string" */
+ if (query_len <= 0)
+ query_len = strlen(query);
+ else
+ Assert(query_len <= strlen(query));
+ }
+ else
+ {
+ /* If query location is unknown, distrust query_len as well */
+ query_location = 0;
+ query_len = strlen(query);
+ }
+
+ /*
+ * Discard leading and trailing whitespace, too. Use scanner_isspace()
+ * not libc's isspace(), because we want to match the lexer's behavior.
+ */
+ while (query_len > 0 && scanner_isspace(query[0]))
+ query++, query_location++, query_len--;
+ while (query_len > 0 && scanner_isspace(query[query_len - 1]))
+ query_len--;
+
+ *location = query_location;
+ *len = query_len;
+
+ return query;
+}
+
+JumbleState *
+JumbleQuery(Query *query)
+{
+ JumbleState *jstate = NULL;
+
+ Assert(IsQueryIdEnabled());
+
+ jstate = (JumbleState *) palloc(sizeof(JumbleState));
+
+ /* Set up workspace for query jumbling */
+ jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE);
+ jstate->jumble_len = 0;
+ jstate->clocations_buf_size = 32;
+ jstate->clocations = (LocationLen *)
+ palloc(jstate->clocations_buf_size * sizeof(LocationLen));
+ jstate->clocations_count = 0;
+ jstate->highest_extern_param_id = 0;
+
+ /* Compute query ID and mark the Query node with it */
+ _jumbleNode(jstate, (Node *) query);
+ query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble,
+ jstate->jumble_len,
+ 0));
+
+ /*
+ * If we are unlucky enough to get a hash of zero, use 1 instead for
+ * normal statements and 2 for utility queries.
+ */
+ if (query->queryId == UINT64CONST(0))
+ {
+ if (query->utilityStmt)
+ query->queryId = UINT64CONST(2);
+ else
+ query->queryId = UINT64CONST(1);
+ }
+
+ return jstate;
+}
+
+/*
+ * Enables query identifier computation.
+ *
+ * Third-party plugins can use this function to inform core that they require
+ * a query identifier to be computed.
+ */
+void
+EnableQueryId(void)
+{
+ if (compute_query_id != COMPUTE_QUERY_ID_OFF)
+ query_id_enabled = true;
+}
+
+/*
+ * AppendJumble: Append a value that is substantive in a given query to
+ * the current jumble.
+ */
+static void
+AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
+{
+ unsigned char *jumble = jstate->jumble;
+ Size jumble_len = jstate->jumble_len;
+
+ /*
+ * Whenever the jumble buffer is full, we hash the current contents and
+ * reset the buffer to contain just that hash value, thus relying on the
+ * hash to summarize everything so far.
+ */
+ while (size > 0)
+ {
+ Size part_size;
+
+ if (jumble_len >= JUMBLE_SIZE)
+ {
+ uint64 start_hash;
+
+ start_hash = DatumGetUInt64(hash_any_extended(jumble,
+ JUMBLE_SIZE, 0));
+ memcpy(jumble, &start_hash, sizeof(start_hash));
+ jumble_len = sizeof(start_hash);
+ }
+ part_size = Min(size, JUMBLE_SIZE - jumble_len);
+ memcpy(jumble + jumble_len, item, part_size);
+ jumble_len += part_size;
+ item += part_size;
+ size -= part_size;
+ }
+ jstate->jumble_len = jumble_len;
+}
+
+/*
+ * Record location of constant within query string of query tree
+ * that is currently being walked.
+ */
+static void
+RecordConstLocation(JumbleState *jstate, int location)
+{
+ /* -1 indicates unknown or undefined location */
+ if (location >= 0)
+ {
+ /* enlarge array if needed */
+ if (jstate->clocations_count >= jstate->clocations_buf_size)
+ {
+ jstate->clocations_buf_size *= 2;
+ jstate->clocations = (LocationLen *)
+ repalloc(jstate->clocations,
+ jstate->clocations_buf_size *
+ sizeof(LocationLen));
+ }
+ jstate->clocations[jstate->clocations_count].location = location;
+ /* initialize lengths to -1 to simplify third-party module usage */
+ jstate->clocations[jstate->clocations_count].length = -1;
+ jstate->clocations_count++;
+ }
+}
+
+#define JUMBLE_NODE(item) \
+ _jumbleNode(jstate, (Node *) expr->item)
+#define JUMBLE_LOCATION(location) \
+ RecordConstLocation(jstate, expr->location)
+#define JUMBLE_FIELD(item) \
+ AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
+#define JUMBLE_FIELD_SINGLE(item) \
+ AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
+#define JUMBLE_STRING(str) \
+do { \
+ if (expr->str) \
+ AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \
+} while(0)
+
+#include "queryjumblefuncs.funcs.c"
+
+static void
+_jumbleNode(JumbleState *jstate, Node *node)
+{
+ Node *expr = node;
+
+ if (expr == NULL)
+ return;
+
+ /* Guard against stack overflow due to overly complex expressions */
+ check_stack_depth();
+
+ /*
+ * We always emit the node's NodeTag, then any additional fields that are
+ * considered significant, and then we recurse to any child nodes.
+ */
+ JUMBLE_FIELD(type);
+
+ switch (nodeTag(expr))
+ {
+#include "queryjumblefuncs.switch.c"
+
+ case T_List:
+ case T_IntList:
+ case T_OidList:
+ case T_XidList:
+ _jumbleList(jstate, expr);
+ break;
+
+ default:
+ /* Only a warning, since we can stumble along anyway */
+ elog(WARNING, "unrecognized node type: %d",
+ (int) nodeTag(expr));
+ break;
+ }
+
+ /* Special cases to handle outside the automated code */
+ switch (nodeTag(expr))
+ {
+ case T_Param:
+ {
+ Param *p = (Param *) node;
+
+ /*
+ * Update the highest Param id seen, in order to start
+ * normalization correctly.
+ */
+ if (p->paramkind == PARAM_EXTERN &&
+ p->paramid > jstate->highest_extern_param_id)
+ jstate->highest_extern_param_id = p->paramid;
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+static void
+_jumbleList(JumbleState *jstate, Node *node)
+{
+ List *expr = (List *) node;
+ ListCell *l;
+
+ switch (expr->type)
+ {
+ case T_List:
+ foreach(l, expr)
+ _jumbleNode(jstate, lfirst(l));
+ break;
+ case T_IntList:
+ foreach(l, expr)
+ JUMBLE_FIELD_SINGLE(lfirst_int(l));
+ break;
+ case T_OidList:
+ foreach(l, expr)
+ JUMBLE_FIELD_SINGLE(lfirst_oid(l));
+ break;
+ case T_XidList:
+ foreach(l, expr)
+ JUMBLE_FIELD_SINGLE(lfirst_xid(l));
+ break;
+ default:
+ elog(ERROR, "unrecognized list node type: %d",
+ (int) expr->type);
+ return;
+ }
+}
+
+static void
+_jumbleA_Const(JumbleState *jstate, Node *node)
+{
+ A_Const *expr = (A_Const *) node;
+
+ JUMBLE_FIELD(isnull);
+ if (!expr->isnull)
+ {
+ JUMBLE_FIELD(val.node.type);
+ switch (nodeTag(&expr->val))
+ {
+ case T_Integer:
+ JUMBLE_FIELD(val.ival.ival);
+ break;
+ case T_Float:
+ JUMBLE_STRING(val.fval.fval);
+ break;
+ case T_Boolean:
+ JUMBLE_FIELD(val.boolval.boolval);
+ break;
+ case T_String:
+ JUMBLE_STRING(val.sval.sval);
+ break;
+ case T_BitString:
+ JUMBLE_STRING(val.bsval.bsval);
+ break;
+ default:
+ elog(ERROR, "unrecognized node type: %d",
+ (int) nodeTag(&expr->val));
+ break;
+ }
+ }
+}
+
+static void
+_jumbleRangeTblEntry(JumbleState *jstate, Node *node)
+{
+ RangeTblEntry *expr = (RangeTblEntry *) node;
+
+ JUMBLE_FIELD(rtekind);
+ switch (expr->rtekind)
+ {
+ case RTE_RELATION:
+ JUMBLE_FIELD(relid);
+ JUMBLE_NODE(tablesample);
+ JUMBLE_FIELD(inh);
+ break;
+ case RTE_SUBQUERY:
+ JUMBLE_NODE(subquery);
+ break;
+ case RTE_JOIN:
+ JUMBLE_FIELD(jointype);
+ break;
+ case RTE_FUNCTION:
+ JUMBLE_NODE(functions);
+ break;
+ case RTE_TABLEFUNC:
+ JUMBLE_NODE(tablefunc);
+ break;
+ case RTE_VALUES:
+ JUMBLE_NODE(values_lists);
+ break;
+ case RTE_CTE:
+
+ /*
+ * Depending on the CTE name here isn't ideal, but it's the only
+ * info we have to identify the referenced WITH item.
+ */
+ JUMBLE_STRING(ctename);
+ JUMBLE_FIELD(ctelevelsup);
+ break;
+ case RTE_NAMEDTUPLESTORE:
+ JUMBLE_STRING(enrname);
+ break;
+ case RTE_RESULT:
+ break;
+ default:
+ elog(ERROR, "unrecognized RTE kind: %d", (int) expr->rtekind);
+ break;
+ }
+}