aboutsummaryrefslogtreecommitdiffstats
path: root/yql/essentials/tests/postgresql/common
diff options
context:
space:
mode:
author udovichenko-r <udovichenko-r@yandex-team.com> 2024-11-19 14:11:52 +0300
committer udovichenko-r <udovichenko-r@yandex-team.com> 2024-11-19 14:22:01 +0300
commit 72b3cd51dc3fb9d16975d353ea82fd85701393cc (patch)
tree 318141940b8bf6bdb37ad6154e745e2ebfe3613f /yql/essentials/tests/postgresql/common
parent 223625eed56ec3e2808c010eac46dba1c9a64d13 (diff)
download ydb-72b3cd51dc3fb9d16975d353ea82fd85701393cc.tar.gz
YQL-19206 Move contrib/ydb/library/yql/tests/postgresql -> yql/essentials/tests/postgresql
commit_hash:46fdf59714b20cf2b61233a06e58365227d3c8b2
Diffstat (limited to 'yql/essentials/tests/postgresql/common')
-rw-r--r-- yql/essentials/tests/postgresql/common/__init__.py 129
-rw-r--r-- yql/essentials/tests/postgresql/common/differ.py 91
-rw-r--r-- yql/essentials/tests/postgresql/common/ya.make 8
3 files changed, 228 insertions, 0 deletions
diff --git a/yql/essentials/tests/postgresql/common/__init__.py b/yql/essentials/tests/postgresql/common/__init__.py
new file mode 100644
index 0000000000..e4238ef56a
--- /dev/null
+++ b/yql/essentials/tests/postgresql/common/__init__.py
@@ -0,0 +1,129 @@
+import sys
+import logging
+from pathlib import Path
+import subprocess
+from .differ import Differ
+
+
# Module-wide logger used by the helpers defined in this package.
LOGGER = logging.getLogger(__name__)


def setup_logger():
    """Configure root logging via ``logging.basicConfig``.

    Records are requested at DEBUG level and above, formatted as
    ``LEVEL: message`` and written to ``sys.stderr``.
    """
    logging.basicConfig(
        stream=sys.stderr,
        level=logging.DEBUG,
        format='%(levelname)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
    )


# Logging is configured as a side effect of importing this module.
setup_logger()
+
+
def find_sql_tests(path):
    """Collect the SQL test cases found directly inside ``path``.

    A test case is a ``*.sql`` regular file that has at least one matching
    ``.out`` file as reported by ``get_out_files``.  Entries that are not
    regular files, or that have no ``.out`` file, are skipped with a warning.

    Args:
        path: directory to scan (anything accepted by ``pathlib.Path``).

    Returns:
        A list of ``(test_name, (sql_file, out_files))`` tuples where
        ``test_name`` is the stem of the SQL file and ``out_files`` is a
        non-empty list of paths.  The list is ordered by SQL file path.
    """
    tests = []

    # Path.glob() yields entries in filesystem-dependent order; sorting keeps
    # the resulting test list stable between runs and between machines.
    for sql_file in sorted(Path(path).glob('*.sql')):
        if not sql_file.is_file():
            LOGGER.warning("'%s' is not a file", sql_file.absolute())
            continue

        out_files = list(get_out_files(sql_file))
        if not out_files:
            LOGGER.warning("No .out files found for '%s'", sql_file.absolute())
            continue

        tests.append((sql_file.stem, (sql_file, out_files)))

    return tests
+
+
def load_init_scripts_for_testcase(testcase_name, init_scripts_cfg, init_scripts_dir):
    """Resolve the init scripts configured for a test case.

    The configuration file holds one ``<testcase>: <script> <script> ...``
    entry per line.  Lines that do not split into exactly two ``:``-separated
    fields are reported and ignored.  The first entry whose test case name
    equals ``testcase_name`` wins.

    Args:
        testcase_name: name of the test case to look up.
        init_scripts_cfg: path of the configuration file.
        init_scripts_dir: ``pathlib.Path`` of the directory holding the
            ``*.sql`` init scripts.

    Returns:
        Paths of the configured scripts that exist in ``init_scripts_dir``,
        in configuration order.  An empty list when the test case has no
        entry or none of its scripts exist.
    """
    requested = None
    with open(init_scripts_cfg, 'r') as cfg:
        for lineno, line in enumerate(cfg, 1):
            fields = line.strip().split(':')
            if len(fields) != 2:
                LOGGER.info("Bad line %d in init scripts configuration '%s'", lineno, init_scripts_cfg)
                continue

            if fields[0].strip() == testcase_name:
                requested = fields[1].split()
                break

    if requested is None:
        return []

    avail_scripts = frozenset(s.stem for s in init_scripts_dir.glob("*.sql"))

    # A configured script that is absent on disk is still skipped, but no
    # longer silently: a typo in the configuration should be visible.
    missing = [s for s in requested if s not in avail_scripts]
    if missing:
        LOGGER.warning("Init scripts for '%s' not found in '%s': %s",
                       testcase_name, init_scripts_dir, ", ".join(missing))

    # Append the extension instead of using with_suffix(): with_suffix()
    # would replace the tail of a dotted script name ("a.b" -> "a.sql").
    scripts = [init_scripts_dir / (s + ".sql") for s in requested if s in avail_scripts]

    if scripts:
        LOGGER.debug("Init scripts: %s", ", ".join(s.stem for s in scripts))

    return scripts
+
+
def _run_with_stdin(args, script):
    """Run ``args`` with the contents of ``script`` on stdin and return the completed process."""
    with open(script, 'rb') as f:
        return subprocess.run(args, stdin=f, stdout=subprocess.PIPE, stderr=sys.stderr, check=True)


def run_sql_test(sql, out, tmp_path, runner, udfs, init_scripts_cfg, init_scripts_dir):
    """Run one SQL test through the runner and compare its output with the ``.out`` files.

    The init scripts configured for the test case are executed first, each in
    its own runner invocation, then the test script itself.  The captured
    stdout is compared with every candidate in ``out`` using ``Differ.diff``;
    the test passes as soon as one candidate produces an empty diff.

    Args:
        sql: ``pathlib.Path`` of the SQL script under test.
        out: non-empty sequence of candidate ``.out`` file paths.
        tmp_path: directory passed to the runner as ``--datadir``.
        runner: path of the runner executable.
        udfs: iterable of values passed to the runner, each via ``--udf``.
        init_scripts_cfg: path of the init scripts configuration file.
        init_scripts_dir: directory that holds the init scripts.

    Raises:
        subprocess.CalledProcessError: the runner exited with a non-zero code.
        AssertionError: no candidate matched; the message carries the
            beginning of the shortest diff.
    """
    args = [runner, "--datadir", tmp_path]
    for udf in udfs:
        args.extend(("--udf", udf))

    LOGGER.debug("Loading init scripts for '%s' from '%s'", sql.stem, init_scripts_cfg)
    init_scripts = load_init_scripts_for_testcase(sql.stem, init_scripts_cfg, Path(init_scripts_dir))

    if init_scripts:
        LOGGER.debug("Executing init scripts for '%s'", sql.stem)
        for script in init_scripts:
            LOGGER.debug("Executing init script '%s'", script.name)
            # Only the exit status matters here; the output is not compared.
            _run_with_stdin(args, script)

    LOGGER.debug("Running %s '%s' -> [%s]", runner, sql, ', '.join("'{}'".format(a) for a in out))
    pi = _run_with_stdin(args, sql)

    min_diff = sys.maxsize
    best_match = out[0]
    best_diff = []

    for out_file in out:
        with open(out_file, 'rb') as f:
            out_data = f.read()

        last_diff = Differ.diff(pi.stdout, out_data)
        diff_len = len(last_diff)

        if diff_len == 0:
            return

        # Remember the candidate with the fewest diff lines for the report.
        if diff_len < min_diff:
            min_diff = diff_len
            best_match = out_file
            best_diff = last_diff

    LOGGER.info("No exact match for '%s'. Best match is '%s'", sql, best_match)
    for line in best_diff:
        LOGGER.debug(line)

    # Raise explicitly rather than use an ``assert`` statement: asserts are
    # stripped under ``python -O`` and the mismatch would then go unnoticed.
    # Undecodable bytes are replaced so that they cannot mask the real failure
    # with a UnicodeDecodeError.
    raise AssertionError(
        f"pgrun output does not match out-file for {sql}. Diff:\n"
        + ''.join(d.decode('utf8', errors='replace') for d in best_diff)[:1024]
    )
+
+
def get_out_files(sql_file):
    """Yield the expected-output files that accompany ``sql_file``.

    ``<stem>.out`` is yielded first when it exists.  It is followed by the
    numbered alternatives ``<stem>_1.out`` up to ``<stem>_9.out``; the
    enumeration stops at the first number for which no file exists.
    """
    stem = sql_file.stem
    primary = sql_file.with_suffix('.out')

    if primary.is_file():
        yield primary

    for index in range(1, 10):
        candidate = primary.with_stem('{}_{}'.format(stem, index))
        if candidate.is_file():
            yield candidate
        else:
            return
diff --git a/yql/essentials/tests/postgresql/common/differ.py b/yql/essentials/tests/postgresql/common/differ.py
new file mode 100644
index 0000000000..3b29d2cdd5
--- /dev/null
+++ b/yql/essentials/tests/postgresql/common/differ.py
@@ -0,0 +1,91 @@
+import difflib
+import re
+
+
class Differ:
    """Compare two byte strings of psql-style output while ignoring layout noise.

    Before the comparison both inputs are normalised:

    * every ``ERROR:`` report is reduced to its ``ERROR:`` line; ``LINE n:``,
      caret, ``HINT``, ``DETAIL`` and ``CONTEXT`` lines are dropped;
    * tables found on both sides with the same number of columns are
      re-rendered with common column widths, and their header and marker
      lines are removed, so column names and padding do not take part in the
      comparison.
    """

    # An ERROR line with its optional LINE/caret and HINT/DETAIL/CONTEXT lines.
    __reErr = re.compile(b'(^ERROR: [^\n]+)(?:\nLINE \\d+: [^\n]+(?:\n\\s*\\^\\s*)?)?(?:\n(?:HINT|DETAIL|CONTEXT): [^\n]+)*(?:\n|$)',
                         re.MULTILINE)
    # The line between a table header and its rows: ``---+-----+---``.
    __reUniversalTableMarker = re.compile(rb'^-{3,100}(?:\+-{3,100})*$')
    # The table footer: ``(1 row)`` / ``(N rows)``.
    __reTableEndMarker = re.compile(rb'^\(\d+ rows?\)$')

    @classmethod
    def diff(cls, left, right):
        """Return the unified diff (no context lines) of the normalised inputs.

        Args:
            left: bytes of the produced output.
            right: bytes of the expected output.

        Returns:
            A list of bytes lines; empty when the inputs are equivalent.
        """
        left = cls.__remove_pg_error_msgs(left).splitlines(keepends=True)
        right = cls.__remove_pg_error_msgs(right).splitlines(keepends=True)

        cls.__unify_tables(left, right)

        return list(difflib.diff_bytes(difflib.unified_diff, left, right, n=0, fromfile=b'sql', tofile=b'out'))

    @classmethod
    def __remove_pg_error_msgs(cls, s):
        """Replace every error report in ``s`` with just its ``ERROR:`` line."""
        return cls.__reErr.sub(rb"\1", s)

    @classmethod
    def __is_table_start(cls, pgrun_output: list[bytes], row_idx):
        """Tell whether line ``row_idx`` is a table marker line.

        A zero-column table is rendered as ``--`` directly followed by the
        row-count footer, which the generic marker pattern does not cover.
        """
        line = pgrun_output[row_idx]
        if cls.__reUniversalTableMarker.match(line):
            return True

        return (line == b'--\n'
                and row_idx + 1 < len(pgrun_output)
                and cls.__reTableEndMarker.match(pgrun_output[row_idx + 1]) is not None)

    @classmethod
    def __reformat_table_row(cls, row, col_widths):
        """Re-render a table row with left-aligned cells of the given widths.

        Cells beyond ``col_widths`` are kept unpadded rather than dropped, so
        that rows differing only in such cells still compare as different.
        The result always ends with a newline to keep diff lines separated.
        """
        # Strip the terminator only: slicing off the last byte would eat data
        # when the final line of the input has no newline.
        cells = [c.strip() for c in row.rstrip(b'\r\n').split(b'|')]
        padded = [c.ljust(w) for c, w in zip(cells, col_widths)]
        padded.extend(cells[len(col_widths):])
        return b'|'.join(padded) + b'\n'

    @classmethod
    def __remove_table_headers(cls, lines, header_line_numbers):
        """Delete each marker line and the header line above it, in place."""
        # Go backwards so that earlier indices stay valid while deleting.
        for i in reversed(header_line_numbers):
            del lines[i]
            # A marker on the very first line has no header above it;
            # index -1 would wrap around and delete the last line instead.
            if i > 0:
                del lines[i - 1]

    @classmethod
    def __unify_tables(cls, left, right):
        """Give matching tables on both sides the same layout, in place.

        Tables are paired in order of appearance: each table start on the
        left is paired with the next table start on the right that has not
        been consumed yet.
        """
        left_headers = []
        right_headers = []
        ucols = []

        in_table = False
        # One shared iterator: the right side is only ever consumed forward.
        right_iter = enumerate(right)
        for i, l_line in enumerate(left):
            if in_table:
                if cls.__reTableEndMarker.match(l_line):
                    in_table = False

                    # The left table is finished; reformat the rows of the
                    # paired right table up to its footer.
                    for j, r_line in right_iter:
                        if cls.__reTableEndMarker.match(r_line):
                            break
                        right[j] = cls.__reformat_table_row(r_line, ucols)
                    else:
                        # The right side ended inside the table.
                        break

                    continue

                left[i] = cls.__reformat_table_row(l_line, ucols)

                continue

            if not cls.__is_table_start(left, i):
                continue

            for j, r_line in right_iter:
                if cls.__is_table_start(right, j):
                    break
            else:
                # No table is left on the right side to pair with.
                continue

            lcols = [len(c) for c in l_line.rstrip(b'\n').split(b'+')]
            rcols = [len(c) for c in r_line.rstrip(b'\n').split(b'+')]

            # Identical header lines: the tables are left as they are.
            # Compare only when both markers really have a line above them.
            if i > 0 and j > 0 and left[i - 1] == right[j - 1]:
                continue

            if len(lcols) != len(rcols):
                continue

            ucols = [max(lw, rw) for lw, rw in zip(lcols, rcols)]

            left_headers.append(i)
            right_headers.append(j)

            in_table = True

        cls.__remove_table_headers(left, left_headers)
        cls.__remove_table_headers(right, right_headers)
diff --git a/yql/essentials/tests/postgresql/common/ya.make b/yql/essentials/tests/postgresql/common/ya.make
new file mode 100644
index 0000000000..c354d69518
--- /dev/null
+++ b/yql/essentials/tests/postgresql/common/ya.make
@@ -0,0 +1,8 @@
+PY3_LIBRARY()
+
+PY_SRCS(
+ __init__.py
+ differ.py
+)
+
+END()