diff options
author | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-14 09:58:56 +0300 |
---|---|---|
committer | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-14 10:20:20 +0300 |
commit | c2b2dfd9827a400a8495e172a56343462e3ceb82 (patch) | |
tree | cd4e4f597d01bede4c82dffeb2d780d0a9046bd0 /contrib/python/clickhouse-connect/clickhouse_connect/driver/parser.py | |
parent | d4ae8f119e67808cb0cf776ba6e0cf95296f2df7 (diff) | |
download | ydb-c2b2dfd9827a400a8495e172a56343462e3ceb82.tar.gz |
YQ Connector: move tests from yql to ydb (OSS)
Перенос папки с тестами на Коннектор из папки yql в папку ydb (синхронизируется с github).
Diffstat (limited to 'contrib/python/clickhouse-connect/clickhouse_connect/driver/parser.py')
-rw-r--r-- | contrib/python/clickhouse-connect/clickhouse_connect/driver/parser.py | 166 |
1 files changed, 166 insertions, 0 deletions
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/parser.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/parser.py new file mode 100644 index 0000000000..a158e7f999 --- /dev/null +++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/parser.py @@ -0,0 +1,166 @@ +from typing import Union, Tuple + +from clickhouse_connect.driver.common import unescape_identifier + + +# pylint: disable=too-many-branches +def parse_callable(expr) -> Tuple[str, Tuple[Union[str, int], ...], str]: + """ + Parses a single level ClickHouse optionally 'callable' function/identifier. The identifier is returned as the + first value in the response tuple. If the expression is callable -- i.e. an identifier followed by 0 or more + arguments in parentheses, the second returned value is a tuple of the comma separated arguments. The third and + final tuple value is any text remaining after the initial expression for further parsing/processing. + + Examples: + "Tuple(String, Enum('one' = 1, 'two' = 2))" will return "Tuple", ("String", "Enum('one' = 1,'two' = 2)"), "" + "MergeTree() PARTITION BY key" will return "MergeTree", (), "PARTITION BY key" + + :param expr: ClickHouse DDL or Column Name expression + :return: Tuple of the identifier, a tuple of arguments, and remaining text + """ + expr = expr.strip() + pos = expr.find('(') + space = expr.find(' ') + if pos == -1 and space == -1: + return expr, (), '' + if space != -1 and (pos == -1 or space < pos): + return expr[:space], (), expr[space:].strip() + name = expr[:pos] + pos += 1 # Skip first paren + values = [] + value = '' + in_str = False + level = 0 + + def add_value(): + try: + values.append(int(value)) + except ValueError: + values.append(value) + + while True: + char = expr[pos] + pos += 1 + if in_str: + value += char + if char == "'": + in_str = False + elif char == '\\' and expr[pos] == "'" and expr[pos:pos + 4] != "' = " and expr[pos:pos + 2] != "')": + value += expr[pos] + pos += 1 + else: + if level == 0: + if char == ' ': + space = pos + temp_char = expr[space] + while temp_char == ' ': + space += 1 + temp_char = expr[space] + if not value or temp_char in "()',=><0": + char = temp_char + pos = space + 1 + if char == ',': + add_value() + value = '' + continue + if char == ')': + break + if char == "'" and (not value or 'Enum' in value): + in_str = True + elif char == '(': + level += 1 + elif char == ')' and level: + level -= 1 + value += char + if value != '': + add_value() + return name, tuple(values), expr[pos:].strip() + + +def parse_enum(expr) -> Tuple[Tuple[str], Tuple[int]]: + """ + Parse a ClickHouse enum definition expression of the form ('key1' = 1, 'key2' = 2) + :param expr: ClickHouse enum expression/arguments + :return: Parallel tuples of string enum keys and integer enum values + """ + keys = [] + values = [] + pos = expr.find('(') + 1 + in_key = False + key = [] + value = [] + while True: + char = expr[pos] + pos += 1 + if in_key: + if char == "'": + keys.append(''.join(key)) + key = [] + in_key = False + elif char == '\\' and expr[pos] == "'" and expr[pos:pos + 4] != "' = " and expr[pos:] != "')": + key.append(expr[pos]) + pos += 1 + else: + key.append(char) + elif char not in (' ', '='): + if char == ',': + values.append(int(''.join(value))) + value = [] + elif char == ')': + values.append(int(''.join(value))) + break + elif char == "'" and not value: + in_key = True + else: + value.append(char) + values, keys = zip(*sorted(zip(values, keys))) + return tuple(keys), tuple(values) + + +def parse_columns(expr: str): + """ + Parse a ClickHouse column list of the form (col1 String, col2 Array(Tuple(String, Int32))). This also handles + unnamed columns (such as Tuple definitions). Mixed named and unnamed columns are not currently supported. + :param expr: ClickHouse enum expression/arguments + :return: Parallel tuples of column types and column types (strings) + """ + names = [] + columns = [] + pos = 1 + named = False + level = 0 + label = '' + in_str = False + while True: + char = expr[pos] + pos += 1 + if in_str: + if "'" == char: + in_str = False + elif char == '\\' and expr[pos] == "'" and expr[pos:pos + 4] != "' = " and expr[pos:pos + 2] != "')": + label += expr[pos] + pos += 1 + else: + if level == 0: + if char == ' ': + if label and not named: + names.append(unescape_identifier(label)) + label = '' + named = True + char = '' + elif char == ',': + columns.append(label) + named = False + label = '' + continue + elif char == ')': + columns.append(label) + break + if char == "'" and (not label or 'Enum' in label): + in_str = True + elif char == '(': + level += 1 + elif char == ')': + level -= 1 + label += char + return tuple(names), tuple(columns) |