diff options
| author | robot-piglet <[email protected]> | 2025-11-29 15:12:53 +0300 |
|---|---|---|
| committer | robot-piglet <[email protected]> | 2025-11-29 15:25:57 +0300 |
| commit | f01b7b700742556dc65aa8fbacef3c62ffddbdd7 (patch) | |
| tree | 4dafb30e37d5bbe6a4bd6006bf2d114eaca541c6 | |
| parent | 1895c63513ed8b03774d8ae04c4092eccaf128e6 (diff) | |
Intermediate changes
commit_hash:3a22794f13d3a75e0e82d9187e263dfa8342c8a5
16 files changed, 278 insertions, 38 deletions
diff --git a/contrib/python/clickhouse-connect/.dist-info/METADATA b/contrib/python/clickhouse-connect/.dist-info/METADATA index c2d9217601d..af5f3829da5 100644 --- a/contrib/python/clickhouse-connect/.dist-info/METADATA +++ b/contrib/python/clickhouse-connect/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.4 Name: clickhouse-connect -Version: 0.9.2 +Version: 0.10.0 Summary: ClickHouse Database Core Driver for Python, Pandas, and Superset Home-page: https://github.com/ClickHouse/clickhouse-connect Author: ClickHouse Inc. @@ -15,14 +15,17 @@ Classifier: Programming Language :: Python :: 3.10 Classifier: Programming Language :: Python :: 3.11 Classifier: Programming Language :: Python :: 3.12 Classifier: Programming Language :: Python :: 3.13 -Requires-Python: >=3.9,<3.14 +Classifier: Programming Language :: Python :: 3.14 +Requires-Python: >=3.9,<3.15 Description-Content-Type: text/markdown License-File: LICENSE Requires-Dist: certifi Requires-Dist: urllib3>=1.26 Requires-Dist: pytz -Requires-Dist: zstandard -Requires-Dist: lz4 +Requires-Dist: zstandard; python_version < "3.14" +Requires-Dist: zstandard>=0.25.0; python_version >= "3.14" +Requires-Dist: lz4; python_version < "3.14" +Requires-Dist: lz4>=4.4.5; python_version >= "3.14" Provides-Extra: sqlalchemy Requires-Dist: sqlalchemy<3.0,>=1.4.40; extra == "sqlalchemy" Provides-Extra: numpy @@ -32,7 +35,8 @@ Requires-Dist: pandas; extra == "pandas" Provides-Extra: polars Requires-Dist: polars>=1.0; extra == "polars" Provides-Extra: arrow -Requires-Dist: pyarrow; extra == "arrow" +Requires-Dist: pyarrow>=22.0; python_version >= "3.14" and extra == "arrow" +Requires-Dist: pyarrow; python_version < "3.14" and extra == "arrow" Provides-Extra: orjson Requires-Dist: orjson; extra == "orjson" Provides-Extra: tzlocal @@ -89,7 +93,7 @@ and **SQLAlchemy Core**. Supported features include: - Basic query execution via SQLAlchemy Core -- `SELECT` queries with `JOIN`s +- `SELECT` queries with `JOIN`s, `ARRAY JOIN`, and `FINAL` modifier - Lightweight `DELETE` statements The implementation does not include ORM support and is not intended as a full SQLAlchemy dialect. While it can support diff --git a/contrib/python/clickhouse-connect/README.md b/contrib/python/clickhouse-connect/README.md index 8d9e4b8b845..8b7288b9e64 100644 --- a/contrib/python/clickhouse-connect/README.md +++ b/contrib/python/clickhouse-connect/README.md @@ -36,7 +36,7 @@ and **SQLAlchemy Core**. Supported features include: - Basic query execution via SQLAlchemy Core -- `SELECT` queries with `JOIN`s +- `SELECT` queries with `JOIN`s, `ARRAY JOIN`, and `FINAL` modifier - Lightweight `DELETE` statements The implementation does not include ORM support and is not intended as a full SQLAlchemy dialect. While it can support diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py b/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py index b2b90876495..f4a8f1729b5 100644 --- a/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py +++ b/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py @@ -1 +1 @@ -version = "0.9.2" +version = "0.10.0" diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/__init__.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/__init__.py index ec80555281c..1e6441197a6 100644 --- a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/__init__.py +++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/__init__.py @@ -1,6 +1,10 @@ from clickhouse_connect import driver_name from clickhouse_connect.cc_sqlalchemy.datatypes.base import schema_types +from clickhouse_connect.cc_sqlalchemy.sql import final +from clickhouse_connect.cc_sqlalchemy.sql.clauses import array_join, ArrayJoin # pylint: disable=invalid-name dialect_name = driver_name ischema_names = schema_types + +__all__ = ['dialect_name', 'ischema_names', 'array_join', 'ArrayJoin', 'final'] diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/__init__.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/__init__.py index 00b9bc8c134..e7051839c32 100644 --- a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/__init__.py +++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/__init__.py @@ -1,6 +1,7 @@ from typing import Optional from sqlalchemy import Table +from sqlalchemy.sql.selectable import FromClause, Select from clickhouse_connect.driver.binding import quote_identifier @@ -13,3 +14,46 @@ def full_table(table_name: str, schema: Optional[str] = None) -> str: def format_table(table: Table): return full_table(table.name, table.schema) + + +def final(select_stmt: Select, table: Optional[FromClause] = None) -> Select: + """ + Apply the ClickHouse FINAL modifier to a select statement. + + Args: + select_stmt: The SQLAlchemy Select statement to modify. + table: Optional explicit table/alias to apply FINAL to. When omitted the + method will use the single FROM element present on the select. A + ValueError is raised if the statement has no FROMs or more than one + FROM element and table is not provided. + + Returns: + A new Select that renders the FINAL modifier for the target table. + """ + if not isinstance(select_stmt, Select): + raise TypeError("final() expects a SQLAlchemy Select instance") + + target = table + if target is None: + froms = select_stmt.get_final_froms() + if not froms: + raise ValueError("final() requires a table to apply the FINAL modifier.") + if len(froms) > 1: + raise ValueError("final() is ambiguous for statements with multiple FROM clauses. Specify the table explicitly.") + target = froms[0] + + if not isinstance(target, FromClause): + raise TypeError("table must be a SQLAlchemy FromClause when provided") + + return select_stmt.with_hint(target, "FINAL") + + +def _select_final(self: Select, table: Optional[FromClause] = None) -> Select: + """ + Select.final() convenience wrapper around the module-level final() helper. + """ + return final(self, table=table) + + +# Monkey-patch the Select class to add the .final() convenience method +Select.final = _select_final diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/clauses.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/clauses.py new file mode 100644 index 00000000000..dd5e7a44140 --- /dev/null +++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/clauses.py @@ -0,0 +1,93 @@ +from sqlalchemy.sql.base import Immutable +from sqlalchemy.sql.selectable import FromClause + + +# pylint: disable=protected-access,too-many-ancestors,abstract-method,unused-argument +class ArrayJoin(Immutable, FromClause): + """Represents ClickHouse ARRAY JOIN clause""" + + __visit_name__ = "array_join" + _is_from_container = True + named_with_column = False + _is_join = True + + def __init__(self, left, array_column, alias=None, is_left=False): + """Initialize ARRAY JOIN clause + + Args: + left: The left side (table or subquery) + array_column: The array column to join + alias: Optional alias for the joined array elements + is_left: If True, use LEFT ARRAY JOIN instead of ARRAY JOIN + """ + super().__init__() + self.left = left + self.array_column = array_column + self.alias = alias + self.is_left = is_left + self._is_clone_of = None + + @property + def selectable(self): + """Return the selectable for this clause""" + return self.left + + @property + def _hide_froms(self): + """Hide the left table from the FROM clause since it's part of the ARRAY JOIN""" + return [self.left] + + @property + def _from_objects(self): + """Return all FROM objects referenced by this construct""" + return self.left._from_objects + + def _clone(self, **kw): + """Return a copy of this ArrayJoin""" + c = self.__class__.__new__(self.__class__) + c.__dict__ = self.__dict__.copy() + c._is_clone_of = self + return c + + def _copy_internals(self, clone=None, **kw): + """Copy internal state for cloning + + This ensures that when queries are cloned (e.g., for subqueries, unions, or CTEs), + the left FromClause and array_column references are properly deep-cloned. + """ + def _default_clone(elem, **kwargs): + return elem + + if clone is None: + clone = _default_clone + + # Clone the left FromClause and array column to ensure proper + # reference handling in complex query scenarios + self.left = clone(self.left, **kw) + self.array_column = clone(self.array_column, **kw) + + +def array_join(left, array_column, alias=None, is_left=False): + """Create an ARRAY JOIN clause + + Args: + left: The left side (table or subquery) + array_column: The array column to join + alias: Optional alias for the joined array elements + is_left: If True, use LEFT ARRAY JOIN instead of ARRAY JOIN + + Returns: + ArrayJoin: An ArrayJoin clause element + + Example: + from clickhouse_connect.cc_sqlalchemy.sql.clauses import array_join + + # Basic ARRAY JOIN + query = select(table).select_from(array_join(table, table.c.tags)) + + # LEFT ARRAY JOIN with alias + query = select(table).select_from( + array_join(table, table.c.tags, alias='tag', is_left=True) + ) + """ + return ArrayJoin(left, array_column, alias, is_left) diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/compiler.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/compiler.py index dab2099ef24..a3daa425a17 100644 --- a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/compiler.py +++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/compiler.py @@ -1,13 +1,14 @@ from sqlalchemy.exc import CompileError from sqlalchemy.sql.compiler import SQLCompiler +from clickhouse_connect.cc_sqlalchemy import ArrayJoin from clickhouse_connect.cc_sqlalchemy.sql import format_table # pylint: disable=arguments-differ class ChStatementCompiler(SQLCompiler): - # pylint: disable=attribute-defined-outside-init + # pylint: disable=attribute-defined-outside-init,unused-argument def visit_delete(self, delete_stmt, visiting_cte=None, **kw): table = delete_stmt.table text = f"DELETE FROM {format_table(table)}" @@ -23,10 +24,20 @@ class ChStatementCompiler(SQLCompiler): return text - def visit_select(self, select_stmt, **kw): - return super().visit_select(select_stmt, **kw) + def visit_array_join(self, array_join_clause, asfrom=False, from_linter=None, **kw): + left = self.process(array_join_clause.left, asfrom=True, from_linter=from_linter, **kw) + array_col = self.process(array_join_clause.array_column, **kw) + join_type = "LEFT ARRAY JOIN" if array_join_clause.is_left else "ARRAY JOIN" + text = f"{left} {join_type} {array_col}" + if array_join_clause.alias: + text += f" AS {self.preparer.quote(array_join_clause.alias)}" + + return text def visit_join(self, join, **kw): + if isinstance(join, ArrayJoin): + return self.visit_array_join(join, **kw) + left = self.process(join.left, **kw) right = self.process(join.right, **kw) onclause = join.onclause @@ -72,3 +83,8 @@ class ChStatementCompiler(SQLCompiler): def visit_sequence(self, sequence, **kw): raise NotImplementedError("ClickHouse doesn't support sequences") + + def get_from_hint_text(self, table, text): + if text == "FINAL": + return "FINAL" + return super().get_from_hint_text(table, text) diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/common.py b/contrib/python/clickhouse-connect/clickhouse_connect/common.py index fd56a26616b..dc59930286b 100644 --- a/contrib/python/clickhouse-connect/clickhouse_connect/common.py +++ b/contrib/python/clickhouse-connect/clickhouse_connect/common.py @@ -2,13 +2,12 @@ import getpass import sys from dataclasses import dataclass from typing import Any, Sequence, Optional, Dict -from clickhouse_connect import __version__ - +from clickhouse_connect import __version__ from clickhouse_connect.driver.exceptions import ProgrammingError -def version(): +def version() -> str: return __version__.version @@ -30,7 +29,7 @@ class CommonSetting: _common_settings: Dict[str, CommonSetting] = {} -def build_client_name(client_name: str): +def build_client_name(client_name: str) -> str: product_name = get_setting('product_name') product_name = product_name.strip() + ' ' if product_name else '' client_name = client_name.strip() + ' ' if client_name else '' @@ -46,14 +45,14 @@ def build_client_name(client_name: str): return full_name.encode('ascii', 'ignore').decode() -def get_setting(name: str): +def get_setting(name: str) -> Any: setting = _common_settings.get(name) if setting is None: raise ProgrammingError(f'Unrecognized common setting {name}') return setting.value if setting.value is not None else setting.default -def set_setting(name: str, value: Any): +def set_setting(name: str, value: Any) -> None: setting = _common_settings.get(name) if setting is None: raise ProgrammingError(f'Unrecognized common setting {name}') @@ -65,7 +64,7 @@ def set_setting(name: str, value: Any): setting.value = value -def _init_common(name: str, options: Sequence[Any], default: Any): +def _init_common(name: str, options: Sequence[Any], default: Any) -> None: _common_settings[name] = CommonSetting(name, options, default) diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/dynamic.py b/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/dynamic.py index d7a9b21a540..654bf99e5b5 100644 --- a/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/dynamic.py +++ b/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/dynamic.py @@ -1,5 +1,6 @@ from collections import namedtuple from typing import List, Sequence, Collection, Any +from urllib.parse import unquote from clickhouse_connect.datatypes.base import ClickHouseType, TypeDef from clickhouse_connect.datatypes.registry import get_from_name @@ -22,6 +23,13 @@ json_serialization_format = 0x1 VariantState = namedtuple('VariantState', 'discriminator_node element_states') +def _json_path_segments(path: str) -> List[str]: + segments = path.split('.') + if '%' in path: + return [unquote(segment) for segment in segments] + return segments + + class Variant(ClickHouseType): _slots = 'element_types' python_type = object @@ -255,7 +263,7 @@ class JSON(ClickHouseType): for ix, field in enumerate(self.typed_paths): value = typed_columns[ix][row_num] item = top - chain = field.split('.') + chain = _json_path_segments(field) for key in chain[:-1]: child = item.get(key) if child is None: @@ -268,7 +276,7 @@ class JSON(ClickHouseType): if value is None: continue item = top - chain = field.split('.') + chain = _json_path_segments(field) for key in chain[:-1]: child = item.get(key) if child is None: diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/temporal.py b/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/temporal.py index a51b6584f8f..1808d8571d9 100644 --- a/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/temporal.py +++ b/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/temporal.py @@ -89,8 +89,8 @@ class Date(ClickHouseType): if column.tz is None: return column.astype(self.pandas_dtype) - naive = column.tz_localize(None).astype(self.pandas_dtype) - return pd.DatetimeIndex(naive, tz=column.tz) + naive = column.tz_convert("UTC").tz_localize(None).astype(self.pandas_dtype) + return naive.tz_localize("UTC").tz_convert(column.tz) if self.nullable and isinstance(column, list): return np.array([None if pd.isna(s) else s for s in column]).astype( @@ -159,8 +159,8 @@ class DateTimeBase(ClickHouseType, registered=False): result = column.astype(self.pandas_dtype) return pd.array(result) if self.nullable else result - naive_ns = column.tz_localize(None).astype(self.pandas_dtype) - tz_aware_result = pd.DatetimeIndex(naive_ns, tz=column.tz) + naive_ns = column.tz_convert("UTC").tz_localize(None).astype(self.pandas_dtype) + tz_aware_result = naive_ns.tz_localize("UTC").tz_convert(column.tz) return ( pd.array(tz_aware_result) if self.nullable else tz_aware_result ) @@ -249,11 +249,11 @@ class DateTime64(DateTimeBase): active_tz = ctx.active_tz(self.tzinfo) if ctx.use_numpy: np_array = numpy_conv.read_numpy_array(source, self.np_type, num_rows) - if ctx.as_pandas and active_tz and active_tz != pytz.UTC: + if ctx.as_pandas and active_tz: return pd.DatetimeIndex(np_array, tz='UTC').tz_convert(active_tz) return np_array column = source.read_array('q', num_rows) - if active_tz and active_tz != pytz.UTC: + if active_tz: return self._read_binary_tz(column, active_tz) return self._read_binary_naive(column) diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/__init__.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/__init__.py index be72481ee56..e946492e854 100644 --- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/__init__.py +++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/__init__.py @@ -1,4 +1,5 @@ import asyncio +from concurrent.futures import ThreadPoolExecutor from inspect import signature from typing import Optional, Union, Dict, Any from urllib.parse import urlparse, parse_qs @@ -8,7 +9,7 @@ from clickhouse_connect.driver.client import Client from clickhouse_connect.driver.common import dict_copy from clickhouse_connect.driver.exceptions import ProgrammingError from clickhouse_connect.driver.httpclient import HttpClient -from clickhouse_connect.driver.asyncclient import AsyncClient +from clickhouse_connect.driver.asyncclient import AsyncClient, DefaultThreadPoolExecutor, NEW_THREAD_POOL_EXECUTOR # pylint: disable=too-many-arguments,too-many-locals,too-many-branches @@ -75,6 +76,8 @@ def create_client(*, :param server_host_name This is the server host name that will be checked against a TLS certificate for validity. This option can be used if using an ssh_tunnel or other indirect means to an ClickHouse server where the `host` argument refers to the tunnel or proxy and not the actual ClickHouse server + :param utc_tz_aware When True, ClickHouse Connect will return timezone-aware UTC datetimes instead of the + legacy naive UTC datetimes. :param autogenerate_session_id If set, this will override the 'autogenerate_session_id' common setting. :param form_encode_query_params If True, query parameters will be sent as form-encoded data in the request body instead of as URL parameters. This is useful for queries with large parameter sets that might exceed URL length @@ -144,7 +147,8 @@ async def create_async_client(*, dsn: Optional[str] = None, settings: Optional[Dict[str, Any]] = None, generic_args: Optional[Dict[str, Any]] = None, - executor_threads: Optional[int] = None, + executor_threads: int = 0, + executor: Union[ThreadPoolExecutor, None, DefaultThreadPoolExecutor] = NEW_THREAD_POOL_EXECUTOR, **kwargs) -> AsyncClient: """ The preferred method to get an async ClickHouse Connect Client instance. @@ -166,8 +170,11 @@ async def create_async_client(*, :param settings: ClickHouse server settings to be used with the session/every request :param generic_args: Used internally to parse DBAPI connection strings into keyword arguments and ClickHouse settings. It is not recommended to use this parameter externally - :param: executor_threads 'max_worker' threads used by the client ThreadPoolExecutor. If not set, the default + :param executor_threads: 'max_worker' threads used by the client ThreadPoolExecutor. If not set, the default of 4 + detected CPU cores will be used + :param executor: Optional `ThreadPoolExecutor` to use for async operations. If not set, a new `ThreadPoolExecutor` + will be created with the number of threads specified by `executor_threads`. If set to `None` it will use the + default executor of the event loop. :param kwargs -- Recognized keyword arguments (used by the HTTP client), see below :param compress: Enable compression for ClickHouse HTTP inserts and query results. True will select the preferred @@ -196,6 +203,8 @@ async def create_async_client(*, :param server_host_name This is the server host name that will be checked against a TLS certificate for validity. This option can be used if using an ssh_tunnel or other indirect means to an ClickHouse server where the `host` argument refers to the tunnel or proxy and not the actual ClickHouse server + :param utc_tz_aware When True, ClickHouse Connect will return timezone-aware UTC datetimes instead of the + legacy naive UTC datetimes. :param autogenerate_session_id If set, this will override the 'autogenerate_session_id' common setting. :param form_encode_query_params If True, query parameters will be sent as form-encoded data in the request body instead of as URL parameters. This is useful for queries with large parameter sets that might exceed URL length @@ -211,4 +220,4 @@ async def create_async_client(*, loop = asyncio.get_running_loop() _client = await loop.run_in_executor(None, _create_client) - return AsyncClient(client=_client, executor_threads=executor_threads) + return AsyncClient(client=_client, executor_threads=executor_threads, executor=executor) diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/asyncclient.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/asyncclient.py index feaa4e22903..41244bbbb51 100644 --- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/asyncclient.py +++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/asyncclient.py @@ -1,5 +1,6 @@ import asyncio import io +import logging import os from concurrent.futures.thread import ThreadPoolExecutor from datetime import tzinfo @@ -14,6 +15,16 @@ from clickhouse_connect.driver.summary import QuerySummary from clickhouse_connect.datatypes.base import ClickHouseType from clickhouse_connect.driver.insert import InsertContext +logger = logging.getLogger(__name__) + + +class DefaultThreadPoolExecutor: + pass + + +# Sentinel value to preserve default behavior and also allow passing `None` +NEW_THREAD_POOL_EXECUTOR = DefaultThreadPoolExecutor() + # pylint: disable=too-many-public-methods,too-many-instance-attributes,too-many-arguments,too-many-positional-arguments,too-many-locals class AsyncClient: @@ -22,13 +33,25 @@ class AsyncClient: Internally, each of the methods that uses IO is wrapped in a call to EventLoop.run_in_executor. """ - def __init__(self, *, client: Client, executor_threads: int = 0): + def __init__(self, + *, + client: Client, + executor_threads: int = 0, + executor: Union[ThreadPoolExecutor, None, DefaultThreadPoolExecutor] = NEW_THREAD_POOL_EXECUTOR): if isinstance(client, HttpClient): client.headers['User-Agent'] = client.headers['User-Agent'].replace('mode:sync;', 'mode:async;') self.client = client if executor_threads == 0: executor_threads = min(32, (os.cpu_count() or 1) + 4) # Mimic the default behavior - self.executor = ThreadPoolExecutor(max_workers=executor_threads) + if executor is NEW_THREAD_POOL_EXECUTOR: + self.new_executor = True + self.executor = ThreadPoolExecutor(max_workers=executor_threads) + else: + if executor_threads != 0: + logger.warning('executor_threads parameter is ignored when passing an executor object') + + self.new_executor = False + self.executor = executor def set_client_setting(self, key, value): """ @@ -69,7 +92,9 @@ class AsyncClient: Subclass implementation to close the connection to the server/deallocate the client """ self.client.close() - await asyncio.to_thread(self.executor.shutdown, True) + + if self.new_executor: + await asyncio.to_thread(self.executor.shutdown, True) async def query(self, query: Optional[str] = None, @@ -85,6 +110,7 @@ class AsyncClient: context: QueryContext = None, query_tz: Optional[Union[str, tzinfo]] = None, column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None, + utc_tz_aware: Optional[bool] = None, external_data: Optional[ExternalData] = None, transport_settings: Optional[Dict[str, str]] = None) -> QueryResult: """ @@ -98,6 +124,7 @@ class AsyncClient: column_formats=column_formats, encoding=encoding, use_none=use_none, column_oriented=column_oriented, use_numpy=use_numpy, max_str_len=max_str_len, context=context, query_tz=query_tz, column_tzs=column_tzs, + utc_tz_aware=utc_tz_aware, external_data=external_data, transport_settings=transport_settings) loop = asyncio.get_running_loop() @@ -115,6 +142,7 @@ class AsyncClient: context: QueryContext = None, query_tz: Optional[Union[str, tzinfo]] = None, column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None, + utc_tz_aware: Optional[bool] = None, external_data: Optional[ExternalData] = None, transport_settings: Optional[Dict[str, str]] = None, ) -> StreamContext: @@ -129,6 +157,7 @@ class AsyncClient: query_formats=query_formats, column_formats=column_formats, encoding=encoding, use_none=use_none, context=context, query_tz=query_tz, column_tzs=column_tzs, + utc_tz_aware=utc_tz_aware, external_data=external_data, transport_settings=transport_settings) loop = asyncio.get_running_loop() @@ -146,6 +175,7 @@ class AsyncClient: context: QueryContext = None, query_tz: Optional[Union[str, tzinfo]] = None, column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None, + utc_tz_aware: Optional[bool] = None, external_data: Optional[ExternalData] = None, transport_settings: Optional[Dict[str, str]] = None) -> StreamContext: """ @@ -159,6 +189,7 @@ class AsyncClient: query_formats=query_formats, column_formats=column_formats, encoding=encoding, use_none=use_none, context=context, query_tz=query_tz, column_tzs=column_tzs, + utc_tz_aware=utc_tz_aware, external_data=external_data, transport_settings=transport_settings) loop = asyncio.get_running_loop() @@ -176,6 +207,7 @@ class AsyncClient: context: QueryContext = None, query_tz: Optional[Union[str, tzinfo]] = None, column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None, + utc_tz_aware: Optional[bool] = None, external_data: Optional[ExternalData] = None, transport_settings: Optional[Dict[str, str]] = None) -> StreamContext: """ @@ -189,6 +221,7 @@ class AsyncClient: query_formats=query_formats, column_formats=column_formats, encoding=encoding, use_none=use_none, context=context, query_tz=query_tz, column_tzs=column_tzs, + utc_tz_aware=utc_tz_aware, external_data=external_data, transport_settings=transport_settings) loop = asyncio.get_running_loop() @@ -321,6 +354,7 @@ class AsyncClient: use_na_values: Optional[bool] = None, query_tz: Optional[str] = None, column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None, + utc_tz_aware: Optional[bool] = None, context: QueryContext = None, external_data: Optional[ExternalData] = None, use_extended_dtypes: Optional[bool] = None, @@ -335,7 +369,8 @@ class AsyncClient: return self.client.query_df(query=query, parameters=parameters, settings=settings, query_formats=query_formats, column_formats=column_formats, encoding=encoding, use_none=use_none, max_str_len=max_str_len, use_na_values=use_na_values, - query_tz=query_tz, column_tzs=column_tzs, context=context, + query_tz=query_tz, column_tzs=column_tzs, utc_tz_aware=utc_tz_aware, + context=context, external_data=external_data, use_extended_dtypes=use_extended_dtypes, transport_settings=transport_settings) @@ -395,6 +430,7 @@ class AsyncClient: use_na_values: Optional[bool] = None, query_tz: Optional[str] = None, column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None, + utc_tz_aware: Optional[bool] = None, context: QueryContext = None, external_data: Optional[ExternalData] = None, use_extended_dtypes: Optional[bool] = None, @@ -410,7 +446,8 @@ class AsyncClient: query_formats=query_formats, column_formats=column_formats, encoding=encoding, use_none=use_none, max_str_len=max_str_len, use_na_values=use_na_values, - query_tz=query_tz, column_tzs=column_tzs, context=context, + query_tz=query_tz, column_tzs=column_tzs, + utc_tz_aware=utc_tz_aware, context=context, external_data=external_data, use_extended_dtypes=use_extended_dtypes, transport_settings=transport_settings) diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py index 77dfcb9a9d0..1a7a53a6141 100644 --- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py +++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py @@ -44,6 +44,7 @@ class Client(ABC): database = None max_error_message = 0 apply_server_timezone = False + utc_tz_aware = False show_clickhouse_errors = True def __init__(self, @@ -53,12 +54,15 @@ class Client(ABC): query_retries: int, server_host_name: Optional[str], apply_server_timezone: Optional[Union[str, bool]], + utc_tz_aware: Optional[bool], show_clickhouse_errors: Optional[bool]): """ Shared initialization of ClickHouse Connect client :param database: database name :param query_limit: default LIMIT for queries :param uri: uri for error messages + :param utc_tz_aware: Default timezone behavior when the active timezone resolves to UTC. If True, + timezone-aware UTC datetimes are returned; otherwise legacy naive datetimes are used. """ self.query_limit = coerce_int(query_limit) self.query_retries = coerce_int(query_retries) @@ -68,6 +72,7 @@ class Client(ABC): self.show_clickhouse_errors = coerce_bool(show_clickhouse_errors) self.server_host_name = server_host_name self.uri = uri + self.utc_tz_aware = bool(utc_tz_aware) self._init_common_settings(apply_server_timezone) def _init_common_settings(self, apply_server_timezone: Optional[Union[str, bool]]): @@ -213,6 +218,7 @@ class Client(ABC): context: QueryContext = None, query_tz: Optional[Union[str, tzinfo]] = None, column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None, + utc_tz_aware: Optional[bool] = None, external_data: Optional[ExternalData] = None, transport_settings: Optional[Dict[str, str]] = None) -> QueryResult: """ @@ -248,6 +254,7 @@ class Client(ABC): context: QueryContext = None, query_tz: Optional[Union[str, tzinfo]] = None, column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None, + utc_tz_aware: Optional[bool] = None, external_data: Optional[ExternalData] = None, transport_settings: Optional[Dict[str, str]] = None) -> StreamContext: """ @@ -268,6 +275,7 @@ class Client(ABC): context: QueryContext = None, query_tz: Optional[Union[str, tzinfo]] = None, column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None, + utc_tz_aware: Optional[bool] = None, external_data: Optional[ExternalData] = None, transport_settings: Optional[Dict[str, str]] = None) -> StreamContext: """ @@ -288,6 +296,7 @@ class Client(ABC): context: QueryContext = None, query_tz: Optional[Union[str, tzinfo]] = None, column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None, + utc_tz_aware: Optional[bool] = None, external_data: Optional[ExternalData] = None, transport_settings: Optional[Dict[str, str]] = None) -> StreamContext: """ @@ -396,6 +405,7 @@ class Client(ABC): use_na_values: Optional[bool] = None, query_tz: Optional[str] = None, column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None, + utc_tz_aware: Optional[bool] = None, context: QueryContext = None, external_data: Optional[ExternalData] = None, use_extended_dtypes: Optional[bool] = None, @@ -422,6 +432,7 @@ class Client(ABC): use_na_values: Optional[bool] = None, query_tz: Optional[str] = None, column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None, + utc_tz_aware: Optional[bool] = None, context: QueryContext = None, external_data: Optional[ExternalData] = None, use_extended_dtypes: Optional[bool] = None, @@ -451,6 +462,7 @@ class Client(ABC): context: Optional[QueryContext] = None, query_tz: Optional[Union[str, tzinfo]] = None, column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None, + utc_tz_aware: Optional[bool] = None, use_na_values: Optional[bool] = None, streaming: bool = False, as_pandas: bool = False, @@ -479,6 +491,8 @@ class Client(ABC): objects with the selected timezone. :param column_tzs: A dictionary of column names to tzinfo objects (or strings that will be converted to tzinfo objects). The timezone will be applied to datetime objects returned in the query + :param utc_tz_aware: Override the client default for handling UTC results. True forces timezone-aware + UTC datetimes while False returns naive UTC datetimes. :param use_na_values: Deprecated alias for use_advanced_dtypes :param as_pandas Return the result columns as pandas.Series objects :param streaming Marker used to correctly configure streaming queries @@ -489,6 +503,7 @@ class Client(ABC): :param transport_settings: Optional dictionary of transport level settings (HTTP headers, etc.) :return: Reusable QueryContext """ + resolved_utc_tz_aware = self.utc_tz_aware if utc_tz_aware is None else utc_tz_aware if context: return context.updated_copy(query=query, parameters=parameters, @@ -503,6 +518,7 @@ class Client(ABC): max_str_len=max_str_len, query_tz=query_tz, column_tzs=column_tzs, + utc_tz_aware=resolved_utc_tz_aware, as_pandas=as_pandas, use_extended_dtypes=use_extended_dtypes, streaming=streaming, @@ -527,6 +543,7 @@ class Client(ABC): max_str_len=max_str_len, query_tz=query_tz, column_tzs=column_tzs, + utc_tz_aware=resolved_utc_tz_aware, use_extended_dtypes=use_extended_dtypes, as_pandas=as_pandas, streaming=streaming, @@ -944,7 +961,7 @@ class Client(ABC): full_table = quote_identifier(table) column_defs = [] if column_types is None and column_type_names is None: - describe_result = self.query(f'DESCRIBE TABLE {full_table}') + describe_result = self.query(f'DESCRIBE TABLE {full_table}', settings=settings) column_defs = [ColumnDef(**row) for row in describe_result.named_results() if row['default_type'] not in ('ALIAS', 'MATERIALIZED')] if column_names is None or isinstance(column_names, str) and column_names == '*': diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py index 0f36b87c531..0f0ff45ea52 100644 --- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py +++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py @@ -76,6 +76,7 @@ class HttpClient(Client): https_proxy: Optional[str] = None, server_host_name: Optional[str] = None, apply_server_timezone: Optional[Union[str, bool]] = None, + utc_tz_aware: Optional[bool] = None, show_clickhouse_errors: Optional[bool] = None, autogenerate_session_id: Optional[bool] = None, tls_mode: Optional[str] = None, @@ -177,6 +178,7 @@ class HttpClient(Client): query_retries=query_retries, server_host_name=server_host_name, apply_server_timezone=apply_server_timezone, + utc_tz_aware=utc_tz_aware, show_clickhouse_errors=show_clickhouse_errors) self.params = dict_copy(self.params, self._validate_settings(ch_settings)) comp_setting = self._setting_status('enable_http_compression') diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py index de490a5744a..8001bd2288f 100644 --- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py +++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py @@ -48,6 +48,7 @@ class QueryContext(BaseQueryContext): max_str_len: Optional[int] = 0, query_tz: Optional[Union[str, tzinfo]] = None, column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None, + utc_tz_aware: bool = False, use_extended_dtypes: Optional[bool] = None, as_pandas: bool = False, streaming: bool = False, @@ -81,6 +82,8 @@ class QueryContext(BaseQueryContext): objects with the selected timezone :param column_tzs A dictionary of column names to tzinfo objects (or strings that will be converted to tzinfo objects). The timezone will be applied to datetime objects returned in the query + :param utc_tz_aware Force timezone-aware Python datetime objects even when the active timezone is UTC. + Defaults to False to preserve the legacy behavior of returning naive UTC timestamps. """ super().__init__(settings, query_formats, @@ -98,6 +101,7 @@ class QueryContext(BaseQueryContext): self.server_tz = server_tz self.apply_server_tz = apply_server_tz self.external_data = external_data + self.utc_tz_aware = utc_tz_aware if isinstance(query_tz, str): try: query_tz = pytz.timezone(query_tz) @@ -181,7 +185,7 @@ class QueryContext(BaseQueryContext): active_tz = self.server_tz else: active_tz = tzutil.local_tz - if active_tz == pytz.UTC: + if active_tz == pytz.UTC and not self.utc_tz_aware: return None return active_tz @@ -200,6 +204,7 @@ class QueryContext(BaseQueryContext): max_str_len: Optional[int] = None, query_tz: Optional[Union[str, tzinfo]] = None, column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None, + utc_tz_aware: Optional[bool] = None, use_extended_dtypes: Optional[bool] = None, as_pandas: bool = False, streaming: bool = False, @@ -222,6 +227,7 @@ class QueryContext(BaseQueryContext): self.max_str_len if max_str_len is None else max_str_len, self.query_tz if query_tz is None else query_tz, self.column_tzs if column_tzs is None else column_tzs, + self.utc_tz_aware if utc_tz_aware is None else utc_tz_aware, self.use_extended_dtypes if use_extended_dtypes is None else use_extended_dtypes, as_pandas, streaming, diff --git a/contrib/python/clickhouse-connect/ya.make b/contrib/python/clickhouse-connect/ya.make index 7cc8b67f8a5..bc5b9d89f9c 100644 --- a/contrib/python/clickhouse-connect/ya.make +++ b/contrib/python/clickhouse-connect/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(0.9.2) +VERSION(0.10.0) LICENSE(Apache-2.0) @@ -40,6 +40,7 @@ PY_SRCS( clickhouse_connect/cc_sqlalchemy/dialect.py clickhouse_connect/cc_sqlalchemy/inspector.py clickhouse_connect/cc_sqlalchemy/sql/__init__.py + clickhouse_connect/cc_sqlalchemy/sql/clauses.py clickhouse_connect/cc_sqlalchemy/sql/compiler.py clickhouse_connect/cc_sqlalchemy/sql/ddlcompiler.py clickhouse_connect/cc_sqlalchemy/sql/preparer.py |
