summaryrefslogtreecommitdiffstats
path: root/contrib/python/clickhouse-connect
diff options
context:
space:
mode:
authorrobot-piglet <[email protected]>2025-11-29 15:12:53 +0300
committerrobot-piglet <[email protected]>2025-11-29 15:25:57 +0300
commitf01b7b700742556dc65aa8fbacef3c62ffddbdd7 (patch)
tree4dafb30e37d5bbe6a4bd6006bf2d114eaca541c6 /contrib/python/clickhouse-connect
parent1895c63513ed8b03774d8ae04c4092eccaf128e6 (diff)
Intermediate changes
commit_hash:3a22794f13d3a75e0e82d9187e263dfa8342c8a5
Diffstat (limited to 'contrib/python/clickhouse-connect')
-rw-r--r--contrib/python/clickhouse-connect/.dist-info/METADATA16
-rw-r--r--contrib/python/clickhouse-connect/README.md2
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/__version__.py2
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/__init__.py4
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/__init__.py44
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/clauses.py93
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/compiler.py22
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/common.py13
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/datatypes/dynamic.py12
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/datatypes/temporal.py12
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/driver/__init__.py17
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/driver/asyncclient.py47
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py19
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py2
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py8
-rw-r--r--contrib/python/clickhouse-connect/ya.make3
16 files changed, 278 insertions, 38 deletions
diff --git a/contrib/python/clickhouse-connect/.dist-info/METADATA b/contrib/python/clickhouse-connect/.dist-info/METADATA
index c2d9217601d..af5f3829da5 100644
--- a/contrib/python/clickhouse-connect/.dist-info/METADATA
+++ b/contrib/python/clickhouse-connect/.dist-info/METADATA
@@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: clickhouse-connect
-Version: 0.9.2
+Version: 0.10.0
Summary: ClickHouse Database Core Driver for Python, Pandas, and Superset
Home-page: https://github.com/ClickHouse/clickhouse-connect
Author: ClickHouse Inc.
@@ -15,14 +15,17 @@ Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
-Requires-Python: >=3.9,<3.14
+Classifier: Programming Language :: Python :: 3.14
+Requires-Python: >=3.9,<3.15
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: certifi
Requires-Dist: urllib3>=1.26
Requires-Dist: pytz
-Requires-Dist: zstandard
-Requires-Dist: lz4
+Requires-Dist: zstandard; python_version < "3.14"
+Requires-Dist: zstandard>=0.25.0; python_version >= "3.14"
+Requires-Dist: lz4; python_version < "3.14"
+Requires-Dist: lz4>=4.4.5; python_version >= "3.14"
Provides-Extra: sqlalchemy
Requires-Dist: sqlalchemy<3.0,>=1.4.40; extra == "sqlalchemy"
Provides-Extra: numpy
@@ -32,7 +35,8 @@ Requires-Dist: pandas; extra == "pandas"
Provides-Extra: polars
Requires-Dist: polars>=1.0; extra == "polars"
Provides-Extra: arrow
-Requires-Dist: pyarrow; extra == "arrow"
+Requires-Dist: pyarrow>=22.0; python_version >= "3.14" and extra == "arrow"
+Requires-Dist: pyarrow; python_version < "3.14" and extra == "arrow"
Provides-Extra: orjson
Requires-Dist: orjson; extra == "orjson"
Provides-Extra: tzlocal
@@ -89,7 +93,7 @@ and **SQLAlchemy Core**.
Supported features include:
- Basic query execution via SQLAlchemy Core
-- `SELECT` queries with `JOIN`s
+- `SELECT` queries with `JOIN`s, `ARRAY JOIN`, and `FINAL` modifier
- Lightweight `DELETE` statements
The implementation does not include ORM support and is not intended as a full SQLAlchemy dialect. While it can support
diff --git a/contrib/python/clickhouse-connect/README.md b/contrib/python/clickhouse-connect/README.md
index 8d9e4b8b845..8b7288b9e64 100644
--- a/contrib/python/clickhouse-connect/README.md
+++ b/contrib/python/clickhouse-connect/README.md
@@ -36,7 +36,7 @@ and **SQLAlchemy Core**.
Supported features include:
- Basic query execution via SQLAlchemy Core
-- `SELECT` queries with `JOIN`s
+- `SELECT` queries with `JOIN`s, `ARRAY JOIN`, and `FINAL` modifier
- Lightweight `DELETE` statements
The implementation does not include ORM support and is not intended as a full SQLAlchemy dialect. While it can support
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py b/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py
index b2b90876495..f4a8f1729b5 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py
@@ -1 +1 @@
-version = "0.9.2"
+version = "0.10.0"
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/__init__.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/__init__.py
index ec80555281c..1e6441197a6 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/__init__.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/__init__.py
@@ -1,6 +1,10 @@
from clickhouse_connect import driver_name
from clickhouse_connect.cc_sqlalchemy.datatypes.base import schema_types
+from clickhouse_connect.cc_sqlalchemy.sql import final
+from clickhouse_connect.cc_sqlalchemy.sql.clauses import array_join, ArrayJoin
# pylint: disable=invalid-name
dialect_name = driver_name
ischema_names = schema_types
+
+__all__ = ['dialect_name', 'ischema_names', 'array_join', 'ArrayJoin', 'final']
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/__init__.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/__init__.py
index 00b9bc8c134..e7051839c32 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/__init__.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/__init__.py
@@ -1,6 +1,7 @@
from typing import Optional
from sqlalchemy import Table
+from sqlalchemy.sql.selectable import FromClause, Select
from clickhouse_connect.driver.binding import quote_identifier
@@ -13,3 +14,46 @@ def full_table(table_name: str, schema: Optional[str] = None) -> str:
def format_table(table: Table):
return full_table(table.name, table.schema)
+
+
+def final(select_stmt: Select, table: Optional[FromClause] = None) -> Select:
+ """
+ Apply the ClickHouse FINAL modifier to a select statement.
+
+ Args:
+ select_stmt: The SQLAlchemy Select statement to modify.
+ table: Optional explicit table/alias to apply FINAL to. When omitted the
+ method will use the single FROM element present on the select. A
+ ValueError is raised if the statement has no FROMs or more than one
+ FROM element and table is not provided.
+
+ Returns:
+ A new Select that renders the FINAL modifier for the target table.
+ """
+ if not isinstance(select_stmt, Select):
+ raise TypeError("final() expects a SQLAlchemy Select instance")
+
+ target = table
+ if target is None:
+ froms = select_stmt.get_final_froms()
+ if not froms:
+ raise ValueError("final() requires a table to apply the FINAL modifier.")
+ if len(froms) > 1:
+ raise ValueError("final() is ambiguous for statements with multiple FROM clauses. Specify the table explicitly.")
+ target = froms[0]
+
+ if not isinstance(target, FromClause):
+ raise TypeError("table must be a SQLAlchemy FromClause when provided")
+
+ return select_stmt.with_hint(target, "FINAL")
+
+
+def _select_final(self: Select, table: Optional[FromClause] = None) -> Select:
+ """
+ Select.final() convenience wrapper around the module-level final() helper.
+ """
+ return final(self, table=table)
+
+
+# Monkey-patch the Select class to add the .final() convenience method
+Select.final = _select_final
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/clauses.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/clauses.py
new file mode 100644
index 00000000000..dd5e7a44140
--- /dev/null
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/clauses.py
@@ -0,0 +1,93 @@
+from sqlalchemy.sql.base import Immutable
+from sqlalchemy.sql.selectable import FromClause
+
+
+# pylint: disable=protected-access,too-many-ancestors,abstract-method,unused-argument
+class ArrayJoin(Immutable, FromClause):
+ """Represents ClickHouse ARRAY JOIN clause"""
+
+ __visit_name__ = "array_join"
+ _is_from_container = True
+ named_with_column = False
+ _is_join = True
+
+ def __init__(self, left, array_column, alias=None, is_left=False):
+ """Initialize ARRAY JOIN clause
+
+ Args:
+ left: The left side (table or subquery)
+ array_column: The array column to join
+ alias: Optional alias for the joined array elements
+ is_left: If True, use LEFT ARRAY JOIN instead of ARRAY JOIN
+ """
+ super().__init__()
+ self.left = left
+ self.array_column = array_column
+ self.alias = alias
+ self.is_left = is_left
+ self._is_clone_of = None
+
+ @property
+ def selectable(self):
+ """Return the selectable for this clause"""
+ return self.left
+
+ @property
+ def _hide_froms(self):
+ """Hide the left table from the FROM clause since it's part of the ARRAY JOIN"""
+ return [self.left]
+
+ @property
+ def _from_objects(self):
+ """Return all FROM objects referenced by this construct"""
+ return self.left._from_objects
+
+ def _clone(self, **kw):
+ """Return a copy of this ArrayJoin"""
+ c = self.__class__.__new__(self.__class__)
+ c.__dict__ = self.__dict__.copy()
+ c._is_clone_of = self
+ return c
+
+ def _copy_internals(self, clone=None, **kw):
+ """Copy internal state for cloning
+
+ This ensures that when queries are cloned (e.g., for subqueries, unions, or CTEs),
+ the left FromClause and array_column references are properly deep-cloned.
+ """
+ def _default_clone(elem, **kwargs):
+ return elem
+
+ if clone is None:
+ clone = _default_clone
+
+ # Clone the left FromClause and array column to ensure proper
+ # reference handling in complex query scenarios
+ self.left = clone(self.left, **kw)
+ self.array_column = clone(self.array_column, **kw)
+
+
+def array_join(left, array_column, alias=None, is_left=False):
+ """Create an ARRAY JOIN clause
+
+ Args:
+ left: The left side (table or subquery)
+ array_column: The array column to join
+ alias: Optional alias for the joined array elements
+ is_left: If True, use LEFT ARRAY JOIN instead of ARRAY JOIN
+
+ Returns:
+ ArrayJoin: An ArrayJoin clause element
+
+ Example:
+ from clickhouse_connect.cc_sqlalchemy.sql.clauses import array_join
+
+ # Basic ARRAY JOIN
+ query = select(table).select_from(array_join(table, table.c.tags))
+
+ # LEFT ARRAY JOIN with alias
+ query = select(table).select_from(
+ array_join(table, table.c.tags, alias='tag', is_left=True)
+ )
+ """
+ return ArrayJoin(left, array_column, alias, is_left)
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/compiler.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/compiler.py
index dab2099ef24..a3daa425a17 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/compiler.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/compiler.py
@@ -1,13 +1,14 @@
from sqlalchemy.exc import CompileError
from sqlalchemy.sql.compiler import SQLCompiler
+from clickhouse_connect.cc_sqlalchemy import ArrayJoin
from clickhouse_connect.cc_sqlalchemy.sql import format_table
# pylint: disable=arguments-differ
class ChStatementCompiler(SQLCompiler):
- # pylint: disable=attribute-defined-outside-init
+ # pylint: disable=attribute-defined-outside-init,unused-argument
def visit_delete(self, delete_stmt, visiting_cte=None, **kw):
table = delete_stmt.table
text = f"DELETE FROM {format_table(table)}"
@@ -23,10 +24,20 @@ class ChStatementCompiler(SQLCompiler):
return text
- def visit_select(self, select_stmt, **kw):
- return super().visit_select(select_stmt, **kw)
+ def visit_array_join(self, array_join_clause, asfrom=False, from_linter=None, **kw):
+ left = self.process(array_join_clause.left, asfrom=True, from_linter=from_linter, **kw)
+ array_col = self.process(array_join_clause.array_column, **kw)
+ join_type = "LEFT ARRAY JOIN" if array_join_clause.is_left else "ARRAY JOIN"
+ text = f"{left} {join_type} {array_col}"
+ if array_join_clause.alias:
+ text += f" AS {self.preparer.quote(array_join_clause.alias)}"
+
+ return text
def visit_join(self, join, **kw):
+ if isinstance(join, ArrayJoin):
+ return self.visit_array_join(join, **kw)
+
left = self.process(join.left, **kw)
right = self.process(join.right, **kw)
onclause = join.onclause
@@ -72,3 +83,8 @@ class ChStatementCompiler(SQLCompiler):
def visit_sequence(self, sequence, **kw):
raise NotImplementedError("ClickHouse doesn't support sequences")
+
+ def get_from_hint_text(self, table, text):
+ if text == "FINAL":
+ return "FINAL"
+ return super().get_from_hint_text(table, text)
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/common.py b/contrib/python/clickhouse-connect/clickhouse_connect/common.py
index fd56a26616b..dc59930286b 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/common.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/common.py
@@ -2,13 +2,12 @@ import getpass
import sys
from dataclasses import dataclass
from typing import Any, Sequence, Optional, Dict
-from clickhouse_connect import __version__
-
+from clickhouse_connect import __version__
from clickhouse_connect.driver.exceptions import ProgrammingError
-def version():
+def version() -> str:
return __version__.version
@@ -30,7 +29,7 @@ class CommonSetting:
_common_settings: Dict[str, CommonSetting] = {}
-def build_client_name(client_name: str):
+def build_client_name(client_name: str) -> str:
product_name = get_setting('product_name')
product_name = product_name.strip() + ' ' if product_name else ''
client_name = client_name.strip() + ' ' if client_name else ''
@@ -46,14 +45,14 @@ def build_client_name(client_name: str):
return full_name.encode('ascii', 'ignore').decode()
-def get_setting(name: str):
+def get_setting(name: str) -> Any:
setting = _common_settings.get(name)
if setting is None:
raise ProgrammingError(f'Unrecognized common setting {name}')
return setting.value if setting.value is not None else setting.default
-def set_setting(name: str, value: Any):
+def set_setting(name: str, value: Any) -> None:
setting = _common_settings.get(name)
if setting is None:
raise ProgrammingError(f'Unrecognized common setting {name}')
@@ -65,7 +64,7 @@ def set_setting(name: str, value: Any):
setting.value = value
-def _init_common(name: str, options: Sequence[Any], default: Any):
+def _init_common(name: str, options: Sequence[Any], default: Any) -> None:
_common_settings[name] = CommonSetting(name, options, default)
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/dynamic.py b/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/dynamic.py
index d7a9b21a540..654bf99e5b5 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/dynamic.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/dynamic.py
@@ -1,5 +1,6 @@
from collections import namedtuple
from typing import List, Sequence, Collection, Any
+from urllib.parse import unquote
from clickhouse_connect.datatypes.base import ClickHouseType, TypeDef
from clickhouse_connect.datatypes.registry import get_from_name
@@ -22,6 +23,13 @@ json_serialization_format = 0x1
VariantState = namedtuple('VariantState', 'discriminator_node element_states')
+def _json_path_segments(path: str) -> List[str]:
+ segments = path.split('.')
+ if '%' in path:
+ return [unquote(segment) for segment in segments]
+ return segments
+
+
class Variant(ClickHouseType):
_slots = 'element_types'
python_type = object
@@ -255,7 +263,7 @@ class JSON(ClickHouseType):
for ix, field in enumerate(self.typed_paths):
value = typed_columns[ix][row_num]
item = top
- chain = field.split('.')
+ chain = _json_path_segments(field)
for key in chain[:-1]:
child = item.get(key)
if child is None:
@@ -268,7 +276,7 @@ class JSON(ClickHouseType):
if value is None:
continue
item = top
- chain = field.split('.')
+ chain = _json_path_segments(field)
for key in chain[:-1]:
child = item.get(key)
if child is None:
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/temporal.py b/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/temporal.py
index a51b6584f8f..1808d8571d9 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/temporal.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/temporal.py
@@ -89,8 +89,8 @@ class Date(ClickHouseType):
if column.tz is None:
return column.astype(self.pandas_dtype)
- naive = column.tz_localize(None).astype(self.pandas_dtype)
- return pd.DatetimeIndex(naive, tz=column.tz)
+ naive = column.tz_convert("UTC").tz_localize(None).astype(self.pandas_dtype)
+ return naive.tz_localize("UTC").tz_convert(column.tz)
if self.nullable and isinstance(column, list):
return np.array([None if pd.isna(s) else s for s in column]).astype(
@@ -159,8 +159,8 @@ class DateTimeBase(ClickHouseType, registered=False):
result = column.astype(self.pandas_dtype)
return pd.array(result) if self.nullable else result
- naive_ns = column.tz_localize(None).astype(self.pandas_dtype)
- tz_aware_result = pd.DatetimeIndex(naive_ns, tz=column.tz)
+ naive_ns = column.tz_convert("UTC").tz_localize(None).astype(self.pandas_dtype)
+ tz_aware_result = naive_ns.tz_localize("UTC").tz_convert(column.tz)
return (
pd.array(tz_aware_result) if self.nullable else tz_aware_result
)
@@ -249,11 +249,11 @@ class DateTime64(DateTimeBase):
active_tz = ctx.active_tz(self.tzinfo)
if ctx.use_numpy:
np_array = numpy_conv.read_numpy_array(source, self.np_type, num_rows)
- if ctx.as_pandas and active_tz and active_tz != pytz.UTC:
+ if ctx.as_pandas and active_tz:
return pd.DatetimeIndex(np_array, tz='UTC').tz_convert(active_tz)
return np_array
column = source.read_array('q', num_rows)
- if active_tz and active_tz != pytz.UTC:
+ if active_tz:
return self._read_binary_tz(column, active_tz)
return self._read_binary_naive(column)
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/__init__.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/__init__.py
index be72481ee56..e946492e854 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/__init__.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/__init__.py
@@ -1,4 +1,5 @@
import asyncio
+from concurrent.futures import ThreadPoolExecutor
from inspect import signature
from typing import Optional, Union, Dict, Any
from urllib.parse import urlparse, parse_qs
@@ -8,7 +9,7 @@ from clickhouse_connect.driver.client import Client
from clickhouse_connect.driver.common import dict_copy
from clickhouse_connect.driver.exceptions import ProgrammingError
from clickhouse_connect.driver.httpclient import HttpClient
-from clickhouse_connect.driver.asyncclient import AsyncClient
+from clickhouse_connect.driver.asyncclient import AsyncClient, DefaultThreadPoolExecutor, NEW_THREAD_POOL_EXECUTOR
# pylint: disable=too-many-arguments,too-many-locals,too-many-branches
@@ -75,6 +76,8 @@ def create_client(*,
:param server_host_name This is the server host name that will be checked against a TLS certificate for
validity. This option can be used if using an ssh_tunnel or other indirect means to an ClickHouse server
where the `host` argument refers to the tunnel or proxy and not the actual ClickHouse server
+ :param utc_tz_aware When True, ClickHouse Connect will return timezone-aware UTC datetimes instead of the
+ legacy naive UTC datetimes.
:param autogenerate_session_id If set, this will override the 'autogenerate_session_id' common setting.
:param form_encode_query_params If True, query parameters will be sent as form-encoded data in the request body
instead of as URL parameters. This is useful for queries with large parameter sets that might exceed URL length
@@ -144,7 +147,8 @@ async def create_async_client(*,
dsn: Optional[str] = None,
settings: Optional[Dict[str, Any]] = None,
generic_args: Optional[Dict[str, Any]] = None,
- executor_threads: Optional[int] = None,
+ executor_threads: int = 0,
+ executor: Union[ThreadPoolExecutor, None, DefaultThreadPoolExecutor] = NEW_THREAD_POOL_EXECUTOR,
**kwargs) -> AsyncClient:
"""
The preferred method to get an async ClickHouse Connect Client instance.
@@ -166,8 +170,11 @@ async def create_async_client(*,
:param settings: ClickHouse server settings to be used with the session/every request
:param generic_args: Used internally to parse DBAPI connection strings into keyword arguments and ClickHouse settings.
It is not recommended to use this parameter externally
- :param: executor_threads 'max_worker' threads used by the client ThreadPoolExecutor. If not set, the default
+ :param executor_threads: 'max_worker' threads used by the client ThreadPoolExecutor. If not set, the default
of 4 + detected CPU cores will be used
+ :param executor: Optional `ThreadPoolExecutor` to use for async operations. If not set, a new `ThreadPoolExecutor`
+ will be created with the number of threads specified by `executor_threads`. If set to `None` it will use the
+ default executor of the event loop.
:param kwargs -- Recognized keyword arguments (used by the HTTP client), see below
:param compress: Enable compression for ClickHouse HTTP inserts and query results. True will select the preferred
@@ -196,6 +203,8 @@ async def create_async_client(*,
:param server_host_name This is the server host name that will be checked against a TLS certificate for
validity. This option can be used if using an ssh_tunnel or other indirect means to an ClickHouse server
where the `host` argument refers to the tunnel or proxy and not the actual ClickHouse server
+ :param utc_tz_aware When True, ClickHouse Connect will return timezone-aware UTC datetimes instead of the
+ legacy naive UTC datetimes.
:param autogenerate_session_id If set, this will override the 'autogenerate_session_id' common setting.
:param form_encode_query_params If True, query parameters will be sent as form-encoded data in the request body
instead of as URL parameters. This is useful for queries with large parameter sets that might exceed URL length
@@ -211,4 +220,4 @@ async def create_async_client(*,
loop = asyncio.get_running_loop()
_client = await loop.run_in_executor(None, _create_client)
- return AsyncClient(client=_client, executor_threads=executor_threads)
+ return AsyncClient(client=_client, executor_threads=executor_threads, executor=executor)
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/asyncclient.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/asyncclient.py
index feaa4e22903..41244bbbb51 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/asyncclient.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/asyncclient.py
@@ -1,5 +1,6 @@
import asyncio
import io
+import logging
import os
from concurrent.futures.thread import ThreadPoolExecutor
from datetime import tzinfo
@@ -14,6 +15,16 @@ from clickhouse_connect.driver.summary import QuerySummary
from clickhouse_connect.datatypes.base import ClickHouseType
from clickhouse_connect.driver.insert import InsertContext
+logger = logging.getLogger(__name__)
+
+
+class DefaultThreadPoolExecutor:
+ pass
+
+
+# Sentinel value to preserve default behavior and also allow passing `None`
+NEW_THREAD_POOL_EXECUTOR = DefaultThreadPoolExecutor()
+
# pylint: disable=too-many-public-methods,too-many-instance-attributes,too-many-arguments,too-many-positional-arguments,too-many-locals
class AsyncClient:
@@ -22,13 +33,25 @@ class AsyncClient:
Internally, each of the methods that uses IO is wrapped in a call to EventLoop.run_in_executor.
"""
- def __init__(self, *, client: Client, executor_threads: int = 0):
+ def __init__(self,
+ *,
+ client: Client,
+ executor_threads: int = 0,
+ executor: Union[ThreadPoolExecutor, None, DefaultThreadPoolExecutor] = NEW_THREAD_POOL_EXECUTOR):
if isinstance(client, HttpClient):
client.headers['User-Agent'] = client.headers['User-Agent'].replace('mode:sync;', 'mode:async;')
self.client = client
if executor_threads == 0:
executor_threads = min(32, (os.cpu_count() or 1) + 4) # Mimic the default behavior
- self.executor = ThreadPoolExecutor(max_workers=executor_threads)
+ if executor is NEW_THREAD_POOL_EXECUTOR:
+ self.new_executor = True
+ self.executor = ThreadPoolExecutor(max_workers=executor_threads)
+ else:
+ if executor_threads != 0:
+ logger.warning('executor_threads parameter is ignored when passing an executor object')
+
+ self.new_executor = False
+ self.executor = executor
def set_client_setting(self, key, value):
"""
@@ -69,7 +92,9 @@ class AsyncClient:
Subclass implementation to close the connection to the server/deallocate the client
"""
self.client.close()
- await asyncio.to_thread(self.executor.shutdown, True)
+
+ if self.new_executor:
+ await asyncio.to_thread(self.executor.shutdown, True)
async def query(self,
query: Optional[str] = None,
@@ -85,6 +110,7 @@ class AsyncClient:
context: QueryContext = None,
query_tz: Optional[Union[str, tzinfo]] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
+ utc_tz_aware: Optional[bool] = None,
external_data: Optional[ExternalData] = None,
transport_settings: Optional[Dict[str, str]] = None) -> QueryResult:
"""
@@ -98,6 +124,7 @@ class AsyncClient:
column_formats=column_formats, encoding=encoding, use_none=use_none,
column_oriented=column_oriented, use_numpy=use_numpy, max_str_len=max_str_len,
context=context, query_tz=query_tz, column_tzs=column_tzs,
+ utc_tz_aware=utc_tz_aware,
external_data=external_data, transport_settings=transport_settings)
loop = asyncio.get_running_loop()
@@ -115,6 +142,7 @@ class AsyncClient:
context: QueryContext = None,
query_tz: Optional[Union[str, tzinfo]] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
+ utc_tz_aware: Optional[bool] = None,
external_data: Optional[ExternalData] = None,
transport_settings: Optional[Dict[str, str]] = None,
) -> StreamContext:
@@ -129,6 +157,7 @@ class AsyncClient:
query_formats=query_formats, column_formats=column_formats,
encoding=encoding, use_none=use_none, context=context,
query_tz=query_tz, column_tzs=column_tzs,
+ utc_tz_aware=utc_tz_aware,
external_data=external_data, transport_settings=transport_settings)
loop = asyncio.get_running_loop()
@@ -146,6 +175,7 @@ class AsyncClient:
context: QueryContext = None,
query_tz: Optional[Union[str, tzinfo]] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
+ utc_tz_aware: Optional[bool] = None,
external_data: Optional[ExternalData] = None,
transport_settings: Optional[Dict[str, str]] = None) -> StreamContext:
"""
@@ -159,6 +189,7 @@ class AsyncClient:
query_formats=query_formats, column_formats=column_formats,
encoding=encoding, use_none=use_none, context=context,
query_tz=query_tz, column_tzs=column_tzs,
+ utc_tz_aware=utc_tz_aware,
external_data=external_data, transport_settings=transport_settings)
loop = asyncio.get_running_loop()
@@ -176,6 +207,7 @@ class AsyncClient:
context: QueryContext = None,
query_tz: Optional[Union[str, tzinfo]] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
+ utc_tz_aware: Optional[bool] = None,
external_data: Optional[ExternalData] = None,
transport_settings: Optional[Dict[str, str]] = None) -> StreamContext:
"""
@@ -189,6 +221,7 @@ class AsyncClient:
query_formats=query_formats, column_formats=column_formats,
encoding=encoding, use_none=use_none, context=context,
query_tz=query_tz, column_tzs=column_tzs,
+ utc_tz_aware=utc_tz_aware,
external_data=external_data, transport_settings=transport_settings)
loop = asyncio.get_running_loop()
@@ -321,6 +354,7 @@ class AsyncClient:
use_na_values: Optional[bool] = None,
query_tz: Optional[str] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
+ utc_tz_aware: Optional[bool] = None,
context: QueryContext = None,
external_data: Optional[ExternalData] = None,
use_extended_dtypes: Optional[bool] = None,
@@ -335,7 +369,8 @@ class AsyncClient:
return self.client.query_df(query=query, parameters=parameters, settings=settings,
query_formats=query_formats, column_formats=column_formats, encoding=encoding,
use_none=use_none, max_str_len=max_str_len, use_na_values=use_na_values,
- query_tz=query_tz, column_tzs=column_tzs, context=context,
+ query_tz=query_tz, column_tzs=column_tzs, utc_tz_aware=utc_tz_aware,
+ context=context,
external_data=external_data, use_extended_dtypes=use_extended_dtypes,
transport_settings=transport_settings)
@@ -395,6 +430,7 @@ class AsyncClient:
use_na_values: Optional[bool] = None,
query_tz: Optional[str] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
+ utc_tz_aware: Optional[bool] = None,
context: QueryContext = None,
external_data: Optional[ExternalData] = None,
use_extended_dtypes: Optional[bool] = None,
@@ -410,7 +446,8 @@ class AsyncClient:
query_formats=query_formats, column_formats=column_formats,
encoding=encoding,
use_none=use_none, max_str_len=max_str_len, use_na_values=use_na_values,
- query_tz=query_tz, column_tzs=column_tzs, context=context,
+ query_tz=query_tz, column_tzs=column_tzs,
+ utc_tz_aware=utc_tz_aware, context=context,
external_data=external_data, use_extended_dtypes=use_extended_dtypes,
transport_settings=transport_settings)
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py
index 77dfcb9a9d0..1a7a53a6141 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py
@@ -44,6 +44,7 @@ class Client(ABC):
database = None
max_error_message = 0
apply_server_timezone = False
+ utc_tz_aware = False
show_clickhouse_errors = True
def __init__(self,
@@ -53,12 +54,15 @@ class Client(ABC):
query_retries: int,
server_host_name: Optional[str],
apply_server_timezone: Optional[Union[str, bool]],
+ utc_tz_aware: Optional[bool],
show_clickhouse_errors: Optional[bool]):
"""
Shared initialization of ClickHouse Connect client
:param database: database name
:param query_limit: default LIMIT for queries
:param uri: uri for error messages
+ :param utc_tz_aware: Default timezone behavior when the active timezone resolves to UTC. If True,
+ timezone-aware UTC datetimes are returned; otherwise legacy naive datetimes are used.
"""
self.query_limit = coerce_int(query_limit)
self.query_retries = coerce_int(query_retries)
@@ -68,6 +72,7 @@ class Client(ABC):
self.show_clickhouse_errors = coerce_bool(show_clickhouse_errors)
self.server_host_name = server_host_name
self.uri = uri
+ self.utc_tz_aware = bool(utc_tz_aware)
self._init_common_settings(apply_server_timezone)
def _init_common_settings(self, apply_server_timezone: Optional[Union[str, bool]]):
@@ -213,6 +218,7 @@ class Client(ABC):
context: QueryContext = None,
query_tz: Optional[Union[str, tzinfo]] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
+ utc_tz_aware: Optional[bool] = None,
external_data: Optional[ExternalData] = None,
transport_settings: Optional[Dict[str, str]] = None) -> QueryResult:
"""
@@ -248,6 +254,7 @@ class Client(ABC):
context: QueryContext = None,
query_tz: Optional[Union[str, tzinfo]] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
+ utc_tz_aware: Optional[bool] = None,
external_data: Optional[ExternalData] = None,
transport_settings: Optional[Dict[str, str]] = None) -> StreamContext:
"""
@@ -268,6 +275,7 @@ class Client(ABC):
context: QueryContext = None,
query_tz: Optional[Union[str, tzinfo]] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
+ utc_tz_aware: Optional[bool] = None,
external_data: Optional[ExternalData] = None,
transport_settings: Optional[Dict[str, str]] = None) -> StreamContext:
"""
@@ -288,6 +296,7 @@ class Client(ABC):
context: QueryContext = None,
query_tz: Optional[Union[str, tzinfo]] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
+ utc_tz_aware: Optional[bool] = None,
external_data: Optional[ExternalData] = None,
transport_settings: Optional[Dict[str, str]] = None) -> StreamContext:
"""
@@ -396,6 +405,7 @@ class Client(ABC):
use_na_values: Optional[bool] = None,
query_tz: Optional[str] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
+ utc_tz_aware: Optional[bool] = None,
context: QueryContext = None,
external_data: Optional[ExternalData] = None,
use_extended_dtypes: Optional[bool] = None,
@@ -422,6 +432,7 @@ class Client(ABC):
use_na_values: Optional[bool] = None,
query_tz: Optional[str] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
+ utc_tz_aware: Optional[bool] = None,
context: QueryContext = None,
external_data: Optional[ExternalData] = None,
use_extended_dtypes: Optional[bool] = None,
@@ -451,6 +462,7 @@ class Client(ABC):
context: Optional[QueryContext] = None,
query_tz: Optional[Union[str, tzinfo]] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
+ utc_tz_aware: Optional[bool] = None,
use_na_values: Optional[bool] = None,
streaming: bool = False,
as_pandas: bool = False,
@@ -479,6 +491,8 @@ class Client(ABC):
objects with the selected timezone.
:param column_tzs: A dictionary of column names to tzinfo objects (or strings that will be converted to
tzinfo objects). The timezone will be applied to datetime objects returned in the query
+ :param utc_tz_aware: Override the client default for handling UTC results. True forces timezone-aware
+ UTC datetimes while False returns naive UTC datetimes.
:param use_na_values: Deprecated alias for use_advanced_dtypes
:param as_pandas Return the result columns as pandas.Series objects
:param streaming Marker used to correctly configure streaming queries
@@ -489,6 +503,7 @@ class Client(ABC):
:param transport_settings: Optional dictionary of transport level settings (HTTP headers, etc.)
:return: Reusable QueryContext
"""
+ resolved_utc_tz_aware = self.utc_tz_aware if utc_tz_aware is None else utc_tz_aware
if context:
return context.updated_copy(query=query,
parameters=parameters,
@@ -503,6 +518,7 @@ class Client(ABC):
max_str_len=max_str_len,
query_tz=query_tz,
column_tzs=column_tzs,
+ utc_tz_aware=resolved_utc_tz_aware,
as_pandas=as_pandas,
use_extended_dtypes=use_extended_dtypes,
streaming=streaming,
@@ -527,6 +543,7 @@ class Client(ABC):
max_str_len=max_str_len,
query_tz=query_tz,
column_tzs=column_tzs,
+ utc_tz_aware=resolved_utc_tz_aware,
use_extended_dtypes=use_extended_dtypes,
as_pandas=as_pandas,
streaming=streaming,
@@ -944,7 +961,7 @@ class Client(ABC):
full_table = quote_identifier(table)
column_defs = []
if column_types is None and column_type_names is None:
- describe_result = self.query(f'DESCRIBE TABLE {full_table}')
+ describe_result = self.query(f'DESCRIBE TABLE {full_table}', settings=settings)
column_defs = [ColumnDef(**row) for row in describe_result.named_results()
if row['default_type'] not in ('ALIAS', 'MATERIALIZED')]
if column_names is None or isinstance(column_names, str) and column_names == '*':
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py
index 0f36b87c531..0f0ff45ea52 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py
@@ -76,6 +76,7 @@ class HttpClient(Client):
https_proxy: Optional[str] = None,
server_host_name: Optional[str] = None,
apply_server_timezone: Optional[Union[str, bool]] = None,
+ utc_tz_aware: Optional[bool] = None,
show_clickhouse_errors: Optional[bool] = None,
autogenerate_session_id: Optional[bool] = None,
tls_mode: Optional[str] = None,
@@ -177,6 +178,7 @@ class HttpClient(Client):
query_retries=query_retries,
server_host_name=server_host_name,
apply_server_timezone=apply_server_timezone,
+ utc_tz_aware=utc_tz_aware,
show_clickhouse_errors=show_clickhouse_errors)
self.params = dict_copy(self.params, self._validate_settings(ch_settings))
comp_setting = self._setting_status('enable_http_compression')
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py
index de490a5744a..8001bd2288f 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py
@@ -48,6 +48,7 @@ class QueryContext(BaseQueryContext):
max_str_len: Optional[int] = 0,
query_tz: Optional[Union[str, tzinfo]] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
+ utc_tz_aware: bool = False,
use_extended_dtypes: Optional[bool] = None,
as_pandas: bool = False,
streaming: bool = False,
@@ -81,6 +82,8 @@ class QueryContext(BaseQueryContext):
objects with the selected timezone
:param column_tzs A dictionary of column names to tzinfo objects (or strings that will be converted to
tzinfo objects). The timezone will be applied to datetime objects returned in the query
+ :param utc_tz_aware Force timezone-aware Python datetime objects even when the active timezone is UTC.
+ Defaults to False to preserve the legacy behavior of returning naive UTC timestamps.
"""
super().__init__(settings,
query_formats,
@@ -98,6 +101,7 @@ class QueryContext(BaseQueryContext):
self.server_tz = server_tz
self.apply_server_tz = apply_server_tz
self.external_data = external_data
+ self.utc_tz_aware = utc_tz_aware
if isinstance(query_tz, str):
try:
query_tz = pytz.timezone(query_tz)
@@ -181,7 +185,7 @@ class QueryContext(BaseQueryContext):
active_tz = self.server_tz
else:
active_tz = tzutil.local_tz
- if active_tz == pytz.UTC:
+ if active_tz == pytz.UTC and not self.utc_tz_aware:
return None
return active_tz
@@ -200,6 +204,7 @@ class QueryContext(BaseQueryContext):
max_str_len: Optional[int] = None,
query_tz: Optional[Union[str, tzinfo]] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
+ utc_tz_aware: Optional[bool] = None,
use_extended_dtypes: Optional[bool] = None,
as_pandas: bool = False,
streaming: bool = False,
@@ -222,6 +227,7 @@ class QueryContext(BaseQueryContext):
self.max_str_len if max_str_len is None else max_str_len,
self.query_tz if query_tz is None else query_tz,
self.column_tzs if column_tzs is None else column_tzs,
+ self.utc_tz_aware if utc_tz_aware is None else utc_tz_aware,
self.use_extended_dtypes if use_extended_dtypes is None else use_extended_dtypes,
as_pandas,
streaming,
diff --git a/contrib/python/clickhouse-connect/ya.make b/contrib/python/clickhouse-connect/ya.make
index 7cc8b67f8a5..bc5b9d89f9c 100644
--- a/contrib/python/clickhouse-connect/ya.make
+++ b/contrib/python/clickhouse-connect/ya.make
@@ -2,7 +2,7 @@
PY3_LIBRARY()
-VERSION(0.9.2)
+VERSION(0.10.0)
LICENSE(Apache-2.0)
@@ -40,6 +40,7 @@ PY_SRCS(
clickhouse_connect/cc_sqlalchemy/dialect.py
clickhouse_connect/cc_sqlalchemy/inspector.py
clickhouse_connect/cc_sqlalchemy/sql/__init__.py
+ clickhouse_connect/cc_sqlalchemy/sql/clauses.py
clickhouse_connect/cc_sqlalchemy/sql/compiler.py
clickhouse_connect/cc_sqlalchemy/sql/ddlcompiler.py
clickhouse_connect/cc_sqlalchemy/sql/preparer.py