summaryrefslogtreecommitdiffstats
path: root/contrib/python
diff options
context:
space:
mode:
authorYDBot <[email protected]>2025-10-02 00:52:12 +0000
committerYDBot <[email protected]>2025-10-02 00:52:12 +0000
commit197d32948a067cc65bac500c343f548bb2d5855d (patch)
treea144cab09544e9191308fed88135c189392b2da5 /contrib/python
parentc2fb1c1f0da6856bd3569a4828abdaf44d60aabf (diff)
parentdf4ccba082c7c4afe8a610a94c168c533d11f80a (diff)
Sync branches 251002-0050
Diffstat (limited to 'contrib/python')
-rw-r--r--contrib/python/clickhouse-connect/.dist-info/METADATA32
-rw-r--r--contrib/python/clickhouse-connect/README.md23
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/__version__.py2
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/datatypes/base.py11
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/datatypes/sqltypes.py68
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/ddl/custom.py8
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/dialect.py14
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/inspector.py10
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/compiler.py74
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/common.py10
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/datatypes/base.py1
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/datatypes/dynamic.py13
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/datatypes/network.py78
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/datatypes/numeric.py120
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/datatypes/temporal.py556
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/dbapi/connection.py2
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/driver/__init__.py10
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/driver/asyncclient.py118
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py183
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/driver/common.py74
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/driver/context.py3
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/driver/dataconv.py4
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py176
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/driver/httputil.py2
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/driver/options.py14
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py22
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/driver/transform.py8
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/driver/tzutil.py3
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/driverc/dataconv.pyx3
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/tools/datagen.py7
-rw-r--r--contrib/python/clickhouse-connect/ya.make3
-rw-r--r--contrib/python/pytest-lazy-fixtures/.dist-info/METADATA72
-rw-r--r--contrib/python/pytest-lazy-fixtures/README.md70
-rw-r--r--contrib/python/pytest-lazy-fixtures/pytest_lazy_fixtures/lazy_fixture_callable.py39
-rw-r--r--contrib/python/pytest-lazy-fixtures/ya.make2
-rw-r--r--contrib/python/pytest-mock/py3/.dist-info/METADATA2
-rw-r--r--contrib/python/pytest-mock/py3/pytest_mock/_util.py2
-rw-r--r--contrib/python/pytest-mock/py3/pytest_mock/_version.py4
-rw-r--r--contrib/python/pytest-mock/py3/pytest_mock/plugin.py7
-rw-r--r--contrib/python/pytest-mock/py3/tests/test_pytest_mock.py33
-rw-r--r--contrib/python/pytest-mock/py3/ya.make2
41 files changed, 1736 insertions, 149 deletions
diff --git a/contrib/python/clickhouse-connect/.dist-info/METADATA b/contrib/python/clickhouse-connect/.dist-info/METADATA
index 39a36b2075e..c2d9217601d 100644
--- a/contrib/python/clickhouse-connect/.dist-info/METADATA
+++ b/contrib/python/clickhouse-connect/.dist-info/METADATA
@@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: clickhouse-connect
-Version: 0.8.18
+Version: 0.9.2
Summary: ClickHouse Database Core Driver for Python, Pandas, and Superset
Home-page: https://github.com/ClickHouse/clickhouse-connect
Author: ClickHouse Inc.
@@ -10,13 +10,12 @@ Keywords: clickhouse,superset,sqlalchemy,http,driver
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
-Requires-Python: ~=3.8
+Requires-Python: >=3.9,<3.14
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: certifi
@@ -25,11 +24,13 @@ Requires-Dist: pytz
Requires-Dist: zstandard
Requires-Dist: lz4
Provides-Extra: sqlalchemy
-Requires-Dist: sqlalchemy<2.0,>1.3.21; extra == "sqlalchemy"
+Requires-Dist: sqlalchemy<3.0,>=1.4.40; extra == "sqlalchemy"
Provides-Extra: numpy
Requires-Dist: numpy; extra == "numpy"
Provides-Extra: pandas
Requires-Dist: pandas; extra == "pandas"
+Provides-Extra: polars
+Requires-Dist: polars>=1.0; extra == "polars"
Provides-Extra: arrow
Requires-Dist: pyarrow; extra == "arrow"
Provides-Extra: orjson
@@ -54,11 +55,12 @@ Dynamic: summary
A high performance core database driver for connecting ClickHouse to Python, Pandas, and Superset
-* Pandas DataFrames
+* Pandas DataFrames (numpy and arrow-backed)
* Numpy Arrays
* PyArrow Tables
+* Polars DataFrames
* Superset Connector
-* SQLAlchemy 1.3 and 1.4 (limited feature set)
+* SQLAlchemy Core (select, joins, lightweight deletes; limited feature set)
ClickHouse Connect currently uses the ClickHouse HTTP interface for maximum compatibility.
@@ -68,7 +70,7 @@ ClickHouse Connect currently uses the ClickHouse HTTP interface for maximum comp
pip install clickhouse-connect
```
-ClickHouse Connect requires Python 3.8 or higher.
+ClickHouse Connect requires Python 3.9 or higher. We officially test against Python 3.9 through 3.13.
### Superset Connectivity
@@ -82,9 +84,17 @@ When creating a Superset Data Source, either use the provided connection dialog,
### SQLAlchemy Implementation
-ClickHouse Connect incorporates a minimal SQLAlchemy implementation (without any ORM features) for compatibility with
-Superset. It has only been tested against SQLAlchemy versions 1.3.x and 1.4.x, and is unlikely to work with more
-complex SQLAlchemy applications.
+ClickHouse Connect includes a lightweight SQLAlchemy dialect implementation focused on compatibility with **Superset**
+and **SQLAlchemy Core**.
+
+Supported features include:
+- Basic query execution via SQLAlchemy Core
+- `SELECT` queries with `JOIN`s
+- Lightweight `DELETE` statements
+
+The implementation does not include ORM support and is not intended as a full SQLAlchemy dialect. While it can support
+a range of Core-based applications beyond Superset, it may not be suitable for more complex SQLAlchemy applications
+that rely on full ORM or advanced dialect functionality.
### Asyncio Support
@@ -94,4 +104,4 @@ See the [run_async example](./examples/run_async.py) for more details.
### Complete Documentation
The documentation for ClickHouse Connect has moved to
-[ClickHouse Docs](https://clickhouse.com/docs/integrations/python)
+[ClickHouse Docs](https://clickhouse.com/docs/integrations/python)
diff --git a/contrib/python/clickhouse-connect/README.md b/contrib/python/clickhouse-connect/README.md
index 83fbbf583ac..8d9e4b8b845 100644
--- a/contrib/python/clickhouse-connect/README.md
+++ b/contrib/python/clickhouse-connect/README.md
@@ -2,11 +2,12 @@
A high performance core database driver for connecting ClickHouse to Python, Pandas, and Superset
-* Pandas DataFrames
+* Pandas DataFrames (numpy and arrow-backed)
* Numpy Arrays
* PyArrow Tables
+* Polars DataFrames
* Superset Connector
-* SQLAlchemy 1.3 and 1.4 (limited feature set)
+* SQLAlchemy Core (select, joins, lightweight deletes; limited feature set)
ClickHouse Connect currently uses the ClickHouse HTTP interface for maximum compatibility.
@@ -16,7 +17,7 @@ ClickHouse Connect currently uses the ClickHouse HTTP interface for maximum comp
pip install clickhouse-connect
```
-ClickHouse Connect requires Python 3.8 or higher.
+ClickHouse Connect requires Python 3.9 or higher. We officially test against Python 3.9 through 3.13.
### Superset Connectivity
@@ -30,9 +31,17 @@ When creating a Superset Data Source, either use the provided connection dialog,
### SQLAlchemy Implementation
-ClickHouse Connect incorporates a minimal SQLAlchemy implementation (without any ORM features) for compatibility with
-Superset. It has only been tested against SQLAlchemy versions 1.3.x and 1.4.x, and is unlikely to work with more
-complex SQLAlchemy applications.
+ClickHouse Connect includes a lightweight SQLAlchemy dialect implementation focused on compatibility with **Superset**
+and **SQLAlchemy Core**.
+
+Supported features include:
+- Basic query execution via SQLAlchemy Core
+- `SELECT` queries with `JOIN`s
+- Lightweight `DELETE` statements
+
+The implementation does not include ORM support and is not intended as a full SQLAlchemy dialect. While it can support
+a range of Core-based applications beyond Superset, it may not be suitable for more complex SQLAlchemy applications
+that rely on full ORM or advanced dialect functionality.
### Asyncio Support
@@ -42,4 +51,4 @@ See the [run_async example](./examples/run_async.py) for more details.
### Complete Documentation
The documentation for ClickHouse Connect has moved to
-[ClickHouse Docs](https://clickhouse.com/docs/integrations/python)
+[ClickHouse Docs](https://clickhouse.com/docs/integrations/python)
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py b/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py
index 07f89fe72be..b2b90876495 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py
@@ -1 +1 @@
-version = '0.8.18'
+version = "0.9.2"
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/datatypes/base.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/datatypes/base.py
index ce73da14815..d79b4e52a3c 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/datatypes/base.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/datatypes/base.py
@@ -1,5 +1,5 @@
import logging
-from typing import Dict, Type
+from typing import Dict, Type, Optional
from sqlalchemy.exc import CompileError
@@ -106,6 +106,15 @@ class ChSqlaType:
"""
return self.name
+ # pylint: disable=unused-argument
+ def _with_collation(self, collation: Optional[str]) -> "ChSqlaType":
+ """
+ SQLAlchemy 2.x compatibility: TypeEngine declares this abstract to support
+ text types that can carry a collation. ClickHouse types in this dialect
+ do not vary by collation, so this is a no-op that returns self.
+ """
+ return self
+
class CaseInsensitiveDict(dict):
def __setitem__(self, key, value):
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/datatypes/sqltypes.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/datatypes/sqltypes.py
index cc5e8300f69..e42a6755afc 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/datatypes/sqltypes.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/datatypes/sqltypes.py
@@ -3,7 +3,7 @@ from enum import Enum as PyEnum
from typing import Type, Union, Sequence
from sqlalchemy.types import Integer, Float, Numeric, Boolean as SqlaBoolean, \
- UserDefinedType, String as SqlaString, DateTime as SqlaDateTime, Date as SqlaDate
+ UserDefinedType, String as SqlaString, DateTime as SqlaDateTime, Date as SqlaDate, Interval
from sqlalchemy.exc import ArgumentError
from clickhouse_connect.cc_sqlalchemy.datatypes.base import ChSqlaType
@@ -78,6 +78,7 @@ class Bool(ChSqlaType, SqlaBoolean):
SqlaBoolean.__init__(self)
+# pylint: disable=too-many-ancestors
class Boolean(Bool):
pass
@@ -272,6 +273,71 @@ class DateTime64(ChSqlaType, SqlaDateTime):
SqlaDateTime.__init__(self)
+# pylint: disable=too-many-ancestors
+class Time(ChSqlaType, Interval):
+ """
+ Represents the ClickHouse Time type, which corresponds to a timedelta.
+
+ Represents time durations in the range -999:59:59 to 999:59:59 with
+ second precision. Maps to Python timedelta objects.
+ """
+
+ def __init__(self, type_def: TypeDef = EMPTY_TYPE_DEF):
+ ChSqlaType.__init__(self, type_def)
+ Interval.__init__(self)
+
+ def process_bind_param(self, value, dialect):
+ return value
+
+ def process_result_value(self, value, dialect):
+ return value
+
+ def process_literal_param(self, value, dialect):
+ return None
+
+
+# pylint: disable=too-many-ancestors
+class Time64(ChSqlaType, Interval):
+ """
+ Represents the ClickHouse Time64 type with configurable precision.
+
+ Represents time durations in the range -999:59:59.999999999 to
+ 999:59:59.999999999 configurable precision. Maps to Python timedelta objects.
+ If no precision is defined it default to 3.
+ """
+
+ def __init__(self, precision: int = None, type_def: TypeDef = None):
+ """
+ Time64 constructor with precision if not constructed with TypeDef.
+ :param precision: 3 (ms), 6 (us), or 9 (ns) for sub-second precision.
+ :param type_def: TypeDef from parse_name function.
+ """
+ if not type_def:
+ if precision is None:
+ precision = 3
+
+ if precision not in (3, 6, 9):
+ raise ArgumentError(
+ f"Invalid precision value {precision} for ClickHouse Time64. Must be 3, 6, or 9."
+ )
+ type_def = TypeDef(values=(precision,))
+ else:
+ precision = type_def.values[0] if len(type_def.values) > 0 else 3
+
+ ChSqlaType.__init__(self, type_def)
+
+ Interval.__init__(self, second_precision=precision)
+
+ def process_bind_param(self, value, dialect):
+ return value
+
+ def process_result_value(self, value, dialect):
+ return value
+
+ def process_literal_param(self, value, dialect):
+ return None
+
+
class Nullable:
"""
Class "wrapper" to use in DDL construction. It is never actually initialized but instead creates the "wrapped"
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/ddl/custom.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/ddl/custom.py
index 9f7c1e6401a..3b45b85641c 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/ddl/custom.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/ddl/custom.py
@@ -11,7 +11,7 @@ class CreateDatabase(DDL):
"""
# pylint: disable-msg=too-many-arguments
def __init__(self, name: str, engine: str = None, zoo_path: str = None, shard_name: str = '{shard}',
- replica_name: str = '{replica}'):
+ replica_name: str = '{replica}', exists_ok: bool = False):
"""
:param name: Database name
:param engine: Database ClickHouse engine type
@@ -21,7 +21,7 @@ class CreateDatabase(DDL):
"""
if engine and engine not in ('Ordinary', 'Atomic', 'Lazy', 'Replicated'):
raise ArgumentError(f'Unrecognized engine type {engine}')
- stmt = f'CREATE DATABASE {quote_identifier(name)}'
+ stmt = f"CREATE DATABASE {'IF NOT EXISTS ' if exists_ok else ''}{quote_identifier(name)}"
if engine:
stmt += f' Engine {engine}'
if engine == 'Replicated':
@@ -36,5 +36,5 @@ class DropDatabase(DDL):
"""
Alternative DDL statement for built in SqlAlchemy DropSchema DDL class
"""
- def __init__(self, name: str):
- super().__init__(f'DROP DATABASE {quote_identifier(name)}')
+ def __init__(self, name: str, missing_ok: bool = False):
+ super().__init__(f"DROP DATABASE {'IF EXISTS ' if missing_ok else ''}{quote_identifier(name)}")
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/dialect.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/dialect.py
index 0c1d7d79fe2..4a415914714 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/dialect.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/dialect.py
@@ -1,4 +1,3 @@
-
from sqlalchemy import text
from sqlalchemy.engine.default import DefaultDialect
@@ -7,6 +6,7 @@ from clickhouse_connect import dbapi
from clickhouse_connect.cc_sqlalchemy.inspector import ChInspector
from clickhouse_connect.cc_sqlalchemy.sql import full_table
from clickhouse_connect.cc_sqlalchemy.sql.ddlcompiler import ChDDLCompiler
+from clickhouse_connect.cc_sqlalchemy.sql.compiler import ChStatementCompiler
from clickhouse_connect.cc_sqlalchemy import ischema_names, dialect_name
from clickhouse_connect.cc_sqlalchemy.sql.preparer import ChIdentifierPreparer
from clickhouse_connect.driver.binding import quote_identifier, format_str
@@ -27,22 +27,29 @@ class ClickHouseDialect(DefaultDialect):
returns_unicode_strings = True
postfetch_lastrowid = False
ddl_compiler = ChDDLCompiler
+ statement_compiler = ChStatementCompiler
preparer = ChIdentifierPreparer
description_encoding = None
max_identifier_length = 127
ischema_names = ischema_names
inspector = ChInspector
+ # SQA 1 compatibility
# pylint: disable=method-hidden
@classmethod
def dbapi(cls):
return dbapi
+ # SQA 2 compatibility
+ # pylint: disable=method-hidden
+ @classmethod
+ def import_dbapi(cls):
+ return dbapi
+
def initialize(self, connection):
pass
- @staticmethod
- def get_schema_names(connection, **_):
+ def get_schema_names(self, connection, **_):
return [row.name for row in connection.execute('SHOW DATABASES')]
@staticmethod
@@ -95,6 +102,7 @@ class ClickHouseDialect(DefaultDialect):
def has_sequence(self, connection, sequence_name, schema=None, **_kw):
return False
+ # pylint: disable=duplicate-code
def do_begin_twophase(self, connection, xid):
raise NotImplementedError
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/inspector.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/inspector.py
index d8936e67c23..53616d00f2a 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/inspector.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/inspector.py
@@ -2,6 +2,7 @@ import sqlalchemy.schema as sa_schema
from sqlalchemy.engine.reflection import Inspector
from sqlalchemy.orm.exc import NoResultFound
+from sqlalchemy import text
from clickhouse_connect.cc_sqlalchemy.datatypes.base import sqla_type_from_name
from clickhouse_connect.cc_sqlalchemy.ddl.tableengine import build_engine
@@ -12,8 +13,8 @@ ch_col_args = ('default_type', 'codec_expression', 'ttl_expression')
def get_engine(connection, table_name, schema=None):
- result_set = connection.execute(
- f"SELECT engine_full FROM system.tables WHERE database = '{schema}' and name = '{table_name}'")
+ result_set = connection.execute(text(
+ f"SELECT engine_full FROM system.tables WHERE database = '{schema}' and name = '{table_name}'"))
row = next(result_set, None)
if not row:
raise NoResultFound(f'Table {schema}.{table_name} does not exist')
@@ -35,7 +36,7 @@ class ChInspector(Inspector):
def get_columns(self, table_name, schema=None, **_kwargs):
table_id = full_table(table_name, schema)
- result_set = self.bind.execute(f'DESCRIBE TABLE {table_id}')
+ result_set = self.bind.execute(text(f'DESCRIBE TABLE {table_id}'))
if not result_set:
raise NoResultFound(f'Table {full_table} does not exist')
columns = []
@@ -52,6 +53,3 @@ class ChInspector(Inspector):
'ttl_expression': row.ttl_expression}
columns.append(col)
return columns
-
-
-ChInspector.reflecttable = ChInspector.reflect_table # Hack to provide backward compatibility for SQLAlchemy 1.3
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/compiler.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/compiler.py
new file mode 100644
index 00000000000..dab2099ef24
--- /dev/null
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/compiler.py
@@ -0,0 +1,74 @@
+from sqlalchemy.exc import CompileError
+from sqlalchemy.sql.compiler import SQLCompiler
+
+from clickhouse_connect.cc_sqlalchemy.sql import format_table
+
+
+# pylint: disable=arguments-differ
+class ChStatementCompiler(SQLCompiler):
+
+ # pylint: disable=attribute-defined-outside-init
+ def visit_delete(self, delete_stmt, visiting_cte=None, **kw):
+ table = delete_stmt.table
+ text = f"DELETE FROM {format_table(table)}"
+
+ if delete_stmt.whereclause is not None:
+ self._in_delete_where = True
+ try:
+ text += " WHERE " + self.process(delete_stmt.whereclause, **kw)
+ finally:
+ self._in_delete_where = False
+ else:
+ raise CompileError("ClickHouse DELETE statements require a WHERE clause. To delete all rows, use 'TRUNCATE TABLE' instead.")
+
+ return text
+
+ def visit_select(self, select_stmt, **kw):
+ return super().visit_select(select_stmt, **kw)
+
+ def visit_join(self, join, **kw):
+ left = self.process(join.left, **kw)
+ right = self.process(join.right, **kw)
+ onclause = join.onclause
+
+ if getattr(join, "full", False):
+ join_kw = " FULL OUTER JOIN "
+ elif onclause is None:
+ join_kw = " CROSS JOIN "
+ elif join.isouter:
+ join_kw = " LEFT OUTER JOIN "
+ else:
+ join_kw = " INNER JOIN "
+
+ text = left + join_kw + right
+
+ if onclause is not None:
+ text += " ON " + self.process(onclause, **kw)
+
+ return text
+
+ def visit_column(self, column, add_to_result_map=None, include_table=True, result_map_targets=(), ambiguous_table_name_map=None, **kw):
+ if getattr(self, "_in_delete_where", False):
+ return self.preparer.quote(column.name)
+
+ return super().visit_column(
+ column,
+ add_to_result_map=add_to_result_map,
+ include_table=include_table,
+ result_map_targets=result_map_targets,
+ **kw,
+ )
+
+ # Abstract methods required by SQLCompiler
+ def delete_extra_from_clause(self, delete_stmt, from_table, extra_froms, from_hints, **kw):
+ raise NotImplementedError("ClickHouse doesn't support DELETE with extra FROM clause")
+
+ def update_from_clause(self, update_stmt, from_table, extra_froms, from_hints, **kw):
+ raise NotImplementedError("ClickHouse doesn't support UPDATE with FROM clause")
+
+ # pylint: disable=unused-argument
+ def visit_empty_set_expr(self, element_types, **kw):
+ return "SELECT 1 WHERE 1=0"
+
+ def visit_sequence(self, sequence, **kw):
+ raise NotImplementedError("ClickHouse doesn't support sequences")
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/common.py b/contrib/python/clickhouse-connect/clickhouse_connect/common.py
index dd0f319dce9..fd56a26616b 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/common.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/common.py
@@ -77,6 +77,11 @@ _init_common('product_name', (), '') # Product name used as part of client iden
_init_common('readonly', (0, 1), 0) # Implied "read_only" ClickHouse settings for versions prior to 19.17
_init_common('send_os_user', (True, False), True)
+# Include integration tags (library name/version) in the User-Agent, e.g.:
+# pandas/2.2.5; polars/0.20.x; sqlalchemy/2.0.x. These tags are only included
+# when using relevant API methods.
+_init_common('send_integration_tags', (True, False), True)
+
# Use the client protocol version This is needed for DateTime timezone columns but breaks with current version of
# chproxy
_init_common('use_protocol_version', (True, False), True)
@@ -85,3 +90,8 @@ _init_common('max_error_size', (), 1024)
# HTTP raw data buffer for streaming queries. This should not be reduced below 64KB to ensure compatibility with LZ4 compression
_init_common('http_buffer_size', (), 10 * 1024 * 1024)
+
+# If True and using pandas 2.x, preserves the datetime64/timedelta64
+# dtype resolution (e.g., 's', 'ms', 'us', 'ns'). If False (or on
+# pandas <2.x), coerces to nanosecond ('ns') resolution for compatibility.
+_init_common('preserve_pandas_datetime_resolution', (True, False), False)
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/base.py b/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/base.py
index c41694b71a7..d40eefe5a55 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/base.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/base.py
@@ -41,6 +41,7 @@ class ClickHouseType(ABC):
encoding = 'utf8'
np_type = 'O' # Default to Numpy Object type
nano_divisor = 0 # Only relevant for date like objects
+ pd_datetime_res = "ns" # Default date-like resolution for pd
byte_size = 0
valid_formats = 'native'
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/dynamic.py b/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/dynamic.py
index c5057b2b1b1..d7a9b21a540 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/dynamic.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/dynamic.py
@@ -14,6 +14,8 @@ from clickhouse_connect.json_impl import any_to_json
SHARED_DATA_TYPE: ClickHouseType
STRING_DATA_TYPE: ClickHouseType
+_JSON_NULL = b'null'
+_JSON_NULL_STR = 'null'
json_serialization_format = 0x1
@@ -129,13 +131,22 @@ def json_sample_size(_, sample: Collection) -> int:
def write_json(ch_type: ClickHouseType, column: Sequence, dest: bytearray, ctx: InsertContext):
+ if ch_type.nullable:
+ dest += bytearray(1 if v is None else 0 for v in column)
+
first = first_value(column, ch_type.nullable)
write_col = column
encoding = ctx.encoding or ch_type.encoding
if not isinstance(first, str) and ch_type.write_format(ctx) != 'string':
to_json = any_to_json
- write_col = [to_json(v) for v in column]
+ if ch_type.nullable:
+ write_col = [_JSON_NULL if v is None else to_json(v) for v in column]
+ else:
+ write_col = [to_json(v) for v in column]
encoding = None
+ else:
+ write_col = [_JSON_NULL_STR if v is None else v for v in column]
+
handle_error(data_conv.write_str_col(write_col, ch_type.nullable, encoding, dest), ctx)
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/network.py b/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/network.py
index 064b612feb2..7f13820f5b2 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/network.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/network.py
@@ -1,5 +1,5 @@
import socket
-from ipaddress import IPv4Address, IPv6Address
+from ipaddress import ip_address, IPv4Address, IPv6Address
from typing import Union, MutableSequence, Sequence, Any
from clickhouse_connect.datatypes.base import ClickHouseType
@@ -64,64 +64,60 @@ class IPv6(ClickHouseType):
return self._read_binary_ip(source, num_rows)
@staticmethod
- def _read_binary_ip(source: ByteSource, num_rows: int):
+ def _read_binary_ip(source: ByteSource, num_rows: int) -> list[IPv6Address]:
+ """Read IPv6 addresses in native format, always returning IPv6Address objects."""
fast_ip_v6 = IPv6Address.__new__
- fast_ip_v4 = IPv4Address.__new__
with_scope_id = '_scope_id' in IPv6Address.__slots__
new_col = []
app = new_col.append
ifb = int.from_bytes
for _ in range(num_rows):
int_value = ifb(source.read_bytes(16), 'big')
- if int_value >> 32 == 0xFFFF:
- ipv4 = fast_ip_v4(IPv4Address)
- ipv4._ip = int_value & 0xFFFFFFFF
- app(ipv4)
- else:
- ipv6 = fast_ip_v6(IPv6Address)
- ipv6._ip = int_value
- if with_scope_id:
- ipv6._scope_id = None
- app(ipv6)
+ ipv6 = fast_ip_v6(IPv6Address)
+ ipv6._ip = int_value
+ if with_scope_id:
+ ipv6._scope_id = None
+ app(ipv6)
return new_col
@staticmethod
- def _read_binary_str(source: ByteSource, num_rows: int):
+ def _read_binary_str(source: ByteSource, num_rows: int) -> list[str]:
+ """Read IPv6 addresses in string format, always returning IPv6Address strings."""
new_col = []
app = new_col.append
- v4mask = IPV4_V6_MASK
- tov4 = socket.inet_ntoa
tov6 = socket.inet_ntop
af6 = socket.AF_INET6
for _ in range(num_rows):
x = source.read_bytes(16)
- if x[:12] == v4mask:
- app(tov4(x[12:]))
- else:
- app(tov6(af6, x))
+ # Always use IPv6 string representation, even for IPv4-mapped addresses
+ app(tov6(af6, x))
return new_col
- def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: bytearray, ctx: InsertContext):
- v = V6_NULL
- first = first_value(column, self.nullable)
- v4mask = IPV4_V6_MASK
- af6 = socket.AF_INET6
- tov6 = socket.inet_pton
- if isinstance(first, str):
- for x in column:
- if x is None:
- dest += v
- elif '.' in x:
- dest += v4mask + bytes(int(b) for b in x.split('.'))
- else:
- dest += tov6(af6, x)
- else:
- for x in column:
- if x is None:
- dest += v
- else:
- b = x.packed
- dest += b if len(b) == 16 else (v4mask + b)
+ def _write_column_binary(
+ self,
+ column: Union[Sequence, MutableSequence],
+ dest: bytearray,
+ ctx: InsertContext,
+ ):
+ """Write IPv6 addresses, promoting IPv4 addresses to IPv4-mapped IPv6 addresses."""
+ for value in column:
+ if value is None:
+ dest += V6_NULL
+ continue
+
+ try:
+ addr = ip_address(value)
+ except ValueError as e:
+ raise ValueError(
+ f"Failed to parse '{value}' as a valid IP address for column '{ctx.column_name}'"
+ ) from e
+
+ # Now handle parsed object
+ if isinstance(addr, IPv6Address):
+ dest += addr.packed
+ elif isinstance(addr, IPv4Address):
+ # We have an IPv4, but the column is IPv6 so convert to IPv4-mapped.
+ dest += IPV4_V6_MASK + addr.packed
def _active_null(self, ctx):
if ctx.use_none:
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/numeric.py b/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/numeric.py
index 7e86b623e3f..e074c96f6fc 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/numeric.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/numeric.py
@@ -1,5 +1,7 @@
import decimal
from typing import Union, Type, Sequence, MutableSequence, Any
+import struct
+import array
from math import nan, isnan, isinf
@@ -208,6 +210,80 @@ class Float64(Float):
np_type = '<f8'
+class BFloat16(ArrayType):
+ _array_type = "H"
+ python_type = float
+ np_type = "<f4"
+
+ def _write_column_binary(
+ self,
+ column: Sequence[Any],
+ dest: bytearray,
+ ctx: InsertContext,
+ ):
+ if not column:
+ return
+
+ if self.nullable:
+ first = next((x for x in column if x is not None), None)
+ if isinstance(first, float):
+ column = [0 if (x is None or isnan(x) or isinf(x)) else x for x in column]
+ else:
+ column = [0 if x is None else float(x) for x in column]
+ elif not isinstance(column[0], float):
+ column = [float(x) for x in column]
+
+ vals = array.array("H")
+ extend = vals.extend
+ for x in column:
+ bits32 = struct.unpack("<I", struct.pack("<f", x))[0]
+ extend([bits32 >> 16])
+
+ write_array(self._array_type, vals, dest, ctx.column_name)
+
+ def _read_column_binary(
+ self, source: ByteSource, num_rows: int, ctx: QueryContext, _read_state: Any
+ ):
+ if ctx.use_numpy:
+ arr16 = numpy_conv.read_numpy_array(source, "<u2", num_rows)
+ return (arr16.astype(np.uint32) << np.uint32(16)).view(np.float32)
+
+ raw = source.read_array(self._array_type, num_rows)
+ return [struct.unpack("<f", struct.pack("<I", v << 16))[0] for v in raw]
+
+ def _read_nullable_column(
+ self, source: ByteSource, num_rows: int, ctx: QueryContext, _read_state: Any
+ ):
+ null_map = source.read_bytes(num_rows)
+
+ if ctx.use_numpy:
+ arr16 = numpy_conv.read_numpy_array(source, "<u2", num_rows)
+ floats = (arr16.astype(np.uint32) << np.uint32(16)).view(np.float32)
+ return data_conv.build_nullable_column(
+ floats, null_map, self._active_null(ctx)
+ )
+
+ raw = source.read_array(self._array_type, num_rows)
+ floats = [struct.unpack("<f", struct.pack("<I", v << 16))[0] for v in raw]
+ return data_conv.build_nullable_column(floats, null_map, self._active_null(ctx))
+
+ def _finalize_column(self, column, ctx: QueryContext):
+ if ctx.use_extended_dtypes and self.nullable:
+ return pd.array(column, dtype="Float32")
+ if ctx.use_numpy and not isinstance(column, np.ndarray):
+ return np.array(column, dtype=self.np_type)
+ return column
+
+ def _active_null(self, ctx: QueryContext):
+ if ctx.use_extended_dtypes:
+ return nan
+ if ctx.use_none:
+ return None
+ if ctx.use_numpy:
+ return nan
+ return 0.0
+
+
class Bool(ClickHouseType):
np_type = '?'
python_type = bool
@@ -388,3 +464,47 @@ class Decimal128(BigDecimal):
class Decimal256(BigDecimal):
dec_size = 256
+
+
+class IntervalNanosecond(Int32):
+ pass
+
+
+class IntervalMicrosecond(Int32):
+ pass
+
+
+class IntervalMillisecond(Int32):
+ pass
+
+
+class IntervalSecond(Int32):
+ pass
+
+
+class IntervalMinute(Int32):
+ pass
+
+
+class IntervalHour(Int32):
+ pass
+
+
+class IntervalDay(Int32):
+ pass
+
+
+class IntervalWeek(Int32):
+ pass
+
+
+class IntervalMonth(Int32):
+ pass
+
+
+class IntervalQuarter(Int32):
+ pass
+
+
+class IntervalYear(Int32):
+ pass
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/temporal.py b/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/temporal.py
index 4b416a9159f..a51b6584f8f 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/temporal.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/datatypes/temporal.py
@@ -1,16 +1,22 @@
import pytz
-from datetime import date, datetime, tzinfo
-from typing import Union, Sequence, MutableSequence, Any
+import array
+from datetime import date, datetime, tzinfo, timedelta, time
+
+from typing import Union, Sequence, MutableSequence, Any, NamedTuple, Optional
+from abc import abstractmethod
+import re
from clickhouse_connect.datatypes.base import TypeDef, ClickHouseType
+from clickhouse_connect.common import get_setting
+from clickhouse_connect.driver import tzutil
from clickhouse_connect.driver.common import write_array, np_date_types, int_size, first_value
from clickhouse_connect.driver.exceptions import ProgrammingError
from clickhouse_connect.driver.ctypes import data_conv, numpy_conv
from clickhouse_connect.driver.insert import InsertContext
from clickhouse_connect.driver.query import QueryContext
from clickhouse_connect.driver.types import ByteSource
-from clickhouse_connect.driver.options import np, pd
+from clickhouse_connect.driver.options import np, pd, IS_PANDAS_2
epoch_start_date = date(1970, 1, 1)
epoch_start_datetime = datetime(1970, 1, 1)
@@ -24,6 +30,12 @@ class Date(ClickHouseType):
python_type = date
byte_size = 2
+ @property
+ def pandas_dtype(self):
+ if IS_PANDAS_2 and get_setting("preserve_pandas_datetime_resolution"):
+ return "datetime64[s]"
+ return f"datetime64[{self.pd_datetime_res}]"
+
def _read_column_binary(self, source: ByteSource, num_rows: int, ctx: QueryContext, _read_state:Any):
if self.read_format(ctx) == 'int':
return source.read_array(self._array_type, num_rows)
@@ -56,14 +68,39 @@ class Date(ClickHouseType):
if fmt == 'int':
return 0
if ctx.use_numpy:
- return np.datetime64(0)
+ return np.datetime64(0, self.pd_datetime_res)
return epoch_start_date
+ # pylint: disable=too-many-return-statements
def _finalize_column(self, column: Sequence, ctx: QueryContext) -> Sequence:
if self.read_format(ctx) == 'int':
return column
+
if ctx.use_numpy and self.nullable and not ctx.use_none:
return np.array(column, dtype=self.np_type)
+
+ if ctx.use_extended_dtypes:
+ if isinstance(column, np.ndarray) and np.issubdtype(
+ column.dtype, np.datetime64
+ ):
+ return column.astype(self.pandas_dtype)
+
+ if isinstance(column, pd.DatetimeIndex):
+ if column.tz is None:
+ return column.astype(self.pandas_dtype)
+
+ naive = column.tz_localize(None).astype(self.pandas_dtype)
+ return pd.DatetimeIndex(naive, tz=column.tz)
+
+ if self.nullable and isinstance(column, list):
+ return np.array([None if pd.isna(s) else s for s in column]).astype(
+ self.pandas_dtype
+ )
+
+ return pd.to_datetime(column, errors="coerce").to_numpy(
+ dtype=self.pandas_dtype, copy=False
+ )
+
return column
@@ -84,6 +121,13 @@ class DateTimeBase(ClickHouseType, registered=False):
valid_formats = 'native', 'int'
python_type = datetime
+ @property
+ def pandas_dtype(self):
+ """Sets dtype for pandas datetime objects"""
+ if IS_PANDAS_2 and get_setting("preserve_pandas_datetime_resolution"):
+ return "datetime64[s]"
+ return f"datetime64[{self.pd_datetime_res}]"
+
def _active_null(self, ctx: QueryContext):
fmt = self.read_format(ctx)
if ctx.use_extended_dtypes:
@@ -93,9 +137,40 @@ class DateTimeBase(ClickHouseType, registered=False):
if self.read_format(ctx) == 'int':
return 0
if ctx.use_numpy:
- return np.datetime64(0)
+ return np.datetime64(0, self.pd_datetime_res)
return epoch_start_datetime
+ def _finalize_column(self, column: Sequence, ctx: QueryContext) -> Sequence:
+ """Ensure every datetime-like column is at nanosecond resolution, preserving any tz."""
+ if ctx.use_extended_dtypes:
+ if isinstance(column, np.ndarray) and np.issubdtype(
+ column.dtype, np.datetime64
+ ):
+ return column.astype(self.pandas_dtype)
+
+ if isinstance(column, pd.DatetimeIndex) or (
+ isinstance(column, list)
+ and hasattr(next((s for s in column if not pd.isna(s)), None), "tz")
+ ):
+ if isinstance(column, list):
+ column = pd.DatetimeIndex(column)
+
+ if column.tz is None:
+ result = column.astype(self.pandas_dtype)
+ return pd.array(result) if self.nullable else result
+
+ naive_ns = column.tz_localize(None).astype(self.pandas_dtype)
+ tz_aware_result = pd.DatetimeIndex(naive_ns, tz=column.tz)
+ return (
+ pd.array(tz_aware_result) if self.nullable else tz_aware_result
+ )
+
+ if self.nullable:
+ return pd.array(
+ [None if pd.isna(s) else s for s in column], dtype=self.pandas_dtype
+ )
+ return column
+
class DateTime(DateTimeBase):
_array_type = 'L' if int_size == 2 else 'I'
@@ -151,6 +226,13 @@ class DateTime64(DateTimeBase):
self.tzinfo = None
@property
+ def pandas_dtype(self):
+ """Sets dtype for pandas datetime objects"""
+ if IS_PANDAS_2 and get_setting("preserve_pandas_datetime_resolution"):
+ return f"datetime64{self.unit}"
+ return f"datetime64[{self.pd_datetime_res}]"
+
+ @property
def np_type(self):
if self.unit:
return f'datetime64{self.unit}'
@@ -189,7 +271,7 @@ class DateTime64(DateTimeBase):
def _read_binary_naive(self, column: Sequence):
new_col = []
app = new_col.append
- dt_from = datetime.utcfromtimestamp
+ dt_from = tzutil.utcfromtimestamp
prec = self.prec
for ticks in column:
seconds = ticks // prec
@@ -222,3 +304,465 @@ class DateTime64(DateTimeBase):
else:
column = [((int(x.timestamp()) * 1000000 + x.microsecond) * prec) // 1000000 for x in column]
write_array('q', column, dest, ctx.column_name)
+
+
+class _HMSParts(NamedTuple):
+ """Internal structure for parsed HMS time components."""
+
+ hours: int
+ minutes: int
+ seconds: int
+ frac: Optional[str]
+ is_negative: bool
+
+
+class TimeBase(ClickHouseType, registered=False):
+ """
+ Abstract base for ClickHouse Time and Time64 types.
+
+ Subclasses must define:
+ - _array_type: Array type specifier (e.g. 'i' or 'q')
+ - byte_size: Size in bytes for binary representation
+ - np_type: NumPy array type (e.g. 'timedelta64[s]' or 'timedelta64[ns]')
+
+ And implement these abstract methods:
+ - _string_to_ticks(self, str) -> int
+ - _timedelta_to_ticks(self, timedelta) -> int
+ - _ticks_to_timedelta(self, int) -> timedelta
+ - _ticks_to_string(self, int) -> str
+ - max_ticks and min_ticks properties
+ """
+
+ _HMS_RE = re.compile(
+ r"""^\s*
+ (?P<sign>-?)
+ (?P<hours>\d+):
+ (?P<minutes>\d+):
+ (?P<seconds>\d+)
+ (?:\.(?P<frac>\d+))?
+ \s*$""",
+ re.VERBOSE,
+ )
+
+ MAX_TIME_SECONDS = 999 * 3600 + 59 * 60 + 59 # 999:59:59
+ MIN_TIME_SECONDS = -MAX_TIME_SECONDS # -999:59:59
+ _MICROS_PER_SECOND = 1_000_000
+ _NANOS_PER_SECOND = 1_000_000_000
+ _SECONDS_PER_DAY = 86_400
+
+ _array_type: str
+ byte_size: int
+ np_type: str
+ valid_formats = ("native", "string", "int", "time")
+ python_type = timedelta
+
+ def _read_column_binary(
+ self,
+ source: ByteSource,
+ num_rows: int,
+ ctx: QueryContext,
+ _read_state: Any,
+ ) -> Sequence:
+ """Read binary column data and convert to requested format."""
+ ticks = source.read_array(self._array_type, num_rows)
+ fmt = self.read_format(ctx)
+
+ if ctx.use_numpy:
+ return np.array(
+ [self._ticks_to_np_timedelta(t) for t in ticks], dtype=self.np_type
+ )
+
+ if fmt == "int":
+ return ticks
+
+ if fmt == "string":
+ return [self._ticks_to_string(t) for t in ticks]
+
+ if fmt == "time":
+ return [self._ticks_to_time(t) for t in ticks]
+
+ return [self._ticks_to_timedelta(t) for t in ticks]
+
+ def _write_column_binary(
+ self,
+ column: Sequence,
+ dest: bytearray,
+ ctx: InsertContext,
+ ):
+ """Write column data in binary format."""
+ ticks = self._to_ticks_array(column)
+ write_array(self._array_type, ticks, dest, ctx.column_name)
+
+ def _parse_core(self, time_str: str) -> _HMSParts:
+ """Parse an hhh:mm:ss[.fff] time literal."""
+ match = self._HMS_RE.match(time_str)
+ if not match:
+ raise ValueError(f"Invalid time literal {time_str}")
+
+ hours = int(match["hours"])
+ minutes = int(match["minutes"])
+ seconds = int(match["seconds"])
+
+ if hours > 999:
+ raise ValueError(
+ f"Hours out of range; cannot exceed 999: got {hours} in '{time_str}'"
+ )
+ if not 0 <= minutes < 60:
+ raise ValueError(
+ f"Minutes out of range; must be 0-59: got {minutes} in '{time_str}'"
+ )
+ if not 0 <= seconds < 60:
+ raise ValueError(
+ f"Seconds out of range; must be 0-59: got {seconds} in '{time_str}'"
+ )
+
+ return _HMSParts(
+ hours=hours,
+ minutes=minutes,
+ seconds=seconds,
+ frac=match["frac"],
+ is_negative=bool(match["sign"]),
+ )
+
+ def _to_ticks_array(self, column: Sequence) -> Sequence[int]:
+ """Convert column data to internal tick representation."""
+ first = first_value(column, self.nullable)
+ expected_type = type(first) if first is not None else None
+
+ if expected_type is None:
+ if self.nullable:
+ return [0] * len(column)
+ return []
+
+ converter_map = {
+ timedelta: self._timedelta_to_ticks,
+ time: self._time_to_ticks,
+ float: self._numerical_to_ticks,
+ int: self._numerical_to_ticks,
+ str: self._string_to_ticks,
+ }
+ if np is not None:
+ converter_map[np.timedelta64] = self._timedelta_to_ticks
+ converter_map[np.int64] = self._numerical_to_ticks
+ converter = converter_map.get(expected_type, None)
+
+ if converter is None:
+ raise TypeError(
+ f"Unsupported column type '{expected_type.__name__}' for {self.__class__.__name__}. "
+ "Expected 'int', 'str', 'time', or 'timedelta'."
+ )
+
+ if self.nullable:
+ return [converter(x) if x is not None else 0 for x in column]
+
+ return [converter(x) for x in column]
+
+ def _validate_standard_range(self, ticks: int, original: Any) -> None:
+ """Validate that ticks is within valid ClickHouse range."""
+ if not self.min_ticks <= ticks <= self.max_ticks:
+ raise ValueError(f"{original} out of range for {self.__class__.__name__}")
+
+ def _validate_time_obj_range(self, ticks: int) -> None:
+ """Ensure ticks can form a valid datetime.time object."""
+ if not self.min_time_ticks <= ticks <= self.max_time_ticks:
+ raise ValueError(
+ f"Ticks value {ticks} is outside valid range for datetime.time object."
+ )
+
+ def _numerical_to_ticks(self, value: Union[int, float, "np.int64"]) -> int:
+ """Convert numerical value to ticks, with range validation."""
+ value = int(value)
+ self._validate_standard_range(value, value)
+ return value
+
+ def _active_null(self, ctx: QueryContext):
+ """Return appropriate null value based on context."""
+ fmt = self.read_format(ctx)
+ if ctx.use_extended_dtypes:
+ return pd.NA if fmt == "int" else pd.NaT
+ if ctx.use_none:
+ return None
+ if fmt == "int":
+ return 0
+ if fmt == "string":
+ return "00:00:00"
+ if ctx.use_numpy:
+ return np.timedelta64("NaT")
+
+ return timedelta(0)
+
+ @property
+ def pandas_dtype(self):
+ """Sets dtype for pandas datetime objects"""
+ if IS_PANDAS_2 and get_setting("preserve_pandas_datetime_resolution"):
+ return "timedelta64[s]"
+ return f"timedelta64[{self.pd_datetime_res}]"
+
+ def _finalize_column(self, column: Sequence, ctx: QueryContext) -> Sequence:
+ """Finalize column data based on context requirements."""
+ if ctx.use_extended_dtypes:
+ if isinstance(column, np.ndarray) and np.issubdtype(
+ column.dtype, np.timedelta64
+ ):
+ return column.astype(self.pandas_dtype)
+
+ if isinstance(column, pd.TimedeltaIndex):
+ return column.astype(self.pandas_dtype)
+
+ if self.nullable:
+ return np.array([None if pd.isna(s) else s for s in column]).astype(
+ self.pandas_dtype
+ )
+ return column
+
+ def _build_lc_column(self, index: Sequence, keys: array.array, ctx: QueryContext):
+ """Build low-cardinality column from index and keys."""
+ if ctx.use_numpy:
+ return np.array([index[k] for k in keys], dtype=self.np_type)
+
+ return super()._build_lc_column(index, keys, ctx)
+
+ @abstractmethod
+ def _string_to_ticks(self, time_str: str) -> int:
+ """Parse a string into integer ticks."""
+ raise NotImplementedError
+
+ @abstractmethod
+ def _timedelta_to_ticks(self, td: Union[timedelta, "np.timedelta64"]) -> int:
+ """Convert a timedelta into integer ticks."""
+ raise NotImplementedError
+
+ @abstractmethod
+ def _ticks_to_time(self, ticks: int) -> time:
+ """Convert integer ticks into a time."""
+ raise NotImplementedError
+
+ @abstractmethod
+ def _time_to_ticks(self, t: time) -> int:
+ """Convert a time into integer ticks."""
+ raise NotImplementedError
+
+ @abstractmethod
+ def _ticks_to_timedelta(self, ticks: int) -> timedelta:
+ """Convert integer ticks into a timedelta."""
+ raise NotImplementedError
+
+ @abstractmethod
+ def _ticks_to_np_timedelta(self, ticks: int) -> timedelta:
+ """Convert integer ticks into an np.timedelta."""
+ raise NotImplementedError
+
+ @abstractmethod
+ def _ticks_to_string(self, ticks: int) -> str:
+ """Format integer ticks as a string."""
+ raise NotImplementedError
+
+ @property
+ def min_time_ticks(self) -> int:
+ """Minimum tick value representable by datetime.time type."""
+ return 0
+
+ @property
+ @abstractmethod
+ def max_time_ticks(self) -> int:
+ """Maximum tick value representable by datetime.time type."""
+ raise NotImplementedError
+
+ @property
+ @abstractmethod
+ def max_ticks(self) -> int:
+ """Maximum tick value representable by this type."""
+ raise NotImplementedError
+
+ @property
+ @abstractmethod
+ def min_ticks(self) -> int:
+ """Minimum tick value representable by this type."""
+ raise NotImplementedError
+
+
+class Time(TimeBase):
+ """ClickHouse Time type with second precision."""
+
+ _array_type = "i"
+ byte_size = 4
+ np_type = "timedelta64[s]"
+
+ @property
+ def max_ticks(self) -> int:
+ return self.MAX_TIME_SECONDS
+
+ @property
+ def min_ticks(self) -> int:
+ return self.MIN_TIME_SECONDS
+
+ @property
+ def max_time_ticks(self) -> int:
+ return self._SECONDS_PER_DAY - 1
+
+ def _string_to_ticks(self, time_str: str) -> int:
+ """Parse string format 'HHH:MM:SS[.fff]' to ticks (seconds), flooring fractional seconds."""
+ parts = self._parse_core(time_str)
+ ticks = parts.hours * 3600 + parts.minutes * 60 + parts.seconds
+
+ if parts.is_negative:
+ ticks = -ticks
+ self._validate_standard_range(ticks, time_str)
+
+ return ticks
+
+ def _ticks_to_string(self, ticks: int) -> str:
+ """Format ticks (seconds) as 'HHH:MM:SS' string."""
+ sign = "-" if ticks < 0 else ""
+ t = abs(ticks)
+ h, rem = divmod(t, 3600)
+ m, s = divmod(rem, 60)
+
+ return f"{sign}{h:03d}:{m:02d}:{s:02d}"
+
+ def _timedelta_to_ticks(self, td: Union[timedelta, "np.timedelta64"]) -> int:
+ """Convert timedelta to ticks (seconds), flooring fractional seconds."""
+ if isinstance(td, timedelta):
+ total = int(td.total_seconds())
+ else:
+ total = td.astype("timedelta64[s]").astype(int)
+ self._validate_standard_range(total, td)
+
+ return total
+
+ def _ticks_to_timedelta(self, ticks: int) -> timedelta:
+ """Convert ticks (seconds) to timedelta."""
+ return timedelta(seconds=ticks)
+
+ def _ticks_to_np_timedelta(self, ticks: int) -> timedelta:
+ """Convert ticks (seconds) to np.timedelta."""
+ return np.timedelta64(ticks, "s")
+
+ def _time_to_ticks(self, t: time) -> int:
+ """Converts time to ticks (seconds), flooring fraction seconds."""
+ return t.hour * 3600 + t.minute * 60 + t.second
+
+ def _ticks_to_time(self, ticks: int) -> time:
+ """Converts ticks (seconds) to time."""
+ self._validate_time_obj_range(ticks)
+ h, rem = divmod(ticks, 3600)
+ m, s = divmod(rem, 60)
+
+ return time(hour=h, minute=m, second=s)
+
+
+class Time64(TimeBase):
+ """ClickHouse Time64 type with configurable sub-second precision."""
+
+ __slots__ = ("scale", "precision", "unit")
+ _array_type = "q"
+ byte_size = 8
+
+ def __init__(self, type_def):
+ super().__init__(type_def)
+ self._name_suffix = type_def.arg_str
+ self.scale = type_def.values[0]
+ if self.scale not in (3, 6, 9):
+ raise ProgrammingError(
+ f"Unsupported Time64 scale {self.scale}; "
+ "only 3, 6, or 9 are allowed for NumPy."
+ )
+ self.precision = 10**self.scale
+ self.unit = np_date_types.get(self.scale)
+
+ @property
+ def pandas_dtype(self):
+ """Sets dtype for pandas datetime objects"""
+ if IS_PANDAS_2 and get_setting("preserve_pandas_datetime_resolution"):
+ return f"timedelta64{self.unit}"
+ return f"timedelta64[{self.pd_datetime_res}]"
+
+ @property
+ def max_time_ticks(self) -> int:
+ return self._SECONDS_PER_DAY * self.precision - 1
+
+ @property
+ def np_type(self) -> str:
+ return f"timedelta64{self.unit}"
+
+ @property
+ def max_ticks(self) -> int:
+ return self.MAX_TIME_SECONDS * self.precision + (self.precision - 1)
+
+ @property
+ def min_ticks(self) -> int:
+ return -self.max_ticks
+
+ def _string_to_ticks(self, time_str: str) -> int:
+ """Parse string format 'HHH:MM:SS[.fff]' to ticks with sub-second precision."""
+ parts = self._parse_core(time_str)
+ frac_ticks = int((parts.frac or "").ljust(self.scale, "0")[: self.scale])
+ ticks = (
+ parts.hours * 3600 + parts.minutes * 60 + parts.seconds
+ ) * self.precision + frac_ticks
+ if parts.is_negative:
+ ticks = -ticks
+ self._validate_standard_range(ticks, time_str)
+
+ return ticks
+
+ def _ticks_to_string(self, ticks: int) -> str:
+ """Format ticks as 'HHH:MM:SS[.fff]' string with sub-second precision."""
+ sign = "-" if ticks < 0 else ""
+ t = abs(ticks)
+ sec_part, frac_part = divmod(t, self.precision)
+ h, rem = divmod(sec_part, 3600)
+ m, s = divmod(rem, 60)
+ frac_str = f".{frac_part:0{self.scale}d}" if self.scale else ""
+
+ return f"{sign}{h:03d}:{m:02d}:{s:02d}{frac_str}"
+
+ def _timedelta_to_ticks(self, td: Union[timedelta, "np.timedelta64"]) -> int:
+ """Convert timedelta to ticks with sub-second precision."""
+ if isinstance(td, timedelta):
+ total_us = (
+ int(td.total_seconds()) * self._MICROS_PER_SECOND + td.microseconds
+ )
+ ticks = (total_us * self.precision) // self._MICROS_PER_SECOND
+ else:
+ ticks = td.astype("timedelta64[s]").astype(int)
+ self._validate_standard_range(ticks, td)
+
+ return ticks
+
+ def _ticks_to_timedelta(self, ticks: int) -> timedelta:
+ """Convert ticks to timedelta with microsecond precision."""
+ neg = ticks < 0
+ t = abs(ticks)
+ sec_part = t // self.precision
+ frac_part = t - sec_part * self.precision
+ micros = (frac_part * self._MICROS_PER_SECOND) // self.precision
+ td = timedelta(seconds=sec_part, microseconds=micros)
+
+ return -td if neg else td
+
+ def _ticks_to_np_timedelta(self, ticks: int) -> "np.timedelta64":
+ """Convert ticks to numpy timedelta64 with nanosecond precision."""
+ res_map = {3: "ms", 6: "us", 9: "ns"}
+
+ return np.timedelta64(ticks, res_map.get(self.scale))
+
+ def _time_to_ticks(self, t: time) -> int:
+ """Convert time to ticks with sub-second precision."""
+ total_us = (
+ t.hour * 3600 + t.minute * 60 + t.second
+ ) * self._MICROS_PER_SECOND + t.microsecond
+ ticks = (total_us * self.precision) // self._MICROS_PER_SECOND
+ self._validate_time_obj_range(ticks)
+
+ return ticks
+
+ def _ticks_to_time(self, ticks: int) -> time:
+ """Convert ticks to time with microsecond precision."""
+ self._validate_time_obj_range(ticks)
+ sec_part, frac_part = divmod(ticks, self.precision)
+ h, rem = divmod(sec_part, 3600)
+ m, s = divmod(rem, 60)
+ micros = (frac_part * self._MICROS_PER_SECOND) // self.precision
+
+ return time(hour=h, minute=m, second=s, microsecond=micros)
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/dbapi/connection.py b/contrib/python/clickhouse-connect/clickhouse_connect/dbapi/connection.py
index d1b3cb7f3cd..2c5bff6741e 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/dbapi/connection.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/dbapi/connection.py
@@ -29,6 +29,8 @@ class Connection:
secure=secure,
dsn=dsn,
generic_args=kwargs)
+
+ self.client._add_integration_tag("sqlalchemy")
self.timezone = self.client.server_tz
def close(self):
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/__init__.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/__init__.py
index 644bac7b96a..be72481ee56 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/__init__.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/__init__.py
@@ -76,6 +76,9 @@ def create_client(*,
validity. This option can be used if using an ssh_tunnel or other indirect means to an ClickHouse server
where the `host` argument refers to the tunnel or proxy and not the actual ClickHouse server
:param autogenerate_session_id If set, this will override the 'autogenerate_session_id' common setting.
+ :param form_encode_query_params If True, query parameters will be sent as form-encoded data in the request body
+ instead of as URL parameters. This is useful for queries with large parameter sets that might exceed URL length
+ limits. Only available for query operations (not inserts). Default: False
:return: ClickHouse Connect Client instance
"""
if dsn:
@@ -131,8 +134,8 @@ def default_port(interface: str, secure: bool):
async def create_async_client(*,
- host: str = None,
- username: str = None,
+ host: Optional[str] = None,
+ username: Optional[str] = None,
password: str = '',
database: str = '__default__',
interface: Optional[str] = None,
@@ -194,6 +197,9 @@ async def create_async_client(*,
validity. This option can be used if using an ssh_tunnel or other indirect means to an ClickHouse server
where the `host` argument refers to the tunnel or proxy and not the actual ClickHouse server
:param autogenerate_session_id If set, this will override the 'autogenerate_session_id' common setting.
+ :param form_encode_query_params If True, query parameters will be sent as form-encoded data in the request body
+ instead of as URL parameters. This is useful for queries with large parameter sets that might exceed URL length
+ limits. Only available for query operations (not inserts). Default: False
:return: ClickHouse Connect Client instance
"""
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/asyncclient.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/asyncclient.py
index 4bb9f080948..feaa4e22903 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/asyncclient.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/asyncclient.py
@@ -343,6 +343,46 @@ class AsyncClient:
result = await loop.run_in_executor(self.executor, _query_df)
return result
+ async def query_df_arrow(
+ self,
+ query: str,
+ parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
+ settings: Optional[Dict[str, Any]] = None,
+ use_strings: Optional[bool] = None,
+ external_data: Optional[ExternalData] = None,
+ transport_settings: Optional[Dict[str, str]] = None,
+ dataframe_library: str = "pandas",
+ ) -> Union["pd.DataFrame", "pl.DataFrame"]:
+ """
+ Query method using the ClickHouse Arrow format to return a DataFrame
+ with PyArrow dtype backend. This provides better performance and memory efficiency
+ compared to the standard query_df method, though fewer output formatting options.
+
+ :param query: Query statement/format string
+ :param parameters: Optional dictionary used to format the query
+ :param settings: Optional dictionary of ClickHouse settings (key/string values)
+ :param use_strings: Convert ClickHouse String type to Arrow string type (instead of binary)
+ :param external_data: ClickHouse "external data" to send with query
+ :param transport_settings: Optional dictionary of transport level settings (HTTP headers, etc.)
+ :param dataframe_library: Library to use for DataFrame creation ("pandas" or "polars")
+ :return: DataFrame (pandas or polars based on dataframe_library parameter)
+ """
+
+ def _query_df_arrow():
+ return self.client.query_df_arrow(
+ query=query,
+ parameters=parameters,
+ settings=settings,
+ use_strings=use_strings,
+ external_data=external_data,
+ transport_settings=transport_settings,
+ dataframe_library=dataframe_library
+ )
+
+ loop = asyncio.get_running_loop()
+ result = await loop.run_in_executor(self.executor, _query_df_arrow)
+ return result
+
async def query_df_stream(self,
query: Optional[str] = None,
parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
@@ -378,6 +418,45 @@ class AsyncClient:
result = await loop.run_in_executor(self.executor, _query_df_stream)
return result
+ async def query_df_arrow_stream(
+ self,
+ query: str,
+ parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
+ settings: Optional[Dict[str, Any]] = None,
+ use_strings: Optional[bool] = None,
+ external_data: Optional[ExternalData] = None,
+ transport_settings: Optional[Dict[str, str]] = None,
+ dataframe_library: str = "pandas"
+ ) -> StreamContext:
+ """
+ Query method that returns the results as a stream of DataFrames with PyArrow dtype backend.
+ Each DataFrame represents a block from the ClickHouse response.
+
+ :param query: Query statement/format string
+ :param parameters: Optional dictionary used to format the query
+ :param settings: Optional dictionary of ClickHouse settings (key/string values)
+ :param use_strings: Convert ClickHouse String type to Arrow string type (instead of binary)
+ :param external_data: ClickHouse "external data" to send with query
+ :param transport_settings: Optional dictionary of transport level settings (HTTP headers, etc.)
+ :param dataframe_library: Library to use for DataFrame creation ("pandas" or "polars")
+ :return: StreamContext that yields DataFrames (pandas or polars based on dataframe_library parameter)
+ """
+
+ def _query_df_arrow_stream():
+ return self.client.query_df_arrow_stream(
+ query=query,
+ parameters=parameters,
+ settings=settings,
+ use_strings=use_strings,
+ external_data=external_data,
+ transport_settings=transport_settings,
+ dataframe_library=dataframe_library
+ )
+
+ loop = asyncio.get_running_loop()
+ result = await loop.run_in_executor(self.executor, _query_df_arrow_stream)
+ return result
+
def create_query_context(self,
query: Optional[Union[str, bytes]] = None,
parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
@@ -501,7 +580,7 @@ class AsyncClient:
cmd: str,
parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
data: Union[str, bytes] = None,
- settings: Dict[str, Any] = None,
+ settings: Optional[Dict[str, Any]] = None,
use_database: bool = True,
external_data: Optional[ExternalData] = None,
transport_settings: Optional[Dict[str, str]] = None) -> Union[str, int, Sequence[str], QuerySummary]:
@@ -642,6 +721,43 @@ class AsyncClient:
result = await loop.run_in_executor(self.executor, _insert_arrow)
return result
+ async def insert_df_arrow(
+ self,
+ table: str,
+ df: Union["pd.DataFrame", "pl.DataFrame"],
+ database: Optional[str] = None,
+ settings: Optional[Dict] = None,
+ transport_settings: Optional[Dict[str, str]] = None,
+ ) -> QuerySummary:
+ """
+ Insert a pandas DataFrame with PyArrow backend or a polars DataFrame into ClickHouse using Arrow format.
+ This method is optimized for DataFrames that already use Arrow format, providing
+ better performance than the standard insert_df method.
+
+ Validation is performed and an exception will be raised if this requirement is not met.
+ Polars DataFrames are natively Arrow-based and don't require additional validation.
+
+ :param table: ClickHouse table name
+ :param df: Pandas DataFrame with PyArrow dtype backend or Polars DataFrame
+ :param database: Optional ClickHouse database name
+ :param settings: Optional dictionary of ClickHouse settings (key/string values)
+ :param transport_settings: Optional dictionary of transport level settings (HTTP headers, etc.)
+ :return: QuerySummary with summary information, throws exception if insert fails
+ """
+
+ def _insert_df_arrow():
+ return self.client.insert_df_arrow(
+ table=table,
+ df=df,
+ database=database,
+ settings=settings,
+ transport_settings=transport_settings,
+ )
+
+ loop = asyncio.get_running_loop()
+ result = await loop.run_in_executor(self.executor, _insert_df_arrow)
+ return result
+
async def create_insert_context(self,
table: str,
column_names: Optional[Union[str, Sequence[str]]] = None,
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py
index 86556f46c77..77dfcb9a9d0 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py
@@ -16,10 +16,10 @@ from clickhouse_connect.datatypes import dynamic as dynamic_module
from clickhouse_connect.driver import tzutil
from clickhouse_connect.driver.common import dict_copy, StreamContext, coerce_int, coerce_bool
from clickhouse_connect.driver.constants import CH_VERSION_WITH_PROTOCOL, PROTOCOL_VERSION_WITH_LOW_CARD
-from clickhouse_connect.driver.exceptions import ProgrammingError, OperationalError
+from clickhouse_connect.driver.exceptions import ProgrammingError, OperationalError, DataError
from clickhouse_connect.driver.external import ExternalData
from clickhouse_connect.driver.insert import InsertContext
-from clickhouse_connect.driver.options import check_arrow, check_pandas, check_numpy
+from clickhouse_connect.driver.options import check_arrow, check_pandas, check_numpy, check_polars, pd, arrow, pl, IS_PANDAS_2
from clickhouse_connect.driver.summary import QuerySummary
from clickhouse_connect.driver.models import ColumnDef, SettingDef, SettingStatus
from clickhouse_connect.driver.query import QueryResult, to_arrow, to_arrow_batches, QueryContext, arrow_buffer
@@ -30,6 +30,7 @@ logger = logging.getLogger(__name__)
arrow_str_setting = 'output_format_arrow_string_as_string'
+# pylint: disable=too-many-lines
# pylint: disable=too-many-public-methods,too-many-arguments,too-many-positional-arguments,too-many-instance-attributes
class Client(ABC):
"""
@@ -357,6 +358,7 @@ class Client(ABC):
:return: Numpy array representing the result set
"""
check_numpy()
+ self._add_integration_tag("numpy")
return self._context_query(locals(), use_numpy=True).np_result
# pylint: disable=duplicate-code,too-many-arguments,unused-argument
@@ -378,6 +380,7 @@ class Client(ABC):
:return: Generator that yield a numpy array per block representing the result set
"""
check_numpy()
+ self._add_integration_tag("numpy")
return self._context_query(locals(), use_numpy=True, streaming=True).np_stream
# pylint: disable=duplicate-code,unused-argument
@@ -403,6 +406,7 @@ class Client(ABC):
:return: Pandas dataframe representing the result set
"""
check_pandas()
+ self._add_integration_tag("pandas")
return self._context_query(locals(), use_numpy=True, as_pandas=True).df_result
# pylint: disable=duplicate-code,unused-argument
@@ -428,6 +432,7 @@ class Client(ABC):
:return: Generator that yields a Pandas dataframe per block representing the result set
"""
check_pandas()
+ self._add_integration_tag("pandas")
return self._context_query(locals(), use_numpy=True,
as_pandas=True,
streaming=True).df_stream
@@ -547,6 +552,7 @@ class Client(ABC):
:return: PyArrow.Table
"""
check_arrow()
+ self._add_integration_tag("arrow")
settings = self._update_arrow_settings(settings, use_strings)
return to_arrow(self.raw_query(query,
parameters,
@@ -573,6 +579,7 @@ class Client(ABC):
:return: Generator that yields a PyArrow.Table for per block representing the result set
"""
check_arrow()
+ self._add_integration_tag("arrow")
settings = self._update_arrow_settings(settings, use_strings)
return to_arrow_batches(self.raw_stream(query,
parameters,
@@ -581,6 +588,111 @@ class Client(ABC):
external_data=external_data,
transport_settings=transport_settings))
+ def query_df_arrow(self,
+ query: str,
+ parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
+ settings: Optional[Dict[str, Any]] = None,
+ use_strings: Optional[bool] = None,
+ external_data: Optional[ExternalData] = None,
+ transport_settings: Optional[Dict[str, str]] = None,
+ dataframe_library: str = "pandas"
+ ) -> Union["pd.DataFrame", "pl.DataFrame"]:
+ """
+ Query method using the ClickHouse Arrow format to return a DataFrame
+ with PyArrow dtype backend. This provides better performance and memory efficiency
+ compared to the standard query_df method, though fewer output formatting options.
+
+ :param query: Query statement/format string
+ :param parameters: Optional dictionary used to format the query
+ :param settings: Optional dictionary of ClickHouse settings (key/string values)
+ :param use_strings: Convert ClickHouse String type to Arrow string type (instead of binary)
+ :param external_data: ClickHouse "external data" to send with query
+ :param transport_settings: Optional dictionary of transport level settings (HTTP headers, etc.)
+ :param dataframe_library: Library to use for DataFrame creation ("pandas" or "polars")
+ :return: DataFrame (pandas or polars based on dataframe_library parameter)
+ """
+ check_arrow()
+
+ if dataframe_library == "pandas":
+ check_pandas()
+ self._add_integration_tag("pandas")
+ if not IS_PANDAS_2:
+ raise ProgrammingError("PyArrow-backed dtypes are only supported when using pandas 2.x.")
+
+ def converter(table: arrow.Table) -> pd.DataFrame:
+ return table.to_pandas(types_mapper=pd.ArrowDtype, safe=False)
+
+ elif dataframe_library == "polars":
+ check_polars()
+ self._add_integration_tag("polars")
+
+ def converter(table: arrow.Table) -> pl.DataFrame:
+ return pl.from_arrow(table)
+
+ else:
+ raise ValueError(f"dataframe_library must be 'pandas' or 'polars', got '{dataframe_library}'")
+
+ arrow_table = self.query_arrow(
+ query=query,
+ parameters=parameters,
+ settings=settings,
+ use_strings=use_strings,
+ external_data=external_data,
+ transport_settings=transport_settings,
+ )
+
+ return converter(arrow_table)
+
+ def query_df_arrow_stream(self,
+ query: str,
+ parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
+ settings: Optional[Dict[str, Any]] = None,
+ use_strings: Optional[bool] = None,
+ external_data: Optional[ExternalData] = None,
+ transport_settings: Optional[Dict[str, str]] = None,
+ dataframe_library: str = "pandas") -> StreamContext:
+ """
+ Query method that returns the results as a stream of DataFrames with PyArrow dtype backend.
+ Each DataFrame represents a block from the ClickHouse response.
+
+ :param query: Query statement/format string
+ :param parameters: Optional dictionary used to format the query
+ :param settings: Optional dictionary of ClickHouse settings (key/string values)
+ :param use_strings: Convert ClickHouse String type to Arrow string type (instead of binary)
+ :param external_data: ClickHouse "external data" to send with query
+ :param transport_settings: Optional dictionary of transport level settings (HTTP headers, etc.)
+ :param dataframe_library: Library to use for DataFrame creation ("pandas" or "polars")
+ :return: StreamContext that yields DataFrames (pandas or polars based on dataframe_library parameter)
+ """
+ check_arrow()
+ if dataframe_library == "pandas":
+ check_pandas()
+ self._add_integration_tag("pandas")
+ if not IS_PANDAS_2:
+ raise ProgrammingError("PyArrow-backed dtypes are only supported when using pandas 2.x.")
+
+ def converter(table: "arrow.Table") -> "pd.DataFrame":
+ return table.to_pandas(types_mapper=pd.ArrowDtype, safe=False)
+ elif dataframe_library == "polars":
+ check_polars()
+ self._add_integration_tag("polars")
+
+ def converter(table: arrow.Table) -> pl.DataFrame:
+ return pl.from_arrow(table)
+ else:
+ raise ValueError(f"dataframe_library must be 'pandas' or 'polars', got '{dataframe_library}'")
+ settings = self._update_arrow_settings(settings, use_strings)
+ raw_stream = self.raw_stream(
+ query, parameters, settings, fmt="ArrowStream", external_data=external_data, transport_settings=transport_settings
+ )
+ reader = arrow.ipc.open_stream(raw_stream)
+
+ def df_generator():
+ for batch in reader:
+ yield converter(batch)
+
+ return StreamContext(raw_stream, df_generator())
+
def _update_arrow_settings(self,
settings: Optional[Dict[str, Any]],
use_strings: Optional[bool]) -> Dict[str, Any]:
@@ -702,6 +814,7 @@ class Client(ABC):
:return: QuerySummary with summary information, throws exception if insert fails
"""
check_pandas()
+ self._add_integration_tag("pandas")
if context is None:
if column_names is None:
column_names = df.columns
@@ -731,11 +844,72 @@ class Client(ABC):
:param transport_settings: Optional dictionary of transport level settings (HTTP headers, etc.)
"""
check_arrow()
+ self._add_integration_tag("arrow")
full_table = table if '.' in table or not database else f'{database}.{table}'
compression = self.write_compression if self.write_compression in ('zstd', 'lz4') else None
column_names, insert_block = arrow_buffer(arrow_table, compression)
return self.raw_insert(full_table, column_names, insert_block, settings, 'Arrow', transport_settings)
+ def insert_df_arrow(self,
+ table: str,
+ df: Union["pd.DataFrame", "pl.DataFrame"],
+ database: Optional[str] = None,
+ settings: Optional[Dict] = None,
+ transport_settings: Optional[Dict[str, str]] = None) -> QuerySummary:
+ """
+ Insert a pandas DataFrame with PyArrow backend or a polars DataFrame into ClickHouse using Arrow format.
+ This method is optimized for DataFrames that already use Arrow format, providing
+ better performance than the standard insert_df method.
+
+ Validation is performed and an exception will be raised if this requirement is not met.
+ Polars DataFrames are natively Arrow-based and don't require additional validation.
+
+ :param table: ClickHouse table name
+ :param df: Pandas DataFrame with PyArrow dtype backend or Polars DataFrame
+ :param database: Optional ClickHouse database name
+ :param settings: Optional dictionary of ClickHouse settings (key/string values)
+ :param transport_settings: Optional dictionary of transport level settings (HTTP headers, etc.)
+ :return: QuerySummary with summary information, throws exception if insert fails
+ """
+ check_arrow()
+
+ if pd is not None and isinstance(df, pd.DataFrame):
+ df_lib = "pandas"
+ elif pl is not None and isinstance(df, pl.DataFrame):
+ df_lib = "polars"
+ else:
+ if pd is None and pl is None:
+ raise ImportError("A DataFrame library (pandas or polars) must be installed to use insert_df_arrow.")
+ raise TypeError(f"df must be either a pandas DataFrame or polars DataFrame, got {type(df).__name__}")
+
+ if df_lib == "pandas":
+ if not IS_PANDAS_2:
+ raise ProgrammingError("PyArrow-backed dtypes are only supported when using pandas 2.x.")
+
+ non_arrow_cols = [col for col, dtype in df.dtypes.items() if not isinstance(dtype, pd.ArrowDtype)]
+ if non_arrow_cols:
+ raise ProgrammingError(
+ f"insert_df_arrow requires all columns to use PyArrow dtypes. Non-Arrow columns found: [{', '.join(non_arrow_cols)}]. "
+ )
+ try:
+ arrow_table = arrow.Table.from_pandas(df, preserve_index=False)
+ except Exception as e:
+ raise DataError(f"Failed to convert pandas DataFrame to Arrow table: {e}") from e
+ else:
+ try:
+ arrow_table = df.to_arrow()
+ except Exception as e:
+ raise DataError(f"Failed to convert polars DataFrame to Arrow table: {e}") from e
+
+ self._add_integration_tag(df_lib)
+ return self.insert_arrow(
+ table=table,
+ arrow_table=arrow_table,
+ database=database,
+ settings=settings,
+ transport_settings=transport_settings,
+ )
+
def create_insert_context(self,
table: str,
column_names: Optional[Union[str, Sequence[str]]] = None,
@@ -823,6 +997,11 @@ class Client(ABC):
return False
return True
+ # pylint: disable=no-self-use
+ def _add_integration_tag(self, name: str) -> None:
+ """Transport hook to surface 3rd party lib integration info (default: no-op)."""
+ return
+
@abstractmethod
def data_insert(self, context: InsertContext) -> QuerySummary:
"""
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/common.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/common.py
index 38bbd527839..76ccc7f24a0 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/common.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/common.py
@@ -2,12 +2,11 @@ import array
import struct
import sys
-from typing import Sequence, MutableSequence, Dict, Optional, Union, Generator
+from typing import Sequence, MutableSequence, Dict, Optional, Union, Generator, Callable
from clickhouse_connect.driver.exceptions import ProgrammingError, StreamClosedError, DataError
from clickhouse_connect.driver.types import Closable
-
# pylint: disable=invalid-name
must_swap = sys.byteorder == 'big'
int_size = array.array('i').itemsize
@@ -218,3 +217,74 @@ class StreamContext:
self._in_context = False
self.source.close()
self.gen = None
+
+# pylint: disable=too-many-return-statements
+def get_rename_method(method: Optional[str]) -> Optional[Callable[[str], str]]:
+ def _to_camel(s: str) -> str:
+ if not s:
+ return ""
+ out, up = [], False
+ for ch in s:
+ if ch.isspace() or ch == "_":
+ up = True
+ elif up:
+ out.append(ch.upper())
+ up = False
+ else:
+ out.append(ch)
+ return "".join(out)
+
+ def _to_underscore(s: str) -> str:
+ if not s:
+ return ""
+ out, prev = [], 0
+ for ch in s:
+ if ch.isspace():
+ if prev == 0:
+ out.append("_")
+ prev = 1
+ elif ch.isupper():
+ if prev == 0:
+ out.append("_")
+ out.append(ch.lower())
+ elif prev == 1:
+ out.append(ch.lower())
+ else:
+ out.append(ch)
+ prev = 2
+ else:
+ out.append(ch)
+ prev = 0
+ return "".join(out)[1:] if out and out[0] == "_" else "".join(out)
+
+ def _remove_prefix(s: str) -> str:
+ i = s.rfind(".")
+ return s[i + 1 :] if i >= 0 else s
+
+ if not method:
+ return None
+
+ name = method.strip().upper()
+
+ if name == "NONE":
+ return None
+ if name == "REMOVE_PREFIX":
+ return _remove_prefix
+ if name == "TO_CAMELCASE":
+ return _to_camel
+ if name == "TO_CAMELCASE_WITHOUT_PREFIX":
+ return lambda s: _to_camel(_remove_prefix(s))
+ if name == "TO_UNDERSCORE":
+ return _to_underscore
+ if name == "TO_UNDERSCORE_WITHOUT_PREFIX":
+ return lambda s: _to_underscore(_remove_prefix(s))
+
+ valid_options = [
+ "none",
+ "remove_prefix",
+ "to_camelcase",
+ "to_camelcase_without_prefix",
+ "to_underscore",
+ "to_underscore_without_prefix",
+ ]
+ raise ValueError(f"Invalid option '{name}'. Expected one of {valid_options}")
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/context.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/context.py
index 0da870bcb43..b5d4fbeab5f 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/context.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/context.py
@@ -1,6 +1,6 @@
import logging
import re
-from typing import Optional, Dict, Union, Any
+from typing import Optional, Dict, Union, Any, Callable
logger = logging.getLogger(__name__)
@@ -44,6 +44,7 @@ class BaseQueryContext:
self.use_extended_dtypes = use_extended_dtypes
self._active_col_fmt = None
self._active_col_type_fmts = _empty_map
+ self.column_renamer: Optional[Callable[[str], str]] = None
def start_column(self, name: str):
self.column_name = name
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/dataconv.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/dataconv.py
index 5acc49830e8..41d188dad16 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/dataconv.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/dataconv.py
@@ -1,9 +1,11 @@
import array
+
from datetime import datetime, date, tzinfo
from ipaddress import IPv4Address
from typing import Sequence, Optional, Any
from uuid import UUID, SafeUUID
+from clickhouse_connect.driver import tzutil
from clickhouse_connect.driver.common import int_size
from clickhouse_connect.driver.errors import NONE_IN_NULLABLE_COLUMN
from clickhouse_connect.driver.types import ByteSource
@@ -29,7 +31,7 @@ def read_ipv4_col(source: ByteSource, num_rows: int):
def read_datetime_col(source: ByteSource, num_rows: int, tz_info: Optional[tzinfo]):
src_array = source.read_array('I', num_rows)
if tz_info is None:
- fts = datetime.utcfromtimestamp
+ fts = tzutil.utcfromtimestamp
return [fts(ts) for ts in src_array]
fts = datetime.fromtimestamp
return [fts(ts, tz_info) for ts in src_array]
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py
index c055c639675..0f36b87c531 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py
@@ -3,6 +3,8 @@ import json
import logging
import re
import uuid
+from importlib import import_module
+from importlib.metadata import version as dist_version
from base64 import b64encode
from typing import Optional, Dict, Any, Sequence, Union, List, Callable, Generator, BinaryIO
from urllib.parse import urlencode
@@ -40,13 +42,15 @@ class HttpClient(Client):
valid_transport_settings = {'database', 'buffer_size', 'session_id',
'compress', 'decompress', 'session_timeout',
'session_check', 'query_id', 'quota_key',
- 'wait_end_of_query', 'client_protocol_version'}
+ 'wait_end_of_query', 'client_protocol_version',
+ 'role'}
optional_transport_settings = {'send_progress_in_http_headers',
'http_headers_progress_interval_ms',
'enable_http_compression'}
_owns_pool_manager = False
- # pylint: disable=too-many-positional-arguments,too-many-arguments,too-many-locals,too-many-branches,too-many-statements,unused-argument
+ # R0917: too-many-positional-arguments
+ # pylint: disable=too-many-arguments,R0917,too-many-locals,too-many-branches,too-many-statements,unused-argument
def __init__(self,
interface: str,
host: str,
@@ -75,7 +79,9 @@ class HttpClient(Client):
show_clickhouse_errors: Optional[bool] = None,
autogenerate_session_id: Optional[bool] = None,
tls_mode: Optional[str] = None,
- proxy_path: str = ''):
+ proxy_path: str = '',
+ form_encode_query_params: bool = False,
+ rename_response_column: Optional[str] = None):
"""
Create an HTTP ClickHouse Connect client
See clickhouse_connect.get_client for parameters
@@ -85,6 +91,7 @@ class HttpClient(Client):
proxy_path = '/' + proxy_path
self.url = f'{interface}://{host}:{port}{proxy_path}'
self.headers = {}
+ self.form_encode_query_params = form_encode_query_params
self.params = dict_copy(HttpClient.params)
ch_settings = dict_copy(settings, self.params)
self.http = pool_mgr
@@ -125,6 +132,7 @@ class HttpClient(Client):
elif (not client_cert or tls_mode in ('strict', 'proxy')) and username:
self.headers['Authorization'] = 'Basic ' + b64encode(f'{username}:{password}'.encode()).decode()
+ self._reported_libs = set()
self.headers['User-Agent'] = common.build_client_name(client_name)
self._read_format = self._write_format = 'Native'
self._transform = NativeTransform()
@@ -140,6 +148,7 @@ class HttpClient(Client):
self._send_comp_setting = False
self._progress_interval = None
self._active_session = None
+ self._rename_response_column = rename_response_column
# allow to override the global autogenerate_session_id setting via the constructor params
_autogenerate_session_id = common.get_setting('autogenerate_session_id') \
@@ -211,18 +220,42 @@ class HttpClient(Client):
if self.protocol_version:
params['client_protocol_version'] = self.protocol_version
context.block_info = True
- params.update(context.bind_params)
params.update(self._validate_settings(context.settings))
+ context.rename_response_column = self._rename_response_column
if not context.is_insert and columns_only_re.search(context.uncommented_query):
- response = self._raw_request(f'{context.final_query}\n FORMAT JSON',
- params, headers, retries=self.query_retries)
+ # Mirror normal query behavior for form encoding and external data
+ fmt_json_query = f'{context.final_query}\n FORMAT JSON'
+ if self.form_encode_query_params:
+ fields = {'query': fmt_json_query}
+ fields.update(context.bind_params)
+ if context.external_data: # Deal with form encoding + external data
+ params.update(context.external_data.query_params)
+ fields.update(context.external_data.form_data)
+ response = self._raw_request(bytes(), params, headers, retries=self.query_retries, fields=fields)
+ elif context.external_data: # Deal with external data without form encoding
+ fields = context.external_data.form_data
+ params.update(context.bind_params)
+ params.update(context.external_data.query_params)
+ params['query'] = fmt_json_query
+ response = self._raw_request(bytes(), params, headers, retries=self.query_retries, fields=fields)
+ else: # Legacy behavior (plain body, bind params in URL)
+ params.update(context.bind_params)
+ response = self._raw_request(fmt_json_query,
+ params, headers, retries=self.query_retries)
json_result = json.loads(response.data)
# ClickHouse will respond with a JSON object of meta, data, and some other objects
# We just grab the column names and column types from the metadata sub object
names: List[str] = []
types: List[ClickHouseType] = []
+ renamer = context.column_renamer
for col in json_result['meta']:
- names.append(col['name'])
+ name = col['name']
+ if renamer is not None:
+ try:
+ name = renamer(name)
+ except Exception as e: # pylint: disable=broad-exception-caught
+ logger.debug("Failed to rename col '%s'. Skipping rename. Error: %s", name, e)
+ names.append(name)
types.append(registry.get_from_name(col['type']))
return QueryResult([], None, tuple(names), tuple(types))
@@ -231,12 +264,23 @@ class HttpClient(Client):
if self._send_comp_setting:
params['enable_http_compression'] = '1'
final_query = self._prep_query(context)
- if context.external_data:
+ fields = {}
+ # Setup additional query parameters and body
+ if self.form_encode_query_params:
+ body = bytes()
+ fields['query'] = final_query
+ fields.update(context.bind_params)
+ if context.external_data:
+ params.update(context.external_data.query_params)
+ fields.update(context.external_data.form_data)
+ elif context.external_data:
+ params.update(context.bind_params)
body = bytes()
params['query'] = final_query
params.update(context.external_data.query_params)
fields = context.external_data.form_data
else:
+ params.update(context.bind_params)
body = final_query
fields = None
headers['Content-Type'] = 'text/plain; charset=utf-8'
@@ -380,25 +424,47 @@ class HttpClient(Client):
return QuerySummary(self._summary(response))
def _error_handler(self, response: HTTPResponse, retried: bool = False) -> None:
- if self.show_clickhouse_errors:
+ """
+ Handles HTTP errors. Tries to be robust and provide maximum context.
+ """
+ try:
+ body = ""
+ # Always try to read the response body for context.
try:
- err_content = get_response_data(response)
+ # get_response_data reads body and decodes it for the error message
+ raw_body = get_response_data(response)
+ body = common.format_error(
+ raw_body.decode(errors="backslashreplace")
+ ).strip()
except Exception: # pylint: disable=broad-except
- err_content = None
- finally:
- response.close()
+ # If we can't read or decode the body, we'll proceed without it
+ logger.warning("Failed to read error response body", exc_info=True)
- err_str = f'HTTPDriver for {self.url} returned response code {response.status}'
- err_code = response.headers.get(ex_header)
- if err_code:
- err_str = f'HTTPDriver for {self.url} received ClickHouse error code {err_code}'
- if err_content:
- err_msg = common.format_error(err_content.decode(errors='backslashreplace'))
- if err_msg.startswith('Code'):
- err_str = f'{err_str}\n {err_msg}'
- else:
- err_str = 'The ClickHouse server returned an error.'
+ # Build the error message
+ if self.show_clickhouse_errors:
+ err_code = response.headers.get(ex_header)
+ if err_code:
+ # Prioritize the specific ClickHouse exception code if it exists.
+ err_str = f"Received ClickHouse exception, code: {err_code}"
+ else:
+ # Otherwise, just use the generic HTTP status
+ err_str = f"HTTP driver received HTTP status {response.status}"
+ if body:
+ # Always append the body if it exists
+ err_str = f"{err_str}, server response: {body}"
+ else:
+ # Simple message for when detailed errors are disabled
+ err_str = "The ClickHouse server returned an error"
+
+ # Add the URL for additional context
+ err_str = f"{err_str} (for url {self.url})"
+
+ finally:
+ # Ensure closed response to prevent resource leaks
+ response.close()
+
+ # Raise the appropriate exception class
raise OperationalError(err_str) if retried else DatabaseError(err_str) from None
def _raw_request(self,
@@ -519,19 +585,75 @@ class HttpClient(Client):
params = self._validate_settings(settings or {})
if use_database and self.database:
params['database'] = self.database
- params.update(bind_params)
- if external_data:
- if isinstance(final_query, bytes):
- raise ProgrammingError('Cannot combine binary query data with `External Data`')
+ fields = {}
+ # Setup query body
+ if external_data and not self.form_encode_query_params and isinstance(final_query, bytes):
+ raise ProgrammingError("Binary query cannot be placed in URL when using External Data; enable form encoding.")
+ # Setup additional query parameters and body
+ if self.form_encode_query_params:
+ body = bytes()
+ fields['query'] = final_query
+ fields.update(bind_params)
+ if external_data:
+ params.update(external_data.query_params)
+ fields.update(external_data.form_data)
+ elif external_data:
+ params.update(bind_params)
body = bytes()
params['query'] = final_query
params.update(external_data.query_params)
fields = external_data.form_data
else:
+ params.update(bind_params)
body = final_query
fields = None
return body, params, fields
+ # pylint: disable=broad-exception-caught
+ def _add_integration_tag(self, name: str):
+ """
+ Dynamically adds a product (like pandas or sqlalchemy) to the User-Agent string details section.
+ """
+ if not common.get_setting("send_integration_tags") or name in self._reported_libs:
+ return
+
+ try:
+ ver = "unknown"
+ try:
+ ver = dist_version(name)
+ except Exception:
+ try:
+ mod = import_module(name)
+ ver = getattr(mod, "__version__", "unknown")
+ except Exception:
+ pass
+
+ product_info = f"{name}/{ver}"
+
+ ua = self.headers.get("User-Agent", "")
+ start = ua.find("(")
+ if start == -1:
+ return
+ end = ua.find(")", start + 1)
+ if end == -1:
+ return
+
+ details = ua[start + 1 : end].strip()
+
+ if product_info in details:
+ self._reported_libs.add(name)
+ return
+
+ new_details = f"{product_info}; {details}" if details else product_info
+ new_ua = f"{ua[: start + 1]}{new_details}{ua[end:]}"
+ self.headers["User-Agent"] = new_ua.strip()
+
+ self._reported_libs.add(name)
+ logger.debug("Added '%s' to User-Agent", product_info)
+
+ except Exception as e:
+ logger.debug("Problem adding '%s' to User-Agent: %s", name, e)
+
def ping(self):
"""
See BaseClient doc_string for this method
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/httputil.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/httputil.py
index 37bb24e616e..c649b9a2fe9 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/httputil.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/httputil.py
@@ -1,5 +1,5 @@
import atexit
-import http
+import http.client
import logging
import multiprocessing
import os
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/options.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/options.py
index 4cec665c03a..c8a3834bc0b 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/options.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/options.py
@@ -2,6 +2,8 @@ from clickhouse_connect.driver.exceptions import NotSupportedError
pd_time_test = None
pd_extended_dtypes = False
+PANDAS_VERSION = None
+IS_PANDAS_2 = None
try:
import numpy as np
@@ -10,6 +12,8 @@ except ImportError:
try:
import pandas as pd
+ PANDAS_VERSION = tuple(map(int, pd.__version__.split(".")[:2]))
+ IS_PANDAS_2 = PANDAS_VERSION >= (2, 0)
pd_extended_dtypes = not pd.__version__.startswith('0')
try:
from pandas.core.dtypes.common import is_datetime64_dtype
@@ -33,6 +37,10 @@ try:
except ImportError:
arrow = None
+try:
+ import polars as pl
+except ImportError:
+ pl = None
def check_numpy():
if np:
@@ -50,3 +58,9 @@ def check_arrow():
if arrow:
return arrow
raise NotSupportedError('PyArrow package is not installed')
+
+
+def check_polars():
+ if pl:
+ return pl
+ raise NotSupportedError("Polars package is not installed")
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py
index 6c764cfaea6..de490a5744a 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py
@@ -10,7 +10,7 @@ from pytz.exceptions import UnknownTimeZoneError
from clickhouse_connect.driver import tzutil
from clickhouse_connect.driver.binding import bind_query
-from clickhouse_connect.driver.common import dict_copy, empty_gen, StreamContext
+from clickhouse_connect.driver.common import dict_copy, empty_gen, StreamContext, get_rename_method
from clickhouse_connect.driver.external import ExternalData
from clickhouse_connect.driver.types import Matrix, Closable
from clickhouse_connect.driver.exceptions import StreamClosedError, ProgrammingError
@@ -53,7 +53,8 @@ class QueryContext(BaseQueryContext):
streaming: bool = False,
apply_server_tz: bool = False,
external_data: Optional[ExternalData] = None,
- transport_settings: Optional[Dict[str, str]] = None):
+ transport_settings: Optional[Dict[str, str]] = None,
+ rename_response_column: Optional[str] = None):
"""
Initializes various configuration settings for the query context
@@ -118,9 +119,20 @@ class QueryContext(BaseQueryContext):
self.as_pandas = as_pandas
self.use_pandas_na = as_pandas and pd_extended_dtypes
self.streaming = streaming
+ self._rename_response_column: Optional[str] = rename_response_column
+ self.column_renamer = get_rename_method(rename_response_column)
self._update_query()
@property
+ def rename_response_column(self) -> Optional[str]:
+ return self._rename_response_column
+
+ @rename_response_column.setter
+ def rename_response_column(self, method: Optional[str]):
+ self._rename_response_column = method
+ self.column_renamer = get_rename_method(method)
+
+ @property
def is_select(self) -> bool:
return select_re.search(self.uncommented_query) is not None
@@ -192,7 +204,8 @@ class QueryContext(BaseQueryContext):
as_pandas: bool = False,
streaming: bool = False,
external_data: Optional[ExternalData] = None,
- transport_settings: Optional[Dict[str, str]] = None) -> 'QueryContext':
+ transport_settings: Optional[Dict[str, str]] = None,
+ rename_response_column: Optional[str] = None) -> 'QueryContext':
"""
Creates Query context copy with parameters overridden/updated as appropriate.
"""
@@ -214,7 +227,8 @@ class QueryContext(BaseQueryContext):
streaming,
self.apply_server_tz,
self.external_data if external_data is None else external_data,
- self.transport_settings if transport_settings is None else transport_settings)
+ self.transport_settings if transport_settings is None else transport_settings,
+ self.rename_response_column if rename_response_column is None else rename_response_column)
def _update_query(self):
self.final_query, self.bind_params = bind_query(self.query, self.parameters, self.server_tz)
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/transform.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/transform.py
index b5ae795c9ad..9edee9e10f2 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/transform.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/transform.py
@@ -22,6 +22,7 @@ class NativeTransform:
names = []
col_types = []
block_num = 0
+ renamer = context.column_renamer
def get_block():
nonlocal block_num
@@ -35,10 +36,11 @@ class NativeTransform:
return None
num_rows = source.read_leb128()
for col_num in range(num_cols):
- name = source.read_leb128_str()
+ orig_name = source.read_leb128_str()
type_name = source.read_leb128_str()
if block_num == 0:
- names.append(name)
+ disp_name = renamer(orig_name) if renamer is not None else orig_name
+ names.append(disp_name)
col_type = registry.get_from_name(type_name)
col_types.append(col_type)
else:
@@ -46,7 +48,7 @@ class NativeTransform:
if num_rows == 0:
result_block.append(tuple())
else:
- context.start_column(name)
+ context.start_column(orig_name)
column = col_type.read_column(source, num_rows, context)
result_block.append(column)
except Exception as ex:
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/tzutil.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/tzutil.py
index c27b51dd72c..d2157c14766 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/tzutil.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/tzutil.py
@@ -33,6 +33,9 @@ def normalize_timezone(timezone: pytz.timezone) -> Tuple[pytz.timezone, bool]:
return timezone, False
+def utcfromtimestamp(ts: float) -> datetime:
+ return datetime.fromtimestamp(ts, tz=pytz.UTC).replace(tzinfo=None)
+
try:
local_tz = pytz.timezone(os.environ.get('TZ', ''))
except pytz.UnknownTimeZoneError:
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driverc/dataconv.pyx b/contrib/python/clickhouse-connect/clickhouse_connect/driverc/dataconv.pyx
index da3718d1163..713b3c24575 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driverc/dataconv.pyx
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driverc/dataconv.pyx
@@ -19,6 +19,7 @@ from uuid import UUID, SafeUUID
from libc.string cimport memcpy
from datetime import tzinfo
+from clickhouse_connect.driver import tzutil
from clickhouse_connect.driver.errors import NONE_IN_NULLABLE_COLUMN
@cython.boundscheck(False)
@@ -63,7 +64,7 @@ def read_datetime_col(ResponseBuffer buffer, unsigned long long num_rows, tzinfo
cdef char * loc = buffer.read_bytes_c(4 * num_rows)
cdef object column = PyTuple_New(num_rows), v
if tzinfo is None:
- fts = datetime.utcfromtimestamp
+ fts = tzutil.utcfromtimestamp
while x < num_rows:
v = fts((<unsigned int*>loc)[0])
PyTuple_SET_ITEM(column, x, v)
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/tools/datagen.py b/contrib/python/clickhouse-connect/clickhouse_connect/tools/datagen.py
index 490d8529166..f956d382a71 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/tools/datagen.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/tools/datagen.py
@@ -16,9 +16,10 @@ from clickhouse_connect.datatypes.registry import get_from_name
from clickhouse_connect.datatypes.special import UUID
from clickhouse_connect.datatypes.string import String, FixedString
from clickhouse_connect.datatypes.temporal import Date, Date32, DateTime, DateTime64
+from clickhouse_connect.driver import tzutil
from clickhouse_connect.driver.common import array_sizes
-dt_from_ts = datetime.utcfromtimestamp
+dt_from_ts = tzutil.utcfromtimestamp
dt_from_ts_tz = datetime.fromtimestamp
epoch_date = date(1970, 1, 1)
date32_start_date = date(1925, 1, 1)
@@ -181,7 +182,9 @@ def random_ipv6():
ip_int = (int(random() * 4294967296) << 96) | (int(random() * 4294967296)) | (
int(random() * 4294967296) << 32) | ( int(random() * 4294967296) << 64)
return IPv6Address(ip_int)
- return IPv4Address(int(random() * 2 ** 32))
+ # Return mapped IPv4 as IPv6
+ ipv4_int = int(random() * 2 ** 32)
+ return IPv6Address(f"::ffff:{IPv4Address(ipv4_int)}")
def random_nested(keys: Sequence[str], types: Sequence[ClickHouseType], col_def: RandomValueDef):
diff --git a/contrib/python/clickhouse-connect/ya.make b/contrib/python/clickhouse-connect/ya.make
index cead3e1f6d9..7cc8b67f8a5 100644
--- a/contrib/python/clickhouse-connect/ya.make
+++ b/contrib/python/clickhouse-connect/ya.make
@@ -2,7 +2,7 @@
PY3_LIBRARY()
-VERSION(0.8.18)
+VERSION(0.9.2)
LICENSE(Apache-2.0)
@@ -40,6 +40,7 @@ PY_SRCS(
clickhouse_connect/cc_sqlalchemy/dialect.py
clickhouse_connect/cc_sqlalchemy/inspector.py
clickhouse_connect/cc_sqlalchemy/sql/__init__.py
+ clickhouse_connect/cc_sqlalchemy/sql/compiler.py
clickhouse_connect/cc_sqlalchemy/sql/ddlcompiler.py
clickhouse_connect/cc_sqlalchemy/sql/preparer.py
clickhouse_connect/common.py
diff --git a/contrib/python/pytest-lazy-fixtures/.dist-info/METADATA b/contrib/python/pytest-lazy-fixtures/.dist-info/METADATA
index 377892b4c57..9d32737ca3c 100644
--- a/contrib/python/pytest-lazy-fixtures/.dist-info/METADATA
+++ b/contrib/python/pytest-lazy-fixtures/.dist-info/METADATA
@@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: pytest-lazy-fixtures
-Version: 1.3.4
+Version: 1.4.0
Summary: Allows you to use fixtures in @pytest.mark.parametrize.
Project-URL: Homepage, https://github.com/dev-petrov/pytest-lazy-fixtures
Project-URL: Repository, https://github.com/dev-petrov/pytest-lazy-fixtures
@@ -88,7 +88,7 @@ def test_func(arg1, arg2):
```
And there is some useful wrapper called `lfc` (`lazy_fixture_callable`).
-It can work with any callable and your fixtures, e.g.
+It can work with any callable and your fixtures.
```python
import pytest
@@ -123,6 +123,12 @@ def entity_format():
return _entity_format
+# Recommended: implicit injection from the callable's parameter names
[email protected]("as_text", [lfc(lambda entity: str(entity))])
+def test_lfc_injection_basic(as_text):
+ assert as_text == "1"
+
+# Advanced: manual control over what to inject (explicit values into a callable)
@pytest.mark.parametrize(
"message",
[
@@ -149,6 +155,68 @@ def test_lazy_fixture_callable_with_attr_lf(result):
assert result == 3
```
+### Injecting fixtures from a callable signature
+
+`lfc` automatically injects fixtures based on the callable's parameter names. If a parameter name matches a fixture name, that fixture will be resolved and passed into the callable. This implicit injection is the standard, recommended behavior. You can still override any parameter explicitly using `lf`, either positionally or by name.
+Default values prevent implicit injection for that parameter.
+
+Examples:
+
+```python
+import pytest
+from pytest_lazy_fixtures import lf, lfc
+
+# Some fixtures used in the examples
+def alpha():
+ return 10
+
+def beta():
+ return 20
+
+def gamma():
+ return 30
+
+# Simple implicit injection by parameter name
[email protected]("result", [lfc(lambda alpha: alpha + 1)])
+def test_signature_injection_basic(result):
+ assert result == 11
+
+# Override an implicitly matched parameter (positional or keyword)
+ "result",
+ [
+ lfc(lambda alpha: alpha + 1, lf("beta")), # positional override
+ lfc(lambda alpha: alpha + 1, alpha=lf("beta")), # keyword override
+ ],
+)
+def test_signature_injection_override(result):
+ assert result == 21
+
+# Mix implicit and explicit params
+ "pair",
+ [
+ lfc(lambda alpha, beta: (alpha, beta), beta=lf("gamma")), # first implicit, second explicit
+ lfc(lambda alpha, beta: (alpha, beta), lf("gamma")), # first explicit, second implicit
+ ],
+)
+def test_signature_injection_mixed(pair):
+ assert pair in [(10, 30), (30, 20)]
+
+# 4) Defaults prevent implicit injection for that parameter
[email protected]("val", [lfc(lambda alpha=999: alpha)])
+def test_signature_injection_defaults(val):
+ assert val == 999
+```
+
+Notes:
+- Implicit injection only happens for parameters without an explicit value and without a default.
+- Explicit arguments passed to `lfc` (positional or keyword) always take precedence over implicit injection.
+- The mapping of positional explicit arguments follows the callable's parameter order.
+
## Contributing
Contributions are very welcome. Tests can be run with `pytest`.
diff --git a/contrib/python/pytest-lazy-fixtures/README.md b/contrib/python/pytest-lazy-fixtures/README.md
index e77941bd699..1fba577e9d4 100644
--- a/contrib/python/pytest-lazy-fixtures/README.md
+++ b/contrib/python/pytest-lazy-fixtures/README.md
@@ -74,7 +74,7 @@ def test_func(arg1, arg2):
```
And there is some useful wrapper called `lfc` (`lazy_fixture_callable`).
-It can work with any callable and your fixtures, e.g.
+It can work with any callable and your fixtures.
```python
import pytest
@@ -109,6 +109,12 @@ def entity_format():
return _entity_format
+# Recommended: implicit injection from the callable's parameter names
[email protected]("as_text", [lfc(lambda entity: str(entity))])
+def test_lfc_injection_basic(as_text):
+ assert as_text == "1"
+
+# Advanced: manual control over what to inject (explicit values into a callable)
@pytest.mark.parametrize(
"message",
[
@@ -135,6 +141,68 @@ def test_lazy_fixture_callable_with_attr_lf(result):
assert result == 3
```
+### Injecting fixtures from a callable signature
+
+`lfc` automatically injects fixtures based on the callable's parameter names. If a parameter name matches a fixture name, that fixture will be resolved and passed into the callable. This implicit injection is the standard, recommended behavior. You can still override any parameter explicitly using `lf`, either positionally or by name.
+Default values prevent implicit injection for that parameter.
+
+Examples:
+
+```python
+import pytest
+from pytest_lazy_fixtures import lf, lfc
+
+# Some fixtures used in the examples
+def alpha():
+ return 10
+
+def beta():
+ return 20
+
+def gamma():
+ return 30
+
+# Simple implicit injection by parameter name
[email protected]("result", [lfc(lambda alpha: alpha + 1)])
+def test_signature_injection_basic(result):
+ assert result == 11
+
+# Override an implicitly matched parameter (positional or keyword)
+ "result",
+ [
+ lfc(lambda alpha: alpha + 1, lf("beta")), # positional override
+ lfc(lambda alpha: alpha + 1, alpha=lf("beta")), # keyword override
+ ],
+)
+def test_signature_injection_override(result):
+ assert result == 21
+
+# Mix implicit and explicit params
+ "pair",
+ [
+ lfc(lambda alpha, beta: (alpha, beta), beta=lf("gamma")), # first implicit, second explicit
+ lfc(lambda alpha, beta: (alpha, beta), lf("gamma")), # first explicit, second implicit
+ ],
+)
+def test_signature_injection_mixed(pair):
+ assert pair in [(10, 30), (30, 20)]
+
+# 4) Defaults prevent implicit injection for that parameter
[email protected]("val", [lfc(lambda alpha=999: alpha)])
+def test_signature_injection_defaults(val):
+ assert val == 999
+```
+
+Notes:
+- Implicit injection only happens for parameters without an explicit value and without a default.
+- Explicit arguments passed to `lfc` (positional or keyword) always take precedence over implicit injection.
+- The mapping of positional explicit arguments follows the callable's parameter order.
+
## Contributing
Contributions are very welcome. Tests can be run with `pytest`.
diff --git a/contrib/python/pytest-lazy-fixtures/pytest_lazy_fixtures/lazy_fixture_callable.py b/contrib/python/pytest-lazy-fixtures/pytest_lazy_fixtures/lazy_fixture_callable.py
index 4e371763f7d..b073f4e1d3a 100644
--- a/contrib/python/pytest-lazy-fixtures/pytest_lazy_fixtures/lazy_fixture_callable.py
+++ b/contrib/python/pytest-lazy-fixtures/pytest_lazy_fixtures/lazy_fixture_callable.py
@@ -1,5 +1,6 @@
from __future__ import annotations
+import inspect
from inspect import isfunction
from typing import TYPE_CHECKING, Callable
@@ -9,22 +10,56 @@ if TYPE_CHECKING:
import pytest
+def _fill_unbound_params(
+ func: Callable[..., object], args: tuple[object, ...], kwargs: dict[str, object]
+) -> dict[str, object]:
+ """Analyze a callable's signature and bind lazy fixtures to unbound parameters.
+
+ This function takes a callable and its provided arguments, examines the callable's
+ signature, and returns a dictionary that maps any unbound parameter names to their
+ corresponding lazy fixtures.
+
+ Only parameters that are not already bound through the provided args/kwargs will
+ get mapped to lazy fixtures. Any parameters that have existing values provided
+ maintain those values.
+ """
+
+ try:
+ sig = inspect.signature(func)
+ except (ValueError, TypeError):
+ # Cowardly refuse to figure out the missing params
+ return {}
+
+ bound = sig.bind_partial(*args, **kwargs)
+
+ # Apply the defaults arguments, as we don't want to override them.
+ bound.apply_defaults()
+
+ unbound = [name for name in sig.parameters if name not in bound.arguments]
+
+ return {unbound_param: LazyFixtureWrapper(unbound_param) for unbound_param in unbound}
+
+
class LazyFixtureCallableWrapper(LazyFixtureWrapper):
_func: Callable | None
args: tuple
kwargs: dict
def __init__(self, callable_or_name: Callable | str, *args, **kwargs):
+ self.args = args
+ self.kwargs = kwargs
+
if callable(callable_or_name):
self._func = callable_or_name
self.name = (
callable_or_name.__name__ if isfunction(callable_or_name) else callable_or_name.__class__.__name__
)
+ # If we have a direct callable, analyze its signature and pre-fill
+ # any unbound parameters with lf(param_name).
+ self.kwargs.update(_fill_unbound_params(self._func, self.args, self.kwargs))
else:
self.name = callable_or_name
self._func = None
- self.args = args
- self.kwargs = kwargs
def get_func(self, request: pytest.FixtureRequest) -> Callable:
func = self._func
diff --git a/contrib/python/pytest-lazy-fixtures/ya.make b/contrib/python/pytest-lazy-fixtures/ya.make
index e8efb1b06d8..909297765fb 100644
--- a/contrib/python/pytest-lazy-fixtures/ya.make
+++ b/contrib/python/pytest-lazy-fixtures/ya.make
@@ -2,7 +2,7 @@
PY3_LIBRARY()
-VERSION(1.3.4)
+VERSION(1.4.0)
LICENSE(MIT)
diff --git a/contrib/python/pytest-mock/py3/.dist-info/METADATA b/contrib/python/pytest-mock/py3/.dist-info/METADATA
index 5b6926488ea..fea492d9e38 100644
--- a/contrib/python/pytest-mock/py3/.dist-info/METADATA
+++ b/contrib/python/pytest-mock/py3/.dist-info/METADATA
@@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: pytest-mock
-Version: 3.15.0
+Version: 3.15.1
Summary: Thin-wrapper around the mock package for easier use with pytest
Author-email: Bruno Oliveira <[email protected]>
License: MIT
diff --git a/contrib/python/pytest-mock/py3/pytest_mock/_util.py b/contrib/python/pytest-mock/py3/pytest_mock/_util.py
index ad830caeefa..d3a732ac278 100644
--- a/contrib/python/pytest-mock/py3/pytest_mock/_util.py
+++ b/contrib/python/pytest-mock/py3/pytest_mock/_util.py
@@ -15,7 +15,7 @@ def get_mock_module(config):
config.getini("mock_use_standalone_module")
)
if use_standalone_module:
- from unittest import mock
+ import mock
_mock_module = mock
else:
diff --git a/contrib/python/pytest-mock/py3/pytest_mock/_version.py b/contrib/python/pytest-mock/py3/pytest_mock/_version.py
index 7b8272a4140..7b769ace60a 100644
--- a/contrib/python/pytest-mock/py3/pytest_mock/_version.py
+++ b/contrib/python/pytest-mock/py3/pytest_mock/_version.py
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
commit_id: COMMIT_ID
__commit_id__: COMMIT_ID
-__version__ = version = '3.15.0'
-__version_tuple__ = version_tuple = (3, 15, 0)
+__version__ = version = '3.15.1'
+__version_tuple__ = version_tuple = (3, 15, 1)
__commit_id__ = commit_id = None
diff --git a/contrib/python/pytest-mock/py3/pytest_mock/plugin.py b/contrib/python/pytest-mock/py3/pytest_mock/plugin.py
index f4dbfc3ec6e..ef996121333 100644
--- a/contrib/python/pytest-mock/py3/pytest_mock/plugin.py
+++ b/contrib/python/pytest-mock/py3/pytest_mock/plugin.py
@@ -157,13 +157,16 @@ class MockerFixture:
"""
self._mock_cache.remove(mock)
- def spy(self, obj: object, name: str) -> MockType:
+ def spy(
+ self, obj: object, name: str, duplicate_iterators: bool = False
+ ) -> MockType:
"""
Create a spy of method. It will run method normally, but it is now
possible to use `mock` call features with it, like call count.
:param obj: An object.
:param name: A method in object.
+ :param duplicate_iterators: Whether to keep a copy of the returned iterator in `spy_return_iter`.
:return: Spy object.
"""
method = getattr(obj, name)
@@ -177,7 +180,7 @@ class MockerFixture:
spy_obj.spy_exception = e
raise
else:
- if isinstance(r, Iterator):
+ if duplicate_iterators and isinstance(r, Iterator):
r, duplicated_iterator = itertools.tee(r, 2)
spy_obj.spy_return_iter = duplicated_iterator
else:
diff --git a/contrib/python/pytest-mock/py3/tests/test_pytest_mock.py b/contrib/python/pytest-mock/py3/tests/test_pytest_mock.py
index 499c0a2b99d..f484705870f 100644
--- a/contrib/python/pytest-mock/py3/tests/test_pytest_mock.py
+++ b/contrib/python/pytest-mock/py3/tests/test_pytest_mock.py
@@ -541,13 +541,15 @@ def test_callable_like_spy(testdir: Any, mocker: MockerFixture) -> None:
@pytest.mark.parametrize("iterator", [(i for i in range(3)), iter([0, 1, 2])])
-def test_spy_return_iter(mocker: MockerFixture, iterator: Iterator[int]) -> None:
+def test_spy_return_iter_duplicates_iterator_when_enabled(
+ mocker: MockerFixture, iterator: Iterator[int]
+) -> None:
class Foo:
def bar(self) -> Iterator[int]:
return iterator
foo = Foo()
- spy = mocker.spy(foo, "bar")
+ spy = mocker.spy(foo, "bar", duplicate_iterators=True)
result = list(foo.bar())
assert result == [0, 1, 2]
@@ -559,8 +561,27 @@ def test_spy_return_iter(mocker: MockerFixture, iterator: Iterator[int]) -> None
assert isinstance(return_value, Iterator)
[email protected]("iterator", [(i for i in range(3)), iter([0, 1, 2])])
+def test_spy_return_iter_is_not_set_when_disabled(
+ mocker: MockerFixture, iterator: Iterator[int]
+) -> None:
+ class Foo:
+ def bar(self) -> Iterator[int]:
+ return iterator
+
+ foo = Foo()
+ spy = mocker.spy(foo, "bar", duplicate_iterators=False)
+ result = list(foo.bar())
+
+ assert result == [0, 1, 2]
+ assert spy.spy_return is not None
+ assert spy.spy_return_iter is None
+ [return_value] = spy.spy_return_list
+ assert isinstance(return_value, Iterator)
+
+
@pytest.mark.parametrize("iterable", [(0, 1, 2), [0, 1, 2], range(3)])
-def test_spy_return_iter_ignore_plain_iterable(
+def test_spy_return_iter_ignores_plain_iterable(
mocker: MockerFixture, iterable: Iterable[int]
) -> None:
class Foo:
@@ -568,7 +589,7 @@ def test_spy_return_iter_ignore_plain_iterable(
return iterable
foo = Foo()
- spy = mocker.spy(foo, "bar")
+ spy = mocker.spy(foo, "bar", duplicate_iterators=True)
result = foo.bar()
assert result == iterable
@@ -588,7 +609,7 @@ def test_spy_return_iter_resets(mocker: MockerFixture) -> None:
return self.iterables.pop(0)
foo = Foo()
- spy = mocker.spy(foo, "bar")
+ spy = mocker.spy(foo, "bar", duplicate_iterators=True)
result_iterator = list(foo.bar())
assert result_iterator == [0, 1, 2]
@@ -644,7 +665,7 @@ def assert_argument_introspection(left: Any, right: Any) -> Generator[None, None
expected = "\n ".join(util._compare_eq_iterable(left, right, verbose)) # type:ignore[arg-type]
else:
expected = "\n ".join(
- util._compare_eq_iterable(left, right, lambda t, *_: t, verbose)
+ util._compare_eq_iterable(left, right, lambda t, *_, **__: t, verbose) # type:ignore[arg-type]
)
assert expected in str(e)
else:
diff --git a/contrib/python/pytest-mock/py3/ya.make b/contrib/python/pytest-mock/py3/ya.make
index ec2d0bd0536..42088e49795 100644
--- a/contrib/python/pytest-mock/py3/ya.make
+++ b/contrib/python/pytest-mock/py3/ya.make
@@ -2,7 +2,7 @@
PY3_LIBRARY()
-VERSION(3.15.0)
+VERSION(3.15.1)
LICENSE(MIT)