summaryrefslogtreecommitdiffstats
path: root/contrib/python
diff options
context:
space:
mode:
authorYDBot <[email protected]>2026-06-11 08:53:28 +0000
committerYDBot <[email protected]>2026-06-11 08:53:28 +0000
commit63678b5fc10100d46f20e5a1cda9293bc452db13 (patch)
treec3aa90056ecff5847be136f4d4c6176dff81ef4a /contrib/python
parentfe7198e04ab06a7dc38155db710a6094c1802ea8 (diff)
parenta005685f299e0619bfddca532730794a5e62fd80 (diff)
Merge pull request #43191 from ydb-platform/merge-rightlib-260611-0131
Diffstat (limited to 'contrib/python')
-rw-r--r--contrib/python/clickhouse-connect/.dist-info/METADATA28
-rw-r--r--contrib/python/clickhouse-connect/README.md21
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/_version.py2
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/__init__.py34
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/alembic/__init__.py21
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/alembic/adapter.py210
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/alembic/impl.py365
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/alembic/utils.py85
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/datatypes/sqltypes.py77
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/ddl/__init__.py3
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/ddl/dictionary.py53
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/ddl/tableengine.py350
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/dialect.py47
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/engines.py1
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/inspector.py161
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/__init__.py131
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/clauses.py89
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/compiler.py138
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/ddlcompiler.py186
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/sqlparse.py62
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/types.py1
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/driver/asyncclient.py24
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/driver/binding.py29
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py33
-rw-r--r--contrib/python/clickhouse-connect/ya.make10
-rw-r--r--contrib/python/hypothesis/py3/.dist-info/METADATA2
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/control.py34
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/errors.py48
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/extra/_patching.py13
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/extra/django/_fields.py2
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py11
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py56
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py12
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/internal/conjecture/providers.py31
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinker.py18
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/common.py2
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/internal/coverage.py2
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/internal/entropy.py2
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/internal/healthcheck.py11
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/provisional.py1
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/stateful.py55
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py7
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/strategies/_internal/recursive.py24
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/strategies/_internal/regex.py100
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/strategies/_internal/strategies.py2
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/strategies/_internal/types.py66
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/version.py2
-rw-r--r--contrib/python/hypothesis/py3/ya.make2
48 files changed, 2308 insertions, 356 deletions
diff --git a/contrib/python/clickhouse-connect/.dist-info/METADATA b/contrib/python/clickhouse-connect/.dist-info/METADATA
index 3f24e9dbb73..fb2f9d27c55 100644
--- a/contrib/python/clickhouse-connect/.dist-info/METADATA
+++ b/contrib/python/clickhouse-connect/.dist-info/METADATA
@@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: clickhouse-connect
-Version: 1.0.1
+Version: 1.1.0
Summary: ClickHouse Database Core Driver for Python, Pandas, and Superset
Home-page: https://github.com/ClickHouse/clickhouse-connect
Author: ClickHouse Inc.
@@ -27,6 +27,9 @@ Requires-Dist: lz4; python_version < "3.14"
Requires-Dist: lz4>=4.4.5; python_version >= "3.14"
Provides-Extra: sqlalchemy
Requires-Dist: sqlalchemy<3.0,>=1.4.40; extra == "sqlalchemy"
+Provides-Extra: alembic
+Requires-Dist: sqlalchemy<3.0,>=1.4.40; extra == "alembic"
+Requires-Dist: alembic>=1.16; extra == "alembic"
Provides-Extra: numpy
Requires-Dist: numpy; extra == "numpy"
Provides-Extra: pandas
@@ -43,7 +46,7 @@ Requires-Dist: tzlocal>=4.0; extra == "tzlocal"
Provides-Extra: tzdata
Requires-Dist: tzdata; extra == "tzdata"
Provides-Extra: async
-Requires-Dist: aiohttp>=3.8.0; extra == "async"
+Requires-Dist: aiohttp>=3.9.0; extra == "async"
Dynamic: author
Dynamic: author-email
Dynamic: classifier
@@ -105,6 +108,7 @@ Supported features include:
`ARRAY JOIN` (single and multi-column), `FINAL`, and `SAMPLE`
- `VALUES` table function syntax
- Lightweight `DELETE` statements
+- **Alembic** schema migrations (autogenerate, upgrade/downgrade, ClickHouse engine support)
A small number of features require SQLAlchemy 2.x: `Values.cte()` and certain literal-rendering behaviors.
All other dialect features, including those used by Superset, work on both 1.4 and 2.x.
@@ -114,6 +118,26 @@ Basic ORM usage works for insert-heavy, read-focused workloads: declarative mode
provided. UPDATE compilation, foreign key/relationship reflection, autoincrement/RETURNING, and cascade operations
are not implemented. The dialect is best suited for SQLAlchemy Core usage and Superset connectivity.
+#### Alembic Migrations
+
+ClickHouse Connect supports [Alembic](https://alembic.sqlalchemy.org/) for schema migrations, including
+autogeneration of migration scripts from SQLAlchemy metadata. ClickHouse table engines (`MergeTree`,
+`ReplacingMergeTree`, etc.) and dictionaries are preserved through the migration lifecycle.
+
+Supported operations include create/drop table, add/alter/drop/rename column, type and nullability
+changes, defaults, comments, and ClickHouse-specific features like `IF EXISTS` guards, column
+placement with `AFTER`, and operation-level `clickhouse_settings` on column add/alter/drop.
+
+To get started, install the Alembic extra:
+
+```bash
+pip install clickhouse-connect[alembic]
+```
+
+See the [Alembic worked example](clickhouse_connect/cc_sqlalchemy/alembic/WORKED_EXAMPLE.md) for a
+full end-to-end walkthrough covering setup, autogeneration, upgrades, downgrades, and manual
+migration operations.
+
### Asyncio Support
ClickHouse Connect provides native async support using aiohttp. To use the async client,
diff --git a/contrib/python/clickhouse-connect/README.md b/contrib/python/clickhouse-connect/README.md
index 243dac6c5c4..aa8abeb49b3 100644
--- a/contrib/python/clickhouse-connect/README.md
+++ b/contrib/python/clickhouse-connect/README.md
@@ -45,6 +45,7 @@ Supported features include:
`ARRAY JOIN` (single and multi-column), `FINAL`, and `SAMPLE`
- `VALUES` table function syntax
- Lightweight `DELETE` statements
+- **Alembic** schema migrations (autogenerate, upgrade/downgrade, ClickHouse engine support)
A small number of features require SQLAlchemy 2.x: `Values.cte()` and certain literal-rendering behaviors.
All other dialect features, including those used by Superset, work on both 1.4 and 2.x.
@@ -54,6 +55,26 @@ Basic ORM usage works for insert-heavy, read-focused workloads: declarative mode
provided. UPDATE compilation, foreign key/relationship reflection, autoincrement/RETURNING, and cascade operations
are not implemented. The dialect is best suited for SQLAlchemy Core usage and Superset connectivity.
+#### Alembic Migrations
+
+ClickHouse Connect supports [Alembic](https://alembic.sqlalchemy.org/) for schema migrations, including
+autogeneration of migration scripts from SQLAlchemy metadata. ClickHouse table engines (`MergeTree`,
+`ReplacingMergeTree`, etc.) and dictionaries are preserved through the migration lifecycle.
+
+Supported operations include create/drop table, add/alter/drop/rename column, type and nullability
+changes, defaults, comments, and ClickHouse-specific features like `IF EXISTS` guards, column
+placement with `AFTER`, and operation-level `clickhouse_settings` on column add/alter/drop.
+
+To get started, install the Alembic extra:
+
+```bash
+pip install clickhouse-connect[alembic]
+```
+
+See the [Alembic worked example](clickhouse_connect/cc_sqlalchemy/alembic/WORKED_EXAMPLE.md) for a
+full end-to-end walkthrough covering setup, autogeneration, upgrades, downgrades, and manual
+migration operations.
+
### Asyncio Support
ClickHouse Connect provides native async support using aiohttp. To use the async client,
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/_version.py b/contrib/python/clickhouse-connect/clickhouse_connect/_version.py
index 86774adcc16..b2b60a5505a 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/_version.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/_version.py
@@ -1 +1 @@
-version = "1.0.1"
+version = "1.1.0"
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/__init__.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/__init__.py
index 9762c64a969..ead54e625e2 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/__init__.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/__init__.py
@@ -1,9 +1,39 @@
+from sqlalchemy import Table
+from sqlalchemy.dialects import registry
+
from clickhouse_connect import driver_name
+from clickhouse_connect.cc_sqlalchemy import types
from clickhouse_connect.cc_sqlalchemy.datatypes.base import schema_types
+from clickhouse_connect.cc_sqlalchemy.ddl import tableengine as engines
+from clickhouse_connect.cc_sqlalchemy.ddl.dictionary import Dictionary
from clickhouse_connect.cc_sqlalchemy.sql import final, sample
-from clickhouse_connect.cc_sqlalchemy.sql.clauses import ArrayJoin, ClickHouseJoin, array_join, ch_join
+from clickhouse_connect.cc_sqlalchemy.sql.clauses import ArrayJoin, ClickHouseJoin, Lambda, array_join, ch_join
+from clickhouse_connect.dbapi.cursor import Cursor
+
+registry.register("clickhouse", "clickhouse_connect.cc_sqlalchemy.dialect", "ClickHouseDialect")
+registry.register("clickhouse.connect", "clickhouse_connect.cc_sqlalchemy.dialect", "ClickHouseDialect")
dialect_name = driver_name
ischema_names = schema_types
-__all__ = ["dialect_name", "ischema_names", "array_join", "ArrayJoin", "ch_join", "ClickHouseJoin", "final", "sample"]
+CH_DIALECT = dialect_name
+ClickhouseDictionary = Dictionary
+
+__all__ = [
+ "dialect_name",
+ "CH_DIALECT",
+ "ischema_names",
+ "array_join",
+ "ArrayJoin",
+ "ch_join",
+ "ClickHouseJoin",
+ "Lambda",
+ "final",
+ "sample",
+ "Dictionary",
+ "ClickhouseDictionary",
+ "engines",
+ "types",
+ "Cursor",
+ "Table",
+]
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/alembic/__init__.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/alembic/__init__.py
new file mode 100644
index 00000000000..110f3eca872
--- /dev/null
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/alembic/__init__.py
@@ -0,0 +1,21 @@
+from clickhouse_connect.cc_sqlalchemy.alembic.adapter import (
+ clickhouse_writer,
+ include_object,
+ patch_alembic_version,
+)
+from clickhouse_connect.cc_sqlalchemy.alembic.impl import ClickHouseImpl
+from clickhouse_connect.cc_sqlalchemy.alembic.utils import (
+ make_include_name,
+ make_include_object,
+ prevent_empty_migrations,
+)
+
+__all__ = [
+ "patch_alembic_version",
+ "clickhouse_writer",
+ "include_object",
+ "ClickHouseImpl",
+ "make_include_name",
+ "make_include_object",
+ "prevent_empty_migrations",
+]
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/alembic/adapter.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/alembic/adapter.py
new file mode 100644
index 00000000000..d444b69e73a
--- /dev/null
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/alembic/adapter.py
@@ -0,0 +1,210 @@
+from alembic.autogenerate import render
+from alembic.autogenerate.api import AutogenContext
+from alembic.autogenerate.compare import comparators
+from alembic.operations import Operations, ops
+from alembic.runtime.migration import MigrationContext
+from alembic.util import DispatchPriority, PriorityDispatchResult
+
+from clickhouse_connect.cc_sqlalchemy.datatypes.base import ChSqlaType
+from clickhouse_connect.cc_sqlalchemy.sql.ddlcompiler import ClickHouseDDLHelper
+
+
[email protected]_operation("add_column")
+class ClickHouseAddColumnOp(ops.AddColumnOp):
+ """Re-registers op.add_column with a **kw signature."""
+
+ @classmethod
+ def add_column(cls, operations, table_name, column, *, schema=None, if_not_exists=None, **kw):
+ return operations.invoke(
+ ops.AddColumnOp(
+ table_name,
+ column,
+ schema=schema,
+ if_not_exists=if_not_exists,
+ **kw,
+ )
+ )
+
+
+def patch_alembic_version(context: MigrationContext):
+ """
+ Compatibility hook for existing migration environments.
+
+ Version-table behavior now lives on ClickHouseImpl and no longer requires
+ monkey-patching the Alembic context.
+ """
+ return context
+
+
+def _add_common_imports(directive):
+ directive.imports.add("from clickhouse_connect import cc_sqlalchemy")
+ directive.imports.add("from clickhouse_connect.cc_sqlalchemy.ddl.tableengine import * # noqa: F401,F403")
+ directive.imports.add("from clickhouse_connect.cc_sqlalchemy.datatypes.sqltypes import * # noqa: F401,F403")
+
+
+def clickhouse_writer(context: MigrationContext, revision, directives):
+ """
+ A processing hook for autogeneration.
+
+ Ensures that generated migration scripts include necessary imports
+ and that ClickHouse-specific constructs like Engines are preserved.
+ """
+ for directive in directives:
+ if directive.upgrade_ops and not directive.upgrade_ops.is_empty():
+ _add_common_imports(directive)
+
+ if directive.downgrade_ops and not directive.downgrade_ops.is_empty():
+ _add_common_imports(directive)
+
+
+def render_clickhouse_column(column, autogen_context: AutogenContext) -> str:
+ rendered = render._user_defined_render("column", column, autogen_context)
+ if rendered is not False:
+ return rendered
+
+ args = []
+ opts = []
+
+ if column.server_default:
+ rendered_default = render._render_server_default(column.server_default, autogen_context)
+ if rendered_default:
+ if render._should_render_server_default_positionally(column.server_default):
+ args.append(rendered_default)
+ else:
+ opts.append(("server_default", rendered_default))
+
+ if column.autoincrement is not None and column.autoincrement != render.sqla_compat.AUTOINCREMENT_DEFAULT:
+ opts.append(("autoincrement", column.autoincrement))
+
+ explicit_nullable = ClickHouseDDLHelper.explicit_column_nullable(column)
+ if column.nullable is not None and explicit_nullable is not None:
+ opts.append(("nullable", column.nullable))
+
+ if column.system:
+ opts.append(("system", column.system))
+
+ if column.comment:
+ opts.append(("comment", repr(column.comment)))
+
+ return "{prefix}Column({name!r}, {type}, {args}{kwargs})".format(
+ prefix=render._sqlalchemy_autogenerate_prefix(autogen_context),
+ name=render._ident(column.name),
+ type=render._repr_type(column.type, autogen_context),
+ args=", ".join(str(arg) for arg in args) + ", " if args else "",
+ kwargs=", ".join(
+ [f"{key}={value}" for key, value in opts]
+ + [f"{key}={render._render_potential_expr(value, autogen_context)}" for key, value in column.kwargs.items()]
+ ),
+ )
+
+
[email protected]_for(ops.CreateTableOp, replace=True)
+def render_create_table(autogen_context: AutogenContext, op: ops.CreateTableOp) -> str:
+ table = op.to_table()
+
+ args = [column for column in [render_clickhouse_column(column, autogen_context) for column in table.columns] if column] + sorted(
+ [
+ constraint
+ for constraint in [render._render_constraint(cons, autogen_context, op._namespace_metadata) for cons in table.constraints]
+ if constraint is not None
+ ]
+ )
+
+ if len(args) > render.MAX_PYTHON_ARGS:
+ args_sql = "*[" + ",\n".join(args) + "]"
+ else:
+ args_sql = ",\n".join(args)
+
+ prefix = render._alembic_autogenerate_prefix(autogen_context)
+ rendered = f"{prefix}create_table({render._ident(op.table_name)!r},\n{args_sql}"
+ if op.schema:
+ rendered += f",\nschema={render._ident(op.schema)!r}"
+
+ if table.comment:
+ rendered += f",\ncomment={render._ident(table.comment)!r}"
+
+ if table.info:
+ rendered += f",\ninfo={table.info!r}"
+
+ for key in sorted(op.kw):
+ rendered += f",\n{key.replace(' ', '_')}={op.kw[key]!r}"
+
+ if op.if_not_exists is not None:
+ rendered += f",\nif_not_exists={bool(op.if_not_exists)!r}"
+
+ rendered += "\n)"
+ return rendered
+
+
[email protected]_for(ops.AddColumnOp, replace=True)
+def render_add_column(autogen_context: AutogenContext, op: ops.AddColumnOp) -> str:
+ schema, table_name, column, if_not_exists = op.schema, op.table_name, op.column, op.if_not_exists
+ prefix = render._alembic_autogenerate_prefix(autogen_context)
+ rendered_column = render_clickhouse_column(column, autogen_context)
+ if autogen_context._has_batch:
+ return f"{prefix}add_column({rendered_column})"
+ rendered = f"{prefix}add_column({table_name!r}, {rendered_column}"
+ if schema:
+ rendered += f", schema={schema!r}"
+ if if_not_exists is not None:
+ rendered += f", if_not_exists={if_not_exists!r}"
+ for key in sorted(op.kw):
+ rendered += f", {key}={op.kw[key]!r}"
+ return rendered + ")"
+
+
[email protected]_for(ops.DropTableOp, replace=True)
+def render_drop_table(autogen_context: AutogenContext, op: ops.DropTableOp) -> str:
+ prefix = render._alembic_autogenerate_prefix(autogen_context)
+ rendered = f"{prefix}drop_table({render._ident(op.table_name)!r}"
+ arguments = []
+ if op.schema:
+ arguments.append(f"schema={render._ident(op.schema)!r}")
+ if op.if_exists is not None:
+ arguments.append(f"if_exists={bool(op.if_exists)!r}")
+ for key in sorted(op.table_kw):
+ arguments.append(f"{key.replace(' ', '_')}={op.table_kw[key]!r}")
+ if arguments:
+ rendered += ",\n" + ",\n".join(arguments)
+ rendered += ")"
+ return rendered
+
+
+def include_object(object_, name, type_, reflected, compare_to):
+ """
+ Standard filter for ClickHouse system tables and internal objects.
+ """
+ # Guard against None name which can happen in some Alembic versions/contexts
+ if not name:
+ return True
+
+ if type_ == "table":
+ if name == "alembic_version":
+ return False
+ # Ignore system tables
+ if object_.schema == "system":
+ return False
+ # Ignore internal tables (Materialized View storage)
+ if name.startswith(".inner"):
+ return False
+
+ return True
+
+
[email protected]_for("column", qualifier="clickhousedb", priority=DispatchPriority.FIRST, subgroup="nullable")
+def compare_nullable(context, alter_column_op, schema, table_name, column_name, inspector_column, metadata_column):
+ inspector_type = inspector_column.type
+ metadata_type = metadata_column.type
+ if not isinstance(inspector_type, ChSqlaType) or not isinstance(metadata_type, ChSqlaType):
+ return PriorityDispatchResult.CONTINUE
+
+ inspector_nullable = inspector_type.nullable
+ explicit_nullable = ClickHouseDDLHelper.explicit_column_nullable(metadata_column)
+ if explicit_nullable is None and not metadata_type.nullable:
+ metadata_nullable = inspector_nullable
+ else:
+ metadata_nullable = ClickHouseDDLHelper.column_nullable(metadata_column)
+ alter_column_op.existing_nullable = inspector_nullable
+ if inspector_nullable != metadata_nullable:
+ alter_column_op.modify_nullable = metadata_nullable
+ return PriorityDispatchResult.STOP
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/alembic/impl.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/alembic/impl.py
new file mode 100644
index 00000000000..771692ad015
--- /dev/null
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/alembic/impl.py
@@ -0,0 +1,365 @@
+from __future__ import annotations
+
+from types import SimpleNamespace
+from typing import Any
+
+from alembic.ddl.impl import DefaultImpl
+from alembic.util import CommandError
+from sqlalchemy import Column, MetaData, String, Table, text
+from sqlalchemy.sql.dml import Delete, Update
+
+from clickhouse_connect.cc_sqlalchemy.datatypes.base import ChSqlaType, sqla_type_from_name
+from clickhouse_connect.cc_sqlalchemy.datatypes.sqltypes import Array as ChSqlaArray
+from clickhouse_connect.cc_sqlalchemy.datatypes.sqltypes import Enum as ChSqlaEnum
+from clickhouse_connect.cc_sqlalchemy.datatypes.sqltypes import Map as ChSqlaMap
+from clickhouse_connect.cc_sqlalchemy.datatypes.sqltypes import Nullable
+from clickhouse_connect.cc_sqlalchemy.datatypes.sqltypes import Tuple as ChSqlaTuple
+from clickhouse_connect.cc_sqlalchemy.ddl.tableengine import MergeTree
+from clickhouse_connect.cc_sqlalchemy.sql import full_table
+from clickhouse_connect.cc_sqlalchemy.sql.ddlcompiler import (
+ ClickHouseDDLHelper,
+ column_specification,
+)
+from clickhouse_connect.driver.binding import quote_identifier
+
+
+def _render_ch_type(type_obj):
+ """Render a ChSqlaType as valid Python source for autogen migrations"""
+ wrappers = type_obj.type_def.wrappers
+ if isinstance(type_obj, ChSqlaEnum):
+ keys = list(type_obj.type_def.keys)
+ values = list(type_obj.type_def.values)
+ rendered = f"{type_obj.__class__.__name__}(keys={keys!r}, values={values!r})"
+ elif isinstance(type_obj, ChSqlaArray):
+ rendered = f"Array({_render_inner(type_obj.type_def.values[0])})"
+ elif isinstance(type_obj, ChSqlaMap):
+ key, value = type_obj.type_def.values
+ rendered = f"Map({_render_inner(key)}, {_render_inner(value)})"
+ elif isinstance(type_obj, ChSqlaTuple):
+ elements = ", ".join(_render_inner(v) for v in type_obj.type_def.values)
+ rendered = f"Tuple({elements})"
+ else:
+ return str(type_obj.name)
+ for wrapper in reversed(wrappers):
+ rendered = f"{wrapper}({rendered})"
+ return rendered
+
+
+def _render_inner(name):
+ return _render_ch_type(sqla_type_from_name(name))
+
+
+class ClickHouseImpl(DefaultImpl):
+ __dialect__ = "clickhousedb"
+ transactional_ddl = False
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ if self.context_opts.get("include_schemas") and not self.context_opts.get("version_table_schema") and self.connection is not None:
+ current_database = self.connection.execute(text("SELECT currentDatabase()")).scalar()
+ if current_database:
+ self.context_opts["version_table_schema"] = current_database
+
+ def version_table_impl(
+ self,
+ *,
+ version_table: str,
+ version_table_schema: str | None,
+ version_table_pk: bool,
+ **_kw: Any,
+ ) -> Table:
+ return Table(
+ version_table,
+ MetaData(),
+ Column("version_num", String(32), nullable=False),
+ MergeTree(order_by="version_num"),
+ schema=version_table_schema,
+ )
+
+ def _exec(
+ self,
+ construct,
+ execution_options=None,
+ multiparams=None,
+ params=None,
+ ) -> Any:
+ if isinstance(construct, Update) and self._is_version_table_construct(construct):
+ return self._exec_version_update(construct, execution_options)
+ if isinstance(construct, Delete) and self._is_version_table_construct(construct):
+ return self._exec_version_delete(construct, execution_options)
+ return super()._exec(
+ construct,
+ execution_options=execution_options,
+ multiparams=multiparams,
+ params=params or {},
+ )
+
+ def add_column(
+ self,
+ table_name: str,
+ column: Column,
+ *,
+ schema: str | None = None,
+ if_not_exists: bool | None = None,
+ **kw: Any,
+ ) -> None:
+ sql = [
+ "ALTER TABLE",
+ full_table(table_name, schema),
+ "ADD COLUMN",
+ ]
+ if if_not_exists:
+ sql.append("IF NOT EXISTS")
+ sql.append(column_specification(self.dialect, column))
+ after = kw.get("after") or ClickHouseDDLHelper.get_option(column, "after")
+ if after:
+ sql.extend(["AFTER", quote_identifier(after)])
+ settings = ClickHouseDDLHelper.render_settings(kw.get("clickhouse_settings"))
+ if settings:
+ sql.extend(["SETTINGS", settings])
+ self._exec(text(" ".join(sql)))
+
+ def drop_column(
+ self,
+ table_name: str,
+ column: Column,
+ *,
+ schema: str | None = None,
+ if_exists: bool | None = None,
+ **kw: Any,
+ ) -> None:
+ sql = ["ALTER TABLE", full_table(table_name, schema), "DROP COLUMN"]
+ if if_exists:
+ sql.append("IF EXISTS")
+ sql.append(quote_identifier(column.name))
+ settings = ClickHouseDDLHelper.render_settings(kw.get("clickhouse_settings"))
+ if settings:
+ sql.extend(["SETTINGS", settings])
+ self._exec(text(" ".join(sql)))
+
+ def create_table_comment(self, table: Table) -> None:
+ self._exec(text(self._comment_table_sql(table, table.comment)))
+
+ def drop_table_comment(self, table: Table) -> None:
+ self._exec(text(self._comment_table_sql(table, None)))
+
+ def alter_column(
+ self,
+ table_name: str,
+ column_name: str,
+ *,
+ nullable: bool | None = None,
+ server_default=False,
+ name: str | None = None,
+ type_=None,
+ schema: str | None = None,
+ autoincrement: bool | None = None,
+ comment=False,
+ existing_comment: str | None = None,
+ existing_type=None,
+ existing_server_default=None,
+ existing_nullable: bool | None = None,
+ existing_autoincrement: bool | None = None,
+ if_exists: bool | None = None,
+ **kw: Any,
+ ) -> None:
+ if autoincrement is not None or existing_autoincrement is not None:
+ return
+ if name is not None:
+ rename_sql = ["ALTER TABLE", full_table(table_name, schema), "RENAME COLUMN"]
+ if if_exists:
+ rename_sql.append("IF EXISTS")
+ rename_sql.extend([quote_identifier(column_name), "TO", quote_identifier(name)])
+ self._exec(text(" ".join(rename_sql)))
+ column_name = name
+
+ settings = ClickHouseDDLHelper.render_settings(kw.get("clickhouse_settings"))
+ will_modify = nullable is not None or server_default is not False or type_ is not None
+
+ if comment is not False and not will_modify:
+ self._exec(text(self._comment_column_sql(table_name, column_name, comment, schema, settings)))
+
+ if not will_modify:
+ return
+
+ if type_ is not None:
+ effective_type = type_
+ else:
+ effective_type = existing_type
+ if effective_type is None:
+ raise CommandError(f"ClickHouse alter_column requires existing_type for {table_name}.{column_name}")
+ if nullable is not None:
+ effective_type = self._set_type_nullable(effective_type, nullable)
+
+ sql = [
+ "ALTER TABLE",
+ full_table(table_name, schema),
+ "MODIFY COLUMN",
+ ]
+ if if_exists:
+ sql.append("IF EXISTS")
+ sql.append(
+ column_specification(
+ self.dialect,
+ Column(
+ column_name,
+ effective_type,
+ server_default=None if server_default is False else server_default,
+ comment=existing_comment if comment is False else comment,
+ ),
+ )
+ )
+ if settings:
+ sql.extend(["SETTINGS", settings])
+ self._exec(text(" ".join(sql)))
+
+ def compare_type(self, inspector_column, metadata_column) -> bool:
+ inspector_type = inspector_column.type
+ metadata_type = metadata_column.type
+ explicit_nullable = ClickHouseDDLHelper.explicit_column_nullable(metadata_column)
+ if explicit_nullable is None and isinstance(inspector_type, ChSqlaType) and isinstance(metadata_type, ChSqlaType):
+ inspector_type = ClickHouseDDLHelper.without_nullable(inspector_type)
+ metadata_type = ClickHouseDDLHelper.without_nullable(metadata_type)
+ else:
+ metadata_type = ClickHouseDDLHelper.effective_column_type(metadata_column)
+ inspector_type = self._normalize_type_name(inspector_type)
+ metadata_type = self._normalize_type_name(metadata_type)
+ return inspector_type != metadata_type
+
+ def compare_server_default(
+ self,
+ inspector_column,
+ metadata_column,
+ rendered_metadata_default,
+ rendered_inspector_default,
+ ):
+ return self._normalize_default(rendered_inspector_default) != self._normalize_default(rendered_metadata_default)
+
+ def render_type(self, type_obj, autogen_context):
+ if not isinstance(type_obj, ChSqlaType):
+ return False
+ return _render_ch_type(type_obj)
+
+ def _exec_version_update(self, construct: Update, execution_options=None):
+ # Alembic emits a normal SQLAlchemy Update here, but ClickHouse version tracking
+ # needs insert + mutation delete semantics. SQLAlchemy does not expose a stable
+ # public API for these values across versions, so this depends on the current
+ # Update internals.
+ values = construct._values
+ if not values:
+ raise CommandError("ClickHouse Alembic version update is missing values")
+ version_value = self._compile_clause(list(values.values())[0])
+ where_clause = self._compile_version_where(construct)
+ self._exec(text(f"INSERT INTO {self._version_table_name} (version_num) VALUES ({version_value})"))
+ self._exec(text(f"ALTER TABLE {self._version_table_name} DELETE WHERE {where_clause} SETTINGS mutations_sync = 2"))
+ return SimpleNamespace(rowcount=1)
+
+ def _exec_version_delete(self, construct: Delete, execution_options=None):
+ where_clause = self._compile_version_where(construct)
+ return super()._exec(
+ text(f"ALTER TABLE {self._version_table_name} DELETE WHERE {where_clause} SETTINGS mutations_sync = 2"),
+ execution_options=execution_options,
+ )
+
+ @property
+ def _version_table_name(self) -> str:
+ schema = self.context_opts.get("version_table_schema")
+ table = self.context_opts.get("version_table", "alembic_version")
+ if schema:
+ return f"{quote_identifier(schema)}.{quote_identifier(table)}"
+ return quote_identifier(table)
+
+ def _is_version_table_construct(self, construct) -> bool:
+ table = getattr(construct, "table", None)
+ if table is None:
+ return False
+ if table.name != self.context_opts.get("version_table", "alembic_version"):
+ return False
+ expected_schema = self.context_opts.get("version_table_schema")
+ # Alembic captures version_table_schema before ClickHouseImpl.__init__
+ # has a chance to set it, so the _version Table may have schema=None
+ # while context_opts has the auto-detected database name.
+ if table.schema == expected_schema:
+ return True
+ if table.schema is None and expected_schema is not None:
+ return True
+ return False
+
+ def _compile_version_where(self, construct) -> str:
+ predicates = []
+ for expression in construct._where_criteria:
+ # SQLAlchemy does not provide a public helper for pulling these predicates
+ # back apart, so this relies on the current binary expression structure.
+ column_name = getattr(getattr(expression, "left", None), "name", None)
+ if not column_name:
+ predicates.append(self._compile_clause(expression))
+ continue
+ right = self._compile_clause(expression.right)
+ predicates.append(f"{quote_identifier(column_name)} = {right}")
+ return " AND ".join(predicates)
+
+ def _compile_clause(self, clause) -> str:
+ return str(
+ clause.compile(
+ dialect=self.dialect,
+ compile_kwargs={"literal_binds": True},
+ )
+ )
+
+ def _comment_column_sql(
+ self,
+ table_name: str,
+ column_name: str,
+ comment: str | None,
+ schema: str | None,
+ settings: str,
+ ) -> str:
+ sql = [
+ "ALTER TABLE",
+ full_table(table_name, schema),
+ "COMMENT COLUMN",
+ quote_identifier(column_name),
+ ClickHouseDDLHelper.render_comment(comment),
+ ]
+ if settings:
+ sql.extend(["SETTINGS", settings])
+ return " ".join(sql)
+
+ @staticmethod
+ def _comment_table_sql(table: Table, comment: str | None) -> str:
+ return " ".join(
+ [
+ "ALTER TABLE",
+ full_table(table.name, table.schema),
+ "MODIFY COMMENT",
+ ClickHouseDDLHelper.render_comment(comment),
+ ]
+ )
+
+ @staticmethod
+ def _normalize_default(default: str | None) -> str | None:
+ if default is None:
+ return None
+ return default.strip()
+
+ @staticmethod
+ def _normalize_type_name(type_: Any) -> str:
+ if hasattr(type_, "name"):
+ return str(type_.name).replace(" ", "")
+ return str(type_).replace(" ", "")
+
+ @staticmethod
+ def _set_type_nullable(type_: Any, nullable: bool):
+ if isinstance(type_, type) and issubclass(type_, ChSqlaType):
+ type_ = type_()
+ if not isinstance(type_, ChSqlaType):
+ return type_
+ if nullable:
+ if type_.nullable:
+ return type_
+
+ return Nullable(type_)
+ if not type_.nullable:
+ return type_
+ wrappers = tuple(wrapper for wrapper in type_.type_def.wrappers if wrapper != "Nullable")
+ return type_.__class__(type_def=type_.type_def.__class__(wrappers, type_.type_def.keys, type_.type_def.values))
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/alembic/utils.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/alembic/utils.py
new file mode 100644
index 00000000000..35ff8fb7b94
--- /dev/null
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/alembic/utils.py
@@ -0,0 +1,85 @@
+from collections.abc import Callable
+from typing import Any
+
+from alembic.operations.ops import MigrationScript
+from alembic.runtime.migration import MigrationContext
+
+from clickhouse_connect.cc_sqlalchemy.alembic.adapter import (
+ include_object as base_include_object,
+)
+
+
+def make_include_name(
+ include_schemas: frozenset[str] | None = None, exclude_mv_pattern: str = "_mv", default_schema: str = "default"
+) -> Callable:
+ """Factory for include_name callback"""
+
+ def include_name_callback(name: str | None, type_: str, parent_names: dict) -> bool:
+ if type_ == "schema":
+ schema_name = name if name else default_schema
+ if include_schemas is not None:
+ return schema_name in include_schemas
+ return True
+
+ if type_ == "table":
+ if isinstance(name, str) and name.endswith(exclude_mv_pattern):
+ return False
+ schema = parent_names.get("schema_name") or default_schema
+ if include_schemas is not None:
+ return schema in include_schemas
+ return True
+
+ return True
+
+ return include_name_callback
+
+
+def make_include_object(
+ exclude_tables: frozenset[str] | None = None,
+ include_schemas: frozenset[str] | None = None,
+ exclude_mv_pattern: str = "_mv",
+ base_include_object_fn: Callable | None = None,
+) -> Callable:
+ """Factory for include_object callback"""
+
+ def include_object_callback(object_: Any, name: str | None, type_: str, reflected: bool, compare_to: Any) -> bool:
+ if base_include_object_fn and not base_include_object_fn(object_, name, type_, reflected, compare_to):
+ return False
+
+ if not base_include_object(object_, name, type_, reflected, compare_to):
+ return False
+
+ if type_ == "table":
+ if include_schemas and object_.schema not in include_schemas:
+ return False
+
+ if isinstance(name, str) and name.endswith(exclude_mv_pattern):
+ return False
+
+ if exclude_tables:
+ fullname = f"{object_.schema}.{name}" if object_.schema else name
+ if fullname in exclude_tables:
+ return False
+ if name in exclude_tables:
+ return False
+
+ return True
+
+ return include_object_callback
+
+
+def prevent_empty_migrations(writer_fn: Callable) -> Callable:
+ """Wrapper to prevent empty migration generation"""
+
+ def wrapper(context: MigrationContext, revision: Any, directives: list[MigrationScript]) -> None:
+ if not directives:
+ return
+ config = context.config
+ if getattr(config.cmd_opts, "autogenerate", False):
+ script = directives[0]
+ if script.upgrade_ops.is_empty():
+ directives.clear()
+ return
+ writer_fn(context, revision, directives)
+
+ return wrapper
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/datatypes/sqltypes.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/datatypes/sqltypes.py
index a11e4c1ee04..94d8e67fb40 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/datatypes/sqltypes.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/datatypes/sqltypes.py
@@ -22,7 +22,7 @@ from sqlalchemy.types import (
String as SqlaString,
)
-from clickhouse_connect.cc_sqlalchemy.datatypes.base import ChSqlaType
+from clickhouse_connect.cc_sqlalchemy.datatypes.base import ChSqlaType, schema_types
from clickhouse_connect.datatypes.base import EMPTY_TYPE_DEF, LC_TYPE_DEF, NULLABLE_TYPE_DEF, TypeDef
from clickhouse_connect.datatypes.numeric import Enum8 as ChEnum8
from clickhouse_connect.datatypes.numeric import Enum16 as ChEnum16
@@ -256,15 +256,34 @@ class Date32(ChSqlaType, SqlaDate):
pass
+_TIMEZONE_SENTINEL = object()
+
+
+def _resolve_tz_alias(tz, timezone):
+ """Resolve `tz=` / `timezone=` alias (clickhouse-sqlalchemy naming). Returns the zone string or None.
+
+ timezone=False maps silently to None: SQLAlchemy's type-adaptation passes it when cloning
+ DateTime (inherited from SqlaDateTime.timezone default). timezone=True is rejected because
+ ClickHouse requires a concrete IANA zone.
+ """
+ if timezone is not _TIMEZONE_SENTINEL:
+ if timezone is True:
+ raise ArgumentError(
+ "timezone=True is not supported for ClickHouse DateTime types; "
+ "pass a named IANA zone string such as timezone='UTC' or timezone='America/New_York'"
+ )
+ if timezone is False:
+ return tz
+ if tz is not None:
+ raise ArgumentError("Cannot specify both 'tz' and 'timezone'; they are aliases")
+ return timezone
+ return tz
+
+
class DateTime(ChSqlaType, SqlaDateTime):
- def __init__(self, tz: str = None, type_def: TypeDef = None):
- """
- Date time constructor with optional ClickHouse timezone parameter if not constructed with TypeDef
- :param tz: IANA timezone key (e.g. "UTC", "America/New_York"). Resolved via the standard
- library zoneinfo module. On platforms without system zoneinfo data (notably
- Windows), install the tzdata package.
- :param type_def: TypeDef from parse_name function
- """
+ def __init__(self, tz: str = None, type_def: TypeDef = None, timezone=_TIMEZONE_SENTINEL):
+ """tz / timezone: IANA zone string (resolved via zoneinfo; install `tzdata` on Windows)."""
+ tz = _resolve_tz_alias(tz, timezone)
if not type_def:
if tz:
tzutil.resolve_zone(tz)
@@ -276,15 +295,9 @@ class DateTime(ChSqlaType, SqlaDateTime):
class DateTime64(ChSqlaType, SqlaDateTime):
- def __init__(self, precision: int = None, tz: str = None, type_def: TypeDef = None):
- """
- Date time constructor with precision and timezone parameters if not constructed with TypeDef
- :param precision: Usually 3/6/9 for mill/micro/nanosecond precision on ClickHouse side
- :param tz: IANA timezone key (e.g. "UTC", "America/New_York"). Resolved via the standard
- library zoneinfo module. On platforms without system zoneinfo data (notably
- Windows), install the tzdata package.
- :param type_def: TypeDef from parse_name function
- """
+ def __init__(self, precision: int = None, tz: str = None, type_def: TypeDef = None, timezone=_TIMEZONE_SENTINEL):
+ """precision: 3/6/9 for ms/us/ns. tz / timezone: IANA zone string."""
+ tz = _resolve_tz_alias(tz, timezone)
if not type_def:
if tz:
tzutil.resolve_zone(tz)
@@ -374,8 +387,6 @@ class Nullable:
"""
if callable(element):
return element(type_def=NULLABLE_TYPE_DEF)
- if element.low_card:
- raise ArgumentError("Low Cardinality type cannot be Nullable")
orig = element.type_def
wrappers = orig if "Nullable" in orig.wrappers else orig.wrappers + ("Nullable",)
return element.__class__(type_def=TypeDef(wrappers, orig.keys, orig.values))
@@ -446,19 +457,32 @@ class Tuple(ChSqlaType, UserDefinedType):
def __init__(
self,
+ *args,
elements: Sequence[ChSqlaType | type[ChSqlaType]] = None,
type_def: TypeDef = None,
):
- """
- Tuple constructor that can take a list of element types if not constructed from a TypeDef
- :param elements: sequence of ChSqlaType instance or class to use as tuple element types
- :param type_def: TypeDef from parse_name function
- """
+ """Tuple(UInt32, UUID) variadic form or Tuple(elements=[UInt32, UUID]) list form, not both."""
+ if type_def is None and not args and elements is None:
+ # SA's dialect_impl -> adapt -> constructor_copy can call cls() with no args
+ # because get_cls_kwargs doesn't see keyword-only args behind *args.
+ # adapt() below preserves the real type_def; this branch just avoids a crash.
+ type_def = EMPTY_TYPE_DEF
if not type_def:
+ if args and elements is not None:
+ raise ArgumentError("Cannot specify both positional elements and the 'elements' kwarg")
+ if args:
+ elements = args
values = [et() if callable(et) else et for et in elements]
type_def = TypeDef(values=tuple(v.name for v in values))
super().__init__(type_def)
+ def adapt(self, cls, **kw):
+ # Bypass SA's constructor_copy: it can't see keyword-only args behind *args and
+ # would produce an empty Tuple. Copy state directly.
+ inst = cls.__new__(cls)
+ inst.__dict__.update(self.__dict__)
+ return inst
+
class JSON(ChSqlaType, UserDefinedType):
"""
@@ -544,3 +568,6 @@ class QBit(ChSqlaType, UserDefinedType):
raise ArgumentError("QBit requires element_type and dimension parameters")
type_def = TypeDef(values=(element_type, dimension))
super().__init__(type_def)
+
+
+__all__ = sorted(schema_types) + ["LowCardinality", "Nullable"]
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/ddl/__init__.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/ddl/__init__.py
index e69de29bb2d..5d1c585f94e 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/ddl/__init__.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/ddl/__init__.py
@@ -0,0 +1,3 @@
+from clickhouse_connect.cc_sqlalchemy.ddl import tableengine
+
+__all__ = ["tableengine"]
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/ddl/dictionary.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/ddl/dictionary.py
new file mode 100644
index 00000000000..7f01591bfbc
--- /dev/null
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/ddl/dictionary.py
@@ -0,0 +1,53 @@
+from sqlalchemy import Table
+
+_DICTIONARY_KWARGS = ("source", "layout", "lifetime", "primary_key")
+
+
+def _pop_dictionary_kwargs(kwargs):
+ """Pop Dictionary-specific kwargs before Table validates them."""
+ return {k: kwargs.pop(k, None) for k in _DICTIONARY_KWARGS}
+
+
+def _apply_dictionary_metadata(table, popped):
+ """Set dialect-prefixed kwargs on the table after construction."""
+ table.kwargs["clickhouse_table_type"] = "dictionary"
+ if popped.get("source") is not None:
+ table.source = popped["source"]
+ table.kwargs["clickhouse_dictionary_source"] = popped["source"]
+ if popped.get("layout") is not None:
+ table.layout = popped["layout"]
+ table.kwargs["clickhouse_dictionary_layout"] = popped["layout"]
+ if popped.get("lifetime") is not None:
+ table.lifetime = popped["lifetime"]
+ table.kwargs["clickhouse_dictionary_lifetime"] = popped["lifetime"]
+ if popped.get("primary_key") is not None:
+ table.primary_key_def = popped["primary_key"]
+ table.kwargs["clickhouse_dictionary_primary_key"] = popped["primary_key"]
+
+
+class Dictionary(Table):
+ """
+ Represents a ClickHouse Dictionary.
+
+ Inherits from Table so it can be attached to metadata and have columns.
+
+ Custom kwargs must be intercepted before Table's dialect-kwarg validation
+ runs. The interception point differs between SQLAlchemy versions:
+
+ - SQA 1.4: Table.__new__ calls _init() directly, bypassing __init__
+ - SQA 2.x: Table.__new__ calls __init__() directly (no _init)
+
+ We override both to handle either path.
+ """
+
+ __visit_name__ = "dictionary"
+
+ def __init__(self, name, metadata, *args, **kwargs):
+ popped = _pop_dictionary_kwargs(kwargs)
+ super().__init__(name, metadata, *args, **kwargs)
+ _apply_dictionary_metadata(self, popped)
+
+ def _init(self, name, metadata, *args, **kwargs):
+ popped = _pop_dictionary_kwargs(kwargs)
+ super()._init(name, metadata, *args, **kwargs)
+ _apply_dictionary_metadata(self, popped)
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/ddl/tableengine.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/ddl/tableengine.py
index 9e5aaeba3a5..85759cfc6df 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/ddl/tableengine.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/ddl/tableengine.py
@@ -1,16 +1,42 @@
import logging
from collections.abc import Sequence
+from typing import Any
+from sqlalchemy import Column
from sqlalchemy.exc import ArgumentError, SQLAlchemyError
from sqlalchemy.sql.base import SchemaEventTarget
+from sqlalchemy.sql.elements import TextClause
from sqlalchemy.sql.visitors import Visitable
+from clickhouse_connect.cc_sqlalchemy.sql.sqlparse import split_top_level, walk_sql
+from clickhouse_connect.driver.binding import format_str, quote_identifier
+from clickhouse_connect.driver.parser import parse_callable
+
logger = logging.getLogger(__name__)
engine_map: dict[str, type["TableEngine"]] = {}
+EngineExpr = str | TextClause | Column
+EngineParam = EngineExpr | Sequence[EngineExpr] | None
+ENGINE_CLAUSES = ("ORDER BY", "PARTITION BY", "PRIMARY KEY", "SAMPLE BY", "TTL", "SETTINGS")
+
+
+def _render_engine_expr(value: EngineExpr) -> str:
+ if isinstance(value, TextClause):
+ return value.text
+ if isinstance(value, Column):
+ return quote_identifier(value.name)
+ return value
+
+
+def _render_setting_value(value: Any) -> str:
+ if isinstance(value, bool):
+ return "1" if value else "0"
+ if isinstance(value, (int, float)):
+ return str(value)
+ return format_str(str(value))
-def tuple_expr(expr_name, value):
+def tuple_expr(expr_name, value: EngineParam):
"""
Create a table parameter with a tuple or list correctly formatted
:param expr_name: parameter
@@ -21,8 +47,25 @@ def tuple_expr(expr_name, value):
return ""
v = f"{expr_name.strip()}"
if isinstance(value, (tuple, list)):
- return f" {v} ({','.join(value)})"
- return f"{v} {value}"
+ return f" {v} ({','.join(_render_engine_expr(item) for item in value)})"
+ return f"{v} {_render_engine_expr(value)}"
+
+
+def repr_engine_value(value: Any) -> str:
+ if isinstance(value, str):
+ return repr(value)
+ if isinstance(value, TextClause):
+ return f"sa.text({value.text!r})"
+ if isinstance(value, Column):
+ return repr(value.name)
+ if isinstance(value, tuple):
+ items = ", ".join(repr_engine_value(item) for item in value)
+ if len(value) == 1:
+ items += ","
+ return f"({items})"
+ if isinstance(value, list):
+ return f"[{', '.join(repr_engine_value(item) for item in value)}]"
+ return repr(value)
class TableEngine(SchemaEventTarget, Visitable):
@@ -40,10 +83,10 @@ class TableEngine(SchemaEventTarget, Visitable):
engine_map[cls.__name__] = cls
def __init__(self, kwargs):
-
Visitable.__init__(self)
self.name = self.__class__.__name__
te_name = f"{self.name} Table Engine"
+ self._orig_kwargs = kwargs.copy()
engine_args = []
for arg_name in self.arg_names:
v = kwargs.pop(arg_name, None)
@@ -62,12 +105,28 @@ class TableEngine(SchemaEventTarget, Visitable):
v = kwargs.pop(param_name, None)
if v is not None:
params.append(tuple_expr(param_name.upper().replace("_", " "), v))
+ settings = kwargs.pop("settings", None)
+ self.settings = settings or {}
self.full_engine = "Engine " + self.name
if engine_args:
self.full_engine += f"({', '.join(engine_args)})"
if params:
self.full_engine += " " + " ".join(params)
+ if self.settings:
+ settings_expr = ", ".join(f"{k} = {_render_setting_value(v)}" for k, v in self.settings.items())
+ self.full_engine += f" SETTINGS {settings_expr}"
+
+ def __repr__(self):
+ """Produce Python code representation of the engine for Alembic autogeneration."""
+ args = []
+ for k, v in self._orig_kwargs.items():
+ if k in {"self", "__class__"}:
+ continue
+ if v is None:
+ continue
+ args.append(f"{k}={repr_engine_value(v)}")
+ return f"{self.name}({', '.join(args)})"
def compile(self):
return self.full_engine
@@ -77,6 +136,8 @@ class TableEngine(SchemaEventTarget, Visitable):
def _set_parent(self, parent, **_kwargs):
parent.engine = self
+ if parent.kwargs.get("clickhouse_engine") is None and parent.kwargs.get("clickhousedb_engine") is None:
+ parent.kwargs["clickhouse_engine"] = self
class Memory(TableEngine):
@@ -128,28 +189,23 @@ class Distributed(TableEngine):
arg_names = ["cluster", "database", "table", "sharding_key", "policy_name"]
optional_args = {"sharding_key", "policy_name"}
- def __init__(
- self,
- cluster: str = None,
- database: str = None,
- table=None,
- sharding_key: str = None,
- policy_name: str = None,
- ):
+ def __init__(self, cluster: str = None, database: str = None, table=None, sharding_key: str = None, policy_name: str = None):
super().__init__(locals())
class MergeTree(TableEngine):
- eng_params = ["order_by", "partition_by", "primary_key", "sample_by"]
+ eng_params = ["order_by", "partition_by", "primary_key", "sample_by", "ttl"]
def __init__(
self,
- order_by: str = None,
- primary_key: str = None,
- partition_by: str = None,
- sample_by: str = None,
+ order_by: EngineParam = None,
+ primary_key: EngineParam = None,
+ partition_by: EngineParam = None,
+ sample_by: EngineParam = None,
+ ttl: EngineExpr | None = None,
+ settings: dict[str, Any] | None = None,
):
- if not order_by and not primary_key:
+ if order_by is None and primary_key is None:
raise ArgumentError(None, "Either PRIMARY KEY or ORDER BY must be specified")
super().__init__(locals())
@@ -167,21 +223,35 @@ class AggregatingMergeTree(MergeTree):
class ReplacingMergeTree(TableEngine):
- arg_names = ["ver"]
+ arg_names = ["version", "is_deleted"]
optional_args = set(arg_names)
eng_params = MergeTree.eng_params
def __init__(
self,
ver: str = None,
- order_by: str = None,
- primary_key: str = None,
- partition_by: str = None,
- sample_by: str = None,
+ version: str = None,
+ is_deleted: str = None,
+ order_by: EngineParam = None,
+ primary_key: EngineParam = None,
+ partition_by: EngineParam = None,
+ sample_by: EngineParam = None,
+ ttl: EngineExpr | None = None,
+ settings: dict[str, Any] | None = None,
):
- if not order_by and not primary_key:
+ if order_by is None and primary_key is None:
raise ArgumentError(None, "Either PRIMARY KEY or ORDER BY must be specified")
- super().__init__(locals())
+ kwargs = {
+ "version": version or ver,
+ "is_deleted": is_deleted,
+ "order_by": order_by,
+ "primary_key": primary_key,
+ "partition_by": partition_by,
+ "sample_by": sample_by,
+ "ttl": ttl,
+ "settings": settings,
+ }
+ super().__init__(kwargs)
class CollapsingMergeTree(TableEngine):
@@ -191,12 +261,14 @@ class CollapsingMergeTree(TableEngine):
def __init__(
self,
sign: str = None,
- order_by: str = None,
- primary_key: str = None,
- partition_by: str = None,
- sample_by: str = None,
+ order_by: EngineParam = None,
+ primary_key: EngineParam = None,
+ partition_by: EngineParam = None,
+ sample_by: EngineParam = None,
+ ttl: EngineExpr | None = None,
+ settings: dict[str, Any] | None = None,
):
- if not order_by and not primary_key:
+ if order_by is None and primary_key is None:
raise ArgumentError(None, "Either PRIMARY KEY or ORDER BY must be specified")
super().__init__(locals())
@@ -209,12 +281,14 @@ class VersionedCollapsingMergeTree(TableEngine):
self,
sign: str = None,
version: str = None,
- order_by: str = None,
- primary_key: str = None,
- partition_by: str = None,
- sample_by: str = None,
+ order_by: EngineParam = None,
+ primary_key: EngineParam = None,
+ partition_by: EngineParam = None,
+ sample_by: EngineParam = None,
+ ttl: EngineExpr | None = None,
+ settings: dict[str, Any] | None = None,
):
- if not order_by and not primary_key:
+ if order_by is None and primary_key is None:
raise ArgumentError(None, "Either PRIMARY KEY or ORDER BY must be specified")
super().__init__(locals())
@@ -228,12 +302,14 @@ class GraphiteMergeTree(TableEngine):
self,
config_section: str = None,
version: str = None,
- order_by: str = None,
- primary_key: str = None,
- partition_by: str = None,
- sample_by: str = None,
+ order_by: EngineParam = None,
+ primary_key: EngineParam = None,
+ partition_by: EngineParam = None,
+ sample_by: EngineParam = None,
+ ttl: EngineExpr | None = None,
+ settings: dict[str, Any] | None = None,
):
- if not order_by and not primary_key:
+ if order_by is None and primary_key is None:
raise ArgumentError(None, "Either PRIMARY KEY or ORDER BY must be specified")
super().__init__(locals())
@@ -246,14 +322,16 @@ class ReplicatedMergeTree(TableEngine):
def __init__(
self,
- order_by: str = None,
- primary_key: str = None,
- partition_by: str = None,
- sample_by: str = None,
+ order_by: EngineParam = None,
+ primary_key: EngineParam = None,
+ partition_by: EngineParam = None,
+ sample_by: EngineParam = None,
zk_path: str = None,
replica: str = None,
+ ttl: EngineExpr | None = None,
+ settings: dict[str, Any] | None = None,
):
- if not order_by and not primary_key:
+ if order_by is None and primary_key is None:
raise ArgumentError(None, "Either PRIMARY KEY or ORDER BY must be specified")
super().__init__(locals())
@@ -275,14 +353,16 @@ class ReplicatedReplacingMergeTree(TableEngine):
def __init__(
self,
ver: str = None,
- order_by: str = None,
- primary_key: str = None,
- partition_by: str = None,
- sample_by: str = None,
+ order_by: EngineParam = None,
+ primary_key: EngineParam = None,
+ partition_by: EngineParam = None,
+ sample_by: EngineParam = None,
zk_path: str = None,
replica: str = None,
+ ttl: EngineExpr | None = None,
+ settings: dict[str, Any] | None = None,
):
- if not order_by and not primary_key:
+ if order_by is None and primary_key is None:
raise ArgumentError(None, "Either PRIMARY KEY or ORDER BY must be specified")
super().__init__(locals())
@@ -296,14 +376,16 @@ class ReplicatedCollapsingMergeTree(TableEngine):
def __init__(
self,
sign: str = None,
- order_by: str = None,
- primary_key: str = None,
- partition_by: str = None,
- sample_by: str = None,
+ order_by: EngineParam = None,
+ primary_key: EngineParam = None,
+ partition_by: EngineParam = None,
+ sample_by: EngineParam = None,
zk_path: str = None,
replica: str = None,
+ ttl: EngineExpr | None = None,
+ settings: dict[str, Any] | None = None,
):
- if not order_by and not primary_key:
+ if order_by is None and primary_key is None:
raise ArgumentError(None, "Either PRIMARY KEY or ORDER BY must be specified")
super().__init__(locals())
@@ -318,14 +400,16 @@ class ReplicatedVersionedCollapsingMergeTree(TableEngine):
self,
sign: str = None,
version: str = None,
- order_by: str = None,
- primary_key: str = None,
- partition_by: str = None,
- sample_by: str = None,
+ order_by: EngineParam = None,
+ primary_key: EngineParam = None,
+ partition_by: EngineParam = None,
+ sample_by: EngineParam = None,
zk_path: str = None,
replica: str = None,
+ ttl: EngineExpr | None = None,
+ settings: dict[str, Any] | None = None,
):
- if not order_by and not primary_key:
+ if order_by is None and primary_key is None:
raise ArgumentError(None, "Either PRIMARY KEY or ORDER BY must be specified")
super().__init__(locals())
@@ -339,14 +423,16 @@ class ReplicatedGraphiteMergeTree(TableEngine):
def __init__(
self,
config_section: str = None,
- order_by: str = None,
- primary_key: str = None,
- partition_by: str = None,
- sample_by: str = None,
+ order_by: EngineParam = None,
+ primary_key: EngineParam = None,
+ partition_by: EngineParam = None,
+ sample_by: EngineParam = None,
zk_path: str = None,
replica: str = None,
+ ttl: EngineExpr | None = None,
+ settings: dict[str, Any] | None = None,
):
- if not order_by and not primary_key:
+ if order_by is None and primary_key is None:
raise ArgumentError(None, "Either PRIMARY KEY or ORDER BY must be specified")
super().__init__(locals())
@@ -371,22 +457,146 @@ class SharedGraphiteMergeTree(GraphiteMergeTree):
pass
+def _strip_string_quotes(value: Any) -> Any:
+ if isinstance(value, str) and len(value) > 1 and value[0] == value[-1] == "'":
+ return value[1:-1]
+ return value
+
+
+def _parse_positional_engine_args(full_engine: str, engine_cls: type["TableEngine"]) -> dict[str, Any]:
+ if not engine_cls.arg_names:
+ return {}
+ _, arg_values, _ = parse_callable(full_engine)
+ return {arg_name: _strip_string_quotes(arg_value) for arg_name, arg_value in zip(engine_cls.arg_names, arg_values) if arg_value != ""}
+
+
+def _find_clause_markers(sql: str) -> list[tuple[int, str]]:
+ markers = []
+ upper_sql = sql.upper()
+ for i, _char, depth in walk_sql(sql):
+ if depth != 0 or (i > 0 and not sql[i - 1].isspace()):
+ continue
+ for clause in ENGINE_CLAUSES:
+ if upper_sql.startswith(clause, i):
+ markers.append((i, clause))
+ break
+ return markers
+
+
+_CH_STRING_ESCAPES = {"\\": "\\", "'": "'", '"': '"', "`": "`", "n": "\n", "t": "\t", "r": "\r", "b": "\b", "f": "\f", "0": "\0"}
+
+
+def _decode_ch_string_literal(literal: str) -> str:
+ inner = literal[1:-1].replace("''", "'")
+ out: list[str] = []
+ i = 0
+ while i < len(inner):
+ ch = inner[i]
+ if ch == "\\" and i + 1 < len(inner):
+ out.append(_CH_STRING_ESCAPES.get(inner[i + 1], inner[i + 1]))
+ i += 2
+ else:
+ out.append(ch)
+ i += 1
+ return "".join(out)
+
+
+def _parse_settings_clause(raw_settings: str) -> dict[str, Any]:
+ settings: dict[str, Any] = {}
+ for pair in split_top_level(raw_settings):
+ if "=" not in pair:
+ continue
+ key, value = pair.split("=", 1)
+ key = key.strip()
+ value = value.strip()
+ if len(value) >= 2 and value[0] == "'" and value[-1] == "'":
+ settings[key] = _decode_ch_string_literal(value)
+ continue
+ try:
+ settings[key] = int(value)
+ continue
+ except ValueError:
+ pass
+ try:
+ settings[key] = float(value)
+ continue
+ except ValueError:
+ settings[key] = value
+ return settings
+
+
+def _parse_keyword_engine_clauses(clause_sql: str) -> dict[str, Any]:
+ params: dict[str, Any] = {}
+ markers = _find_clause_markers(clause_sql)
+ for index, (start, clause) in enumerate(markers):
+ value_start = start + len(clause)
+ value_end = markers[index + 1][0] if index + 1 < len(markers) else len(clause_sql)
+ value = clause_sql[value_start:value_end].strip()
+ if not value:
+ continue
+ if clause == "SETTINGS":
+ settings = _parse_settings_clause(value)
+ if settings:
+ params["settings"] = settings
+ continue
+ params[clause.lower().replace(" ", "_")] = value
+ return params
+
+
+def _parse_engine_params(full_engine: str, engine_cls: type["TableEngine"]) -> dict[str, Any]:
+ """Extract engine parameters from a full_engine expression for repr().
+
+ Parses both positional constructor args (e.g. the ``version`` in
+ ``ReplacingMergeTree(version)``) and keyword clauses (``ORDER BY``,
+ ``PARTITION BY``, etc.) so that reflected engines round-trip through
+ ``repr()`` correctly.
+ """
+ params = _parse_positional_engine_args(full_engine, engine_cls)
+ _, _, clause_sql = parse_callable(full_engine)
+ params.update(_parse_keyword_engine_clauses(clause_sql))
+ return params
+
+
def build_engine(full_engine: str) -> TableEngine | None:
"""
- Factory function to create TableEngine class from ClickHouse full_engine expression
- :param full_engine
- :return: TableEngine DDL element
+ Factory function to create TableEngine class from ClickHouse full_engine expression.
+
+ ClickHouse Cloud transparently rewrites user-facing engines (e.g. MergeTree)
+ to Shared* variants (e.g. SharedMergeTree) with Cloud-internal positional
+ args for replication paths. When reflecting, we map back to the base engine
+ class and drop those args so that repr() produces valid user-level DDL.
"""
if not full_engine:
return None
- name = full_engine.split(" ")[0].split("(")[0]
+ name, _, _ = parse_callable(full_engine)
try:
engine_cls = engine_map[name]
except KeyError:
if not name.startswith("System"):
logger.warning("Engine %s not found", name)
return None
+
+ # Map Shared* back to the base engine and discard Cloud-internal positional args.
+ # Cloud prepends replication path args (zk_path, replica) before the base engine's
+ # own positional args, e.g. SharedReplacingMergeTree('/path', '{replica}', ver).
+ base_name = name
+ if name.startswith("Shared"):
+ base_name = name[len("Shared") :]
+ base_cls = engine_map.get(base_name)
+ if base_cls is not None:
+ engine_cls = base_cls
+ _, all_args, clause_tail = parse_callable(full_engine)
+ # Cloud prepends exactly 2 args (zk_path, replica) — skip them
+ base_args = all_args[2:] if len(all_args) > 2 else ()
+ args_str = f"({','.join(str(a) for a in base_args)})" if base_args else ""
+ full_engine = base_name + args_str + (" " + clause_tail if clause_tail.strip() else "")
+
engine = engine_cls.__new__(engine_cls)
- engine.name = name
+ engine.name = base_name
engine.full_engine = full_engine
+ engine._orig_kwargs = _parse_engine_params(full_engine, engine_cls)
+ engine.settings = dict(engine._orig_kwargs.get("settings") or {})
return engine
+
+
+__all__ = sorted(engine_map) + ["build_engine", "engine_map"]
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/dialect.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/dialect.py
index e6c8b9ddf49..46eec9166bd 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/dialect.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/dialect.py
@@ -1,9 +1,11 @@
+import sqlalchemy.schema as sa_schema
from sqlalchemy import text
from sqlalchemy.engine.default import DefaultDialect
+from sqlalchemy.exc import NoResultFound, NoSuchTableError
from clickhouse_connect import dbapi
from clickhouse_connect.cc_sqlalchemy import dialect_name, ischema_names
-from clickhouse_connect.cc_sqlalchemy.inspector import ChInspector
+from clickhouse_connect.cc_sqlalchemy.inspector import ChInspector, get_table_metadata
from clickhouse_connect.cc_sqlalchemy.sql import full_table
from clickhouse_connect.cc_sqlalchemy.sql.compiler import ChStatementCompiler
from clickhouse_connect.cc_sqlalchemy.sql.ddlcompiler import ChDDLCompiler
@@ -23,6 +25,8 @@ class ClickHouseDialect(DefaultDialect):
supports_native_decimal = True
supports_native_boolean = True
supports_statement_cache = False
+ supports_comments = True
+ inline_comments = True
returns_unicode_strings = True
postfetch_lastrowid = False
ddl_compiler = ChDDLCompiler
@@ -32,6 +36,30 @@ class ClickHouseDialect(DefaultDialect):
max_identifier_length = 127
ischema_names = ischema_names
inspector = ChInspector
+ construct_arguments = [
+ (
+ sa_schema.Table,
+ {
+ "engine": None,
+ "table_type": None,
+ "dictionary_source": None,
+ "dictionary_layout": None,
+ "dictionary_lifetime": None,
+ "dictionary_primary_key": None,
+ },
+ ),
+ (
+ sa_schema.Column,
+ {
+ "materialized": None,
+ "alias": None,
+ "codec": None,
+ "ttl": None,
+ "after": None,
+ "settings": None,
+ },
+ ),
+ ]
# SQA 1 compatibility
@@ -45,8 +73,8 @@ class ClickHouseDialect(DefaultDialect):
def import_dbapi(cls):
return dbapi
- def initialize(self, connection):
- pass
+ def _get_default_schema_name(self, connection):
+ return connection.execute(text("SELECT currentDatabase()")).scalar()
def get_schema_names(self, connection, **_):
return [row.name for row in connection.execute(text("SHOW DATABASES"))]
@@ -65,7 +93,7 @@ class ClickHouseDialect(DefaultDialect):
return []
def get_pk_constraint(self, connection, table_name, schema=None, **kw):
- return []
+ return {"constrained_columns": [], "name": None}
def get_foreign_keys(self, connection, table_name, schema=None, **kw):
return []
@@ -80,7 +108,14 @@ class ClickHouseDialect(DefaultDialect):
return []
def get_view_definition(self, connection, view_name, schema=None, **kw):
- pass
+ raise NoSuchTableError(f"{schema}.{view_name}" if schema else view_name)
+
+ def get_table_comment(self, connection, table_name, schema=None, **kw):
+ try:
+ table_metadata = get_table_metadata(connection, table_name, schema)
+ except NoResultFound:
+ raise NoSuchTableError(f"{schema}.{table_name}" if schema else table_name) from None
+ return {"text": table_metadata.comment or None}
def get_indexes(self, connection, table_name, schema=None, **kw):
return []
@@ -118,4 +153,4 @@ class ClickHouseDialect(DefaultDialect):
pass
def get_isolation_level(self, dbapi_conn):
- return None
+ return "AUTOCOMMIT"
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/engines.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/engines.py
new file mode 100644
index 00000000000..4679424d253
--- /dev/null
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/engines.py
@@ -0,0 +1 @@
+from clickhouse_connect.cc_sqlalchemy.ddl.tableengine import * # noqa: F401, F403
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/inspector.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/inspector.py
index 1f0040e47dd..067c9fa029c 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/inspector.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/inspector.py
@@ -1,44 +1,167 @@
+import ast
+import re
+from collections.abc import Collection
+from typing import Any
+
import sqlalchemy.schema as sa_schema
from sqlalchemy import text
from sqlalchemy.engine.reflection import Inspector
-from sqlalchemy.orm.exc import NoResultFound
+from sqlalchemy.exc import NoResultFound
-from clickhouse_connect.cc_sqlalchemy import dialect_name as dn
from clickhouse_connect.cc_sqlalchemy.datatypes.base import sqla_type_from_name
from clickhouse_connect.cc_sqlalchemy.ddl.tableengine import build_engine
from clickhouse_connect.cc_sqlalchemy.sql import full_table
+from clickhouse_connect.cc_sqlalchemy.sql.sqlparse import (
+ extract_parenthesized_block,
+ find_top_level_clause,
+ split_top_level,
+)
-ch_col_args = ("default_type", "codec_expression", "ttl_expression")
+def _database_name(connection, schema: str | None) -> str:
+ if schema:
+ return schema
+ return connection.execute(text("SELECT currentDatabase()")).scalar()
-def get_engine(connection, table_name, schema=None):
+
+def get_table_metadata(connection, table_name, schema=None):
+ database = _database_name(connection, schema)
result_set = connection.execute(
- text("SELECT engine_full FROM system.tables WHERE database = :schema AND name = :table_name"),
- {"schema": schema, "table_name": table_name},
+ text("SELECT engine, engine_full, comment FROM system.tables WHERE database = :database AND name = :table_name"),
+ {"database": database, "table_name": table_name},
)
row = next(result_set, None)
if not row:
- raise NoResultFound(f"Table {schema}.{table_name} does not exist")
+ raise NoResultFound(f"Table {database}.{table_name} does not exist")
+ return row
+
+
+def get_engine(connection, table_name, schema=None):
+ row = get_table_metadata(connection, table_name, schema)
return build_engine(row.engine_full)
+def get_dictionary_create_sql(connection, table_name: str, schema: str | None = None) -> str:
+ create_sql = connection.execute(text(f"SHOW CREATE DICTIONARY {full_table(table_name, schema)}")).scalar()
+ return create_sql or ""
+
+
+def _parse_dictionary_column(definition: str) -> dict[str, Any]:
+ match = re.match(r"^`(?P<name>[^`]+)`\s+(?P<rest>.+)$", definition, flags=re.DOTALL)
+ if not match:
+ match = re.match(r"^(?P<name>\S+)\s+(?P<rest>.+)$", definition, flags=re.DOTALL)
+ if not match:
+ raise ValueError(f"Could not parse dictionary column definition: {definition}")
+
+ name = match.group("name")
+ remainder = match.group("rest").strip()
+ type_index, _ = find_top_level_clause(
+ remainder,
+ (" DEFAULT ", " MATERIALIZED ", " ALIAS ", " TTL ", " COMMENT ", " CODEC("),
+ )
+ type_name = remainder[:type_index].strip() if type_index != -1 else remainder
+ sqla_type = sqla_type_from_name(type_name.replace("\n", " "))
+ column = {
+ "name": name,
+ "type": sqla_type,
+ "nullable": sqla_type.nullable,
+ "autoincrement": False,
+ }
+
+ comment_index, comment_clause = find_top_level_clause(remainder, (" COMMENT ",))
+ if comment_clause:
+ comment_sql = remainder[comment_index + len(comment_clause) :].strip()
+ column["comment"] = ast.literal_eval(comment_sql)
+ remainder = remainder[:comment_index].rstrip()
+
+ default_index, default_clause = find_top_level_clause(remainder, (" DEFAULT ", " MATERIALIZED ", " ALIAS "))
+ if default_clause:
+ default_sql = remainder[default_index + len(default_clause) :].strip()
+ if default_clause == " DEFAULT ":
+ column["server_default"] = text(default_sql)
+ elif default_clause == " MATERIALIZED ":
+ column["clickhouse_materialized"] = text(default_sql)
+ elif default_clause == " ALIAS ":
+ column["clickhouse_alias"] = text(default_sql)
+ return column
+
+
+def get_dictionary_columns(connection, table_name: str, schema: str | None = None) -> list[dict[str, Any]]:
+ create_sql = get_dictionary_create_sql(connection, table_name, schema)
+ if not create_sql:
+ return []
+ start = create_sql.find("(")
+ if start == -1:
+ return []
+ column_block, _ = extract_parenthesized_block(create_sql, start)
+ return [_parse_dictionary_column(column_sql) for column_sql in split_top_level(column_block)]
+
+
+def get_dictionary_metadata(connection, table_name: str, schema: str | None = None) -> dict[str, Any]:
+ create_sql = get_dictionary_create_sql(connection, table_name, schema)
+ if not create_sql:
+ return {}
+
+ metadata: dict[str, Any] = {"clickhouse_table_type": "dictionary"}
+ for line in (line.strip() for line in create_sql.splitlines()):
+ if not line:
+ continue
+ if line.startswith("PRIMARY KEY "):
+ metadata["clickhouse_dictionary_primary_key"] = line[len("PRIMARY KEY ") :]
+ elif line.startswith("SOURCE(") and line.endswith(")"):
+ metadata["clickhouse_dictionary_source"] = line[len("SOURCE(") : -1]
+ elif line.startswith("LIFETIME(") and line.endswith(")"):
+ metadata["clickhouse_dictionary_lifetime"] = line[len("LIFETIME(") : -1]
+ elif line.startswith("LAYOUT(") and line.endswith(")"):
+ metadata["clickhouse_dictionary_layout"] = line[len("LAYOUT(") : -1]
+ elif line.startswith("COMMENT "):
+ metadata["comment"] = ast.literal_eval(line[len("COMMENT ") :])
+ return metadata
+
+
class ChInspector(Inspector):
- def reflect_table(self, table, include_columns, exclude_columns, *_args, **_kwargs):
+ def reflect_table(
+ self,
+ table,
+ *_args,
+ include_columns: Collection[str] | None = None,
+ exclude_columns: Collection[str] = (),
+ **_kwargs,
+ ):
schema = table.schema
- for col in self.get_columns(table.name, schema):
+ table_metadata = get_table_metadata(self.bind, table.name, schema)
+ if table_metadata.engine == "Dictionary":
+ reflected_columns = get_dictionary_columns(self.bind, table.name, schema)
+ else:
+ reflected_columns = self.get_columns(table.name, schema)
+
+ for col in reflected_columns:
name = col.pop("name")
if (include_columns and name not in include_columns) or (exclude_columns and name in exclude_columns):
continue
col_type = col.pop("type")
- col_args = {f"{dn}_{key}" if key in ch_col_args else key: value for key, value in col.items() if value}
+ col_args = {key: value for key, value in col.items() if value is not None}
table.append_column(sa_schema.Column(name, col_type, **col_args))
- table.engine = get_engine(self.bind, table.name, schema)
+ if table_metadata.engine == "Dictionary":
+ dictionary_metadata = get_dictionary_metadata(self.bind, table.name, schema)
+ table.comment = dictionary_metadata.pop("comment", None)
+ for key, value in dictionary_metadata.items():
+ table.kwargs[key] = value
+ return
+
+ table.engine = build_engine(table_metadata.engine_full)
+ table.comment = table_metadata.comment or None
+ if table.engine is not None:
+ table.kwargs["clickhouse_engine"] = table.engine
def get_columns(self, table_name, schema=None, **_kwargs):
+ table_metadata = get_table_metadata(self.bind, table_name, schema)
+ if table_metadata.engine == "Dictionary":
+ return get_dictionary_columns(self.bind, table_name, schema)
table_id = full_table(table_name, schema)
result_set = self.bind.execute(text(f"DESCRIBE TABLE {table_id}"))
if not result_set:
- raise NoResultFound(f"Table {full_table} does not exist")
+ raise NoResultFound(f"Table {table_id} does not exist")
columns = []
for row in result_set:
sqla_type = sqla_type_from_name(row.type.replace("\n", ""))
@@ -47,11 +170,15 @@ class ChInspector(Inspector):
"type": sqla_type,
"nullable": sqla_type.nullable,
"autoincrement": False,
- "default": row.default_expression,
- "default_type": row.default_type,
- "comment": row.comment,
- "codec_expression": row.codec_expression,
- "ttl_expression": row.ttl_expression,
+ "comment": row.comment or None,
+ "clickhouse_codec": row.codec_expression or None,
+ "clickhouse_ttl": text(row.ttl_expression) if row.ttl_expression else None,
}
+ if row.default_type == "DEFAULT" and row.default_expression:
+ col["server_default"] = text(row.default_expression)
+ elif row.default_type == "MATERIALIZED" and row.default_expression:
+ col["clickhouse_materialized"] = text(row.default_expression)
+ elif row.default_type == "ALIAS" and row.default_expression:
+ col["clickhouse_alias"] = text(row.default_expression)
columns.append(col)
return columns
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/__init__.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/__init__.py
index 88db89e3aa3..ec2cee4c942 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/__init__.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/__init__.py
@@ -1,10 +1,12 @@
-from sqlalchemy import Table
+from sqlalchemy import Table, and_
from sqlalchemy.sql.selectable import FromClause, Select
+from clickhouse_connect.cc_sqlalchemy.sql.clauses import ArrayJoin, LimitByClause, PreWhereClause
+from clickhouse_connect.cc_sqlalchemy.sql.clauses import array_join as _array_join_fromclause
from clickhouse_connect.driver.binding import quote_identifier
-# Dialect name used for non-rendering statement hints that only serve to
-# differentiate cache keys when FINAL/SAMPLE modifiers are applied.
+# Non-rendering statement-hint dialect tag. Used only to force distinct
+# compiled-statement cache keys when FINAL/SAMPLE/PREWHERE/LIMIT BY are applied.
_CH_MODIFIER_DIALECT = "_ch_modifier"
@@ -19,7 +21,6 @@ def format_table(table: Table):
def _resolve_target(select_stmt: Select, table: FromClause | None, method_name: str) -> FromClause:
- """Resolve the target FROM clause for ClickHouse modifiers (FINAL/SAMPLE)."""
if not isinstance(select_stmt, Select):
raise TypeError(f"{method_name}() expects a SQLAlchemy Select instance")
@@ -32,6 +33,10 @@ def _resolve_target(select_stmt: Select, table: FromClause | None, method_name:
raise ValueError(f"{method_name}() is ambiguous for statements with multiple FROM clauses. Specify the table explicitly.")
target = froms[0]
+ # FINAL/SAMPLE apply to the underlying table, not to an ArrayJoin wrapper.
+ while isinstance(target, ArrayJoin):
+ target = target.left
+
if not isinstance(target, FromClause):
raise TypeError("table must be a SQLAlchemy FromClause when provided")
@@ -39,32 +44,19 @@ def _resolve_target(select_stmt: Select, table: FromClause | None, method_name:
def _target_cache_key(target: FromClause) -> str:
- """Stable string identifying a FROM target for cache key differentiation."""
if hasattr(target, "fullname"):
return target.fullname
return target.name
def final(select_stmt: Select, table: FromClause | None = None) -> Select:
- """Apply the ClickHouse FINAL modifier to a select statement.
-
- FINAL forces ClickHouse to merge data parts before returning results,
- guaranteeing fully collapsed rows for ReplacingMergeTree, CollapsingMergeTree,
- and similar engines.
-
- Args:
- select_stmt: The SELECT statement to modify.
- table: The target table to apply FINAL to. Required when the query
- joins multiple tables, optional when there is a single FROM target.
- """
+ """Apply the ClickHouse FINAL modifier. For ReplacingMergeTree-family engines."""
target = _resolve_target(select_stmt, table, "final")
ch_final = getattr(select_stmt, "_ch_final", set())
if target in ch_final:
return select_stmt
- # with_statement_hint creates a generative copy and adds a non-rendering
- # hint that participates in the statement cache key.
hint_key = _target_cache_key(target)
new_stmt = select_stmt.with_statement_hint(f"FINAL:{hint_key}", dialect_name=_CH_MODIFIER_DIALECT)
new_stmt._ch_final = ch_final | {target}
@@ -72,24 +64,11 @@ def final(select_stmt: Select, table: FromClause | None = None) -> Select:
def _select_final(self: Select, table: FromClause | None = None) -> Select:
- """
- Select.final() convenience wrapper around the module-level final() helper.
- """
return final(self, table=table)
def sample(select_stmt: Select, sample_value: str | int | float, table: FromClause | None = None) -> Select:
- """Apply the ClickHouse SAMPLE modifier to a select statement.
-
- Args:
- select_stmt: The SELECT statement to modify.
- sample_value: The sample expression. Can be a float between 0 and 1
- for a fractional sample (e.g. 0.1 for 10%), an integer for an
- approximate row count, or a string for SAMPLE expressions like
- '1/10 OFFSET 1/2'.
- table: The target table to sample. Required when the query joins
- multiple tables, optional when there is a single FROM target.
- """
+ """Apply the ClickHouse SAMPLE modifier. sample_value may be a float (fraction), int (row count), or string expression like '1/10 OFFSET 1/2'."""
target = _resolve_target(select_stmt, table, "sample")
hint_key = _target_cache_key(target)
@@ -101,12 +80,92 @@ def sample(select_stmt: Select, sample_value: str | int | float, table: FromClau
def _select_sample(self: Select, sample_value: str | int | float, table: FromClause | None = None) -> Select:
- """
- Select.sample() convenience wrapper around the module-level sample() helper.
- """
return sample(self, sample_value=sample_value, table=table)
-# Monkey-patch the select class to add final and sample methods
+def _apply_array_join(select_stmt: Select, cols, alias, is_left: bool) -> Select:
+ if not isinstance(select_stmt, Select):
+ raise TypeError("array_join() expects a SQLAlchemy Select instance")
+
+ if not cols:
+ raise ValueError("array_join() requires at least one array column")
+
+ froms = select_stmt.get_final_froms()
+ if not froms:
+ raise ValueError("array_join() requires the Select to have a FROM clause to wrap.")
+ if len(froms) > 1:
+ raise ValueError(
+ "array_join() is ambiguous for statements with multiple FROM clauses. "
+ "Use the module-level array_join(left, array_column, ...) with select_from() instead."
+ )
+ target = froms[0]
+
+ columns = list(cols)
+ if len(columns) == 1:
+ array_column = columns[0]
+ alias_arg = alias
+ else:
+ array_column = columns
+ if alias is None:
+ alias_arg = None
+ elif isinstance(alias, (list, tuple)):
+ alias_arg = list(alias)
+ else:
+ raise ValueError("alias must be a list/tuple matching the number of columns when multiple columns are provided")
+
+ aj = _array_join_fromclause(target, array_column, alias=alias_arg, is_left=is_left)
+ return select_stmt.select_from(aj)
+
+
+def _select_array_join(self: Select, *cols, alias=None) -> Select:
+ return _apply_array_join(self, cols, alias, is_left=False)
+
+
+def _select_left_array_join(self: Select, *cols, alias=None) -> Select:
+ return _apply_array_join(self, cols, alias, is_left=True)
+
+
+def prewhere(select_stmt, whereclause):
+ """Apply ClickHouse PREWHERE. Multiple calls compose with AND."""
+ if not isinstance(select_stmt, Select):
+ raise TypeError("prewhere() expects a SQLAlchemy Select instance")
+
+ existing = getattr(select_stmt, "_ch_prewhere", None)
+ combined = and_(existing.whereclause, whereclause) if existing is not None else whereclause
+
+ # Hint key is str(combined) (structural, with bind placeholders) rather
+ # than id() so equivalent statements share a compiled-statement cache entry.
+ new_stmt = select_stmt.with_statement_hint(f"PREWHERE:{str(combined)}", dialect_name=_CH_MODIFIER_DIALECT)
+ new_stmt._ch_prewhere = PreWhereClause(combined)
+ return new_stmt
+
+
+def limit_by(select_stmt, by_clauses, limit, offset=None):
+ """Apply ClickHouse LIMIT BY (top-N per group). Renders `LIMIT [offset,] limit BY by_clauses`."""
+ if not isinstance(select_stmt, Select):
+ raise TypeError("limit_by() expects a SQLAlchemy Select instance")
+
+ by_tuple = tuple(by_clauses)
+ if not by_tuple:
+ raise ValueError("limit_by() requires at least one by_clause")
+
+ by_key = ",".join(str(c) for c in by_tuple)
+ new_stmt = select_stmt.with_statement_hint(f"LIMIT_BY:{limit}:{offset}:{by_key}", dialect_name=_CH_MODIFIER_DIALECT)
+ new_stmt._ch_limit_by = LimitByClause(by_tuple, limit, offset)
+ return new_stmt
+
+
+def _select_prewhere(self, whereclause):
+ return prewhere(self, whereclause)
+
+
+def _select_limit_by(self, by_clauses, limit, offset=None):
+ return limit_by(self, by_clauses, limit, offset)
+
+
Select.sample = _select_sample
Select.final = _select_final
+Select.array_join = _select_array_join
+Select.left_array_join = _select_left_array_join
+Select.prewhere = _select_prewhere
+Select.limit_by = _select_limit_by
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/clauses.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/clauses.py
index d6e5578b7a7..0ce50e0a241 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/clauses.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/clauses.py
@@ -1,5 +1,7 @@
from sqlalchemy import and_, true
+from sqlalchemy.ext.compiler import compiles
from sqlalchemy.sql.base import Immutable
+from sqlalchemy.sql.elements import ColumnElement, Label
from sqlalchemy.sql.selectable import FromClause, Join
from sqlalchemy.sql.visitors import InternalTraversal
@@ -99,6 +101,30 @@ class ArrayJoin(Immutable, FromClause):
self.array_columns = [(clone(col, **kw), alias) for col, alias in self.array_columns]
+@compiles(ArrayJoin)
+def _compile_array_join(element, compiler, **kw):
+ """Render an ArrayJoin FromClause. Registered via @compiles so any compiler
+ (including the default StrSQLCompiler used for statement introspection) can
+ render it. A SQLAlchemy Label becomes the ARRAY JOIN alias so downstream
+ `column("name")` references bind; an explicit alias= argument overrides.
+ """
+ kw.pop("asfrom", None)
+ kw.pop("from_linter", None)
+ left = compiler.process(element.left, asfrom=True, **kw)
+ join_type = "LEFT ARRAY JOIN" if element.is_left else "ARRAY JOIN"
+ parts = []
+ for col, explicit_alias in element.array_columns:
+ if explicit_alias is None and isinstance(col, Label):
+ body_text = compiler.process(col.element, **kw)
+ col_text = f"{body_text} AS {compiler.preparer.quote(col.name)}"
+ else:
+ col_text = compiler.process(col, **kw)
+ if explicit_alias is not None:
+ col_text += f" AS {compiler.preparer.quote(explicit_alias)}"
+ parts.append(col_text)
+ return f"{left} {join_type} {', '.join(parts)}"
+
+
def array_join(left, array_column, alias=None, is_left=False):
"""Create an ARRAY JOIN clause.
@@ -301,3 +327,66 @@ def ch_join(
_is_cross=cross,
using=using,
)
+
+
+class PreWhereClause:
+ """State container for ClickHouse PREWHERE, stored on a Select and rendered by the dialect compiler."""
+
+ def __init__(self, whereclause):
+ self.whereclause = whereclause
+
+
+class LimitByClause:
+ """State container for ClickHouse LIMIT BY (top-N per group). Renders as `LIMIT [offset,] limit BY by_clauses`."""
+
+ def __init__(self, by_clauses, limit, offset=None):
+ self.by_clauses = tuple(by_clauses)
+ self.limit = limit
+ self.offset = offset
+
+
+class Lambda(ColumnElement):
+ """ClickHouse lambda expression for higher-order functions (arrayMap, arrayFilter, arraySort).
+
+ Lambda(params, body) where params is a parameter name string or a list/tuple
+ of parameter names, and body is any SQLAlchemy ColumnElement. Use
+ `sqlalchemy.column(name)` to reference lambda params inside body. Renders as
+ `param -> body` for one param, `(p1, p2) -> body` for multiple.
+
+ Intentionally does NOT introspect Python lambdas (too brittle across
+ closures and default args). Pass an explicit ColumnElement body instead.
+
+ Example:
+ func.arrayMap(Lambda('x', column('x') * 2), table.c.numbers)
+ """
+
+ __visit_name__ = "lambda_expr"
+
+ def __init__(self, params, body):
+ super().__init__()
+ if isinstance(params, str):
+ param_list = (params,)
+ elif isinstance(params, (list, tuple)):
+ if not params:
+ raise ValueError("Lambda requires at least one parameter name")
+ param_list = tuple(params)
+ else:
+ raise TypeError("Lambda params must be a string or a list/tuple of strings")
+ for p in param_list:
+ if not isinstance(p, str):
+ raise TypeError("Lambda parameter names must be strings")
+ if not p.isidentifier():
+ raise ValueError(f"Lambda parameter name '{p}' is not a valid identifier")
+ # Not `self.params`: ColumnElement.params is a bind-parameter method on the base class.
+ self.param_names = param_list
+ self.body = body
+
+
+@compiles(Lambda)
+def _compile_lambda(element, compiler, **kw):
+ """Render a Lambda as ClickHouse lambda syntax via @compiles so any compiler can render it."""
+ body_text = compiler.process(element.body, **kw)
+ if len(element.param_names) == 1:
+ return f"{element.param_names[0]} -> {body_text}"
+ params_text = ", ".join(element.param_names)
+ return f"({params_text}) -> {body_text}"
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/compiler.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/compiler.py
index bbe7a4e9640..9dc41b21154 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/compiler.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/compiler.py
@@ -2,11 +2,44 @@ from sqlalchemy.exc import CompileError
from sqlalchemy.sql import elements, sqltypes
from sqlalchemy.sql.compiler import SQLCompiler
-from clickhouse_connect.cc_sqlalchemy import ArrayJoin
from clickhouse_connect.cc_sqlalchemy.datatypes.base import ChSqlaType
from clickhouse_connect.cc_sqlalchemy.sql import format_table
+def _find_outermost_marker(text, markers):
+ """Earliest index in `text` where any of `markers` appears at paren depth 0, skipping
+ string literals (single-quoted) and backtick-quoted identifiers. -1 if no match.
+ Used to splice PREWHERE into a SELECT body without matching subquery clauses.
+ """
+ depth = 0
+ i = 0
+ n = len(text)
+ while i < n:
+ c = text[i]
+ if c == "'" or c == "`":
+ quote = c
+ i += 1
+ while i < n:
+ if text[i] == "\\" and i + 1 < n:
+ i += 2
+ continue
+ if text[i] == quote:
+ i += 1
+ break
+ i += 1
+ continue
+ if c == "(":
+ depth += 1
+ elif c == ")":
+ depth -= 1
+ elif depth == 0:
+ for marker in markers:
+ if text.startswith(marker, i):
+ return i
+ i += 1
+ return -1
+
+
def _resolve_ch_type_name(sqla_type):
"""Resolve a SQLAlchemy type instance to a ClickHouse type name string.
@@ -117,23 +150,7 @@ class ChStatementCompiler(SQLCompiler):
return v
- def visit_array_join(self, array_join_clause, asfrom=False, from_linter=None, **kw):
- left = self.process(array_join_clause.left, asfrom=True, from_linter=from_linter, **kw)
- join_type = "LEFT ARRAY JOIN" if array_join_clause.is_left else "ARRAY JOIN"
-
- parts = []
- for col, alias in array_join_clause.array_columns:
- col_text = self.process(col, **kw)
- if alias is not None:
- col_text += f" AS {self.preparer.quote(alias)}"
- parts.append(col_text)
-
- return f"{left} {join_type} {', '.join(parts)}"
-
def visit_join(self, join, **kw):
- if isinstance(join, ArrayJoin):
- return self.visit_array_join(join, **kw)
-
left = self.process(join.left, **kw)
right = self.process(join.right, **kw)
onclause = join.onclause
@@ -225,28 +242,79 @@ class ChStatementCompiler(SQLCompiler):
**kw,
)
+ def _ch_modifier_attr(self, select, compile_state, attr, default):
+ """Read a CH modifier attribute."""
+ val = getattr(select, attr, None)
+ if val is not None:
+ return val
+ if compile_state is not None:
+ orig = getattr(compile_state, "select_statement", None)
+ if orig is not None and orig is not select:
+ return getattr(orig, attr, default)
+ return default
+
def _compose_select_body(self, text, select, compile_state, inner_columns, froms, byfrom, toplevel, kwargs):
- ch_final = getattr(select, "_ch_final", set())
- ch_sample = getattr(select, "_ch_sample", {})
+ ch_final = self._ch_modifier_attr(select, compile_state, "_ch_final", set())
+ ch_sample = self._ch_modifier_attr(select, compile_state, "_ch_sample", {})
+ ch_prewhere = self._ch_modifier_attr(select, compile_state, "_ch_prewhere", None)
+ ch_limit_by = self._ch_modifier_attr(select, compile_state, "_ch_limit_by", None)
- if ch_final or ch_sample:
- mods = {}
- for target in ch_final | set(ch_sample):
- parts = []
- if target in ch_final:
- parts.append("FINAL")
- if target in ch_sample:
- parts.append(f"SAMPLE {ch_sample[target]}")
- mods[target] = " ".join(parts)
+ prev_lb = getattr(self, "_ch_active_limit_by", None)
+ self._ch_active_limit_by = ch_limit_by
- prev = getattr(self, "_ch_from_modifiers", None)
- self._ch_from_modifiers = mods
- try:
- return super()._compose_select_body(text, select, compile_state, inner_columns, froms, byfrom, toplevel, kwargs)
- finally:
- self._ch_from_modifiers = prev
+ try:
+ if ch_final or ch_sample:
+ mods = {}
+ for target in ch_final | set(ch_sample):
+ parts = []
+ if target in ch_final:
+ parts.append("FINAL")
+ if target in ch_sample:
+ parts.append(f"SAMPLE {ch_sample[target]}")
+ mods[target] = " ".join(parts)
+
+ prev = getattr(self, "_ch_from_modifiers", None)
+ self._ch_from_modifiers = mods
+ try:
+ result = super()._compose_select_body(text, select, compile_state, inner_columns, froms, byfrom, toplevel, kwargs)
+ finally:
+ self._ch_from_modifiers = prev
+ else:
+ result = super()._compose_select_body(text, select, compile_state, inner_columns, froms, byfrom, toplevel, kwargs)
+ finally:
+ self._ch_active_limit_by = prev_lb
+
+ if ch_prewhere is not None:
+ prewhere_text = self.process(ch_prewhere.whereclause, **kwargs)
+ prewhere_segment = f" \nPREWHERE {prewhere_text}"
+ markers = (" \nWHERE ", " GROUP BY ", " \nHAVING ", " ORDER BY ", "\n LIMIT ")
+ insert_at = _find_outermost_marker(result, markers)
+ if insert_at == -1:
+ result = result + prewhere_segment
+ else:
+ result = result[:insert_at] + prewhere_segment + result[insert_at:]
- return super()._compose_select_body(text, select, compile_state, inner_columns, froms, byfrom, toplevel, kwargs)
+ # LIMIT BY: SA calls limit_clause() only when there's a regular LIMIT/OFFSET.
+ # Without one, it's never called, so append the LIMIT BY here instead.
+ if ch_limit_by is not None and not select._has_row_limiting_clause:
+ result += self._render_ch_limit_by(ch_limit_by, kwargs)
+
+ return result
+
+ def _render_ch_limit_by(self, ch_limit_by, kw):
+ by_text = ", ".join(self.process(col, **kw) for col in ch_limit_by.by_clauses)
+ offset_prefix = f"{ch_limit_by.offset}, " if ch_limit_by.offset is not None else ""
+ return f"\n LIMIT {offset_prefix}{ch_limit_by.limit} BY {by_text}"
+
+ def limit_clause(self, select, **kw):
+ text = ""
+ ch_limit_by = getattr(select, "_ch_limit_by", None)
+ if ch_limit_by is None:
+ ch_limit_by = getattr(self, "_ch_active_limit_by", None)
+ if ch_limit_by is not None:
+ text += self._render_ch_limit_by(ch_limit_by, kw)
+ text += super().limit_clause(select, **kw)
+ return text
def visit_table(self, table, asfrom=False, iscrud=False, ashint=False, fromhints=None, enclosing_alias=None, **kwargs):
result = super().visit_table(
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/ddlcompiler.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/ddlcompiler.py
index e514b3ee95e..fee54f2df27 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/ddlcompiler.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/ddlcompiler.py
@@ -1,8 +1,113 @@
+from __future__ import annotations
+
+from typing import Any
+
from sqlalchemy import Column
+from sqlalchemy.exc import CompileError
+from sqlalchemy.sql import sqltypes
from sqlalchemy.sql.compiler import DDLCompiler
+from clickhouse_connect.cc_sqlalchemy.datatypes.base import ChSqlaType
+from clickhouse_connect.cc_sqlalchemy.datatypes.sqltypes import Nullable
from clickhouse_connect.cc_sqlalchemy.sql import format_table
-from clickhouse_connect.driver.binding import quote_identifier
+from clickhouse_connect.datatypes.base import TypeDef
+from clickhouse_connect.driver.binding import format_str, quote_identifier
+
+
+class ClickHouseDDLHelper:
+ dialect_names = ("clickhousedb", "clickhouse")
+
+ @classmethod
+ def get_option(cls, obj: Any, name: str) -> Any:
+ kwargs = getattr(obj, "kwargs", None)
+ if kwargs is not None:
+ for prefix in cls.dialect_names:
+ key = f"{prefix}_{name}"
+ if key in kwargs and kwargs[key] is not None:
+ return kwargs[key]
+ dialect_options = getattr(obj, "dialect_options", None)
+ if dialect_options:
+ for prefix in cls.dialect_names:
+ options = dialect_options.get(prefix)
+ if options and options.get(name) is not None:
+ return options.get(name)
+ return None
+
+ @classmethod
+ def is_dictionary(cls, table: Any) -> bool:
+ return getattr(table, "__visit_name__", None) == "dictionary" or cls.get_option(table, "table_type") == "dictionary"
+
+ @classmethod
+ def dictionary_option(cls, table: Any, name: str) -> Any:
+ attr_name = "primary_key_def" if name == "primary_key" else name
+ value = getattr(table, attr_name, None)
+ if value is not None:
+ return value
+ return cls.get_option(table, f"dictionary_{name}")
+
+ @staticmethod
+ def explicit_column_nullable(column: Column) -> bool | None:
+ user_defined = getattr(column, "_user_defined_nullable", None)
+ if isinstance(user_defined, bool):
+ return user_defined
+ return None
+
+ @staticmethod
+ def column_nullable(column: Column) -> bool:
+ column_type = getattr(column, "type", None)
+ if isinstance(column_type, ChSqlaType) and column_type.nullable:
+ return True
+ explicit_nullable = ClickHouseDDLHelper.explicit_column_nullable(column)
+ if explicit_nullable is not None:
+ return explicit_nullable
+ return False
+
+ @staticmethod
+ def effective_column_type(column: Column):
+ column_type = column.type
+ if not isinstance(column_type, ChSqlaType):
+ return column_type
+ if column_type.nullable:
+ return column_type
+ explicit_nullable = ClickHouseDDLHelper.explicit_column_nullable(column)
+ if not explicit_nullable:
+ return column_type
+
+ return Nullable(column_type)
+
+ @staticmethod
+ def without_nullable(type_):
+ if not isinstance(type_, ChSqlaType) or not type_.nullable:
+ return type_
+ type_def = type_.type_def
+ wrappers = tuple(wrapper for wrapper in type_def.wrappers if wrapper != "Nullable")
+ return type_.__class__(type_def=TypeDef(wrappers, type_def.keys, type_def.values))
+
+ @staticmethod
+ def render_settings(settings: dict[str, Any] | None) -> str:
+ if not settings:
+ return ""
+ return ", ".join(f"{key} = {ClickHouseDDLHelper._render_setting_value(value)}" for key, value in settings.items())
+
+ @staticmethod
+ def render_comment(comment: str | None) -> str:
+ if comment is None:
+ return "''"
+ escaped = comment.replace("'", "''")
+ return f"'{escaped}'"
+
+ @staticmethod
+ def _render_setting_value(value: Any) -> str:
+ if isinstance(value, bool):
+ return "1" if value else "0"
+ if isinstance(value, (int, float)):
+ return str(value)
+ return format_str(str(value))
+
+
+def column_specification(dialect, column: Column) -> str:
+ compiler = dialect.ddl_compiler(dialect, None)
+ return compiler.get_column_specification(column)
class ChDDLCompiler(DDLCompiler):
@@ -14,10 +119,83 @@ class ChDDLCompiler(DDLCompiler):
def visit_create_table(self, create, **_):
table = create.element
- text = f"CREATE TABLE {format_table(table)} ("
+ if_not_exists = " IF NOT EXISTS" if getattr(create, "if_not_exists", False) else ""
+
+ if ClickHouseDDLHelper.is_dictionary(table):
+ return self._visit_create_dictionary(create, table, if_not_exists)
+
+ engine = getattr(table, "engine", None) or ClickHouseDDLHelper.get_option(table, "engine")
+ if engine is None:
+ raise CompileError(
+ f"ClickHouse table '{table.name}' requires an engine — specify e.g. MergeTree(order_by='id') as a table argument"
+ )
+ text = f"CREATE TABLE{if_not_exists} {format_table(table)} ("
+ text += ", ".join([self.get_column_specification(c.element) for c in create.columns])
+ text += ") " + engine.compile()
+ if table.comment:
+ text += f" COMMENT {self.sql_compiler.render_literal_value(table.comment, sqltypes.STRINGTYPE)}"
+ return text
+
+ def _visit_create_dictionary(self, create, dictionary, if_not_exists: str):
+ text = f"CREATE DICTIONARY{if_not_exists} {format_table(dictionary)} ("
text += ", ".join([self.get_column_specification(c.element) for c in create.columns])
- return text + ") " + table.engine.compile()
+ text += ")"
+
+ primary_key = ClickHouseDDLHelper.dictionary_option(dictionary, "primary_key")
+ if primary_key:
+ text += f" PRIMARY KEY {primary_key}"
+
+ source = ClickHouseDDLHelper.dictionary_option(dictionary, "source")
+ if source:
+ text += f" SOURCE({source})"
+
+ layout = ClickHouseDDLHelper.dictionary_option(dictionary, "layout")
+ if layout:
+ layout = layout if "(" in layout else f"{layout}()"
+ text += f" LAYOUT({layout})"
+
+ lifetime = ClickHouseDDLHelper.dictionary_option(dictionary, "lifetime")
+ if lifetime:
+ text += f" LIFETIME({lifetime})"
+
+ if dictionary.comment:
+ text += f" COMMENT {self.sql_compiler.render_literal_value(dictionary.comment, sqltypes.STRINGTYPE)}"
+
+ return text
+
+ def visit_drop_table(self, drop, **_):
+ table = drop.element
+ if_exists = " IF EXISTS" if getattr(drop, "if_exists", False) else ""
+ if ClickHouseDDLHelper.is_dictionary(table):
+ return f"DROP DICTIONARY{if_exists} {format_table(table)}"
+ return f"DROP TABLE{if_exists} {format_table(table)}"
+
+ def visit_add_column(self, create, **_):
+ return f"ALTER TABLE {format_table(create.element)} ADD COLUMN {self.get_column_specification(create.column)}"
+
+ def visit_drop_column(self, drop, **_):
+ return f"ALTER TABLE {format_table(drop.element)} DROP COLUMN {quote_identifier(drop.column.name)}"
def get_column_specification(self, column: Column, **_):
- text = f"{quote_identifier(column.name)} {column.type.compile()}"
+ text = f"{quote_identifier(column.name)} {ClickHouseDDLHelper.effective_column_type(column).compile()}"
+ materialized = ClickHouseDDLHelper.get_option(column, "materialized")
+ alias = ClickHouseDDLHelper.get_option(column, "alias")
+ if materialized is not None:
+ text += f" MATERIALIZED {self.render_default_string(materialized)}"
+ return text
+ if alias is not None:
+ text += f" ALIAS {self.render_default_string(alias)}"
+ return text
+ default = self.get_column_default_string(column)
+ if default is not None:
+ text += f" DEFAULT {default}"
+ codec = ClickHouseDDLHelper.get_option(column, "codec")
+ if codec is not None:
+ codec_sql = codec if isinstance(codec, str) else ", ".join(str(item) for item in codec)
+ text += f" CODEC({codec_sql})"
+ ttl = ClickHouseDDLHelper.get_option(column, "ttl")
+ if ttl is not None:
+ text += f" TTL {self.render_default_string(ttl)}"
+ if column.comment:
+ text += f" COMMENT {self.sql_compiler.render_literal_value(column.comment, sqltypes.STRINGTYPE)}"
return text
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/sqlparse.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/sqlparse.py
new file mode 100644
index 00000000000..6afdfe0e654
--- /dev/null
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/sql/sqlparse.py
@@ -0,0 +1,62 @@
+def walk_sql(sql: str, start: int = 0):
+ """Yield (index, char, depth) for unquoted chars, tracking paren depth."""
+ depth = 0
+ quote_char = None
+ escape = False
+ for i in range(start, len(sql)):
+ char = sql[i]
+ if escape:
+ escape = False
+ continue
+ if quote_char:
+ if char == "\\" and quote_char == "'":
+ escape = True
+ elif char == quote_char:
+ quote_char = None
+ continue
+ if char in {"'", '"', "`"}:
+ quote_char = char
+ continue
+ if char == "(":
+ depth += 1
+ elif char == ")":
+ depth -= 1
+ yield i, char, depth
+
+
+def extract_parenthesized_block(sql: str, start: int) -> tuple[str, int]:
+ """Return the content and closing index of the first parenthesized block."""
+ block_start = -1
+ for i, char, depth in walk_sql(sql, start):
+ if char == "(" and depth == 1 and block_start == -1:
+ block_start = i + 1
+ elif char == ")" and depth == 0 and block_start != -1:
+ return sql[block_start:i], i
+ raise ValueError("Could not parse parenthesized SQL block")
+
+
+def split_top_level(sql: str, delimiter: str = ",") -> list[str]:
+ """Split SQL on *delimiter* only at the top nesting level."""
+ parts = []
+ part_start = 0
+ for i, char, depth in walk_sql(sql):
+ if char == delimiter and depth == 0:
+ part = sql[part_start:i].strip()
+ if part:
+ parts.append(part)
+ part_start = i + 1
+ tail = sql[part_start:].strip()
+ if tail:
+ parts.append(tail)
+ return parts
+
+
+def find_top_level_clause(sql: str, clauses: tuple[str, ...]) -> tuple[int, str | None]:
+ """Find the first occurrence of any *clause* at top nesting level."""
+ upper_sql = sql.upper()
+ for i, _char, depth in walk_sql(sql):
+ if depth == 0:
+ for clause in clauses:
+ if upper_sql.startswith(clause, i):
+ return i, clause
+ return -1, None
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/types.py b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/types.py
new file mode 100644
index 00000000000..6e3640bdfdc
--- /dev/null
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/cc_sqlalchemy/types.py
@@ -0,0 +1 @@
+from clickhouse_connect.cc_sqlalchemy.datatypes.sqltypes import * # noqa: F401, F403
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/asyncclient.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/asyncclient.py
index 973018cc521..1e9568f347e 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/asyncclient.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/asyncclient.py
@@ -983,7 +983,13 @@ class AsyncClient(Client):
try:
url = f"{self.url}/ping"
timeout = aiohttp.ClientTimeout(total=3.0)
- async with session.get(url, timeout=timeout) as response:
+ get_kwargs: dict[str, Any] = {"timeout": timeout}
+ if self.server_host_name:
+ get_kwargs["headers"] = {"Host": self.server_host_name}
+ if self._ssl_context is not None:
+ get_kwargs["ssl"] = self._ssl_context
+ get_kwargs["server_hostname"] = self.server_host_name
+ async with session.get(url, **get_kwargs) as response:
return 200 <= response.status < 300
except (aiohttp.ClientError, asyncio.TimeoutError):
logger.debug("ping failed", exc_info=True)
@@ -1945,6 +1951,9 @@ class AsyncClient(Client):
# Construct full URL (aiohttp doesn't have base_url)
url = f"{self.url}/"
request_kwargs = {"method": method, "url": url, "params": final_params, "headers": req_headers}
+ if self.server_host_name and self._ssl_context is not None:
+ request_kwargs["ssl"] = self._ssl_context
+ request_kwargs["server_hostname"] = self.server_host_name
if hasattr(self, "_proxy_url") and self._proxy_url:
request_kwargs["proxy"] = self._proxy_url
if files:
@@ -1988,13 +1997,20 @@ class AsyncClient(Client):
except aiohttp.ServerConnectionError as e:
msg = str(e)
if "Connection reset" in msg or "Remote end closed" in msg or "Cannot connect" in msg or "Server disconnected" in msg:
- if attempts == 1:
+ # Always allow at least one retry on a clean connection error so a single stale
+ # keep-alive socket doesn't surface to the caller, and additionally honor the
+ # retries budget when it is larger (e.g. query_retries for reads), so that
+ # bursts of stale pooled connections can be drained before giving up.
+ max_attempts = max(2, retries + 1)
+ if attempts < max_attempts:
if retry_body is not None:
data = await retry_body()
- logger.debug("Retrying after connection error with rebuilt body")
+ logger.debug("Retrying after connection error with rebuilt body (attempt %s/%s)", attempts, max_attempts)
+ await asyncio.sleep(0.1 * attempts)
continue
if data is None or isinstance(data, (bytes, bytearray, str, dict)):
- logger.debug("Retrying after connection error from remote host")
+ logger.debug("Retrying after connection error from remote host (attempt %s/%s)", attempts, max_attempts)
+ await asyncio.sleep(0.1 * attempts)
continue
raise OperationalError(f"Network Error: {msg}") from e
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/binding.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/binding.py
index 9c63713e6dc..3734589a2e2 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/binding.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/binding.py
@@ -32,14 +32,33 @@ class DT64Param:
return f"'{s}'"
-def quote_identifier(identifier: str):
- first_char = identifier[0]
- if first_char in ("`", '"') and identifier[-1] == first_char:
- # Identifier is already quoted, assume that it's valid
- return identifier
+def quote_identifier(identifier: str) -> str:
+ if len(identifier) >= 2:
+ quote = identifier[0]
+ if quote in ("`", '"') and identifier[-1] == quote and _is_validly_quoted(identifier, quote):
+ return identifier
return f"`{escape_str(identifier)}`"
+def _is_validly_quoted(identifier: str, quote: str) -> bool:
+ # Accepts backslash escapes (\X) and doubled-quote escapes (`` or "").
+ i, end = 1, len(identifier) - 1
+ while i < end:
+ c = identifier[i]
+ if c == "\\":
+ if i + 1 >= end:
+ return False
+ i += 2
+ elif c == quote:
+ if i + 1 < end and identifier[i + 1] == quote:
+ i += 2
+ else:
+ return False
+ else:
+ i += 1
+ return True
+
+
def finalize_query(query: str, parameters: Sequence | dict[str, Any] | None, server_tz: tzinfo | None = None) -> str:
query = query.rstrip(";")
if not parameters:
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py
index 576b3fd59ba..f9bcf4d690b 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py
@@ -2,6 +2,7 @@ import io
import json
import logging
import re
+import time
import uuid
from base64 import b64encode
from collections.abc import Callable, Generator, Sequence
@@ -568,18 +569,24 @@ class HttpClient(Client):
try:
response = self.http.request(method, url, **kwargs)
except HTTPError as ex:
- if isinstance(ex.__context__, ConnectionResetError) and attempts == 1:
- # The server closed the connection, probably because the Keep Alive has expired
- # We should be safe to retry, as ClickHouse should not have processed anything on a connection
- # that it killed. We also only retry this once, as multiple disconnects are unlikely to be
- # related to the Keep Alive settings
+ # Always allow at least one retry on a clean connection error so a single stale
+ # keep-alive socket doesn't surface to the caller, and additionally honor the
+ # retries budget when it is larger (e.g. query_retries for reads), so that
+ # bursts of stale pooled connections can be drained before giving up.
+ max_attempts = max(2, retries + 1)
+ if isinstance(ex.__context__, ConnectionResetError) and attempts < max_attempts:
+ # The server closed the connection, probably because the Keep Alive has expired.
+ # We should be safe to retry, as ClickHouse should not have processed anything on
+ # a connection that it killed.
body = kwargs.get("body")
if retry_body is not None:
kwargs["body"] = retry_body()
- logger.debug("Retrying remotely closed connection with rebuilt body")
+ logger.debug("Retrying remotely closed connection with rebuilt body (attempt %s/%s)", attempts, max_attempts)
+ time.sleep(0.1 * attempts)
continue
if body is None or isinstance(body, (bytes, bytearray, str)):
- logger.debug("Retrying remotely closed connection")
+ logger.debug("Retrying remotely closed connection (attempt %s/%s)", attempts, max_attempts)
+ time.sleep(0.1 * attempts)
continue
logger.warning("Unexpected Http Driver Exception")
err_url = f" ({self.url})" if self.show_clickhouse_errors else ""
@@ -612,7 +619,7 @@ class HttpClient(Client):
See BaseClient doc_string for this method
"""
body, params, fields = self._prep_raw_query(query, parameters, settings, fmt, use_database, external_data)
- return self._raw_request(body, params, fields=fields, headers=transport_settings).data
+ return self._raw_request(body, params, fields=fields, headers=transport_settings, retries=self.query_retries).data
def raw_stream(
self,
@@ -628,7 +635,15 @@ class HttpClient(Client):
See BaseClient doc_string for this method
"""
body, params, fields = self._prep_raw_query(query, parameters, settings, fmt, use_database, external_data)
- return self._raw_request(body, params, fields=fields, stream=True, server_wait=False, headers=transport_settings)
+ return self._raw_request(
+ body,
+ params,
+ fields=fields,
+ stream=True,
+ server_wait=False,
+ headers=transport_settings,
+ retries=self.query_retries,
+ )
def _prep_raw_query(
self,
diff --git a/contrib/python/clickhouse-connect/ya.make b/contrib/python/clickhouse-connect/ya.make
index 5a14deb8876..f57136f7567 100644
--- a/contrib/python/clickhouse-connect/ya.make
+++ b/contrib/python/clickhouse-connect/ya.make
@@ -2,7 +2,7 @@
PY3_LIBRARY()
-VERSION(1.0.1)
+VERSION(1.1.0)
LICENSE(Apache-2.0)
@@ -31,19 +31,27 @@ PY_SRCS(
clickhouse_connect/__init__.py
clickhouse_connect/_version.py
clickhouse_connect/cc_sqlalchemy/__init__.py
+ clickhouse_connect/cc_sqlalchemy/alembic/__init__.py
+ clickhouse_connect/cc_sqlalchemy/alembic/adapter.py
+ clickhouse_connect/cc_sqlalchemy/alembic/impl.py
+ clickhouse_connect/cc_sqlalchemy/alembic/utils.py
clickhouse_connect/cc_sqlalchemy/datatypes/__init__.py
clickhouse_connect/cc_sqlalchemy/datatypes/base.py
clickhouse_connect/cc_sqlalchemy/datatypes/sqltypes.py
clickhouse_connect/cc_sqlalchemy/ddl/__init__.py
clickhouse_connect/cc_sqlalchemy/ddl/custom.py
+ clickhouse_connect/cc_sqlalchemy/ddl/dictionary.py
clickhouse_connect/cc_sqlalchemy/ddl/tableengine.py
clickhouse_connect/cc_sqlalchemy/dialect.py
+ clickhouse_connect/cc_sqlalchemy/engines.py
clickhouse_connect/cc_sqlalchemy/inspector.py
clickhouse_connect/cc_sqlalchemy/sql/__init__.py
clickhouse_connect/cc_sqlalchemy/sql/clauses.py
clickhouse_connect/cc_sqlalchemy/sql/compiler.py
clickhouse_connect/cc_sqlalchemy/sql/ddlcompiler.py
clickhouse_connect/cc_sqlalchemy/sql/preparer.py
+ clickhouse_connect/cc_sqlalchemy/sql/sqlparse.py
+ clickhouse_connect/cc_sqlalchemy/types.py
clickhouse_connect/common.py
clickhouse_connect/datatypes/__init__.py
clickhouse_connect/datatypes/base.py
diff --git a/contrib/python/hypothesis/py3/.dist-info/METADATA b/contrib/python/hypothesis/py3/.dist-info/METADATA
index 053754adaf7..5765424bbb5 100644
--- a/contrib/python/hypothesis/py3/.dist-info/METADATA
+++ b/contrib/python/hypothesis/py3/.dist-info/METADATA
@@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: hypothesis
-Version: 6.152.9
+Version: 6.153.2
Summary: The property-based testing library for Python
Author-email: "David R. MacIver and Zac Hatfield-Dodds" <[email protected]>
License-Expression: MPL-2.0
diff --git a/contrib/python/hypothesis/py3/hypothesis/control.py b/contrib/python/hypothesis/py3/hypothesis/control.py
index d0aee23d2f7..5d94150846c 100644
--- a/contrib/python/hypothesis/py3/hypothesis/control.py
+++ b/contrib/python/hypothesis/py3/hypothesis/control.py
@@ -14,7 +14,8 @@ import random
from collections import defaultdict
from collections.abc import Callable, Generator, Sequence
from contextlib import contextmanager
-from typing import Any, Literal, NoReturn, Optional, overload
+from types import TracebackType
+from typing import TYPE_CHECKING, Any, Literal, NoReturn, Optional, overload
from weakref import WeakKeyDictionary
from hypothesis import Verbosity, settings
@@ -29,6 +30,9 @@ from hypothesis.utils.deprecation import note_deprecation
from hypothesis.utils.dynamicvariables import DynamicVariable
from hypothesis.vendor.pretty import ArgLabelsT, IDKey, PrettyPrintFunction, pretty
+if TYPE_CHECKING:
+ from typing_extensions import Self
+
def _calling_function_location(what: str, frame: Any) -> str:
where = frame.f_back
@@ -100,7 +104,7 @@ def current_build_context() -> "BuildContext":
@contextmanager
-def deprecate_random_in_strategy(fmt, *args):
+def deprecate_random_in_strategy(fmt: str, *args: Any) -> Generator[None, None, None]:
from hypothesis.internal import entropy
state_before = random.getstate()
@@ -221,12 +225,17 @@ class BuildContext:
return kwargs, arg_labels
- def __enter__(self):
+ def __enter__(self) -> "Self":
self.assign_variable = _current_build_context.with_value(self)
self.assign_variable.__enter__()
return self
- def __exit__(self, exc_type, exc_value, tb):
+ def __exit__(
+ self,
+ exc_type: type[BaseException] | None,
+ exc_value: BaseException | None,
+ tb: TracebackType | None,
+ ) -> None:
self.assign_variable.__exit__(exc_type, exc_value, tb)
errors = []
for task in self.tasks:
@@ -240,7 +249,7 @@ class BuildContext:
raise BaseExceptionGroup("Cleanup failed", errors) from exc_value
-def cleanup(teardown):
+def cleanup(teardown: Callable[[], Any]) -> None:
"""Register a function to be called when the current test has finished
executing. Any exceptions thrown in teardown will be printed but not
rethrown.
@@ -255,10 +264,11 @@ def cleanup(teardown):
context.tasks.append(teardown)
-def should_note():
+def should_note() -> bool:
context = _current_build_context.value
if context is None:
raise InvalidArgument("Cannot make notes outside of a test")
+ assert settings.default is not None
return context.is_final or settings.default.verbosity >= Verbosity.verbose
@@ -270,7 +280,7 @@ def note(value: object) -> None:
report(value)
-def event(value: str, payload: str | int | float = "") -> None:
+def event(value: str, payload: Any = "") -> None:
"""Record an event that occurred during this test. Statistics on the number of test
runs with each event will be reported at the end if you run Hypothesis in
statistics reporting mode.
@@ -283,17 +293,19 @@ def event(value: str, payload: str | int | float = "") -> None:
raise InvalidArgument("Cannot record events outside of a test")
avoid_realization = context.data.provider.avoid_realization
- payload = _event_to_string(
+ payload = _serialize_event(
payload, allowed_types=(str, int, float), avoid_realization=avoid_realization
)
- value = _event_to_string(value, avoid_realization=avoid_realization)
+ value = _serialize_event(value, avoid_realization=avoid_realization)
context.data.events[value] = payload
-_events_to_strings: WeakKeyDictionary = WeakKeyDictionary()
+_events_to_strings: WeakKeyDictionary[Any, str] = WeakKeyDictionary()
-def _event_to_string(event, *, allowed_types=str, avoid_realization):
+def _serialize_event(
+ event: Any, *, allowed_types: tuple[type, ...] = (str,), avoid_realization: bool
+) -> Any:
if isinstance(event, allowed_types):
return event
diff --git a/contrib/python/hypothesis/py3/hypothesis/errors.py b/contrib/python/hypothesis/py3/hypothesis/errors.py
index daf81828c97..c7de51eef44 100644
--- a/contrib/python/hypothesis/py3/hypothesis/errors.py
+++ b/contrib/python/hypothesis/py3/hypothesis/errors.py
@@ -8,11 +8,17 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
+from collections.abc import Mapping
from datetime import timedelta
-from typing import Any, Literal
+from typing import TYPE_CHECKING, Any, Literal
from hypothesis.internal.compat import ExceptionGroup
+if TYPE_CHECKING:
+ from hypothesis.internal.conjecture.choice import ChoiceConstraintsT
+else:
+ ChoiceConstraintsT = Mapping
+
class HypothesisException(Exception):
"""Generic parent class for exceptions thrown by Hypothesis."""
@@ -85,6 +91,13 @@ class FlakyReplay(Flaky):
self._interesting_origins = interesting_origins
+def _render_constraints(show: Mapping[str, object], other: Mapping[str, object]) -> str:
+ assert show.keys() == other.keys()
+ return ", ".join(
+ f"{k}={'...' if v == other[k] else repr(v)}" for k, v in show.items()
+ )
+
+
class FlakyStrategyDefinition(Flaky):
"""
This function appears to cause inconsistent data generation.
@@ -99,6 +112,39 @@ class FlakyStrategyDefinition(Flaky):
See also the :doc:`flaky failures tutorial </tutorial/flaky>`.
"""
+ _BASE_MESSAGE = (
+ "Inconsistent data generation! Data generation behaved differently "
+ "between test cases. Is your data generation depending on external "
+ "state?"
+ )
+
+ @classmethod
+ def with_detail(cls, detail: str) -> "FlakyStrategyDefinition":
+ return cls(f"{cls._BASE_MESSAGE}\n\n{detail}")
+
+ @classmethod
+ def from_mismatch(
+ cls,
+ expected_type: str,
+ expected_constraints: ChoiceConstraintsT,
+ actual_type: str,
+ actual_constraints: ChoiceConstraintsT,
+ ) -> "FlakyStrategyDefinition":
+ if actual_type != expected_type:
+ detail = (
+ "The second test case drew a different type of value than the first.\n"
+ f" first: {expected_type}\n"
+ f" second: {actual_type}\n"
+ )
+ else:
+ detail = (
+ f"The second test case drew type {actual_type} with different constraints "
+ "than the first.\n"
+ f" first: {_render_constraints(expected_constraints, actual_constraints)}\n"
+ f" second: {_render_constraints(actual_constraints, expected_constraints)}\n"
+ )
+ return cls.with_detail(detail)
+
class _WrappedBaseException(Exception):
"""Used internally for wrapping BaseExceptions as components of FlakyFailure."""
diff --git a/contrib/python/hypothesis/py3/hypothesis/extra/_patching.py b/contrib/python/hypothesis/py3/hypothesis/extra/_patching.py
index 0417703c213..006fd7f79fc 100644
--- a/contrib/python/hypothesis/py3/hypothesis/extra/_patching.py
+++ b/contrib/python/hypothesis/py3/hypothesis/extra/_patching.py
@@ -317,7 +317,18 @@ def make_patch(
fromfile=f"./{fname}", # git strips the first part of the path by default
tofile=f"./{fname}",
)
- diffs.append("".join(ud))
+ # difflib.unified_diff omits the `\ No newline at end of file` marker
+ # that git requires; add it ourselves. We check every line because the
+ # marker may apply to a `-` line that isn't the last line of the diff.
+ # See https://github.com/HypothesisWorks/hypothesis/issues/4744.
+ lines = []
+ for line in ud:
+ if line.endswith("\n"):
+ lines.append(line)
+ else:
+ lines.append(line + "\n")
+ lines.append("\\ No newline at end of file\n")
+ diffs.append("".join(lines))
return "".join(diffs)
diff --git a/contrib/python/hypothesis/py3/hypothesis/extra/django/_fields.py b/contrib/python/hypothesis/py3/hypothesis/extra/django/_fields.py
index 2dece58af29..185af5a45a8 100644
--- a/contrib/python/hypothesis/py3/hypothesis/extra/django/_fields.py
+++ b/contrib/python/hypothesis/py3/hypothesis/extra/django/_fields.py
@@ -242,7 +242,7 @@ def _for_text(field):
# We can infer a vastly more precise strategy by considering the
# validators as well as the field type. This is a minimal proof of
# concept, but we intend to leverage the idea much more heavily soon.
- # See https://github.com/HypothesisWorks/hypothesis-python/issues/1116
+ # See https://github.com/HypothesisWorks/hypothesis/issues/1116
regexes = [
re.compile(v.regex, v.flags) if isinstance(v.regex, str) else v.regex
for v in field.validators
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py
index ff2d8d521ca..f05b8201376 100644
--- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py
+++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py
@@ -30,6 +30,7 @@ from typing import (
from hypothesis.errors import (
CannotProceedScopeT,
ChoiceTooLarge,
+ FlakyStrategyDefinition,
Frozen,
InvalidArgument,
StopTest,
@@ -1202,7 +1203,15 @@ class ConjectureData:
self.start_span(label=label)
try:
if not at_top_level:
- return unwrapped.do_draw(self)
+ try:
+ return unwrapped.do_draw(self)
+ except FlakyStrategyDefinition as err:
+ # Record the strategy stack as the error unwinds, so that an
+ # inconsistent-generation failure is explained in terms of the
+ # strategies being drawn from, not just the choice sequence.
+ # The top-level draw adds its own "while generating ..." note.
+ add_note(err, f"while drawing from {strategy!r}")
+ raise
assert start_time is not None
key = observe_as or f"generate:unlabeled_{len(self.draw_times)}"
try:
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py
index df02449a917..a4b44af053d 100644
--- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py
+++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py
@@ -53,13 +53,6 @@ class PreviouslyUnseenBehaviour(HypothesisException):
pass
-_FLAKY_STRAT_MSG = (
- "Inconsistent data generation! Data generation behaved differently "
- "between different runs. Is your data generation depending on external "
- "state?"
-)
-
-
EMPTY: frozenset[int] = frozenset()
@@ -442,7 +435,7 @@ class TreeNode:
self.__forced = set()
self.__forced.add(i)
- def split_at(self, i: int) -> None:
+ def split_at(self, i: int, *, new_value: object = None) -> None:
"""
Splits the tree so that it can incorporate a decision at the draw call
corresponding to the node at position i.
@@ -451,7 +444,11 @@ class TreeNode:
"""
if i in self.forced:
- raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
+ raise FlakyStrategyDefinition.with_detail(
+ f"The {self.choice_types[i]} value was forced to "
+ f"{self.values[i]!r} in the first run, but the second run "
+ f"drew {new_value!r}.\n"
+ )
assert not self.is_exhausted
@@ -931,7 +928,7 @@ class DataTree:
key: ChoiceT,
random: Random,
) -> ChoiceT:
- (generator, children, rejected) = self._get_children_cache(
+ generator, children, rejected = self._get_children_cache(
choice_type, constraints, key=key
)
# Keep a stock of 100 potentially-valid children at all times.
@@ -961,7 +958,7 @@ class DataTree:
child: ChoiceT,
key: ChoiceT,
) -> None:
- (_generator, children, rejected) = self._get_children_cache(
+ _generator, children, rejected = self._get_children_cache(
choice_type, constraints, key=key
)
rejected.add(child)
@@ -1050,7 +1047,12 @@ class TreeRecordingObserver(DataObserver):
choice_type != node.choice_types[i]
or constraints != node.constraints[i]
):
- raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
+ raise FlakyStrategyDefinition.from_mismatch(
+ node.choice_types[i],
+ node.constraints[i],
+ choice_type,
+ constraints,
+ )
# Note that we don't check whether a previously
# forced value is now free. That will be caught
# if we ever split the node there, but otherwise
@@ -1058,9 +1060,12 @@ class TreeRecordingObserver(DataObserver):
# means we skip a hash set lookup on every
# draw and that's a pretty niche failure mode.
if was_forced and i not in node.forced:
- raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
+ raise FlakyStrategyDefinition.with_detail(
+ f"The {choice_type} value was forced to a specific value "
+ f"but was not forced on the first run.\n"
+ )
if value != node.values[i]:
- node.split_at(i)
+ node.split_at(i, new_value=value)
assert i == len(node.values)
new_node = TreeNode()
assert isinstance(node.transition, Branch)
@@ -1095,7 +1100,7 @@ class TreeRecordingObserver(DataObserver):
compute_max_children(choice_type, constraints) == 1
and not was_forced
):
- node.split_at(i)
+ node.split_at(i, new_value=value)
assert isinstance(node.transition, Branch)
self._current_node = node.transition.children[value]
self._index_in_current_node = 0
@@ -1103,11 +1108,18 @@ class TreeRecordingObserver(DataObserver):
assert trans.status != Status.OVERRUN
# We tried to draw where history says we should have
# stopped
- raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
+ raise FlakyStrategyDefinition.with_detail(
+ "The second run drew more data than the first run.\n"
+ )
else:
assert isinstance(trans, Branch), trans
if choice_type != trans.choice_type or constraints != trans.constraints:
- raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
+ raise FlakyStrategyDefinition.from_mismatch(
+ trans.choice_type,
+ trans.constraints,
+ choice_type,
+ constraints,
+ )
try:
self._current_node = trans.children[value]
except KeyError:
@@ -1127,7 +1139,10 @@ class TreeRecordingObserver(DataObserver):
self._current_node.transition is not None
and not isinstance(self._current_node.transition, Killed)
):
- raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
+ raise FlakyStrategyDefinition.with_detail(
+ "The second run stopped drawing earlier than the first run, "
+ "which continued to draw more data.\n"
+ )
if self._current_node.transition is None:
self._current_node.transition = Killed(TreeNode())
@@ -1148,7 +1163,10 @@ class TreeRecordingObserver(DataObserver):
node = self._current_node
if i < len(node.values) or isinstance(node.transition, Branch):
- raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
+ raise FlakyStrategyDefinition.with_detail(
+ "The second run stopped drawing earlier than the first run, "
+ "which continued to draw more data.\n"
+ )
new_transition = Conclusion(status, interesting_origin)
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py
index 70df17d0502..ded037098d6 100644
--- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py
+++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py
@@ -27,6 +27,7 @@ from hypothesis.database import ExampleDatabase, choices_from_bytes, choices_to_
from hypothesis.errors import (
BackendCannotProceed,
FlakyBackendFailure,
+ FlakyStrategyDefinition,
HypothesisException,
InvalidArgument,
StopTest,
@@ -103,7 +104,7 @@ CACHE_SIZE: int = 10000
MIN_TEST_CALLS: int = 10
# we use this to isolate Hypothesis from interacting with the global random,
-# to make it easier to reason about our global random warning logic easier (see
+# to make it easier to reason about our global random warning logic (see
# deprecate_random_in_strategy).
_random = Random()
@@ -562,8 +563,15 @@ class ConjectureRunner:
interrupted = True
data.freeze()
return
- except BaseException:
+ except BaseException as err:
data.freeze()
+ if isinstance(err, FlakyStrategyDefinition) and data._stateful_repr_parts:
+ # In a stateful test, surface the steps leading up to the
+ # inconsistency.
+ report(
+ "Steps leading up to this error:\n"
+ + "\n".join(f" {s}" for s in data._stateful_repr_parts)
+ )
if self.settings.backend != "hypothesis":
try:
realize_choices(data, for_failure=True)
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/providers.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/providers.py
index a8483a3650a..afba5962315 100644
--- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/providers.py
+++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/providers.py
@@ -310,13 +310,15 @@ _local_constants = Constants(
bytes=SortedSet(),
strings=SortedSet(),
)
-# modules that we've already seen and processed for local constants. These are
+# Modules that we've already seen and processed for local constants. These are
# are all modules, not necessarily local ones. This lets us quickly see which
# modules are new without an expensive path.resolve() or is_local_module_file
# cache lookup.
-# We track by module object when hashable, falling back to the module name
-# (str key in sys.modules) for unhashable entries like SimpleNamespace.
-_seen_modules: set = set()
+#
+# We track by id so we can handle users that put unhashable types like SimpleNamespace
+# into sys.modules. ModuleType.__hash__ falls back to id, so this is equivalent
+# in the standard case.
+_seen_modules: set[int] = set()
_sys_modules_len: int | None = None
@@ -352,27 +354,20 @@ def _get_local_constants() -> Constants:
# careful: store sys.modules length when we first check to avoid race conditions
# with other threads loading a module before we set _sys_modules_len.
if (sys_modules_len := len(sys.modules)) != _sys_modules_len:
- new_modules = []
- for name, module in list(sys.modules.items()):
- try:
- seen = module in _seen_modules
- except TypeError:
- # unhashable module (e.g. SimpleNamespace); fall back to name
- seen = name in _seen_modules
- if not seen:
- new_modules.append((name, module))
+ new_modules = [
+ module
+ for module in list(sys.modules.values())
+ if id(module) not in _seen_modules
+ ]
# Repeated SortedSet unions are expensive. Do the initial unions on a
# set(), then do a one-time union with _local_constants after.
new_constants = Constants()
- for name, module in new_modules:
+ for module in new_modules:
if (
module_file := getattr(module, "__file__", None)
) is not None and is_local_module_file(module_file):
new_constants |= constants_from_module(module)
- try:
- _seen_modules.add(module)
- except TypeError:
- _seen_modules.add(name)
+ _seen_modules.add(id(module))
_local_constants |= new_constants
_sys_modules_len = sys_modules_len
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinker.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinker.py
index 5b283bb7515..f36739f0f4e 100644
--- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinker.py
+++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinker.py
@@ -30,6 +30,7 @@ from hypothesis.internal.conjecture.choice import (
choice_key,
choice_permitted,
choice_to_index,
+ choices_key,
)
from hypothesis.internal.conjecture.data import (
ConjectureData,
@@ -663,15 +664,18 @@ class Shrinker:
"""
nodes = self.nodes
target_types = tuple(nodes[i].type for i in range(start, end))
- current_keys = tuple(choice_key(nodes[i].value) for i in range(start, end))
- seen: set[tuple[Any, ...]] = {current_keys}
- for s2, e2 in sorted(self.shrink_target.arg_slices):
- if (s2, e2) == (start, end) or (e2 - s2) != (end - start):
+ current_key = choices_key(tuple(nodes[i].value for i in range(start, end)))
+ seen: set[tuple[Any, ...]] = {current_key}
+ for start2, end2 in sorted(self.shrink_target.arg_slices):
+ if (start2, end2) == (start, end) or (end2 - start2) != (end - start):
continue
- if tuple(nodes[s2 + j].type for j in range(end - start)) != target_types:
+ if (
+ tuple(nodes[start2 + j].type for j in range(end - start))
+ != target_types
+ ):
continue
- borrowed = tuple(nodes[s2 + j].value for j in range(end - start))
- key = tuple(choice_key(v) for v in borrowed)
+ borrowed = tuple(nodes[start2 + j].value for j in range(end - start))
+ key = choices_key(borrowed)
if key in seen:
continue
seen.add(key)
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/common.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/common.py
index 8290ec6737d..5bf746d2929 100644
--- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/common.py
+++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/common.py
@@ -161,7 +161,7 @@ class Shrinker:
Does nothing by default.
"""
- def short_circuit(self):
+ def short_circuit(self) -> bool:
"""Possibly attempt to do some shrinking.
If this returns True, the ``run`` method will terminate early
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/coverage.py b/contrib/python/hypothesis/py3/hypothesis/internal/coverage.py
index b4d148b5f74..b793200be04 100644
--- a/contrib/python/hypothesis/py3/hypothesis/internal/coverage.py
+++ b/contrib/python/hypothesis/py3/hypothesis/internal/coverage.py
@@ -34,7 +34,7 @@ Func = TypeVar("Func", bound=Callable)
pretty_file_name_cache: dict[str, str] = {}
-def pretty_file_name(f):
+def pretty_file_name(f: str) -> str:
try:
return pretty_file_name_cache[f]
except KeyError:
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/entropy.py b/contrib/python/hypothesis/py3/hypothesis/internal/entropy.py
index 63477860bf5..84a277d112d 100644
--- a/contrib/python/hypothesis/py3/hypothesis/internal/entropy.py
+++ b/contrib/python/hypothesis/py3/hypothesis/internal/entropy.py
@@ -199,7 +199,7 @@ def get_seeder_and_restorer(
# handles this for us.
#
# This command reproduces at time of writing:
- # pytest hypothesis-python/tests/ -k test_intervals_are_equivalent_to_their_lists
+ # pytest hypothesis/tests/ -k test_intervals_are_equivalent_to_their_lists
# --parallel-threads 2
for k, ref in RANDOMS_TO_MANAGE.data.copy().items(): # type: ignore
r = ref()
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/healthcheck.py b/contrib/python/hypothesis/py3/hypothesis/internal/healthcheck.py
index 356abc48117..583f5c13c53 100644
--- a/contrib/python/hypothesis/py3/hypothesis/internal/healthcheck.py
+++ b/contrib/python/hypothesis/py3/hypothesis/internal/healthcheck.py
@@ -8,14 +8,21 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
+from typing import TYPE_CHECKING
+
from hypothesis.errors import FailedHealthCheck
+if TYPE_CHECKING:
+ from hypothesis._settings import HealthCheck, settings as Settings
+
-def fail_health_check(settings, message, label):
+def fail_health_check(
+ settings: "Settings", message: str, health_check: "HealthCheck"
+) -> None:
# Tell pytest to omit the body of this function from tracebacks
# https://docs.pytest.org/en/latest/example/simple.html#writing-well-integrated-assertion-helpers
__tracebackhide__ = True
- if label in settings.suppress_health_check:
+ if health_check in settings.suppress_health_check:
return
raise FailedHealthCheck(message)
diff --git a/contrib/python/hypothesis/py3/hypothesis/provisional.py b/contrib/python/hypothesis/py3/hypothesis/provisional.py
index 8becf9269af..d5b5e07910d 100644
--- a/contrib/python/hypothesis/py3/hypothesis/provisional.py
+++ b/contrib/python/hypothesis/py3/hypothesis/provisional.py
@@ -16,6 +16,7 @@ Point releases may move or break the contents at any time!
Internet strategies should conform to :rfc:`3986` or the authoritative
definitions it links to. If not, report the bug!
"""
+
# https://tools.ietf.org/html/rfc3696
import string
diff --git a/contrib/python/hypothesis/py3/hypothesis/stateful.py b/contrib/python/hypothesis/py3/hypothesis/stateful.py
index 2103fdcb2c5..24e6588d11e 100644
--- a/contrib/python/hypothesis/py3/hypothesis/stateful.py
+++ b/contrib/python/hypothesis/py3/hypothesis/stateful.py
@@ -50,7 +50,7 @@ from hypothesis.internal.reflection import (
)
from hypothesis.internal.validation import check_type
from hypothesis.reporting import current_verbosity, report
-from hypothesis.strategies._internal.featureflags import FeatureStrategy
+from hypothesis.strategies._internal.featureflags import FeatureFlags, FeatureStrategy
from hypothesis.strategies._internal.strategies import (
Ex,
OneOfStrategy,
@@ -132,8 +132,7 @@ def get_state_machine_test(
def output(s):
if print_steps:
report(s)
- if observability_enabled():
- cd._stateful_repr_parts.append(s)
+ cd._stateful_repr_parts.append(s)
try:
output(f"state = {machine.__class__.__name__}()")
@@ -1099,13 +1098,30 @@ class RuleStrategy(SearchStrategy):
def __init__(self, machine: RuleBasedStateMachine) -> None:
super().__init__()
self.machine = machine
- self.rules = machine.rules.copy()
+ self.rules, rule_names, self.rules_strategy = self._setup_for(type(machine))
self.enabled_rules_strategy = st.shared(
- FeatureStrategy(at_least_one_of={r.function.__name__ for r in self.rules}),
+ FeatureStrategy(at_least_one_of=rule_names),
key=("enabled rules", machine),
)
+ # Reuse a single filtered strategy across steps instead of rebuilding it
+ # each time. Rebuilding forced a recompute of the (uncached) sampled_from
+ # label, which is O(number of rules) per step. The filter predicate reads
+ # the feature flags set by do_draw on each step.
+ self._feature_flags: FeatureFlags | None = None
+ self._enabled_rules_strategy = self.rules_strategy.filter(self._rule_is_enabled)
+
+ @classmethod
+ @lru_cache
+ def _setup_for(
+ cls, machine_type: type[RuleBasedStateMachine]
+ ) -> tuple[list["Rule"], frozenset[str], SearchStrategy]:
+ # Cache (per machine class) the work of sorting the rules and building
+ # the sampled_from strategy, which is O(number of rules) and would
+ # otherwise be repeated every time the machine is instantiated; see
+ # https://github.com/HypothesisWorks/hypothesis/issues/4465.
+ rules = machine_type.setup_state().rules.copy()
# The order is a bit arbitrary. Primarily we're trying to group rules
# that write to the same location together, and to put rules with no
# target first as they have less effect on the structure. We order from
@@ -1113,18 +1129,29 @@ class RuleStrategy(SearchStrategy):
# data. This probably won't work especially well and we could be
# smarter about it, but it's better than just doing it in definition
# order.
- self.rules.sort(
+ rules.sort(
key=lambda rule: (
sorted(rule.targets),
len(rule.arguments),
rule.function.__name__,
)
)
- self.rules_strategy = st.sampled_from(self.rules)
+ rule_names = frozenset(r.function.__name__ for r in rules)
+ return (rules, rule_names, st.sampled_from(rules))
def __repr__(self):
return f"{self.__class__.__name__}(machine={self.machine.__class__.__name__}({{...}}))"
+ def _rule_is_enabled(self, r):
+ # Note: The order of the filters here is actually quite important,
+ # because checking is_enabled makes choices, so increases the size of
+ # the choice sequence. This means that if we are in a case where many
+ # rules are invalid we would make a lot more choices if we ask if they
+ # are enabled before we ask if they are valid, so our test cases would
+ # be artificially large.
+ assert self._feature_flags is not None
+ return self.is_valid(r) and self._feature_flags.is_enabled(r.function.__name__)
+
def do_draw(self, data):
if not any(self.is_valid(rule) for rule in self.rules):
rules = ", ".join([rule.function.__name__ for rule in self.rules])
@@ -1134,18 +1161,8 @@ class RuleStrategy(SearchStrategy):
)
raise InvalidDefinition(msg) from None
- feature_flags = data.draw(self.enabled_rules_strategy)
-
- def rule_is_enabled(r):
- # Note: The order of the filters here is actually quite important,
- # because checking is_enabled makes choices, so increases the size of
- # the choice sequence. This means that if we are in a case where many
- # rules are invalid we would make a lot more choices if we ask if they
- # are enabled before we ask if they are valid, so our test cases would
- # be artificially large.
- return self.is_valid(r) and feature_flags.is_enabled(r.function.__name__)
-
- rule = data.draw(self.rules_strategy.filter(rule_is_enabled))
+ self._feature_flags = data.draw(self.enabled_rules_strategy)
+ rule = data.draw(self._enabled_rules_strategy)
arguments = {}
for k, strat in rule.arguments_strategies.items():
diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py
index 3a8e45ad0a2..9ef4a555c41 100644
--- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py
+++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py
@@ -508,7 +508,7 @@ def iterables(
# {"a": st.integers(), "b": st.booleans()}
# )
# * the arguments may be of any dict-compatible type, in which case the return
-# value will be of that type instead of dit
+# value will be of that type instead of dict
#
# Overloads may help here, but I doubt we'll be able to satisfy all these
# constraints.
@@ -1409,6 +1409,11 @@ def _from_type(thing: type[Ex]) -> SearchStrategy[Ex]:
if not types.is_a_type(thing):
if isinstance(thing, str):
# See https://github.com/HypothesisWorks/hypothesis/issues/3016
+ # String forward references like "LinkedList" can be converted to
+ # ForwardRef objects if they are valid Python identifiers.
+ # See https://github.com/HypothesisWorks/hypothesis/issues/4542
+ if thing.isidentifier():
+ return deferred(lambda thing=thing: from_type(typing.ForwardRef(thing)))
raise InvalidArgument(
f"Got {thing!r} as a type annotation, but the forward-reference "
"could not be resolved from a string to a type. Consider using "
diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/recursive.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/recursive.py
index e9af4c063b0..e507462b28d 100644
--- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/recursive.py
+++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/recursive.py
@@ -10,10 +10,12 @@
import threading
import warnings
-from collections.abc import Callable
+from collections.abc import Callable, Generator
from contextlib import contextmanager
+from typing import Any, TypeVar
from hypothesis.errors import HypothesisWarning, InvalidArgument
+from hypothesis.internal.conjecture.data import ConjectureData
from hypothesis.internal.reflection import (
get_pretty_function_description,
is_first_param_referenced_in_function,
@@ -27,31 +29,33 @@ from hypothesis.strategies._internal.strategies import (
)
from hypothesis.utils.deprecation import note_deprecation
+T = TypeVar("T")
+
class LimitReached(BaseException):
pass
-class LimitedStrategy(SearchStrategy):
- def __init__(self, strategy):
+class LimitedStrategy(SearchStrategy[T]):
+ def __init__(self, strategy: SearchStrategy[T]):
super().__init__()
self.base_strategy = strategy
self._threadlocal = threading.local()
@property
- def marker(self):
+ def marker(self) -> int:
return getattr(self._threadlocal, "marker", 0)
@marker.setter
- def marker(self, value):
+ def marker(self, value: int) -> None:
self._threadlocal.marker = value
@property
- def currently_capped(self):
+ def currently_capped(self) -> bool:
return getattr(self._threadlocal, "currently_capped", False)
@currently_capped.setter
- def currently_capped(self, value):
+ def currently_capped(self, value: bool) -> None:
self._threadlocal.currently_capped = value
def __repr__(self) -> str:
@@ -60,7 +64,7 @@ class LimitedStrategy(SearchStrategy):
def do_validate(self) -> None:
self.base_strategy.validate()
- def do_draw(self, data):
+ def do_draw(self, data: ConjectureData) -> T:
assert self.currently_capped
if self.marker <= 0:
raise LimitReached
@@ -68,7 +72,7 @@ class LimitedStrategy(SearchStrategy):
return data.draw(self.base_strategy)
@contextmanager
- def capped(self, max_templates):
+ def capped(self, max_templates: int) -> Generator[None, None, None]:
try:
was_capped = self.currently_capped
self.currently_capped = True
@@ -157,7 +161,7 @@ class RecursiveStrategy(SearchStrategy):
f"max_leaves={self.max_leaves!r}"
)
- def do_draw(self, data):
+ def do_draw(self, data: ConjectureData) -> Any:
min_leaves_retries = 0
while True:
try:
diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/regex.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/regex.py
index b51e90ec0c9..19e6a9e82ff 100644
--- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/regex.py
+++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/regex.py
@@ -10,6 +10,8 @@
import operator
import re
+from dataclasses import dataclass
+from re import Pattern
from hypothesis.errors import InvalidArgument
from hypothesis.internal import charmap
@@ -29,9 +31,12 @@ except ImportError: # Python < 3.11
ATOMIC_GROUP = object()
POSSESSIVE_REPEAT = object()
+from typing import Any, AnyStr
+
from hypothesis import reject, strategies as st
-from hypothesis.internal.charmap import as_general_categories, categories
+from hypothesis.internal.charmap import CategoryName, as_general_categories, categories
from hypothesis.internal.compat import add_note, int_to_byte
+from hypothesis.strategies import SearchStrategy
UNICODE_CATEGORIES = set(categories())
@@ -58,7 +63,7 @@ BYTES_LOOKUP = {
}
-GROUP_CACHE_STRATEGY: st.SearchStrategy[dict] = st.shared(
+GROUP_CACHE_STRATEGY: SearchStrategy[dict] = st.shared(
st.builds(dict), key="hypothesis.regex.group_cache"
)
@@ -101,19 +106,16 @@ def clear_cache_after_draw(draw, base_strategy):
return result
-def chars_not_in_alphabet(alphabet, string):
- # Given a string, return a tuple of the characters which are not in alphabet
+def chars_not_in_alphabet(alphabet: SearchStrategy | None, string: str) -> set[str]:
+ # given a string, returns the characters which are not in the alphabet
if alphabet is None:
- return ()
- intset = unwrap_strategies(alphabet).intervals
- return tuple(c for c in string if c not in intset)
+ return set()
+ return {c for c in string if c not in unwrap_strategies(alphabet).intervals}
+@dataclass(slots=True, frozen=False)
class Context:
- __slots__ = ["flags"]
-
- def __init__(self, flags):
- self.flags = flags
+ flags: re.RegexFlag
class CharactersBuilder:
@@ -126,13 +128,19 @@ class CharactersBuilder:
:param flags: Regex flags. They affect how and which characters are matched
"""
- def __init__(self, *, negate=False, flags=0, alphabet):
- self._categories = set()
- self._whitelist_chars = set()
- self._blacklist_chars = set()
+ def __init__(
+ self,
+ *,
+ negate: bool = False,
+ flags: re.RegexFlag = re.RegexFlag(0),
+ alphabet: SearchStrategy,
+ ):
+ self._categories: set[CategoryName] = set()
+ self._whitelist_chars: set[str] = set()
+ self._blacklist_chars: set[str] = set()
self._negate = negate
self._ignorecase = flags & re.IGNORECASE
- self.code_to_char = chr
+ self.code_to_char: Any = chr
self._alphabet = unwrap_strategies(alphabet)
if flags & re.ASCII:
self._alphabet = OneCharStringStrategy(
@@ -140,7 +148,7 @@ class CharactersBuilder:
)
@property
- def strategy(self):
+ def strategy(self) -> SearchStrategy:
"""Returns resulting strategy that generates configured char set."""
# Start by getting the set of all characters allowed by the pattern
white_chars = self._whitelist_chars - self._blacklist_chars
@@ -180,7 +188,7 @@ class CharactersBuilder:
else:
raise NotImplementedError(f"Unknown character category: {category}")
- def add_char(self, c):
+ def add_char(self, c: str) -> None:
"""Add given char to the whitelist."""
self._whitelist_chars.add(c)
if (
@@ -192,7 +200,7 @@ class CharactersBuilder:
class BytesBuilder(CharactersBuilder):
- def __init__(self, *, negate=False, flags=0):
+ def __init__(self, *, negate: bool = False, flags: re.RegexFlag = re.RegexFlag(0)):
self._whitelist_chars = set()
self._blacklist_chars = set()
self._negate = negate
@@ -227,13 +235,18 @@ def maybe_pad(draw, regex, strategy, left_pad_strategy, right_pad_strategy):
return result
-def base_regex_strategy(regex, parsed=None, alphabet=None):
+def base_regex_strategy(
+ regex: Pattern[AnyStr],
+ *,
+ parsed: sre_parse.SubPattern | None = None,
+ alphabet: SearchStrategy | None,
+) -> SearchStrategy:
if parsed is None:
parsed = sre_parse.parse(regex.pattern, flags=regex.flags)
try:
s = _strategy(
parsed,
- context=Context(flags=regex.flags),
+ context=Context(flags=re.RegexFlag(regex.flags)),
is_unicode=isinstance(regex.pattern, str),
alphabet=alphabet,
)
@@ -244,9 +257,13 @@ def base_regex_strategy(regex, parsed=None, alphabet=None):
def regex_strategy(
- regex, fullmatch, *, alphabet, _temp_jsonschema_hack_no_end_newline=False
-):
- if not hasattr(regex, "pattern"):
+ regex: AnyStr | Pattern[AnyStr],
+ fullmatch: bool,
+ *,
+ alphabet: SearchStrategy | None,
+ _temp_jsonschema_hack_no_end_newline: bool = False,
+) -> SearchStrategy:
+ if not isinstance(regex, Pattern):
regex = re.compile(regex)
is_unicode = isinstance(regex.pattern, str)
@@ -256,18 +273,22 @@ def regex_strategy(
if fullmatch:
if not parsed:
return st.just("" if is_unicode else b"")
- return base_regex_strategy(regex, parsed, alphabet).filter(regex.fullmatch)
+ return base_regex_strategy(regex, parsed=parsed, alphabet=alphabet).filter(
+ regex.fullmatch
+ )
if not parsed:
if is_unicode:
+ assert alphabet is not None
return st.text(alphabet=alphabet)
else:
return st.binary()
if is_unicode:
- base_padding_strategy = st.text(alphabet=alphabet)
- empty = st.just("")
- newline = st.just("\n")
+ assert alphabet is not None
+ base_padding_strategy: SearchStrategy = st.text(alphabet=alphabet)
+ empty: SearchStrategy = st.just("")
+ newline: SearchStrategy = st.just("\n")
else:
base_padding_strategy = st.binary()
empty = st.just(b"")
@@ -304,12 +325,20 @@ def regex_strategy(
else:
left_pad = empty
- base = base_regex_strategy(regex, parsed, alphabet).filter(regex.search)
+ base = base_regex_strategy(regex, parsed=parsed, alphabet=alphabet).filter(
+ regex.search
+ )
return maybe_pad(regex, base, left_pad, right_pad)
-def _strategy(codes, context, is_unicode, *, alphabet):
+def _strategy(
+ codes: sre_parse.SubPattern,
+ context: Context,
+ is_unicode: bool,
+ *,
+ alphabet: SearchStrategy | None,
+) -> SearchStrategy:
"""Convert SRE regex parse tree to strategy that generates strings matching
that regex represented by that parse tree.
@@ -341,8 +370,8 @@ def _strategy(codes, context, is_unicode, *, alphabet):
return _strategy(codes, context, is_unicode, alphabet=alphabet)
if is_unicode:
- empty = ""
- to_char = chr
+ empty: Any = ""
+ to_char: Any = chr
else:
empty = b""
to_char = int_to_byte
@@ -441,6 +470,7 @@ def _strategy(codes, context, is_unicode, *, alphabet):
# Regex '[abc0-9]' (set of characters)
negate = value[0][0] == sre.NEGATE
if is_unicode:
+ assert alphabet is not None
builder = CharactersBuilder(
flags=context.flags, negate=negate, alphabet=alphabet
)
@@ -465,7 +495,7 @@ def _strategy(codes, context, is_unicode, *, alphabet):
low, high = charset_value
chars = empty.join(map(builder.code_to_char, range(low, high + 1)))
if len(chars) == len(
- invalid := set(chars_not_in_alphabet(alphabet, chars))
+ invalid := chars_not_in_alphabet(alphabet, chars)
):
raise IncompatibleWithAlphabet(
f"Charset '[{chr(low)}-{chr(high)}]' contains characters {invalid!r} "
@@ -475,7 +505,9 @@ def _strategy(codes, context, is_unicode, *, alphabet):
if isinstance(c, int):
c = int_to_byte(c)
if c not in invalid:
- builder.add_char(c)
+ # add_char is typed as str, but BytesBuilder shares it
+ # and add_char's body works for bytes too.
+ builder.add_char(c) # type: ignore[arg-type]
elif charset_code == sre.CATEGORY:
# Regex '[\w]' (char category)
builder.add_category(charset_value)
diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/strategies.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/strategies.py
index cbeac0c42e5..f2ff62bfd66 100644
--- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/strategies.py
+++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/strategies.py
@@ -234,7 +234,7 @@ class SearchStrategy(Generic[Ex]):
# this works so I'm not looking into it further atm.
__label: int | UniqueIdentifier | None = None
- def __init__(self):
+ def __init__(self) -> None:
self.validate_called: dict[int, bool] = {}
def is_currently_empty(self, data: ConjectureData) -> bool:
diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/types.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/types.py
index b2b3eab417a..b65fea49398 100644
--- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/types.py
+++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/types.py
@@ -24,6 +24,7 @@ import os
import random
import re
import sys
+import types
import typing
import uuid
import warnings
@@ -219,6 +220,43 @@ def type_sorting_key(t):
return (is_container, repr(t))
+def _resolve_forward_ref_in_caller(forward_arg: str) -> typing.Any:
+ """Try to resolve a forward reference name by walking up the call stack.
+
+ This allows us to resolve recursive forward references like:
+ A = list[Union["A", str]]
+
+ where "A" refers to the type alias being defined.
+
+ To avoid false positives from namespace collisions, we only return a value
+ if all frames that define this name have the same value (unambiguous).
+ """
+ found_value: typing.Any = None
+ found_loc: str | None = None
+ frame: types.FrameType | None = sys._getframe()
+ while frame is not None:
+ loc = f"{frame.f_code.co_filename}:{frame.f_lineno}"
+ # Check locals first, then globals
+ for namespace in (frame.f_locals, frame.f_globals):
+ if forward_arg in namespace:
+ value = namespace[forward_arg]
+ if found_loc is None:
+ found_value = value
+ found_loc = loc
+ elif value is not found_value:
+ # Ambiguous: different values in different frames
+ warnings.warn(
+ f"Could not resolve forward reference {forward_arg!r} "
+ f"because it is ambiguous between {found_loc} and {loc}. "
+ "Rename one to remove the ambiguity.",
+ HypothesisWarning,
+ stacklevel=2,
+ )
+ return None
+ frame = frame.f_back
+ return found_value
+
+
def _compatible_args(args, superclass_args):
"""Check that the args of two generic types are compatible for try_issubclass."""
assert superclass_args is not None
@@ -628,6 +666,20 @@ def from_typing_type(thing):
and thing.__forward_arg__ in vars(builtins)
):
return st.from_type(getattr(builtins, thing.__forward_arg__))
+ elif (not mapping) and isinstance(thing, typing.ForwardRef):
+ resolved = None
+ if sys.version_info[:2] >= (3, 14):
+ # Prefer the ForwardRef.evaluate() method, added in 3.14.
+ try:
+ resolved = thing.evaluate()
+ except Exception:
+ pass
+ # Try to resolve non-builtin forward references by walking up the call stack.
+ # This handles recursive forward references like A = list[Union["A", str]].
+ if resolved is None: # pragma: no branch
+ resolved = _resolve_forward_ref_in_caller(thing.__forward_arg__)
+ if resolved is not None and is_a_type(resolved):
+ return st.from_type(resolved)
def is_maximal(t):
# For each k in the mapping, we use it if it's the most general type
@@ -961,13 +1013,17 @@ def resolve_Type(thing):
args = list(args)
for i, a in enumerate(args):
if type(a) in (typing.ForwardRef, str):
+ name = getattr(a, "__forward_arg__", a)
try:
- args[i] = getattr(builtins, getattr(a, "__forward_arg__", a))
+ args[i] = getattr(builtins, name)
except AttributeError:
- raise ResolutionFailed(
- f"Cannot find the type referenced by {thing} - try using "
- f"st.register_type_strategy({thing}, st.from_type(...))"
- ) from None
+ resolved = _resolve_forward_ref_in_caller(name)
+ if resolved is None or not is_a_type(resolved):
+ raise ResolutionFailed(
+ f"Cannot find the type referenced by {thing} - try using "
+ f"st.register_type_strategy({thing}, ...)"
+ ) from None
+ args[i] = resolved
return st.sampled_from(sorted(args, key=type_sorting_key))
diff --git a/contrib/python/hypothesis/py3/hypothesis/version.py b/contrib/python/hypothesis/py3/hypothesis/version.py
index a37af144e24..7ea0a18e4e5 100644
--- a/contrib/python/hypothesis/py3/hypothesis/version.py
+++ b/contrib/python/hypothesis/py3/hypothesis/version.py
@@ -8,5 +8,5 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
-__version_info__ = (6, 152, 9)
+__version_info__ = (6, 153, 2)
__version__ = ".".join(map(str, __version_info__))
diff --git a/contrib/python/hypothesis/py3/ya.make b/contrib/python/hypothesis/py3/ya.make
index 6b8398b4059..3390164dd29 100644
--- a/contrib/python/hypothesis/py3/ya.make
+++ b/contrib/python/hypothesis/py3/ya.make
@@ -2,7 +2,7 @@
PY3_LIBRARY()
-VERSION(6.152.9)
+VERSION(6.153.2)
LICENSE(MPL-2.0)