Intermediate changes

author: robot-piglet <robot-piglet@yandex-team.com> 2024-04-08 10:44:40 +0300
committer: robot-piglet <robot-piglet@yandex-team.com> 2024-04-08 12:32:02 +0300
commit: 32db6a72f86c4e0a30084436dc0a9b4b5e28c393 (patch)
tree: e7bf9d5688fd1f09620183410d3a936de9d7c08a
parent: 72eeab5172756159750eef875745e2a6f5b0004f (diff)
download: ydb-32db6a72f86c4e0a30084436dc0a9b4b5e28c393.tar.gz
20 files changed, 93 insertions, 39 deletions
diff --git a/contrib/python/clickhouse-connect/.dist-info/METADATA b/contrib/python/clickhouse-connect/.dist-info/METADATA
index 9b873eb781..7e568e330b 100644
--- a/contrib/python/clickhouse-connect/.dist-info/METADATA
+++ b/contrib/python/clickhouse-connect/.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: clickhouse-connect
-Version: 0.7.3
+Version: 0.7.4
 Summary: ClickHouse Database Core Driver for Python, Pandas, and Superset
 Home-page: https://github.com/ClickHouse/clickhouse-connect
 Author: ClickHouse Inc.
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py b/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py
index 0ab8b1bafd..33c23e6624 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py
@@ -1 +1 @@
-version = '0.7.3'
+version = '0.7.4'
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py
index ae6d9b7a8e..cf16ec24ec 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py
@@ -19,7 +19,8 @@ from clickhouse_connect.driver.external import ExternalData
 from clickhouse_connect.driver.insert import InsertContext
 from clickhouse_connect.driver.summary import QuerySummary
 from clickhouse_connect.driver.models import ColumnDef, SettingDef, SettingStatus
-from clickhouse_connect.driver.query import QueryResult, to_arrow, QueryContext, arrow_buffer, quote_identifier
+from clickhouse_connect.driver.query import QueryResult, to_arrow, to_arrow_batches, QueryContext, arrow_buffer, \
+    quote_identifier
 
 io.DEFAULT_BUFFER_SIZE = 1024 * 256
 logger = logging.getLogger(__name__)
@@ -255,7 +256,8 @@ class Client(ABC):
                   settings: Optional[Dict[str, Any]] = None,
                   fmt: str = None,
                   use_database: bool = True,
-                  external_data: Optional[ExternalData] = None) -> bytes:
+                  external_data: Optional[ExternalData] = None,
+                  stream: bool = False) -> Union[bytes, io.IOBase]:
         """
         Query method that simply returns the raw ClickHouse format bytes
         :param query: Query statement/format string
@@ -348,7 +350,7 @@ class Client(ABC):
         """
         Query method that returns the results as a StreamContext.  For parameter values, see the
         create_query_context method
-        :return: Pandas dataframe representing the result set
+        :return: Generator that yields a Pandas dataframe per block representing the result set
         """
         return self._context_query(locals(), use_numpy=True,
                                    as_pandas=True,
@@ -462,6 +464,39 @@ class Client(ABC):
         :param external_data ClickHouse "external data" to send with query
         :return: PyArrow.Table
         """
+        settings = self._update_arrow_settings(settings, use_strings)
+        return to_arrow(self.raw_query(query,
+                                       parameters,
+                                       settings,
+                                       fmt='Arrow',
+                                       external_data=external_data))
+
+    def query_arrow_stream(self,
+                           query: str,
+                           parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
+                           settings: Optional[Dict[str, Any]] = None,
+                           use_strings: Optional[bool] = None,
+                           external_data: Optional[ExternalData] = None) -> StreamContext:
+        """
+        Query method that returns the results as a stream of Arrow tables
+        :param query: Query statement/format string
+        :param parameters: Optional dictionary used to format the query
+        :param settings: Optional dictionary of ClickHouse settings (key/string values)
+        :param use_strings:  Convert ClickHouse String type to Arrow string type (instead of binary)
+        :param external_data: ClickHouse "external data" to send with query
+        :return: Generator that yields a PyArrow.Table per block representing the result set
+        """
+        settings = self._update_arrow_settings(settings, use_strings)
+        return to_arrow_batches(self.raw_query(query,
+                                               parameters,
+                                               settings,
+                                               fmt='ArrowStream',
+                                               external_data=external_data,
+                                               stream=True))
+
+    def _update_arrow_settings(self,
+                               settings: Optional[Dict[str, Any]],
+                               use_strings: Optional[bool]) -> Dict[str, Any]:
         settings = dict_copy(settings)
         if self.database:
             settings['database'] = self.database
@@ -473,11 +508,7 @@ class Client(ABC):
             if not str_status.is_writable:
                 raise OperationalError(f'Cannot change readonly {arrow_str_setting} to {use_strings}')
             settings[arrow_str_setting] = '1' if use_strings else '0'
-        return to_arrow(self.raw_query(query,
-                                       parameters,
-                                       settings,
-                                       fmt='Arrow',
-                                       external_data=external_data))
+        return settings
 
     @abstractmethod
     def command(self,
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/common.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/common.py
index 71adb00321..84a91c9415 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/common.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/common.py
@@ -125,7 +125,7 @@ def coerce_int(val: Optional[Union[str, int]]) -> int:
 def coerce_bool(val: Optional[Union[str, bool]]):
     if not val:
         return False
-    return val in (True, 'True', 'true', '1')
+    return val is True or (isinstance(val, str) and val.lower() in ('true', '1', 'y', 'yes'))
 
 
 class SliceView(Sequence):
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/dataconv.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/dataconv.py
index c1a9b62aad..5acc49830e 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/dataconv.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/dataconv.py
@@ -122,7 +122,7 @@ def write_str_col(column: Sequence, nullable: bool, encoding: Optional[str], des
             if encoding:
                 x = x.encode(encoding)
             else:
-                x = b''
+                x = bytes(x)
             sz = len(x)
             while True:
                 b = sz & 0x7f
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py
index c4a2da2393..1a35470b43 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py
@@ -449,8 +449,11 @@ class HttpClient(Client):
 
     def raw_query(self, query: str,
                   parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
-                  settings: Optional[Dict[str, Any]] = None, fmt: str = None,
-                  use_database: bool = True, external_data: Optional[ExternalData] = None) -> bytes:
+                  settings: Optional[Dict[str, Any]] = None,
+                  fmt: str = None,
+                  use_database: bool = True,
+                  external_data: Optional[ExternalData] = None,
+                  stream: bool = False) -> Union[bytes, HTTPResponse]:
         """
         See BaseClient doc_string for this method
         """
@@ -469,7 +472,8 @@ class HttpClient(Client):
         else:
             body = final_query
             fields = None
-        return self._raw_request(body, params, fields=fields).data
+        response = self._raw_request(body, params, fields=fields, stream=stream)
+        return response if stream else response.data
 
     def close(self):
         if self._owns_pool_manager:
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py
index 2483d61222..6ad3fae9f1 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py
@@ -5,6 +5,7 @@ import uuid
 import pytz
 
 from enum import Enum
+from io import IOBase
 from typing import Any, Tuple, Dict, Sequence, Optional, Union, Generator
 from datetime import date, datetime, tzinfo
 
@@ -489,6 +490,12 @@ def to_arrow(content: bytes):
     return reader.read_all()
 
 
+def to_arrow_batches(buffer: IOBase) -> StreamContext:
+    pyarrow = check_arrow()
+    reader = pyarrow.ipc.open_stream(buffer)
+    return StreamContext(buffer, reader)
+
+
 def arrow_buffer(table) -> Tuple[Sequence[str], bytes]:
     pyarrow = check_arrow()
     sink = pyarrow.BufferOutputStream()
diff --git a/contrib/python/clickhouse-connect/ya.make b/contrib/python/clickhouse-connect/ya.make
index d082921604..cd5de03ec7 100644
--- a/contrib/python/clickhouse-connect/ya.make
+++ b/contrib/python/clickhouse-connect/ya.make
@@ -2,7 +2,7 @@
 
 PY3_LIBRARY()
 
-VERSION(0.7.3)
+VERSION(0.7.4)
 
 LICENSE(Apache-2.0)
 
diff --git a/contrib/python/hypothesis/py3/.dist-info/METADATA b/contrib/python/hypothesis/py3/.dist-info/METADATA
index 17a8050e56..59c5ae6875 100644
--- a/contrib/python/hypothesis/py3/.dist-info/METADATA
+++ b/contrib/python/hypothesis/py3/.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hypothesis
-Version: 6.99.12
+Version: 6.99.13
 Summary: A library for property-based testing
 Home-page: https://hypothesis.works
 Author: David R. MacIver and Zac Hatfield-Dodds
@@ -41,7 +41,7 @@ Requires-Dist: exceptiongroup >=1.0.0 ; python_version < "3.11"
 Provides-Extra: all
 Requires-Dist: black >=19.10b0 ; extra == 'all'
 Requires-Dist: click >=7.0 ; extra == 'all'
-Requires-Dist: crosshair-tool >=0.0.51 ; extra == 'all'
+Requires-Dist: crosshair-tool >=0.0.53 ; extra == 'all'
 Requires-Dist: django >=3.2 ; extra == 'all'
 Requires-Dist: dpcontracts >=0.4 ; extra == 'all'
 Requires-Dist: hypothesis-crosshair >=0.0.2 ; extra == 'all'
@@ -64,7 +64,7 @@ Provides-Extra: codemods
 Requires-Dist: libcst >=0.3.16 ; extra == 'codemods'
 Provides-Extra: crosshair
 Requires-Dist: hypothesis-crosshair >=0.0.2 ; extra == 'crosshair'
-Requires-Dist: crosshair-tool >=0.0.51 ; extra == 'crosshair'
+Requires-Dist: crosshair-tool >=0.0.53 ; extra == 'crosshair'
 Provides-Extra: dateutil
 Requires-Dist: python-dateutil >=1.4 ; extra == 'dateutil'
 Provides-Extra: django
diff --git a/contrib/python/hypothesis/py3/hypothesis/core.py b/contrib/python/hypothesis/py3/hypothesis/core.py
index 402382c6aa..ccd5c43b6e 100644
--- a/contrib/python/hypothesis/py3/hypothesis/core.py
+++ b/contrib/python/hypothesis/py3/hypothesis/core.py
@@ -786,7 +786,6 @@ class StateForActualGivenExecution:
         self.explain_traces = defaultdict(set)
         self._start_timestamp = time.time()
         self._string_repr = ""
-        self._jsonable_arguments = {}
         self._timing_features = {}
 
     @property
@@ -913,7 +912,7 @@ class StateForActualGivenExecution:
                     ),
                 )
                 self._string_repr = printer.getvalue()
-                self._jsonable_arguments = {
+                data._observability_arguments = {
                     **dict(enumerate(map(to_jsonable, args))),
                     **{k: to_jsonable(v) for k, v in kwargs.items()},
                 }
@@ -1085,19 +1084,23 @@ class StateForActualGivenExecution:
             # Conditional here so we can save some time constructing the payload; in
             # other cases (without coverage) it's cheap enough to do that regardless.
             if TESTCASE_CALLBACKS:
-                if self.failed_normally or self.failed_due_to_deadline:
-                    phase = "shrink"
-                elif runner := getattr(self, "_runner", None):
+                if runner := getattr(self, "_runner", None):
                     phase = runner._current_phase
+                elif self.failed_normally or self.failed_due_to_deadline:
+                    phase = "shrink"
                 else:  # pragma: no cover  # in case of messing with internals
                     phase = "unknown"
+                backend_desc = f", using backend={self.settings.backend!r}" * (
+                    self.settings.backend != "hypothesis"
+                    and not getattr(runner, "_switch_to_hypothesis_provider", False)
+                )
                 tc = make_testcase(
                     start_timestamp=self._start_timestamp,
                     test_name_or_nodeid=self.test_identifier,
                     data=data,
-                    how_generated=f"generated during {phase} phase",
+                    how_generated=f"during {phase} phase{backend_desc}",
                     string_repr=self._string_repr,
-                    arguments={**self._jsonable_arguments, **data._observability_args},
+                    arguments=data._observability_args,
                     timing=self._timing_features,
                     coverage=tractable_coverage_report(trace) or None,
                     phase=phase,
@@ -1217,7 +1220,7 @@ class StateForActualGivenExecution:
                     "status": "passed" if sys.exc_info()[0] else "failed",
                     "status_reason": str(origin or "unexpected/flaky pass"),
                     "representation": self._string_repr,
-                    "arguments": self._jsonable_arguments,
+                    "arguments": ran_example._observability_args,
                     "how_generated": "minimal failing example",
                     "features": {
                         **{
diff --git a/contrib/python/hypothesis/py3/hypothesis/extra/_patching.py b/contrib/python/hypothesis/py3/hypothesis/extra/_patching.py
index 8f53076d72..d3678e3f9f 100644
--- a/contrib/python/hypothesis/py3/hypothesis/extra/_patching.py
+++ b/contrib/python/hypothesis/py3/hypothesis/extra/_patching.py
@@ -121,7 +121,7 @@ class AddExamplesCodemod(VisitorBasedCodemodCommand):
                     cst.Module([]).code_for_node(via),
                     mode=black.FileMode(line_length=self.line_length),
                 )
-            except ImportError:
+            except (ImportError, AttributeError):
                 return None  # See https://github.com/psf/black/pull/4224
             via = cst.parse_expression(pretty.strip())
         return cst.Decorator(via)
diff --git a/contrib/python/hypothesis/py3/hypothesis/extra/array_api.py b/contrib/python/hypothesis/py3/hypothesis/extra/array_api.py
index ce0993ab3b..8c82f63114 100644
--- a/contrib/python/hypothesis/py3/hypothesis/extra/array_api.py
+++ b/contrib/python/hypothesis/py3/hypothesis/extra/array_api.py
@@ -424,12 +424,12 @@ class ArrayStrategy(st.SearchStrategy):
             while elements.more():
                 i = data.draw_integer(0, self.array_size - 1)
                 if i in assigned:
-                    elements.reject()
+                    elements.reject("chose an array index we've already used")
                     continue
                 val = data.draw(self.elements_strategy)
                 if self.unique:
                     if val in seen:
-                        elements.reject()
+                        elements.reject("chose an element we've already used")
                         continue
                     else:
                         seen.add(val)
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py
index 3f15a974ef..701105bd5e 100644
--- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py
+++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py
@@ -2273,13 +2273,13 @@ class ConjectureData:
         # (in fact, it is possible that giving up early here results in more time
         # for useful shrinks to run).
         if node.ir_type != ir_type:
-            self.mark_invalid()
+            self.mark_invalid(f"(internal) want a {ir_type} but have a {node.ir_type}")
 
         # if a node has different kwargs (and so is misaligned), but has a value
         # that is allowed by the expected kwargs, then we can coerce this node
         # into an aligned one by using its value. It's unclear how useful this is.
         if not ir_value_permitted(node.value, node.ir_type, kwargs):
-            self.mark_invalid()
+            self.mark_invalid(f"(internal) got a {ir_type} but outside the valid range")
 
         return node
 
@@ -2348,7 +2348,7 @@ class ConjectureData:
         strategy.validate()
 
         if strategy.is_empty:
-            self.mark_invalid("strategy is empty")
+            self.mark_invalid(f"empty strategy {self!r}")
 
         if self.depth >= MAX_DEPTH:
             self.mark_invalid("max depth exceeded")
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py
index 6791b28e04..ddf8c0e090 100644
--- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py
+++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py
@@ -809,6 +809,15 @@ class ConjectureRunner:
 
             self.test_function(data)
 
+            if (
+                data.status == Status.OVERRUN
+                and max_length < BUFFER_SIZE
+                and "invalid because" not in data.events
+            ):
+                data.events["invalid because"] = (
+                    "reduced max size for early examples (avoids flaky health checks)"
+                )
+
             self.generate_mutations_from(data)
 
             # Although the optimisations are logically a distinct phase, we
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/coverage.py b/contrib/python/hypothesis/py3/hypothesis/internal/coverage.py
index 40f609b75a..c71ce28642 100644
--- a/contrib/python/hypothesis/py3/hypothesis/internal/coverage.py
+++ b/contrib/python/hypothesis/py3/hypothesis/internal/coverage.py
@@ -61,7 +61,7 @@ if IN_COVERAGE_TESTS:
         if key in written:
             return
         written.add(key)
-        with open("branch-check", mode="a", encoding="utf-8") as log:
+        with open(f"branch-check-{os.getpid()}", mode="a", encoding="utf-8") as log:
             log.write(json.dumps({"name": name, "value": value}) + "\n")
 
     description_stack = []
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/observability.py b/contrib/python/hypothesis/py3/hypothesis/internal/observability.py
index aff19d805c..a532d054cd 100644
--- a/contrib/python/hypothesis/py3/hypothesis/internal/observability.py
+++ b/contrib/python/hypothesis/py3/hypothesis/internal/observability.py
@@ -36,7 +36,7 @@ def make_testcase(
     start_timestamp: float,
     test_name_or_nodeid: str,
     data: ConjectureData,
-    how_generated: str = "unknown",
+    how_generated: str,
     string_repr: str = "<unknown>",
     arguments: Optional[dict] = None,
     timing: Dict[str, float],
diff --git a/contrib/python/hypothesis/py3/hypothesis/stateful.py b/contrib/python/hypothesis/py3/hypothesis/stateful.py
index d5af39b5c0..8c8272df7b 100644
--- a/contrib/python/hypothesis/py3/hypothesis/stateful.py
+++ b/contrib/python/hypothesis/py3/hypothesis/stateful.py
@@ -478,7 +478,7 @@ class BundleReferenceStrategy(SearchStrategy):
         machine = data.draw(self_strategy)
         bundle = machine.bundle(self.name)
         if not bundle:
-            data.mark_invalid()
+            data.mark_invalid(f"Cannot draw from empty bundle {self.name!r}")
         # Shrink towards the right rather than the left. This makes it easier
         # to delete data generated earlier, as when the error is towards the
         # end there can be a lot of hard to remove padding.
diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/datetime.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/datetime.py
index f2c33fa8c5..427d8c5ed2 100644
--- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/datetime.py
+++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/datetime.py
@@ -155,7 +155,7 @@ class DatetimeStrategy(SearchStrategy):
 
         # If we happened to end up with a disallowed imaginary time, reject it.
         if (not self.allow_imaginary) and datetime_does_not_exist(result):
-            data.mark_invalid("nonexistent datetime")
+            data.mark_invalid(f"{result} does not exist (usually a DST transition)")
         return result
 
     def draw_naive_datetime_and_combine(self, data, tz):
diff --git a/contrib/python/hypothesis/py3/hypothesis/version.py b/contrib/python/hypothesis/py3/hypothesis/version.py
index 14d8902f65..27443504c2 100644
--- a/contrib/python/hypothesis/py3/hypothesis/version.py
+++ b/contrib/python/hypothesis/py3/hypothesis/version.py
@@ -8,5 +8,5 @@
 # v. 2.0. If a copy of the MPL was not distributed with this file, You can
 # obtain one at https://mozilla.org/MPL/2.0/.
 
-__version_info__ = (6, 99, 12)
+__version_info__ = (6, 99, 13)
 __version__ = ".".join(map(str, __version_info__))
diff --git a/contrib/python/hypothesis/py3/ya.make b/contrib/python/hypothesis/py3/ya.make
index 604d175b40..dbfb51e4f1 100644
--- a/contrib/python/hypothesis/py3/ya.make
+++ b/contrib/python/hypothesis/py3/ya.make
@@ -2,7 +2,7 @@
 
 PY3_LIBRARY()
 
-VERSION(6.99.12)
+VERSION(6.99.13)
 
 LICENSE(MPL-2.0)
author	robot-piglet <robot-piglet@yandex-team.com>	2024-04-08 10:44:40 +0300
committer	robot-piglet <robot-piglet@yandex-team.com>	2024-04-08 12:32:02 +0300
commit	32db6a72f86c4e0a30084436dc0a9b4b5e28c393 (patch)
tree	e7bf9d5688fd1f09620183410d3a936de9d7c08a
parent	72eeab5172756159750eef875745e2a6f5b0004f (diff)
download	ydb-32db6a72f86c4e0a30084436dc0a9b4b5e28c393.tar.gz