diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2024-04-08 10:44:40 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2024-04-08 12:32:02 +0300 |
commit | 32db6a72f86c4e0a30084436dc0a9b4b5e28c393 (patch) | |
tree | e7bf9d5688fd1f09620183410d3a936de9d7c08a | |
parent | 72eeab5172756159750eef875745e2a6f5b0004f (diff) | |
download | ydb-32db6a72f86c4e0a30084436dc0a9b4b5e28c393.tar.gz |
Intermediate changes
20 files changed, 93 insertions, 39 deletions
diff --git a/contrib/python/clickhouse-connect/.dist-info/METADATA b/contrib/python/clickhouse-connect/.dist-info/METADATA index 9b873eb781..7e568e330b 100644 --- a/contrib/python/clickhouse-connect/.dist-info/METADATA +++ b/contrib/python/clickhouse-connect/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: clickhouse-connect -Version: 0.7.3 +Version: 0.7.4 Summary: ClickHouse Database Core Driver for Python, Pandas, and Superset Home-page: https://github.com/ClickHouse/clickhouse-connect Author: ClickHouse Inc. diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py b/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py index 0ab8b1bafd..33c23e6624 100644 --- a/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py +++ b/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py @@ -1 +1 @@ -version = '0.7.3' +version = '0.7.4' diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py index ae6d9b7a8e..cf16ec24ec 100644 --- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py +++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py @@ -19,7 +19,8 @@ from clickhouse_connect.driver.external import ExternalData from clickhouse_connect.driver.insert import InsertContext from clickhouse_connect.driver.summary import QuerySummary from clickhouse_connect.driver.models import ColumnDef, SettingDef, SettingStatus -from clickhouse_connect.driver.query import QueryResult, to_arrow, QueryContext, arrow_buffer, quote_identifier +from clickhouse_connect.driver.query import QueryResult, to_arrow, to_arrow_batches, QueryContext, arrow_buffer, \ + quote_identifier io.DEFAULT_BUFFER_SIZE = 1024 * 256 logger = logging.getLogger(__name__) @@ -255,7 +256,8 @@ class Client(ABC): settings: Optional[Dict[str, Any]] = None, fmt: str = None, use_database: bool = True, - external_data: Optional[ExternalData] = None) -> bytes: + external_data: Optional[ExternalData] = None, + stream: bool = False) -> Union[bytes, io.IOBase]: """ Query method that simply returns the raw ClickHouse format bytes :param query: Query statement/format string @@ -348,7 +350,7 @@ class Client(ABC): """ Query method that returns the results as a StreamContext. For parameter values, see the create_query_context method - :return: Pandas dataframe representing the result set + :return: Generator that yields a Pandas dataframe per block representing the result set """ return self._context_query(locals(), use_numpy=True, as_pandas=True, @@ -462,6 +464,39 @@ class Client(ABC): :param external_data ClickHouse "external data" to send with query :return: PyArrow.Table """ + settings = self._update_arrow_settings(settings, use_strings) + return to_arrow(self.raw_query(query, + parameters, + settings, + fmt='Arrow', + external_data=external_data)) + + def query_arrow_stream(self, + query: str, + parameters: Optional[Union[Sequence, Dict[str, Any]]] = None, + settings: Optional[Dict[str, Any]] = None, + use_strings: Optional[bool] = None, + external_data: Optional[ExternalData] = None) -> StreamContext: + """ + Query method that returns the results as a stream of Arrow tables + :param query: Query statement/format string + :param parameters: Optional dictionary used to format the query + :param settings: Optional dictionary of ClickHouse settings (key/string values) + :param use_strings: Convert ClickHouse String type to Arrow string type (instead of binary) + :param external_data ClickHouse "external data" to send with query + :return: Generator that yields a PyArrow.Table for per block representing the result set + """ + settings = self._update_arrow_settings(settings, use_strings) + return to_arrow_batches(self.raw_query(query, + parameters, + settings, + fmt='ArrowStream', + external_data=external_data, + stream=True)) + + def _update_arrow_settings(self, + settings: Optional[Dict[str, Any]], + use_strings: Optional[bool]) -> Dict[str, Any]: settings = dict_copy(settings) if self.database: settings['database'] = self.database @@ -473,11 +508,7 @@ class Client(ABC): if not str_status.is_writable: raise OperationalError(f'Cannot change readonly {arrow_str_setting} to {use_strings}') settings[arrow_str_setting] = '1' if use_strings else '0' - return to_arrow(self.raw_query(query, - parameters, - settings, - fmt='Arrow', - external_data=external_data)) + return settings @abstractmethod def command(self, diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/common.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/common.py index 71adb00321..84a91c9415 100644 --- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/common.py +++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/common.py @@ -125,7 +125,7 @@ def coerce_int(val: Optional[Union[str, int]]) -> int: def coerce_bool(val: Optional[Union[str, bool]]): if not val: return False - return val in (True, 'True', 'true', '1') + return val is True or (isinstance(val, str) and val.lower() in ('true', '1', 'y', 'yes')) class SliceView(Sequence): diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/dataconv.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/dataconv.py index c1a9b62aad..5acc49830e 100644 --- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/dataconv.py +++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/dataconv.py @@ -122,7 +122,7 @@ def write_str_col(column: Sequence, nullable: bool, encoding: Optional[str], des if encoding: x = x.encode(encoding) else: - x = b'' + x = bytes(x) sz = len(x) while True: b = sz & 0x7f diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py index c4a2da2393..1a35470b43 100644 --- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py +++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py @@ -449,8 +449,11 @@ class HttpClient(Client): def raw_query(self, query: str, parameters: Optional[Union[Sequence, Dict[str, Any]]] = None, - settings: Optional[Dict[str, Any]] = None, fmt: str = None, - use_database: bool = True, external_data: Optional[ExternalData] = None) -> bytes: + settings: Optional[Dict[str, Any]] = None, + fmt: str = None, + use_database: bool = True, + external_data: Optional[ExternalData] = None, + stream: bool = False) -> Union[bytes, HTTPResponse]: """ See BaseClient doc_string for this method """ @@ -469,7 +472,8 @@ class HttpClient(Client): else: body = final_query fields = None - return self._raw_request(body, params, fields=fields).data + response = self._raw_request(body, params, fields=fields, stream=stream) + return response if stream else response.data def close(self): if self._owns_pool_manager: diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py index 2483d61222..6ad3fae9f1 100644 --- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py +++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py @@ -5,6 +5,7 @@ import uuid import pytz from enum import Enum +from io import IOBase from typing import Any, Tuple, Dict, Sequence, Optional, Union, Generator from datetime import date, datetime, tzinfo @@ -489,6 +490,12 @@ def to_arrow(content: bytes): return reader.read_all() +def to_arrow_batches(buffer: IOBase) -> StreamContext: + pyarrow = check_arrow() + reader = pyarrow.ipc.open_stream(buffer) + return StreamContext(buffer, reader) + + def arrow_buffer(table) -> Tuple[Sequence[str], bytes]: pyarrow = check_arrow() sink = pyarrow.BufferOutputStream() diff --git a/contrib/python/clickhouse-connect/ya.make b/contrib/python/clickhouse-connect/ya.make index d082921604..cd5de03ec7 100644 --- a/contrib/python/clickhouse-connect/ya.make +++ b/contrib/python/clickhouse-connect/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(0.7.3) +VERSION(0.7.4) LICENSE(Apache-2.0) diff --git a/contrib/python/hypothesis/py3/.dist-info/METADATA b/contrib/python/hypothesis/py3/.dist-info/METADATA index 17a8050e56..59c5ae6875 100644 --- a/contrib/python/hypothesis/py3/.dist-info/METADATA +++ b/contrib/python/hypothesis/py3/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: hypothesis -Version: 6.99.12 +Version: 6.99.13 Summary: A library for property-based testing Home-page: https://hypothesis.works Author: David R. MacIver and Zac Hatfield-Dodds @@ -41,7 +41,7 @@ Requires-Dist: exceptiongroup >=1.0.0 ; python_version < "3.11" Provides-Extra: all Requires-Dist: black >=19.10b0 ; extra == 'all' Requires-Dist: click >=7.0 ; extra == 'all' -Requires-Dist: crosshair-tool >=0.0.51 ; extra == 'all' +Requires-Dist: crosshair-tool >=0.0.53 ; extra == 'all' Requires-Dist: django >=3.2 ; extra == 'all' Requires-Dist: dpcontracts >=0.4 ; extra == 'all' Requires-Dist: hypothesis-crosshair >=0.0.2 ; extra == 'all' @@ -64,7 +64,7 @@ Provides-Extra: codemods Requires-Dist: libcst >=0.3.16 ; extra == 'codemods' Provides-Extra: crosshair Requires-Dist: hypothesis-crosshair >=0.0.2 ; extra == 'crosshair' -Requires-Dist: crosshair-tool >=0.0.51 ; extra == 'crosshair' +Requires-Dist: crosshair-tool >=0.0.53 ; extra == 'crosshair' Provides-Extra: dateutil Requires-Dist: python-dateutil >=1.4 ; extra == 'dateutil' Provides-Extra: django diff --git a/contrib/python/hypothesis/py3/hypothesis/core.py b/contrib/python/hypothesis/py3/hypothesis/core.py index 402382c6aa..ccd5c43b6e 100644 --- a/contrib/python/hypothesis/py3/hypothesis/core.py +++ b/contrib/python/hypothesis/py3/hypothesis/core.py @@ -786,7 +786,6 @@ class StateForActualGivenExecution: self.explain_traces = defaultdict(set) self._start_timestamp = time.time() self._string_repr = "" - self._jsonable_arguments = {} self._timing_features = {} @property @@ -913,7 +912,7 @@ class StateForActualGivenExecution: ), ) self._string_repr = printer.getvalue() - self._jsonable_arguments = { + data._observability_arguments = { **dict(enumerate(map(to_jsonable, args))), **{k: to_jsonable(v) for k, v in kwargs.items()}, } @@ -1085,19 +1084,23 @@ class StateForActualGivenExecution: # Conditional here so we can save some time constructing the payload; in # other cases (without coverage) it's cheap enough to do that regardless. if TESTCASE_CALLBACKS: - if self.failed_normally or self.failed_due_to_deadline: - phase = "shrink" - elif runner := getattr(self, "_runner", None): + if runner := getattr(self, "_runner", None): phase = runner._current_phase + elif self.failed_normally or self.failed_due_to_deadline: + phase = "shrink" else: # pragma: no cover # in case of messing with internals phase = "unknown" + backend_desc = f", using backend={self.settings.backend!r}" * ( + self.settings.backend != "hypothesis" + and not getattr(runner, "_switch_to_hypothesis_provider", False) + ) tc = make_testcase( start_timestamp=self._start_timestamp, test_name_or_nodeid=self.test_identifier, data=data, - how_generated=f"generated during {phase} phase", + how_generated=f"during {phase} phase{backend_desc}", string_repr=self._string_repr, - arguments={**self._jsonable_arguments, **data._observability_args}, + arguments=data._observability_args, timing=self._timing_features, coverage=tractable_coverage_report(trace) or None, phase=phase, @@ -1217,7 +1220,7 @@ class StateForActualGivenExecution: "status": "passed" if sys.exc_info()[0] else "failed", "status_reason": str(origin or "unexpected/flaky pass"), "representation": self._string_repr, - "arguments": self._jsonable_arguments, + "arguments": ran_example._observability_args, "how_generated": "minimal failing example", "features": { **{ diff --git a/contrib/python/hypothesis/py3/hypothesis/extra/_patching.py b/contrib/python/hypothesis/py3/hypothesis/extra/_patching.py index 8f53076d72..d3678e3f9f 100644 --- a/contrib/python/hypothesis/py3/hypothesis/extra/_patching.py +++ b/contrib/python/hypothesis/py3/hypothesis/extra/_patching.py @@ -121,7 +121,7 @@ class AddExamplesCodemod(VisitorBasedCodemodCommand): cst.Module([]).code_for_node(via), mode=black.FileMode(line_length=self.line_length), ) - except ImportError: + except (ImportError, AttributeError): return None # See https://github.com/psf/black/pull/4224 via = cst.parse_expression(pretty.strip()) return cst.Decorator(via) diff --git a/contrib/python/hypothesis/py3/hypothesis/extra/array_api.py b/contrib/python/hypothesis/py3/hypothesis/extra/array_api.py index ce0993ab3b..8c82f63114 100644 --- a/contrib/python/hypothesis/py3/hypothesis/extra/array_api.py +++ b/contrib/python/hypothesis/py3/hypothesis/extra/array_api.py @@ -424,12 +424,12 @@ class ArrayStrategy(st.SearchStrategy): while elements.more(): i = data.draw_integer(0, self.array_size - 1) if i in assigned: - elements.reject() + elements.reject("chose an array index we've already used") continue val = data.draw(self.elements_strategy) if self.unique: if val in seen: - elements.reject() + elements.reject("chose an element we've already used") continue else: seen.add(val) diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py index 3f15a974ef..701105bd5e 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py @@ -2273,13 +2273,13 @@ class ConjectureData: # (in fact, it is possible that giving up early here results in more time # for useful shrinks to run). if node.ir_type != ir_type: - self.mark_invalid() + self.mark_invalid(f"(internal) want a {ir_type} but have a {node.ir_type}") # if a node has different kwargs (and so is misaligned), but has a value # that is allowed by the expected kwargs, then we can coerce this node # into an aligned one by using its value. It's unclear how useful this is. if not ir_value_permitted(node.value, node.ir_type, kwargs): - self.mark_invalid() + self.mark_invalid(f"(internal) got a {ir_type} but outside the valid range") return node @@ -2348,7 +2348,7 @@ class ConjectureData: strategy.validate() if strategy.is_empty: - self.mark_invalid("strategy is empty") + self.mark_invalid(f"empty strategy {self!r}") if self.depth >= MAX_DEPTH: self.mark_invalid("max depth exceeded") diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py index 6791b28e04..ddf8c0e090 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py @@ -809,6 +809,15 @@ class ConjectureRunner: self.test_function(data) + if ( + data.status == Status.OVERRUN + and max_length < BUFFER_SIZE + and "invalid because" not in data.events + ): + data.events["invalid because"] = ( + "reduced max size for early examples (avoids flaky health checks)" + ) + self.generate_mutations_from(data) # Although the optimisations are logically a distinct phase, we diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/coverage.py b/contrib/python/hypothesis/py3/hypothesis/internal/coverage.py index 40f609b75a..c71ce28642 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/coverage.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/coverage.py @@ -61,7 +61,7 @@ if IN_COVERAGE_TESTS: if key in written: return written.add(key) - with open("branch-check", mode="a", encoding="utf-8") as log: + with open(f"branch-check-{os.getpid()}", mode="a", encoding="utf-8") as log: log.write(json.dumps({"name": name, "value": value}) + "\n") description_stack = [] diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/observability.py b/contrib/python/hypothesis/py3/hypothesis/internal/observability.py index aff19d805c..a532d054cd 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/observability.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/observability.py @@ -36,7 +36,7 @@ def make_testcase( start_timestamp: float, test_name_or_nodeid: str, data: ConjectureData, - how_generated: str = "unknown", + how_generated: str, string_repr: str = "<unknown>", arguments: Optional[dict] = None, timing: Dict[str, float], diff --git a/contrib/python/hypothesis/py3/hypothesis/stateful.py b/contrib/python/hypothesis/py3/hypothesis/stateful.py index d5af39b5c0..8c8272df7b 100644 --- a/contrib/python/hypothesis/py3/hypothesis/stateful.py +++ b/contrib/python/hypothesis/py3/hypothesis/stateful.py @@ -478,7 +478,7 @@ class BundleReferenceStrategy(SearchStrategy): machine = data.draw(self_strategy) bundle = machine.bundle(self.name) if not bundle: - data.mark_invalid() + data.mark_invalid(f"Cannot draw from empty bundle {self.name!r}") # Shrink towards the right rather than the left. This makes it easier # to delete data generated earlier, as when the error is towards the # end there can be a lot of hard to remove padding. diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/datetime.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/datetime.py index f2c33fa8c5..427d8c5ed2 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/datetime.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/datetime.py @@ -155,7 +155,7 @@ class DatetimeStrategy(SearchStrategy): # If we happened to end up with a disallowed imaginary time, reject it. if (not self.allow_imaginary) and datetime_does_not_exist(result): - data.mark_invalid("nonexistent datetime") + data.mark_invalid(f"{result} does not exist (usually a DST transition)") return result def draw_naive_datetime_and_combine(self, data, tz): diff --git a/contrib/python/hypothesis/py3/hypothesis/version.py b/contrib/python/hypothesis/py3/hypothesis/version.py index 14d8902f65..27443504c2 100644 --- a/contrib/python/hypothesis/py3/hypothesis/version.py +++ b/contrib/python/hypothesis/py3/hypothesis/version.py @@ -8,5 +8,5 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. -__version_info__ = (6, 99, 12) +__version_info__ = (6, 99, 13) __version__ = ".".join(map(str, __version_info__)) diff --git a/contrib/python/hypothesis/py3/ya.make b/contrib/python/hypothesis/py3/ya.make index 604d175b40..dbfb51e4f1 100644 --- a/contrib/python/hypothesis/py3/ya.make +++ b/contrib/python/hypothesis/py3/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(6.99.12) +VERSION(6.99.13) LICENSE(MPL-2.0) |